%{
/***************************************************************************
 *
 * $Id: lexer.l 184 2011-02-28 21:38:28Z Michael.McTernan $
 *
 * Mscgen language lexer definition.
 * Copyright (C) 2009 Michael C McTernan, Michael.McTernan.2001@cs.bris.ac.uk
 *
 * This file is part of msclib.
 *
 * Msc is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation; either version 2.1 of the License, or
 * (at your option) any later version.
 *
 * Msclib is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with msclib; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 ***************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "mscgen_config.h"
#include "mscgen_msc.h"
#include "mscgen_bool.h"
#include "mscgen_safe.h"
#include "mscgen_lexer.h"
#include "mscgen_language.h"  /* Token definitions from Yacc/Bison */

/* Number of the input line currently being scanned (1-based), kept for
 * error reporting.
 */
static unsigned long lex_linenum = 1;

/* Heap-allocated copy of the text of the current input line, or NULL if no
 * line has been captured yet.  Owned by this file; released by newline(),
 * lex_destroy() and lex_resetparser().
 */
static char *lex_line = NULL;

/* Set to TRUE when the input starts with a UTF-8 byte order mark. */
static Boolean lex_utf8 = FALSE;

/* Local function prototypes */
static void        newline(const char *text, unsigned int n);
static char       *trimQstring(char *s);
static const char *stateToString(int state);
%}

/* Not used, so prevent compiler warning */
%option never-interactive
%option noinput
%option noyywrap

/* Start conditions:
 *  INITIAL     only looks at the very start of the input: it detects an
 *              optional UTF-8 byte order mark (EF BB BF) and then hands
 *              over to BODY.
 *  BODY        the main scanning state that produces the language tokens.
 *  IN_COMMENT  entered on a C style comment opener and left again on the
 *              matching closer; the comment text is discarded but line
 *              ends are still counted.
 */
%x IN_COMMENT
%x BODY
%%

<INITIAL>{
\xef\xbb\xbf lex_utf8 = TRUE; BEGIN(BODY);
(\r\n).*     newline(yytext, 2); BEGIN(BODY);
(\r|\n).*    newline(yytext, 1); BEGIN(BODY);
.            unput(yytext[0]); BEGIN(BODY);
}

<IN_COMMENT>{
"*/"         BEGIN(BODY);
[^*\n]+      /* discard comment text */
"*"          /* discard a lone '*' that does not close the comment */
(\r\n).*     newline(yytext, 2);
(\r|\n).*    newline(yytext, 1);
}

<BODY>{

"/*"         BEGIN(IN_COMMENT);

(\r\n).*     newline(yytext, 2);
(\r|\n).*    newline(yytext, 1);

#.*$         /* Ignore lines after '#' */
\/\/.*$      /* Ignore lines after '//' */

msc                                   return TOK_MSC;
HSCALE|hscale                         yylval.optType = MSC_OPT_HSCALE;               return TOK_OPT_HSCALE;
WIDTH|width                           yylval.optType = MSC_OPT_WIDTH;                return TOK_OPT_WIDTH;
ARCGRADIENT|arcgradient               yylval.optType = MSC_OPT_ARCGRADIENT;          return TOK_OPT_ARCGRADIENT;
WORDWRAPARCS|wordwraparcs             yylval.optType = MSC_OPT_WORDWRAPARCS;         return TOK_OPT_WORDWRAPARCS;
URL|url                               yylval.attribType = MSC_ATTR_URL;              return TOK_ATTR_URL;
LABEL|label                           yylval.attribType = MSC_ATTR_LABEL;            return TOK_ATTR_LABEL;
IDURL|idurl                           yylval.attribType = MSC_ATTR_IDURL;            return TOK_ATTR_IDURL;
ID|id                                 yylval.attribType = MSC_ATTR_ID;               return TOK_ATTR_ID;
LINECOLO(U?)R|linecolo(u?)r           yylval.attribType = MSC_ATTR_LINE_COLOUR;      return TOK_ATTR_LINE_COLOUR;
TEXTCOLO(U?)R|textcolo(u?)r           yylval.attribType = MSC_ATTR_TEXT_COLOUR;      return TOK_ATTR_TEXT_COLOUR;
TEXTBGCOLO(U?)R|textbgcolo(u?)r       yylval.attribType = MSC_ATTR_TEXT_BGCOLOUR;    return TOK_ATTR_TEXT_BGCOLOUR;
ARCLINECOLO(U?)R|arclinecolo(u?)r     yylval.attribType = MSC_ATTR_ARC_LINE_COLOUR;  return TOK_ATTR_ARC_LINE_COLOUR;
ARCTEXTCOLO(U?)R|arctextcolo(u?)r     yylval.attribType = MSC_ATTR_ARC_TEXT_COLOUR;  return TOK_ATTR_ARC_TEXT_COLOUR;
ARCTEXTBGCOLO(U?)R|arctextbgcolo(u?)r yylval.attribType = MSC_ATTR_ARC_TEXT_BGCOLOUR; return TOK_ATTR_ARC_TEXT_BGCOLOUR;
ARCSKIP|arcskip                       yylval.attribType = MSC_ATTR_ARC_SKIP;         return TOK_ATTR_ARC_SKIP;
\.\.\.        yylval.arctype = MSC_ARC_DISCO;    return TOK_SPECIAL_ARC;        /* ... */
---           yylval.arctype = MSC_ARC_DIVIDER;  return TOK_SPECIAL_ARC;        /* --- */
\|\|\|        yylval.arctype = MSC_ARC_SPACE;    return TOK_SPECIAL_ARC;        /* ||| */
\<-\>         yylval.arctype = MSC_ARC_SIGNAL;   return TOK_REL_SIG_BI;         /* <-> */
-\>           yylval.arctype = MSC_ARC_SIGNAL;   return TOK_REL_SIG_TO;         /* -> */
\<-           yylval.arctype = MSC_ARC_SIGNAL;   return TOK_REL_SIG_FROM;       /* <- */
--            yylval.arctype = MSC_ARC_SIGNAL;   return TOK_REL_SIG;            /* -- */
-[Xx]         yylval.arctype = MSC_ARC_LOSS;     return TOK_REL_LOSS_TO;        /* -x */
[Xx]-         yylval.arctype = MSC_ARC_LOSS;     return TOK_REL_LOSS_FROM;      /* x- */
\<=\>         yylval.arctype = MSC_ARC_METHOD;   return TOK_REL_METHOD_BI;      /* <=> */
=\>           yylval.arctype = MSC_ARC_METHOD;   return TOK_REL_METHOD_TO;      /* => */
\<=           yylval.arctype = MSC_ARC_METHOD;   return TOK_REL_METHOD_FROM;    /* <= */
==            yylval.arctype = MSC_ARC_METHOD;   return TOK_REL_METHOD;         /* == */
\<\<\>\>      yylval.arctype = MSC_ARC_RETVAL;   return TOK_REL_RETVAL_BI;      /* <<>> */
\>\>          yylval.arctype = MSC_ARC_RETVAL;   return TOK_REL_RETVAL_TO;      /* >> */
\<\<          yylval.arctype = MSC_ARC_RETVAL;   return TOK_REL_RETVAL_FROM;    /* << */
\.\.          yylval.arctype = MSC_ARC_RETVAL;   return TOK_REL_RETVAL;         /* .. */
\<:\>         yylval.arctype = MSC_ARC_DOUBLE;   return TOK_REL_DOUBLE_BI;      /* <:> */
:\>           yylval.arctype = MSC_ARC_DOUBLE;   return TOK_REL_DOUBLE_TO;      /* :> */
\<:           yylval.arctype = MSC_ARC_DOUBLE;   return TOK_REL_DOUBLE_FROM;    /* <: */
::            yylval.arctype = MSC_ARC_DOUBLE;   return TOK_REL_DOUBLE;         /* :: */
\<\<=\>\>     yylval.arctype = MSC_ARC_CALLBACK; return TOK_REL_CALLBACK_BI;    /* <<=>> */
=\>\>         yylval.arctype = MSC_ARC_CALLBACK; return TOK_REL_CALLBACK_TO;    /* =>> */
\<\<=         yylval.arctype = MSC_ARC_CALLBACK; return TOK_REL_CALLBACK_FROM;  /* <<= */
BOX|box       yylval.arctype = MSC_ARC_BOX;      return TOK_REL_BOX;            /* box */
ABOX|abox     yylval.arctype = MSC_ARC_ABOX;     return TOK_REL_ABOX;           /* abox */
RBOX|rbox     yylval.arctype = MSC_ARC_RBOX;     return TOK_REL_RBOX;           /* rbox */
NOTE|note     yylval.arctype = MSC_ARC_NOTE;     return TOK_REL_NOTE;           /* note */
[A-Za-z0-9_]+ yylval.string = strdup_s(yytext); return TOK_STRING;
\"(\\\"|[^\"])*\" yylval.string = trimQstring(strdup_s(yytext)); return TOK_QSTRING;
=             return TOK_EQUAL;
,             return TOK_COMMA;
\;            return TOK_SEMICOLON;
\{            return TOK_OCBRACKET;
\}            return TOK_CCBRACKET;
\[            return TOK_OSBRACKET;
\]            return TOK_CSBRACKET;
\*            return TOK_ASTERISK;
[ \t]+        /* ignore whitespace */;

}


<*>.|\n|\r    return TOK_UNKNOWN;

%%

/* Handle a new line of input.
 *
 * 'text' is the matched text: 'n' line-ending characters followed by the
 * remainder of that line.  The line counter is advanced and a private copy
 * of the line (without the leading line-ending characters) replaces any
 * previously stored one, so that it is available for error reporting.  All
 * but the first 'n' characters are then pushed back with yyless() so the
 * line content itself is still scanned normally.
 *
 * If the copy cannot be allocated the stored line becomes NULL.
 */
static void newline(const char *text, unsigned int n)
{
    lex_linenum++;

    /* free(NULL) is a no-op, so no guard is required */
    free(lex_line);
    lex_line = strdup(text + n);

    yyless(n);
}


/* Trim a multi-line quoted string, in place.
 *
 * This allows quoted strings in the input to span multiple lines but be
 * condensed to a single line of output, e.g.
 *
 *   a->b [label="line 1
 *                line 1 too"];
 *
 * is matched by the lexer as "line 1\n                line 1 too".  Each
 * run of line-ending characters (CR, LF, FF) together with any whitespace
 * that follows it is collapsed into a single space, giving
 * "line 1 line 1 too".  A leading '"' and a trailing '"' are removed.
 *
 * A line-ending run that is followed by nothing (end of string) is dropped
 * without emitting a space.
 *
 * Returns 's', which has been modified in place.
 */
static char *trimQstring(char *const s)
{
    size_t in = 0, out = 0;
    int    skipmode = 0;   /* non-zero while skipping whitespace after a line end */

    /* Strip leading " */
    if(s[in] == '\"')
    {
        in++;
    }

    /* Copy body, compacting whitespace after newline sequences */
    while(s[in] != '\0')
    {
        /* <ctype.h> functions require a value representable as unsigned
         * char; plain char may be negative for UTF-8 encoded input.
         */
        const unsigned char c = (unsigned char)s[in];

        if(c == '\r' || c == '\n' || c == '\f')
        {
            skipmode = 1;
        }
        else if(!skipmode || !isspace(c))
        {
            if(skipmode)
            {
                /* First visible character after a line break: emit the
                 * single space that stands in for the collapsed run.
                 */
                s[out] = ' ';
                out++;
                skipmode = 0;
            }

            s[out] = s[in];
            out++;
        }

        in++;
    }

    /* Null terminate */
    s[out] = '\0';

    /* Remove trailing " */
    if(out >= 1 && s[out - 1] == '\"')
    {
        s[out - 1] = '\0';
    }

    return s;
}

/* Return the number of the input line currently being scanned. */
unsigned long lex_getlinenum(void)
{
    return lex_linenum;
}

/* Return the stored copy of the current input line, or NULL if none is
 * held.  The string remains owned by the lexer and is released when the
 * next line is read, or by lex_destroy() / lex_resetparser().
 */
char *lex_getline(void)
{
    return lex_line;
}

/* Release the stored copy of the current input line. */
void lex_destroy(void)
{
    free(lex_line);
    lex_line = NULL;
}

/* Report whether a UTF-8 byte order mark was seen at the start of the input. */
Boolean lex_getutf8(void)
{
    return lex_utf8;
}

/* Return the lexer bookkeeping to its initial state ahead of a new parse.
 * Any line still held from a previous parse is released first so that it
 * is not leaked.
 */
void lex_resetparser(void)
{
    free(lex_line);
    lex_line    = NULL;
    lex_linenum = 1;
    lex_utf8    = FALSE;
}

#include "mscgen_lexer.l.h"
/* END OF FILE */