1 %{
2 /***************************************************************************
3  *
4  * $Id: lexer.l 184 2011-02-28 21:38:28Z Michael.McTernan $
5  *
6  * Mscgen language lexer definition.
7  * Copyright (C) 2009 Michael C McTernan, Michael.McTernan.2001@cs.bris.ac.uk
8  *
9  * This file is part of msclib.
10  *
11  * Msc is free software; you can redistribute it and/or modify it
12  * under the terms of the GNU Lesser General Public License as published
13  * by the Free Software Foundation; either version 2.1 of the License, or
14  * (at your option) any later version.
15  *
16  * Msclib is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * along with msclib; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
24  *
25  ***************************************************************************/
26 
27 #include <stdio.h>
28 #include <string.h>
29 #include <ctype.h>
30 #include "mscgen_config.h"
31 #include "mscgen_msc.h"
32 #include "mscgen_bool.h"
33 #include "mscgen_safe.h"
34 #include "mscgen_lexer.h"
35 #include "mscgen_language.h"  /* Token definitions from Yacc/Bison */
36 /* Lexer state kept for error reporting and encoding detection */
37 static unsigned long  lex_linenum = 1;     /* Current input line number, starting at 1 */
38 static char          *lex_line = NULL;     /* Copy of the current line's text, owned here; NULL until newline() stores one */
39 static Boolean        lex_utf8 = FALSE;    /* Set to TRUE when the input starts with a UTF-8 byte order mark */
40 
41 /* Local function prototypes */
42 static void newline(const char *text, unsigned int n);
43 static char *trimQstring(char *s);
44 static const char *stateToString(int state);   /* NOTE(review): no definition is visible in this file - confirm it exists */
45 %}
46 
47 /* Not used, so prevent compiler warning */
48 %option never-interactive
49 %option noinput
50 %option noyywrap
51 
52 %x IN_COMMENT
53 %x BODY
54 %%
55 
56 <INITIAL>{
57 \xef\xbb\xbf                          lex_utf8 = TRUE; BEGIN(BODY);       /* UTF-8 byte order mark: note it, then scan normally */
58 (\r\n).*                              newline(yytext, 2); BEGIN(BODY);    /* Input starts with a CR LF line break */
59 (\r|\n).*                             newline(yytext, 1); BEGIN(BODY);    /* Input starts with a lone CR or LF */
60 .                                     unput(yytext[0]); BEGIN(BODY);      /* Anything else: push it back and rescan it under BODY */
61 }
62 
63 <IN_COMMENT>{
64 "*/"                                  BEGIN(BODY);                        /* End of block comment: resume normal scanning */
65 [^*\n]+                               /* Discard comment text up to the next '*' or line feed */
66 "*"                                   /* Discard a '*' that does not start the comment terminator */
67 (\r\n).*                              newline(yytext, 2);                 /* Keep counting lines inside the comment */
68 (\r|\n).*                             newline(yytext, 1);                 /* Keep counting lines inside the comment */
69 }
70 
71 <BODY>{
72 
73 "/*"                                  BEGIN(IN_COMMENT);                  /* Start of a block comment */
74 
75 (\r\n).*                              newline(yytext, 2);                 /* Count the line break; newline() hands the rest of the line back for scanning */
76 (\r|\n).*                             newline(yytext, 1);                 /* Same, for a lone CR or LF */
77 
78 #.*$                                  /* Ignore lines after '#' */
79 \/\/.*$                               /* Ignore lines after '//' */
80 
81 msc                                   return TOK_MSC;
82 HSCALE|hscale                         yylval.optType = MSC_OPT_HSCALE;                return TOK_OPT_HSCALE;
83 WIDTH|width                           yylval.optType = MSC_OPT_WIDTH;                 return TOK_OPT_WIDTH;
84 ARCGRADIENT|arcgradient               yylval.optType = MSC_OPT_ARCGRADIENT;           return TOK_OPT_ARCGRADIENT;
85 WORDWRAPARCS|wordwraparcs             yylval.optType = MSC_OPT_WORDWRAPARCS;          return TOK_OPT_WORDWRAPARCS;
86 URL|url                               yylval.attribType = MSC_ATTR_URL;               return TOK_ATTR_URL;
87 LABEL|label                           yylval.attribType = MSC_ATTR_LABEL;             return TOK_ATTR_LABEL;
88 IDURL|idurl                           yylval.attribType = MSC_ATTR_IDURL;             return TOK_ATTR_IDURL;
89 ID|id                                 yylval.attribType = MSC_ATTR_ID;                return TOK_ATTR_ID;
90 LINECOLO(U?)R|linecolo(u?)r           yylval.attribType = MSC_ATTR_LINE_COLOUR;       return TOK_ATTR_LINE_COLOUR;
91 TEXTCOLO(U?)R|textcolo(u?)r           yylval.attribType = MSC_ATTR_TEXT_COLOUR;       return TOK_ATTR_TEXT_COLOUR;
92 TEXTBGCOLO(U?)R|textbgcolo(u?)r       yylval.attribType = MSC_ATTR_TEXT_BGCOLOUR;     return TOK_ATTR_TEXT_BGCOLOUR;
93 ARCLINECOLO(U?)R|arclinecolo(u?)r     yylval.attribType = MSC_ATTR_ARC_LINE_COLOUR;   return TOK_ATTR_ARC_LINE_COLOUR;
94 ARCTEXTCOLO(U?)R|arctextcolo(u?)r     yylval.attribType = MSC_ATTR_ARC_TEXT_COLOUR;   return TOK_ATTR_ARC_TEXT_COLOUR;
95 ARCTEXTBGCOLO(U?)R|arctextbgcolo(u?)r yylval.attribType = MSC_ATTR_ARC_TEXT_BGCOLOUR; return TOK_ATTR_ARC_TEXT_BGCOLOUR;
96 ARCSKIP|arcskip                       yylval.attribType = MSC_ATTR_ARC_SKIP;          return TOK_ATTR_ARC_SKIP;
97 \.\.\.                                yylval.arctype = MSC_ARC_DISCO;    return TOK_SPECIAL_ARC;        /* ... */
98 ---                                   yylval.arctype = MSC_ARC_DIVIDER;  return TOK_SPECIAL_ARC;        /* --- */
99 \|\|\|                                yylval.arctype = MSC_ARC_SPACE;    return TOK_SPECIAL_ARC;        /* ||| */
100 \<-\>                                 yylval.arctype = MSC_ARC_SIGNAL;   return TOK_REL_SIG_BI;         /* <-> */
101 -\>                                   yylval.arctype = MSC_ARC_SIGNAL;   return TOK_REL_SIG_TO;         /* -> */
102 \<-                                   yylval.arctype = MSC_ARC_SIGNAL;   return TOK_REL_SIG_FROM;       /* <- */
103 --                                    yylval.arctype = MSC_ARC_SIGNAL;   return TOK_REL_SIG;            /* -- */
104 -[Xx]                                 yylval.arctype = MSC_ARC_LOSS;     return TOK_REL_LOSS_TO;        /* -x */
105 [Xx]-                                 yylval.arctype = MSC_ARC_LOSS;     return TOK_REL_LOSS_FROM;      /* x- */
106 \<=\>                                 yylval.arctype = MSC_ARC_METHOD;   return TOK_REL_METHOD_BI;      /* <=> */
107 =\>                                   yylval.arctype = MSC_ARC_METHOD;   return TOK_REL_METHOD_TO;      /* => */
108 \<=                                   yylval.arctype = MSC_ARC_METHOD;   return TOK_REL_METHOD_FROM;    /* <= */
109 ==                                    yylval.arctype = MSC_ARC_METHOD;   return TOK_REL_METHOD;         /* == */
110 \<\<\>\>                              yylval.arctype = MSC_ARC_RETVAL;   return TOK_REL_RETVAL_BI;      /* <<>> */
111 \>\>                                  yylval.arctype = MSC_ARC_RETVAL;   return TOK_REL_RETVAL_TO;      /* >> */
112 \<\<                                  yylval.arctype = MSC_ARC_RETVAL;   return TOK_REL_RETVAL_FROM;    /* << */
113 \.\.                                  yylval.arctype = MSC_ARC_RETVAL;   return TOK_REL_RETVAL;         /* .. */
114 \<:\>                                 yylval.arctype = MSC_ARC_DOUBLE;   return TOK_REL_DOUBLE_BI;      /* <:> */
115 :\>                                   yylval.arctype = MSC_ARC_DOUBLE;   return TOK_REL_DOUBLE_TO;      /* :> */
116 \<:                                   yylval.arctype = MSC_ARC_DOUBLE;   return TOK_REL_DOUBLE_FROM;    /* <: */
117 ::                                    yylval.arctype = MSC_ARC_DOUBLE;   return TOK_REL_DOUBLE;         /* :: */
118 \<\<=\>\>                             yylval.arctype = MSC_ARC_CALLBACK; return TOK_REL_CALLBACK_BI;    /* <<=>> */
119 =\>\>                                 yylval.arctype = MSC_ARC_CALLBACK; return TOK_REL_CALLBACK_TO;    /* =>> */
120 \<\<=                                 yylval.arctype = MSC_ARC_CALLBACK; return TOK_REL_CALLBACK_FROM;  /* <<= */
121 BOX|box                               yylval.arctype = MSC_ARC_BOX;      return TOK_REL_BOX;            /* box */
122 ABOX|abox                             yylval.arctype = MSC_ARC_ABOX;     return TOK_REL_ABOX;           /* abox */
123 RBOX|rbox                             yylval.arctype = MSC_ARC_RBOX;     return TOK_REL_RBOX;           /* rbox */
124 NOTE|note                             yylval.arctype = MSC_ARC_NOTE;     return TOK_REL_NOTE;           /* note */
125 [A-Za-z0-9_]+                         yylval.string = strdup_s(yytext);  return TOK_STRING;             /* Unquoted word: a copy made by strdup_s() is passed on */
126 \"(\\\"|[^\"])*\"                     yylval.string = trimQstring(strdup_s(yytext)); return TOK_QSTRING; /* Quoted string, may span lines. NOTE(review): line breaks matched here are not counted in lex_linenum */
127 =                                     return TOK_EQUAL;
128 ,                                     return TOK_COMMA;
129 \;                                    return TOK_SEMICOLON;
130 \{                                    return TOK_OCBRACKET;
131 \}                                    return TOK_CCBRACKET;
132 \[                                    return TOK_OSBRACKET;
133 \]                                    return TOK_CSBRACKET;
134 \*                                    return TOK_ASTERISK;
135 [ \t]+                                /* ignore whitespace */;
136 
137 }
138 
139 
140 <*>.|\n|\r                            return TOK_UNKNOWN;                 /* Any single character no earlier rule accepted, in every start condition */
141 
142 %%
143 
144 /* Handle a new line of input.
145  *  This counts the line number and duplicates the string in case we need
146  *  it for error reporting.  The line is then returned back for parsing
147  *  without the newline characters prefixed.
148  */
149 static void newline(const char *text, unsigned int n)
150 {
151     lex_linenum++;
152     if(lex_line != NULL)
153     {
154         free(lex_line);
155     }
156 
157     lex_line = strdup(text + n);
158     yyless(n);
159 }
160 
161 
/* Trim a multi-line quoted string, in place.
 *  This allows the parsed input quoted strings to span multiple lines of
 *  input but be condensed to only a single line of output e.g.
 *    a->b [label="line 1
 *                 line 1 too"];
 *  arrives here as "line 1\n                 line 1 too", quotes included.
 *  Each run of line-break characters (\r, \n or \f) together with any
 *  whitespace that follows it is collapsed into a single space, and the
 *  leading and trailing quote characters are removed.  Whitespace that does
 *  not follow a line break is kept as it is.
 *
 *  s        Modifiable NUL-terminated string, rewritten in place.
 *           NULL is tolerated and simply returned.
 *  returns  s; the result is never longer than the input.
 */
static char *trimQstring(char *const s)
{
    size_t in = 0, out = 0;
    int    skipping = 0;   /* Non-zero while swallowing whitespace after a line break */

    if(s == NULL)
    {
        return NULL;
    }

    /* Strip leading " */
    if(s[in] == '\"')
    {
        in++;
    }

    /* Copy body, compacting whitespace after newline sequences */
    for(; s[in] != '\0'; in++)
    {
        /* <ctype.h> functions need a value representable as unsigned char;
         *  a plain char may be negative for the bytes of UTF-8 input.
         */
        const unsigned char c = (unsigned char)s[in];

        if(c == '\r' || c == '\n' || c == '\f')
        {
            skipping = 1;
            continue;
        }

        if(skipping)
        {
            if(isspace(c))
            {
                continue;
            }

            /* First visible character after the break: emit the one space
             *  that stands in for everything skipped.
             */
            s[out++] = ' ';
            skipping = 0;
        }

        s[out++] = s[in];
    }

    /* Null terminate */
    s[out] = '\0';

    /* Remove trailing " */
    if(out >= 1 && s[out - 1] == '\"')
    {
        s[out - 1] = '\0';
    }

    return s;
}
212 
213 unsigned long lex_getlinenum(void)
214 {
215     return lex_linenum;
216 }
217 
218 char *lex_getline(void)
219 {
220     return lex_line;
221 }
222 
223 void lex_destroy(void)
224 {
225     if(lex_line != NULL)
226     {
227         free(lex_line);
228         lex_line = NULL;
229     }
230 }
231 
232 Boolean lex_getutf8(void)
233 {
234     return lex_utf8;
235 }
236 
237 void lex_resetparser()
238 {
239   lex_linenum = 1;
240   lex_line = NULL;
241   lex_utf8 = FALSE;
242 }
243 
244 #include "mscgen_lexer.l.h"
245 /* END OF FILE */
246