1(****************************************************************************
2*Copyright 2008
3*  Andrew Gacek, Steven Holte, Gopalan Nadathur, Xiaochu Qi, Zach Snow
4****************************************************************************)
5(****************************************************************************
6* This file is part of Teyjus.
7*
8* Teyjus is free software: you can redistribute it and/or modify
9* it under the terms of the GNU General Public License as published by
10* the Free Software Foundation, either version 3 of the License, or
11* (at your option) any later version.
12*
13* Teyjus is distributed in the hope that it will be useful,
14* but WITHOUT ANY WARRANTY; without even the implied warranty of
15* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16* GNU General Public License for more details.
17*
18* You should have received a copy of the GNU General Public License
19* along with Teyjus.  If not, see <http://www.gnu.org/licenses/>.
20****************************************************************************)
21{
22open Lexing
23open Lpyacc
24
25let setFileName lexbuf name =
26  lexbuf.lex_curr_p <- { lexbuf.lex_curr_p with pos_fname = name }
27
28let incrline lexbuf =
29  lexbuf.lex_curr_p <- {
30    lexbuf.lex_curr_p with
31      pos_bol = lexbuf.lex_curr_p.pos_cnum ;
32      pos_lnum = 1 + lexbuf.lex_curr_p.pos_lnum }
33
34let maxStringLength = Int32.to_int (Int32.div Int32.max_int (Int32.of_int 2))
35
36let commentLev = ref 0
37
38let stringBuffer = Buffer.create 16
39let string_of_char = String.make 1
40
41(**********************************************************************
42*truncateString:
43* Issue a warning and truncate string if longer than maxStringLength
44**********************************************************************)
45let truncateString s pos =
46  if String.length s > maxStringLength then
47    (Errormsg.warning pos ("Maximum string/id length exceeded; truncating to " ^
48                             (string_of_int maxStringLength) ^ " characters") ;
49     String.sub s 0 maxStringLength)
50  else
51    s
52
53(**********************************************************************
54* extractCurrentString:
55*  Return the current string and reset the string buffer
56**********************************************************************)
57let extractCurrentString pos =
58  let str = Buffer.contents stringBuffer in
59  let trim_str = truncateString str pos in
60  Buffer.reset stringBuffer ;
61  trim_str
62
63(**********************************************************************
64*addString:
65* Add a string to the current string.
66**********************************************************************)
67let addString s =
68  Buffer.add_string stringBuffer s
69
70(**********************************************************************
71*addChar:
72* Add a character to the current string.
73**********************************************************************)
74let addChar c =
75  Buffer.add_char stringBuffer c
76
77(**********************************************************************
78*addHex:
79* This *should* convert the given string into a character by interpreting
80* it as either 1 or 2 hexadecimal characters.
81**********************************************************************)
82let addHex s =
83  addChar (Char.chr (int_of_string ("0x" ^ s)))
84
85(**********************************************************************
86*addOctal:
87* This *should* convert the given string into a character by interpreting
88* it as either 1 or 3 octal characters.
89**********************************************************************)
90let addOctal s =
91  addChar (Char.chr (int_of_string ("0o" ^ s)))
92
93(**********************************************************************
94*addControl:
95* This *should* convert the given string into a character control.
96**********************************************************************)
97let addControl s =
98  addChar (Char.chr ((Char.code (String.get s 0)) - (Char.code '@')))
99}
100
101let DIGIT = ['0'-'9']
102let OCTAL = ['0'-'7']
103let HEX = ['0'-'9' 'A'-'F' 'a'-'f']
104let SCHAR = ['+' '-' '*' '/' '^' '<' '>' '=' '`' '\'' '?' '@' '#' '$' '&' '!' '_' '~']
105let SCHAR1 = ['+' '-' '/' '^' '<' '>' '=' '`' '\'' '?' '@' '#' '$' '&' '!' '_' '~']
106let SCHAR2 = ['+' '-' '*' '^' '<' '>' '=' '`' '\'' '?' '@' '#' '$' '&' '!' '~']
107let FCHAR = [' ' '\t' '\x0b' '\x0d']
108let PCHAR = ['\040' '!'-'&' '(' '[' ']'-'~']
109let LCASE = ['a' - 'z']
110let UCASE = ['A' - 'Z']
111let IDCHAR = (LCASE|UCASE|DIGIT|SCHAR)
112let IDCHAR1 = (LCASE|UCASE|DIGIT|SCHAR1)
113let WSPACE = [' ' '\t' '\r']+
114let NUM = DIGIT+
115
116
117
118rule initial = parse
119| WSPACE        {initial lexbuf}
120| '\n'          {incrline lexbuf; initial lexbuf}
121
122| "module"      {MODULE}
123| "end"         {END}
124| "import"      {IMPORT}
125| "accumulate"  {ACCUMULATE}
126| "accum_sig"   {ACCUMSIG}
127| "use_sig"     {USESIG}
128| "local"       {LOCAL}
129| "localkind"   {LOCALKIND}
130| "closed"      {CLOSED}
131| "sig"         {SIG}
132| "kind"        {KIND}
133| "type"        {TYPE}
134| "typeabbrev"  {TYPEABBREV}
135| "exportdef"   {EXPORTDEF}
136| "useonly"     {USEONLY}
137| "infixl"      {INFIXL}
138| "infix"       {INFIX}
139| "infixr"      {INFIXR}
140| "prefix"      {PREFIX}
141| "prefixr"     {PREFIXR}
142| "postfix"     {POSTFIX}
143| "postfixl"    {POSTFIXL}
144| ":-"          {COLONDASH}
145| "=>"          {IMPLIES}
146| "\\"          {INFIXLAMBDA}
147| "->"          {TYARROW}
148| "!"           {CUT}
149
150| "pi"          {PI}
151| "sigma"       {SIGMA}
152| ","           {COMMA}
153| ";"           {SEMICOLON}
154| "&"           {AMPAND}
155| "/"           {RDIVIDE}
156| "nil"         {NILLIST}
157| "::"          {LISTCONS}
158| "="           {EQUAL}
159
160| "+"           {PLUS}
161| "-"           {MINUS}
162| "*"           {TIMES}
163| "<"           {LESS}
164| "=<"          {LEQ}
165| ">"           {GTR}
166| ">="          {GEQ}
167| "~"           {UMINUS}
168
169| "."           {PERIOD}
170| "("           {LPAREN}
171| ")"           {RPAREN}
172| "["           {LBRACK}
173| "]"           {RBRACK}
174| ":"           {COLON}
175| "|"           {VBAR}
176
177| (NUM? "." NUM) as num   {REALLIT(float_of_string(num))}
178| NUM as num              {INTLIT(int_of_string(num))}
179
180| UCASE IDCHAR* as name       {UPCID(name, Preabsyn.CVID)}
181| LCASE IDCHAR* as name       {ID(name, Preabsyn.ConstID)}
182| (("/"(IDCHAR1 IDCHAR*))|(SCHAR2 IDCHAR*)) as
183                                        name {SYID(name, Preabsyn.ConstID)}
184
185| "_" as word         {VID((string_of_char word), Preabsyn.AVID)}
186| "_" IDCHAR+ as word {VID(word, Preabsyn.VarID)}
187
188| "\""            {stringstate lexbuf; }
189
190| "%"             {comment1 lexbuf}
191
192| "/*"            {commentLev := 1; comment2 lexbuf}
193| _ as c          {Errormsg.error lexbuf.lex_curr_p
194                     ("Invalid token: " ^ (string_of_char c));
195                     STRLIT(extractCurrentString lexbuf.lex_curr_p)}
196| eof             {EOF}
197
198(**********************************************************************
199*stringstate:
200* This state handles reading a quoted string.
201**********************************************************************)
202and stringstate = parse
203| [^ '"' '\\' '\n']+ as text  {addString text; stringstate lexbuf}
204| '"'                         {STRLIT(extractCurrentString lexbuf.lex_curr_p)}
205
206| '\n'          {Errormsg.error lexbuf.lex_curr_p
207                    "String literal ended with newline";
208                    incrline lexbuf;
209                    STRLIT(extractCurrentString lexbuf.lex_curr_p)}
210| "\\b"         {addChar '\b'; stringstate lexbuf}
211| "\\t"         {addChar '\t'; stringstate lexbuf}
212| "\\n"         {addChar '\n'; stringstate lexbuf}
213| "\\r"         {addChar '\r'; stringstate lexbuf}
214| "\\\\"        {addChar '\\'; stringstate lexbuf}
215| "\\\""        {addChar '"'; stringstate lexbuf}
216| "\"\""        {addChar '"'; stringstate lexbuf}
217
218| "\\^" (['@'-'z'] as text)         {addControl (String.make 1 text);
219                                     stringstate lexbuf}
220| "\\" (OCTAL as text)              {addOctal (String.make 1 text);
221                                     stringstate lexbuf}
222| "\\" (OCTAL OCTAL OCTAL as text)  {addOctal text; stringstate lexbuf}
223| "\\x" (HEX as text)               {addHex (String.make 1 text);
224                                     stringstate lexbuf}
225| "\\x" (HEX HEX as text)           {addHex text; stringstate lexbuf}
226
227| "\\x" _         {Errormsg.error lexbuf.lex_curr_p
228                    "Illegal hex character specification";
229                   stringstate lexbuf}
230| "\\" FCHAR      {strflush1 lexbuf}
231| "\\\n"          {incrline lexbuf; strflush1 lexbuf}
232| "\\c"           {strflush2 lexbuf}
233| "\\" _          {Errormsg.error lexbuf.lex_curr_p
234                    "Illegal escape character in string";
235                   stringstate lexbuf}
236| eof             {Errormsg.error lexbuf.lex_curr_p
237                     "String not closed at end-of-file";
238                   initial lexbuf}
239
240
241and strflush1 = parse
242| FCHAR+    {strflush1 lexbuf}
243| "\\"      {strflush1 lexbuf}
244| _ as text {Errormsg.error lexbuf.lex_curr_p
245                "Unterminated string escape sequence";
246                addChar text;
247                stringstate lexbuf}
248| eof             {Errormsg.error lexbuf.lex_curr_p
249                     "String not closed at end-of-file";
250                   initial lexbuf}
251
252and strflush2 = parse
253| FCHAR+    {strflush2 lexbuf}
254| _ as text {addChar text; stringstate lexbuf}
255| eof             {Errormsg.error lexbuf.lex_curr_p
256                     "String not closed at end-of-file";
257                   initial lexbuf}
258
259and comment1 = parse
260| [^ '\n']+       {comment1 lexbuf}
261| "\n"            {incrline lexbuf; initial lexbuf}
262| eof             {initial lexbuf}
263| _ as text       {Errormsg.error lexbuf.lex_curr_p
264                     ("Illegal character " ^ (string_of_char text) ^
265                        " in input");
266                   comment1 lexbuf}
267
268and comment2 = parse
269| [^ '*' '/' '\n']+   {comment2 lexbuf}
270| "/*"                {incr commentLev ; comment2 lexbuf}
271| "*/"                {decr commentLev ;
272                       if !commentLev = 0 then
273                         initial lexbuf
274                       else
275                         comment2 lexbuf}
276| "*"                 {comment2 lexbuf}
277| "/"                 {comment2 lexbuf}
278| "\n"                {incrline lexbuf; comment2 lexbuf}
279| eof                 {Errormsg.warning lexbuf.lex_curr_p
280                         "Comment not closed at end-of-file";
281                       initial lexbuf}
282| _ as text           {Errormsg.error lexbuf.lex_curr_p
283                         ("Illegal character " ^
284                            (string_of_char text) ^ " in input");
285                       comment2 lexbuf}
286