1(**************************************************************************** 2*Copyright 2008 3* Andrew Gacek, Steven Holte, Gopalan Nadathur, Xiaochu Qi, Zach Snow 4****************************************************************************) 5(**************************************************************************** 6* This file is part of Teyjus. 7* 8* Teyjus is free software: you can redistribute it and/or modify 9* it under the terms of the GNU General Public License as published by 10* the Free Software Foundation, either version 3 of the License, or 11* (at your option) any later version. 12* 13* Teyjus is distributed in the hope that it will be useful, 14* but WITHOUT ANY WARRANTY; without even the implied warranty of 15* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16* GNU General Public License for more details. 17* 18* You should have received a copy of the GNU General Public License 19* along with Teyjus. If not, see <http://www.gnu.org/licenses/>. 20****************************************************************************) 21{ 22open Lexing 23open Lpyacc 24 25let setFileName lexbuf name = 26 lexbuf.lex_curr_p <- { lexbuf.lex_curr_p with pos_fname = name } 27 28let incrline lexbuf = 29 lexbuf.lex_curr_p <- { 30 lexbuf.lex_curr_p with 31 pos_bol = lexbuf.lex_curr_p.pos_cnum ; 32 pos_lnum = 1 + lexbuf.lex_curr_p.pos_lnum } 33 34let maxStringLength = Int32.to_int (Int32.div Int32.max_int (Int32.of_int 2)) 35 36let commentLev = ref 0 37 38let stringBuffer = Buffer.create 16 39let string_of_char = String.make 1 40 41(********************************************************************** 42*truncateString: 43* Issue a warning and truncate string if longer than maxStringLength 44**********************************************************************) 45let truncateString s pos = 46 if String.length s > maxStringLength then 47 (Errormsg.warning pos ("Maximum string/id length exceeded; truncating to " ^ 48 (string_of_int maxStringLength) ^ " characters") ; 49 String.sub s 0 maxStringLength) 50 else 51 s 52 53(********************************************************************** 54* extractCurrentString: 55* Return the current string and reset the string buffer 56**********************************************************************) 57let extractCurrentString pos = 58 let str = Buffer.contents stringBuffer in 59 let trim_str = truncateString str pos in 60 Buffer.reset stringBuffer ; 61 trim_str 62 63(********************************************************************** 64*addString: 65* Add a string to the current string. 66**********************************************************************) 67let addString s = 68 Buffer.add_string stringBuffer s 69 70(********************************************************************** 71*addChar: 72* Add a character to the current string. 73**********************************************************************) 74let addChar c = 75 Buffer.add_char stringBuffer c 76 77(********************************************************************** 78*addHex: 79* This *should* convert the given string into a character by interpreting 80* it as either 1 or 2 hexadecimal characters. 81**********************************************************************) 82let addHex s = 83 addChar (Char.chr (int_of_string ("0x" ^ s))) 84 85(********************************************************************** 86*addOctal: 87* This *should* convert the given string into a character by interpreting 88* it as either 1 or 3 octal characters. 89**********************************************************************) 90let addOctal s = 91 addChar (Char.chr (int_of_string ("0o" ^ s))) 92 93(********************************************************************** 94*addControl: 95* This *should* convert the given string into a character control. 96**********************************************************************) 97let addControl s = 98 addChar (Char.chr ((Char.code (String.get s 0)) - (Char.code '@'))) 99} 100 101let DIGIT = ['0'-'9'] 102let OCTAL = ['0'-'7'] 103let HEX = ['0'-'9' 'A'-'F' 'a'-'f'] 104let SCHAR = ['+' '-' '*' '/' '^' '<' '>' '=' '`' '\'' '?' '@' '#' '$' '&' '!' '_' '~'] 105let SCHAR1 = ['+' '-' '/' '^' '<' '>' '=' '`' '\'' '?' '@' '#' '$' '&' '!' '_' '~'] 106let SCHAR2 = ['+' '-' '*' '^' '<' '>' '=' '`' '\'' '?' '@' '#' '$' '&' '!' '~'] 107let FCHAR = [' ' '\t' '\x0b' '\x0d'] 108let PCHAR = ['\040' '!'-'&' '(' '[' ']'-'~'] 109let LCASE = ['a' - 'z'] 110let UCASE = ['A' - 'Z'] 111let IDCHAR = (LCASE|UCASE|DIGIT|SCHAR) 112let IDCHAR1 = (LCASE|UCASE|DIGIT|SCHAR1) 113let WSPACE = [' ' '\t' '\r']+ 114let NUM = DIGIT+ 115 116 117 118rule initial = parse 119| WSPACE {initial lexbuf} 120| '\n' {incrline lexbuf; initial lexbuf} 121 122| "module" {MODULE} 123| "end" {END} 124| "import" {IMPORT} 125| "accumulate" {ACCUMULATE} 126| "accum_sig" {ACCUMSIG} 127| "use_sig" {USESIG} 128| "local" {LOCAL} 129| "localkind" {LOCALKIND} 130| "closed" {CLOSED} 131| "sig" {SIG} 132| "kind" {KIND} 133| "type" {TYPE} 134| "typeabbrev" {TYPEABBREV} 135| "exportdef" {EXPORTDEF} 136| "useonly" {USEONLY} 137| "infixl" {INFIXL} 138| "infix" {INFIX} 139| "infixr" {INFIXR} 140| "prefix" {PREFIX} 141| "prefixr" {PREFIXR} 142| "postfix" {POSTFIX} 143| "postfixl" {POSTFIXL} 144| ":-" {COLONDASH} 145| "=>" {IMPLIES} 146| "\\" {INFIXLAMBDA} 147| "->" {TYARROW} 148| "!" {CUT} 149 150| "pi" {PI} 151| "sigma" {SIGMA} 152| "," {COMMA} 153| ";" {SEMICOLON} 154| "&" {AMPAND} 155| "/" {RDIVIDE} 156| "nil" {NILLIST} 157| "::" {LISTCONS} 158| "=" {EQUAL} 159 160| "+" {PLUS} 161| "-" {MINUS} 162| "*" {TIMES} 163| "<" {LESS} 164| "=<" {LEQ} 165| ">" {GTR} 166| ">=" {GEQ} 167| "~" {UMINUS} 168 169| "." {PERIOD} 170| "(" {LPAREN} 171| ")" {RPAREN} 172| "[" {LBRACK} 173| "]" {RBRACK} 174| ":" {COLON} 175| "|" {VBAR} 176 177| (NUM? "." NUM) as num {REALLIT(float_of_string(num))} 178| NUM as num {INTLIT(int_of_string(num))} 179 180| UCASE IDCHAR* as name {UPCID(name, Preabsyn.CVID)} 181| LCASE IDCHAR* as name {ID(name, Preabsyn.ConstID)} 182| (("/"(IDCHAR1 IDCHAR*))|(SCHAR2 IDCHAR*)) as 183 name {SYID(name, Preabsyn.ConstID)} 184 185| "_" as word {VID((string_of_char word), Preabsyn.AVID)} 186| "_" IDCHAR+ as word {VID(word, Preabsyn.VarID)} 187 188| "\"" {stringstate lexbuf; } 189 190| "%" {comment1 lexbuf} 191 192| "/*" {commentLev := 1; comment2 lexbuf} 193| _ as c {Errormsg.error lexbuf.lex_curr_p 194 ("Invalid token: " ^ (string_of_char c)); 195 STRLIT(extractCurrentString lexbuf.lex_curr_p)} 196| eof {EOF} 197 198(********************************************************************** 199*stringstate: 200* This state handles reading a quoted string. 201**********************************************************************) 202and stringstate = parse 203| [^ '"' '\\' '\n']+ as text {addString text; stringstate lexbuf} 204| '"' {STRLIT(extractCurrentString lexbuf.lex_curr_p)} 205 206| '\n' {Errormsg.error lexbuf.lex_curr_p 207 "String literal ended with newline"; 208 incrline lexbuf; 209 STRLIT(extractCurrentString lexbuf.lex_curr_p)} 210| "\\b" {addChar '\b'; stringstate lexbuf} 211| "\\t" {addChar '\t'; stringstate lexbuf} 212| "\\n" {addChar '\n'; stringstate lexbuf} 213| "\\r" {addChar '\r'; stringstate lexbuf} 214| "\\\\" {addChar '\\'; stringstate lexbuf} 215| "\\\"" {addChar '"'; stringstate lexbuf} 216| "\"\"" {addChar '"'; stringstate lexbuf} 217 218| "\\^" (['@'-'z'] as text) {addControl (String.make 1 text); 219 stringstate lexbuf} 220| "\\" (OCTAL as text) {addOctal (String.make 1 text); 221 stringstate lexbuf} 222| "\\" (OCTAL OCTAL OCTAL as text) {addOctal text; stringstate lexbuf} 223| "\\x" (HEX as text) {addHex (String.make 1 text); 224 stringstate lexbuf} 225| "\\x" (HEX HEX as text) {addHex text; stringstate lexbuf} 226 227| "\\x" _ {Errormsg.error lexbuf.lex_curr_p 228 "Illegal hex character specification"; 229 stringstate lexbuf} 230| "\\" FCHAR {strflush1 lexbuf} 231| "\\\n" {incrline lexbuf; strflush1 lexbuf} 232| "\\c" {strflush2 lexbuf} 233| "\\" _ {Errormsg.error lexbuf.lex_curr_p 234 "Illegal escape character in string"; 235 stringstate lexbuf} 236| eof {Errormsg.error lexbuf.lex_curr_p 237 "String not closed at end-of-file"; 238 initial lexbuf} 239 240 241and strflush1 = parse 242| FCHAR+ {strflush1 lexbuf} 243| "\\" {strflush1 lexbuf} 244| _ as text {Errormsg.error lexbuf.lex_curr_p 245 "Unterminated string escape sequence"; 246 addChar text; 247 stringstate lexbuf} 248| eof {Errormsg.error lexbuf.lex_curr_p 249 "String not closed at end-of-file"; 250 initial lexbuf} 251 252and strflush2 = parse 253| FCHAR+ {strflush2 lexbuf} 254| _ as text {addChar text; stringstate lexbuf} 255| eof {Errormsg.error lexbuf.lex_curr_p 256 "String not closed at end-of-file"; 257 initial lexbuf} 258 259and comment1 = parse 260| [^ '\n']+ {comment1 lexbuf} 261| "\n" {incrline lexbuf; initial lexbuf} 262| eof {initial lexbuf} 263| _ as text {Errormsg.error lexbuf.lex_curr_p 264 ("Illegal character " ^ (string_of_char text) ^ 265 " in input"); 266 comment1 lexbuf} 267 268and comment2 = parse 269| [^ '*' '/' '\n']+ {comment2 lexbuf} 270| "/*" {incr commentLev ; comment2 lexbuf} 271| "*/" {decr commentLev ; 272 if !commentLev = 0 then 273 initial lexbuf 274 else 275 comment2 lexbuf} 276| "*" {comment2 lexbuf} 277| "/" {comment2 lexbuf} 278| "\n" {incrline lexbuf; comment2 lexbuf} 279| eof {Errormsg.warning lexbuf.lex_curr_p 280 "Comment not closed at end-of-file"; 281 initial lexbuf} 282| _ as text {Errormsg.error lexbuf.lex_curr_p 283 ("Illegal character " ^ 284 (string_of_char text) ^ " in input"); 285 comment2 lexbuf} 286