%% Copyright (c) 2008-2013 Robert Virding
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.

%% File    : lfe_scan.xrl
%% Author  : Robert Virding
%% Purpose : Token definitions for Lisp Flavoured Erlang.

Definitions.
%% Digit classes: binary, octal, decimal, hexadecimal and base 36.
B       = [01]
O       = [0-7]
D       = [0-9]
H       = [0-9a-fA-F]
B36     = [0-9a-zA-Z]
%% Upper case letter, lower case letter, any ASCII letter.
U       = [A-Z]
L       = [a-z]
A       = ({U}|{L})
%% Delimiters: brackets, parentheses, braces, double quote, semicolon
%% and every character from NUL up to and including space.
DEL     = [][()}{";\000-\s]
%% Symbol character: anything except brackets, parentheses, braces,
%% double quote, semicolon, NUL..space and \177-\237 (octal, i.e. 127..159).
SYM     = [^][()}{";\000-\s\177-\237]
%% Symbol start character: as SYM but additionally excluding | # ` ' and ,
SSYM    = [^][()}{"|;#`',\000-\s\177-\237]
%% Whitespace: a single character in NUL..space, or a ; comment that
%% runs up to (not including) the end of the line.
WS      = ([\000-\s]|;[^\n]*)

Rules.
%% Bracketed Comments using #| foo |#
%% The first three characters are dropped before block_comment/1 looks
%% for a nested opening "#|" in the rest of the comment.
#{D}*\|[^\|]*\|+([^#\|][^\|]*\|+)*# :
    block_comment(string:substr(TokenChars, 3)).

%% Separators
'               : {token,{'\'',TokenLine}}.
`               : {token,{'`',TokenLine}}.
,               : {token,{',',TokenLine}}.
,@              : {token,{',@',TokenLine}}.
\.              : {token,{'.',TokenLine}}.
[][()}{]        : {token,{list_to_atom(TokenChars),TokenLine}}.

%% Sharpsign forms. Any digits between the # and the dispatch character
%% are accepted by the pattern and ignored by the action.
#{D}*[bB]\(     : {token,{'#B(',TokenLine}}.
#{D}*[mM]\(     : {token,{'#M(',TokenLine}}.
#{D}*\(         : {token,{'#(',TokenLine}}.
#{D}*\.         : {token,{'#.',TokenLine}}.

#{D}*`          : {token,{'#`',TokenLine}}.
#{D}*;          : {token,{'#;',TokenLine}}.
#{D}*,          : {token,{'#,',TokenLine}}.
#{D}*,@         : {token,{'#,@',TokenLine}}.

%% Characters
%% skip_past/3 drops everything up to and including the first backslash,
%% leaving either "x<hex digits>" or a single character for char_token/2.
#{D}*\\(x{H}+|.) : char_token(skip_past(TokenChars, $\\, $\\), TokenLine).

%% Based numbers
%% skip_past/3 drops the "#<digits><base char>" prefix so that only the
%% optional sign and the digits are handed to base_token/3.
#{D}*\*{SYM}+   : base_token(skip_past(TokenChars, $*, $*), 2, TokenLine).
#{D}*[bB]{SYM}+ : base_token(skip_past(TokenChars, $b, $B), 2, TokenLine).
#{D}*[oO]{SYM}+ : base_token(skip_past(TokenChars, $o, $O), 8, TokenLine).
#{D}*[dD]{SYM}+ : base_token(skip_past(TokenChars, $d, $D), 10, TokenLine).
#{D}*[xX]{SYM}+ : base_token(skip_past(TokenChars, $x, $X), 16, TokenLine).
#{D}*[rR]{SYM}+ :
    %% Scan over digit chars to get base. tl/1 drops the leading #,
    %% base1/3 reads the decimal base and stops at the r/R, which the
    %% match then discards.
    {Base,[_|Ds]} = base1(tl(TokenChars), 10, 0),
    base_token(Ds, Base, TokenLine).

%% String
"(\\x{H}+;|\\.|[^"\\])*" :
    %% Strip quotes.
    S = string:substr(TokenChars, 2, TokenLen - 2),
    {token,{string,TokenLine,chars(S)}}.
%% Binary string
#"(\\x{H}+;|\\.|[^"\\])*" :
    %% Strip sharpsign and quotes.
    S = string:substr(TokenChars, 3, TokenLen - 3),
    %% characters_to_binary/3 returns an error/incomplete tuple rather
    %% than a binary when an escape yields an invalid code point; report
    %% that as a scan error instead of embedding the tuple in the token.
    case unicode:characters_to_binary(chars(S), utf8, utf8) of
        Bin when is_binary(Bin) -> {token,{binary,TokenLine,Bin}};
        _ -> {error,"illegal binary string"}
    end.
%% Symbols
\|(\\x{H}+;|\\.|[^|\\])*\| :
    %% Strip quotes.
    S = string:substr(TokenChars, 2, TokenLen - 2),
    symbol_token(chars(S), TokenLine).
%% Funs
#'{SSYM}{SYM}*/{D}+ :
    %% Strip sharpsign single-quote.
    FunStr = string:substr(TokenChars,3),
    {token,{'#\'',TokenLine,FunStr}}.
%% Atoms (numbers and unquoted symbols)
[+-]?{D}+ :
    try list_to_integer(TokenChars) of
        I -> {token,{number,TokenLine,I}}
    catch
        error:_ -> {error,"illegal integer"}
    end.
[+-]?{D}+\.{D}+([eE][+-]?{D}+)? :
    try list_to_float(TokenChars) of
        F -> {token,{number,TokenLine,F}}
    catch
        error:_ -> {error,"illegal float"}
    end.
{SSYM}{SYM}* :
    symbol_token(TokenChars, TokenLine).
{WS}+ : skip_token.

Erlang code.
%% Copyright (c) 2008-2013 Robert Virding
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.

%% File    : lfe_scan.erl
%% Author  : Robert Virding
%% Purpose : Token definitions for Lisp Flavoured Erlang.

-export([start_symbol_char/1,symbol_char/1]).

-import(string, [substr/2,substr/3]).

%% start_symbol_char(Char) -> true | false.
%% symbol_char(Char) -> true | false.
%%  Define start symbol chars and symbol chars. A start symbol char is
%%  a symbol char that is not one of # ` ' , or |.

start_symbol_char($#) -> false;
start_symbol_char($`) -> false;
start_symbol_char($') -> false;                 %'
start_symbol_char($,) -> false;
start_symbol_char($|) -> false;                 %Symbol quote character
start_symbol_char(C) -> symbol_char(C).

%% The delimiters are never symbol chars. Everything else is a symbol
%% char when it lies above space up to and including ~, or from \240
%% (octal, 160) upwards. The lower bound \240 is inclusive so that this
%% predicate agrees with the SYM character class of the scanner, which
%% only excludes \177-\237.

symbol_char($() -> false;
symbol_char($)) -> false;
symbol_char($[) -> false;
symbol_char($]) -> false;
symbol_char(${) -> false;
symbol_char($}) -> false;
symbol_char($") -> false;
symbol_char($;) -> false;
symbol_char(C) -> (C > $\s andalso C =< $~) orelse (C >= $\240).

%% symbol_token(Chars, Line) -> {token,{symbol,Line,Symbol}} | {error,E}.
%%  Build a symbol from list of legal characters, else error. Any error
%%  raised by list_to_atom/1 is reported as an illegal symbol.

symbol_token(Cs, L) ->
    try list_to_atom(Cs) of
        S -> {token,{symbol,L,S}}
    catch
        error:_ -> {error,"illegal symbol"}
    end.

%% base_token(Chars, Base, Line) -> {token,{number,Line,N}} | {error,E}.
%%  Convert a string of Base characters into a number. We only allow
%%  base between 2 and 36, and an optional sign character first.
%% base_token/3 rejects bases outside 2..36 and strips an optional
%% leading sign; base_token/4 converts what is left.

base_token(_, B, _) when B < 2; B > 36 ->
    {error,"illegal number base"};
base_token([$+|Cs], B, L) -> base_token(Cs, B, +1, L);
base_token([$-|Cs], B, L) -> base_token(Cs, B, -1, L);
base_token(Cs, B, L) -> base_token(Cs, B, +1, L).

%% base_token(Digits, Base, Sign, Line) ->
%%     {token,{number,Line,N}} | {error,E}.
%%  All of Digits must be digits of Base. An empty digit string, which
%%  is what remains of a lone sign such as in "#b+", is an error and not
%%  the number 0.

base_token([], _, _, _) ->
    {error,"illegal based number"};
base_token(Cs, B, S, L) ->
    case base1(Cs, B, 0) of
        {N,[]} -> {token,{number,L,S*N}};
        {_,_} -> {error,"illegal based number"}
    end.

%% base1(Chars, Base, SoFar) -> {Number,RestChars}.
%%  Accumulate the leading characters of Chars that are digits of Base
%%  (0-9, then a-z or A-Z for the values 10 and up) onto SoFar. Stop at
%%  the first character that is not such a digit and return it together
%%  with the rest of the characters.

base1([C|Cs], Base, SoFar) when C >= $0, C =< $9, C < Base + $0 ->
    Next = SoFar * Base + (C - $0),
    base1(Cs, Base, Next);
base1([C|Cs], Base, SoFar) when C >= $a, C =< $z, C < Base + $a - 10 ->
    Next = SoFar * Base + (C - $a + 10),
    base1(Cs, Base, Next);
base1([C|Cs], Base, SoFar) when C >= $A, C =< $Z, C < Base + $A - 10 ->
    Next = SoFar * Base + (C - $A + 10),
    base1(Cs, Base, Next);
base1([C|Cs], _Base, SoFar) -> {SoFar,[C|Cs]};
base1([], _Base, N) -> {N,[]}.

-define(IS_UNICODE(C), ((C >= 0) and (C =< 16#10FFFF))).

%% char_token(InputChars, Line) -> {token,{number,L,N}} | {error,E}.
%%  Convert an input string into the corresponding character. For a
%%  sequence of hex characters we check that the resultant code is in
%%  the unicode range. A lone "x" is the character x itself.

char_token([$x,C|Cs], L) ->
    case base1([C|Cs], 16, 0) of
        {N,[]} when ?IS_UNICODE(N) -> {token,{number,L,N}};
        _ -> {error,"illegal character"}
    end;
char_token([C], L) -> {token,{number,L,C}}.

%% chars(InputChars) -> Chars.
%%  Convert an input string into the corresponding string characters.
%%  We know that the input string is correct. "\x<hex digits>;" becomes
%%  the character with that code; a "\x" that is not followed by hex
%%  digits and a terminating ";" is just the escaped character x.

chars([$\\,$x,C|Cs0]) ->
    %% Yields false when C is not a hex digit, which falls through to
    %% the catch-all clause just like an unterminated hex sequence.
    case hex_char(C) andalso base1([C|Cs0], 16, 0) of
        {N,[$;|Cs1]} -> [N|chars(Cs1)];
        _Other -> [escape_char($x)|chars([C|Cs0])]
    end;
chars([$\\,C|Cs]) -> [escape_char(C)|chars(Cs)];
chars([C|Cs]) -> [C|chars(Cs)];
chars([]) -> [].

%% hex_char(Char) -> true | false.
%%  Test whether Char is a hexadecimal digit.

hex_char(C) when C >= $0, C =< $9 -> true;
hex_char(C) when C >= $a, C =< $f -> true;
hex_char(C) when C >= $A, C =< $F -> true;
hex_char(_) -> false.

%% escape_char(Char) -> Char.
%%  Map the character following a backslash to the character it stands
%%  for. Characters without a special meaning stand for themselves.

escape_char($b) -> $\b;                         %\b = BS
escape_char($t) -> $\t;                         %\t = TAB
escape_char($n) -> $\n;                         %\n = LF
escape_char($v) -> $\v;                         %\v = VT
escape_char($f) -> $\f;                         %\f = FF
escape_char($r) -> $\r;                         %\r = CR
escape_char($e) -> $\e;                         %\e = ESC
escape_char($s) -> $\s;                         %\s = SPC
escape_char($d) -> $\d;                         %\d = DEL
escape_char(C) -> C.

%% Block Comment:
%%  Provide a sensible error when people attempt to include nested
%%  comments because currently the parser cannot process them without
%%  a rebuild. But simply exploding on a '#|' is not going to be that
%%  helpful.
%%
%% block_comment(CommentChars) -> skip_token | {error,E}.

block_comment(TokenChars) ->
    %% Check we're not opening another comment block.
    case string:str(TokenChars, "#|") of
        0 -> skip_token;                        %% No nesting found
        _ -> {error, "illegal nested block comment"}
    end.

%% skip_until(String, Char1, Char2) -> String.
%% skip_past(String, Char1, Char2) -> String.
%%  skip_past/3 returns what follows the first occurrence of Char1 or
%%  Char2 in String, or [] if neither occurs. skip_until/3 is currently
%%  commented out.

%% skip_until([C|_]=Cs, C1, C2) when C =:= C1 ; C =:= C2 -> Cs;
%% skip_until([_|Cs], C1, C2) -> skip_until(Cs, C1, C2);
%% skip_until([], _, _) -> [].

skip_past([C|Cs], C1, C2) when C =:= C1 ; C =:= C2 -> Cs;
skip_past([_|Cs], C1, C2) -> skip_past(Cs, C1, C2);
skip_past([], _, _) -> [].