(* Parsercomb -- parser combinators *)

signature Parsercomb =
sig
    (* A lazy stream (sequence) of 'elm values. *)
    type 'elm stream

    (* A parser takes a stream and returns SOME (result, remaining stream)
       on success, or NONE on failure. *)
    type ('elm, 'res) parser = 'elm stream -> ('res * 'elm stream) option

    (* Raised by commitChar and commitElem when the committed parser fails;
       the string says what was expected, what was found, and where. *)
    exception Parse of string

    (* Build a stream from a reader function and an initial stream state. *)
    val stream : ('src -> ('elm * 'src) option) -> 'src -> 'elm stream

    (* Sequencing, alternation, and transformation of parser results. *)
    val --  : ('a, 'b) parser * ('a, 'c) parser -> ('a, 'b * 'c) parser
    val ||  : ('a, 'b) parser * ('a, 'b) parser -> ('a, 'b) parser
    val >>  : ('a, 'b) parser * ('b -> 'c) -> ('a, 'c) parser
    val >>* : ('a, 'b) parser * ('b -> 'c option) -> ('a, 'c) parser
    val >>= : ('a, 'b) parser * ('b -> ('a, 'c) parser)
              -> ('a, 'c) parser

    (* Sequencing that keeps only the right-hand result ( #-- ) or only
       the left-hand result ( --# ). *)
    val #-- : ('elm, 'a) parser * ('elm, 'b) parser -> ('elm, 'b) parser
    val --# : ('elm, 'a) parser * ('elm, 'b) parser -> ('elm, 'a) parser

    (* Require a literal string before ( $-- ) or after ( --$ ) a
       character parser; the string itself is not part of the result. *)
    val $-- : string * (char, 'a) parser -> (char, 'a) parser
    val --$ : (char, 'a) parser * string -> (char, 'a) parser

    (* Repetition (zero or more, one or more) and optional application. *)
    val repeat0  : ('elm, 'res) parser -> ('elm, 'res list) parser
    val repeat1  : ('elm, 'res) parser -> ('elm, 'res * 'res list) parser
    val optional : ('elm, 'res) parser -> ('elm, 'res option) parser

    (* Primitive parsers: next element, always-fail, always-succeed,
       and succeed only at end of stream. *)
    val getItem : ('elm, 'elm) parser
    val failure : ('elm, 'res) parser
    val success : 'res -> ('elm, 'res) parser
    val eof     : 'res -> ('elm, 'res) parser

    (* Match a literal character or a literal string at the stream front. *)
    val $# : char -> (char, char) parser
    val $  : string -> (char, string) parser

    (* Match one element equal to the given one (needs an equality type). *)
    val getLit : ''elm -> (''elm, ''elm) parser

    (* Predicate-driven parsers over character streams. *)
    val getChar   : (char -> bool) -> (char, char) parser
    val getChars0 : (char -> bool) -> (char, string) parser
    val getChars1 : (char -> bool) -> (char, string) parser

    (* Predicate-driven parsers over streams of arbitrary elements. *)
    val getElem   : ('elm -> bool) -> ('elm, 'elm) parser
    val getElems0 : ('elm -> bool) -> ('elm, 'elm list) parser
    val getElems1 : ('elm -> bool) -> ('elm, 'elm * 'elm list) parser

    (* Turn failure of the given parser into exception Parse; the first
       argument describes what was expected, and commitElem additionally
       takes a function for showing the offending element. *)
    val commitChar : string -> (char, 'res) parser -> (char, 'res) parser
    val commitElem : string -> ('elm -> string)
                     -> ('elm, 'res) parser -> ('elm, 'res) parser

    (* Applying a parser to a stream *)

    val parse : ('elm, 'res) parser -> 'elm stream -> 'res option

    (* Adapt a StringCvt-style scan function to work on streams. *)
    val scan : (('a, 'a stream) StringCvt.reader -> 'a stream -> 'b)
               -> 'a stream -> 'b

    (* Skip leading whitespace, then apply the parser. *)
    val skipWS : (char, 'res) parser -> (char, 'res) parser

    (* Run a parser over a string, a substring, or a list. *)
    val scanString : (char, 'res) parser -> string -> 'res option
    val scanSubstr : (char, 'res) parser -> substring -> 'res option
    val scanList   : ('elm, 'res) parser -> 'elm list -> 'res option
end

(*
   ['elm stream] is the type of a lazy stream (sequence) of 'elm values.

   [('elm, 'res) parser] is the type of parsers that consume elements
   from an 'elm stream to produce a result of type 'res.  The attempt
   may succeed with a result of type 'res, in which case the parser
   may have consumed elements of the stream, or it may fail, in which
   case the 'elm stream remains unchanged.

   Below we indicate those parsers that are guaranteed to consume from
   the stream if they succeed.  Such parsers can be safely used as the
   argument to e.g. the repeat0 and repeat1 parser-transformers.

   [stream getc src] builds a stream from a reader getc and a stream
   state src.

   [par1 -- par2] succeeds with (r1, r2) if par1 succeeds with r1 and
   then par2 succeeds with r2 on the remainder of the stream.

   [par1 || par2] succeeds with r1 if par1 succeeds with r1 on the
   stream; otherwise succeeds with r2 if par2 succeeds with r2 on the
   stream; otherwise fails.

   [par >> f] succeeds with f(r) if par succeeds with r on the stream.

   [par >>* f] succeeds with y if par succeeds with r on the stream,
   and f r = SOME y; fails otherwise.

   [par1 >>= parf2] succeeds with r2 if par1 succeeds with r1 and then
   (parf2 r1) succeeds with r2 on the remainder of the stream.

   [par1 #-- par2] succeeds with r2 if par1 succeeds with r1 and
   then par2 succeeds with r2 on the remainder of the stream.

   [par1 --# par2] succeeds with r1 if par1 succeeds with r1 and
   then par2 succeeds with r2 on the remainder of the stream.

   [s $-- par] succeeds with r if the stream begins with the
   characters of the string s, and par succeeds with r on the
   remainder of the stream.

   [par --$ s] succeeds with r if par succeeds with r, and then the
   remainder of the stream begins with the characters of the string s.

   Recommended fixities:

        infix 6 $-- --$ #-- --#
        infix 5 --
        infix 3 >> >>*
        infix 2 >>=
        infix 0 ||

   [repeat0 par] succeeds with [r1, r2, ..., rn] if par succeeds with
   r1, and then par succeeds with r2 on the remainder of the stream,
   and then par succeeds with r3 on the remainder, etc, for some n>=0.
   May succeed without consuming from the stream, even if par must
   consume from the stream to succeed.

   [repeat1 par] succeeds with (r1, [r2, ..., rn]) if par succeeds
   with r1, and then par succeeds with r2 on the remainder of the
   stream, and then par succeeds with r3 on the remainder, etc, for
   some n>=1.  Must consume from the stream to succeed, provided par
   must consume from the stream to succeed.  Equivalent to
        par -- repeat0 par

   [optional par] succeeds with SOME(r) if par succeeds with r;
   succeeds with NONE otherwise.  May succeed without consuming from
   the stream.  Equivalent to
        par >> SOME || success NONE

   [success v] always succeeds with v, consuming nothing from the stream.

   [failure] always fails.

   [getItem] succeeds with r if the stream is non-empty and its first
   element is r; fails otherwise.  Must consume from the stream if it
   succeeds.

   [eof v] succeeds with v if the stream is empty (end of file); fails
   otherwise.

   [$# c] succeeds with c if the stream begins with character c.
   Consumes from the stream if it succeeds.

   [$ s] succeeds with s if the stream begins with string s.  Consumes
   from the stream if it succeeds.

   [getChar p] succeeds with c if the first character c of the
   stream satisfies predicate p.
   Consumes from the stream if it succeeds.

   [getChars0 p] always succeeds with s, where s is the longest prefix
   of the stream all of whose characters satisfy predicate p.  May
   succeed without consuming from the stream.  Equivalent to
        repeat0 (getChar p) >> String.implode

   [getChars1 p] succeeds with s, where s is the longest non-empty
   prefix of the stream all of whose characters satisfy predicate p,
   if such a prefix exists; fails if the first character of the stream
   does not satisfy p.  Consumes from the stream if it succeeds.
   Equivalent to
        repeat1 (getChar p) >> op:: >> String.implode

   [getLit elm] succeeds with elm if the first element of the stream
   equals elm; fails otherwise.  Consumes from the stream if it
   succeeds.

   [getElem p] succeeds with elm if the first element elm of the
   stream satisfies predicate p.  Consumes from the stream if it
   succeeds.

   [getElems0 p] always succeeds with elms, where elms is the longest
   prefix of the stream all of whose elements satisfy predicate p.
   May succeed without consuming from the stream.  Equivalent to
        repeat0 (getElem p)

   [getElems1 p] succeeds with (elm1, elmr), where elm1::elmr is the
   longest non-empty prefix of the stream all of whose elements
   satisfy predicate p; fails if the first element of the stream does
   not satisfy p.  Consumes from the stream if it succeeds.
   Equivalent to
        repeat1 (getElem p)

   [commitChar cexpd par] succeeds with r if par succeeds with r;
   raises exception Parse otherwise.  The exception's string argument
   will have the form
        Expected <cexpd> but found <cfound> at character number n
   where cfound is the first character of the stream to which
   commitChar is applied, or the text "eof" if that stream is empty,
   and n is the number of the stream character cfound counting from the
   beginning of the stream.  The first character has number 0.

   [commitElem eexpd showe par] succeeds with r if par succeeds with r;
   raises exception Parse otherwise.  The exception's string argument
   will have the form
        Expected <eexpd> but found <efound> at element number n
   where efound is the result of applying showe to the first element
   of the stream, or the text "eof" if the stream is empty, and n is
   the number of the stream element efound counting from the beginning
   of the stream.  The first element has number 0.

   [parse par strm] applies the parser par to the stream strm.
   Returns SOME(v) if the parser succeeds with v; returns NONE
   otherwise.

   [scan scfn] produces a parser corresponding to the scan function
   scfn.  Typical examples are:

        scan Bool.scan                  : (char, bool) parser
        scan Date.scan                  : (char, Date.date) parser
        scan Real.scan                  : (char, real) parser
        scan Time.scan                  : (char, Time.time) parser
        scan (Int.scan StringCvt.DEC)   : (char, int) parser
        scan (Word.scan StringCvt.HEX)  : (char, Word.word) parser
        scan (Word8.scan StringCvt.HEX) : (char, Word8.word) parser

   For instance, one may produce a list of real numbers by parsing
   space-separated real numerals from a string s:

        scanString (repeat0 (getChars0 Char.isSpace #-- scan Real.scan)) s

   [skipWS par] removes any leading whitespace from the stream and
   then applies par to the remainder.

   [scanSubstr par sus] applies the parser par to the stream of
   characters taken from the substring sus.  Returns SOME v if par
   succeeds with v; returns NONE otherwise.  Equivalent to
        parse par (stream Substring.getc sus)

   [scanString par s] applies the parser par to the stream of
   characters taken from the string s.  Returns SOME v if par succeeds
   with v; returns NONE otherwise.

   Example: Parsing an integer numeral from the string "723984626junk":

        val intphr = getChars1 Char.isDigit
        val i : string option = scanString intphr "723984626junk"

   [scanList par xs] applies the parser par to the stream of elements
   from the list xs.  Returns SOME v if par succeeds with v; returns
   NONE otherwise.  Equivalent to
        parse par (stream List.getItem xs)
*)