1(**************************************************************************) 2(* *) 3(* OCaml *) 4(* *) 5(* Pierre Weis, projet Cristal, INRIA Rocquencourt *) 6(* *) 7(* Copyright 2002 Institut National de Recherche en Informatique et *) 8(* en Automatique. *) 9(* *) 10(* All rights reserved. This file is distributed under the terms of *) 11(* the GNU Lesser General Public License version 2.1, with the *) 12(* special exception on linking described in the file LICENSE. *) 13(* *) 14(**************************************************************************) 15 16open CamlinternalFormatBasics 17open CamlinternalFormat 18 19(* alias to avoid warning for ambiguity between 20 Pervasives.format6 21 and CamlinternalFormatBasics.format6 22 23 (the former is in fact an alias for the latter, 24 but the ambiguity warning doesn't care) 25*) 26type ('a, 'b, 'c, 'd, 'e, 'f) format6 = 27 ('a, 'b, 'c, 'd, 'e, 'f) Pervasives.format6 28 29 30(* The run-time library for scanners. *) 31 32(* Scanning buffers. *) 33module type SCANNING = sig 34 35 type in_channel 36 37 type scanbuf = in_channel 38 39 type file_name = string 40 41 val stdin : in_channel 42 (* The scanning buffer reading from [Pervasives.stdin]. 43 [stdib] is equivalent to [Scanning.from_channel Pervasives.stdin]. *) 44 45 val stdib : in_channel 46 (* An alias for [Scanf.stdin], the scanning buffer reading from 47 [Pervasives.stdin]. *) 48 49 val next_char : scanbuf -> char 50 (* [Scanning.next_char ib] advance the scanning buffer for 51 one character. 52 If no more character can be read, sets a end of file condition and 53 returns '\000'. *) 54 55 val invalidate_current_char : scanbuf -> unit 56 (* [Scanning.invalidate_current_char ib] mark the current_char as already 57 scanned. *) 58 59 val peek_char : scanbuf -> char 60 (* [Scanning.peek_char ib] returns the current char available in 61 the buffer or reads one if necessary (when the current character is 62 already scanned). 63 If no character can be read, sets an end of file condition and 64 returns '\000'. *) 65 66 val checked_peek_char : scanbuf -> char 67 (* Same as [Scanning.peek_char] above but always returns a valid char or 68 fails: instead of returning a null char when the reading method of the 69 input buffer has reached an end of file, the function raises exception 70 [End_of_file]. *) 71 72 val store_char : int -> scanbuf -> char -> int 73 (* [Scanning.store_char lim ib c] adds [c] to the token buffer 74 of the scanning buffer [ib]. It also advances the scanning buffer for 75 one character and returns [lim - 1], indicating the new limit for the 76 length of the current token. *) 77 78 val skip_char : int -> scanbuf -> int 79 (* [Scanning.skip_char lim ib] ignores the current character. *) 80 81 val ignore_char : int -> scanbuf -> int 82 (* [Scanning.ignore_char ib lim] ignores the current character and 83 decrements the limit. *) 84 85 val token : scanbuf -> string 86 (* [Scanning.token ib] returns the string stored into the token 87 buffer of the scanning buffer: it returns the token matched by the 88 format. *) 89 90 val reset_token : scanbuf -> unit 91 (* [Scanning.reset_token ib] resets the token buffer of 92 the given scanning buffer. *) 93 94 val char_count : scanbuf -> int 95 (* [Scanning.char_count ib] returns the number of characters 96 read so far from the given buffer. *) 97 98 val line_count : scanbuf -> int 99 (* [Scanning.line_count ib] returns the number of new line 100 characters read so far from the given buffer. *) 101 102 val token_count : scanbuf -> int 103 (* [Scanning.token_count ib] returns the number of tokens read 104 so far from [ib]. *) 105 106 val eof : scanbuf -> bool 107 (* [Scanning.eof ib] returns the end of input condition 108 of the given buffer. *) 109 110 val end_of_input : scanbuf -> bool 111 (* [Scanning.end_of_input ib] tests the end of input condition 112 of the given buffer (if no char has ever been read, an attempt to 113 read one is performed). *) 114 115 val beginning_of_input : scanbuf -> bool 116 (* [Scanning.beginning_of_input ib] tests the beginning of input 117 condition of the given buffer. *) 118 119 val name_of_input : scanbuf -> string 120 (* [Scanning.name_of_input ib] returns the name of the character 121 source for input buffer [ib]. *) 122 123 val open_in : file_name -> in_channel 124 val open_in_bin : file_name -> in_channel 125 val from_file : file_name -> in_channel 126 val from_file_bin : file_name -> in_channel 127 val from_string : string -> in_channel 128 val from_function : (unit -> char) -> in_channel 129 val from_channel : Pervasives.in_channel -> in_channel 130 131 val close_in : in_channel -> unit 132 133 val memo_from_channel : Pervasives.in_channel -> in_channel 134 (* Obsolete. *) 135 136end 137 138 139module Scanning : SCANNING = struct 140 141 (* The run-time library for scanf. *) 142 143 type file_name = string 144 145 type in_channel_name = 146 | From_channel of Pervasives.in_channel 147 | From_file of file_name * Pervasives.in_channel 148 | From_function 149 | From_string 150 151 152 type in_channel = { 153 mutable ic_eof : bool; 154 mutable ic_current_char : char; 155 mutable ic_current_char_is_valid : bool; 156 mutable ic_char_count : int; 157 mutable ic_line_count : int; 158 mutable ic_token_count : int; 159 mutable ic_get_next_char : unit -> char; 160 ic_token_buffer : Buffer.t; 161 ic_input_name : in_channel_name; 162 } 163 164 165 type scanbuf = in_channel 166 167 let null_char = '\000' 168 169 (* Reads a new character from input buffer. 170 Next_char never fails, even in case of end of input: 171 it then simply sets the end of file condition. *) 172 let next_char ib = 173 try 174 let c = ib.ic_get_next_char () in 175 ib.ic_current_char <- c; 176 ib.ic_current_char_is_valid <- true; 177 ib.ic_char_count <- succ ib.ic_char_count; 178 if c = '\n' then ib.ic_line_count <- succ ib.ic_line_count; 179 c with 180 | End_of_file -> 181 let c = null_char in 182 ib.ic_current_char <- c; 183 ib.ic_current_char_is_valid <- false; 184 ib.ic_eof <- true; 185 c 186 187 188 let peek_char ib = 189 if ib.ic_current_char_is_valid 190 then ib.ic_current_char 191 else next_char ib 192 193 194 (* Returns a valid current char for the input buffer. In particular 195 no irrelevant null character (as set by [next_char] in case of end 196 of input) is returned, since [End_of_file] is raised when 197 [next_char] sets the end of file condition while trying to read a 198 new character. *) 199 let checked_peek_char ib = 200 let c = peek_char ib in 201 if ib.ic_eof then raise End_of_file; 202 c 203 204 205 let end_of_input ib = 206 ignore (peek_char ib); 207 ib.ic_eof 208 209 210 let eof ib = ib.ic_eof 211 212 let beginning_of_input ib = ib.ic_char_count = 0 213 214 let name_of_input ib = 215 match ib.ic_input_name with 216 | From_channel _ic -> "unnamed Pervasives input channel" 217 | From_file (fname, _ic) -> fname 218 | From_function -> "unnamed function" 219 | From_string -> "unnamed character string" 220 221 222 let char_count ib = 223 if ib.ic_current_char_is_valid 224 then ib.ic_char_count - 1 225 else ib.ic_char_count 226 227 228 let line_count ib = ib.ic_line_count 229 230 let reset_token ib = Buffer.reset ib.ic_token_buffer 231 232 let invalidate_current_char ib = ib.ic_current_char_is_valid <- false 233 234 let token ib = 235 let token_buffer = ib.ic_token_buffer in 236 let tok = Buffer.contents token_buffer in 237 Buffer.clear token_buffer; 238 ib.ic_token_count <- succ ib.ic_token_count; 239 tok 240 241 242 let token_count ib = ib.ic_token_count 243 244 let skip_char width ib = 245 invalidate_current_char ib; 246 width 247 248 249 let ignore_char width ib = skip_char (width - 1) ib 250 251 let store_char width ib c = 252 Buffer.add_char ib.ic_token_buffer c; 253 ignore_char width ib 254 255 256 let default_token_buffer_size = 1024 257 258 let create iname next = { 259 ic_eof = false; 260 ic_current_char = null_char; 261 ic_current_char_is_valid = false; 262 ic_char_count = 0; 263 ic_line_count = 0; 264 ic_token_count = 0; 265 ic_get_next_char = next; 266 ic_token_buffer = Buffer.create default_token_buffer_size; 267 ic_input_name = iname; 268 } 269 270 271 let from_string s = 272 let i = ref 0 in 273 let len = String.length s in 274 let next () = 275 if !i >= len then raise End_of_file else 276 let c = s.[!i] in 277 incr i; 278 c in 279 create From_string next 280 281 282 let from_function = create From_function 283 284 (* Scanning from an input channel. *) 285 286 (* Position of the problem: 287 288 We cannot prevent the scanning mechanism to use one lookahead character, 289 if needed by the semantics of the format string specifications (e.g. a 290 trailing 'skip space' specification in the format string); in this case, 291 the mandatory lookahead character is indeed read from the input and not 292 used to return the token read. It is thus mandatory to be able to store 293 an unused lookahead character somewhere to get it as the first character 294 of the next scan. 295 296 To circumvent this problem, all the scanning functions get a low level 297 input buffer argument where they store the lookahead character when 298 needed; additionally, the input buffer is the only source of character of 299 a scanner. The [scanbuf] input buffers are defined in module {!Scanning}. 300 301 Now we understand that it is extremely important that related and 302 successive calls to scanners indeed read from the same input buffer. 303 In effect, if a scanner [scan1] is reading from [ib1] and stores an 304 unused lookahead character [c1] into its input buffer [ib1], then 305 another scanner [scan2] not reading from the same buffer [ib1] will miss 306 the character [c1], seemingly vanished in the air from the point of view 307 of [scan2]. 308 309 This mechanism works perfectly to read from strings, from files, and from 310 functions, since in those cases, allocating two buffers reading from the 311 same source is unnatural. 312 313 Still, there is a difficulty in the case of scanning from an input 314 channel. In effect, when scanning from an input channel [ic], this channel 315 may not have been allocated from within this library. Hence, it may be 316 shared (two functions of the user's program may successively read from 317 [ic]). This is highly error prone since, one of the function may seek the 318 input channel, while the other function has still an unused lookahead 319 character in its input buffer. In conclusion, you should never mix direct 320 low level reading and high level scanning from the same input channel. 321 322 *) 323 324 (* Perform bufferized input to improve efficiency. *) 325 let file_buffer_size = ref 1024 326 327 (* The scanner closes the input channel at end of input. *) 328 let scan_close_at_end ic = Pervasives.close_in ic; raise End_of_file 329 330 (* The scanner does not close the input channel at end of input: 331 it just raises [End_of_file]. *) 332 let scan_raise_at_end _ic = raise End_of_file 333 334 let from_ic scan_close_ic iname ic = 335 let len = !file_buffer_size in 336 let buf = Bytes.create len in 337 let i = ref 0 in 338 let lim = ref 0 in 339 let eof = ref false in 340 let next () = 341 if !i < !lim then begin let c = Bytes.get buf !i in incr i; c end else 342 if !eof then raise End_of_file else begin 343 lim := input ic buf 0 len; 344 if !lim = 0 then begin eof := true; scan_close_ic ic end else begin 345 i := 1; 346 Bytes.get buf 0 347 end 348 end in 349 create iname next 350 351 352 let from_ic_close_at_end = from_ic scan_close_at_end 353 let from_ic_raise_at_end = from_ic scan_raise_at_end 354 355 (* The scanning buffer reading from [Pervasives.stdin]. 356 One could try to define [stdib] as a scanning buffer reading a character 357 at a time (no bufferization at all), but unfortunately the top-level 358 interaction would be wrong. This is due to some kind of 359 'race condition' when reading from [Pervasives.stdin], 360 since the interactive compiler and [Scanf.scanf] will simultaneously 361 read the material they need from [Pervasives.stdin]; then, confusion 362 will result from what should be read by the top-level and what should be 363 read by [Scanf.scanf]. 364 This is even more complicated by the one character lookahead that 365 [Scanf.scanf] is sometimes obliged to maintain: the lookahead character 366 will be available for the next [Scanf.scanf] entry, seemingly coming from 367 nowhere. 368 Also no [End_of_file] is raised when reading from stdin: if not enough 369 characters have been read, we simply ask to read more. *) 370 let stdin = 371 from_ic scan_raise_at_end 372 (From_file ("-", Pervasives.stdin)) Pervasives.stdin 373 374 375 let stdib = stdin 376 377 let open_in_file open_in fname = 378 match fname with 379 | "-" -> stdin 380 | fname -> 381 let ic = open_in fname in 382 from_ic_close_at_end (From_file (fname, ic)) ic 383 384 385 let open_in = open_in_file Pervasives.open_in 386 let open_in_bin = open_in_file Pervasives.open_in_bin 387 388 let from_file = open_in 389 let from_file_bin = open_in_bin 390 391 let from_channel ic = 392 from_ic_raise_at_end (From_channel ic) ic 393 394 395 let close_in ib = 396 match ib.ic_input_name with 397 | From_channel ic -> 398 Pervasives.close_in ic 399 | From_file (_fname, ic) -> Pervasives.close_in ic 400 | From_function | From_string -> () 401 402 403 (* 404 Obsolete: a memo [from_channel] version to build a [Scanning.in_channel] 405 scanning buffer out of a [Pervasives.in_channel]. 406 This function was used to try to preserve the scanning 407 semantics for the (now obsolete) function [fscanf]. 408 Given that all scanner must read from a [Scanning.in_channel] scanning 409 buffer, [fscanf] must read from one! 410 More precisely, given [ic], all successive calls [fscanf ic] must read 411 from the same scanning buffer. 412 This obliged this library to allocated scanning buffers that were 413 not properly garbbage collectable, hence leading to memory leaks. 414 If you need to read from a [Pervasives.in_channel] input channel 415 [ic], simply define a [Scanning.in_channel] formatted input channel as in 416 [let ib = Scanning.from_channel ic], then use [Scanf.bscanf ib] as usual. 417 *) 418 let memo_from_ic = 419 let memo = ref [] in 420 (fun scan_close_ic ic -> 421 try List.assq ic !memo with 422 | Not_found -> 423 let ib = 424 from_ic scan_close_ic (From_channel ic) ic in 425 memo := (ic, ib) :: !memo; 426 ib) 427 428 429 (* Obsolete: see {!memo_from_ic} above. *) 430 let memo_from_channel = memo_from_ic scan_raise_at_end 431 432end 433 434 435(* Formatted input functions. *) 436 437type ('a, 'b, 'c, 'd) scanner = 438 ('a, Scanning.in_channel, 'b, 'c, 'a -> 'd, 'd) format6 -> 'c 439 440 441(* Reporting errors. *) 442exception Scan_failure of string 443 444let bad_input s = raise (Scan_failure s) 445 446let bad_input_escape c = 447 bad_input (Printf.sprintf "illegal escape character %C" c) 448 449 450let bad_token_length message = 451 bad_input 452 (Printf.sprintf 453 "scanning of %s failed: \ 454 the specified length was too short for token" 455 message) 456 457 458let bad_end_of_input message = 459 bad_input 460 (Printf.sprintf 461 "scanning of %s failed: \ 462 premature end of file occurred before end of token" 463 message) 464 465 466let bad_float () = 467 bad_input "no dot or exponent part found in float token" 468 469 470let bad_hex_float () = 471 bad_input "not a valid float in hexadecimal notation" 472 473 474let character_mismatch_err c ci = 475 Printf.sprintf "looking for %C, found %C" c ci 476 477 478let character_mismatch c ci = 479 bad_input (character_mismatch_err c ci) 480 481 482let rec skip_whites ib = 483 let c = Scanning.peek_char ib in 484 if not (Scanning.eof ib) then begin 485 match c with 486 | ' ' | '\t' | '\n' | '\r' -> 487 Scanning.invalidate_current_char ib; skip_whites ib 488 | _ -> () 489 end 490 491 492(* Checking that [c] is indeed in the input, then skips it. 493 In this case, the character [c] has been explicitly specified in the 494 format as being mandatory in the input; hence we should fail with 495 [End_of_file] in case of end_of_input. 496 (Remember that [Scan_failure] is raised only when (we can prove by 497 evidence) that the input does not match the format string given. We must 498 thus differentiate [End_of_file] as an error due to lack of input, and 499 [Scan_failure] which is due to provably wrong input. I am not sure this is 500 worth the burden: it is complex and somehow subliminal; should be clearer 501 to fail with Scan_failure "Not enough input to complete scanning"!) 502 503 That's why, waiting for a better solution, we use checked_peek_char here. 504 We are also careful to treat "\r\n" in the input as an end of line marker: 505 it always matches a '\n' specification in the input format string. *) 506let rec check_char ib c = 507 match c with 508 | ' ' -> skip_whites ib 509 | '\n' -> check_newline ib 510 | c -> check_this_char ib c 511 512and check_this_char ib c = 513 let ci = Scanning.checked_peek_char ib in 514 if ci = c then Scanning.invalidate_current_char ib else 515 character_mismatch c ci 516 517and check_newline ib = 518 let ci = Scanning.checked_peek_char ib in 519 match ci with 520 | '\n' -> Scanning.invalidate_current_char ib 521 | '\r' -> Scanning.invalidate_current_char ib; check_this_char ib '\n' 522 | _ -> character_mismatch '\n' ci 523 524 525(* Extracting tokens from the output token buffer. *) 526 527let token_char ib = (Scanning.token ib).[0] 528 529let token_string = Scanning.token 530 531let token_bool ib = 532 match Scanning.token ib with 533 | "true" -> true 534 | "false" -> false 535 | s -> bad_input (Printf.sprintf "invalid boolean '%s'" s) 536 537 538(* The type of integer conversions. *) 539type integer_conversion = 540 | B_conversion (* Unsigned binary conversion *) 541 | D_conversion (* Signed decimal conversion *) 542 | I_conversion (* Signed integer conversion *) 543 | O_conversion (* Unsigned octal conversion *) 544 | U_conversion (* Unsigned decimal conversion *) 545 | X_conversion (* Unsigned hexadecimal conversion *) 546 547 548let integer_conversion_of_char = function 549 | 'b' -> B_conversion 550 | 'd' -> D_conversion 551 | 'i' -> I_conversion 552 | 'o' -> O_conversion 553 | 'u' -> U_conversion 554 | 'x' | 'X' -> X_conversion 555 | _ -> assert false 556 557 558(* Extract an integer literal token. 559 Since the functions Pervasives.*int*_of_string do not accept a leading +, 560 we skip it if necessary. *) 561let token_int_literal conv ib = 562 let tok = 563 match conv with 564 | D_conversion | I_conversion -> Scanning.token ib 565 | U_conversion -> "0u" ^ Scanning.token ib 566 | O_conversion -> "0o" ^ Scanning.token ib 567 | X_conversion -> "0x" ^ Scanning.token ib 568 | B_conversion -> "0b" ^ Scanning.token ib in 569 let l = String.length tok in 570 if l = 0 || tok.[0] <> '+' then tok else String.sub tok 1 (l - 1) 571 572 573(* All the functions that convert a string to a number raise the exception 574 Failure when the conversion is not possible. 575 This exception is then trapped in [kscanf]. *) 576let token_int conv ib = int_of_string (token_int_literal conv ib) 577 578let token_float ib = float_of_string (Scanning.token ib) 579 580(* To scan native ints, int32 and int64 integers. 581 We cannot access to conversions to/from strings for those types, 582 Nativeint.of_string, Int32.of_string, and Int64.of_string, 583 since those modules are not available to [Scanf]. 584 However, we can bind and use the corresponding primitives that are 585 available in the runtime. *) 586external nativeint_of_string : string -> nativeint 587 = "caml_nativeint_of_string" 588 589external int32_of_string : string -> int32 590 = "caml_int32_of_string" 591 592external int64_of_string : string -> int64 593 = "caml_int64_of_string" 594 595 596let token_nativeint conv ib = nativeint_of_string (token_int_literal conv ib) 597let token_int32 conv ib = int32_of_string (token_int_literal conv ib) 598let token_int64 conv ib = int64_of_string (token_int_literal conv ib) 599 600(* Scanning numbers. *) 601 602(* Digits scanning functions suppose that one character has been checked and 603 is available, since they return at end of file with the currently found 604 token selected. 605 606 Put it in another way, the digits scanning functions scan for a possibly 607 empty sequence of digits, (hence, a successful scanning from one of those 608 functions does not imply that the token is a well-formed number: to get a 609 true number, it is mandatory to check that at least one valid digit is 610 available before calling one of the digit scanning functions). *) 611 612(* The decimal case is treated especially for optimization purposes. *) 613let rec scan_decimal_digit_star width ib = 614 if width = 0 then width else 615 let c = Scanning.peek_char ib in 616 if Scanning.eof ib then width else 617 match c with 618 | '0' .. '9' as c -> 619 let width = Scanning.store_char width ib c in 620 scan_decimal_digit_star width ib 621 | '_' -> 622 let width = Scanning.ignore_char width ib in 623 scan_decimal_digit_star width ib 624 | _ -> width 625 626 627let scan_decimal_digit_plus width ib = 628 if width = 0 then bad_token_length "decimal digits" else 629 let c = Scanning.checked_peek_char ib in 630 match c with 631 | '0' .. '9' -> 632 let width = Scanning.store_char width ib c in 633 scan_decimal_digit_star width ib 634 | c -> 635 bad_input (Printf.sprintf "character %C is not a decimal digit" c) 636 637 638(* To scan numbers from other bases, we use a predicate argument to 639 scan digits. *) 640let scan_digit_star digitp width ib = 641 let rec scan_digits width ib = 642 if width = 0 then width else 643 let c = Scanning.peek_char ib in 644 if Scanning.eof ib then width else 645 match c with 646 | c when digitp c -> 647 let width = Scanning.store_char width ib c in 648 scan_digits width ib 649 | '_' -> 650 let width = Scanning.ignore_char width ib in 651 scan_digits width ib 652 | _ -> width in 653 scan_digits width ib 654 655 656let scan_digit_plus basis digitp width ib = 657 (* Ensure we have got enough width left, 658 and read at list one digit. *) 659 if width = 0 then bad_token_length "digits" else 660 let c = Scanning.checked_peek_char ib in 661 if digitp c then 662 let width = Scanning.store_char width ib c in 663 scan_digit_star digitp width ib 664 else 665 bad_input (Printf.sprintf "character %C is not a valid %s digit" c basis) 666 667 668let is_binary_digit = function 669 | '0' .. '1' -> true 670 | _ -> false 671 672 673let scan_binary_int = scan_digit_plus "binary" is_binary_digit 674 675let is_octal_digit = function 676 | '0' .. '7' -> true 677 | _ -> false 678 679 680let scan_octal_int = scan_digit_plus "octal" is_octal_digit 681 682let is_hexa_digit = function 683 | '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> true 684 | _ -> false 685 686 687let scan_hexadecimal_int = scan_digit_plus "hexadecimal" is_hexa_digit 688 689(* Scan a decimal integer. *) 690let scan_unsigned_decimal_int = scan_decimal_digit_plus 691 692let scan_sign width ib = 693 let c = Scanning.checked_peek_char ib in 694 match c with 695 | '+' -> Scanning.store_char width ib c 696 | '-' -> Scanning.store_char width ib c 697 | _ -> width 698 699 700let scan_optionally_signed_decimal_int width ib = 701 let width = scan_sign width ib in 702 scan_unsigned_decimal_int width ib 703 704 705(* Scan an unsigned integer that could be given in any (common) basis. 706 If digits are prefixed by one of 0x, 0X, 0o, or 0b, the number is 707 assumed to be written respectively in hexadecimal, hexadecimal, 708 octal, or binary. *) 709let scan_unsigned_int width ib = 710 match Scanning.checked_peek_char ib with 711 | '0' as c -> 712 let width = Scanning.store_char width ib c in 713 if width = 0 then width else 714 let c = Scanning.peek_char ib in 715 if Scanning.eof ib then width else 716 begin match c with 717 | 'x' | 'X' -> scan_hexadecimal_int (Scanning.store_char width ib c) ib 718 | 'o' -> scan_octal_int (Scanning.store_char width ib c) ib 719 | 'b' -> scan_binary_int (Scanning.store_char width ib c) ib 720 | _ -> scan_decimal_digit_star width ib end 721 | _ -> scan_unsigned_decimal_int width ib 722 723 724let scan_optionally_signed_int width ib = 725 let width = scan_sign width ib in 726 scan_unsigned_int width ib 727 728 729let scan_int_conversion conv width ib = 730 match conv with 731 | B_conversion -> scan_binary_int width ib 732 | D_conversion -> scan_optionally_signed_decimal_int width ib 733 | I_conversion -> scan_optionally_signed_int width ib 734 | O_conversion -> scan_octal_int width ib 735 | U_conversion -> scan_unsigned_decimal_int width ib 736 | X_conversion -> scan_hexadecimal_int width ib 737 738 739(* Scanning floating point numbers. *) 740 741(* Fractional part is optional and can be reduced to 0 digits. *) 742let scan_fractional_part width ib = 743 if width = 0 then width else 744 let c = Scanning.peek_char ib in 745 if Scanning.eof ib then width else 746 match c with 747 | '0' .. '9' as c -> 748 scan_decimal_digit_star (Scanning.store_char width ib c) ib 749 | _ -> width 750 751 752(* Exp part is optional and can be reduced to 0 digits. *) 753let scan_exponent_part width ib = 754 if width = 0 then width else 755 let c = Scanning.peek_char ib in 756 if Scanning.eof ib then width else 757 match c with 758 | 'e' | 'E' as c -> 759 scan_optionally_signed_decimal_int (Scanning.store_char width ib c) ib 760 | _ -> width 761 762 763(* Scan the integer part of a floating point number, (not using the 764 OCaml lexical convention since the integer part can be empty): 765 an optional sign, followed by a possibly empty sequence of decimal 766 digits (e.g. -.1). *) 767let scan_integer_part width ib = 768 let width = scan_sign width ib in 769 scan_decimal_digit_star width ib 770 771 772(* 773 For the time being we have (as found in scanf.mli): 774 the field width is composed of an optional integer literal 775 indicating the maximal width of the token to read. 776 Unfortunately, the type-checker let the user write an optional precision, 777 since this is valid for printf format strings. 778 779 Thus, the next step for Scanf is to support a full width and precision 780 indication, more or less similar to the one for printf, possibly extended 781 to the specification of a [max, min] range for the width of the token read 782 for strings. Something like the following spec for scanf.mli: 783 784 The optional [width] is an integer indicating the maximal 785 width of the token read. For instance, [%6d] reads an integer, 786 having at most 6 characters. 787 788 The optional [precision] is a dot [.] followed by an integer: 789 790 - in the floating point number conversions ([%f], [%e], [%g], [%F], [%E], 791 and [%F] conversions, the [precision] indicates the maximum number of 792 digits that may follow the decimal point. For instance, [%.4f] reads a 793 [float] with at most 4 fractional digits, 794 795 - in the string conversions ([%s], [%S], [%\[ range \]]), and in the 796 integer number conversions ([%i], [%d], [%u], [%x], [%o], and their 797 [int32], [int64], and [native_int] correspondent), the [precision] 798 indicates the required minimum width of the token read, 799 800 - on all other conversions, the width and precision specify the [max, min] 801 range for the width of the token read. 802*) 803let scan_float width precision ib = 804 let width = scan_integer_part width ib in 805 if width = 0 then width, precision else 806 let c = Scanning.peek_char ib in 807 if Scanning.eof ib then width, precision else 808 match c with 809 | '.' -> 810 let width = Scanning.store_char width ib c in 811 let precision = min width precision in 812 let width = width - (precision - scan_fractional_part precision ib) in 813 scan_exponent_part width ib, precision 814 | _ -> 815 scan_exponent_part width ib, precision 816 817 818let check_case_insensitive_string width ib error str = 819 let lowercase c = 820 match c with 821 | 'A' .. 'Z' -> 822 char_of_int (int_of_char c - int_of_char 'A' + int_of_char 'a') 823 | _ -> c in 824 let len = String.length str in 825 let width = ref width in 826 for i = 0 to len - 1 do 827 let c = Scanning.peek_char ib in 828 if lowercase c <> lowercase str.[i] then error (); 829 if !width = 0 then error (); 830 width := Scanning.store_char !width ib c; 831 done; 832 !width 833 834 835let scan_hex_float width precision ib = 836 if width = 0 || Scanning.end_of_input ib then bad_hex_float (); 837 let width = scan_sign width ib in 838 if width = 0 || Scanning.end_of_input ib then bad_hex_float (); 839 match Scanning.peek_char ib with 840 | '0' as c -> ( 841 let width = Scanning.store_char width ib c in 842 if width = 0 || Scanning.end_of_input ib then bad_hex_float (); 843 let width = check_case_insensitive_string width ib bad_hex_float "x" in 844 if width = 0 || Scanning.end_of_input ib then width else 845 let width = match Scanning.peek_char ib with 846 | '.' | 'p' | 'P' -> width 847 | _ -> scan_hexadecimal_int width ib in 848 if width = 0 || Scanning.end_of_input ib then width else 849 let width = match Scanning.peek_char ib with 850 | '.' as c -> ( 851 let width = Scanning.store_char width ib c in 852 if width = 0 || Scanning.end_of_input ib then width else 853 match Scanning.peek_char ib with 854 | 'p' | 'P' -> width 855 | _ -> 856 let precision = min width precision in 857 width - (precision - scan_hexadecimal_int precision ib) 858 ) 859 | _ -> width in 860 if width = 0 || Scanning.end_of_input ib then width else 861 match Scanning.peek_char ib with 862 | 'p' | 'P' as c -> 863 let width = Scanning.store_char width ib c in 864 if width = 0 || Scanning.end_of_input ib then bad_hex_float (); 865 scan_optionally_signed_decimal_int width ib 866 | _ -> width 867 ) 868 | 'n' | 'N' as c -> 869 let width = Scanning.store_char width ib c in 870 if width = 0 || Scanning.end_of_input ib then bad_hex_float (); 871 check_case_insensitive_string width ib bad_hex_float "an" 872 | 'i' | 'I' as c -> 873 let width = Scanning.store_char width ib c in 874 if width = 0 || Scanning.end_of_input ib then bad_hex_float (); 875 check_case_insensitive_string width ib bad_hex_float "nfinity" 876 | _ -> bad_hex_float () 877 878 879let scan_caml_float_rest width precision ib = 880 if width = 0 || Scanning.end_of_input ib then bad_float (); 881 let width = scan_decimal_digit_star width ib in 882 if width = 0 || Scanning.end_of_input ib then bad_float (); 883 let c = Scanning.peek_char ib in 884 match c with 885 | '.' -> 886 let width = Scanning.store_char width ib c in 887 (* The effective width available for scanning the fractional part is 888 the minimum of declared precision and width left. *) 889 let precision = min width precision in 890 (* After scanning the fractional part with [precision] provisional width, 891 [width_precision] is left. *) 892 let width_precision = scan_fractional_part precision ib in 893 (* Hence, scanning the fractional part took exactly 894 [precision - width_precision] chars. *) 895 let frac_width = precision - width_precision in 896 (* And new provisional width is [width - width_precision. *) 897 let width = width - frac_width in 898 scan_exponent_part width ib 899 | 'e' | 'E' -> 900 scan_exponent_part width ib 901 | _ -> bad_float () 902 903 904let scan_caml_float width precision ib = 905 if width = 0 || Scanning.end_of_input ib then bad_float (); 906 let width = scan_sign width ib in 907 if width = 0 || Scanning.end_of_input ib then bad_float (); 908 match Scanning.peek_char ib with 909 | '0' as c -> ( 910 let width = Scanning.store_char width ib c in 911 if width = 0 || Scanning.end_of_input ib then bad_float (); 912 match Scanning.peek_char ib with 913 | 'x' | 'X' as c -> ( 914 let width = Scanning.store_char width ib c in 915 if width = 0 || Scanning.end_of_input ib then bad_float (); 916 let width = scan_hexadecimal_int width ib in 917 if width = 0 || Scanning.end_of_input ib then bad_float (); 918 let width = match Scanning.peek_char ib with 919 | '.' as c -> ( 920 let width = Scanning.store_char width ib c in 921 if width = 0 || Scanning.end_of_input ib then width else 922 match Scanning.peek_char ib with 923 | 'p' | 'P' -> width 924 | _ -> 925 let precision = min width precision in 926 width - (precision - scan_hexadecimal_int precision ib) 927 ) 928 | 'p' | 'P' -> width 929 | _ -> bad_float () in 930 if width = 0 || Scanning.end_of_input ib then width else 931 match Scanning.peek_char ib with 932 | 'p' | 'P' as c -> 933 let width = Scanning.store_char width ib c in 934 if width = 0 || Scanning.end_of_input ib then bad_hex_float (); 935 scan_optionally_signed_decimal_int width ib 936 | _ -> width 937 ) 938 | _ -> 939 scan_caml_float_rest width precision ib 940 ) 941 | '1' .. '9' as c -> 942 let width = Scanning.store_char width ib c in 943 if width = 0 || Scanning.end_of_input ib then bad_float (); 944 scan_caml_float_rest width precision ib 945(* Special case of nan and infinity: 946 | 'i' -> 947 | 'n' -> 948*) 949 | _ -> bad_float () 950 951 952(* Scan a regular string: 953 stops when encountering a space, if no scanning indication has been given; 954 otherwise, stops when encountering the characters in the scanning 955 indication [stp]. 956 It also stops at end of file or when the maximum number of characters has 957 been read. *) 958let scan_string stp width ib = 959 let rec loop width = 960 if width = 0 then width else 961 let c = Scanning.peek_char ib in 962 if Scanning.eof ib then width else 963 match stp with 964 | Some c' when c = c' -> Scanning.skip_char width ib 965 | Some _ -> loop (Scanning.store_char width ib c) 966 | None -> 967 match c with 968 | ' ' | '\t' | '\n' | '\r' -> width 969 | _ -> loop (Scanning.store_char width ib c) in 970 loop width 971 972 973(* Scan a char: peek strictly one character in the input, whatsoever. *) 974let scan_char width ib = 975 (* The case width = 0 could not happen here, since it is tested before 976 calling scan_char, in the main scanning function. 977 if width = 0 then bad_token_length "a character" else *) 978 Scanning.store_char width ib (Scanning.checked_peek_char ib) 979 980 981let char_for_backslash = function 982 | 'n' -> '\010' 983 | 'r' -> '\013' 984 | 'b' -> '\008' 985 | 't' -> '\009' 986 | c -> c 987 988 989(* The integer value corresponding to the facial value of a valid 990 decimal digit character. *) 991let decimal_value_of_char c = int_of_char c - int_of_char '0' 992 993let char_for_decimal_code c0 c1 c2 = 994 let c = 995 100 * decimal_value_of_char c0 + 996 10 * decimal_value_of_char c1 + 997 decimal_value_of_char c2 in 998 if c < 0 || c > 255 then 999 bad_input 1000 (Printf.sprintf 1001 "bad character decimal encoding \\%c%c%c" c0 c1 c2) else 1002 char_of_int c 1003 1004 1005(* The integer value corresponding to the facial value of a valid 1006 hexadecimal digit character. *) 1007let hexadecimal_value_of_char c = 1008 let d = int_of_char c in 1009 (* Could also be: 1010 if d <= int_of_char '9' then d - int_of_char '0' else 1011 if d <= int_of_char 'F' then 10 + d - int_of_char 'A' else 1012 if d <= int_of_char 'f' then 10 + d - int_of_char 'a' else assert false 1013 *) 1014 if d >= int_of_char 'a' then 1015 d - 87 (* 10 + int_of_char c - int_of_char 'a' *) else 1016 if d >= int_of_char 'A' then 1017 d - 55 (* 10 + int_of_char c - int_of_char 'A' *) else 1018 d - int_of_char '0' 1019 1020 1021let char_for_hexadecimal_code c1 c2 = 1022 let c = 1023 16 * hexadecimal_value_of_char c1 + 1024 hexadecimal_value_of_char c2 in 1025 if c < 0 || c > 255 then 1026 bad_input 1027 (Printf.sprintf "bad character hexadecimal encoding \\%c%c" c1 c2) else 1028 char_of_int c 1029 1030 1031(* Called in particular when encountering '\\' as starter of a char. 1032 Stops before the corresponding '\''. *) 1033let check_next_char message width ib = 1034 if width = 0 then bad_token_length message else 1035 let c = Scanning.peek_char ib in 1036 if Scanning.eof ib then bad_end_of_input message else 1037 c 1038 1039 1040let check_next_char_for_char = check_next_char "a Char" 1041let check_next_char_for_string = check_next_char "a String" 1042 1043let scan_backslash_char width ib = 1044 match check_next_char_for_char width ib with 1045 | '\\' | '\'' | '\"' | 'n' | 't' | 'b' | 'r' as c -> 1046 Scanning.store_char width ib (char_for_backslash c) 1047 | '0' .. '9' as c -> 1048 let get_digit () = 1049 let c = Scanning.next_char ib in 1050 match c with 1051 | '0' .. '9' as c -> c 1052 | c -> bad_input_escape c in 1053 let c0 = c in 1054 let c1 = get_digit () in 1055 let c2 = get_digit () in 1056 Scanning.store_char (width - 2) ib (char_for_decimal_code c0 c1 c2) 1057 | 'x' -> 1058 let get_digit () = 1059 let c = Scanning.next_char ib in 1060 match c with 1061 | '0' .. '9' | 'A' .. 'F' | 'a' .. 'f' as c -> c 1062 | c -> bad_input_escape c in 1063 let c1 = get_digit () in 1064 let c2 = get_digit () in 1065 Scanning.store_char (width - 2) ib (char_for_hexadecimal_code c1 c2) 1066 | c -> 1067 bad_input_escape c 1068 1069 1070(* Scan a character (an OCaml token). *) 1071let scan_caml_char width ib = 1072 1073 let rec find_start width = 1074 match Scanning.checked_peek_char ib with 1075 | '\'' -> find_char (Scanning.ignore_char width ib) 1076 | c -> character_mismatch '\'' c 1077 1078 and find_char width = 1079 match check_next_char_for_char width ib with 1080 | '\\' -> 1081 find_stop (scan_backslash_char (Scanning.ignore_char width ib) ib) 1082 | c -> 1083 find_stop (Scanning.store_char width ib c) 1084 1085 and find_stop width = 1086 match check_next_char_for_char width ib with 1087 | '\'' -> Scanning.ignore_char width ib 1088 | c -> character_mismatch '\'' c in 1089 1090 find_start width 1091 1092 1093(* Scan a delimited string (an OCaml token). *) 1094let scan_caml_string width ib = 1095 1096 let rec find_start width = 1097 match Scanning.checked_peek_char ib with 1098 | '\"' -> find_stop (Scanning.ignore_char width ib) 1099 | c -> character_mismatch '\"' c 1100 1101 and find_stop width = 1102 match check_next_char_for_string width ib with 1103 | '\"' -> Scanning.ignore_char width ib 1104 | '\\' -> scan_backslash (Scanning.ignore_char width ib) 1105 | c -> find_stop (Scanning.store_char width ib c) 1106 1107 and scan_backslash width = 1108 match check_next_char_for_string width ib with 1109 | '\r' -> skip_newline (Scanning.ignore_char width ib) 1110 | '\n' -> skip_spaces (Scanning.ignore_char width ib) 1111 | _ -> find_stop (scan_backslash_char width ib) 1112 1113 and skip_newline width = 1114 match check_next_char_for_string width ib with 1115 | '\n' -> skip_spaces (Scanning.ignore_char width ib) 1116 | _ -> find_stop (Scanning.store_char width ib '\r') 1117 1118 and skip_spaces width = 1119 match check_next_char_for_string width ib with 1120 | ' ' -> skip_spaces (Scanning.ignore_char width ib) 1121 | _ -> find_stop width in 1122 1123 find_start width 1124 1125 1126(* Scan a boolean (an OCaml token). *) 1127let scan_bool ib = 1128 let c = Scanning.checked_peek_char ib in 1129 let m = 1130 match c with 1131 | 't' -> 4 1132 | 'f' -> 5 1133 | c -> 1134 bad_input 1135 (Printf.sprintf "the character %C cannot start a boolean" c) in 1136 scan_string None m ib 1137 1138 1139(* Scan a string containing elements in char_set and terminated by scan_indic 1140 if provided. *) 1141let scan_chars_in_char_set char_set scan_indic width ib = 1142 let rec scan_chars i stp = 1143 let c = Scanning.peek_char ib in 1144 if i > 0 && not (Scanning.eof ib) && 1145 is_in_char_set char_set c && 1146 int_of_char c <> stp then 1147 let _ = Scanning.store_char max_int ib c in 1148 scan_chars (i - 1) stp in 1149 match scan_indic with 1150 | None -> scan_chars width (-1); 1151 | Some c -> 1152 scan_chars width (int_of_char c); 1153 if not (Scanning.eof ib) then 1154 let ci = Scanning.peek_char ib in 1155 if c = ci 1156 then Scanning.invalidate_current_char ib 1157 else character_mismatch c ci 1158 1159 1160(* The global error report function for [Scanf]. *) 1161let scanf_bad_input ib = function 1162 | Scan_failure s | Failure s -> 1163 let i = Scanning.char_count ib in 1164 bad_input (Printf.sprintf "scanf: bad input at char number %i: %s" i s) 1165 | x -> raise x 1166 1167 1168(* Get the content of a counter from an input buffer. *) 1169let get_counter ib counter = 1170 match counter with 1171 | Line_counter -> Scanning.line_count ib 1172 | Char_counter -> Scanning.char_count ib 1173 | Token_counter -> Scanning.token_count ib 1174 1175 1176(* Compute the width of a padding option (see "%42{" and "%123("). *) 1177let width_of_pad_opt pad_opt = match pad_opt with 1178 | None -> max_int 1179 | Some width -> width 1180 1181 1182let stopper_of_formatting_lit fmting = 1183 if fmting = Escaped_percent then '%', "" else 1184 let str = string_of_formatting_lit fmting in 1185 let stp = str.[1] in 1186 let sub_str = String.sub str 2 (String.length str - 2) in 1187 stp, sub_str 1188 1189 1190(******************************************************************************) 1191 (* Readers managment *) 1192 1193(* A call to take_format_readers on a format is evaluated into functions 1194 taking readers as arguments and aggregate them into an heterogeneous list *) 1195(* When all readers are taken, finally pass the list of the readers to the 1196 continuation k. *) 1197let rec take_format_readers : type a c d e f . 1198 ((d, e) heter_list -> e) -> (a, Scanning.in_channel, c, d, e, f) fmt -> 1199 d = 1200fun k fmt -> match fmt with 1201 | Reader fmt_rest -> 1202 fun reader -> 1203 let new_k readers_rest = k (Cons (reader, readers_rest)) in 1204 take_format_readers new_k fmt_rest 1205 | Char rest -> take_format_readers k rest 1206 | Caml_char rest -> take_format_readers k rest 1207 | String (_, rest) -> take_format_readers k rest 1208 | Caml_string (_, rest) -> take_format_readers k rest 1209 | Int (_, _, _, rest) -> take_format_readers k rest 1210 | Int32 (_, _, _, rest) -> take_format_readers k rest 1211 | Nativeint (_, _, _, rest) -> take_format_readers k rest 1212 | Int64 (_, _, _, rest) -> take_format_readers k rest 1213 | Float (_, _, _, rest) -> take_format_readers k rest 1214 | Bool rest -> take_format_readers k rest 1215 | Alpha rest -> take_format_readers k rest 1216 | Theta rest -> take_format_readers k rest 1217 | Flush rest -> take_format_readers k rest 1218 | String_literal (_, rest) -> take_format_readers k rest 1219 | Char_literal (_, rest) -> take_format_readers k rest 1220 | Custom (_, _, rest) -> take_format_readers k rest 1221 1222 | Scan_char_set (_, _, rest) -> take_format_readers k rest 1223 | Scan_get_counter (_, rest) -> take_format_readers k rest 1224 | Scan_next_char rest -> take_format_readers k rest 1225 1226 | Formatting_lit (_, rest) -> take_format_readers k rest 1227 | Formatting_gen (Open_tag (Format (fmt, _)), rest) -> 1228 take_format_readers k (concat_fmt fmt rest) 1229 | Formatting_gen (Open_box (Format (fmt, _)), rest) -> 1230 take_format_readers k (concat_fmt fmt rest) 1231 1232 | Format_arg (_, _, rest) -> take_format_readers k rest 1233 | Format_subst (_, fmtty, rest) -> 1234 take_fmtty_format_readers k (erase_rel (symm fmtty)) rest 1235 | Ignored_param (ign, rest) -> take_ignored_format_readers k ign rest 1236 1237 | End_of_format -> k Nil 1238 1239(* Take readers associated to an fmtty coming from a Format_subst "%(...%)". *) 1240and take_fmtty_format_readers : type x y a c d e f . 1241 ((d, e) heter_list -> e) -> (a, Scanning.in_channel, c, d, x, y) fmtty -> 1242 (y, Scanning.in_channel, c, x, e, f) fmt -> d = 1243fun k fmtty fmt -> match fmtty with 1244 | Reader_ty fmt_rest -> 1245 fun reader -> 1246 let new_k readers_rest = k (Cons (reader, readers_rest)) in 1247 take_fmtty_format_readers new_k fmt_rest fmt 1248 | Ignored_reader_ty fmt_rest -> 1249 fun reader -> 1250 let new_k readers_rest = k (Cons (reader, readers_rest)) in 1251 take_fmtty_format_readers new_k fmt_rest fmt 1252 | Char_ty rest -> take_fmtty_format_readers k rest fmt 1253 | String_ty rest -> take_fmtty_format_readers k rest fmt 1254 | Int_ty rest -> take_fmtty_format_readers k rest fmt 1255 | Int32_ty rest -> take_fmtty_format_readers k rest fmt 1256 | Nativeint_ty rest -> take_fmtty_format_readers k rest fmt 1257 | Int64_ty rest -> take_fmtty_format_readers k rest fmt 1258 | Float_ty rest -> take_fmtty_format_readers k rest fmt 1259 | Bool_ty rest -> take_fmtty_format_readers k rest fmt 1260 | Alpha_ty rest -> take_fmtty_format_readers k rest fmt 1261 | Theta_ty rest -> take_fmtty_format_readers k rest fmt 1262 | Any_ty rest -> take_fmtty_format_readers k rest fmt 1263 | Format_arg_ty (_, rest) -> take_fmtty_format_readers k rest fmt 1264 | End_of_fmtty -> take_format_readers k fmt 1265 | Format_subst_ty (ty1, ty2, rest) -> 1266 let ty = trans (symm ty1) ty2 in 1267 take_fmtty_format_readers k (concat_fmtty ty rest) fmt 1268 1269(* Take readers associated to an ignored parameter. *) 1270and take_ignored_format_readers : type x y a c d e f . 1271 ((d, e) heter_list -> e) -> (a, Scanning.in_channel, c, d, x, y) ignored -> 1272 (y, Scanning.in_channel, c, x, e, f) fmt -> d = 1273fun k ign fmt -> match ign with 1274 | Ignored_reader -> 1275 fun reader -> 1276 let new_k readers_rest = k (Cons (reader, readers_rest)) in 1277 take_format_readers new_k fmt 1278 | Ignored_char -> take_format_readers k fmt 1279 | Ignored_caml_char -> take_format_readers k fmt 1280 | Ignored_string _ -> take_format_readers k fmt 1281 | Ignored_caml_string _ -> take_format_readers k fmt 1282 | Ignored_int (_, _) -> take_format_readers k fmt 1283 | Ignored_int32 (_, _) -> take_format_readers k fmt 1284 | Ignored_nativeint (_, _) -> take_format_readers k fmt 1285 | Ignored_int64 (_, _) -> take_format_readers k fmt 1286 | Ignored_float (_, _) -> take_format_readers k fmt 1287 | Ignored_bool -> take_format_readers k fmt 1288 | Ignored_format_arg _ -> take_format_readers k fmt 1289 | Ignored_format_subst (_, fmtty) -> take_fmtty_format_readers k fmtty fmt 1290 | Ignored_scan_char_set _ -> take_format_readers k fmt 1291 | Ignored_scan_get_counter _ -> take_format_readers k fmt 1292 | Ignored_scan_next_char -> take_format_readers k fmt 1293 1294(******************************************************************************) 1295 (* Generic scanning *) 1296 1297(* Make a generic scanning function. *) 1298(* Scan a stream according to a format and readers obtained by 1299 take_format_readers, and aggegate scanned values into an 1300 heterogeneous list. *) 1301(* Return the heterogeneous list of scanned values. *) 1302let rec make_scanf : type a c d e f. 1303 Scanning.in_channel -> (a, Scanning.in_channel, c, d, e, f) fmt -> 1304 (d, e) heter_list -> (a, f) heter_list = 1305fun ib fmt readers -> match fmt with 1306 | Char rest -> 1307 let _ = scan_char 0 ib in 1308 let c = token_char ib in 1309 Cons (c, make_scanf ib rest readers) 1310 | Caml_char rest -> 1311 let _ = scan_caml_char 0 ib in 1312 let c = token_char ib in 1313 Cons (c, make_scanf ib rest readers) 1314 1315 | String (pad, Formatting_lit (fmting_lit, rest)) -> 1316 let stp, str = stopper_of_formatting_lit fmting_lit in 1317 let scan width _ ib = scan_string (Some stp) width ib in 1318 let str_rest = String_literal (str, rest) in 1319 pad_prec_scanf ib str_rest readers pad No_precision scan token_string 1320 | String (pad, Formatting_gen (Open_tag (Format (fmt', _)), rest)) -> 1321 let scan width _ ib = scan_string (Some '{') width ib in 1322 pad_prec_scanf ib (concat_fmt fmt' rest) readers pad No_precision scan 1323 token_string 1324 | String (pad, Formatting_gen (Open_box (Format (fmt', _)), rest)) -> 1325 let scan width _ ib = scan_string (Some '[') width ib in 1326 pad_prec_scanf ib (concat_fmt fmt' rest) readers pad No_precision scan 1327 token_string 1328 | String (pad, rest) -> 1329 let scan width _ ib = scan_string None width ib in 1330 pad_prec_scanf ib rest readers pad No_precision scan token_string 1331 1332 | Caml_string (pad, rest) -> 1333 let scan width _ ib = scan_caml_string width ib in 1334 pad_prec_scanf ib rest readers pad No_precision scan token_string 1335 | Int (iconv, pad, prec, rest) -> 1336 let c = integer_conversion_of_char (char_of_iconv iconv) in 1337 let scan width _ ib = scan_int_conversion c width ib in 1338 pad_prec_scanf ib rest readers pad prec scan (token_int c) 1339 | Int32 (iconv, pad, prec, rest) -> 1340 let c = integer_conversion_of_char (char_of_iconv iconv) in 1341 let scan width _ ib = scan_int_conversion c width ib in 1342 pad_prec_scanf ib rest readers pad prec scan (token_int32 c) 1343 | Nativeint (iconv, pad, prec, rest) -> 1344 let c = integer_conversion_of_char (char_of_iconv iconv) in 1345 let scan width _ ib = scan_int_conversion c width ib in 1346 pad_prec_scanf ib rest readers pad prec scan (token_nativeint c) 1347 | Int64 (iconv, pad, prec, rest) -> 1348 let c = integer_conversion_of_char (char_of_iconv iconv) in 1349 let scan width _ ib = scan_int_conversion c width ib in 1350 pad_prec_scanf ib rest readers pad prec scan (token_int64 c) 1351 | Float (Float_F, pad, prec, rest) -> 1352 pad_prec_scanf ib rest readers pad prec scan_caml_float token_float 1353 | Float ((Float_f | Float_pf | Float_sf | Float_e | Float_pe | Float_se 1354 | Float_E | Float_pE | Float_sE | Float_g | Float_pg | Float_sg 1355 | Float_G | Float_pG | Float_sG), pad, prec, rest) -> 1356 pad_prec_scanf ib rest readers pad prec scan_float token_float 1357 | Float ((Float_h | Float_ph | Float_sh | Float_H | Float_pH | Float_sH), 1358 pad, prec, rest) -> 1359 pad_prec_scanf ib rest readers pad prec scan_hex_float token_float 1360 | Bool rest -> 1361 let _ = scan_bool ib in 1362 let b = token_bool ib in 1363 Cons (b, make_scanf ib rest readers) 1364 | Alpha _ -> 1365 invalid_arg "scanf: bad conversion \"%a\"" 1366 | Theta _ -> 1367 invalid_arg "scanf: bad conversion \"%t\"" 1368 | Custom _ -> 1369 invalid_arg "scanf: bad conversion \"%?\" (custom converter)" 1370 | Reader fmt_rest -> 1371 begin match readers with 1372 | Cons (reader, readers_rest) -> 1373 let x = reader ib in 1374 Cons (x, make_scanf ib fmt_rest readers_rest) 1375 | Nil -> 1376 invalid_arg "scanf: missing reader" 1377 end 1378 | Flush rest -> 1379 if Scanning.end_of_input ib then make_scanf ib rest readers 1380 else bad_input "end of input not found" 1381 1382 | String_literal (str, rest) -> 1383 String.iter (check_char ib) str; 1384 make_scanf ib rest readers 1385 | Char_literal (chr, rest) -> 1386 check_char ib chr; 1387 make_scanf ib rest readers 1388 1389 | Format_arg (pad_opt, fmtty, rest) -> 1390 let _ = scan_caml_string (width_of_pad_opt pad_opt) ib in 1391 let s = token_string ib in 1392 let fmt = 1393 try format_of_string_fmtty s fmtty 1394 with Failure msg -> bad_input msg 1395 in 1396 Cons (fmt, make_scanf ib rest readers) 1397 | Format_subst (pad_opt, fmtty, rest) -> 1398 let _ = scan_caml_string (width_of_pad_opt pad_opt) ib in 1399 let s = token_string ib in 1400 let fmt, fmt' = 1401 try 1402 let Fmt_EBB fmt = fmt_ebb_of_string s in 1403 let Fmt_EBB fmt' = fmt_ebb_of_string s in 1404 (* TODO: find a way to avoid reparsing twice *) 1405 1406 (* TODO: these type-checks below *can* fail because of type 1407 ambiguity in presence of ignored-readers: "%_r%d" and "%d%_r" 1408 are typed in the same way. 1409 1410 # Scanf.sscanf "\"%_r%d\"3" "%(%d%_r%)" ignore 1411 (fun fmt n -> string_of_format fmt, n) 1412 Exception: CamlinternalFormat.Type_mismatch. 1413 1414 We should properly catch this exception. 1415 *) 1416 type_format fmt (erase_rel fmtty), 1417 type_format fmt' (erase_rel (symm fmtty)) 1418 with Failure msg -> bad_input msg 1419 in 1420 Cons (Format (fmt, s), 1421 make_scanf ib (concat_fmt fmt' rest) readers) 1422 1423 | Scan_char_set (width_opt, char_set, Formatting_lit (fmting_lit, rest)) -> 1424 let stp, str = stopper_of_formatting_lit fmting_lit in 1425 let width = width_of_pad_opt width_opt in 1426 scan_chars_in_char_set char_set (Some stp) width ib; 1427 let s = token_string ib in 1428 let str_rest = String_literal (str, rest) in 1429 Cons (s, make_scanf ib str_rest readers) 1430 | Scan_char_set (width_opt, char_set, rest) -> 1431 let width = width_of_pad_opt width_opt in 1432 scan_chars_in_char_set char_set None width ib; 1433 let s = token_string ib in 1434 Cons (s, make_scanf ib rest readers) 1435 | Scan_get_counter (counter, rest) -> 1436 let count = get_counter ib counter in 1437 Cons (count, make_scanf ib rest readers) 1438 | Scan_next_char rest -> 1439 let c = Scanning.checked_peek_char ib in 1440 Cons (c, make_scanf ib rest readers) 1441 1442 | Formatting_lit (formatting_lit, rest) -> 1443 String.iter (check_char ib) (string_of_formatting_lit formatting_lit); 1444 make_scanf ib rest readers 1445 | Formatting_gen (Open_tag (Format (fmt', _)), rest) -> 1446 check_char ib '@'; check_char ib '{'; 1447 make_scanf ib (concat_fmt fmt' rest) readers 1448 | Formatting_gen (Open_box (Format (fmt', _)), rest) -> 1449 check_char ib '@'; check_char ib '['; 1450 make_scanf ib (concat_fmt fmt' rest) readers 1451 1452 | Ignored_param (ign, rest) -> 1453 let Param_format_EBB fmt' = param_format_of_ignored_format ign rest in 1454 begin match make_scanf ib fmt' readers with 1455 | Cons (_, arg_rest) -> arg_rest 1456 | Nil -> assert false 1457 end 1458 1459 | End_of_format -> 1460 Nil 1461 1462(* Case analysis on padding and precision. *) 1463(* Reject formats containing "%*" or "%.*". *) 1464(* Pass padding and precision to the generic scanner `scan'. *) 1465and pad_prec_scanf : type a c d e f x y z t . 1466 Scanning.in_channel -> (a, Scanning.in_channel, c, d, e, f) fmt -> 1467 (d, e) heter_list -> (x, y) padding -> (y, z -> a) precision -> 1468 (int -> int -> Scanning.in_channel -> t) -> 1469 (Scanning.in_channel -> z) -> 1470 (x, f) heter_list = 1471fun ib fmt readers pad prec scan token -> match pad, prec with 1472 | No_padding, No_precision -> 1473 let _ = scan max_int max_int ib in 1474 let x = token ib in 1475 Cons (x, make_scanf ib fmt readers) 1476 | No_padding, Lit_precision p -> 1477 let _ = scan max_int p ib in 1478 let x = token ib in 1479 Cons (x, make_scanf ib fmt readers) 1480 | Lit_padding ((Right | Zeros), w), No_precision -> 1481 let _ = scan w max_int ib in 1482 let x = token ib in 1483 Cons (x, make_scanf ib fmt readers) 1484 | Lit_padding ((Right | Zeros), w), Lit_precision p -> 1485 let _ = scan w p ib in 1486 let x = token ib in 1487 Cons (x, make_scanf ib fmt readers) 1488 | Lit_padding (Left, _), _ -> 1489 invalid_arg "scanf: bad conversion \"%-\"" 1490 | Lit_padding ((Right | Zeros), _), Arg_precision -> 1491 invalid_arg "scanf: bad conversion \"%*\"" 1492 | Arg_padding _, _ -> 1493 invalid_arg "scanf: bad conversion \"%*\"" 1494 | No_padding, Arg_precision -> 1495 invalid_arg "scanf: bad conversion \"%*\"" 1496 1497(******************************************************************************) 1498 (* Defining [scanf] and various flavors of [scanf] *) 1499 1500type 'a kscanf_result = Args of 'a | Exc of exn 1501 1502let kscanf ib ef (Format (fmt, str)) = 1503 let rec apply : type a b . a -> (a, b) heter_list -> b = 1504 fun f args -> match args with 1505 | Cons (x, r) -> apply (f x) r 1506 | Nil -> f 1507 in 1508 let k readers f = 1509 Scanning.reset_token ib; 1510 match try Args (make_scanf ib fmt readers) with 1511 | (Scan_failure _ | Failure _ | End_of_file) as exc -> Exc exc 1512 | Invalid_argument msg -> 1513 invalid_arg (msg ^ " in format \"" ^ String.escaped str ^ "\"") 1514 with 1515 | Args args -> apply f args 1516 | Exc exc -> ef ib exc 1517 in 1518 take_format_readers k fmt 1519 1520(***) 1521 1522let kbscanf = kscanf 1523let bscanf ib fmt = kbscanf ib scanf_bad_input fmt 1524 1525let ksscanf s ef fmt = kbscanf (Scanning.from_string s) ef fmt 1526let sscanf s fmt = kbscanf (Scanning.from_string s) scanf_bad_input fmt 1527 1528let scanf fmt = kscanf Scanning.stdib scanf_bad_input fmt 1529 1530(***) 1531 1532(* Scanning format strings. *) 1533let bscanf_format : 1534 Scanning.in_channel -> ('a, 'b, 'c, 'd, 'e, 'f) format6 -> 1535 (('a, 'b, 'c, 'd, 'e, 'f) format6 -> 'g) -> 'g = 1536 fun ib format f -> 1537 let _ = scan_caml_string max_int ib in 1538 let str = token_string ib in 1539 let fmt' = 1540 try format_of_string_format str format 1541 with Failure msg -> bad_input msg in 1542 f fmt' 1543 1544 1545let sscanf_format : 1546 string -> ('a, 'b, 'c, 'd, 'e, 'f) format6 -> 1547 (('a, 'b, 'c, 'd, 'e, 'f) format6 -> 'g) -> 'g = 1548 fun s format f -> bscanf_format (Scanning.from_string s) format f 1549 1550 1551let string_to_String s = 1552 let l = String.length s in 1553 let b = Buffer.create (l + 2) in 1554 Buffer.add_char b '\"'; 1555 for i = 0 to l - 1 do 1556 let c = s.[i] in 1557 if c = '\"' then Buffer.add_char b '\\'; 1558 Buffer.add_char b c; 1559 done; 1560 Buffer.add_char b '\"'; 1561 Buffer.contents b 1562 1563 1564let format_from_string s fmt = 1565 sscanf_format (string_to_String s) fmt (fun x -> x) 1566 1567 1568let unescaped s = 1569 sscanf ("\"" ^ s ^ "\"") "%S%!" (fun x -> x) 1570 1571 1572(* Deprecated *) 1573let kfscanf ic ef fmt = kbscanf (Scanning.memo_from_channel ic) ef fmt 1574let fscanf ic fmt = kscanf (Scanning.memo_from_channel ic) scanf_bad_input fmt 1575