1(**************************************************************************)
2(*                                                                        *)
3(*                                 OCaml                                  *)
4(*                                                                        *)
5(*             Pierre Weis, projet Cristal, INRIA Rocquencourt            *)
6(*                                                                        *)
7(*   Copyright 2002 Institut National de Recherche en Informatique et     *)
8(*     en Automatique.                                                    *)
9(*                                                                        *)
10(*   All rights reserved.  This file is distributed under the terms of    *)
11(*   the GNU Lesser General Public License version 2.1, with the          *)
12(*   special exception on linking described in the file LICENSE.          *)
13(*                                                                        *)
14(**************************************************************************)
15
16open CamlinternalFormatBasics
17open CamlinternalFormat
18
19(* alias to avoid warning for ambiguity between
20   Pervasives.format6
21   and CamlinternalFormatBasics.format6
22
23   (the former is in fact an alias for the latter,
24    but the ambiguity warning doesn't care)
25*)
26type ('a, 'b, 'c, 'd, 'e, 'f) format6 =
27  ('a, 'b, 'c, 'd, 'e, 'f) Pervasives.format6
28
29
30(* The run-time library for scanners. *)
31
32(* Scanning buffers. *)
33module type SCANNING = sig
34
35  type in_channel
36
37  type scanbuf = in_channel
38
39  type file_name = string
40
41  val stdin : in_channel
42  (* The scanning buffer reading from [Pervasives.stdin].
43     [stdib] is equivalent to [Scanning.from_channel Pervasives.stdin]. *)
44
45  val stdib : in_channel
46  (* An alias for [Scanf.stdin], the scanning buffer reading from
47     [Pervasives.stdin]. *)
48
49  val next_char : scanbuf -> char
50  (* [Scanning.next_char ib] advance the scanning buffer for
51     one character.
52     If no more character can be read, sets a end of file condition and
53     returns '\000'. *)
54
55  val invalidate_current_char : scanbuf -> unit
56  (* [Scanning.invalidate_current_char ib] mark the current_char as already
57     scanned. *)
58
59  val peek_char : scanbuf -> char
60  (* [Scanning.peek_char ib] returns the current char available in
61     the buffer or reads one if necessary (when the current character is
62     already scanned).
63     If no character can be read, sets an end of file condition and
64     returns '\000'. *)
65
66  val checked_peek_char : scanbuf -> char
67  (* Same as [Scanning.peek_char] above but always returns a valid char or
68     fails: instead of returning a null char when the reading method of the
69     input buffer has reached an end of file, the function raises exception
70     [End_of_file]. *)
71
72  val store_char : int -> scanbuf -> char -> int
73  (* [Scanning.store_char lim ib c] adds [c] to the token buffer
74     of the scanning buffer [ib]. It also advances the scanning buffer for
75     one character and returns [lim - 1], indicating the new limit for the
76     length of the current token. *)
77
78  val skip_char : int -> scanbuf -> int
79  (* [Scanning.skip_char lim ib] ignores the current character. *)
80
81  val ignore_char : int -> scanbuf -> int
82  (* [Scanning.ignore_char ib lim] ignores the current character and
83     decrements the limit. *)
84
85  val token : scanbuf -> string
86  (* [Scanning.token ib] returns the string stored into the token
87     buffer of the scanning buffer: it returns the token matched by the
88     format. *)
89
90  val reset_token : scanbuf -> unit
91  (* [Scanning.reset_token ib] resets the token buffer of
92     the given scanning buffer. *)
93
94  val char_count : scanbuf -> int
95  (* [Scanning.char_count ib] returns the number of characters
96     read so far from the given buffer. *)
97
98  val line_count : scanbuf -> int
99  (* [Scanning.line_count ib] returns the number of new line
100     characters read so far from the given buffer. *)
101
102  val token_count : scanbuf -> int
103  (* [Scanning.token_count ib] returns the number of tokens read
104     so far from [ib]. *)
105
106  val eof : scanbuf -> bool
107  (* [Scanning.eof ib] returns the end of input condition
108     of the given buffer. *)
109
110  val end_of_input : scanbuf -> bool
111  (* [Scanning.end_of_input ib] tests the end of input condition
112     of the given buffer (if no char has ever been read, an attempt to
113     read one is performed). *)
114
115  val beginning_of_input : scanbuf -> bool
116  (* [Scanning.beginning_of_input ib] tests the beginning of input
117     condition of the given buffer. *)
118
119  val name_of_input : scanbuf -> string
120  (* [Scanning.name_of_input ib] returns the name of the character
121     source for input buffer [ib]. *)
122
123  val open_in : file_name -> in_channel
124  val open_in_bin : file_name -> in_channel
125  val from_file : file_name -> in_channel
126  val from_file_bin : file_name -> in_channel
127  val from_string : string -> in_channel
128  val from_function : (unit -> char) -> in_channel
129  val from_channel : Pervasives.in_channel -> in_channel
130
131  val close_in : in_channel -> unit
132
133  val memo_from_channel : Pervasives.in_channel -> in_channel
134  (* Obsolete. *)
135
136end
137
138
139module Scanning : SCANNING = struct
140
141  (* The run-time library for scanf. *)
142
143  type file_name = string
144
145  type in_channel_name =
146    | From_channel of Pervasives.in_channel
147    | From_file of file_name * Pervasives.in_channel
148    | From_function
149    | From_string
150
151
152  type in_channel = {
153    mutable ic_eof : bool;
154    mutable ic_current_char : char;
155    mutable ic_current_char_is_valid : bool;
156    mutable ic_char_count : int;
157    mutable ic_line_count : int;
158    mutable ic_token_count : int;
159    mutable ic_get_next_char : unit -> char;
160    ic_token_buffer : Buffer.t;
161    ic_input_name : in_channel_name;
162  }
163
164
165  type scanbuf = in_channel
166
167  let null_char = '\000'
168
169  (* Reads a new character from input buffer.
170     Next_char never fails, even in case of end of input:
171     it then simply sets the end of file condition. *)
172  let next_char ib =
173    try
174      let c = ib.ic_get_next_char () in
175      ib.ic_current_char <- c;
176      ib.ic_current_char_is_valid <- true;
177      ib.ic_char_count <- succ ib.ic_char_count;
178      if c = '\n' then ib.ic_line_count <- succ ib.ic_line_count;
179      c with
180    | End_of_file ->
181      let c = null_char in
182      ib.ic_current_char <- c;
183      ib.ic_current_char_is_valid <- false;
184      ib.ic_eof <- true;
185      c
186
187
188  let peek_char ib =
189    if ib.ic_current_char_is_valid
190    then ib.ic_current_char
191    else next_char ib
192
193
194  (* Returns a valid current char for the input buffer. In particular
195     no irrelevant null character (as set by [next_char] in case of end
196     of input) is returned, since [End_of_file] is raised when
197     [next_char] sets the end of file condition while trying to read a
198     new character. *)
199  let checked_peek_char ib =
200    let c = peek_char ib in
201    if ib.ic_eof then raise End_of_file;
202    c
203
204
205  let end_of_input ib =
206    ignore (peek_char ib);
207    ib.ic_eof
208
209
210  let eof ib = ib.ic_eof
211
212  let beginning_of_input ib = ib.ic_char_count = 0
213
214  let name_of_input ib =
215    match ib.ic_input_name with
216    | From_channel _ic -> "unnamed Pervasives input channel"
217    | From_file (fname, _ic) -> fname
218    | From_function -> "unnamed function"
219    | From_string -> "unnamed character string"
220
221
222  let char_count ib =
223    if ib.ic_current_char_is_valid
224    then ib.ic_char_count - 1
225    else ib.ic_char_count
226
227
228  let line_count ib = ib.ic_line_count
229
230  let reset_token ib = Buffer.reset ib.ic_token_buffer
231
232  let invalidate_current_char ib = ib.ic_current_char_is_valid <- false
233
234  let token ib =
235    let token_buffer = ib.ic_token_buffer in
236    let tok = Buffer.contents token_buffer in
237    Buffer.clear token_buffer;
238    ib.ic_token_count <- succ ib.ic_token_count;
239    tok
240
241
242  let token_count ib = ib.ic_token_count
243
244  let skip_char width ib =
245    invalidate_current_char ib;
246    width
247
248
249  let ignore_char width ib = skip_char (width - 1) ib
250
251  let store_char width ib c =
252    Buffer.add_char ib.ic_token_buffer c;
253    ignore_char width ib
254
255
256  let default_token_buffer_size = 1024
257
258  let create iname next = {
259    ic_eof = false;
260    ic_current_char = null_char;
261    ic_current_char_is_valid = false;
262    ic_char_count = 0;
263    ic_line_count = 0;
264    ic_token_count = 0;
265    ic_get_next_char = next;
266    ic_token_buffer = Buffer.create default_token_buffer_size;
267    ic_input_name = iname;
268  }
269
270
271  let from_string s =
272    let i = ref 0 in
273    let len = String.length s in
274    let next () =
275      if !i >= len then raise End_of_file else
276      let c = s.[!i] in
277      incr i;
278      c in
279    create From_string next
280
281
282  let from_function = create From_function
283
284  (* Scanning from an input channel. *)
285
286  (* Position of the problem:
287
288     We cannot prevent the scanning mechanism to use one lookahead character,
289     if needed by the semantics of the format string specifications (e.g. a
290     trailing 'skip space' specification in the format string); in this case,
291     the mandatory lookahead character is indeed read from the input and not
292     used to return the token read. It is thus mandatory to be able to store
293     an unused lookahead character somewhere to get it as the first character
294     of the next scan.
295
296     To circumvent this problem, all the scanning functions get a low level
297     input buffer argument where they store the lookahead character when
298     needed; additionally, the input buffer is the only source of character of
299     a scanner. The [scanbuf] input buffers are defined in module {!Scanning}.
300
301     Now we understand that it is extremely important that related and
302     successive calls to scanners indeed read from the same input buffer.
303     In effect, if a scanner [scan1] is reading from [ib1] and stores an
304     unused lookahead character [c1] into its input buffer [ib1], then
305     another scanner [scan2] not reading from the same buffer [ib1] will miss
306     the character [c1], seemingly vanished in the air from the point of view
307     of [scan2].
308
309     This mechanism works perfectly to read from strings, from files, and from
310     functions, since in those cases, allocating two buffers reading from the
311     same source is unnatural.
312
313     Still, there is a difficulty in the case of scanning from an input
314     channel. In effect, when scanning from an input channel [ic], this channel
315     may not have been allocated from within this library. Hence, it may be
316     shared (two functions of the user's program may successively read from
317     [ic]). This is highly error prone since, one of the function may seek the
318     input channel, while the other function has still an unused lookahead
319     character in its input buffer. In conclusion, you should never mix direct
320     low level reading and high level scanning from the same input channel.
321
322  *)
323
324  (* Perform bufferized input to improve efficiency. *)
325  let file_buffer_size = ref 1024
326
327  (* The scanner closes the input channel at end of input. *)
328  let scan_close_at_end ic = Pervasives.close_in ic; raise End_of_file
329
330  (* The scanner does not close the input channel at end of input:
331     it just raises [End_of_file]. *)
332  let scan_raise_at_end _ic = raise End_of_file
333
334  let from_ic scan_close_ic iname ic =
335    let len = !file_buffer_size in
336    let buf = Bytes.create len in
337    let i = ref 0 in
338    let lim = ref 0 in
339    let eof = ref false in
340    let next () =
341      if !i < !lim then begin let c = Bytes.get buf !i in incr i; c end else
342      if !eof then raise End_of_file else begin
343        lim := input ic buf 0 len;
344        if !lim = 0 then begin eof := true; scan_close_ic ic end else begin
345          i := 1;
346          Bytes.get buf 0
347        end
348      end in
349    create iname next
350
351
352  let from_ic_close_at_end = from_ic scan_close_at_end
353  let from_ic_raise_at_end = from_ic scan_raise_at_end
354
355  (* The scanning buffer reading from [Pervasives.stdin].
356     One could try to define [stdib] as a scanning buffer reading a character
357     at a time (no bufferization at all), but unfortunately the top-level
358     interaction would be wrong. This is due to some kind of
359     'race condition' when reading from [Pervasives.stdin],
360     since the interactive compiler and [Scanf.scanf] will simultaneously
361     read the material they need from [Pervasives.stdin]; then, confusion
362     will result from what should be read by the top-level and what should be
363     read by [Scanf.scanf].
364     This is even more complicated by the one character lookahead that
365     [Scanf.scanf] is sometimes obliged to maintain: the lookahead character
366     will be available for the next [Scanf.scanf] entry, seemingly coming from
367     nowhere.
368     Also no [End_of_file] is raised when reading from stdin: if not enough
369     characters have been read, we simply ask to read more. *)
370  let stdin =
371    from_ic scan_raise_at_end
372      (From_file ("-", Pervasives.stdin)) Pervasives.stdin
373
374
375  let stdib = stdin
376
377  let open_in_file open_in fname =
378    match fname with
379    | "-" -> stdin
380    | fname ->
381      let ic = open_in fname in
382      from_ic_close_at_end (From_file (fname, ic)) ic
383
384
385  let open_in = open_in_file Pervasives.open_in
386  let open_in_bin = open_in_file Pervasives.open_in_bin
387
388  let from_file = open_in
389  let from_file_bin = open_in_bin
390
391  let from_channel ic =
392    from_ic_raise_at_end (From_channel ic) ic
393
394
395  let close_in ib =
396    match ib.ic_input_name with
397    | From_channel ic ->
398      Pervasives.close_in ic
399    | From_file (_fname, ic) -> Pervasives.close_in ic
400    | From_function | From_string -> ()
401
402
403  (*
404     Obsolete: a memo [from_channel] version to build a [Scanning.in_channel]
405     scanning buffer out of a [Pervasives.in_channel].
406     This function was used to try to preserve the scanning
407     semantics for the (now obsolete) function [fscanf].
408     Given that all scanner must read from a [Scanning.in_channel] scanning
409     buffer, [fscanf] must read from one!
410     More precisely, given [ic], all successive calls [fscanf ic] must read
411     from the same scanning buffer.
412     This obliged this library to allocated scanning buffers that were
413     not properly garbbage collectable, hence leading to memory leaks.
414     If you need to read from a [Pervasives.in_channel] input channel
415     [ic], simply define a [Scanning.in_channel] formatted input channel as in
416     [let ib = Scanning.from_channel ic], then use [Scanf.bscanf ib] as usual.
417  *)
418  let memo_from_ic =
419    let memo = ref [] in
420    (fun scan_close_ic ic ->
421     try List.assq ic !memo with
422     | Not_found ->
423       let ib =
424         from_ic scan_close_ic (From_channel ic) ic in
425       memo := (ic, ib) :: !memo;
426       ib)
427
428
429  (* Obsolete: see {!memo_from_ic} above. *)
430  let memo_from_channel = memo_from_ic scan_raise_at_end
431
432end
433
434
435(* Formatted input functions. *)
436
437type ('a, 'b, 'c, 'd) scanner =
438     ('a, Scanning.in_channel, 'b, 'c, 'a -> 'd, 'd) format6 -> 'c
439
440
441(* Reporting errors. *)
442exception Scan_failure of string
443
444let bad_input s = raise (Scan_failure s)
445
446let bad_input_escape c =
447  bad_input (Printf.sprintf "illegal escape character %C" c)
448
449
450let bad_token_length message =
451  bad_input
452    (Printf.sprintf
453       "scanning of %s failed: \
454        the specified length was too short for token"
455       message)
456
457
458let bad_end_of_input message =
459  bad_input
460    (Printf.sprintf
461       "scanning of %s failed: \
462        premature end of file occurred before end of token"
463       message)
464
465
466let bad_float () =
467  bad_input "no dot or exponent part found in float token"
468
469
470let bad_hex_float () =
471  bad_input "not a valid float in hexadecimal notation"
472
473
474let character_mismatch_err c ci =
475  Printf.sprintf "looking for %C, found %C" c ci
476
477
478let character_mismatch c ci =
479  bad_input (character_mismatch_err c ci)
480
481
482let rec skip_whites ib =
483  let c = Scanning.peek_char ib in
484  if not (Scanning.eof ib) then begin
485    match c with
486    | ' ' | '\t' | '\n' | '\r' ->
487      Scanning.invalidate_current_char ib; skip_whites ib
488    | _ -> ()
489  end
490
491
492(* Checking that [c] is indeed in the input, then skips it.
493   In this case, the character [c] has been explicitly specified in the
494   format as being mandatory in the input; hence we should fail with
495   [End_of_file] in case of end_of_input.
496   (Remember that [Scan_failure] is raised only when (we can prove by
497   evidence) that the input does not match the format string given. We must
498   thus differentiate [End_of_file] as an error due to lack of input, and
499   [Scan_failure] which is due to provably wrong input. I am not sure this is
500   worth the burden: it is complex and somehow subliminal; should be clearer
501   to fail with Scan_failure "Not enough input to complete scanning"!)
502
503   That's why, waiting for a better solution, we use checked_peek_char here.
504   We are also careful to treat "\r\n" in the input as an end of line marker:
505   it always matches a '\n' specification in the input format string. *)
506let rec check_char ib c =
507  match c with
508  | ' ' -> skip_whites ib
509  | '\n' -> check_newline ib
510  | c -> check_this_char ib c
511
512and check_this_char ib c =
513  let ci = Scanning.checked_peek_char ib in
514  if ci = c then Scanning.invalidate_current_char ib else
515  character_mismatch c ci
516
517and check_newline ib =
518  let ci = Scanning.checked_peek_char ib in
519  match ci with
520  | '\n' -> Scanning.invalidate_current_char ib
521  | '\r' -> Scanning.invalidate_current_char ib; check_this_char ib '\n'
522  | _ -> character_mismatch '\n' ci
523
524
525(* Extracting tokens from the output token buffer. *)
526
527let token_char ib = (Scanning.token ib).[0]
528
529let token_string = Scanning.token
530
531let token_bool ib =
532  match Scanning.token ib with
533  | "true" -> true
534  | "false" -> false
535  | s -> bad_input (Printf.sprintf "invalid boolean '%s'" s)
536
537
538(* The type of integer conversions. *)
539type integer_conversion =
540  | B_conversion (* Unsigned binary conversion *)
541  | D_conversion (* Signed decimal conversion *)
542  | I_conversion (* Signed integer conversion *)
543  | O_conversion (* Unsigned octal conversion *)
544  | U_conversion (* Unsigned decimal conversion *)
545  | X_conversion (* Unsigned hexadecimal conversion *)
546
547
548let integer_conversion_of_char = function
549  | 'b' -> B_conversion
550  | 'd' -> D_conversion
551  | 'i' -> I_conversion
552  | 'o' -> O_conversion
553  | 'u' -> U_conversion
554  | 'x' | 'X' -> X_conversion
555  | _ -> assert false
556
557
558(* Extract an integer literal token.
559   Since the functions Pervasives.*int*_of_string do not accept a leading +,
560   we skip it if necessary. *)
561let token_int_literal conv ib =
562  let tok =
563    match conv with
564    | D_conversion | I_conversion -> Scanning.token ib
565    | U_conversion -> "0u" ^ Scanning.token ib
566    | O_conversion -> "0o" ^ Scanning.token ib
567    | X_conversion -> "0x" ^ Scanning.token ib
568    | B_conversion -> "0b" ^ Scanning.token ib in
569  let l = String.length tok in
570  if l = 0 || tok.[0] <> '+' then tok else String.sub tok 1 (l - 1)
571
572
573(* All the functions that convert a string to a number raise the exception
574   Failure when the conversion is not possible.
575   This exception is then trapped in [kscanf]. *)
576let token_int conv ib = int_of_string (token_int_literal conv ib)
577
578let token_float ib = float_of_string (Scanning.token ib)
579
580(* To scan native ints, int32 and int64 integers.
581   We cannot access to conversions to/from strings for those types,
582   Nativeint.of_string, Int32.of_string, and Int64.of_string,
583   since those modules are not available to [Scanf].
584   However, we can bind and use the corresponding primitives that are
585   available in the runtime. *)
586external nativeint_of_string : string -> nativeint
587  = "caml_nativeint_of_string"
588
589external int32_of_string : string -> int32
590  = "caml_int32_of_string"
591
592external int64_of_string : string -> int64
593  = "caml_int64_of_string"
594
595
596let token_nativeint conv ib = nativeint_of_string (token_int_literal conv ib)
597let token_int32 conv ib = int32_of_string (token_int_literal conv ib)
598let token_int64 conv ib = int64_of_string (token_int_literal conv ib)
599
600(* Scanning numbers. *)
601
602(* Digits scanning functions suppose that one character has been checked and
603   is available, since they return at end of file with the currently found
604   token selected.
605
606   Put it in another way, the digits scanning functions scan for a possibly
607   empty sequence of digits, (hence, a successful scanning from one of those
608   functions does not imply that the token is a well-formed number: to get a
609   true number, it is mandatory to check that at least one valid digit is
610   available before calling one of the digit scanning functions). *)
611
612(* The decimal case is treated especially for optimization purposes. *)
613let rec scan_decimal_digit_star width ib =
614  if width = 0 then width else
615  let c = Scanning.peek_char ib in
616  if Scanning.eof ib then width else
617  match c with
618  | '0' .. '9' as c ->
619    let width = Scanning.store_char width ib c in
620    scan_decimal_digit_star width ib
621  | '_' ->
622    let width = Scanning.ignore_char width ib in
623    scan_decimal_digit_star width ib
624  | _ -> width
625
626
627let scan_decimal_digit_plus width ib =
628  if width = 0 then bad_token_length "decimal digits" else
629  let c = Scanning.checked_peek_char ib in
630  match c with
631  | '0' .. '9' ->
632    let width = Scanning.store_char width ib c in
633    scan_decimal_digit_star width ib
634  | c ->
635    bad_input (Printf.sprintf "character %C is not a decimal digit" c)
636
637
638(* To scan numbers from other bases, we use a predicate argument to
639   scan digits. *)
640let scan_digit_star digitp width ib =
641  let rec scan_digits width ib =
642    if width = 0 then width else
643    let c = Scanning.peek_char ib in
644    if Scanning.eof ib then width else
645    match c with
646    | c when digitp c ->
647      let width = Scanning.store_char width ib c in
648      scan_digits width ib
649    | '_' ->
650      let width = Scanning.ignore_char width ib in
651      scan_digits width ib
652    | _ -> width in
653  scan_digits width ib
654
655
656let scan_digit_plus basis digitp width ib =
657  (* Ensure we have got enough width left,
658     and read at list one digit. *)
659  if width = 0 then bad_token_length "digits" else
660  let c = Scanning.checked_peek_char ib in
661  if digitp c then
662    let width = Scanning.store_char width ib c in
663    scan_digit_star digitp width ib
664  else
665    bad_input (Printf.sprintf "character %C is not a valid %s digit" c basis)
666
667
668let is_binary_digit = function
669  | '0' .. '1' -> true
670  | _ -> false
671
672
673let scan_binary_int = scan_digit_plus "binary" is_binary_digit
674
675let is_octal_digit = function
676  | '0' .. '7' -> true
677  | _ -> false
678
679
680let scan_octal_int = scan_digit_plus "octal" is_octal_digit
681
682let is_hexa_digit = function
683  | '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> true
684  | _ -> false
685
686
687let scan_hexadecimal_int = scan_digit_plus "hexadecimal" is_hexa_digit
688
689(* Scan a decimal integer. *)
690let scan_unsigned_decimal_int = scan_decimal_digit_plus
691
692let scan_sign width ib =
693  let c = Scanning.checked_peek_char ib in
694  match c with
695  | '+' -> Scanning.store_char width ib c
696  | '-' -> Scanning.store_char width ib c
697  | _ -> width
698
699
700let scan_optionally_signed_decimal_int width ib =
701  let width = scan_sign width ib in
702  scan_unsigned_decimal_int width ib
703
704
705(* Scan an unsigned integer that could be given in any (common) basis.
706   If digits are prefixed by one of 0x, 0X, 0o, or 0b, the number is
707   assumed to be written respectively in hexadecimal, hexadecimal,
708   octal, or binary. *)
709let scan_unsigned_int width ib =
710  match Scanning.checked_peek_char ib with
711  | '0' as c ->
712    let width = Scanning.store_char width ib c in
713    if width = 0 then width else
714    let c = Scanning.peek_char ib in
715    if Scanning.eof ib then width else
716    begin match c with
717    | 'x' | 'X' -> scan_hexadecimal_int (Scanning.store_char width ib c) ib
718    | 'o' -> scan_octal_int (Scanning.store_char width ib c) ib
719    | 'b' -> scan_binary_int (Scanning.store_char width ib c) ib
720    | _ -> scan_decimal_digit_star width ib end
721  | _ -> scan_unsigned_decimal_int width ib
722
723
724let scan_optionally_signed_int width ib =
725  let width = scan_sign width ib in
726  scan_unsigned_int width ib
727
728
729let scan_int_conversion conv width ib =
730  match conv with
731  | B_conversion -> scan_binary_int width ib
732  | D_conversion -> scan_optionally_signed_decimal_int width ib
733  | I_conversion -> scan_optionally_signed_int width ib
734  | O_conversion -> scan_octal_int width ib
735  | U_conversion -> scan_unsigned_decimal_int width ib
736  | X_conversion -> scan_hexadecimal_int width ib
737
738
739(* Scanning floating point numbers. *)
740
741(* Fractional part is optional and can be reduced to 0 digits. *)
742let scan_fractional_part width ib =
743  if width = 0 then width else
744  let c = Scanning.peek_char ib in
745  if Scanning.eof ib then width else
746  match c with
747  | '0' .. '9' as c ->
748    scan_decimal_digit_star (Scanning.store_char width ib c) ib
749  | _ -> width
750
751
752(* Exp part is optional and can be reduced to 0 digits. *)
753let scan_exponent_part width ib =
754  if width = 0 then width else
755  let c = Scanning.peek_char ib in
756  if Scanning.eof ib then width else
757  match c with
758  | 'e' | 'E' as c ->
759    scan_optionally_signed_decimal_int (Scanning.store_char width ib c) ib
760  | _ -> width
761
762
763(* Scan the integer part of a floating point number, (not using the
764   OCaml lexical convention since the integer part can be empty):
765   an optional sign, followed by a possibly empty sequence of decimal
766   digits (e.g. -.1). *)
767let scan_integer_part width ib =
768  let width = scan_sign width ib in
769  scan_decimal_digit_star width ib
770
771
772(*
773   For the time being we have (as found in scanf.mli):
774   the field width is composed of an optional integer literal
775   indicating the maximal width of the token to read.
776   Unfortunately, the type-checker let the user write an optional precision,
777   since this is valid for printf format strings.
778
779   Thus, the next step for Scanf is to support a full width and precision
780   indication, more or less similar to the one for printf, possibly extended
781   to the specification of a [max, min] range for the width of the token read
782   for strings. Something like the following spec for scanf.mli:
783
784   The optional [width] is an integer indicating the maximal
785   width of the token read. For instance, [%6d] reads an integer,
786   having at most 6 characters.
787
788   The optional [precision] is a dot [.] followed by an integer:
789
790   - in the floating point number conversions ([%f], [%e], [%g], [%F], [%E],
791   and [%F] conversions, the [precision] indicates the maximum number of
792   digits that may follow the decimal point. For instance, [%.4f] reads a
793   [float] with at most 4 fractional digits,
794
795   - in the string conversions ([%s], [%S], [%\[ range \]]), and in the
796   integer number conversions ([%i], [%d], [%u], [%x], [%o], and their
797   [int32], [int64], and [native_int] correspondent), the [precision]
798   indicates the required minimum width of the token read,
799
800   - on all other conversions, the width and precision specify the [max, min]
801   range for the width of the token read.
802*)
803let scan_float width precision ib =
804  let width = scan_integer_part width ib in
805  if width = 0 then width, precision else
806  let c = Scanning.peek_char ib in
807  if Scanning.eof ib then width, precision else
808  match c with
809  | '.' ->
810    let width = Scanning.store_char width ib c in
811    let precision = min width precision in
812    let width = width - (precision - scan_fractional_part precision ib) in
813    scan_exponent_part width ib, precision
814  | _ ->
815    scan_exponent_part width ib, precision
816
817
818let check_case_insensitive_string width ib error str =
819  let lowercase c =
820    match c with
821    | 'A' .. 'Z' ->
822      char_of_int (int_of_char c - int_of_char 'A' + int_of_char 'a')
823    | _ -> c in
824  let len = String.length str in
825  let width = ref width in
826  for i = 0 to len - 1 do
827    let c = Scanning.peek_char ib in
828    if lowercase c <> lowercase str.[i] then error ();
829    if !width = 0 then error ();
830    width := Scanning.store_char !width ib c;
831  done;
832  !width
833
834
835let scan_hex_float width precision ib =
836  if width = 0 || Scanning.end_of_input ib then bad_hex_float ();
837  let width = scan_sign width ib in
838  if width = 0 || Scanning.end_of_input ib then bad_hex_float ();
839  match Scanning.peek_char ib with
840  | '0' as c -> (
841    let width = Scanning.store_char width ib c in
842    if width = 0 || Scanning.end_of_input ib then bad_hex_float ();
843    let width = check_case_insensitive_string width ib bad_hex_float "x" in
844    if width = 0 || Scanning.end_of_input ib then width else
845      let width = match Scanning.peek_char ib with
846        | '.' | 'p' | 'P' -> width
847        | _ -> scan_hexadecimal_int width ib in
848      if width = 0 || Scanning.end_of_input ib then width else
849        let width = match Scanning.peek_char ib with
850          | '.' as c -> (
851            let width = Scanning.store_char width ib c in
852            if width = 0 || Scanning.end_of_input ib then width else
853              match Scanning.peek_char ib with
854              | 'p' | 'P' -> width
855              | _ ->
856                let precision = min width precision in
857                width - (precision - scan_hexadecimal_int precision ib)
858          )
859          | _ -> width in
860        if width = 0 || Scanning.end_of_input ib then width else
861          match Scanning.peek_char ib with
862          | 'p' | 'P' as c ->
863            let width = Scanning.store_char width ib c in
864            if width = 0 || Scanning.end_of_input ib then bad_hex_float ();
865            scan_optionally_signed_decimal_int width ib
866          | _ -> width
867  )
868  | 'n' | 'N' as c ->
869    let width = Scanning.store_char width ib c in
870    if width = 0 || Scanning.end_of_input ib then bad_hex_float ();
871    check_case_insensitive_string width ib bad_hex_float "an"
872  | 'i' | 'I' as c ->
873    let width = Scanning.store_char width ib c in
874    if width = 0 || Scanning.end_of_input ib then bad_hex_float ();
875    check_case_insensitive_string width ib bad_hex_float "nfinity"
876  | _ -> bad_hex_float ()
877
878
879let scan_caml_float_rest width precision ib =
880  if width = 0 || Scanning.end_of_input ib then bad_float ();
881  let width = scan_decimal_digit_star width ib in
882  if width = 0 || Scanning.end_of_input ib then bad_float ();
883  let c = Scanning.peek_char ib in
884  match c with
885  | '.' ->
886    let width = Scanning.store_char width ib c in
887    (* The effective width available for scanning the fractional part is
888       the minimum of declared precision and width left. *)
889    let precision = min width precision in
890    (* After scanning the fractional part with [precision] provisional width,
891       [width_precision] is left. *)
892    let width_precision = scan_fractional_part precision ib in
893    (* Hence, scanning the fractional part took exactly
894       [precision - width_precision] chars. *)
895    let frac_width = precision - width_precision in
896    (* And new provisional width is [width - width_precision. *)
897    let width = width - frac_width in
898    scan_exponent_part width ib
899  | 'e' | 'E' ->
900    scan_exponent_part width ib
901  | _ -> bad_float ()
902
903
904let scan_caml_float width precision ib =
905  if width = 0 || Scanning.end_of_input ib then bad_float ();
906  let width = scan_sign width ib in
907  if width = 0 || Scanning.end_of_input ib then bad_float ();
908  match Scanning.peek_char ib with
909  | '0' as c -> (
910    let width = Scanning.store_char width ib c in
911    if width = 0 || Scanning.end_of_input ib then bad_float ();
912    match Scanning.peek_char ib with
913    | 'x' | 'X' as c -> (
914      let width = Scanning.store_char width ib c in
915      if width = 0 || Scanning.end_of_input ib then bad_float ();
916      let width = scan_hexadecimal_int width ib in
917      if width = 0 || Scanning.end_of_input ib then bad_float ();
918      let width = match Scanning.peek_char ib with
919        | '.' as c -> (
920          let width = Scanning.store_char width ib c in
921          if width = 0 || Scanning.end_of_input ib then width else
922            match Scanning.peek_char ib with
923            | 'p' | 'P' -> width
924            | _ ->
925              let precision = min width precision in
926              width - (precision - scan_hexadecimal_int precision ib)
927        )
928        | 'p' | 'P' -> width
929        | _ -> bad_float () in
930      if width = 0 || Scanning.end_of_input ib then width else
931        match Scanning.peek_char ib with
932        | 'p' | 'P' as c ->
933          let width = Scanning.store_char width ib c in
934          if width = 0 || Scanning.end_of_input ib then bad_hex_float ();
935          scan_optionally_signed_decimal_int width ib
936        | _ -> width
937    )
938    | _ ->
939      scan_caml_float_rest width precision ib
940  )
941  | '1' .. '9' as c ->
942    let width = Scanning.store_char width ib c in
943    if width = 0 || Scanning.end_of_input ib then bad_float ();
944    scan_caml_float_rest width precision ib
945(* Special case of nan and infinity:
946  | 'i' ->
947  | 'n' ->
948*)
949  | _ -> bad_float ()
950
951
952(* Scan a regular string:
953   stops when encountering a space, if no scanning indication has been given;
954   otherwise, stops when encountering the characters in the scanning
955   indication [stp].
956   It also stops at end of file or when the maximum number of characters has
957   been read. *)
958let scan_string stp width ib =
959  let rec loop width =
960    if width = 0 then width else
961    let c = Scanning.peek_char ib in
962    if Scanning.eof ib then width else
963      match stp with
964      | Some c' when c = c' -> Scanning.skip_char width ib
965      | Some _ -> loop (Scanning.store_char width ib c)
966      | None ->
967        match c with
968        | ' ' | '\t' | '\n' | '\r' -> width
969        | _ -> loop (Scanning.store_char width ib c) in
970  loop width
971
972
973(* Scan a char: peek strictly one character in the input, whatsoever. *)
974let scan_char width ib =
975  (* The case width = 0 could not happen here, since it is tested before
976     calling scan_char, in the main scanning function.
977    if width = 0 then bad_token_length "a character" else *)
978  Scanning.store_char width ib (Scanning.checked_peek_char ib)
979
980
981let char_for_backslash = function
982  | 'n' -> '\010'
983  | 'r' -> '\013'
984  | 'b' -> '\008'
985  | 't' -> '\009'
986  | c -> c
987
988
989(* The integer value corresponding to the facial value of a valid
990   decimal digit character. *)
991let decimal_value_of_char c = int_of_char c - int_of_char '0'
992
993let char_for_decimal_code c0 c1 c2 =
994  let c =
995    100 * decimal_value_of_char c0 +
996     10 * decimal_value_of_char c1 +
997          decimal_value_of_char c2 in
998  if c < 0 || c > 255 then
999    bad_input
1000      (Printf.sprintf
1001         "bad character decimal encoding \\%c%c%c" c0 c1 c2) else
1002  char_of_int c
1003
1004
1005(* The integer value corresponding to the facial value of a valid
1006   hexadecimal digit character. *)
1007let hexadecimal_value_of_char c =
1008  let d = int_of_char c in
1009  (* Could also be:
1010    if d <= int_of_char '9' then d - int_of_char '0' else
1011    if d <= int_of_char 'F' then 10 + d - int_of_char 'A' else
1012    if d <= int_of_char 'f' then 10 + d - int_of_char 'a' else assert false
1013  *)
1014  if d >= int_of_char 'a' then
1015    d - 87 (* 10 + int_of_char c - int_of_char 'a' *) else
1016  if d >= int_of_char 'A' then
1017    d - 55  (* 10 + int_of_char c - int_of_char 'A' *) else
1018    d - int_of_char '0'
1019
1020
1021let char_for_hexadecimal_code c1 c2 =
1022  let c =
1023    16 * hexadecimal_value_of_char c1 +
1024         hexadecimal_value_of_char c2 in
1025  if c < 0 || c > 255 then
1026    bad_input
1027      (Printf.sprintf "bad character hexadecimal encoding \\%c%c" c1 c2) else
1028  char_of_int c
1029
1030
1031(* Called in particular when encountering '\\' as starter of a char.
1032   Stops before the corresponding '\''. *)
1033let check_next_char message width ib =
1034  if width = 0 then bad_token_length message else
1035  let c = Scanning.peek_char ib in
1036  if Scanning.eof ib then bad_end_of_input message else
1037  c
1038
1039
1040let check_next_char_for_char = check_next_char "a Char"
1041let check_next_char_for_string = check_next_char "a String"
1042
1043let scan_backslash_char width ib =
1044  match check_next_char_for_char width ib with
1045  | '\\' | '\'' | '\"' | 'n' | 't' | 'b' | 'r' as c ->
1046    Scanning.store_char width ib (char_for_backslash c)
1047  | '0' .. '9' as c ->
1048    let get_digit () =
1049      let c = Scanning.next_char ib in
1050      match c with
1051      | '0' .. '9' as c -> c
1052      | c -> bad_input_escape c in
1053    let c0 = c in
1054    let c1 = get_digit () in
1055    let c2 = get_digit () in
1056    Scanning.store_char (width - 2) ib (char_for_decimal_code c0 c1 c2)
1057  | 'x' ->
1058    let get_digit () =
1059      let c = Scanning.next_char ib in
1060      match c with
1061      | '0' .. '9' | 'A' .. 'F' | 'a' .. 'f' as c -> c
1062      | c -> bad_input_escape c in
1063    let c1 = get_digit () in
1064    let c2 = get_digit () in
1065    Scanning.store_char (width - 2) ib (char_for_hexadecimal_code c1 c2)
1066  | c ->
1067    bad_input_escape c
1068
1069
1070(* Scan a character (an OCaml token). *)
1071let scan_caml_char width ib =
1072
1073  let rec find_start width =
1074    match Scanning.checked_peek_char ib with
1075    | '\'' -> find_char (Scanning.ignore_char width ib)
1076    | c -> character_mismatch '\'' c
1077
1078  and find_char width =
1079    match check_next_char_for_char width ib with
1080    | '\\' ->
1081      find_stop (scan_backslash_char (Scanning.ignore_char width ib) ib)
1082    | c ->
1083      find_stop (Scanning.store_char width ib c)
1084
1085  and find_stop width =
1086    match check_next_char_for_char width ib with
1087    | '\'' -> Scanning.ignore_char width ib
1088    | c -> character_mismatch '\'' c in
1089
1090  find_start width
1091
1092
1093(* Scan a delimited string (an OCaml token). *)
1094let scan_caml_string width ib =
1095
1096  let rec find_start width =
1097    match Scanning.checked_peek_char ib with
1098    | '\"' -> find_stop (Scanning.ignore_char width ib)
1099    | c -> character_mismatch '\"' c
1100
1101  and find_stop width =
1102    match check_next_char_for_string width ib with
1103    | '\"' -> Scanning.ignore_char width ib
1104    | '\\' -> scan_backslash (Scanning.ignore_char width ib)
1105    | c -> find_stop (Scanning.store_char width ib c)
1106
1107  and scan_backslash width =
1108    match check_next_char_for_string width ib with
1109    | '\r' -> skip_newline (Scanning.ignore_char width ib)
1110    | '\n' -> skip_spaces (Scanning.ignore_char width ib)
1111    | _ -> find_stop (scan_backslash_char width ib)
1112
1113  and skip_newline width =
1114    match check_next_char_for_string width ib with
1115    | '\n' -> skip_spaces (Scanning.ignore_char width ib)
1116    | _ -> find_stop (Scanning.store_char width ib '\r')
1117
1118  and skip_spaces width =
1119    match check_next_char_for_string width ib with
1120    | ' ' -> skip_spaces (Scanning.ignore_char width ib)
1121    | _ -> find_stop width in
1122
1123  find_start width
1124
1125
1126(* Scan a boolean (an OCaml token). *)
1127let scan_bool ib =
1128  let c = Scanning.checked_peek_char ib in
1129  let m =
1130    match c with
1131    | 't' -> 4
1132    | 'f' -> 5
1133    | c ->
1134      bad_input
1135        (Printf.sprintf "the character %C cannot start a boolean" c) in
1136  scan_string None m ib
1137
1138
1139(* Scan a string containing elements in char_set and terminated by scan_indic
1140   if provided. *)
1141let scan_chars_in_char_set char_set scan_indic width ib =
1142  let rec scan_chars i stp =
1143    let c = Scanning.peek_char ib in
1144    if i > 0 && not (Scanning.eof ib) &&
1145       is_in_char_set char_set c &&
1146       int_of_char c <> stp then
1147      let _ = Scanning.store_char max_int ib c in
1148      scan_chars (i - 1) stp in
1149  match scan_indic with
1150  | None -> scan_chars width (-1);
1151  | Some c ->
1152    scan_chars width (int_of_char c);
1153    if not (Scanning.eof ib) then
1154      let ci = Scanning.peek_char ib in
1155      if c = ci
1156      then Scanning.invalidate_current_char ib
1157      else character_mismatch c ci
1158
1159
1160(* The global error report function for [Scanf]. *)
1161let scanf_bad_input ib = function
1162  | Scan_failure s | Failure s ->
1163    let i = Scanning.char_count ib in
1164    bad_input (Printf.sprintf "scanf: bad input at char number %i: %s" i s)
1165  | x -> raise x
1166
1167
1168(* Get the content of a counter from an input buffer. *)
1169let get_counter ib counter =
1170  match counter with
1171  | Line_counter -> Scanning.line_count ib
1172  | Char_counter -> Scanning.char_count ib
1173  | Token_counter -> Scanning.token_count ib
1174
1175
1176(* Compute the width of a padding option (see "%42{" and "%123("). *)
1177let width_of_pad_opt pad_opt = match pad_opt with
1178  | None -> max_int
1179  | Some width -> width
1180
1181
1182let stopper_of_formatting_lit fmting =
1183  if fmting = Escaped_percent then '%', "" else
1184    let str = string_of_formatting_lit fmting in
1185    let stp = str.[1] in
1186    let sub_str = String.sub str 2 (String.length str - 2) in
1187    stp, sub_str
1188
1189
1190(******************************************************************************)
1191                           (* Readers managment *)
1192
1193(* A call to take_format_readers on a format is evaluated into functions
1194   taking readers as arguments and aggregate them into an heterogeneous list *)
1195(* When all readers are taken, finally pass the list of the readers to the
1196   continuation k. *)
1197let rec take_format_readers : type a c d e f .
1198    ((d, e) heter_list -> e) -> (a, Scanning.in_channel, c, d, e, f) fmt ->
1199    d =
1200fun k fmt -> match fmt with
1201  | Reader fmt_rest ->
1202    fun reader ->
1203      let new_k readers_rest = k (Cons (reader, readers_rest)) in
1204      take_format_readers new_k fmt_rest
1205  | Char rest                        -> take_format_readers k rest
1206  | Caml_char rest                   -> take_format_readers k rest
1207  | String (_, rest)                 -> take_format_readers k rest
1208  | Caml_string (_, rest)            -> take_format_readers k rest
1209  | Int (_, _, _, rest)              -> take_format_readers k rest
1210  | Int32 (_, _, _, rest)            -> take_format_readers k rest
1211  | Nativeint (_, _, _, rest)        -> take_format_readers k rest
1212  | Int64 (_, _, _, rest)            -> take_format_readers k rest
1213  | Float (_, _, _, rest)            -> take_format_readers k rest
1214  | Bool rest                        -> take_format_readers k rest
1215  | Alpha rest                       -> take_format_readers k rest
1216  | Theta rest                       -> take_format_readers k rest
1217  | Flush rest                       -> take_format_readers k rest
1218  | String_literal (_, rest)         -> take_format_readers k rest
1219  | Char_literal (_, rest)           -> take_format_readers k rest
1220  | Custom (_, _, rest)              -> take_format_readers k rest
1221
1222  | Scan_char_set (_, _, rest)       -> take_format_readers k rest
1223  | Scan_get_counter (_, rest)       -> take_format_readers k rest
1224  | Scan_next_char rest              -> take_format_readers k rest
1225
1226  | Formatting_lit (_, rest)         -> take_format_readers k rest
1227  | Formatting_gen (Open_tag (Format (fmt, _)), rest) ->
1228      take_format_readers k (concat_fmt fmt rest)
1229  | Formatting_gen (Open_box (Format (fmt, _)), rest) ->
1230      take_format_readers k (concat_fmt fmt rest)
1231
1232  | Format_arg (_, _, rest)          -> take_format_readers k rest
1233  | Format_subst (_, fmtty, rest)    ->
1234     take_fmtty_format_readers k (erase_rel (symm fmtty)) rest
1235  | Ignored_param (ign, rest)        -> take_ignored_format_readers k ign rest
1236
1237  | End_of_format                    -> k Nil
1238
1239(* Take readers associated to an fmtty coming from a Format_subst "%(...%)". *)
1240and take_fmtty_format_readers : type x y a c d e f .
1241    ((d, e) heter_list -> e) -> (a, Scanning.in_channel, c, d, x, y) fmtty ->
1242      (y, Scanning.in_channel, c, x, e, f) fmt -> d =
1243fun k fmtty fmt -> match fmtty with
1244  | Reader_ty fmt_rest ->
1245    fun reader ->
1246      let new_k readers_rest = k (Cons (reader, readers_rest)) in
1247      take_fmtty_format_readers new_k fmt_rest fmt
1248  | Ignored_reader_ty fmt_rest ->
1249    fun reader ->
1250      let new_k readers_rest = k (Cons (reader, readers_rest)) in
1251      take_fmtty_format_readers new_k fmt_rest fmt
1252  | Char_ty rest                -> take_fmtty_format_readers k rest fmt
1253  | String_ty rest              -> take_fmtty_format_readers k rest fmt
1254  | Int_ty rest                 -> take_fmtty_format_readers k rest fmt
1255  | Int32_ty rest               -> take_fmtty_format_readers k rest fmt
1256  | Nativeint_ty rest           -> take_fmtty_format_readers k rest fmt
1257  | Int64_ty rest               -> take_fmtty_format_readers k rest fmt
1258  | Float_ty rest               -> take_fmtty_format_readers k rest fmt
1259  | Bool_ty rest                -> take_fmtty_format_readers k rest fmt
1260  | Alpha_ty rest               -> take_fmtty_format_readers k rest fmt
1261  | Theta_ty rest               -> take_fmtty_format_readers k rest fmt
1262  | Any_ty rest                 -> take_fmtty_format_readers k rest fmt
1263  | Format_arg_ty (_, rest)     -> take_fmtty_format_readers k rest fmt
1264  | End_of_fmtty                -> take_format_readers k fmt
1265  | Format_subst_ty (ty1, ty2, rest) ->
1266    let ty = trans (symm ty1) ty2 in
1267    take_fmtty_format_readers k (concat_fmtty ty rest) fmt
1268
1269(* Take readers associated to an ignored parameter. *)
1270and take_ignored_format_readers : type x y a c d e f .
1271    ((d, e) heter_list -> e) -> (a, Scanning.in_channel, c, d, x, y) ignored ->
1272      (y, Scanning.in_channel, c, x, e, f) fmt -> d =
1273fun k ign fmt -> match ign with
1274  | Ignored_reader ->
1275    fun reader ->
1276      let new_k readers_rest = k (Cons (reader, readers_rest)) in
1277      take_format_readers new_k fmt
1278  | Ignored_char                    -> take_format_readers k fmt
1279  | Ignored_caml_char               -> take_format_readers k fmt
1280  | Ignored_string _                -> take_format_readers k fmt
1281  | Ignored_caml_string _           -> take_format_readers k fmt
1282  | Ignored_int (_, _)              -> take_format_readers k fmt
1283  | Ignored_int32 (_, _)            -> take_format_readers k fmt
1284  | Ignored_nativeint (_, _)        -> take_format_readers k fmt
1285  | Ignored_int64 (_, _)            -> take_format_readers k fmt
1286  | Ignored_float (_, _)            -> take_format_readers k fmt
1287  | Ignored_bool                    -> take_format_readers k fmt
1288  | Ignored_format_arg _            -> take_format_readers k fmt
1289  | Ignored_format_subst (_, fmtty) -> take_fmtty_format_readers k fmtty fmt
1290  | Ignored_scan_char_set _         -> take_format_readers k fmt
1291  | Ignored_scan_get_counter _      -> take_format_readers k fmt
1292  | Ignored_scan_next_char          -> take_format_readers k fmt
1293
1294(******************************************************************************)
1295                          (* Generic scanning *)
1296
1297(* Make a generic scanning function. *)
1298(* Scan a stream according to a format and readers obtained by
1299   take_format_readers, and aggegate scanned values into an
1300   heterogeneous list. *)
1301(* Return the heterogeneous list of scanned values. *)
1302let rec make_scanf : type a c d e f.
1303    Scanning.in_channel -> (a, Scanning.in_channel, c, d, e, f) fmt ->
1304      (d, e) heter_list -> (a, f) heter_list =
1305fun ib fmt readers -> match fmt with
1306  | Char rest ->
1307    let _ = scan_char 0 ib in
1308    let c = token_char ib in
1309    Cons (c, make_scanf ib rest readers)
1310  | Caml_char rest ->
1311    let _ = scan_caml_char 0 ib in
1312    let c = token_char ib in
1313    Cons (c, make_scanf ib rest readers)
1314
1315  | String (pad, Formatting_lit (fmting_lit, rest)) ->
1316    let stp, str = stopper_of_formatting_lit fmting_lit in
1317    let scan width _ ib = scan_string (Some stp) width ib in
1318    let str_rest = String_literal (str, rest) in
1319    pad_prec_scanf ib str_rest readers pad No_precision scan token_string
1320  | String (pad, Formatting_gen (Open_tag (Format (fmt', _)), rest)) ->
1321    let scan width _ ib = scan_string (Some '{') width ib in
1322    pad_prec_scanf ib (concat_fmt fmt' rest) readers pad No_precision scan
1323                   token_string
1324  | String (pad, Formatting_gen (Open_box (Format (fmt', _)), rest)) ->
1325    let scan width _ ib = scan_string (Some '[') width ib in
1326    pad_prec_scanf ib (concat_fmt fmt' rest) readers pad No_precision scan
1327                   token_string
1328  | String (pad, rest) ->
1329    let scan width _ ib = scan_string None width ib in
1330    pad_prec_scanf ib rest readers pad No_precision scan token_string
1331
1332  | Caml_string (pad, rest) ->
1333    let scan width _ ib = scan_caml_string width ib in
1334    pad_prec_scanf ib rest readers pad No_precision scan token_string
1335  | Int (iconv, pad, prec, rest) ->
1336    let c = integer_conversion_of_char (char_of_iconv iconv) in
1337    let scan width _ ib = scan_int_conversion c width ib in
1338    pad_prec_scanf ib rest readers pad prec scan (token_int c)
1339  | Int32 (iconv, pad, prec, rest) ->
1340    let c = integer_conversion_of_char (char_of_iconv iconv) in
1341    let scan width _ ib = scan_int_conversion c width ib in
1342    pad_prec_scanf ib rest readers pad prec scan (token_int32 c)
1343  | Nativeint (iconv, pad, prec, rest) ->
1344    let c = integer_conversion_of_char (char_of_iconv iconv) in
1345    let scan width _ ib = scan_int_conversion c width ib in
1346    pad_prec_scanf ib rest readers pad prec scan (token_nativeint c)
1347  | Int64 (iconv, pad, prec, rest) ->
1348    let c = integer_conversion_of_char (char_of_iconv iconv) in
1349    let scan width _ ib = scan_int_conversion c width ib in
1350    pad_prec_scanf ib rest readers pad prec scan (token_int64 c)
1351  | Float (Float_F, pad, prec, rest) ->
1352    pad_prec_scanf ib rest readers pad prec scan_caml_float token_float
1353  | Float ((Float_f | Float_pf | Float_sf | Float_e | Float_pe | Float_se
1354               | Float_E | Float_pE | Float_sE | Float_g | Float_pg | Float_sg
1355               | Float_G | Float_pG | Float_sG), pad, prec, rest) ->
1356    pad_prec_scanf ib rest readers pad prec scan_float token_float
1357  | Float ((Float_h | Float_ph | Float_sh | Float_H | Float_pH | Float_sH),
1358           pad, prec, rest) ->
1359    pad_prec_scanf ib rest readers pad prec scan_hex_float token_float
1360  | Bool rest ->
1361    let _ = scan_bool ib in
1362    let b = token_bool ib in
1363    Cons (b, make_scanf ib rest readers)
1364  | Alpha _ ->
1365    invalid_arg "scanf: bad conversion \"%a\""
1366  | Theta _ ->
1367    invalid_arg "scanf: bad conversion \"%t\""
1368  | Custom _ ->
1369    invalid_arg "scanf: bad conversion \"%?\" (custom converter)"
1370  | Reader fmt_rest ->
1371    begin match readers with
1372    | Cons (reader, readers_rest) ->
1373        let x = reader ib in
1374        Cons (x, make_scanf ib fmt_rest readers_rest)
1375    | Nil ->
1376        invalid_arg "scanf: missing reader"
1377    end
1378  | Flush rest ->
1379    if Scanning.end_of_input ib then make_scanf ib rest readers
1380    else bad_input "end of input not found"
1381
1382  | String_literal (str, rest) ->
1383    String.iter (check_char ib) str;
1384    make_scanf ib rest readers
1385  | Char_literal (chr, rest) ->
1386    check_char ib chr;
1387    make_scanf ib rest readers
1388
1389  | Format_arg (pad_opt, fmtty, rest) ->
1390    let _ = scan_caml_string (width_of_pad_opt pad_opt) ib in
1391    let s = token_string ib in
1392    let fmt =
1393      try format_of_string_fmtty s fmtty
1394      with Failure msg -> bad_input msg
1395    in
1396    Cons (fmt, make_scanf ib rest readers)
1397  | Format_subst (pad_opt, fmtty, rest) ->
1398    let _ = scan_caml_string (width_of_pad_opt pad_opt) ib in
1399    let s = token_string ib in
1400    let fmt, fmt' =
1401      try
1402        let Fmt_EBB fmt = fmt_ebb_of_string s in
1403        let Fmt_EBB fmt' = fmt_ebb_of_string s in
1404        (* TODO: find a way to avoid reparsing twice *)
1405
1406        (* TODO: these type-checks below *can* fail because of type
1407           ambiguity in presence of ignored-readers: "%_r%d" and "%d%_r"
1408           are typed in the same way.
1409
1410           # Scanf.sscanf "\"%_r%d\"3" "%(%d%_r%)" ignore
1411             (fun fmt n -> string_of_format fmt, n)
1412           Exception: CamlinternalFormat.Type_mismatch.
1413
1414           We should properly catch this exception.
1415        *)
1416        type_format fmt (erase_rel fmtty),
1417        type_format fmt' (erase_rel (symm fmtty))
1418      with Failure msg -> bad_input msg
1419    in
1420    Cons (Format (fmt, s),
1421          make_scanf ib (concat_fmt fmt' rest) readers)
1422
1423  | Scan_char_set (width_opt, char_set, Formatting_lit (fmting_lit, rest)) ->
1424    let stp, str = stopper_of_formatting_lit fmting_lit in
1425    let width = width_of_pad_opt width_opt in
1426    scan_chars_in_char_set char_set (Some stp) width ib;
1427    let s = token_string ib in
1428    let str_rest = String_literal (str, rest) in
1429    Cons (s, make_scanf ib str_rest readers)
1430  | Scan_char_set (width_opt, char_set, rest) ->
1431    let width = width_of_pad_opt width_opt in
1432    scan_chars_in_char_set char_set None width ib;
1433    let s = token_string ib in
1434    Cons (s, make_scanf ib rest readers)
1435  | Scan_get_counter (counter, rest) ->
1436    let count = get_counter ib counter in
1437    Cons (count, make_scanf ib rest readers)
1438  | Scan_next_char rest ->
1439    let c = Scanning.checked_peek_char ib in
1440    Cons (c, make_scanf ib rest readers)
1441
1442  | Formatting_lit (formatting_lit, rest) ->
1443    String.iter (check_char ib) (string_of_formatting_lit formatting_lit);
1444    make_scanf ib rest readers
1445  | Formatting_gen (Open_tag (Format (fmt', _)), rest) ->
1446    check_char ib '@'; check_char ib '{';
1447    make_scanf ib (concat_fmt fmt' rest) readers
1448  | Formatting_gen (Open_box (Format (fmt', _)), rest) ->
1449    check_char ib '@'; check_char ib '[';
1450    make_scanf ib (concat_fmt fmt' rest) readers
1451
1452  | Ignored_param (ign, rest) ->
1453    let Param_format_EBB fmt' = param_format_of_ignored_format ign rest in
1454    begin match make_scanf ib fmt' readers with
1455    | Cons (_, arg_rest) -> arg_rest
1456    | Nil -> assert false
1457    end
1458
1459  | End_of_format ->
1460    Nil
1461
1462(* Case analysis on padding and precision. *)
1463(* Reject formats containing "%*" or "%.*". *)
1464(* Pass padding and precision to the generic scanner `scan'. *)
1465and pad_prec_scanf : type a c d e f x y z t .
1466    Scanning.in_channel -> (a, Scanning.in_channel, c, d, e, f) fmt ->
1467      (d, e) heter_list -> (x, y) padding -> (y, z -> a) precision ->
1468      (int -> int -> Scanning.in_channel -> t) ->
1469      (Scanning.in_channel -> z) ->
1470      (x, f) heter_list =
1471fun ib fmt readers pad prec scan token -> match pad, prec with
1472  | No_padding, No_precision ->
1473    let _ = scan max_int max_int ib in
1474    let x = token ib in
1475    Cons (x, make_scanf ib fmt readers)
1476  | No_padding, Lit_precision p ->
1477    let _ = scan max_int p ib in
1478    let x = token ib in
1479    Cons (x, make_scanf ib fmt readers)
1480  | Lit_padding ((Right | Zeros), w), No_precision ->
1481    let _ = scan w max_int ib in
1482    let x = token ib in
1483    Cons (x, make_scanf ib fmt readers)
1484  | Lit_padding ((Right | Zeros), w), Lit_precision p ->
1485    let _ = scan w p ib in
1486    let x = token ib in
1487    Cons (x, make_scanf ib fmt readers)
1488  | Lit_padding (Left, _), _ ->
1489    invalid_arg "scanf: bad conversion \"%-\""
1490  | Lit_padding ((Right | Zeros), _), Arg_precision ->
1491    invalid_arg "scanf: bad conversion \"%*\""
1492  | Arg_padding _, _ ->
1493    invalid_arg "scanf: bad conversion \"%*\""
1494  | No_padding, Arg_precision ->
1495    invalid_arg "scanf: bad conversion \"%*\""
1496
1497(******************************************************************************)
1498            (* Defining [scanf] and various flavors of [scanf] *)
1499
1500type 'a kscanf_result = Args of 'a | Exc of exn
1501
1502let kscanf ib ef (Format (fmt, str)) =
1503  let rec apply : type a b . a -> (a, b) heter_list -> b =
1504    fun f args -> match args with
1505    | Cons (x, r) -> apply (f x) r
1506    | Nil -> f
1507  in
1508  let k readers f =
1509    Scanning.reset_token ib;
1510    match try Args (make_scanf ib fmt readers) with
1511      | (Scan_failure _ | Failure _ | End_of_file) as exc -> Exc exc
1512      | Invalid_argument msg ->
1513        invalid_arg (msg ^ " in format \"" ^ String.escaped str ^ "\"")
1514    with
1515      | Args args -> apply f args
1516      | Exc exc -> ef ib exc
1517  in
1518  take_format_readers k fmt
1519
1520(***)
1521
1522let kbscanf = kscanf
1523let bscanf ib fmt = kbscanf ib scanf_bad_input fmt
1524
1525let ksscanf s ef fmt = kbscanf (Scanning.from_string s) ef fmt
1526let sscanf s fmt = kbscanf (Scanning.from_string s) scanf_bad_input fmt
1527
1528let scanf fmt = kscanf Scanning.stdib scanf_bad_input fmt
1529
1530(***)
1531
1532(* Scanning format strings. *)
1533let bscanf_format :
1534  Scanning.in_channel -> ('a, 'b, 'c, 'd, 'e, 'f) format6 ->
1535  (('a, 'b, 'c, 'd, 'e, 'f) format6 -> 'g) -> 'g =
1536  fun ib format f ->
1537    let _ = scan_caml_string max_int ib in
1538    let str = token_string ib in
1539    let fmt' =
1540      try format_of_string_format str format
1541      with Failure msg -> bad_input msg in
1542    f fmt'
1543
1544
1545let sscanf_format :
1546  string -> ('a, 'b, 'c, 'd, 'e, 'f) format6 ->
1547  (('a, 'b, 'c, 'd, 'e, 'f) format6 -> 'g) -> 'g =
1548  fun s format f -> bscanf_format (Scanning.from_string s) format f
1549
1550
1551let string_to_String s =
1552  let l = String.length s in
1553  let b = Buffer.create (l + 2) in
1554  Buffer.add_char b '\"';
1555  for i = 0 to l - 1 do
1556    let c = s.[i] in
1557    if c = '\"' then Buffer.add_char b '\\';
1558    Buffer.add_char b c;
1559  done;
1560  Buffer.add_char b '\"';
1561  Buffer.contents b
1562
1563
1564let format_from_string s fmt =
1565  sscanf_format (string_to_String s) fmt (fun x -> x)
1566
1567
1568let unescaped s =
1569  sscanf ("\"" ^ s ^ "\"") "%S%!" (fun x -> x)
1570
1571
1572(* Deprecated *)
1573let kfscanf ic ef fmt = kbscanf (Scanning.memo_from_channel ic) ef fmt
1574let fscanf ic fmt = kscanf (Scanning.memo_from_channel ic) scanf_bad_input fmt
1575