1(* $Id: unescape.ml,v 1.1.1.1 2003/08/10 20:12:28 yori Exp $ *)
2(* Copyright 2003 Yamagata Yoriyuki *)
3
4open Camomile
5
6exception Error of string
7
(* Call [Get_enc.setlocale] with the empty string at program start-up.
   NOTE(review): presumably this selects the locale from the environment,
   as the C setlocale function does -- confirm against Get_enc. *)
let () =
  let locale_name = "" in
  Get_enc.setlocale locale_name
9
(* The character encoding used for both input and output.

   The command line is parsed here: [--encoding NAME] selects the encoding
   by name, and any anonymous argument is rejected.  When the option is
   absent the name comes from [Get_enc.get_enc ()].  The name is resolved
   with [CharEncoding.of_name]. *)
let enc =
  let requested = ref None in
  let set_encoding name = requested := Some name in
  let reject_anonymous _ = raise (Arg.Bad "Too many arguments") in
  let spec = ["--encoding", Arg.String set_encoding, "Encoding"] in
  Arg.parse spec reject_anonymous
    "Usage: unescape [--encoding enc] < input > output";
  let name =
    match !requested with
    | None -> Get_enc.get_enc ()
    | Some name -> name
  in
  CharEncoding.of_name name
18
(* Wraps the input channel [c] with push-back support.

   - [get] returns the oldest pushed-back element if there is one, and
     otherwise reads from [c].
   - [unget x] stores [x] so that a later [get] returns it.  Pushed-back
     elements are returned in the order they were pushed (first in,
     first out).
   - [close] drops any pushed-back elements and closes [c]. *)
class ['a] unget (c : 'a obj_input_channel) =
  let pending = Queue.create () in
  object
    method get =
      if Queue.is_empty pending then c#get else Queue.take pending
    method unget x : unit = Queue.add x pending
    method close : unit =
      Queue.clear pending;
      c#close
  end
27
(* [src]: standard input read through [CharEncoding.in_channel] with
   encoding [enc], wrapped in [unget] so that a character can be pushed
   back.  [dst]: standard output written through
   [CharEncoding.out_channel] with the same encoding. *)
let src =
  let decoded_stdin = new CharEncoding.in_channel enc stdin in
  new unget decoded_stdin
let dst = new CharEncoding.out_channel enc stdout
30
(* Integer codes of the ASCII characters the escape parser compares
   against: the escape introducers u and U, the bounds of the three
   hexadecimal digit ranges, and the backslash itself. *)
let char_u, char_U = Char.code 'u', Char.code 'U'
let char_0, char_9 = Char.code '0', Char.code '9'
let char_a, char_f = Char.code 'a', Char.code 'f'
let char_A, char_F = Char.code 'A', Char.code 'F'
let char_bs = Char.code '\\'
40
(* Running counter incremented before each character is requested from the
   input; its value is printed as the location in error messages. *)
let count : int ref = ref 0
42
(* [hex_digit_value n] is [Some v] when the character code [n] is an ASCII
   hexadecimal digit (0-9, a-f or A-F) of value [v], and [None] otherwise. *)
let hex_digit_value n =
  if n >= char_0 && n <= char_9 then Some (n - char_0)
  else if n >= char_a && n <= char_f then Some (n - char_a + 10)
  else if n >= char_A && n <= char_F then Some (n - char_A + 10)
  else None

(* [read_escape m c] reads at most [c] further hexadecimal digits from
   [src], folding each one into the accumulator [m] (most significant digit
   first), and writes the character with code [m] to [dst].

   The escape ends as soon as one of the following happens:
   - [c] digits have been consumed;
   - a non-hexadecimal character is read: it is pushed back onto [src] so
     the caller sees it again, and [count] is rolled back by one so that the
     re-read does not count the same character twice;
   - the input ends: the value accumulated so far is still written, exactly
     as it would be before a non-hexadecimal character, and [End_of_file]
     is then re-raised so the caller can finish normally.

   NOTE(review): an escape with no digits at all still writes the character
   with code [m] as passed by the caller; this lenient behaviour is kept
   unchanged.

   Exceptions raised by [src#get], [dst#put] or [uchar_of_int] propagate to
   the caller. *)
let rec read_escape m c =
  if c = 0 then dst#put (uchar_of_int m)
  else begin
    incr count;
    (* Only the read itself is guarded, so End_of_file raised by a nested
       call is not intercepted a second time. *)
    let next = try Some (src#get) with End_of_file -> None in
    match next with
    | None ->
        dst#put (uchar_of_int m);
        raise End_of_file
    | Some u ->
        let n = int_of_uchar u in
        begin match hex_digit_value n with
        | Some d -> read_escape ((m lsl 4) lor d) (c - 1)
        | None ->
            dst#put (uchar_of_int m);
            src#unget (uchar_of_int n);
            (* The pushed-back character will be counted again when it is
               re-read, so undo the increment made for it above. *)
            decr count
        end
  end
58
(* Main loop: copies characters from [src] to [dst], expanding escapes.

   - A doubled backslash is written as a single backslash.
   - Backslash followed by u starts an escape of up to 4 hexadecimal
     digits, and backslash followed by U one of up to 8; both are handled
     by [read_escape].
   - Backslash followed by anything else, or by the end of the input, is
     reported as a broken escape.
   - Every other character is copied unchanged.

   [count] is incremented before each read so that error messages can
   report a location.  On [End_of_file] the output is closed and the
   program exits with status 0; on [Error], [CharEncoding.Malformed_code]
   or [CharEncoding.Out_of_range] a message is printed on stderr and the
   program exits with status 1. *)
let () =
  let broken_escape () = raise (Error "Broken escape notation") in
  try
    while true do
      incr count;
      let u = src#get in
      if int_of_uchar u <> char_bs then dst#put u
      else begin
        incr count;
        (* Input that stops right after a backslash used to fall through to
           the End_of_file handler, silently dropping the backslash and
           exiting successfully; treat it as the broken escape it is. *)
        let next = try src#get with End_of_file -> broken_escape () in
        let n = int_of_uchar next in
        if n = char_bs then dst#put (uchar_of_int char_bs)
        else if n = char_u then read_escape 0 4
        else if n = char_U then read_escape 0 8
        else broken_escape ()
      end
    done;
    (* The loop only terminates through an exception. *)
    assert false
  with
  | Error s ->
      Printf.eprintf "Error \"%s\" occurs in the location %d\n" s !count;
      exit 1
  | CharEncoding.Malformed_code ->
      Printf.eprintf "Malformed code in the location %d\n" !count;
      exit 1
  | CharEncoding.Out_of_range ->
      Printf.eprintf "Out of range character in the location %d\n" !count;
      exit 1
  | End_of_file ->
      dst#close;
      exit 0
87