(* $Id: unescape.ml,v 1.1.1.1 2003/08/10 20:12:28 yori Exp $ *)
(* Copyright 2003 Yamagata Yoriyuki *)

(* unescape: copies standard input to standard output, replacing
   backslash escapes by the characters they denote.

   Recognised escapes:
     - backslash backslash : a single backslash
     - backslash u + up to 4 hex digits : the character with that code
     - backslash U + up to 8 hex digits : the character with that code
   Any other character after a backslash is reported as an error. *)

open Camomile

(* Raised by the main loop when a backslash is followed by a character
   that does not start a recognised escape. *)
exception Error of string

let () = Get_enc.setlocale ""

(* Encoding used for both input and output.  Taken from the --encoding
   option when given, otherwise from [Get_enc.get_enc ()] (presumably the
   encoding of the current locale -- confirm against Get_enc).  Any
   anonymous argument is rejected through [Arg.Bad]. *)
let enc =
  let enc = ref None in
  Arg.parse
    ["--encoding", Arg.String (fun s -> enc := Some s), "Encoding"]
    (fun _ -> raise (Arg.Bad "Too many arguments"))
    "Usage: unescape [--encoding enc] < input > output";
  let s = match !enc with Some s -> s | None -> Get_enc.get_enc () in
  CharEncoding.of_name s

(* Wraps an input channel [c] with a push-back queue.
   - [get] returns the oldest pushed-back item if there is one, otherwise
     reads from [c].
   - [unget x] queues [x] so that a later [get] returns it.
   - [close] discards any queued items and closes [c]. *)
class ['a] unget (c : 'a obj_input_channel) =
  let q = Queue.create () in
  object
    method get =
      try Queue.take q with Queue.Empty -> c#get
    method unget x : unit = Queue.add x q
    method close : unit = Queue.clear q; c#close
  end

(* Character source (stdin, with push-back) and sink (stdout), both using
   the encoding [enc]. *)
let src = new unget (new CharEncoding.in_channel enc stdin)
let dst = new CharEncoding.out_channel enc stdout

(* Code points of the ASCII characters the scanner compares against. *)
let char_u = Char.code 'u'
let char_U = Char.code 'U'
let char_0 = Char.code '0'
let char_9 = Char.code '9'
let char_a = Char.code 'a'
let char_f = Char.code 'f'
let char_A = Char.code 'A'
let char_F = Char.code 'F'
let char_bs = Char.code '\\'

(* 1-based position of the character most recently read from [src];
   used only in diagnostics. *)
let count = ref 0

(* [read_escape m c] reads at most [c] further hexadecimal digits from
   [src], accumulating them into [m] (four bits per digit), and writes the
   resulting character to [dst].

   Reading stops early at the first character that is not a hex digit: the
   value accumulated so far is written and the offending character is
   pushed back so that the main loop processes it normally.

   Exceptions raised by [src#get] (including [End_of_file]) propagate to
   the caller; in that case the partially accumulated value is not
   written. *)
let rec read_escape m c =
  if c = 0 then dst#put (uchar_of_int m) else begin
    incr count;
    let n = int_of_uchar src#get in
    if n >= char_0 && n <= char_9 then
      read_escape ((m lsl 4) lor (n - char_0)) (c - 1)
    else if n >= char_a && n <= char_f then
      read_escape ((m lsl 4) lor (n - char_a + 10)) (c - 1)
    else if n >= char_A && n <= char_F then
      read_escape ((m lsl 4) lor (n - char_A + 10)) (c - 1)
    else begin
      dst#put (uchar_of_int m);
      src#unget (uchar_of_int n);
      (* The pushed-back character will be read, and counted, again by the
         main loop.  Undo the increment made above so that reported
         locations do not drift by one after every short escape. *)
      decr count
    end
  end

(* Main loop: copy characters from [src] to [dst], decoding escapes, until
   [src] raises [End_of_file].  Exit status is 0 on end of input and 1 on a
   broken escape, malformed input or an unrepresentable character; the
   diagnostics report the position held in [count]. *)
let () =
  try while true do
    incr count;
    let u = src#get in
    let n = int_of_uchar u in
    if n = char_bs then begin
      (* A backslash: the next character selects the kind of escape. *)
      incr count;
      let n = int_of_uchar src#get in
      if n = char_bs then dst#put (uchar_of_int char_bs) else
      if n = char_u then read_escape 0 4 else
      if n = char_U then read_escape 0 8 else
      raise (Error "Broken escape notation")
    end else
      dst#put u
  done; assert false with
    Error s ->
      Printf.eprintf "Error \"%s\" occurs in the location %d\n" s !count;
      exit 1
  | CharEncoding.Malformed_code ->
      Printf.eprintf "Malformed code in the location %d\n" !count;
      exit 1
  | CharEncoding.Out_of_range ->
      Printf.eprintf "Out of range character in the location %d\n" !count;
      exit 1
  | End_of_file ->
      dst#close;
      exit 0