1(***********************************************************************)
2(*                                                                     *)
3(*                          HEVEA                                      *)
4(*  Luc Maranget, projet PARA, INRIA Rocquencourt                      *)
5(*                                                                     *)
6(*  Copyright 2012 Institut National de Recherche en Informatique et   *)
7(*  en Automatique.  Distributed only by permission.                   *)
8(*                                                                     *)
9(*                                                                     *)
10(***********************************************************************)
11
12(* URL encoding and decoding, As the issue is still pending, apply to fragment only! *)
13
14{
15 open Printf
16
17 type url =
18 {
19  scheme : string option ;
20  authority : string option ;
21  path : string ;
22  query : string option ;
23  fragment : string option ;
24 }
25
26 exception Error
27}
28
29let hex = ['0'-'9''A'-'F''a'-'f']
30
31rule break = parse
32|
33([^':''/''?''#']+ as scheme ':') ?
34("//" ([^'/''?''#']* as authority)) ?
35([^'?''#']* as path)
36('?' [^'#']* as query)?
37('#' (_* as fragment))?
38{ {scheme; authority; path; query; fragment;} }
39| "" { raise Error }
40
41and do_decode putc = parse
42| '%' (hex as a) (hex as b)
43  { let n =
44    try int_of_string (sprintf "0x%c%c" a b) with _ -> assert false in
45  putc (Char.chr n) ;
46  do_decode putc lexbuf }
47| _ as c { putc c ; do_decode putc lexbuf }
48| eof    { () }
49
50{
51(* See
52http://www.lunatech-research.com/archives/2009/02/03/what-every-web-developer-must-know-about-url-encoding/#Thereservedcharactersarenotwhatyouthinktheyare
53*)
54 let do_encode_fragment putc put c =  match c with
55 |  'a'..'z' | 'A'..'Z' | '0'..'9' | '-' | '_' | '~' | '.'
56   -> putc c
57 | _ -> put (sprintf "%%%02X" (Char.code c))
58
59 let do_encode putc put specific u =
60   let len = String.length u in
61   for k =0 to len-1 do
62     let c = String.unsafe_get u k in
63     specific putc put c
64   done
65
66 let apply putc put f u =
67   begin match u.scheme with
68   | None -> ()
69   | Some s -> f s ; putc ':'
70   end ;
71   begin match u.authority with
72   | None -> ()
73   | Some s -> put "//" ; f s
74   end ;
75   f u.path ;
76   begin match u.query with
77   | None -> ()
78   | Some s -> putc '?' ; f s
79   end ;
80   begin match u.fragment with
81   | None -> ()
82   | Some s -> putc '#' ; f s
83   end ;
84   ()
85
86 let _encode putc put u =
87   let u = break (MyLexing.from_string u) in
88   apply putc put (do_encode putc put do_encode_fragment) u
89
90 let _decode putc put u =
91   let u = break (MyLexing.from_string u) in
92   let do_decode s = do_decode putc (MyLexing.from_string s) in
93   apply putc put do_decode u
94
95
96  let encode_fragment putc put u =
97    do_encode putc put do_encode_fragment u
98
99  let decode_fragment putc _put u = do_decode putc (MyLexing.from_string u)
100}
101