1(***********************************************************************) 2(* *) 3(* HEVEA *) 4(* Luc Maranget, projet PARA, INRIA Rocquencourt *) 5(* *) 6(* Copyright 2012 Institut National de Recherche en Informatique et *) 7(* en Automatique. Distributed only by permission. *) 8(* *) 9(* *) 10(***********************************************************************) 11 12(* URL encoding and decoding, As the issue is still pending, apply to fragment only! *) 13 14{ 15 open Printf 16 17 type url = 18 { 19 scheme : string option ; 20 authority : string option ; 21 path : string ; 22 query : string option ; 23 fragment : string option ; 24 } 25 26 exception Error 27} 28 29let hex = ['0'-'9''A'-'F''a'-'f'] 30 31rule break = parse 32| 33([^':''/''?''#']+ as scheme ':') ? 34("//" ([^'/''?''#']* as authority)) ? 35([^'?''#']* as path) 36('?' [^'#']* as query)? 37('#' (_* as fragment))? 38{ {scheme; authority; path; query; fragment;} } 39| "" { raise Error } 40 41and do_decode putc = parse 42| '%' (hex as a) (hex as b) 43 { let n = 44 try int_of_string (sprintf "0x%c%c" a b) with _ -> assert false in 45 putc (Char.chr n) ; 46 do_decode putc lexbuf } 47| _ as c { putc c ; do_decode putc lexbuf } 48| eof { () } 49 50{ 51(* See 52http://www.lunatech-research.com/archives/2009/02/03/what-every-web-developer-must-know-about-url-encoding/#Thereservedcharactersarenotwhatyouthinktheyare 53*) 54 let do_encode_fragment putc put c = match c with 55 | 'a'..'z' | 'A'..'Z' | '0'..'9' | '-' | '_' | '~' | '.' 56 -> putc c 57 | _ -> put (sprintf "%%%02X" (Char.code c)) 58 59 let do_encode putc put specific u = 60 let len = String.length u in 61 for k =0 to len-1 do 62 let c = String.unsafe_get u k in 63 specific putc put c 64 done 65 66 let apply putc put f u = 67 begin match u.scheme with 68 | None -> () 69 | Some s -> f s ; putc ':' 70 end ; 71 begin match u.authority with 72 | None -> () 73 | Some s -> put "//" ; f s 74 end ; 75 f u.path ; 76 begin match u.query with 77 | None -> () 78 | Some s -> putc '?' ; f s 79 end ; 80 begin match u.fragment with 81 | None -> () 82 | Some s -> putc '#' ; f s 83 end ; 84 () 85 86 let _encode putc put u = 87 let u = break (MyLexing.from_string u) in 88 apply putc put (do_encode putc put do_encode_fragment) u 89 90 let _decode putc put u = 91 let u = break (MyLexing.from_string u) in 92 let do_decode s = do_decode putc (MyLexing.from_string s) in 93 apply putc put do_decode u 94 95 96 let encode_fragment putc put u = 97 do_encode putc put do_encode_fragment u 98 99 let decode_fragment putc _put u = do_decode putc (MyLexing.from_string u) 100} 101