1%% ``Licensed under the Apache License, Version 2.0 (the "License"); 2%% you may not use this file except in compliance with the License. 3%% You may obtain a copy of the License at 4%% 5%% http://www.apache.org/licenses/LICENSE-2.0 6%% 7%% Unless required by applicable law or agreed to in writing, software 8%% distributed under the License is distributed on an "AS IS" BASIS, 9%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10%% See the License for the specific language governing permissions and 11%% limitations under the License. 12%% 13%% The Initial Developer of the Original Code is Ericsson Utvecklings AB. 14%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings 15%% AB. All Rights Reserved.'' 16%% 17%% $Id: httpd_parse.erl,v 1.1 2008/12/17 09:53:34 mikpe Exp $ 18%% 19-module(httpd_parse). 20-export([ 21 request_header/1, 22 hsplit/2, 23 get_request_record/10, 24 split_lines/1, 25 tagup_header/1]). 26-include("httpd.hrl"). 27 28 29%%---------------------------------------------------------------------- 30%% request_header 31%% 32%% Input: The request as sent from the client (list of characters) 33%% (may include part of the entity body) 34%% 35%% Returns: 36%% {ok, Info#mod} 37%% {not_implemented,Info#mod} 38%% {bad_request, Reason} 39%%---------------------------------------------------------------------- 40 41request_header(Header)-> 42 [RequestLine|HeaderFields] = split_lines(Header), 43 ?DEBUG("request ->" 44 "~n RequestLine: ~p" 45 "~n Header: ~p",[RequestLine,Header]), 46 ParsedHeader = tagup_header(HeaderFields), 47 ?DEBUG("request ->" 48 "~n ParseHeader: ~p",[ParsedHeader]), 49 case verify_request(string:tokens(RequestLine," ")) of 50 ["HEAD", RequestURI, [$H,$T,$T,$P,$/,$1,$.,N]] -> 51 {ok, ["HEAD", RequestURI, [$H,$T,$T,$P,$/,$1,$.,N], RequestLine, 52 ParsedHeader]}; 53 ["GET", RequestURI, "HTTP/0.9"] -> 54 {ok, ["GET", RequestURI, "HTTP/0.9", RequestLine, ParsedHeader]}; 55 ["GET", RequestURI, [$H,$T,$T,$P,$/,$1,$.,N]] -> 56 {ok, ["GET", RequestURI, [$H,$T,$T,$P,$/,$1,$.,N], RequestLine, 57 ParsedHeader]}; 58 ["POST", RequestURI, [$H,$T,$T,$P,$/,$1,$.,N]] -> 59 {ok, ["POST", RequestURI, [$H,$T,$T,$P,$/,$1,$.,N], RequestLine, 60 ParsedHeader]}; 61 %%HTTP must be 1.1 or higher 62 ["TRACE", RequestURI, [$H,$T,$T,$P,$/,$1,$.,N]] when N>48-> 63 {ok, ["TRACE", RequestURI, [$H,$T,$T,$P,$/,$1,$.,N], RequestLine, 64 ParsedHeader]}; 65 [Method, RequestURI] -> 66 {not_implemented, RequestLine, Method, RequestURI,ParsedHeader,"HTTP/0.9"}; 67 [Method, RequestURI, HTTPVersion] -> 68 {not_implemented, RequestLine, Method, RequestURI,ParsedHeader, HTTPVersion}; 69 {bad_request, Reason} -> 70 {bad_request, Reason}; 71 Reason -> 72 {bad_request, "Unknown request method"} 73 end. 74 75 76 77 78 79 80%%---------------------------------------------------------------------- 81%% The request is passed through the server as a record of type mod get it 82%% ---------------------------------------------------------------------- 83 84get_request_record(Socket,SocketType,ConfigDB,Method,RequestURI, 85 HTTPVersion,RequestLine,ParsedHeader,EntityBody,InitData)-> 86 PersistentConn=get_persistens(HTTPVersion,ParsedHeader,ConfigDB), 87 Info=#mod{init_data=InitData, 88 data=[], 89 socket_type=SocketType, 90 socket=Socket, 91 config_db=ConfigDB, 92 method=Method, 93 absolute_uri=formatAbsoluteURI(RequestURI,ParsedHeader), 94 request_uri=formatRequestUri(RequestURI), 95 http_version=HTTPVersion, 96 request_line=RequestLine, 97 parsed_header=ParsedHeader, 98 entity_body=maybe_remove_nl(ParsedHeader,EntityBody), 99 connection=PersistentConn}, 100 {ok,Info}. 101 102%%---------------------------------------------------------------------- 103%% Conmtrol wheater we shall maintain a persistent connection or not 104%%---------------------------------------------------------------------- 105get_persistens(HTTPVersion,ParsedHeader,ConfigDB)-> 106 case httpd_util:lookup(ConfigDB,persistent_conn,true) of 107 true-> 108 case HTTPVersion of 109 %%If it is version prio to 1.1 kill the conneciton 110 [$H, $T, $T, $P, $\/, $1, $.,N] -> 111 case httpd_util:key1search(ParsedHeader,"connection","keep-alive")of 112 %%if the connection isn't ordered to go down let it live 113 %%The keep-alive value is the older http/1.1 might be older 114 %%Clients that use it. 115 "keep-alive" when N >= 49 -> 116 ?DEBUG("CONNECTION MODE: ~p",[true]), 117 true; 118 "close" -> 119 ?DEBUG("CONNECTION MODE: ~p",[false]), 120 false; 121 Connect -> 122 ?DEBUG("CONNECTION MODE: ~p VALUE: ~p",[false,Connect]), 123 false 124 end; 125 _ -> 126 ?DEBUG("CONNECTION MODE: ~p VERSION: ~p",[false,HTTPVersion]), 127 false 128 129 end; 130 _ -> 131 false 132 end. 133 134 135 136 137%%---------------------------------------------------------------------- 138%% Control whether the last newline of the body is a part of the message or 139%%it is a part of the multipart message. 140%%---------------------------------------------------------------------- 141maybe_remove_nl(Header,Rest) -> 142 case find_content_type(Header) of 143 false -> 144 {ok,EntityBody,_}=regexp:sub(Rest,"\r\n\$",""), 145 EntityBody; 146 {ok, Value} -> 147 case string:str(Value, "multipart/form-data") of 148 0 -> 149 {ok,EntityBody,_}=regexp:sub(Rest,"\r\n\$",""), 150 EntityBody; 151 _ -> 152 Rest 153 end 154 end. 155 156%%---------------------------------------------------------------------- 157%% Cet the content type of the incomming request 158%%---------------------------------------------------------------------- 159 160 161find_content_type([]) -> 162 false; 163find_content_type([{Name,Value}|Tail]) -> 164 case httpd_util:to_lower(Name) of 165 "content-type" -> 166 {ok, Value}; 167 _ -> 168 find_content_type(Tail) 169 end. 170 171%%---------------------------------------------------------------------- 172%% Split the header to a list of strings where each string represents a 173%% HTTP header-field 174%%---------------------------------------------------------------------- 175split_lines(Request) -> 176 split_lines(Request, [], []). 177split_lines([], CAcc, Acc) -> 178 lists:reverse([lists:reverse(CAcc)|Acc]); 179 180%%White space in the header fields are allowed but the new line must begin with LWS se 181%%rfc2616 chap 4.2. The rfc do not say what to 182split_lines([$\r, $\n, $\t |Rest], CAcc, Acc) -> 183 split_lines(Rest, [$\r, $\n |CAcc], Acc); 184 185split_lines([$\r, $\n, $\s |Rest], CAcc, Acc) -> 186 split_lines(Rest, [$\r, $\n |CAcc], Acc); 187 188split_lines([$\r, $\n|Rest], CAcc, Acc) -> 189 split_lines(Rest, [], [lists:reverse(CAcc)|Acc]); 190split_lines([Chr|Rest], CAcc, Acc) -> 191 split_lines(Rest, [Chr|CAcc], Acc). 192 193 194%%---------------------------------------------------------------------- 195%% This is a 'hack' to stop people from trying to access directories/files 196%% relative to the ServerRoot. 197%%---------------------------------------------------------------------- 198 199 200verify_request([Request, RequestURI]) -> 201 verify_request([Request, RequestURI, "HTTP/0.9"]); 202 203verify_request([Request, RequestURI, Protocol]) -> 204 NewRequestURI = 205 case string:str(RequestURI, "?") of 206 0 -> 207 RequestURI; 208 Ndx -> 209 string:left(RequestURI, Ndx) 210 end, 211 case string:str(NewRequestURI, "..") of 212 0 -> 213 [Request, RequestURI, Protocol]; 214 _ -> 215 {bad_request, {forbidden, RequestURI}} 216 end; 217verify_request(Request) -> 218 Request. 219 220%%---------------------------------------------------------------------- 221%% tagup_header 222%% 223%% Parses the header of a HTTP request and returns a key,value tuple 224%% list containing Name and Value of each header directive as of: 225%% 226%% Content-Type: multipart/mixed -> {"Content-Type", "multipart/mixed"} 227%% 228%% But in http/1.1 the field-names are case insencitive so now it must be 229%% Content-Type: multipart/mixed -> {"content-type", "multipart/mixed"} 230%% The standard furthermore says that leading and traling white space 231%% is not a part of the fieldvalue and shall therefore be removed. 232%%---------------------------------------------------------------------- 233 234tagup_header([]) -> []; 235tagup_header([Line|Rest]) -> [tag(Line, [])|tagup_header(Rest)]. 236 237tag([], Tag) -> 238 {httpd_util:to_lower(lists:reverse(Tag)), ""}; 239tag([$:|Rest], Tag) -> 240 {httpd_util:to_lower(lists:reverse(Tag)), httpd_util:strip(Rest)}; 241tag([Chr|Rest], Tag) -> 242 tag(Rest, [Chr|Tag]). 243 244 245%%---------------------------------------------------------------------- 246%% There are 3 possible forms of the reuqest URI 247%% 248%% 1. * When the request is not for a special assset. is is instead 249%% to the server itself 250%% 251%% 2. absoluteURI the whole servername port and asset is in the request 252%% 253%% 3. The most common form that http/1.0 used abs path that is a path 254%% to the requested asset. 255%5---------------------------------------------------------------------- 256formatRequestUri("*")-> 257 "*"; 258formatRequestUri([$h,$t,$t,$p,$:,$\/,$\/|ServerAndPath]) -> 259 removeServer(ServerAndPath); 260 261formatRequestUri([$H,$T,$T,$P,$:,$\/,$\/|ServerAndPath]) -> 262 removeServer(ServerAndPath); 263 264formatRequestUri(ABSPath) -> 265 ABSPath. 266 267removeServer([$\/|Url])-> 268 case Url of 269 []-> 270 "/"; 271 _-> 272 [$\/|Url] 273 end; 274removeServer([N|Url]) -> 275 removeServer(Url). 276 277 278formatAbsoluteURI([$h,$t,$t,$p,$:,$\/,$\/|Uri],ParsedHeader)-> 279 [$H,$T,$T,$P,$:,$\/,$\/|Uri]; 280 281formatAbsoluteURI([$H,$T,$T,$P,$:,$\/,$\/|Uri],ParsedHeader)-> 282 [$H,$T,$T,$P,$:,$\/,$\/|Uri]; 283 284formatAbsoluteURI(Uri,ParsedHeader)-> 285 case httpd_util:key1search(ParsedHeader,"host") of 286 undefined -> 287 nohost; 288 Host -> 289 Host++Uri 290 end. 291%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 292%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 293%%Code below is crap from an older version shall be removed when 294%%transformation to http/1.1 is finished 295%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 296%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 297 298 299 300%request(Request) -> 301% ?DEBUG("request -> entry with:" 302% "~n Request: ~s",[Request]), 303 % {BeforeEntityBody, Rest} = hsplit([], Request), 304 % ?DEBUG("request ->" 305% "~n BeforeEntityBody: ~p" 306% "~n Rest: ~p",[BeforeEntityBody, Rest]), 307% [RequestLine|Header] = split_lines(BeforeEntityBody), 308% ?DEBUG("request ->" 309% "~n RequestLine: ~p" 310% "~n Header: ~p",[RequestLine,Header]), 311% ParsedHeader = tagup_header(Header), 312% ?DEBUG("request ->" 313% "~n ParseHeader: ~p",[ParsedHeader]), 314% EntityBody = maybe_remove_nl(ParsedHeader,Rest), 315% ?DEBUG("request ->" 316% "~n EntityBody: ~p",[EntityBody]), 317% case verify_request(string:tokens(RequestLine," ")) of 318% ["HEAD", RequestURI, [$H,$T,$T,$P,$/,$1,$.,N]] -> 319% {ok, ["HEAD", formatRequestUri(RequestURI), [$H,$T,$T,$P,$/,$1,$.,N], RequestLine, 320% ParsedHeader, EntityBody]}; 321% ["GET", RequestURI, "HTTP/0.9"] -> 322% {ok, ["GET", RequestURI, "HTTP/0.9", RequestLine, ParsedHeader, 323% EntityBody]}; 324% ["GET", RequestURI, [$H,$T,$T,$P,$/,$1,$.,N]] -> 325% {ok, ["GET", formatRequestUri(RequestURI), [$H,$T,$T,$P,$/,$1,$.,N], RequestLine, 326% ParsedHeader,EntityBody]}; 327%% ["POST", RequestURI, [$H,$T,$T,$P,$/,$1,$.,N]] -> 328% {ok, ["POST", formatRequestUri(RequestURI), [$H,$T,$T,$P,$/,$1,$.,N], RequestLine, 329% ParsedHeader, EntityBody]}; 330% [Method, RequestURI] -> 331% {not_implemented, RequestLine, Method, RequestURI,ParsedHeader,"HTTP/0.9"}; 332% [Method, RequestURI, HTTPVersion] -> 333% {not_implemented, RequestLine, Method, RequestURI,ParsedHeader, HTTPVersion}; 334% {bad_request, Reason} -> 335% {bad_request, Reason}; 336% Reason -> 337% {bad_request, "Unknown request method"} 338% end. 339 340hsplit(Accu,[]) -> 341 {lists:reverse(Accu), []}; 342hsplit(Accu, [ $\r, $\n, $\r, $\n | Tail]) -> 343 {lists:reverse(Accu), Tail}; 344hsplit(Accu, [H|T]) -> 345 hsplit([H|Accu],T). 346