1%% ``Licensed under the Apache License, Version 2.0 (the "License");
2%% you may not use this file except in compliance with the License.
3%% You may obtain a copy of the License at
4%%
5%%     http://www.apache.org/licenses/LICENSE-2.0
6%%
7%% Unless required by applicable law or agreed to in writing, software
8%% distributed under the License is distributed on an "AS IS" BASIS,
9%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10%% See the License for the specific language governing permissions and
11%% limitations under the License.
12%%
13%% The Initial Developer of the Original Code is Ericsson Utvecklings AB.
14%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings
15%% AB. All Rights Reserved.''
16%%
17%%     $Id: httpd_parse.erl,v 1.1 2008/12/17 09:53:34 mikpe Exp $
18%%
19-module(httpd_parse).
20-export([
21	 request_header/1,
22	 hsplit/2,
23	 get_request_record/10,
24	 split_lines/1,
25	 tagup_header/1]).
26-include("httpd.hrl").
27
28
29%%----------------------------------------------------------------------
30%% request_header
31%%
32%% Input: The request as sent from the client (list of characters)
33%%        (may include part of the entity body)
34%%
35%% Returns:
36%%   {ok, Info#mod}
37%%   {not_implemented,Info#mod}
38%%   {bad_request, Reason}
39%%----------------------------------------------------------------------
40
41request_header(Header)->
42    [RequestLine|HeaderFields] = split_lines(Header),
43    ?DEBUG("request ->"
44	   "~n   RequestLine: ~p"
45	   "~n   Header:      ~p",[RequestLine,Header]),
46    ParsedHeader = tagup_header(HeaderFields),
47    ?DEBUG("request ->"
48	   "~n   ParseHeader: ~p",[ParsedHeader]),
49    case verify_request(string:tokens(RequestLine," ")) of
50	["HEAD", RequestURI, [$H,$T,$T,$P,$/,$1,$.,N]] ->
51	    {ok, ["HEAD", RequestURI, [$H,$T,$T,$P,$/,$1,$.,N], RequestLine,
52		 ParsedHeader]};
53	["GET", RequestURI, "HTTP/0.9"] ->
54	    {ok, ["GET", RequestURI, "HTTP/0.9", RequestLine, ParsedHeader]};
55	["GET", RequestURI, [$H,$T,$T,$P,$/,$1,$.,N]] ->
56	    {ok, ["GET", RequestURI, [$H,$T,$T,$P,$/,$1,$.,N], RequestLine,
57		 ParsedHeader]};
58	["POST", RequestURI, [$H,$T,$T,$P,$/,$1,$.,N]] ->
59	    {ok, ["POST", RequestURI, [$H,$T,$T,$P,$/,$1,$.,N], RequestLine,
60		 ParsedHeader]};
61	%%HTTP must be 1.1 or higher
62	["TRACE", RequestURI, [$H,$T,$T,$P,$/,$1,$.,N]] when N>48->
63	    {ok, ["TRACE", RequestURI, [$H,$T,$T,$P,$/,$1,$.,N], RequestLine,
64		 ParsedHeader]};
65	[Method, RequestURI] ->
66	    {not_implemented, RequestLine, Method, RequestURI,ParsedHeader,"HTTP/0.9"};
67	[Method, RequestURI, HTTPVersion] ->
68	    {not_implemented, RequestLine, Method, RequestURI,ParsedHeader, HTTPVersion};
69	{bad_request, Reason} ->
70	    {bad_request, Reason};
71	Reason ->
72	    {bad_request, "Unknown request method"}
73    end.
74
75
76
77
78
79
80%%----------------------------------------------------------------------
81%% The request is passed through the server as a record of type mod get it
82%% ----------------------------------------------------------------------
83
84get_request_record(Socket,SocketType,ConfigDB,Method,RequestURI,
85		 HTTPVersion,RequestLine,ParsedHeader,EntityBody,InitData)->
86    PersistentConn=get_persistens(HTTPVersion,ParsedHeader,ConfigDB),
87    Info=#mod{init_data=InitData,
88	      data=[],
89	      socket_type=SocketType,
90	      socket=Socket,
91	      config_db=ConfigDB,
92	      method=Method,
93	      absolute_uri=formatAbsoluteURI(RequestURI,ParsedHeader),
94	      request_uri=formatRequestUri(RequestURI),
95	      http_version=HTTPVersion,
96	      request_line=RequestLine,
97	      parsed_header=ParsedHeader,
98	      entity_body=maybe_remove_nl(ParsedHeader,EntityBody),
99	      connection=PersistentConn},
100    {ok,Info}.
101
102%%----------------------------------------------------------------------
103%% Conmtrol wheater we shall maintain a persistent connection or not
104%%----------------------------------------------------------------------
105get_persistens(HTTPVersion,ParsedHeader,ConfigDB)->
106    case httpd_util:lookup(ConfigDB,persistent_conn,true) of
107	true->
108	    case HTTPVersion of
109		%%If it is version prio to 1.1 kill the conneciton
110		[$H, $T, $T, $P, $\/, $1, $.,N] ->
111		    case httpd_util:key1search(ParsedHeader,"connection","keep-alive")of
112			%%if the connection isn't ordered to go down let it live
113			%%The keep-alive value is the older http/1.1 might be older
114			%%Clients that use it.
115			"keep-alive" when N >= 49 ->
116			    ?DEBUG("CONNECTION MODE: ~p",[true]),
117			    true;
118			"close" ->
119			    ?DEBUG("CONNECTION MODE: ~p",[false]),
120			    false;
121			Connect ->
122			    ?DEBUG("CONNECTION MODE: ~p VALUE: ~p",[false,Connect]),
123			    false
124		    end;
125		_ ->
126		    ?DEBUG("CONNECTION MODE: ~p VERSION: ~p",[false,HTTPVersion]),
127		    false
128
129	    end;
130	_ ->
131	    false
132    end.
133
134
135
136
137%%----------------------------------------------------------------------
138%% Control whether the last newline of the body is a part of the message or
139%%it is a part of the multipart message.
140%%----------------------------------------------------------------------
141maybe_remove_nl(Header,Rest) ->
142    case find_content_type(Header) of
143	false ->
144	    {ok,EntityBody,_}=regexp:sub(Rest,"\r\n\$",""),
145	    EntityBody;
146	{ok, Value} ->
147	    case string:str(Value, "multipart/form-data") of
148		0 ->
149		    {ok,EntityBody,_}=regexp:sub(Rest,"\r\n\$",""),
150		    EntityBody;
151		_ ->
152		    Rest
153	    end
154    end.
155
156%%----------------------------------------------------------------------
157%% Cet the content type of the incomming request
158%%----------------------------------------------------------------------
159
160
161find_content_type([]) ->
162    false;
163find_content_type([{Name,Value}|Tail]) ->
164    case httpd_util:to_lower(Name) of
165	"content-type" ->
166	    {ok, Value};
167	_ ->
168	    find_content_type(Tail)
169    end.
170
171%%----------------------------------------------------------------------
172%% Split the header to a list of strings where each string represents a
173%% HTTP header-field
174%%----------------------------------------------------------------------
175split_lines(Request) ->
176    split_lines(Request, [], []).
177split_lines([], CAcc, Acc) ->
178    lists:reverse([lists:reverse(CAcc)|Acc]);
179
180%%White space in the header fields are allowed but the new line must begin with LWS se
181%%rfc2616 chap 4.2. The rfc do not say what to
182split_lines([$\r, $\n, $\t |Rest], CAcc, Acc) ->
183    split_lines(Rest, [$\r, $\n |CAcc], Acc);
184
185split_lines([$\r, $\n, $\s |Rest], CAcc, Acc) ->
186    split_lines(Rest, [$\r, $\n |CAcc], Acc);
187
188split_lines([$\r, $\n|Rest], CAcc, Acc) ->
189    split_lines(Rest, [], [lists:reverse(CAcc)|Acc]);
190split_lines([Chr|Rest], CAcc, Acc) ->
191    split_lines(Rest, [Chr|CAcc], Acc).
192
193
194%%----------------------------------------------------------------------
195%% This is a 'hack' to stop people from trying to access directories/files
196%% relative to the ServerRoot.
197%%----------------------------------------------------------------------
198
199
200verify_request([Request, RequestURI]) ->
201    verify_request([Request, RequestURI, "HTTP/0.9"]);
202
203verify_request([Request, RequestURI, Protocol]) ->
204    NewRequestURI =
205	case string:str(RequestURI, "?") of
206	    0 ->
207		RequestURI;
208	    Ndx ->
209		string:left(RequestURI, Ndx)
210	end,
211   case string:str(NewRequestURI, "..") of
212	0 ->
213	    [Request, RequestURI, Protocol];
214	_ ->
215	    {bad_request, {forbidden, RequestURI}}
216    end;
217verify_request(Request) ->
218    Request.
219
220%%----------------------------------------------------------------------
221%% tagup_header
222%%
223%% Parses the header of a HTTP request and returns a key,value tuple
224%% list containing Name and Value of each header directive as of:
225%%
226%% Content-Type: multipart/mixed -> {"Content-Type", "multipart/mixed"}
227%%
228%% But in http/1.1 the field-names are case insencitive so now it must be
229%% Content-Type: multipart/mixed -> {"content-type", "multipart/mixed"}
230%% The standard furthermore says that leading and traling white space
231%% is not a part of the fieldvalue and shall therefore be removed.
232%%----------------------------------------------------------------------
233
234tagup_header([]) ->          [];
235tagup_header([Line|Rest]) -> [tag(Line, [])|tagup_header(Rest)].
236
237tag([], Tag) ->
238    {httpd_util:to_lower(lists:reverse(Tag)), ""};
239tag([$:|Rest], Tag) ->
240    {httpd_util:to_lower(lists:reverse(Tag)), httpd_util:strip(Rest)};
241tag([Chr|Rest], Tag) ->
242    tag(Rest, [Chr|Tag]).
243
244
245%%----------------------------------------------------------------------
246%% There are 3 possible forms of the reuqest URI
247%%
248%%  1. * When the request is not for a special assset. is is instead
249%%     to the server itself
250%%
251%%  2. absoluteURI the whole servername port and asset is in the request
252%%
253%%  3. The most common form that http/1.0 used abs path that is a path
254%%     to the requested asset.
255%5----------------------------------------------------------------------
256formatRequestUri("*")->
257    "*";
258formatRequestUri([$h,$t,$t,$p,$:,$\/,$\/|ServerAndPath]) ->
259   removeServer(ServerAndPath);
260
261formatRequestUri([$H,$T,$T,$P,$:,$\/,$\/|ServerAndPath]) ->
262    removeServer(ServerAndPath);
263
264formatRequestUri(ABSPath) ->
265    ABSPath.
266
267removeServer([$\/|Url])->
268    case Url of
269	[]->
270	    "/";
271        _->
272	    [$\/|Url]
273    end;
274removeServer([N|Url]) ->
275    removeServer(Url).
276
277
278formatAbsoluteURI([$h,$t,$t,$p,$:,$\/,$\/|Uri],ParsedHeader)->
279    [$H,$T,$T,$P,$:,$\/,$\/|Uri];
280
281formatAbsoluteURI([$H,$T,$T,$P,$:,$\/,$\/|Uri],ParsedHeader)->
282    [$H,$T,$T,$P,$:,$\/,$\/|Uri];
283
284formatAbsoluteURI(Uri,ParsedHeader)->
285    case httpd_util:key1search(ParsedHeader,"host") of
286	undefined ->
287	    nohost;
288	Host ->
289	    Host++Uri
290    end.
291%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
292%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
293%%Code below is crap from an older version shall be removed when
294%%transformation to http/1.1 is finished
295%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
296%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
297
298
299
300%request(Request) ->
301%    ?DEBUG("request -> entry with:"
302%	   "~n   Request: ~s",[Request]),
303 %   {BeforeEntityBody, Rest} = hsplit([], Request),
304 %   ?DEBUG("request ->"
305%	   "~n   BeforeEntityBody: ~p"
306%	   "~n   Rest:             ~p",[BeforeEntityBody, Rest]),
307%    [RequestLine|Header] = split_lines(BeforeEntityBody),
308%    ?DEBUG("request ->"
309%	   "~n   RequestLine: ~p"
310%	   "~n   Header:      ~p",[RequestLine,Header]),
311%    ParsedHeader = tagup_header(Header),
312%    ?DEBUG("request ->"
313%	   "~n   ParseHeader: ~p",[ParsedHeader]),
314%    EntityBody = maybe_remove_nl(ParsedHeader,Rest),
315%    ?DEBUG("request ->"
316%	   "~n   EntityBody: ~p",[EntityBody]),
317%    case verify_request(string:tokens(RequestLine," ")) of
318%	["HEAD", RequestURI, [$H,$T,$T,$P,$/,$1,$.,N]] ->
319%	    {ok, ["HEAD", formatRequestUri(RequestURI), [$H,$T,$T,$P,$/,$1,$.,N], RequestLine,
320%		 ParsedHeader, EntityBody]};
321%	["GET", RequestURI, "HTTP/0.9"] ->
322%	    {ok, ["GET", RequestURI, "HTTP/0.9", RequestLine, ParsedHeader,
323%		 EntityBody]};
324%	["GET", RequestURI, [$H,$T,$T,$P,$/,$1,$.,N]] ->
325%	    {ok, ["GET", formatRequestUri(RequestURI), [$H,$T,$T,$P,$/,$1,$.,N], RequestLine,
326%		 ParsedHeader,EntityBody]};
327%%	["POST", RequestURI, [$H,$T,$T,$P,$/,$1,$.,N]] ->
328%	    {ok, ["POST", formatRequestUri(RequestURI), [$H,$T,$T,$P,$/,$1,$.,N], RequestLine,
329%		 ParsedHeader, EntityBody]};
330%	[Method, RequestURI] ->
331%	    {not_implemented, RequestLine, Method, RequestURI,ParsedHeader,"HTTP/0.9"};
332%	[Method, RequestURI, HTTPVersion] ->
333%	    {not_implemented, RequestLine, Method, RequestURI,ParsedHeader, HTTPVersion};
334%	{bad_request, Reason} ->
335%	    {bad_request, Reason};
336%	Reason ->
337%	    {bad_request, "Unknown request method"}
338%    end.
339
340hsplit(Accu,[]) ->
341    {lists:reverse(Accu), []};
342hsplit(Accu, [ $\r, $\n, $\r, $\n | Tail]) ->
343    {lists:reverse(Accu), Tail};
344hsplit(Accu, [H|T]) ->
345    hsplit([H|Accu],T).
346