1/*  $Id$
2
3    Part of SWI-Prolog
4
5    Author:        Jan Wielemaker
6    E-mail:        J.Wielemaker@cs.vu.nl
7    WWW:           http://www.swi-prolog.org
8    Copyright (C): 1985-2010, University of Amsterdam, VU University Amsterdam
9
10    This program is free software; you can redistribute it and/or
11    modify it under the terms of the GNU General Public License
12    as published by the Free Software Foundation; either version 2
13    of the License, or (at your option) any later version.
14
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License for more details.
19
20    You should have received a copy of the GNU Lesser General Public
21    License along with this library; if not, write to the Free Software
22    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
23
24    As a special exception, if you link this library with other files,
25    compiled with a Free Software compiler, to produce an executable, this
26    library does not by itself cause the resulting executable to be covered
27    by the GNU General Public License. This exception does not however
28    invalidate any other reasons why the executable file might be covered by
29    the GNU General Public License.
30*/
31
32
33:- module(http_client,
34	  [ http_get/3,			% +URL, -Reply, +Options
35	    http_delete/3,		% +URL, -Reply, +Options
36	    http_post/4,		% +URL, +In, -Reply, +Options
37	    http_put/4,			% +URL, +In, -Reply, +Options
38	    http_read_data/3,		% +Header, -Data, +Options
39	    http_disconnect/1		% +What
40	  ]).
41:- use_module(library(socket)).
42:- use_module(library(url)).
43:- use_module(http_header).
44:- use_module(http_stream).
45:- use_module(library(debug)).
46:- use_module(library(memfile)).
47:- use_module(library(lists)).
48:- use_module(library(error)).
49:- use_module(library(option)).
50:- use_module(http_stream).
51:- use_module(dcg_basics).
52
53:- multifile
54	http_convert_data/4,		% http_read_data plugin-hook
55	post_data_hook/3,		% http_post_data/3 hook
56	open_connection/4,		% do_connect/5 hook
57	close_connection/4.
58
59%%	open_connection(+Scheme, +Address, -In, -Out) is semidet.
60%
61%	Hook to open a connection for the  given URL-scheme to the given
62%	address. If successful, In and  Out   must  be  two valid Prolog
63%	streams that connect to the server.
64%
65%	@param Scheme is the URL schema (=http= or =https=)
66%	@param Address is a term Host:Port as used by tcp_connect/4.
67
68%%	close_connection(+Scheme, +Address, +In, +Out) is semidet.
69%
70%	Hook to close a specific connection.   If the hook succeeds, the
71%	HTTP client assumes that In and Out are no longer to be used. If
72%	the hook fails, the client calls close/2 on both streams.
73
74:- dynamic
75	connection/5.			% Host:Port, Protocol, Thread, In, Out
76
77:- expects_dialect(swi).
78:- assert(system:swi_io).
79
80user_agent('SWI-Prolog (http://www.swi-prolog.org)').
81
82%%	connect(+UrlParts, -Read, -Write, +Options) is det.
83%%	disconnect(+UrlParts) is det.
84%
85%	Connect/disconnect on the basis of a parsed URL.
86
87connect(Parts, Read, Write, _) :-
88	memberchk(socket(Read, Write), Parts), !.
89connect(Parts, Read, Write, Options) :-
90	address(Parts, Address, Protocol, Options),
91	with_mutex(http_client_connect,
92		   connect2(Address, Protocol, Read, Write, Options)).
93
94connect2(Address, Protocol, In, Out, _) :-
95	thread_self(Self),
96	connection(Address, Protocol, Self, In, Out), !.
97connect2(Address, Protocol, In, Out, Options) :-
98	thread_self(Self),
99	do_connect(Address, Protocol, In, Out, Options),
100	assert(connection(Address, Protocol, Self, In, Out)).
101
102do_connect(Address, Protocol, In, Out, Options) :-
103	debug(http(client), 'http_client: Connecting to ~p ...', [Address]),
104	(   open_connection(Protocol, Address, In, Out)
105	->  true
106	;   tcp_socket(Socket),
107	    catch(tcp_connect(Socket, Address, In, Out),
108		  E,
109		  (   tcp_close_socket(Socket),
110		      throw(E)
111		  ))
112	),
113	debug(http(client), '\tok ~p --> ~p', [In, Out]),
114	(   memberchk(timeout(Timeout), Options)
115        ->  set_stream(In, timeout(Timeout))
116        ;   true
117	), !.
118do_connect(Address, _, _, _, _) :-		% can this happen!?
119	throw(error(failed(connect, Address), _)).
120
121
122disconnect(Parts) :-
123	protocol(Parts, Protocol),
124	address(Parts, Protocol, Address, []), !,
125	disconnect(Address, Protocol).
126
127disconnect(Address, Protocol) :-
128	with_mutex(http_client_connect,
129		   disconnect_locked(Address, Protocol)).
130
131disconnect_locked(Address, Protocol) :-
132	thread_self(Me),
133	debug(connection, '~w: Closing connection to ~w~n', [Me, Address]),
134	thread_self(Self),
135	retract(connection(Address, Protocol, Self, In, Out)), !,
136	disconnect(Protocol, Address, In, Out).
137
138disconnect(Protocol, Address, In, Out) :-
139	close_connection(Protocol, Address, In, Out), !.
140disconnect(_, _, In, Out) :-
141	close(Out, [force(true)]),
142	close(In,  [force(true)]).
143
144%%	http_disconnect(+Connections) is det.
145%
146%	Close down some connections. Currently Connections must have the
147%	value =all=, closing all connections.
148
149http_disconnect(all) :-
150	(   thread_self(Self),
151	    connection(Address, Protocol, Self, _, _),
152	    disconnect(Address, Protocol),
153	    fail
154	;   true
155	).
156
157address(_Parts, Host:Port, Protocol, Options) :-
158	(   memberchk(proxy(Host, Port, Protocol), Options)
159	->  true
160	;   memberchk(proxy(Host, Port), Options),
161	    Protocol = http
162	).
163address(Parts, Host:Port, Protocol, _Options) :-
164	memberchk(host(Host), Parts),
165	port(Parts, Port),
166	protocol(Parts, Protocol).
167
168port(Parts, Port) :-
169	memberchk(port(Port), Parts), !.
170port(Parts, 80) :-
171	memberchk(protocol(http), Parts).
172
173protocol(Parts, Protocol) :-
174	memberchk(protocol(Protocol), Parts), !.
175protocol(_, http).
176
177		 /*******************************
178		 *	        GET		*
179		 *******************************/
180
181%%	http_delete(+URL, -Data, +Options) is det.
182%
183%	Execute a DELETE method on the server.
184%
185%	@tbd Properly map the 201, 202 and 204 replies.
186
187http_delete(URL, Data, Options) :-
188	http_get(URL, Data, [method('DELETE')|Options]).
189
190
191%%	http_get(+URL, -Data, +Options) is det.
192%
193%	Get data from an HTTP server.
194
195http_get(URL, Data, Options) :-
196	atomic(URL), !,
197	parse_url(URL, Parts),
198        http_get(Parts, Data, Options).
199http_get(Parts, Data, Options) :-
200	must_be(list, Options),
201	memberchk(connection(Connection), Options),
202	downcase_atom(Connection, 'keep-alive'), !,
203	between(0, 1, _),
204	catch(http_do_get(Parts, Data, Options), E,
205	      (	  message_to_string(E, Msg),
206	          debug(keep_alive, 'Error: ~w; retrying~n', [Msg]),
207	          disconnect(Parts),
208		  fail
209	      )), !.
210http_get(Parts, Data, Options) :-
211	address(Parts, Address, Protocol, Options),
212	do_connect(Address, Protocol, Read, Write, Options),
213	call_cleanup(http_do_get([socket(Read, Write)|Parts], Data, Options),
214		     disconnect(Protocol, Address, Read, Write)).
215
216http_do_get(Parts, Data, Options) :-
217	connect(Parts, Read, Write, Options),
218	(   select(proxy(_,_), Options, Options1)
219	->  parse_url(Location, Parts)
220	;   http_location(Parts, Location),
221	    Options1 = Options
222	),
223	memberchk(host(Host), Parts),
224	option(method(Method), Options, 'GET'),
225	http_write_header(Write, Method, Location, Host,
226			  Options1, ReplyOptions),
227	write(Write, '\r\n'),
228	flush_output(Write),
229	http_read_reply(Read, Data0, ReplyOptions), !,
230	(   Data0 = redirect(Redirect),
231	    nonvar(Redirect)
232	->  debug(http(redirect), 'Redirect to ~w', [Redirect]),
233	    parse_url(Redirect, Parts, NewParts),
234	    http_get(NewParts, Data, Options)
235	;   Data = Data0
236	).
237http_do_get(Parts, _Data, _Options) :-
238	throw(error(failed(http_get, Parts), _)).
239
240http_read_reply(In, Data, Options) :-
241	between(0, 1, _),
242	    http_read_reply_header(In, Fields),
243	\+ memberchk(status(continue, _), Fields), !,
244	(   memberchk(location(Location), Fields),
245	    (   memberchk(status(moved, _), Fields)
246	    ;	memberchk(status(moved_temporary, _), Fields)
247	    ;	memberchk(status(see_other, _), Fields)
248	    )
249	->  Data = redirect(Location)
250	;   (   select(reply_header(Fields), Options, ReadOptions)
251	    ->  true
252	    ;   ReadOptions = Options
253	    ),
254	    http_read_data(In, Fields, Data, ReadOptions)
255	),
256	(   memberchk(connection(Connection), Fields),
257	    downcase_atom(Connection, 'keep-alive')
258	->  true
259	;   thread_self(Self),
260	    connection(Address, Protocol, Self, In, _Out)
261	->  disconnect(Address, Protocol)
262	;   true
263	).
264http_read_reply(In, _Data, _Options) :-
265	format(user_error, 'Get FAILED~n', []),
266	throw(error(failed(read_reply, In), _)).
267
268
269%%	http_write_header(+Out, +Method, +Location,
270%%			  +Host, +Options, -RestOptions) is det.
271%
272%	Write the request header.  It accepts the following options:
273%
274%		* http_version(Major-Minor)
275%		* connection(Connection)
276%		* user_agent(Agent)
277%		* request_header(Name=Value)
278%
279%	Remaining options are returned in RestOptions.
280
281http_write_header(Out, Method, Location, Host, Options, RestOptions) :-
282	(   select(http_version(Major-Minor), Options, Options1)
283	->  true
284	;   Major = 1, Minor = 1,
285	    Options1 = Options
286	),
287	format(Out, '~w ~w HTTP/~w.~w\r\n', [Method, Location, Major, Minor]),
288	format(Out, 'Host: ~w\r\n', [Host]),
289	(   select(connection(Connection), Options1, Options2)
290	->  true
291	;   Connection = 'Keep-Alive',
292	    Options2 = Options1
293	),
294	(   select(user_agent(Agent), Options2, Options3)
295	->  true
296	;   user_agent(Agent),
297	    Options3 = Options2
298	),
299	format(Out, 'User-Agent: ~w\r\n\
300		     Connection: ~w\r\n', [Agent, Connection]),
301	x_headers(Options3, Out, RestOptions).
302
303%%	x_headers(+Options, +Out, -RestOptions) is det.
304%
305%	Pass additional request options.  For example:
306%
307%		request_header('Accept-Language' = 'nl, en')
308%
309%	No checking is performed on the fieldname or value. Both are
310%	copied literally and in the order of appearance to the request.
311
312x_headers([], _, []).
313x_headers([H|T0], Out, Options) :-
314	x_header(H, Out), !,
315	x_headers(T0, Out, Options).
316x_headers([H|T0], Out, [H|T]) :-
317	x_headers(T0, Out, T).
318
319x_header(request_header(Name=Value), Out) :-
320	format(Out, '~w: ~w\r\n', [Name, Value]).
321x_header(proxy_authorization(ProxyAuthorization), Out) :-
322	proxy_auth_header(ProxyAuthorization, Out).
323x_header(range(Spec), Out) :-
324	Spec =.. [Unit, From, To],
325	(   To == end
326	->  ToT = ''
327	;   must_be(integer, To),
328	    ToT = To
329	),
330	format(Out, 'Range: ~w=~d-~w\r\n', [Unit, From, ToT]).
331
332proxy_auth_header(basic(User, Password), Out) :- !,
333	format(codes(Codes), '~w:~w', [User, Password]),
334	phrase(base64(Codes), Base64Codes),
335	format(Out, 'Proxy-Authorization: basic ~s\r\n', [Base64Codes]).
336proxy_auth_header(Auth, _) :-
337	domain_error(authorization, Auth).
338
339%%	http_read_data(+Fields, -Data, +Options) is det.
340%
341%	Read data from an HTTP connection.   Options must contain a term
342%	input(In) that provides the input stream   from the HTTP server.
343%	Fields is the parsed http  reply-header. Options is one of:
344%
345%		* to(stream(+WriteStream))
346%		Append the content of the message to Stream
347%		* to(atom)
348%		Return the reply as an atom
349%		* to(codes)
350%		Return the reply as a list of codes
351
352http_read_data(Fields, Data, Options) :-
353	memberchk(input(In), Fields),
354	(   http_read_data(In, Fields, Data, Options)
355	->  true
356	;   throw(error(failed(http_read_data), _))
357	).
358
359
360http_read_data(In, Fields, Data, Options) :-	% Transfer-encoding: chunked
361	select(transfer_encoding(chunked), Fields, RestFields), !,
362	http_chunked_open(In, DataStream, []),
363	call_cleanup(http_read_data(DataStream, RestFields, Data, Options),
364		     close(DataStream)).
365http_read_data(In, Fields, Data, Options) :-
366	memberchk(to(X), Options), !,
367	(   X = stream(Stream)
368	->  (   memberchk(content_length(Bytes), Fields)
369	    ->  copy_stream_data(In, Stream, Bytes)
370	    ;   copy_stream_data(In, Stream)
371	    )
372	;   new_memory_file(MemFile),
373	    open_memory_file(MemFile, write, Stream, [encoding(octet)]),
374	    (   memberchk(content_length(Bytes), Fields)
375	    ->  copy_stream_data(In, Stream, Bytes)
376	    ;   copy_stream_data(In, Stream)
377	    ),
378	    close(Stream),
379	    encoding(Fields, Encoding),
380	    (   X == atom
381	    ->  memory_file_to_atom(MemFile, Data0, Encoding)
382	    ;	X == codes
383	    ->	memory_file_to_codes(MemFile, Data0, Encoding)
384	    ;	domain_error(return_type, X)
385	    ),
386	    free_memory_file(MemFile),
387	    Data = Data0
388	).
389http_read_data(In, Fields, Data, _) :-
390	memberchk(content_type('application/x-www-form-urlencoded'), Fields), !,
391	http_read_data(In, Fields, Codes, [to(codes)]),
392	parse_url_search(Codes, Data).
393http_read_data(In, Fields, Data, Options) :- 			% call hook
394	(   select(content_type(Type), Options, Options1)
395	->  delete(Fields, content_type(_), Fields1),
396	    http_convert_data(In, [content_type(Type)|Fields1], Data, Options1)
397	;   http_convert_data(In, Fields, Data, Options)
398	), !.
399http_read_data(In, Fields, Data, Options) :-
400	http_read_data(In, Fields, Data, [to(atom)|Options]).
401
402
403encoding(Fields, utf8) :-
404	memberchk(content_type(Type), Fields),
405	(   sub_atom(Type, _, _, _, 'UTF-8')
406	->  true
407	;   sub_atom(Type, _, _, _, 'utf-8')
408	), !.
409encoding(_, octet).
410
411
412		 /*******************************
413		 *	       POST		*
414		 *******************************/
415
416%%	http_put(+URL, +In, -Out, +Options)
417%
418%	Issue an HTTP PUT request.
419
420http_put(URL, In, Out, Options) :-
421	http_post(URL, In, Out, [method('PUT')|Options]).
422
423
424%%	http_post(+URL, +In, -Out, +Options)
425%
426%	Issue an HTTP POST request, In is modelled after the reply
427%	from the HTTP server module.  In is one of:
428%
429%		* string(String)
430%		* string(MimeType, String)
431%		* html(Tokens)
432%		* file(MimeType, File)
433
434http_post(URL, In, Out, Options) :-
435	atomic(URL), !,
436	parse_url(URL, Parts),
437	http_post(Parts, In, Out, Options).
438http_post(Parts, In, Out, Options) :-
439	memberchk(connection(Connection), Options),
440	downcase_atom(Connection, 'keep-alive'), !,
441	between(0, 1, _),
442	catch(http_do_post(Parts, In, Out, Options), error(io_error, _),
443	      (	  disconnect(Parts),
444		  fail
445	      )), !.
446http_post(Parts, In, Out, Options) :-
447	address(Parts, Address, Protocol, Options),
448	do_connect(Address, Protocol, Read, Write, Options),
449	call_cleanup(http_do_post([socket(Read, Write)|Parts],
450				  In, Out, Options),
451		     disconnect(Protocol, Address, Read, Write)).
452
453http_do_post(Parts, In, Out, Options) :-
454	connect(Parts, Read, Write, Options),
455	(   memberchk(proxy(_,_), Options)
456	->  parse_url(Location, Parts)
457	;   http_location(Parts, Location)
458	),
459	memberchk(host(Host), Parts),
460	split_options(Options, PostOptions, ReplyOptions),
461	write_post_header(Write, Location, Host, In, PostOptions),
462	http_read_reply(Read, Out, ReplyOptions).
463
464write_post_header(Out, Location, Host, In, Options) :-
465	option(method(Method), Options, 'POST'),
466	http_write_header(Out, Method, Location, Host, Options, DataOptions),
467	http_post_data(In, Out, DataOptions),
468	flush_output(Out).
469
470post_option(connection(_)).
471post_option(http_version(_)).
472post_option(cache_control(_)).
473post_option(request_header(_)).
474
475split_options([], [], []).
476split_options([H|T], [H|P], R) :-
477	post_option(H), !,
478	split_options(T, P, R).
479split_options([H|T], P, [H|R]) :-
480	split_options(T, P, R).
481
482:- retract(system:swi_io).
483
484