1/* $Id$ 2 3 Part of SWI-Prolog 4 5 Author: Jan Wielemaker 6 E-mail: J.Wielemaker@cs.vu.nl 7 WWW: http://www.swi-prolog.org 8 Copyright (C): 1985-2010, University of Amsterdam, VU University Amsterdam 9 10 This program is free software; you can redistribute it and/or 11 modify it under the terms of the GNU General Public License 12 as published by the Free Software Foundation; either version 2 13 of the License, or (at your option) any later version. 14 15 This program is distributed in the hope that it will be useful, 16 but WITHOUT ANY WARRANTY; without even the implied warranty of 17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 GNU General Public License for more details. 19 20 You should have received a copy of the GNU Lesser General Public 21 License along with this library; if not, write to the Free Software 22 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 23 24 As a special exception, if you link this library with other files, 25 compiled with a Free Software compiler, to produce an executable, this 26 library does not by itself cause the resulting executable to be covered 27 by the GNU General Public License. This exception does not however 28 invalidate any other reasons why the executable file might be covered by 29 the GNU General Public License. 30*/ 31 32 33:- module(http_client, 34 [ http_get/3, % +URL, -Reply, +Options 35 http_delete/3, % +URL, -Reply, +Options 36 http_post/4, % +URL, +In, -Reply, +Options 37 http_put/4, % +URL, +In, -Reply, +Options 38 http_read_data/3, % +Header, -Data, +Options 39 http_disconnect/1 % +What 40 ]). 41:- use_module(library(socket)). 42:- use_module(library(url)). 43:- use_module(http_header). 44:- use_module(http_stream). 45:- use_module(library(debug)). 46:- use_module(library(memfile)). 47:- use_module(library(lists)). 48:- use_module(library(error)). 49:- use_module(library(option)). 50:- use_module(http_stream). 51:- use_module(dcg_basics). 52 53:- multifile 54 http_convert_data/4, % http_read_data plugin-hook 55 post_data_hook/3, % http_post_data/3 hook 56 open_connection/4, % do_connect/5 hook 57 close_connection/4. 58 59%% open_connection(+Scheme, +Address, -In, -Out) is semidet. 60% 61% Hook to open a connection for the given URL-scheme to the given 62% address. If successful, In and Out must be two valid Prolog 63% streams that connect to the server. 64% 65% @param Scheme is the URL schema (=http= or =https=) 66% @param Address is a term Host:Port as used by tcp_connect/4. 67 68%% close_connection(+Scheme, +Address, +In, +Out) is semidet. 69% 70% Hook to close a specific connection. If the hook succeeds, the 71% HTTP client assumes that In and Out are no longer to be used. If 72% the hook fails, the client calls close/2 on both streams. 73 74:- dynamic 75 connection/5. % Host:Port, Protocol, Thread, In, Out 76 77:- expects_dialect(swi). 78:- assert(system:swi_io). 79 80user_agent('SWI-Prolog (http://www.swi-prolog.org)'). 81 82%% connect(+UrlParts, -Read, -Write, +Options) is det. 83%% disconnect(+UrlParts) is det. 84% 85% Connect/disconnect on the basis of a parsed URL. 86 87connect(Parts, Read, Write, _) :- 88 memberchk(socket(Read, Write), Parts), !. 89connect(Parts, Read, Write, Options) :- 90 address(Parts, Address, Protocol, Options), 91 with_mutex(http_client_connect, 92 connect2(Address, Protocol, Read, Write, Options)). 93 94connect2(Address, Protocol, In, Out, _) :- 95 thread_self(Self), 96 connection(Address, Protocol, Self, In, Out), !. 97connect2(Address, Protocol, In, Out, Options) :- 98 thread_self(Self), 99 do_connect(Address, Protocol, In, Out, Options), 100 assert(connection(Address, Protocol, Self, In, Out)). 101 102do_connect(Address, Protocol, In, Out, Options) :- 103 debug(http(client), 'http_client: Connecting to ~p ...', [Address]), 104 ( open_connection(Protocol, Address, In, Out) 105 -> true 106 ; tcp_socket(Socket), 107 catch(tcp_connect(Socket, Address, In, Out), 108 E, 109 ( tcp_close_socket(Socket), 110 throw(E) 111 )) 112 ), 113 debug(http(client), '\tok ~p --> ~p', [In, Out]), 114 ( memberchk(timeout(Timeout), Options) 115 -> set_stream(In, timeout(Timeout)) 116 ; true 117 ), !. 118do_connect(Address, _, _, _, _) :- % can this happen!? 119 throw(error(failed(connect, Address), _)). 120 121 122disconnect(Parts) :- 123 protocol(Parts, Protocol), 124 address(Parts, Protocol, Address, []), !, 125 disconnect(Address, Protocol). 126 127disconnect(Address, Protocol) :- 128 with_mutex(http_client_connect, 129 disconnect_locked(Address, Protocol)). 130 131disconnect_locked(Address, Protocol) :- 132 thread_self(Me), 133 debug(connection, '~w: Closing connection to ~w~n', [Me, Address]), 134 thread_self(Self), 135 retract(connection(Address, Protocol, Self, In, Out)), !, 136 disconnect(Protocol, Address, In, Out). 137 138disconnect(Protocol, Address, In, Out) :- 139 close_connection(Protocol, Address, In, Out), !. 140disconnect(_, _, In, Out) :- 141 close(Out, [force(true)]), 142 close(In, [force(true)]). 143 144%% http_disconnect(+Connections) is det. 145% 146% Close down some connections. Currently Connections must have the 147% value =all=, closing all connections. 148 149http_disconnect(all) :- 150 ( thread_self(Self), 151 connection(Address, Protocol, Self, _, _), 152 disconnect(Address, Protocol), 153 fail 154 ; true 155 ). 156 157address(_Parts, Host:Port, Protocol, Options) :- 158 ( memberchk(proxy(Host, Port, Protocol), Options) 159 -> true 160 ; memberchk(proxy(Host, Port), Options), 161 Protocol = http 162 ). 163address(Parts, Host:Port, Protocol, _Options) :- 164 memberchk(host(Host), Parts), 165 port(Parts, Port), 166 protocol(Parts, Protocol). 167 168port(Parts, Port) :- 169 memberchk(port(Port), Parts), !. 170port(Parts, 80) :- 171 memberchk(protocol(http), Parts). 172 173protocol(Parts, Protocol) :- 174 memberchk(protocol(Protocol), Parts), !. 175protocol(_, http). 176 177 /******************************* 178 * GET * 179 *******************************/ 180 181%% http_delete(+URL, -Data, +Options) is det. 182% 183% Execute a DELETE method on the server. 184% 185% @tbd Properly map the 201, 202 and 204 replies. 186 187http_delete(URL, Data, Options) :- 188 http_get(URL, Data, [method('DELETE')|Options]). 189 190 191%% http_get(+URL, -Data, +Options) is det. 192% 193% Get data from an HTTP server. 194 195http_get(URL, Data, Options) :- 196 atomic(URL), !, 197 parse_url(URL, Parts), 198 http_get(Parts, Data, Options). 199http_get(Parts, Data, Options) :- 200 must_be(list, Options), 201 memberchk(connection(Connection), Options), 202 downcase_atom(Connection, 'keep-alive'), !, 203 between(0, 1, _), 204 catch(http_do_get(Parts, Data, Options), E, 205 ( message_to_string(E, Msg), 206 debug(keep_alive, 'Error: ~w; retrying~n', [Msg]), 207 disconnect(Parts), 208 fail 209 )), !. 210http_get(Parts, Data, Options) :- 211 address(Parts, Address, Protocol, Options), 212 do_connect(Address, Protocol, Read, Write, Options), 213 call_cleanup(http_do_get([socket(Read, Write)|Parts], Data, Options), 214 disconnect(Protocol, Address, Read, Write)). 215 216http_do_get(Parts, Data, Options) :- 217 connect(Parts, Read, Write, Options), 218 ( select(proxy(_,_), Options, Options1) 219 -> parse_url(Location, Parts) 220 ; http_location(Parts, Location), 221 Options1 = Options 222 ), 223 memberchk(host(Host), Parts), 224 option(method(Method), Options, 'GET'), 225 http_write_header(Write, Method, Location, Host, 226 Options1, ReplyOptions), 227 write(Write, '\r\n'), 228 flush_output(Write), 229 http_read_reply(Read, Data0, ReplyOptions), !, 230 ( Data0 = redirect(Redirect), 231 nonvar(Redirect) 232 -> debug(http(redirect), 'Redirect to ~w', [Redirect]), 233 parse_url(Redirect, Parts, NewParts), 234 http_get(NewParts, Data, Options) 235 ; Data = Data0 236 ). 237http_do_get(Parts, _Data, _Options) :- 238 throw(error(failed(http_get, Parts), _)). 239 240http_read_reply(In, Data, Options) :- 241 between(0, 1, _), 242 http_read_reply_header(In, Fields), 243 \+ memberchk(status(continue, _), Fields), !, 244 ( memberchk(location(Location), Fields), 245 ( memberchk(status(moved, _), Fields) 246 ; memberchk(status(moved_temporary, _), Fields) 247 ; memberchk(status(see_other, _), Fields) 248 ) 249 -> Data = redirect(Location) 250 ; ( select(reply_header(Fields), Options, ReadOptions) 251 -> true 252 ; ReadOptions = Options 253 ), 254 http_read_data(In, Fields, Data, ReadOptions) 255 ), 256 ( memberchk(connection(Connection), Fields), 257 downcase_atom(Connection, 'keep-alive') 258 -> true 259 ; thread_self(Self), 260 connection(Address, Protocol, Self, In, _Out) 261 -> disconnect(Address, Protocol) 262 ; true 263 ). 264http_read_reply(In, _Data, _Options) :- 265 format(user_error, 'Get FAILED~n', []), 266 throw(error(failed(read_reply, In), _)). 267 268 269%% http_write_header(+Out, +Method, +Location, 270%% +Host, +Options, -RestOptions) is det. 271% 272% Write the request header. It accepts the following options: 273% 274% * http_version(Major-Minor) 275% * connection(Connection) 276% * user_agent(Agent) 277% * request_header(Name=Value) 278% 279% Remaining options are returned in RestOptions. 280 281http_write_header(Out, Method, Location, Host, Options, RestOptions) :- 282 ( select(http_version(Major-Minor), Options, Options1) 283 -> true 284 ; Major = 1, Minor = 1, 285 Options1 = Options 286 ), 287 format(Out, '~w ~w HTTP/~w.~w\r\n', [Method, Location, Major, Minor]), 288 format(Out, 'Host: ~w\r\n', [Host]), 289 ( select(connection(Connection), Options1, Options2) 290 -> true 291 ; Connection = 'Keep-Alive', 292 Options2 = Options1 293 ), 294 ( select(user_agent(Agent), Options2, Options3) 295 -> true 296 ; user_agent(Agent), 297 Options3 = Options2 298 ), 299 format(Out, 'User-Agent: ~w\r\n\ 300 Connection: ~w\r\n', [Agent, Connection]), 301 x_headers(Options3, Out, RestOptions). 302 303%% x_headers(+Options, +Out, -RestOptions) is det. 304% 305% Pass additional request options. For example: 306% 307% request_header('Accept-Language' = 'nl, en') 308% 309% No checking is performed on the fieldname or value. Both are 310% copied literally and in the order of appearance to the request. 311 312x_headers([], _, []). 313x_headers([H|T0], Out, Options) :- 314 x_header(H, Out), !, 315 x_headers(T0, Out, Options). 316x_headers([H|T0], Out, [H|T]) :- 317 x_headers(T0, Out, T). 318 319x_header(request_header(Name=Value), Out) :- 320 format(Out, '~w: ~w\r\n', [Name, Value]). 321x_header(proxy_authorization(ProxyAuthorization), Out) :- 322 proxy_auth_header(ProxyAuthorization, Out). 323x_header(range(Spec), Out) :- 324 Spec =.. [Unit, From, To], 325 ( To == end 326 -> ToT = '' 327 ; must_be(integer, To), 328 ToT = To 329 ), 330 format(Out, 'Range: ~w=~d-~w\r\n', [Unit, From, ToT]). 331 332proxy_auth_header(basic(User, Password), Out) :- !, 333 format(codes(Codes), '~w:~w', [User, Password]), 334 phrase(base64(Codes), Base64Codes), 335 format(Out, 'Proxy-Authorization: basic ~s\r\n', [Base64Codes]). 336proxy_auth_header(Auth, _) :- 337 domain_error(authorization, Auth). 338 339%% http_read_data(+Fields, -Data, +Options) is det. 340% 341% Read data from an HTTP connection. Options must contain a term 342% input(In) that provides the input stream from the HTTP server. 343% Fields is the parsed http reply-header. Options is one of: 344% 345% * to(stream(+WriteStream)) 346% Append the content of the message to Stream 347% * to(atom) 348% Return the reply as an atom 349% * to(codes) 350% Return the reply as a list of codes 351 352http_read_data(Fields, Data, Options) :- 353 memberchk(input(In), Fields), 354 ( http_read_data(In, Fields, Data, Options) 355 -> true 356 ; throw(error(failed(http_read_data), _)) 357 ). 358 359 360http_read_data(In, Fields, Data, Options) :- % Transfer-encoding: chunked 361 select(transfer_encoding(chunked), Fields, RestFields), !, 362 http_chunked_open(In, DataStream, []), 363 call_cleanup(http_read_data(DataStream, RestFields, Data, Options), 364 close(DataStream)). 365http_read_data(In, Fields, Data, Options) :- 366 memberchk(to(X), Options), !, 367 ( X = stream(Stream) 368 -> ( memberchk(content_length(Bytes), Fields) 369 -> copy_stream_data(In, Stream, Bytes) 370 ; copy_stream_data(In, Stream) 371 ) 372 ; new_memory_file(MemFile), 373 open_memory_file(MemFile, write, Stream, [encoding(octet)]), 374 ( memberchk(content_length(Bytes), Fields) 375 -> copy_stream_data(In, Stream, Bytes) 376 ; copy_stream_data(In, Stream) 377 ), 378 close(Stream), 379 encoding(Fields, Encoding), 380 ( X == atom 381 -> memory_file_to_atom(MemFile, Data0, Encoding) 382 ; X == codes 383 -> memory_file_to_codes(MemFile, Data0, Encoding) 384 ; domain_error(return_type, X) 385 ), 386 free_memory_file(MemFile), 387 Data = Data0 388 ). 389http_read_data(In, Fields, Data, _) :- 390 memberchk(content_type('application/x-www-form-urlencoded'), Fields), !, 391 http_read_data(In, Fields, Codes, [to(codes)]), 392 parse_url_search(Codes, Data). 393http_read_data(In, Fields, Data, Options) :- % call hook 394 ( select(content_type(Type), Options, Options1) 395 -> delete(Fields, content_type(_), Fields1), 396 http_convert_data(In, [content_type(Type)|Fields1], Data, Options1) 397 ; http_convert_data(In, Fields, Data, Options) 398 ), !. 399http_read_data(In, Fields, Data, Options) :- 400 http_read_data(In, Fields, Data, [to(atom)|Options]). 401 402 403encoding(Fields, utf8) :- 404 memberchk(content_type(Type), Fields), 405 ( sub_atom(Type, _, _, _, 'UTF-8') 406 -> true 407 ; sub_atom(Type, _, _, _, 'utf-8') 408 ), !. 409encoding(_, octet). 410 411 412 /******************************* 413 * POST * 414 *******************************/ 415 416%% http_put(+URL, +In, -Out, +Options) 417% 418% Issue an HTTP PUT request. 419 420http_put(URL, In, Out, Options) :- 421 http_post(URL, In, Out, [method('PUT')|Options]). 422 423 424%% http_post(+URL, +In, -Out, +Options) 425% 426% Issue an HTTP POST request, In is modelled after the reply 427% from the HTTP server module. In is one of: 428% 429% * string(String) 430% * string(MimeType, String) 431% * html(Tokens) 432% * file(MimeType, File) 433 434http_post(URL, In, Out, Options) :- 435 atomic(URL), !, 436 parse_url(URL, Parts), 437 http_post(Parts, In, Out, Options). 438http_post(Parts, In, Out, Options) :- 439 memberchk(connection(Connection), Options), 440 downcase_atom(Connection, 'keep-alive'), !, 441 between(0, 1, _), 442 catch(http_do_post(Parts, In, Out, Options), error(io_error, _), 443 ( disconnect(Parts), 444 fail 445 )), !. 446http_post(Parts, In, Out, Options) :- 447 address(Parts, Address, Protocol, Options), 448 do_connect(Address, Protocol, Read, Write, Options), 449 call_cleanup(http_do_post([socket(Read, Write)|Parts], 450 In, Out, Options), 451 disconnect(Protocol, Address, Read, Write)). 452 453http_do_post(Parts, In, Out, Options) :- 454 connect(Parts, Read, Write, Options), 455 ( memberchk(proxy(_,_), Options) 456 -> parse_url(Location, Parts) 457 ; http_location(Parts, Location) 458 ), 459 memberchk(host(Host), Parts), 460 split_options(Options, PostOptions, ReplyOptions), 461 write_post_header(Write, Location, Host, In, PostOptions), 462 http_read_reply(Read, Out, ReplyOptions). 463 464write_post_header(Out, Location, Host, In, Options) :- 465 option(method(Method), Options, 'POST'), 466 http_write_header(Out, Method, Location, Host, Options, DataOptions), 467 http_post_data(In, Out, DataOptions), 468 flush_output(Out). 469 470post_option(connection(_)). 471post_option(http_version(_)). 472post_option(cache_control(_)). 473post_option(request_header(_)). 474 475split_options([], [], []). 476split_options([H|T], [H|P], R) :- 477 post_option(H), !, 478 split_options(T, P, R). 479split_options([H|T], P, [H|R]) :- 480 split_options(T, P, R). 481 482:- retract(system:swi_io). 483 484