1/*  Part of SWI-Prolog
2
3    Author:        Jan Wielemaker
4    E-mail:        J.Wielemaker@vu.nl
5    WWW:           http://www.swi-prolog.org
6    Copyright (c)  2007-2016, University of Amsterdam
7                              VU University Amsterdam
8    All rights reserved.
9
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions
12    are met:
13
14    1. Redistributions of source code must retain the above copyright
15       notice, this list of conditions and the following disclaimer.
16
17    2. Redistributions in binary form must reproduce the above copyright
18       notice, this list of conditions and the following disclaimer in
19       the documentation and/or other materials provided with the
20       distribution.
21
22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33    POSSIBILITY OF SUCH DAMAGE.
34*/
35
36:- module(http_stream,
37          [ http_chunked_open/3,        % +Stream, -DataStream, +Options
38            stream_range_open/3,        % +Stream, -DataStream, +Options
39            multipart_open/3,           % +Stream, +DataStream, +Options)
40            multipart_open_next/1,      % +DataStream
41
42                                        % CGI Stream interaction
43            cgi_open/4,                 % +Stream, -DataStream, :Hook, +Options
44            cgi_property/2,             % +Stream, -Property
45            cgi_set/2,                  % +Stream, -Property
46            cgi_discard/1,              % +Stream
47            is_cgi_stream/1,            % +Stream
48            cgi_statistics/1            % ?Statistics
49          ]).
50:- use_foreign_library(foreign(http_stream)).
51:- public http_stream_debug/1.          % set debug level
52
53:- meta_predicate
54    stream_range_open(+,-,:).       % onclose option is module sensitive
55
56/** <module> HTTP Streams
57
58This module realises  encoding  and   decoding  filters,  implemented as
59Prolog streams that read/write to an  underlying stream. This allows for
60sequences of streams acting as an in-process pipeline.
61
62The predicate http_chunked_open/3 realises encoding  and decoding of the
63HTTP _Chunked_ encoding. This encoding is an obligatory part of the HTTP
641.1 specification. Messages are split into chunks, each preceeded by the
65length of the chunk. Chunked  encoding   allows  sending messages over a
66serial link (typically a TCP/IP stream) for  which the reader knows when
67the message is ended. Unlike standard HTTP   though, the sender does not
68need to know the message length  in   advance.  The  protocol allows for
69sending short chunks. This is  supported   totally  transparent  using a
70flush on the output stream.
71
72The predicate stream_range_open/3 handles the Content-length on an input
73stream for handlers that are designed  to   process  an entire file. The
74filtering stream claims end-of-file after reading  a specified number of
75bytes, dispite the fact that the underlying stream may be longer.
76
77@see    The HTTP 1.1 protocol http://www.w3.org/Protocols/rfc2616/rfc2616.html
78@author Jan Wielemaker
79*/
80
81%!  http_chunked_open(+RawStream, -DataStream, +Options) is det.
82%
83%   Create a stream to realise HTTP   chunked  encoding or decoding.
84%   The technique is similar to library(zlib), using a Prolog stream
85%   as a filter on another stream.  Options:
86%
87%           * close_parent(+Bool)
88%           If =true= (default =false=), the parent stream is closed
89%           if DataStream is closed.
90%
91%           * max_chunk_size(+PosInt)
92%           Define the maximum size of a chunk.  Default is the
93%           default buffer size of fully buffered streams (4096).
94%           Larger values may improve throughput.  It is also
95%           allowed to use =|set_stream(DataStream, buffer(line))|=
96%           on the data stream to get line-buffered output. See
97%           set_stream/2 for details. Switching buffering to =false=
98%           is supported.
99%
100%   Here is example code to write a chunked data to a stream
101%
102%   ==
103%           http_chunked_open(Out, S, []),
104%           format(S, 'Hello world~n', []),
105%           close(S).
106%   ==
107%
108%   If a stream is known to contain chunked data, we can extract
109%   this data using
110%
111%   ==
112%           http_chunked_open(In, S, []),
113%           read_stream_to_codes(S, Codes),
114%           close(S).
115%   ==
116%
117%   The current implementation does not  generate chunked extensions
118%   or an HTTP trailer. If such extensions  appear on the input they
119%   are silently ignored. This  is  compatible   with  the  HTTP 1.1
120%   specifications. Although a filtering  stream   is  an  excellent
121%   mechanism for encoding and decoding   the core chunked protocol,
122%   it does not well support out-of-band data.
123%
124%   After http_chunked_open/3, the encoding  of   DataStream  is the
125%   same as the  encoding  of  RawStream,   while  the  encoding  of
126%   RawStream is =octet=, the only value   allowed  for HTTP chunked
127%   streams. Closing the DataStream  restores   the  old encoding on
128%   RawStream.
129%
130%   @error  io_error(read, Stream) where the message context provides
131%           an indication of the problem.  This error is raised if
132%           the input is not valid HTTP chunked data.
133
134
135                 /*******************************
136                 *             RANGES           *
137                 *******************************/
138
139%!  stream_range_open(+RawStream, -DataStream, +Options) is det.
140%
141%   DataStream is a stream  whose  size   is  defined  by the option
142%   size(ContentLength).   Closing   DataStream   does   not   close
143%   RawStream.  Options processed:
144%
145%     - size(+Bytes)
146%     Number of bytes represented by the main stream.
147%     - onclose(:Closure)
148%     Calls call(Closure, RawStream, BytesLeft) when DataStream is
149%     closed. BytesLeft is the number of bytes of the range stream
150%     that have *not* been read, i.e., 0 (zero) if all data has been
151%     read from the stream when the range is closed. This was
152%     introduced for supporting Keep-alive in http_open/3 to
153%     reschedule the original stream for a new request if the data
154%     of the previous request was processed.
155
156
157                 /*******************************
158                 *            MULTIPART         *
159                 *******************************/
160
161%!  multipart_open(+Stream, -DataSttream, +Options) is det.
162%
163%   DataStream  is  a  stream  that  signals  `end_of_file`  if  the
164%   multipart _boundary_ is encountered. The stream  can be reset to
165%   read the next part using multipart_open_next/1. Options:
166%
167%     - close_parent(+Boolean)
168%     Close Stream if DataStream is closed.
169%     - boundary(+Text)
170%     Define the boundary string.  Text is an atom, string, code or
171%     character list.
172%
173%   All parts of a multipart input can   be read using the following
174%   skeleton:
175%
176%     ==
177%     process_multipart(Stream) :-
178%           multipart_open(Stream, DataStream, [boundary(...)]),
179%           process_parts(DataStream).
180%
181%     process_parts(DataStream) :-
182%           process_part(DataStream),
183%           (   multipart_open_next(DataStream)
184%           ->  process_parts(DataStream)
185%           ;   close(DataStream)
186%           ).
187%     ==
188%
189%   @license The multipart parser contains   code licensed under the
190%   MIT license, based on _node-formidable_   by Felix Geisendoerfer
191%   and Igor Afonov.
192
193%!  multipart_open_next(+DataStream) is semidet.
194%
195%   Prepare DataStream to read the  next   part  from  the multipart
196%   input data. Succeeds if a next part exists and fails if the last
197%   part was processed. Note that it is  mandatory to read each part
198%   up to the end_of_file.
199
200
201                 /*******************************
202                 *           CGI SUPPORT        *
203                 *******************************/
204
205%!  cgi_open(+OutStream, -CGIStream, :Hook, +Options) is det.
206%
207%   Process CGI output. OutStream is   normally the socket returning
208%   data to the HTTP client. CGIStream   is  the stream the (Prolog)
209%   code writes to. The CGIStream provides the following functions:
210%
211%       * At the end of the header, it calls Hook using
212%       call(Hook, header, Stream), where Stream is a stream holding
213%       the buffered header.
214%
215%       * If the stream is closed, it calls Hook using
216%       call(Hook, data, Stream), where Stream holds the buffered
217%       data.
218%
219%   The stream calls Hook, adding  the   event  and CGIStream to the
220%   closure. Defined events are:
221%
222%       * header
223%       Called  if  the  header  is   complete.  Typically  it  uses
224%       cgi_property/2 to extract the collected  header and combines
225%       these with the request and policies   to decide on encoding,
226%       transfer-encoding, connection parameters and   the  complete
227%       header (as a Prolog term). Typically   it  uses cgi_set/2 to
228%       associate these with the stream.
229%
230%       * send_header
231%       Called if the HTTP header must  be sent. This is immediately
232%       after setting the transfer encoding to =chunked= or when the
233%       CGI stream is closed.  Typically   it  requests  the current
234%       header, optionally the content-length and   sends the header
235%       to the original (client) stream.
236%
237%       * close
238%       Called from close/1 on the CGI   stream  after everything is
239%       complete.
240%
241%   The predicates cgi_property/2  and  cgi_set/2   can  be  used to
242%   control the stream and store status   info.  Terms are stored as
243%   Prolog records and can thus be transferred between threads.
244
245%!  cgi_property(+CGIStream, ?Property) is det.
246%
247%   Inquire the status of the CGI stream.  Defined properties are:
248%
249%       * request(-Term)
250%       The original request
251%       * header(-Term)
252%       Term is the header term as registered using cgi_set/2
253%       * client(-Stream)
254%       Stream is the original output stream used to create
255%       this stream.
256%       * thread(-ThreadID)
257%       ThreadID is the identifier of the `owning thread'
258%       * transfer_encoding(-Tranfer)
259%       One of =chunked= or =none=.
260%       * connection(-Connection)
261%       One of =Keep-Alive= or =close=
262%       * content_length(-ContentLength)
263%       Total byte-size of the content.  Available in the close
264%       handler if the transfer_encoding is =none=.
265%       * header_codes(-Codes)
266%       Codes represents the header collected.  Available in the
267%       header handler.
268%       * state(-State)
269%       One of =header=, =data= or =discarded=
270%       * id(-ID)
271%       Request sequence number.  This number is guaranteed to be
272%       unique.
273
274%!  cgi_set(+CGIStream, ?Property) is det.
275%
276%   Change one of the properies.  Supported properties are:
277%
278%       * request(+Term)
279%       Associate a request to the stream.
280%       * header(+Term)
281%       Register a reply header.  This header is normally retrieved
282%       from the =send_header= hook to send the reply header to the
283%       client.
284%       * connection(-Connection)
285%       One of =Keep-Alive= or =close=.
286%       * transfer_encoding(-Tranfer)
287%       One of =chunked= or =none=.  Initially set to =none=.  When
288%       switching to =chunked= from the =header= hook, it calls the
289%       =send_header= hook and if there is data queed this is send
290%       as first chunk.  Each subsequent write to the CGI stream
291%       emits a chunk.
292
293%!  cgi_discard(+CGIStream) is det.
294%
295%   Discard content produced sofar. It sets   the  state property to
296%   =discarded=, causing close to omit the   writing  the data. This
297%   must be used for an alternate output (e.g. an error page) if the
298%   page generator fails.
299
300%!  is_cgi_stream(+Stream) is semidet.
301%
302%   True if Stream is a CGI stream created using cgi_open/4.
303
304:- multifile
305    http:encoding_filter/3,                 % +Encoding, +In0,  -In
306    http:current_transfer_encoding/1.       % ?Encoding
307
308:- public
309    http:encoding_filter/3,
310    http:current_transfer_encoding/1.
311
312%!  http:encoding_filter(+Encoding, +In0, -In) is semidet.
313%
314%   Install a filter to deal with   =chunked= encoded messages. Used
315%   by library(http_open).
316
317http:encoding_filter(chunked, In0, In) :-
318    http_chunked_open(In0, In,
319                      [ close_parent(true)
320                      ]).
321
322%!  http:current_transfer_encoding(?Encoding) is semidet.
323%
324%   True if Encoding is supported. Used by library(http_open).
325
326http:current_transfer_encoding(chunked).
327
328%!  cgi_statistics(?Term)
329%
330%   Return statistics on the CGI stream subsystem. Currently defined
331%   statistics are:
332%
333%       * requests(-Integer)
334%       Total number of requests processed
335%       * bytes_sent(-Integer)
336%       Total number of bytes sent.
337
338cgi_statistics(requests(Requests)) :-
339    cgi_statistics_(Requests, _).
340cgi_statistics(bytes_sent(Bytes)) :-
341    cgi_statistics_(_, Bytes).
342
343