1;;; www/http.scm --- HTTP client library for Guile
2
3;; 	Copyright (C) 1997,2001,2002 Free Software Foundation, Inc.
4;;
5;; This program is free software; you can redistribute it and/or modify
6;; it under the terms of the GNU General Public License as published by
7;; the Free Software Foundation; either version 2, or (at your option)
8;; any later version.
9;;
10;; This program is distributed in the hope that it will be useful,
11;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13;; GNU General Public License for more details.
14;;
15;; You should have received a copy of the GNU General Public License
16;; along with this software; see the file COPYING.  If not, write to
17;; the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
18;; Boston, MA 02111-1307 USA
19;;
20
21;;; Commentary:
22
23;; This module exports the following variables and procedures:
24;;     http:version
25;;     http:user-agent
26;;    (http:message-version msg)
27;;    (http:message-status-code msg)
28;;    (http:message-status-text msg)
29;;    (http:message-status-ok? msg)
30;;    (http:status-ok? status)
31;;    (http:message-body msg)
32;;    (http:message-headers msg)
33;;    (http:message-header header msg)
34;;    (http:get url)
35;;    (http:open host . args)
36;;    (http:request method url . args)
37
38;;; Code:
39
40(define-module (www http)
41  :use-module (www url)
42  :use-module (ice-9 regex))
43
44
45;;; Compatibility
46
47(or (defined? 'read-line)
48    (use-modules (ice-9 rdelim)))
49
50
51;;; Variables that affect HTTP usage.
52
53(define-public http:version "HTTP/1.0")  ; bump up to 1.1 when ready
54(define-public http:user-agent "GuileHTTP 0.1")
55
56;; An HTTP message is represented by a vector:
57;;	#(VERSION STATUS-CODE STATUS-TEXT HEADERS BODY)
58;;
59;; Each of VERSION, STATUS-CODE, STATUS-TEXT are strings.  HEADERS
60;; is an alist of headers and their contents.  BODY is a single string.
61
62(define (http:make-message version statcode stattext headers body)
63  (vector version statcode stattext headers body))
64
65;;;; HTTP status predicates.
66
67;; (http:message-version MSG)
68;;	Returns the HTTP version in use in HTTP message MSG.
69;;
70;; (http:message-status-code MSG)
71;;	Returns the status code returned in HTTP message MSG.
72;;
73;; (http:message-status-text MSG)
74;;	Returns the text of the status line from HTTP message MSG.
75;;
76;; (http:message-status-ok? STATUS)
77;;	Returns #t if status code STATUS indicates a successful request,
78;;	#f otherwise.
79
80(define-public (http:message-version msg)     (vector-ref msg 0))
81(define-public (http:message-status-code msg) (vector-ref msg 1))
82(define-public (http:message-status-text msg) (vector-ref msg 2))
83(define-public (http:message-status-ok? msg)
84  (http:status-ok? (http:status-code msg)))
85(define-public (http:status-ok? status)
86  (char=? #\2 (string-ref status 0)))
87
88(define-public (http:message-body msg) (vector-ref msg 4))
89
90;; HTTP response headers functions
91;;
92;; An HTTP message header is represented here by a pair.  The CAR is a
93;; symbol representing the header name, and the CDR is a string
94;; containing the header text.  E.g.:
95;;
96;;	'((date . "Thu, 29 May 1997 23:48:27 GMT")
97;;	  (server . "NCSA/1.5.1")
98;;	  (last-modified . "Tue, 06 May 1997 18:32:03 GMT")
99;;	  (content-type . "text/html")
100;;	  (content-length . "8097"))
101;;
102;; Note: these symbols are all lowercase, although the original headers
103;; were mixed-case.  Clients using this library should keep this in
104;; mind, since Guile symbols are case-sensitive.
105;;
106;; FIXME: should headers with known semantics be parsed automatically?
107;;   I.e. should the Content-Length header automatically get string->number?
108;;   Should Date and Last-Modified headers be run through strptime?
109;;   It is advantageous to keep headers in a uniform format, but it may
110;;   be convenient to parse headers that have unambiguous meanings.
111;;
112;; (http:message-headers MSG)
113;;	Returns a list of the headers from HTTP message MSG.
114;; (http:message-header HEADER MSG)
115;;	Return the header field named HEADER from HTTP message MSG, or
116;;	#f if no such header is present in the message.
117
118(define-public (http:message-headers msg) (vector-ref msg 3))
119(define-public (http:message-header header msg)
120  (http:fetch-header header (http:message-headers msg)))
121
122(define (http:fetch-header header header-alist)
123  (assq-ref header-alist header))
124
125(define header-regex (make-regexp ": *"))
126
127(define (http:header-parse hd)
128  (let ((match (regexp-exec header-regex hd)))
129    (cons (string->symbol
130	   (apply string
131		  (map char-downcase
132		       (string->list (match:prefix match)))))
133	  (match:suffix match))))
134
135(define (parse-status-line statline)
136  (let* ((first (string-index statline #\space))
137	 (second (string-index statline #\space (1+ first))))
138    (list (make-shared-substring statline 0 first)
139	  (make-shared-substring statline (1+ first) second)
140	  (make-shared-substring statline (1+ second)))))
141
142
143;;; HTTP connection management functions.
144
145;; Open connections are cached on hostname in the connection-table.
146;; If an HTTP connection is already open to a particular host and TCP port,
147;; looking up the hostname and port number in connection-table will yield
148;; a Scheme port that may be used to communicate with that server.
149
150(define connection-table '())
151
152;; FIXME: you can only re-use a connection if the server sends the
153;; Keep-Alive header, I think.  With these definitions, we were trying to
154;; send more requests on connections the server assumed were dead.
155;; (define (add-open-connection! host tcp-port port)
156;;   (set! connection-table
157;; 	(assoc-set! connection-table (cons host tcp-port) port)))
158;; (define (get-open-connection host tcp-port)
159;;   (assoc-ref connection-table (cons host tcp-port)))
160
161(define (add-open-connection! host tcp-port port)
162  #f)
163(define (get-open-connection host tcp-port)
164  #f)
165
166
167;;; HTTP methods.
168
169;; Common methods: GET, POST etc.
170
171(define-public (http:get url)
172  ;; FIXME: if http:open returns an old connection that has been
173  ;; closed remotely, this will fail.
174  (http:request "GET" url))
175
176;; Connection-oriented functions:
177;;
178;; (http:open HOST [PORT])
179;;     Return an HTTP connection to HOST on TCP port PORT (default 80).
180;;     If an open connection already exists, use it; otherwise, create
181;;     a new socket.
182
183(define-public (http:open host . args)
184  (let ((port (cond ((null? args) 80)
185		    ((not (car args)) 80)
186		    (else (car args)))))
187    (or (get-open-connection host port)
188	(let* ((tcp (vector-ref (getproto "tcp") 2))
189	       (addr (car (vector-ref (gethost host) 4)))
190	       (sock (socket AF_INET SOCK_STREAM tcp)))
191	  (connect sock AF_INET addr port)
192	  (add-open-connection! host port sock)
193	  sock))))
194
195;; (http:request METHOD URL [HEADERS [BODY]])
196;;	Submit an HTTP request.
197;;     URL is a structure returned by url:parse.
198;;     METHOD is the name of some HTTP method, e.g. "GET" or "POST".
199;;     The optional HEADERS and BODY arguments are lists of strings
200;;     which describe HTTP messages.  The `Content-Length' header
201;;     is calculated automatically and should not be supplied.
202;;
203;;	Example usage:
204;;	  (http:request "get" parsed-url
205;;			(list "User-Agent: GuileHTTP 0.1"
206;;			      "Content-Type: text/plain"))
207;;       (http:request "post" parsed-url
208;;			(list "User-Agent: GuileHTTP 0.1"
209;;			      "Content-Type: unknown/x-www-form-urlencoded")
210;;			(list "search=Gosper"
211;;			      "case=no"
212;;			      "max_hits=50"))
213
214(define-public (http:request method url . args)
215  (let ((host     (url:host url))
216	(tcp-port (or (url:port url) 80))
217	(path     (format #f "/~A" (or (url:path url) ""))))
218    (let ((sock (http:open host tcp-port))
219	  (request (format #f "~A ~A ~A" method path http:version))
220	  (headers (if (pair? args) (car args) '()))
221	  (body    (if (and (pair? args) (pair? (cdr args)))
222		       (cadr args)
223		       '())))
224      (let* ((content-length
225	      (apply +
226		     (map (lambda (line)
227			    (+ 2 (string-length line)))	; + 2 for CRLF
228			  body)))
229	     (headers (if (positive? content-length)
230			  (cons (format #f "Content-Length: ~A" content-length)
231				headers)
232			  headers)))
233
234	(with-output-to-port sock
235	  (lambda ()
236	    (display-with-crlf request)
237	    (for-each display-with-crlf headers)
238	    (display "\r\n")
239	    (for-each display-with-crlf body)))
240
241	;; parse and add status line
242	;; also cons up a list of response headers
243	(let* ((response-status-line (sans-trailing-whitespace
244				      (read-line sock 'trim)))
245	       (response-headers
246		(let make-header-list ((ln (sans-trailing-whitespace
247					    (read-line sock 'trim)))
248				       (hlist '()))
249		  (if (= 0 (string-length ln))
250		      hlist
251		      (make-header-list (sans-trailing-whitespace
252					 (read-line sock 'trim))
253					(cons (http:header-parse ln)
254					      hlist)))))
255	       (response-status-fields
256		(parse-status-line response-status-line))
257	       (response-version (car response-status-fields))
258	       (response-code    (cadr response-status-fields))
259	       (response-text    (caddr response-status-fields)))
260
261	  ;; signal error if HTTP status is invalid
262	  ;; (or (http:status-ok? response-code)
263	  ;; (error 'http-status "HTTP server returned bad status"
264	  ;;        response-status-line))
265	  ;; Get message body: if Content-Length header was supplied, read
266	  ;; that many chars.  Otherwise, read until EOF
267
268	  (let ((content-length (http:fetch-header
269				 "content-length"
270				 response-headers)))
271	    (let ((response-body
272		   (if (and content-length
273			    (not (string-ci=? method "HEAD")))
274		       (read-n-chars (string->number content-length) sock)
275		       (with-output-to-string
276			 (lambda ()
277			   (while (not (eof-object? (peek-char sock)))
278				  (display (read-char sock))))))))
279
280	      ;; FIXME: what about keepalives?
281	      (close-port sock)
282
283	      (http:make-message response-version
284				 response-code
285				 response-text
286				 response-headers
287				 response-body))))))))
288
289
290
291;;; System interface cruft & string funcs
292
293(define (read-n-chars num . port-arg)
294  (let ((p (if (null? port-arg)
295	       (current-input-port)
296	       (car port-arg)))
297	(s (make-string num)))
298    (do ((i   0              (+ i 1))
299	 (ch  (read-char p)  (read-char p)))
300	((or (>= i num) (eof-object? ch)) s)
301      (string-set! s i ch))))
302
303(define (display-with-crlf line . p)
304  (apply display line p)
305  (apply display "\r\n" p))
306
307;; (sans-trailing-whitespace STR)
308;;	These are defined in module (ice-9 string-fun), so this code
309;;	will prob.  be discarded when the module system and boot-9
310;;	settle down.
311
312(define (sans-trailing-whitespace s)
313  (let ((st 0)
314	(end (string-length s)))
315    (while (and (< 0 end)
316		(char-whitespace? (string-ref s (1- end))))
317	   (set! end (1- end)))
318    (if (< end st)
319	""
320	(make-shared-substring s st end))))
321
322;;; www/http.scm ends here
323