1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) Index Data
3  * See the file LICENSE for details.
4  */
5 /**
6  * \file url.c
7  * \brief URL fetch utility
8  */
9 #if HAVE_CONFIG_H
10 #include <config.h>
11 #endif
12 
13 #include <yaz/url.h>
14 #include <yaz/comstack.h>
15 #include <yaz/log.h>
16 #include <yaz/wrbuf.h>
17 #include <yaz/cookie.h>
18 #include <yaz/poll.h>
19 
20 struct yaz_url {
21     ODR odr_in;
22     ODR odr_out;
23     char *proxy;
24     int max_redirects;
25     WRBUF w_error;
26     int verbose;
27     int timeout_sec;
28     int timeout_ns;
29     yaz_cookies_t cookies;
30 };
31 
yaz_url_create(void)32 yaz_url_t yaz_url_create(void)
33 {
34     yaz_url_t p = xmalloc(sizeof(*p));
35     p->odr_in = odr_createmem(ODR_DECODE);
36     p->odr_out = odr_createmem(ODR_ENCODE);
37     p->proxy = 0;
38     p->max_redirects = 10;
39     p->w_error = wrbuf_alloc();
40     p->verbose = 0;
41     p->timeout_sec = 30;
42     p->timeout_ns = 0;
43     p->cookies = yaz_cookies_create();
44     return p;
45 }
46 
/**
 * \brief Releases a URL fetch handle and everything it owns.
 * \param p handle to destroy; a null handle is ignored
 */
void yaz_url_destroy(yaz_url_t p)
{
    if (!p)
        return;

    odr_destroy(p->odr_in);
    odr_destroy(p->odr_out);
    xfree(p->proxy);
    wrbuf_destroy(p->w_error);
    yaz_cookies_destroy(p->cookies);
    xfree(p);
}
59 
/**
 * \brief Sets or clears the proxy used for subsequent requests.
 * \param p handle
 * \param proxy proxy address; a null pointer or an empty string clears
 *              the proxy setting
 *
 * The handle keeps its own copy of the string.
 */
void yaz_url_set_proxy(yaz_url_t p, const char *proxy)
{
    /* drop whatever was configured before */
    xfree(p->proxy);

    if (proxy == 0 || *proxy == '\0')
        p->proxy = 0;
    else
        p->proxy = xstrdup(proxy);
}
67 
/**
 * \brief Sets how many redirects yaz_url_exec() may follow.
 * \param p handle
 * \param num maximum number of redirects; with a value of 0 or less no
 *            redirect is followed
 */
void yaz_url_set_max_redirects(yaz_url_t p, int num)
{
    p->max_redirects = num;
}
72 
/**
 * \brief Turns dumping of HTTP traffic on or off.
 * \param p handle
 * \param num non-zero makes yaz_url_exec() write the encoded request and
 *            the raw response to stdout; 0 disables that
 */
void yaz_url_set_verbose(yaz_url_t p, int num)
{
    p->verbose = num;
}
77 
/**
 * \brief Sets the timeout applied while waiting for network I/O.
 * \param p handle
 * \param sec seconds part of the timeout
 * \param ns second timeout component, passed on to yaz_poll() unchanged
 *           (presumably nanoseconds, going by the name)
 */
void yaz_url_set_timeout(yaz_url_t p, int sec, int ns)
{
    p->timeout_sec = sec;
    p->timeout_ns = ns;
}
83 
/**
 * \brief Splits "user:password@" credentials off a URI.
 * \param nmem memory handle that all returned strings are allocated from
 * \param uri URI to inspect
 * \param uri_lean receives the URI without the credentials part; always set
 * \param http_user receives the user name, or 0 if no credentials found
 * \param http_pass receives the password, or 0 if no credentials found
 *
 * Credentials are only recognized when the first '/' of the URI is part of
 * "://" and the text between "://" and the next '/' holds a ':' followed
 * later by '@'. The last ':' before the '@' separates user and password.
 * For "http://user:pw@host/path" the result is "http://host/path", "user"
 * and "pw". In every other case uri_lean is a plain copy of uri.
 */
static void extract_user_pass(NMEM nmem,
                              const char *uri,
                              char **uri_lean, char **http_user,
                              char **http_pass)
{
    const char *first_slash = strchr(uri, '/');

    *uri_lean = 0;
    *http_user = 0;
    *http_pass = 0;

    /* first_slash > uri guarantees that first_slash - 1 is inside uri */
    if (first_slash && first_slash > uri
        && !strncmp(first_slash - 1, "://", 3))
    {
        /* first character after "://" */
        const char *authority = first_slash + 2;
        const char *last_colon = 0;
        const char *scan;

        for (scan = authority; *scan && *scan != '/' && *scan != '@'; scan++)
        {
            if (*scan == ':')
                last_colon = scan;
        }
        if (*scan == '@' && last_colon)
        {
            size_t prefix_len = authority - uri;

            /* lean URI = "scheme://" followed by what comes after '@' */
            *uri_lean = nmem_malloc(nmem, strlen(uri) + 1);
            memcpy(*uri_lean, uri, prefix_len);
            strcpy(*uri_lean + prefix_len, scan + 1);

            *http_user = nmem_strdupn(nmem, authority,
                                      last_colon - authority);
            *http_pass = nmem_strdupn(nmem, last_colon + 1,
                                      scan - (last_colon + 1));
        }
    }
    if (*uri_lean == 0)
        *uri_lean = nmem_strdup(nmem, uri);
}
121 
/**
 * \brief Returns the error text recorded by the latest yaz_url_exec() call.
 * \param p handle
 * \returns the contents of the handle's error buffer as a C string
 */
const char *yaz_url_get_error(yaz_url_t p)
{
    const char *message = wrbuf_cstr(p->w_error);

    return message;
}
126 
/**
 * \brief Writes the handle's current error text to the YAZ log as a warning.
 * \param p handle whose error buffer is logged
 */
static void log_warn(yaz_url_t p)
{
    const char *message = wrbuf_cstr(p->w_error);

    yaz_log(YLOG_WARN, "yaz_url: %s", message);
}
131 
yaz_url_exec(yaz_url_t p,const char * uri,const char * method,Z_HTTP_Header * user_headers,const char * buf,size_t len)132 Z_HTTP_Response *yaz_url_exec(yaz_url_t p, const char *uri,
133                               const char *method,
134                               Z_HTTP_Header *user_headers,
135                               const char *buf, size_t len)
136 {
137     Z_HTTP_Response *res;
138     int number_of_redirects = 0;
139 
140     odr_reset(p->odr_out);
141     yaz_cookies_reset(p->cookies);
142     wrbuf_rewind(p->w_error);
143     while (1)
144     {
145         void *add;
146         COMSTACK conn = 0;
147         int code;
148         const char *location = 0;
149         char *http_user = 0;
150         char *http_pass = 0;
151         char *uri_lean = 0;
152         int proxy_mode = 0;
153         int ret;
154         Z_GDU *gdu;
155 
156         res = 0;
157         extract_user_pass(p->odr_out->mem, uri, &uri_lean,
158                           &http_user, &http_pass);
159         conn = cs_create_host2(uri_lean, 0, &add, p->proxy, &proxy_mode);
160         if (!conn)
161         {
162             wrbuf_printf(p->w_error, "Can not resolve URL %s", uri);
163             log_warn(p);
164             return res;
165         }
166         gdu = z_get_HTTP_Request_uri(p->odr_out, uri_lean, 0, proxy_mode);
167         gdu->u.HTTP_Request->method = odr_strdup(p->odr_out, method);
168         yaz_cookies_request(p->cookies, p->odr_out, gdu->u.HTTP_Request);
169         for ( ; user_headers; user_headers = user_headers->next)
170         {
171             /* prefer new Host over user-supplied Host */
172             if (!strcmp(user_headers->name, "Host"))
173                 ;
174             /* prefer user-supplied User-Agent over YAZ' own */
175             else if (!strcmp(user_headers->name, "User-Agent"))
176                 z_HTTP_header_set(p->odr_out, &gdu->u.HTTP_Request->headers,
177                                   user_headers->name, user_headers->value);
178             else
179                 z_HTTP_header_add(p->odr_out, &gdu->u.HTTP_Request->headers,
180                                   user_headers->name, user_headers->value);
181         }
182         if (http_user && http_pass)
183             z_HTTP_header_add_basic_auth(p->odr_out,
184                                          &gdu->u.HTTP_Request->headers,
185                                          http_user, http_pass);
186         if (buf && len)
187         {
188             gdu->u.HTTP_Request->content_buf = (char *) buf;
189             gdu->u.HTTP_Request->content_len = len;
190         }
191         if (!z_GDU(p->odr_out, &gdu, 0, 0))
192         {
193             wrbuf_printf(p->w_error, "Can not encode HTTP request for URL %s",
194                          uri);
195             log_warn(p);
196         }
197         else if ((ret = cs_connect(conn, add)) < 0)
198         {
199             wrbuf_printf(p->w_error, "Can not connect to URL %s", uri);
200             log_warn(p);
201         }
202         else
203         {
204             char *netbuffer = 0;
205             int netlen = 0;
206             int len_out;
207             char *buf_out = odr_getbuf(p->odr_out, &len_out, 0);
208             int state = 0; /* 0=connect phase, 1=send, 2=recv */
209             if (p->verbose)
210                 fwrite(buf_out, 1, len_out, stdout);
211             if (!strcmp(gdu->u.HTTP_Request->method, "HEAD"))
212                 cs_set_head_only(conn, 1);
213             if (ret == 0)
214                 state = 1; /* connect complete, so send phase */
215             while (1)
216             {
217                 if (ret == 1) /* incomplete , wait */
218                 {
219                     struct yaz_poll_fd yp;
220                     enum yaz_poll_mask input_mask = yaz_poll_none;
221                     yaz_poll_add(input_mask, yaz_poll_except);
222                     if (conn->io_pending & CS_WANT_WRITE)
223                         yaz_poll_add(input_mask, yaz_poll_write);
224                     if (conn->io_pending & CS_WANT_READ)
225                         yaz_poll_add(input_mask, yaz_poll_read);
226                     yp.fd = cs_fileno(conn);
227                     yp.input_mask = input_mask;
228                     ret = yaz_poll(&yp, 1, p->timeout_sec, p->timeout_ns);
229                     if (ret == 0)
230                     {
231                         wrbuf_printf(p->w_error, "timeout URL %s", uri);
232                         break;
233                     }
234                     else if (ret < 0)
235                     {
236                         wrbuf_printf(p->w_error, "poll error URL %s", uri);
237                         break;
238                     }
239                 }
240                 if (state == 0) /* connect phase */
241                 {
242                     ret = cs_rcvconnect(conn);
243                     if (ret < 0)
244                     {
245                         wrbuf_printf(p->w_error,
246                                      "cs_rcvconnect failed for URL %s", uri);
247                         log_warn(p);
248                         break;
249                     }
250                     else if (ret == 0)
251                         state = 1;
252                 }
253                 else if (state == 1) /* write request phase */
254                 {
255                     ret = cs_put(conn, buf_out, len_out);
256                     if (ret < 0)
257                     {
258                         wrbuf_printf(p->w_error, "cs_put fail for URL %s", uri);
259                         log_warn(p);
260                         break;
261                     }
262                     else if (ret == 0)
263                     {
264                         state = 2;
265                     }
266                 }
267                 else if (state == 2) /* read response phase */
268                 {
269                     ret = cs_get(conn, &netbuffer, &netlen);
270                     if (ret  <= 0)
271                     {
272                         wrbuf_printf(p->w_error, "cs_get failed for URL %s",
273                                      uri);
274                         log_warn(p);
275                         break;
276                     }
277                     else if (ret > 1)
278                     {
279                         Z_GDU *gdu;
280                         if (p->verbose)
281                             fwrite(netbuffer, 1, ret, stdout);
282                         odr_setbuf(p->odr_in, netbuffer, ret, 0);
283                         if (!z_GDU(p->odr_in, &gdu, 0, 0)
284                             || gdu->which != Z_GDU_HTTP_Response)
285                         {
286                             wrbuf_printf(p->w_error, "HTTP decoding fail for "
287                                          "URL %s", uri);
288                             log_warn(p);
289                         }
290                         else
291                         {
292                             res = gdu->u.HTTP_Response;
293                             break;
294                         }
295                     }
296                 }
297             }
298             xfree(netbuffer);
299         }
300         cs_close(conn);
301         if (!res)
302             break;
303         code = res->code;
304         location = z_HTTP_header_lookup(res->headers, "Location");
305         if (++number_of_redirects <= p->max_redirects &&
306             location && (code == 301 || code == 302 || code == 307))
307         {
308             int host_change = 0;
309             const char *nlocation = yaz_check_location(p->odr_in, uri,
310                                                        location, &host_change);
311 
312             odr_reset(p->odr_out);
313             uri = odr_strdup(p->odr_out, nlocation);
314         }
315         else
316             break;
317         yaz_cookies_response(p->cookies, res);
318         odr_reset(p->odr_in);
319     }
320     return res;
321 }
322 
323 /*
324  * Local variables:
325  * c-basic-offset: 4
326  * c-file-style: "Stroustrup"
327  * indent-tabs-mode: nil
328  * End:
329  * vim: shiftwidth=4 tabstop=8 expandtab
330  */
331 
332