// src/HttpClient.cc
2 // This file is part of libpbe; see http://svn.chezphil.org/libpbe/
3 // (C) 2008 Philip Endecott
4 
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation; either version 2 of the License, or
8 // any later version.
9 //
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 
19 #include "HttpClient.hh"
20 
21 #include "HttpRequest.hh"
22 #include "TcpClientSocket.hh"
23 #include "Gunzipper.hh"
24 #include "Bunzipper.hh"
25 #include "FileDescriptor.hh"
26 #include "rfcdate.hh"
27 #include "FileType.hh"
28 #include "atomic_ofstream.hh"
29 
30 #include <iostream>
31 #include <algorithm>
32 
33 using namespace std;
34 
35 
36 namespace pbe {
37 
38 
get(const URI & uri,int redirect_hops)39 HttpResponse HttpClient::get(const URI& uri, int redirect_hops)
40 {
41   if (uri.scheme != "http") {
42     throw "Not an HTTP URI";
43   }
44   HttpRequest req(uri);
45   req.headers["Date"] = rfc_date();
46   req.headers["Connection"] = "close";
47   req.headers["User-Agent"] = user_agent;
48   TcpClientSocket sock(uri.host, uri.port ? uri.port : 80);
49   sock.writeall(req.request_line() + req.headers_str() + "\r\n");
50   HttpResponse response(sock.readall());
51   if (response.status_code==301 || response.status_code==302
52    || response.status_code==303 || response.status_code==307) {
53     if (redirect_hops<=0) {
54       throw "Redirection limit reached";
55     }
56     return get(response.headers["Location"], redirect_hops-1);
57   }
58   return response;
59 }
60 
61 
62 template <typename Processor, bool use_etag>
get_process_save(const URI & uri,std::string fn,int redirect_hops)63 void HttpClient::get_process_save(const URI& uri, std::string fn, int redirect_hops)
64 {
65   if (uri.scheme != "http") {
66     throw "Not an HTTP URI";
67   }
68   const string etag_fn = fn+".etag";
69   HttpRequest req(uri);
70   req.headers["Date"] = rfc_date();
71   req.headers["Connection"] = "close";
72   req.headers["User-Agent"] = user_agent;
73   if (use_etag && file_exists(etag_fn) && file_exists(fn)) {
74     ifstream etagf(etag_fn.c_str());
75     string etag;
76     getline(etagf,etag);
77     req.headers["If-None-Match"] = etag;
78   }
79   TcpClientSocket sock(uri.host, uri.port ? uri.port : 80);
80   sock.writeall(req.request_line() + req.headers_str() + "\r\n");
81 
82   string resp_start;
83   string::iterator crlf2pos;
84   do {
85     bool timed_out = wait_until(sock.readable(), 30)==-1;
86     if (timed_out) {
87       throw TimedOut("read()");
88     }
89     resp_start.append(sock.readsome());
90     string crlf2 = "\r\n\r\n";
91     crlf2pos = search(resp_start.begin(),resp_start.end(), crlf2.begin(),crlf2.end());
92   } while (crlf2pos==resp_start.end());
93   HttpResponse response(string(resp_start.begin(),crlf2pos+4));
94 
95   if (use_etag && response.status_code==304) {
96     return;
97   }
98 
99   if (response.status_code==301 || response.status_code==302
100    || response.status_code==303 || response.status_code==307) {
101     if (redirect_hops<=0) {
102       throw "Redirection limit reached";
103     }
104     get_process_save<Processor,use_etag>(response.headers["Location"], fn, redirect_hops-1);
105     return;
106   }
107 
108   if (response.status_code != 200) {
109     throw response;
110   }
111 
112   string tmp_fn = fn+".part";
113 
114   {
115     FileDescriptor fd(tmp_fn,FileDescriptor::create);
116     try {
117       Processor proc;
118       fd.writeall(proc(string(crlf2pos+4,resp_start.end())));
119 
120       while (1) {
121         bool timed_out = wait_until(sock.readable(), 30)==-1;
122         if (timed_out) {
123           throw TimedOut("read()");
124         }
125         string s = sock.readsome();
126         if (s.empty()) {
127           break;
128         }
129         fd.writeall(proc(s));
130       }
131     }
132     catch (...) {
133       unlink(tmp_fn.c_str());
134       throw;
135     }
136   }
137 
138   // Ideally we should rename both the data file and the etag file atomically, but
139   // we can't do that.  A safe alternative is to delete the old etag file first; in this
140   // case the worst that can happen is that we end up with a valid data file and a
141   // missing etag file.
142 
143   if (use_etag) {
144     unlink(etag_fn.c_str());
145   }
146 
147   int rc = rename(tmp_fn.c_str(),fn.c_str());
148   if (rc==-1) {
149     throw_ErrnoException("rename("+tmp_fn+","+fn+")");
150   }
151 
152   if (use_etag && response.headers.find("ETag")!=response.headers.end()) {
153     atomic_ofstream etagf(etag_fn);
154     etagf << response.headers["ETag"];
155     etagf.commit();
156   }
157 }
158 
159 
// Processor that leaves the data untouched: operator() returns exactly the
// string it was given.
struct identity_processor {
  std::string operator()(std::string s) const
  {
    return s;
  }
};
163 
164 
get_save(const URI & uri,std::string fn,int redirect_hops)165 void HttpClient::get_save(const URI& uri, std::string fn, int redirect_hops)
166 {
167   return get_process_save<identity_processor,false>(uri,fn,redirect_hops);
168 }
169 
get_save_with_etag(const URI & uri,std::string fn,int redirect_hops)170 void HttpClient::get_save_with_etag(const URI& uri, std::string fn, int redirect_hops)
171 {
172   return get_process_save<identity_processor,true>(uri,fn,redirect_hops);
173 }
174 
175 
get_gunzip_save(const URI & uri,std::string fn,int redirect_hops)176 void HttpClient::get_gunzip_save(const URI& uri, std::string fn, int redirect_hops)
177 {
178   return get_process_save<Gunzipper,false>(uri,fn,redirect_hops);
179 }
180 
get_gunzip_save_with_etag(const URI & uri,std::string fn,int redirect_hops)181 void HttpClient::get_gunzip_save_with_etag(const URI& uri, std::string fn, int redirect_hops)
182 {
183   return get_process_save<Gunzipper,true>(uri,fn,redirect_hops);
184 }
185 
186 
187 #ifdef HAVE_BZIP
188 
get_bunzip_save(const URI & uri,std::string fn,int redirect_hops)189 void HttpClient::get_bunzip_save(const URI& uri, std::string fn, int redirect_hops)
190 {
191   return get_process_save<Bunzipper,false>(uri,fn,redirect_hops);
192 }
193 
get_bunzip_save_with_etag(const URI & uri,std::string fn,int redirect_hops)194 void HttpClient::get_bunzip_save_with_etag(const URI& uri, std::string fn, int redirect_hops)
195 {
196   return get_process_save<Bunzipper,true>(uri,fn,redirect_hops);
197 }
198 
199 
200 #endif
201 
202 };
203