1 // src/HttpClient.hh
2 // This file is part of libpbe; see http://svn.chezphil.org/libpbe/
3 // (C) 2008 Philip Endecott
4
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation; either version 2 of the License, or
8 // any later version.
9 //
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
19 #include "HttpClient.hh"
20
21 #include "HttpRequest.hh"
22 #include "TcpClientSocket.hh"
23 #include "Gunzipper.hh"
24 #include "Bunzipper.hh"
25 #include "FileDescriptor.hh"
26 #include "rfcdate.hh"
27 #include "FileType.hh"
28 #include "atomic_ofstream.hh"
29
30 #include <iostream>
31 #include <algorithm>
32
33 using namespace std;
34
35
36 namespace pbe {
37
38
get(const URI & uri,int redirect_hops)39 HttpResponse HttpClient::get(const URI& uri, int redirect_hops)
40 {
41 if (uri.scheme != "http") {
42 throw "Not an HTTP URI";
43 }
44 HttpRequest req(uri);
45 req.headers["Date"] = rfc_date();
46 req.headers["Connection"] = "close";
47 req.headers["User-Agent"] = user_agent;
48 TcpClientSocket sock(uri.host, uri.port ? uri.port : 80);
49 sock.writeall(req.request_line() + req.headers_str() + "\r\n");
50 HttpResponse response(sock.readall());
51 if (response.status_code==301 || response.status_code==302
52 || response.status_code==303 || response.status_code==307) {
53 if (redirect_hops<=0) {
54 throw "Redirection limit reached";
55 }
56 return get(response.headers["Location"], redirect_hops-1);
57 }
58 return response;
59 }
60
61
62 template <typename Processor, bool use_etag>
get_process_save(const URI & uri,std::string fn,int redirect_hops)63 void HttpClient::get_process_save(const URI& uri, std::string fn, int redirect_hops)
64 {
65 if (uri.scheme != "http") {
66 throw "Not an HTTP URI";
67 }
68 const string etag_fn = fn+".etag";
69 HttpRequest req(uri);
70 req.headers["Date"] = rfc_date();
71 req.headers["Connection"] = "close";
72 req.headers["User-Agent"] = user_agent;
73 if (use_etag && file_exists(etag_fn) && file_exists(fn)) {
74 ifstream etagf(etag_fn.c_str());
75 string etag;
76 getline(etagf,etag);
77 req.headers["If-None-Match"] = etag;
78 }
79 TcpClientSocket sock(uri.host, uri.port ? uri.port : 80);
80 sock.writeall(req.request_line() + req.headers_str() + "\r\n");
81
82 string resp_start;
83 string::iterator crlf2pos;
84 do {
85 bool timed_out = wait_until(sock.readable(), 30)==-1;
86 if (timed_out) {
87 throw TimedOut("read()");
88 }
89 resp_start.append(sock.readsome());
90 string crlf2 = "\r\n\r\n";
91 crlf2pos = search(resp_start.begin(),resp_start.end(), crlf2.begin(),crlf2.end());
92 } while (crlf2pos==resp_start.end());
93 HttpResponse response(string(resp_start.begin(),crlf2pos+4));
94
95 if (use_etag && response.status_code==304) {
96 return;
97 }
98
99 if (response.status_code==301 || response.status_code==302
100 || response.status_code==303 || response.status_code==307) {
101 if (redirect_hops<=0) {
102 throw "Redirection limit reached";
103 }
104 get_process_save<Processor,use_etag>(response.headers["Location"], fn, redirect_hops-1);
105 return;
106 }
107
108 if (response.status_code != 200) {
109 throw response;
110 }
111
112 string tmp_fn = fn+".part";
113
114 {
115 FileDescriptor fd(tmp_fn,FileDescriptor::create);
116 try {
117 Processor proc;
118 fd.writeall(proc(string(crlf2pos+4,resp_start.end())));
119
120 while (1) {
121 bool timed_out = wait_until(sock.readable(), 30)==-1;
122 if (timed_out) {
123 throw TimedOut("read()");
124 }
125 string s = sock.readsome();
126 if (s.empty()) {
127 break;
128 }
129 fd.writeall(proc(s));
130 }
131 }
132 catch (...) {
133 unlink(tmp_fn.c_str());
134 throw;
135 }
136 }
137
138 // Ideally we should rename both the data file and the etag file atomically, but
139 // we can't do that. A safe alternative is to delete the old etag file first; in this
140 // case the worst that can happen is that we end up with a valid data file and a
141 // missing etag file.
142
143 if (use_etag) {
144 unlink(etag_fn.c_str());
145 }
146
147 int rc = rename(tmp_fn.c_str(),fn.c_str());
148 if (rc==-1) {
149 throw_ErrnoException("rename("+tmp_fn+","+fn+")");
150 }
151
152 if (use_etag && response.headers.find("ETag")!=response.headers.end()) {
153 atomic_ofstream etagf(etag_fn);
154 etagf << response.headers["ETag"];
155 etagf.commit();
156 }
157 }
158
159
// Pass-through Processor for get_process_save: each chunk of data handed to it
// is returned unchanged.
struct identity_processor {
  std::string operator()(std::string chunk) const
  {
    return chunk;
  }
};
163
164
get_save(const URI & uri,std::string fn,int redirect_hops)165 void HttpClient::get_save(const URI& uri, std::string fn, int redirect_hops)
166 {
167 return get_process_save<identity_processor,false>(uri,fn,redirect_hops);
168 }
169
get_save_with_etag(const URI & uri,std::string fn,int redirect_hops)170 void HttpClient::get_save_with_etag(const URI& uri, std::string fn, int redirect_hops)
171 {
172 return get_process_save<identity_processor,true>(uri,fn,redirect_hops);
173 }
174
175
get_gunzip_save(const URI & uri,std::string fn,int redirect_hops)176 void HttpClient::get_gunzip_save(const URI& uri, std::string fn, int redirect_hops)
177 {
178 return get_process_save<Gunzipper,false>(uri,fn,redirect_hops);
179 }
180
get_gunzip_save_with_etag(const URI & uri,std::string fn,int redirect_hops)181 void HttpClient::get_gunzip_save_with_etag(const URI& uri, std::string fn, int redirect_hops)
182 {
183 return get_process_save<Gunzipper,true>(uri,fn,redirect_hops);
184 }
185
186
187 #ifdef HAVE_BZIP
188
get_bunzip_save(const URI & uri,std::string fn,int redirect_hops)189 void HttpClient::get_bunzip_save(const URI& uri, std::string fn, int redirect_hops)
190 {
191 return get_process_save<Bunzipper,false>(uri,fn,redirect_hops);
192 }
193
get_bunzip_save_with_etag(const URI & uri,std::string fn,int redirect_hops)194 void HttpClient::get_bunzip_save_with_etag(const URI& uri, std::string fn, int redirect_hops)
195 {
196 return get_process_save<Bunzipper,true>(uri,fn,redirect_hops);
197 }
198
199
200 #endif
201
202 };
203