1 /*
2
3 Copyright (c) 2003-2018, Arvid Norberg
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
9
10 * Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in
14 the documentation and/or other materials provided with the distribution.
15 * Neither the name of the author nor the names of its
16 contributors may be used to endorse or promote products derived
17 from this software without specific prior written permission.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 POSSIBILITY OF SUCH DAMAGE.
30
31 */
32
33 #include "libtorrent/config.hpp"
34
35 #include <functional>
36 #include <cstdlib>
37 #include <cstdio> // for snprintf
38 #include <cinttypes> // for PRId64 et.al.
39
40 #include "libtorrent/web_peer_connection.hpp"
41 #include "libtorrent/session.hpp"
42 #include "libtorrent/entry.hpp"
43 #include "libtorrent/bencode.hpp"
44 #include "libtorrent/alert_types.hpp"
45 #include "libtorrent/invariant_check.hpp"
46 #include "libtorrent/io.hpp"
47 #include "libtorrent/parse_url.hpp"
48 #include "libtorrent/peer_info.hpp"
49 #include "libtorrent/aux_/session_interface.hpp"
50 #include "libtorrent/alert_manager.hpp" // for alert_manager
51 #include "libtorrent/aux_/escape_string.hpp" // for escape_path
52 #include "libtorrent/hex.hpp" // for is_hex
53 #include "libtorrent/torrent.hpp"
54 #include "libtorrent/http_parser.hpp"
55
56 namespace libtorrent {
57
// NOTE(review): not referenced in the visible part of this file. Presumably
// the number of bytes budgeted for HTTP framing on top of the requested
// payload -- confirm against its users before changing it.
constexpr int request_size_overhead = 5000;

// forward declaration. The function is defined right after the constructor,
// which is its first caller.
std::string escape_file_path(file_storage const& storage, file_index_t index);
61
web_peer_connection(peer_connection_args const & pack,web_seed_t & web)62 web_peer_connection::web_peer_connection(peer_connection_args const& pack
63 , web_seed_t& web)
64 : web_connection_base(pack, web)
65 , m_url(web.url)
66 , m_web(&web)
67 , m_received_body(0)
68 , m_chunk_pos(0)
69 , m_partial_chunk_header(0)
70 , m_num_responses(0)
71 {
72 INVARIANT_CHECK;
73
74 if (!m_settings.get_bool(settings_pack::report_web_seed_downloads))
75 ignore_stats(true);
76
77 std::shared_ptr<torrent> tor = pack.tor.lock();
78 TORRENT_ASSERT(tor);
79
80 // if the web server is known not to support keep-alive. request 4MiB
81 // but we want to have at least piece size to prevent block based requests
82 int const min_size = std::max((web.supports_keepalive ? 1 : 4) * 1024 * 1024,
83 tor->torrent_file().piece_length());
84
85 // we prefer downloading large chunks from web seeds,
86 // but still want to be able to split requests
87 int const preferred_size = std::max(min_size, m_settings.get_int(settings_pack::urlseed_max_request_bytes));
88
89 prefer_contiguous_blocks(preferred_size / tor->block_size());
90
91 std::shared_ptr<torrent> t = associated_torrent().lock();
92 bool const single_file_request = t->torrent_file().num_files() == 1;
93
94 if (!single_file_request)
95 {
96 // handle incorrect .torrent files which are multi-file
97 // but have web seeds not ending with a slash
98 ensure_trailing_slash(m_path);
99 ensure_trailing_slash(m_url);
100 }
101 else
102 {
103 // handle .torrent files that don't include the filename in the url
104 if (m_path.empty()) m_path += '/';
105 if (m_path[m_path.size() - 1] == '/')
106 {
107 m_path += escape_string(t->torrent_file().name());
108 }
109
110 if (!m_url.empty() && m_url[m_url.size() - 1] == '/')
111 {
112 m_url += escape_file_path(t->torrent_file().files(), file_index_t(0));
113 }
114 }
115
116 // we want large blocks as well, so
117 // we can request more bytes at once
118 // this setting will merge adjacent requests
119 // into single larger ones
120 request_large_blocks(true);
121
122 #ifndef TORRENT_DISABLE_LOGGING
123 peer_log(peer_log_alert::info, "URL", "web_peer_connection %s", m_url.c_str());
124 #endif
125 }
126
escape_file_path(file_storage const & storage,file_index_t index)127 std::string escape_file_path(file_storage const& storage, file_index_t index)
128 {
129 std::string new_path { storage.file_path(index) };
130 #ifdef TORRENT_WINDOWS
131 convert_path_to_posix(new_path);
132 #endif
133 return escape_path(new_path);
134 }
135
on_connected()136 void web_peer_connection::on_connected()
137 {
138 if (m_web->have_files.empty())
139 {
140 incoming_have_all();
141 }
142 else if (m_web->have_files.none_set())
143 {
144 incoming_have_none();
145 m_web->interesting = false;
146 #ifndef TORRENT_DISABLE_LOGGING
147 peer_log(peer_log_alert::info, "WEB-SEED", "have no files, not interesting. %s", m_url.c_str());
148 #endif
149 }
150 else
151 {
152 std::shared_ptr<torrent> t = associated_torrent().lock();
153
154 // only advertise pieces that are contained within the files we have as
155 // indicated by m_web->have_files AND padfiles!
156 // it's important to include pieces that may overlap many files, as long
157 // as we have all those files, so instead of starting with a clear bitfield
158 // and setting the pieces corresponding to files we have, we do it the
159 // other way around. Start with assuming we have all files, and clear
160 // pieces overlapping with files we *don't* have.
161 typed_bitfield<piece_index_t> have;
162 file_storage const& fs = t->torrent_file().files();
163 have.resize(fs.num_pieces(), true);
164 for (auto const i : fs.file_range())
165 {
166 // if we have the file, no need to do anything
167 if (m_web->have_files.get_bit(i) || fs.pad_file_at(i)) continue;
168
169 auto const range = aux::file_piece_range_inclusive(fs, i);
170 for (piece_index_t k = std::get<0>(range); k < std::get<1>(range); ++k)
171 have.clear_bit(k);
172 }
173 t->set_seed(peer_info_struct(), false);
174 if (have.none_set())
175 {
176 incoming_have_none();
177 m_web->interesting = false;
178 #ifndef TORRENT_DISABLE_LOGGING
179 peer_log(peer_log_alert::info, "WEB-SEED", "have no pieces, not interesting. %s", m_url.c_str());
180 #endif
181 }
182 else
183 {
184 incoming_bitfield(have);
185 }
186 }
187
188 // TODO: 3 this should be an optional<piece_index_t>, piece index -1 should
189 // not be allowed
190 if (m_web->restart_request.piece != piece_index_t(-1))
191 {
192 // increase the chances of requesting the block
193 // we have partial data for already, to finish it
194 incoming_suggest(m_web->restart_request.piece);
195 }
196 web_connection_base::on_connected();
197 }
198
disconnect(error_code const & ec,operation_t op,disconnect_severity_t const error)199 void web_peer_connection::disconnect(error_code const& ec
200 , operation_t op, disconnect_severity_t const error)
201 {
202 if (is_disconnecting()) return;
203
204 if (op == operation_t::sock_write && ec == boost::system::errc::broken_pipe)
205 {
206 #ifndef TORRENT_DISABLE_LOGGING
207 // a write operation failed with broken-pipe. This typically happens
208 // with HTTP 1.0 servers that close their incoming channel of the TCP
209 // stream whenever they're done reading one full request. Instead of
210 // us bailing out and failing the entire request just because our
211 // write-end was closed, ignore it and keep reading until the read-end
212 // also is closed.
213 peer_log(peer_log_alert::info, "WRITE_DIRECTION", "CLOSED");
214 #endif
215
216 // prevent the peer from trying to send anything more
217 m_send_buffer.clear();
218
219 // when the web server closed our write-end of the socket (i.e. its
220 // read-end), if it's an HTTP 1.0 server. we will stop sending more
221 // requests. We'll close the connection once we receive the last bytes,
222 // and our read end is closed as well.
223 incoming_choke();
224 return;
225 }
226
227 if (op == operation_t::connect && m_web && !m_web->endpoints.empty())
228 {
229 // we failed to connect to this IP. remove it so that the next attempt
230 // uses the next IP in the list.
231 m_web->endpoints.erase(m_web->endpoints.begin());
232 }
233
234 if (ec == errors::uninteresting_upload_peer && m_web)
235 {
236 // if this is an "ephemeral" web seed, it means it was added by receiving
237 // an HTTP redirect. If we disconnect because we're not interested in any
238 // of its pieces, mark it as uninteresting, to avoid reconnecting to it
239 // repeatedly
240 if (m_web->ephemeral) m_web->interesting = false;
241
242 // if the web seed is not ephemeral, but we're still not interested. That
243 // implies that all files either have failed with 404 or with a
244 // redirection to a different web server.
245 m_web->retry = std::max(m_web->retry, aux::time_now32()
246 + seconds32(m_settings.get_int(settings_pack::urlseed_wait_retry)));
247 TORRENT_ASSERT(m_web->retry > aux::time_now32());
248 }
249
250 std::shared_ptr<torrent> t = associated_torrent().lock();
251
252 if (!m_requests.empty() && !m_file_requests.empty()
253 && !m_piece.empty() && m_web)
254 {
255 #ifndef TORRENT_DISABLE_LOGGING
256 if (should_log(peer_log_alert::info))
257 {
258 peer_log(peer_log_alert::info, "SAVE_RESTART_DATA"
259 , "data: %d req: %d off: %d"
260 , int(m_piece.size()), int(m_requests.front().piece)
261 , m_requests.front().start);
262 }
263 #endif
264 m_web->restart_request = m_requests.front();
265 if (!m_web->restart_piece.empty())
266 {
267 // we're about to replace a different restart piece
268 // buffer. So it was wasted download
269 if (t) t->add_redundant_bytes(int(m_web->restart_piece.size())
270 , waste_reason::piece_closing);
271 }
272 m_web->restart_piece.swap(m_piece);
273
274 // we have to do this to not count this data as redundant. The
275 // upper layer will call downloading_piece_progress and assume
276 // it's all wasted download. Since we're saving it here, it isn't.
277 m_requests.clear();
278 }
279
280 if (m_web && !m_web->supports_keepalive && error == peer_connection_interface::normal)
281 {
282 // if the web server doesn't support keepalive and we were
283 // disconnected as a graceful EOF, reconnect right away
284 if (t) get_io_service().post(
285 std::bind(&torrent::maybe_connect_web_seeds, t));
286 }
287
288 if (error >= failure)
289 {
290 m_web->retry = std::max(m_web->retry, aux::time_now32()
291 + seconds32(m_settings.get_int(settings_pack::urlseed_wait_retry)));
292 }
293
294 peer_connection::disconnect(ec, op, error);
295 if (t) t->disconnect_web_seed(this);
296 }
297
downloading_piece_progress() const298 piece_block_progress web_peer_connection::downloading_piece_progress() const
299 {
300 if (m_requests.empty()) return {};
301
302 std::shared_ptr<torrent> t = associated_torrent().lock();
303 TORRENT_ASSERT(t);
304
305 piece_block_progress ret;
306
307 ret.piece_index = m_requests.front().piece;
308 ret.bytes_downloaded = int(m_piece.size());
309 // this is used to make sure that the block_index stays within
310 // bounds. If the entire piece is downloaded, the block_index
311 // would otherwise point to one past the end
312 int correction = m_piece.empty() ? 0 : -1;
313 ret.block_index = (m_requests.front().start + int(m_piece.size()) + correction) / t->block_size();
314 TORRENT_ASSERT(ret.block_index < int(piece_block::invalid.block_index));
315 TORRENT_ASSERT(ret.piece_index < piece_block::invalid.piece_index);
316
317 ret.full_block_bytes = t->block_size();
318 piece_index_t const last_piece = t->torrent_file().last_piece();
319 if (ret.piece_index == last_piece && ret.block_index
320 == t->torrent_file().piece_size(last_piece) / t->block_size())
321 {
322 ret.full_block_bytes = t->torrent_file().piece_size(last_piece) % t->block_size();
323 }
324 return ret;
325 }
326
write_request(peer_request const & r)327 void web_peer_connection::write_request(peer_request const& r)
328 {
329 INVARIANT_CHECK;
330
331 std::shared_ptr<torrent> t = associated_torrent().lock();
332 TORRENT_ASSERT(t);
333
334 TORRENT_ASSERT(t->valid_metadata());
335
336 torrent_info const& info = t->torrent_file();
337 peer_request req = r;
338
339 std::string request;
340 request.reserve(400);
341
342 int size = r.length;
343 const int block_size = t->block_size();
344 const int piece_size = t->torrent_file().piece_length();
345 peer_request pr{};
346
347 while (size > 0)
348 {
349 int request_offset = r.start + r.length - size;
350 pr.start = request_offset % piece_size;
351 pr.length = std::min(block_size, size);
352 pr.piece = piece_index_t(static_cast<int>(r.piece) + request_offset / piece_size);
353 m_requests.push_back(pr);
354
355 if (m_web->restart_request == m_requests.front())
356 {
357 m_piece.swap(m_web->restart_piece);
358 peer_request const& front = m_requests.front();
359 TORRENT_ASSERT(front.length > int(m_piece.size()));
360
361 #ifndef TORRENT_DISABLE_LOGGING
362 peer_log(peer_log_alert::info, "RESTART_DATA",
363 "data: %d req: (%d, %d) size: %d"
364 , int(m_piece.size()), static_cast<int>(front.piece), front.start
365 , front.start + front.length - 1);
366 #else
367 TORRENT_UNUSED(front);
368 #endif
369
370 req.start += int(m_piece.size());
371 req.length -= int(m_piece.size());
372
373 // just to keep the accounting straight for the upper layer.
374 // it doesn't know we just re-wrote the request
375 incoming_piece_fragment(int(m_piece.size()));
376 m_web->restart_request.piece = piece_index_t(-1);
377 }
378
379 #if 0
380 std::cerr << this << " REQ: p: " << pr.piece << " " << pr.start << std::endl;
381 #endif
382 size -= pr.length;
383 }
384
385 #ifndef TORRENT_DISABLE_LOGGING
386 peer_log(peer_log_alert::outgoing_message, "REQUESTING", "(piece: %d start: %d) - (piece: %d end: %d)"
387 , static_cast<int>(r.piece), r.start
388 , static_cast<int>(pr.piece), pr.start + pr.length);
389 #endif
390
391 bool const single_file_request = t->torrent_file().num_files() == 1;
392 int const proxy_type = m_settings.get_int(settings_pack::proxy_type);
393 bool const using_proxy = (proxy_type == settings_pack::http
394 || proxy_type == settings_pack::http_pw) && !m_ssl;
395
396 // the number of pad files that have been "requested". In case we _only_
397 // request padfiles, we can't rely on handling them in the on_receive()
398 // callback (because we won't receive anything), instead we have to post a
399 // pretend read callback where we can deliver the zeroes for the partfile
400 int num_pad_files = 0;
401
402 // TODO: 3 do we really need a special case here? wouldn't the multi-file
403 // case handle single file torrents correctly too?
404 if (single_file_request)
405 {
406 file_request_t file_req;
407 file_req.file_index = file_index_t(0);
408 file_req.start = std::int64_t(static_cast<int>(req.piece)) * info.piece_length()
409 + req.start;
410 file_req.length = req.length;
411
412 request += "GET ";
413 // do not encode single file paths, they are
414 // assumed to be encoded in the torrent file
415 request += using_proxy ? m_url : m_path;
416 request += " HTTP/1.1\r\n";
417 add_headers(request, m_settings, using_proxy);
418 request += "\r\nRange: bytes=";
419 request += to_string(file_req.start).data();
420 request += "-";
421 request += to_string(file_req.start + file_req.length - 1).data();
422 request += "\r\n\r\n";
423 m_first_request = false;
424
425 m_file_requests.push_back(file_req);
426 }
427 else
428 {
429 std::vector<file_slice> files = info.orig_files().map_block(req.piece, req.start
430 , req.length);
431
432 for (auto const &f : files)
433 {
434 file_request_t file_req;
435 file_req.file_index = f.file_index;
436 file_req.start = f.offset;
437 file_req.length = int(f.size);
438
439 if (info.orig_files().pad_file_at(f.file_index))
440 {
441 m_file_requests.push_back(file_req);
442 ++num_pad_files;
443 continue;
444 }
445
446 request += "GET ";
447 if (using_proxy)
448 {
449 // m_url is already a properly escaped URL
450 // with the correct slashes. Don't encode it again
451 request += m_url;
452 }
453
454 auto redirection = m_web->redirects.find(f.file_index);
455 if (redirection != m_web->redirects.end())
456 {
457 auto const& redirect = redirection->second;
458 // in case of http proxy "request" already contains m_url with trailing slash, so let's skip dup slash
459 bool const trailing_slash = using_proxy && !redirect.empty() && redirect[0] == '/';
460 request.append(redirect, trailing_slash, std::string::npos);
461 }
462 else
463 {
464 if (!using_proxy)
465 {
466 // m_path is already a properly escaped URL
467 // with the correct slashes. Don't encode it again
468 request += m_path;
469 }
470
471 request += escape_file_path(info.orig_files(), f.file_index);
472 }
473 request += " HTTP/1.1\r\n";
474 add_headers(request, m_settings, using_proxy);
475 request += "\r\nRange: bytes=";
476 request += to_string(f.offset).data();
477 request += "-";
478 request += to_string(f.offset + f.size - 1).data();
479 request += "\r\n\r\n";
480 m_first_request = false;
481
482 #if 0
483 std::cerr << this << " SEND-REQUEST: f: " << f.file_index
484 << " s: " << f.offset
485 << " e: " << (f.offset + f.size - 1) << std::endl;
486 #endif
487 // TODO: 3 file_index_t should not allow negative values
488 TORRENT_ASSERT(f.file_index >= file_index_t(0));
489
490 m_file_requests.push_back(file_req);
491 }
492 }
493
494 if (num_pad_files == int(m_file_requests.size()))
495 {
496 get_io_service().post(std::bind(
497 &web_peer_connection::on_receive_padfile,
498 std::static_pointer_cast<web_peer_connection>(self())));
499 return;
500 }
501
502 #ifndef TORRENT_DISABLE_LOGGING
503 peer_log(peer_log_alert::outgoing_message, "REQUEST", "%s", request.c_str());
504 #endif
505
506 send_buffer(request);
507 }
508
509 namespace {
510
get_peer_name(http_parser const & p,std::string const & host)511 std::string get_peer_name(http_parser const& p, std::string const& host)
512 {
513 std::string ret = "URL seed @ ";
514 ret += host;
515
516 std::string const& server_version = p.header("server");
517 if (!server_version.empty())
518 {
519 ret += " (";
520 ret += server_version;
521 ret += ")";
522 }
523 return ret;
524 }
525
get_range(http_parser const & parser,error_code & ec)526 std::tuple<std::int64_t, std::int64_t> get_range(
527 http_parser const& parser, error_code& ec)
528 {
529 std::int64_t range_start;
530 std::int64_t range_end;
531 if (parser.status_code() == 206)
532 {
533 std::tie(range_start, range_end) = parser.content_range();
534 if (range_start < 0 || range_end < range_start)
535 {
536 ec = errors::invalid_range;
537 range_start = 0;
538 range_end = 0;
539 }
540 else
541 {
542 // the http range is inclusive
543 range_end++;
544 }
545 }
546 else
547 {
548 range_start = 0;
549 range_end = parser.content_length();
550 if (range_end < 0)
551 {
552 range_end = 0;
553 ec = errors::no_content_length;
554 }
555 }
556 return std::make_tuple(range_start, range_end);
557 }
558 }
559
560 // --------------------------
561 // RECEIVE DATA
562 // --------------------------
563
received_invalid_data(piece_index_t const index,bool single_peer)564 bool web_peer_connection::received_invalid_data(piece_index_t const index, bool single_peer)
565 {
566 if (!single_peer) return peer_connection::received_invalid_data(index, single_peer);
567
568 // when a web seed fails a hash check, do the following:
569 // 1. if the whole piece only overlaps a single file, mark that file as not
570 // have for this peer
571 // 2. if the piece overlaps more than one file, mark the piece as not have
572 // for this peer
573 // 3. if it's a single file torrent, just ban it right away
574 // this handles the case where web seeds may have some files updated but not other
575
576 std::shared_ptr<torrent> t = associated_torrent().lock();
577 file_storage const& fs = t->torrent_file().files();
578
579 // single file torrent
580 if (fs.num_files() == 1) return peer_connection::received_invalid_data(index, single_peer);
581
582 std::vector<file_slice> files = fs.map_block(index, 0, fs.piece_size(index));
583
584 if (files.size() == 1)
585 {
586 // assume the web seed has a different copy of this specific file
587 // than what we expect, and pretend not to have it.
588 auto const range = file_piece_range_inclusive(fs, files[0].file_index);
589 for (piece_index_t i = std::get<0>(range); i != std::get<1>(range); ++i)
590 incoming_dont_have(i);
591 }
592 else
593 {
594 incoming_dont_have(index);
595 }
596
597 peer_connection::received_invalid_data(index, single_peer);
598
599 // if we don't think we have any of the files, allow banning the web seed
600 if (num_have_pieces() == 0) return true;
601
602 // don't disconnect, we won't request anything from this file again
603 return false;
604 }
605
// posted to the io service by write_request() when the number of pad files
// it counted equals the number of queued file requests. In that case no HTTP
// request is sent, so no response will arrive to drive on_receive(). All it
// does is forward to handle_padfile().
void web_peer_connection::on_receive_padfile()
{
	handle_padfile();
}
610
handle_error(int const bytes_left)611 void web_peer_connection::handle_error(int const bytes_left)
612 {
613 std::shared_ptr<torrent> t = associated_torrent().lock();
614 TORRENT_ASSERT(t);
615
616 // TODO: 2 just make this peer not have the pieces
617 // associated with the file we just requested. Only
618 // when it doesn't have any of the file do the following
619 // pad files will make it complicated
620
621 // temporarily unavailable, retry later
622 t->retry_web_seed(this, m_parser.header_duration("retry-after"));
623 if (t->alerts().should_post<url_seed_alert>())
624 {
625 std::string const error_msg = to_string(m_parser.status_code()).data()
626 + (" " + m_parser.message());
627 t->alerts().emplace_alert<url_seed_alert>(t->get_handle(), m_url
628 , error_msg);
629 }
630 received_bytes(0, bytes_left);
631 disconnect(error_code(m_parser.status_code(), http_category()), operation_t::bittorrent, failure);
632 }
633
handle_redirect(int const bytes_left)634 void web_peer_connection::handle_redirect(int const bytes_left)
635 {
636 // this means we got a redirection request
637 // look for the location header
638 std::string location = m_parser.header("location");
639 received_bytes(0, bytes_left);
640
641 std::shared_ptr<torrent> t = associated_torrent().lock();
642 TORRENT_ASSERT(t);
643
644 if (location.empty())
645 {
646 // we should not try this server again.
647 t->remove_web_seed_conn(this, errors::missing_location, operation_t::bittorrent, peer_error);
648 m_web = nullptr;
649 TORRENT_ASSERT(is_disconnecting());
650 return;
651 }
652
653 bool const single_file_request = !m_path.empty()
654 && m_path[m_path.size() - 1] != '/';
655
656 // add the redirected url and remove the current one
657 if (!single_file_request)
658 {
659 TORRENT_ASSERT(!m_file_requests.empty());
660 file_index_t const file_index = m_file_requests.front().file_index;
661
662 location = resolve_redirect_location(m_url, location);
663 #ifndef TORRENT_DISABLE_LOGGING
664 peer_log(peer_log_alert::info, "LOCATION", "%s", location.c_str());
665 #endif
666 // TODO: 3 this could be made more efficient for the case when we use an
667 // HTTP proxy. Then we wouldn't need to add new web seeds to the torrent,
668 // we could just make the redirect table contain full URLs.
669 std::string redirect_base;
670 std::string redirect_path;
671 error_code ec;
672 std::tie(redirect_base, redirect_path) = split_url(location, ec);
673
674 if (ec)
675 {
676 // we should not try this server again.
677 disconnect(errors::missing_location, operation_t::bittorrent, failure);
678 return;
679 }
680
681 // add_web_seed won't add duplicates. If we have already added an entry
682 // with this URL, we'll get back the existing entry
683
684 // "ephemeral" flag should be set to avoid "web_seed_t" saving in resume data.
685 // E.g. original "web_seed_t" request url points to "http://example1.com/file1" and
686 // web server responses with redirect location "http://example2.com/subpath/file2".
687 // "handle_redirect" process this location to create new "web_seed_t"
688 // with base url=="http://example2.com/" and redirects[0]=="/subpath/file2").
689 // If we try to load resume with such "web_seed_t" then "web_peer_connection" will send
690 // request with wrong path "http://example2.com/file1" (cause "redirects" map is not serialized in resume)
691 web_seed_t* web = t->add_web_seed(redirect_base, web_seed_entry::url_seed
692 , m_external_auth, m_extra_headers, torrent::ephemeral);
693 web->have_files.resize(t->torrent_file().num_files(), false);
694
695 // the new web seed we're adding only has this file for now
696 // we may add more files later
697 web->redirects[file_index] = redirect_path;
698 if (web->have_files.get_bit(file_index) == false)
699 {
700 web->have_files.set_bit(file_index);
701
702 if (web->peer_info.connection != nullptr)
703 {
704 auto* pc = static_cast<peer_connection*>(web->peer_info.connection);
705
706 // we just learned that this host has this file, and we're currently
707 // connected to it. Make it advertise that it has this file to the
708 // bittorrent engine
709 file_storage const& fs = t->torrent_file().files();
710 auto const range = aux::file_piece_range_inclusive(fs, file_index);
711 for (piece_index_t i = std::get<0>(range); i < std::get<1>(range); ++i)
712 pc->incoming_have(i);
713 }
714 // we just learned about another file this web server has, make sure
715 // it's marked interesting to enable connecting to it
716 web->interesting = true;
717 }
718
719 // we don't have this file on this server. Don't ask for it again
720 m_web->have_files.resize(t->torrent_file().num_files(), true);
721 if (m_web->have_files[file_index])
722 {
723 m_web->have_files.clear_bit(file_index);
724 #ifndef TORRENT_DISABLE_LOGGING
725 peer_log(peer_log_alert::info, "MISSING_FILE", "redirection | file: %d"
726 , static_cast<int>(file_index));
727 #endif
728 }
729 disconnect(errors::redirecting, operation_t::bittorrent, normal);
730 }
731 else
732 {
733 location = resolve_redirect_location(m_url, location);
734 #ifndef TORRENT_DISABLE_LOGGING
735 peer_log(peer_log_alert::info, "LOCATION", "%s", location.c_str());
736 #endif
737 t->add_web_seed(location, web_seed_entry::url_seed, m_external_auth
738 , m_extra_headers, torrent::ephemeral);
739
740 // this web seed doesn't have any files. Don't try to request from it
741 // again this session
742 m_web->have_files.resize(t->torrent_file().num_files(), false);
743 disconnect(errors::redirecting, operation_t::bittorrent, normal);
744 m_web = nullptr;
745 TORRENT_ASSERT(is_disconnecting());
746 }
747 }
748
on_receive(error_code const & error,std::size_t bytes_transferred)749 void web_peer_connection::on_receive(error_code const& error
750 , std::size_t bytes_transferred)
751 {
752 INVARIANT_CHECK;
753
754 if (error)
755 {
756 received_bytes(0, int(bytes_transferred));
757 #ifndef TORRENT_DISABLE_LOGGING
758 if (should_log(peer_log_alert::info))
759 {
760 peer_log(peer_log_alert::info, "ERROR"
761 , "web_peer_connection error: %s", error.message().c_str());
762 }
763 #endif
764 return;
765 }
766
767 std::shared_ptr<torrent> t = associated_torrent().lock();
768 TORRENT_ASSERT(t);
769
770 // in case the first file on this series of requests is a padfile
771 // we need to handle it right now
772 span<char const> recv_buffer = m_recv_buffer.get();
773 handle_padfile();
774 if (associated_torrent().expired()) return;
775
776 for (;;)
777 {
778 int payload;
779 int protocol;
780 bool header_finished = m_parser.header_finished();
781 if (!header_finished)
782 {
783 bool failed = false;
784 std::tie(payload, protocol) = m_parser.incoming(recv_buffer, failed);
785 received_bytes(0, protocol);
786 TORRENT_ASSERT(int(recv_buffer.size()) >= protocol);
787
788 if (failed)
789 {
790 received_bytes(0, int(recv_buffer.size()));
791 #ifndef TORRENT_DISABLE_LOGGING
792 if (should_log(peer_log_alert::info))
793 {
794 peer_log(peer_log_alert::info, "RECEIVE_BYTES"
795 , "%*s", int(recv_buffer.size()), recv_buffer.data());
796 }
797 #endif
798 disconnect(errors::http_parse_error, operation_t::bittorrent, peer_error);
799 return;
800 }
801
802 TORRENT_ASSERT(recv_buffer.empty() || recv_buffer[0] == 'H');
803 TORRENT_ASSERT(int(recv_buffer.size()) <= m_recv_buffer.packet_size());
804
805 // this means the entire status line hasn't been received yet
806 if (m_parser.status_code() == -1)
807 {
808 TORRENT_ASSERT(payload == 0);
809 break;
810 }
811
812 if (!m_parser.header_finished())
813 {
814 TORRENT_ASSERT(payload == 0);
815 break;
816 }
817
818 m_body_start = m_parser.body_start();
819 m_received_body = 0;
820 }
821
822 // we just completed reading the header
823 if (!header_finished)
824 {
825 ++m_num_responses;
826
827 if (m_parser.connection_close())
828 {
829 incoming_choke();
830 if (m_num_responses == 1)
831 m_web->supports_keepalive = false;
832 }
833
834 #ifndef TORRENT_DISABLE_LOGGING
835 if (should_log(peer_log_alert::info))
836 {
837 peer_log(peer_log_alert::info, "STATUS"
838 , "%d %s", m_parser.status_code(), m_parser.message().c_str());
839 std::multimap<std::string, std::string> const& headers = m_parser.headers();
840 for (auto const &i : headers)
841 peer_log(peer_log_alert::info, "STATUS", " %s: %s", i.first.c_str(), i.second.c_str());
842 }
843 #endif
844
845 // if the status code is not one of the accepted ones, abort
846 if (!is_ok_status(m_parser.status_code()))
847 {
848 if (!m_file_requests.empty())
849 {
850 file_request_t const& file_req = m_file_requests.front();
851 m_web->have_files.resize(t->torrent_file().num_files(), true);
852 m_web->have_files.clear_bit(file_req.file_index);
853
854 #ifndef TORRENT_DISABLE_LOGGING
855 peer_log(peer_log_alert::info, "MISSING_FILE", "http-code: %d | file: %d"
856 , m_parser.status_code(), static_cast<int>(file_req.file_index));
857 #endif
858 }
859 handle_error(int(recv_buffer.size()));
860 return;
861 }
862
863 if (is_redirect(m_parser.status_code()))
864 {
865 handle_redirect(int(recv_buffer.size()));
866 return;
867 }
868
869 m_server_string = get_peer_name(m_parser, m_host);
870
871 recv_buffer = recv_buffer.subspan(m_body_start);
872
873 m_body_start = m_parser.body_start();
874 m_received_body = 0;
875 }
876
877 // we only received the header, no data
878 if (recv_buffer.empty()) break;
879
880 // ===================================
881 // ======= RESPONSE BYTE RANGE =======
882 // ===================================
883
884 // despite the HTTP range being inclusive, range_start and range_end are
885 // exclusive to fit better into C++. i.e. range_end points one byte past
886 // the end of the payload
887 std::int64_t range_start;
888 std::int64_t range_end;
889 error_code ec;
890 std::tie(range_start, range_end) = get_range(m_parser, ec);
891 if (ec)
892 {
893 received_bytes(0, int(recv_buffer.size()));
894 // we should not try this server again.
895 t->remove_web_seed_conn(this, ec, operation_t::bittorrent, peer_error);
896 m_web = nullptr;
897 TORRENT_ASSERT(is_disconnecting());
898 return;
899 }
900
901 TORRENT_ASSERT(!m_file_requests.empty());
902 file_request_t const& file_req = m_file_requests.front();
903 if (range_start != file_req.start
904 || range_end != file_req.start + file_req.length)
905 {
906 // the byte range in the http response is different what we expected
907 received_bytes(0, int(recv_buffer.size()));
908
909 #ifndef TORRENT_DISABLE_LOGGING
910 if (should_log(peer_log_alert::incoming))
911 {
912 peer_log(peer_log_alert::incoming, "INVALID HTTP RESPONSE"
913 , "in=(%d, %" PRId64 "-%" PRId64 ") expected=(%d, %" PRId64 "-%" PRId64 ") ]"
914 , static_cast<int>(file_req.file_index), range_start, range_end
915 , static_cast<int>(file_req.file_index), file_req.start, file_req.start + file_req.length - 1);
916 }
917 #endif
918 disconnect(errors::invalid_range, operation_t::bittorrent, peer_error);
919 return;
920 }
921
922 if (m_parser.chunked_encoding())
923 {
924
925 // =========================
926 // === CHUNKED ENCODING ===
927 // =========================
928
929 while (m_chunk_pos >= 0 && !recv_buffer.empty())
930 {
931 // first deliver any payload we have in the buffer so far, ahead of
932 // the next chunk header.
933 if (m_chunk_pos > 0)
934 {
935 int const copy_size = std::min(m_chunk_pos, int(recv_buffer.size()));
936 TORRENT_ASSERT(copy_size > 0);
937
938 if (m_received_body + copy_size > file_req.length)
939 {
940 // the byte range in the http response is different what we expected
941 received_bytes(0, int(recv_buffer.size()));
942
943 #ifndef TORRENT_DISABLE_LOGGING
944 peer_log(peer_log_alert::incoming, "INVALID HTTP RESPONSE"
945 , "received body: %d request size: %d"
946 , m_received_body, file_req.length);
947 #endif
948 disconnect(errors::invalid_range, operation_t::bittorrent, peer_error);
949 return;
950 }
951 incoming_payload(recv_buffer.data(), copy_size);
952
953 recv_buffer = recv_buffer.subspan(copy_size);
954 m_chunk_pos -= copy_size;
955
956 if (recv_buffer.empty()) goto done;
957 }
958
959 TORRENT_ASSERT(m_chunk_pos == 0);
960
961 int header_size = 0;
962 std::int64_t chunk_size = 0;
963 span<char const> chunk_start = recv_buffer.subspan(m_chunk_pos);
964 TORRENT_ASSERT(chunk_start[0] == '\r'
965 || aux::is_hex({chunk_start.data(), 1}));
966 bool const ret = m_parser.parse_chunk_header(chunk_start, &chunk_size, &header_size);
967 if (!ret)
968 {
969 received_bytes(0, int(chunk_start.size()) - m_partial_chunk_header);
970 m_partial_chunk_header = int(chunk_start.size());
971 goto done;
972 }
973 #ifndef TORRENT_DISABLE_LOGGING
974 peer_log(peer_log_alert::info, "CHUNKED_ENCODING"
975 , "parsed chunk: %" PRId64 " header_size: %d"
976 , chunk_size, header_size);
977 #endif
978 received_bytes(0, header_size - m_partial_chunk_header);
979 m_partial_chunk_header = 0;
980 TORRENT_ASSERT(chunk_size != 0
981 || int(chunk_start.size()) <= header_size || chunk_start[header_size] == 'H');
982 TORRENT_ASSERT(m_body_start + m_chunk_pos < INT_MAX);
983 m_chunk_pos += int(chunk_size);
984 recv_buffer = recv_buffer.subspan(header_size);
985
986 // a chunk size of zero means the request is complete. Make sure the
987 // number of payload bytes we've received matches the number we
988 // requested. If that's not the case, we got an invalid response.
989 if (chunk_size == 0)
990 {
991 TORRENT_ASSERT_VAL(m_chunk_pos == 0, m_chunk_pos);
992
993 #if TORRENT_USE_ASSERTS
994 span<char const> chunk = recv_buffer.subspan(m_chunk_pos);
995 TORRENT_ASSERT(chunk.size() == 0 || chunk[0] == 'H');
996 #endif
997 m_chunk_pos = -1;
998
999 TORRENT_ASSERT(m_received_body <= file_req.length);
1000 if (m_received_body != file_req.length)
1001 {
1002 // the byte range in the http response is different what we expected
1003 received_bytes(0, int(recv_buffer.size()));
1004
1005 #ifndef TORRENT_DISABLE_LOGGING
1006 peer_log(peer_log_alert::incoming, "INVALID HTTP RESPONSE"
1007 , "received body: %d request size: %d"
1008 , m_received_body, file_req.length);
1009 #endif
1010 disconnect(errors::invalid_range, operation_t::bittorrent, peer_error);
1011 return;
1012 }
1013 // we just completed an HTTP file request. pop it from m_file_requests
1014 m_file_requests.pop_front();
1015 m_parser.reset();
1016 m_body_start = 0;
1017 m_received_body = 0;
1018 m_chunk_pos = 0;
1019 m_partial_chunk_header = 0;
1020
1021 // in between each file request, there may be an implicit
1022 // pad-file request
1023 handle_padfile();
1024 break;
1025 }
1026
1027 // if all of the receive buffer was just consumed as chunk
1028 // header, we're done
1029 if (recv_buffer.empty()) goto done;
1030 }
1031 }
1032 else
1033 {
1034 // this is the simple case, where we don't have chunked encoding
1035 TORRENT_ASSERT(m_received_body <= file_req.length);
1036 int const copy_size = std::min(file_req.length - m_received_body
1037 , int(recv_buffer.size()));
1038 incoming_payload(recv_buffer.data(), copy_size);
1039 recv_buffer = recv_buffer.subspan(copy_size);
1040
1041 TORRENT_ASSERT(m_received_body <= file_req.length);
1042 if (m_received_body == file_req.length)
1043 {
1044 // we just completed an HTTP file request. pop it from m_file_requests
1045 m_file_requests.pop_front();
1046 m_parser.reset();
1047 m_body_start = 0;
1048 m_received_body = 0;
1049 m_chunk_pos = 0;
1050 m_partial_chunk_header = 0;
1051
1052 // in between each file request, there may be an implicit
1053 // pad-file request
1054 handle_padfile();
1055 }
1056 }
1057
1058 if (recv_buffer.empty()) break;
1059 }
1060 done:
1061
1062 // now, remove all the bytes we've processed from the receive buffer
1063 m_recv_buffer.cut(int(recv_buffer.data() - m_recv_buffer.get().begin())
1064 , t->block_size() + request_size_overhead);
1065 }
1066
incoming_payload(char const * buf,int len)1067 void web_peer_connection::incoming_payload(char const* buf, int len)
1068 {
1069 received_bytes(len, 0);
1070 m_received_body += len;
1071
1072 if (is_disconnecting()) return;
1073
1074 #ifndef TORRENT_DISABLE_LOGGING
1075 peer_log(peer_log_alert::incoming_message, "INCOMING_PAYLOAD", "%d bytes", len);
1076 #endif
1077
1078 // deliver all complete bittorrent requests to the bittorrent engine
1079 while (len > 0)
1080 {
1081 if (m_requests.empty()) return;
1082
1083 TORRENT_ASSERT(!m_requests.empty());
1084 peer_request const& front_request = m_requests.front();
1085 int const piece_size = int(m_piece.size());
1086 int const copy_size = std::min(front_request.length - piece_size, len);
1087
1088 // m_piece may not hold more than the response to the next BT request
1089 TORRENT_ASSERT(front_request.length > piece_size);
1090
1091 // copy_size is the number of bytes we need to add to the end of m_piece
1092 // to not exceed the size of the next bittorrent request to be delivered.
1093 // m_piece can only hold the response for a single BT request at a time
1094 m_piece.resize(piece_size + copy_size);
1095 std::memcpy(m_piece.data() + piece_size, buf, aux::numeric_cast<std::size_t>(copy_size));
1096 len -= copy_size;
1097 buf += copy_size;
1098
1099 // keep peer stats up-to-date
1100 incoming_piece_fragment(copy_size);
1101
1102 TORRENT_ASSERT(front_request.length >= piece_size);
1103 if (int(m_piece.size()) == front_request.length)
1104 {
1105 std::shared_ptr<torrent> t = associated_torrent().lock();
1106 TORRENT_ASSERT(t);
1107
1108 #ifndef TORRENT_DISABLE_LOGGING
1109 peer_log(peer_log_alert::incoming_message, "POP_REQUEST"
1110 , "piece: %d start: %d len: %d"
1111 , static_cast<int>(front_request.piece), front_request.start, front_request.length);
1112 #endif
1113
1114 // Make a copy of the request and pop it off the queue before calling
1115 // incoming_piece because that may lead to a call to disconnect()
1116 // which will clear the request queue and invalidate any references
1117 // to the request
1118 peer_request const front_request_copy = front_request;
1119 m_requests.pop_front();
1120
1121 incoming_piece(front_request_copy, m_piece.data());
1122
1123 m_piece.clear();
1124 }
1125 }
1126 }
1127
incoming_zeroes(int len)1128 void web_peer_connection::incoming_zeroes(int len)
1129 {
1130 #ifndef TORRENT_DISABLE_LOGGING
1131 peer_log(peer_log_alert::incoming_message, "INCOMING_ZEROES", "%d bytes", len);
1132 #endif
1133
1134 // deliver all complete bittorrent requests to the bittorrent engine
1135 while (len > 0)
1136 {
1137 TORRENT_ASSERT(!m_requests.empty());
1138 peer_request const& front_request = m_requests.front();
1139 int const piece_size = int(m_piece.size());
1140 int const copy_size = std::min(front_request.length - piece_size, len);
1141
1142 // m_piece may not hold more than the response to the next BT request
1143 TORRENT_ASSERT(front_request.length > piece_size);
1144
1145 // copy_size is the number of bytes we need to add to the end of m_piece
1146 // to not exceed the size of the next bittorrent request to be delivered.
1147 // m_piece can only hold the response for a single BT request at a time
1148 m_piece.resize(piece_size + copy_size, 0);
1149 len -= copy_size;
1150
1151 // keep peer stats up-to-date
1152 incoming_piece_fragment(copy_size);
1153
1154 maybe_harvest_piece();
1155 }
1156 }
1157
maybe_harvest_piece()1158 void web_peer_connection::maybe_harvest_piece()
1159 {
1160 peer_request const& front_request = m_requests.front();
1161 TORRENT_ASSERT(front_request.length >= int(m_piece.size()));
1162 if (int(m_piece.size()) != front_request.length) return;
1163
1164 std::shared_ptr<torrent> t = associated_torrent().lock();
1165 TORRENT_ASSERT(t);
1166
1167 #ifndef TORRENT_DISABLE_LOGGING
1168 peer_log(peer_log_alert::incoming_message, "POP_REQUEST"
1169 , "piece: %d start: %d len: %d"
1170 , static_cast<int>(front_request.piece)
1171 , front_request.start, front_request.length);
1172 #endif
1173 m_requests.pop_front();
1174
1175 incoming_piece(front_request, m_piece.data());
1176 m_piece.clear();
1177 }
1178
get_specific_peer_info(peer_info & p) const1179 void web_peer_connection::get_specific_peer_info(peer_info& p) const
1180 {
1181 web_connection_base::get_specific_peer_info(p);
1182 p.flags |= peer_info::local_connection;
1183 p.connection_type = peer_info::web_seed;
1184 }
1185
handle_padfile()1186 void web_peer_connection::handle_padfile()
1187 {
1188 if (m_file_requests.empty()) return;
1189 if (m_requests.empty()) return;
1190
1191 std::shared_ptr<torrent> t = associated_torrent().lock();
1192 TORRENT_ASSERT(t);
1193 torrent_info const& info = t->torrent_file();
1194
1195 while (!m_file_requests.empty()
1196 && info.orig_files().pad_file_at(m_file_requests.front().file_index))
1197 {
1198 // the next file is a pad file. We didn't actually send
1199 // a request for this since it most likely doesn't exist on
1200 // the web server anyway. Just pretend that we received a
1201 // bunch of zeroes here and pop it again
1202 std::int64_t file_size = m_file_requests.front().length;
1203
1204 // in theory the pad file can span multiple bocks, hence the loop
1205 while (file_size > 0)
1206 {
1207 peer_request const front_request = m_requests.front();
1208 TORRENT_ASSERT(int(m_piece.size()) < front_request.length);
1209
1210 int pad_size = int(std::min(file_size
1211 , front_request.length - std::int64_t(m_piece.size())));
1212 TORRENT_ASSERT(pad_size > 0);
1213 file_size -= pad_size;
1214
1215 incoming_zeroes(pad_size);
1216
1217 #ifndef TORRENT_DISABLE_LOGGING
1218 if (should_log(peer_log_alert::info))
1219 {
1220 peer_log(peer_log_alert::info, "HANDLE_PADFILE"
1221 , "file: %d start: %" PRId64 " len: %d"
1222 , static_cast<int>(m_file_requests.front().file_index)
1223 , m_file_requests.front().start
1224 , m_file_requests.front().length);
1225 }
1226 #endif
1227 }
1228
1229 m_file_requests.pop_front();
1230 }
1231 }
1232
1233 } // libtorrent namespace
1234