1 /*
2 
3 Copyright (c) 2003-2018, Arvid Norberg
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
9 
10  * Redistributions of source code must retain the above copyright
11 	notice, this list of conditions and the following disclaimer.
12  * Redistributions in binary form must reproduce the above copyright
13 	notice, this list of conditions and the following disclaimer in
14 	the documentation and/or other materials provided with the distribution.
15  * Neither the name of the author nor the names of its
16 	contributors may be used to endorse or promote products derived
17 	from this software without specific prior written permission.
18 
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 POSSIBILITY OF SUCH DAMAGE.
30 
31 */
32 
33 #include "libtorrent/config.hpp"
34 
35 #include <functional>
36 #include <cstdlib>
37 #include <cstdio> // for snprintf
38 #include <cinttypes> // for PRId64 et.al.
39 
40 #include "libtorrent/web_peer_connection.hpp"
41 #include "libtorrent/session.hpp"
42 #include "libtorrent/entry.hpp"
43 #include "libtorrent/bencode.hpp"
44 #include "libtorrent/alert_types.hpp"
45 #include "libtorrent/invariant_check.hpp"
46 #include "libtorrent/io.hpp"
47 #include "libtorrent/parse_url.hpp"
48 #include "libtorrent/peer_info.hpp"
49 #include "libtorrent/aux_/session_interface.hpp"
50 #include "libtorrent/alert_manager.hpp" // for alert_manager
51 #include "libtorrent/aux_/escape_string.hpp" // for escape_path
52 #include "libtorrent/hex.hpp" // for is_hex
53 #include "libtorrent/torrent.hpp"
54 #include "libtorrent/http_parser.hpp"
55 
56 namespace libtorrent {
57 
// NOTE(review): not referenced in the part of this file visible here. Judging
// by the name it is a byte allowance for per-request overhead -- confirm at
// its use site.
constexpr int request_size_overhead = 5000;

// forward declaration. The definition follows the web_peer_connection
// constructor, which calls it for single-file torrents.
std::string escape_file_path(file_storage const& storage, file_index_t index);
61 
web_peer_connection(peer_connection_args const & pack,web_seed_t & web)62 web_peer_connection::web_peer_connection(peer_connection_args const& pack
63 	, web_seed_t& web)
64 	: web_connection_base(pack, web)
65 	, m_url(web.url)
66 	, m_web(&web)
67 	, m_received_body(0)
68 	, m_chunk_pos(0)
69 	, m_partial_chunk_header(0)
70 	, m_num_responses(0)
71 {
72 	INVARIANT_CHECK;
73 
74 	if (!m_settings.get_bool(settings_pack::report_web_seed_downloads))
75 		ignore_stats(true);
76 
77 	std::shared_ptr<torrent> tor = pack.tor.lock();
78 	TORRENT_ASSERT(tor);
79 
80 	// if the web server is known not to support keep-alive. request 4MiB
81 	// but we want to have at least piece size to prevent block based requests
82 	int const min_size = std::max((web.supports_keepalive ? 1 : 4) * 1024 * 1024,
83 		tor->torrent_file().piece_length());
84 
85 	// we prefer downloading large chunks from web seeds,
86 	// but still want to be able to split requests
87 	int const preferred_size = std::max(min_size, m_settings.get_int(settings_pack::urlseed_max_request_bytes));
88 
89 	prefer_contiguous_blocks(preferred_size / tor->block_size());
90 
91 	std::shared_ptr<torrent> t = associated_torrent().lock();
92 	bool const single_file_request = t->torrent_file().num_files() == 1;
93 
94 	if (!single_file_request)
95 	{
96 		// handle incorrect .torrent files which are multi-file
97 		// but have web seeds not ending with a slash
98 		ensure_trailing_slash(m_path);
99 		ensure_trailing_slash(m_url);
100 	}
101 	else
102 	{
103 		// handle .torrent files that don't include the filename in the url
104 		if (m_path.empty()) m_path += '/';
105 		if (m_path[m_path.size() - 1] == '/')
106 		{
107 			m_path += escape_string(t->torrent_file().name());
108 		}
109 
110 		if (!m_url.empty() && m_url[m_url.size() - 1] == '/')
111 		{
112 			m_url += escape_file_path(t->torrent_file().files(), file_index_t(0));
113 		}
114 	}
115 
116 	// we want large blocks as well, so
117 	// we can request more bytes at once
118 	// this setting will merge adjacent requests
119 	// into single larger ones
120 	request_large_blocks(true);
121 
122 #ifndef TORRENT_DISABLE_LOGGING
123 	peer_log(peer_log_alert::info, "URL", "web_peer_connection %s", m_url.c_str());
124 #endif
125 }
126 
escape_file_path(file_storage const & storage,file_index_t index)127 std::string escape_file_path(file_storage const& storage, file_index_t index)
128 {
129 	std::string new_path { storage.file_path(index) };
130 #ifdef TORRENT_WINDOWS
131 	convert_path_to_posix(new_path);
132 #endif
133 	return escape_path(new_path);
134 }
135 
on_connected()136 void web_peer_connection::on_connected()
137 {
138 	if (m_web->have_files.empty())
139 	{
140 		incoming_have_all();
141 	}
142 	else if (m_web->have_files.none_set())
143 	{
144 		incoming_have_none();
145 		m_web->interesting = false;
146 #ifndef TORRENT_DISABLE_LOGGING
147 		peer_log(peer_log_alert::info, "WEB-SEED", "have no files, not interesting. %s", m_url.c_str());
148 #endif
149 	}
150 	else
151 	{
152 		std::shared_ptr<torrent> t = associated_torrent().lock();
153 
154 		// only advertise pieces that are contained within the files we have as
155 		// indicated by m_web->have_files AND padfiles!
156 		// it's important to include pieces that may overlap many files, as long
157 		// as we have all those files, so instead of starting with a clear bitfield
158 		// and setting the pieces corresponding to files we have, we do it the
159 		// other way around. Start with assuming we have all files, and clear
160 		// pieces overlapping with files we *don't* have.
161 		typed_bitfield<piece_index_t> have;
162 		file_storage const& fs = t->torrent_file().files();
163 		have.resize(fs.num_pieces(), true);
164 		for (auto const i : fs.file_range())
165 		{
166 			// if we have the file, no need to do anything
167 			if (m_web->have_files.get_bit(i) || fs.pad_file_at(i)) continue;
168 
169 			auto const range = aux::file_piece_range_inclusive(fs, i);
170 			for (piece_index_t k = std::get<0>(range); k < std::get<1>(range); ++k)
171 				have.clear_bit(k);
172 		}
173 		t->set_seed(peer_info_struct(), false);
174 		if (have.none_set())
175 		{
176 			incoming_have_none();
177 			m_web->interesting = false;
178 #ifndef TORRENT_DISABLE_LOGGING
179 			peer_log(peer_log_alert::info, "WEB-SEED", "have no pieces, not interesting. %s", m_url.c_str());
180 #endif
181 		}
182 		else
183 		{
184 			incoming_bitfield(have);
185 		}
186 	}
187 
188 	// TODO: 3 this should be an optional<piece_index_t>, piece index -1 should
189 	// not be allowed
190 	if (m_web->restart_request.piece != piece_index_t(-1))
191 	{
192 		// increase the chances of requesting the block
193 		// we have partial data for already, to finish it
194 		incoming_suggest(m_web->restart_request.piece);
195 	}
196 	web_connection_base::on_connected();
197 }
198 
disconnect(error_code const & ec,operation_t op,disconnect_severity_t const error)199 void web_peer_connection::disconnect(error_code const& ec
200 	, operation_t op, disconnect_severity_t const error)
201 {
202 	if (is_disconnecting()) return;
203 
204 	if (op == operation_t::sock_write && ec == boost::system::errc::broken_pipe)
205 	{
206 #ifndef TORRENT_DISABLE_LOGGING
207 		// a write operation failed with broken-pipe. This typically happens
208 		// with HTTP 1.0 servers that close their incoming channel of the TCP
209 		// stream whenever they're done reading one full request. Instead of
210 		// us bailing out and failing the entire request just because our
211 		// write-end was closed, ignore it and keep reading until the read-end
212 		// also is closed.
213 		peer_log(peer_log_alert::info, "WRITE_DIRECTION", "CLOSED");
214 #endif
215 
216 		// prevent the peer from trying to send anything more
217 		m_send_buffer.clear();
218 
219 		// when the web server closed our write-end of the socket (i.e. its
220 		// read-end), if it's an HTTP 1.0 server. we will stop sending more
221 		// requests. We'll close the connection once we receive the last bytes,
222 		// and our read end is closed as well.
223 		incoming_choke();
224 		return;
225 	}
226 
227 	if (op == operation_t::connect && m_web && !m_web->endpoints.empty())
228 	{
229 		// we failed to connect to this IP. remove it so that the next attempt
230 		// uses the next IP in the list.
231 		m_web->endpoints.erase(m_web->endpoints.begin());
232 	}
233 
234 	if (ec == errors::uninteresting_upload_peer && m_web)
235 	{
236 		// if this is an "ephemeral" web seed, it means it was added by receiving
237 		// an HTTP redirect. If we disconnect because we're not interested in any
238 		// of its pieces, mark it as uninteresting, to avoid reconnecting to it
239 		// repeatedly
240 		if (m_web->ephemeral) m_web->interesting = false;
241 
242 		// if the web seed is not ephemeral, but we're still not interested. That
243 		// implies that all files either have failed with 404 or with a
244 		// redirection to a different web server.
245 		m_web->retry = std::max(m_web->retry, aux::time_now32()
246 			+ seconds32(m_settings.get_int(settings_pack::urlseed_wait_retry)));
247 		TORRENT_ASSERT(m_web->retry > aux::time_now32());
248 	}
249 
250 	std::shared_ptr<torrent> t = associated_torrent().lock();
251 
252 	if (!m_requests.empty() && !m_file_requests.empty()
253 		&& !m_piece.empty() && m_web)
254 	{
255 #ifndef TORRENT_DISABLE_LOGGING
256 		if (should_log(peer_log_alert::info))
257 		{
258 			peer_log(peer_log_alert::info, "SAVE_RESTART_DATA"
259 				, "data: %d req: %d off: %d"
260 				, int(m_piece.size()), int(m_requests.front().piece)
261 				, m_requests.front().start);
262 		}
263 #endif
264 		m_web->restart_request = m_requests.front();
265 		if (!m_web->restart_piece.empty())
266 		{
267 			// we're about to replace a different restart piece
268 			// buffer. So it was wasted download
269 			if (t) t->add_redundant_bytes(int(m_web->restart_piece.size())
270 				, waste_reason::piece_closing);
271 		}
272 		m_web->restart_piece.swap(m_piece);
273 
274 		// we have to do this to not count this data as redundant. The
275 		// upper layer will call downloading_piece_progress and assume
276 		// it's all wasted download. Since we're saving it here, it isn't.
277 		m_requests.clear();
278 	}
279 
280 	if (m_web && !m_web->supports_keepalive && error == peer_connection_interface::normal)
281 	{
282 		// if the web server doesn't support keepalive and we were
283 		// disconnected as a graceful EOF, reconnect right away
284 		if (t) get_io_service().post(
285 			std::bind(&torrent::maybe_connect_web_seeds, t));
286 	}
287 
288 	if (error >= failure)
289 	{
290 		m_web->retry = std::max(m_web->retry, aux::time_now32()
291 			+ seconds32(m_settings.get_int(settings_pack::urlseed_wait_retry)));
292 	}
293 
294 	peer_connection::disconnect(ec, op, error);
295 	if (t) t->disconnect_web_seed(this);
296 }
297 
downloading_piece_progress() const298 piece_block_progress web_peer_connection::downloading_piece_progress() const
299 {
300 	if (m_requests.empty()) return {};
301 
302 	std::shared_ptr<torrent> t = associated_torrent().lock();
303 	TORRENT_ASSERT(t);
304 
305 	piece_block_progress ret;
306 
307 	ret.piece_index = m_requests.front().piece;
308 	ret.bytes_downloaded = int(m_piece.size());
309 	// this is used to make sure that the block_index stays within
310 	// bounds. If the entire piece is downloaded, the block_index
311 	// would otherwise point to one past the end
312 	int correction = m_piece.empty() ? 0 : -1;
313 	ret.block_index = (m_requests.front().start + int(m_piece.size()) + correction) / t->block_size();
314 	TORRENT_ASSERT(ret.block_index < int(piece_block::invalid.block_index));
315 	TORRENT_ASSERT(ret.piece_index < piece_block::invalid.piece_index);
316 
317 	ret.full_block_bytes = t->block_size();
318 	piece_index_t const last_piece = t->torrent_file().last_piece();
319 	if (ret.piece_index == last_piece && ret.block_index
320 		== t->torrent_file().piece_size(last_piece) / t->block_size())
321 	{
322 		ret.full_block_bytes = t->torrent_file().piece_size(last_piece) % t->block_size();
323 	}
324 	return ret;
325 }
326 
// Translates the bittorrent request ``r`` into one or more HTTP GET requests
// with a Range header and queues them in the send buffer.
//
// The steps are:
// 1. ``r`` is split into entries of at most one block each, which are
//    appended to m_requests. If the front of m_requests matches the restart
//    request saved in the web seed entry, the saved partial data is swapped
//    into m_piece and the HTTP request is shortened accordingly.
// 2. for a single-file torrent one GET is generated. Otherwise the request is
//    mapped onto the files it spans and one GET per file is generated. Pad
//    files are recorded in m_file_requests, but are not requested from the
//    server.
// 3. every file request is recorded in m_file_requests. If the number of pad
//    files counted here equals the size of m_file_requests, nothing is sent
//    and on_receive_padfile() is posted instead.
void web_peer_connection::write_request(peer_request const& r)
{
	INVARIANT_CHECK;

	std::shared_ptr<torrent> t = associated_torrent().lock();
	TORRENT_ASSERT(t);

	TORRENT_ASSERT(t->valid_metadata());

	torrent_info const& info = t->torrent_file();
	// the request that is actually sent over HTTP. It starts out as a copy of
	// r, but may be shortened below if restart data is available
	peer_request req = r;

	std::string request;
	request.reserve(400);

	// the number of bytes of r not yet covered by an entry in m_requests
	int size = r.length;
	const int block_size = t->block_size();
	const int piece_size = t->torrent_file().piece_length();
	peer_request pr{};

	while (size > 0)
	{
		// offset relative to the start of piece r.piece. This may extend
		// past the end of that piece, in which case pr refers to a later
		// piece
		int request_offset = r.start + r.length - size;
		pr.start = request_offset % piece_size;
		pr.length = std::min(block_size, size);
		pr.piece = piece_index_t(static_cast<int>(r.piece) + request_offset / piece_size);
		m_requests.push_back(pr);

		if (m_web->restart_request == m_requests.front())
		{
			// pick up the partial data saved by disconnect()
			m_piece.swap(m_web->restart_piece);
			peer_request const& front = m_requests.front();
			TORRENT_ASSERT(front.length > int(m_piece.size()));

#ifndef TORRENT_DISABLE_LOGGING
			peer_log(peer_log_alert::info, "RESTART_DATA",
				"data: %d req: (%d, %d) size: %d"
					, int(m_piece.size()), static_cast<int>(front.piece), front.start
					, front.start + front.length - 1);
#else
			TORRENT_UNUSED(front);
#endif

			// skip the bytes we already have
			req.start += int(m_piece.size());
			req.length -= int(m_piece.size());

			// just to keep the accounting straight for the upper layer.
			// it doesn't know we just re-wrote the request
			incoming_piece_fragment(int(m_piece.size()));
			// piece index -1 marks the restart request as consumed
			m_web->restart_request.piece = piece_index_t(-1);
		}

#if 0
			std::cerr << this << " REQ: p: " << pr.piece << " " << pr.start << std::endl;
#endif
		size -= pr.length;
	}

#ifndef TORRENT_DISABLE_LOGGING
	peer_log(peer_log_alert::outgoing_message, "REQUESTING", "(piece: %d start: %d) - (piece: %d end: %d)"
		, static_cast<int>(r.piece), r.start
		, static_cast<int>(pr.piece), pr.start + pr.length);
#endif

	bool const single_file_request = t->torrent_file().num_files() == 1;
	int const proxy_type = m_settings.get_int(settings_pack::proxy_type);
	// with an HTTP proxy (and no SSL) the request line carries the full URL
	// (m_url) rather than just the path (m_path)
	bool const using_proxy = (proxy_type == settings_pack::http
		|| proxy_type == settings_pack::http_pw) && !m_ssl;

	// the number of pad files that have been "requested". In case we _only_
	// request padfiles, we can't rely on handling them in the on_receive()
	// callback (because we won't receive anything), instead we have to post a
	// pretend read callback where we can deliver the zeroes for the partfile
	int num_pad_files = 0;

	// TODO: 3 do we really need a special case here? wouldn't the multi-file
	// case handle single file torrents correctly too?
	if (single_file_request)
	{
		file_request_t file_req;
		file_req.file_index = file_index_t(0);
		// with a single file, the file offset is the offset into the torrent
		file_req.start = std::int64_t(static_cast<int>(req.piece)) * info.piece_length()
			+ req.start;
		file_req.length = req.length;

		request += "GET ";
		// do not encode single file paths, they are
		// assumed to be encoded in the torrent file
		request += using_proxy ? m_url : m_path;
		request += " HTTP/1.1\r\n";
		add_headers(request, m_settings, using_proxy);
		// the HTTP byte range is inclusive, hence the -1 on the end
		request += "\r\nRange: bytes=";
		request += to_string(file_req.start).data();
		request += "-";
		request += to_string(file_req.start + file_req.length - 1).data();
		request += "\r\n\r\n";
		m_first_request = false;

		m_file_requests.push_back(file_req);
	}
	else
	{
		// the files (and byte ranges within them) covered by the request
		std::vector<file_slice> files = info.orig_files().map_block(req.piece, req.start
			, req.length);

		for (auto const &f : files)
		{
			file_request_t file_req;
			file_req.file_index = f.file_index;
			file_req.start = f.offset;
			file_req.length = int(f.size);

			if (info.orig_files().pad_file_at(f.file_index))
			{
				// pad files are not requested from the server, only
				// recorded in the queue
				m_file_requests.push_back(file_req);
				++num_pad_files;
				continue;
			}

			request += "GET ";
			if (using_proxy)
			{
				// m_url is already a properly escaped URL
				// with the correct slashes. Don't encode it again
				request += m_url;
			}

			// if this file was redirected earlier, use the path from the
			// redirect instead of the one derived from the torrent
			auto redirection = m_web->redirects.find(f.file_index);
			if (redirection != m_web->redirects.end())
			{
				auto const& redirect = redirection->second;
				// in case of http proxy "request" already contains m_url with trailing slash, so let's skip dup slash
				bool const trailing_slash = using_proxy && !redirect.empty() && redirect[0] == '/';
				// trailing_slash doubles as the start position (0 or 1)
				request.append(redirect, trailing_slash, std::string::npos);
			}
			else
			{
				if (!using_proxy)
				{
					// m_path is already a properly escaped URL
					// with the correct slashes. Don't encode it again
					request += m_path;
				}

				request += escape_file_path(info.orig_files(), f.file_index);
			}
			request += " HTTP/1.1\r\n";
			add_headers(request, m_settings, using_proxy);
			// the HTTP byte range is inclusive, hence the -1 on the end
			request += "\r\nRange: bytes=";
			request += to_string(f.offset).data();
			request += "-";
			request += to_string(f.offset + f.size - 1).data();
			request += "\r\n\r\n";
			m_first_request = false;

#if 0
			std::cerr << this << " SEND-REQUEST: f: " << f.file_index
				<< " s: " << f.offset
				<< " e: " << (f.offset + f.size - 1) << std::endl;
#endif
			// TODO: 3 file_index_t should not allow negative values
			TORRENT_ASSERT(f.file_index >= file_index_t(0));

			m_file_requests.push_back(file_req);
		}
	}

	// nothing but pad files in the queue: there won't be any response to
	// trigger on_receive(), so post a call that handles them instead
	if (num_pad_files == int(m_file_requests.size()))
	{
		get_io_service().post(std::bind(
			&web_peer_connection::on_receive_padfile,
			std::static_pointer_cast<web_peer_connection>(self())));
		return;
	}

#ifndef TORRENT_DISABLE_LOGGING
	peer_log(peer_log_alert::outgoing_message, "REQUEST", "%s", request.c_str());
#endif

	send_buffer(request);
}
508 
509 namespace {
510 
get_peer_name(http_parser const & p,std::string const & host)511 	std::string get_peer_name(http_parser const& p, std::string const& host)
512 	{
513 		std::string ret = "URL seed @ ";
514 		ret += host;
515 
516 		std::string const& server_version = p.header("server");
517 		if (!server_version.empty())
518 		{
519 			ret += " (";
520 			ret += server_version;
521 			ret += ")";
522 		}
523 		return ret;
524 	}
525 
get_range(http_parser const & parser,error_code & ec)526 	std::tuple<std::int64_t, std::int64_t> get_range(
527 		http_parser const& parser, error_code& ec)
528 	{
529 		std::int64_t range_start;
530 		std::int64_t range_end;
531 		if (parser.status_code() == 206)
532 		{
533 			std::tie(range_start, range_end) = parser.content_range();
534 			if (range_start < 0 || range_end < range_start)
535 			{
536 				ec = errors::invalid_range;
537 				range_start = 0;
538 				range_end = 0;
539 			}
540 			else
541 			{
542 				// the http range is inclusive
543 				range_end++;
544 			}
545 		}
546 		else
547 		{
548 			range_start = 0;
549 			range_end = parser.content_length();
550 			if (range_end < 0)
551 			{
552 				range_end = 0;
553 				ec = errors::no_content_length;
554 			}
555 		}
556 		return std::make_tuple(range_start, range_end);
557 	}
558 }
559 
560 // --------------------------
561 // RECEIVE DATA
562 // --------------------------
563 
received_invalid_data(piece_index_t const index,bool single_peer)564 bool web_peer_connection::received_invalid_data(piece_index_t const index, bool single_peer)
565 {
566 	if (!single_peer) return peer_connection::received_invalid_data(index, single_peer);
567 
568 	// when a web seed fails a hash check, do the following:
569 	// 1. if the whole piece only overlaps a single file, mark that file as not
570 	//    have for this peer
571 	// 2. if the piece overlaps more than one file, mark the piece as not have
572 	//    for this peer
573 	// 3. if it's a single file torrent, just ban it right away
574 	// this handles the case where web seeds may have some files updated but not other
575 
576 	std::shared_ptr<torrent> t = associated_torrent().lock();
577 	file_storage const& fs = t->torrent_file().files();
578 
579 	// single file torrent
580 	if (fs.num_files() == 1) return peer_connection::received_invalid_data(index, single_peer);
581 
582 	std::vector<file_slice> files = fs.map_block(index, 0, fs.piece_size(index));
583 
584 	if (files.size() == 1)
585 	{
586 		// assume the web seed has a different copy of this specific file
587 		// than what we expect, and pretend not to have it.
588 		auto const range = file_piece_range_inclusive(fs, files[0].file_index);
589 		for (piece_index_t i = std::get<0>(range); i != std::get<1>(range); ++i)
590 			incoming_dont_have(i);
591 	}
592 	else
593 	{
594 		incoming_dont_have(index);
595 	}
596 
597 	peer_connection::received_invalid_data(index, single_peer);
598 
599 	// if we don't think we have any of the files, allow banning the web seed
600 	if (num_have_pieces() == 0) return true;
601 
602 	// don't disconnect, we won't request anything from this file again
603 	return false;
604 }
605 
// Handler posted by write_request() when the queued file requests consist of
// pad files only. No response will arrive from the server in that case, so
// this stands in for on_receive() and forwards to handle_padfile().
void web_peer_connection::on_receive_padfile()
{
	handle_padfile();
}
610 
handle_error(int const bytes_left)611 void web_peer_connection::handle_error(int const bytes_left)
612 {
613 	std::shared_ptr<torrent> t = associated_torrent().lock();
614 	TORRENT_ASSERT(t);
615 
616 	// TODO: 2 just make this peer not have the pieces
617 	// associated with the file we just requested. Only
618 	// when it doesn't have any of the file do the following
619 	// pad files will make it complicated
620 
621 	// temporarily unavailable, retry later
622 	t->retry_web_seed(this, m_parser.header_duration("retry-after"));
623 	if (t->alerts().should_post<url_seed_alert>())
624 	{
625 		std::string const error_msg = to_string(m_parser.status_code()).data()
626 			+ (" " + m_parser.message());
627 		t->alerts().emplace_alert<url_seed_alert>(t->get_handle(), m_url
628 			, error_msg);
629 	}
630 	received_bytes(0, bytes_left);
631 	disconnect(error_code(m_parser.status_code(), http_category()), operation_t::bittorrent, failure);
632 }
633 
handle_redirect(int const bytes_left)634 void web_peer_connection::handle_redirect(int const bytes_left)
635 {
636 	// this means we got a redirection request
637 	// look for the location header
638 	std::string location = m_parser.header("location");
639 	received_bytes(0, bytes_left);
640 
641 	std::shared_ptr<torrent> t = associated_torrent().lock();
642 	TORRENT_ASSERT(t);
643 
644 	if (location.empty())
645 	{
646 		// we should not try this server again.
647 		t->remove_web_seed_conn(this, errors::missing_location, operation_t::bittorrent, peer_error);
648 		m_web = nullptr;
649 		TORRENT_ASSERT(is_disconnecting());
650 		return;
651 	}
652 
653 	bool const single_file_request = !m_path.empty()
654 		&& m_path[m_path.size() - 1] != '/';
655 
656 	// add the redirected url and remove the current one
657 	if (!single_file_request)
658 	{
659 		TORRENT_ASSERT(!m_file_requests.empty());
660 		file_index_t const file_index = m_file_requests.front().file_index;
661 
662 		location = resolve_redirect_location(m_url, location);
663 #ifndef TORRENT_DISABLE_LOGGING
664 		peer_log(peer_log_alert::info, "LOCATION", "%s", location.c_str());
665 #endif
666 		// TODO: 3 this could be made more efficient for the case when we use an
667 		// HTTP proxy. Then we wouldn't need to add new web seeds to the torrent,
668 		// we could just make the redirect table contain full URLs.
669 		std::string redirect_base;
670 		std::string redirect_path;
671 		error_code ec;
672 		std::tie(redirect_base, redirect_path) = split_url(location, ec);
673 
674 		if (ec)
675 		{
676 			// we should not try this server again.
677 			disconnect(errors::missing_location, operation_t::bittorrent, failure);
678 			return;
679 		}
680 
681 		// add_web_seed won't add duplicates. If we have already added an entry
682 		// with this URL, we'll get back the existing entry
683 
684 		// "ephemeral" flag should be set to avoid "web_seed_t" saving in resume data.
685 		// E.g. original "web_seed_t" request url points to "http://example1.com/file1" and
686 		// web server responses with redirect location "http://example2.com/subpath/file2".
687 		// "handle_redirect" process this location to create new "web_seed_t"
688 		// with base url=="http://example2.com/" and redirects[0]=="/subpath/file2").
689 		// If we try to load resume with such "web_seed_t" then "web_peer_connection" will send
690 		// request with wrong path "http://example2.com/file1" (cause "redirects" map is not serialized in resume)
691 		web_seed_t* web = t->add_web_seed(redirect_base, web_seed_entry::url_seed
692 			, m_external_auth, m_extra_headers, torrent::ephemeral);
693 		web->have_files.resize(t->torrent_file().num_files(), false);
694 
695 		// the new web seed we're adding only has this file for now
696 		// we may add more files later
697 		web->redirects[file_index] = redirect_path;
698 		if (web->have_files.get_bit(file_index) == false)
699 		{
700 			web->have_files.set_bit(file_index);
701 
702 			if (web->peer_info.connection != nullptr)
703 			{
704 				auto* pc = static_cast<peer_connection*>(web->peer_info.connection);
705 
706 				// we just learned that this host has this file, and we're currently
707 				// connected to it. Make it advertise that it has this file to the
708 				// bittorrent engine
709 				file_storage const& fs = t->torrent_file().files();
710 				auto const range = aux::file_piece_range_inclusive(fs, file_index);
711 				for (piece_index_t i = std::get<0>(range); i < std::get<1>(range); ++i)
712 					pc->incoming_have(i);
713 			}
714 			// we just learned about another file this web server has, make sure
715 			// it's marked interesting to enable connecting to it
716 			web->interesting = true;
717 		}
718 
719 		// we don't have this file on this server. Don't ask for it again
720 		m_web->have_files.resize(t->torrent_file().num_files(), true);
721 		if (m_web->have_files[file_index])
722 		{
723 			m_web->have_files.clear_bit(file_index);
724 #ifndef TORRENT_DISABLE_LOGGING
725 			peer_log(peer_log_alert::info, "MISSING_FILE", "redirection | file: %d"
726 				, static_cast<int>(file_index));
727 #endif
728 		}
729 		disconnect(errors::redirecting, operation_t::bittorrent, normal);
730 	}
731 	else
732 	{
733 		location = resolve_redirect_location(m_url, location);
734 #ifndef TORRENT_DISABLE_LOGGING
735 		peer_log(peer_log_alert::info, "LOCATION", "%s", location.c_str());
736 #endif
737 		t->add_web_seed(location, web_seed_entry::url_seed, m_external_auth
738 			, m_extra_headers, torrent::ephemeral);
739 
740 		// this web seed doesn't have any files. Don't try to request from it
741 		// again this session
742 		m_web->have_files.resize(t->torrent_file().num_files(), false);
743 		disconnect(errors::redirecting, operation_t::bittorrent, normal);
744 		m_web = nullptr;
745 		TORRENT_ASSERT(is_disconnecting());
746 	}
747 }
748 
on_receive(error_code const & error,std::size_t bytes_transferred)749 void web_peer_connection::on_receive(error_code const& error
750 	, std::size_t bytes_transferred)
751 {
752 	INVARIANT_CHECK;
753 
754 	if (error)
755 	{
756 		received_bytes(0, int(bytes_transferred));
757 #ifndef TORRENT_DISABLE_LOGGING
758 		if (should_log(peer_log_alert::info))
759 		{
760 			peer_log(peer_log_alert::info, "ERROR"
761 				, "web_peer_connection error: %s", error.message().c_str());
762 		}
763 #endif
764 		return;
765 	}
766 
767 	std::shared_ptr<torrent> t = associated_torrent().lock();
768 	TORRENT_ASSERT(t);
769 
770 	// in case the first file on this series of requests is a padfile
771 	// we need to handle it right now
772 	span<char const> recv_buffer = m_recv_buffer.get();
773 	handle_padfile();
774 	if (associated_torrent().expired()) return;
775 
776 	for (;;)
777 	{
778 		int payload;
779 		int protocol;
780 		bool header_finished = m_parser.header_finished();
781 		if (!header_finished)
782 		{
783 			bool failed = false;
784 			std::tie(payload, protocol) = m_parser.incoming(recv_buffer, failed);
785 			received_bytes(0, protocol);
786 			TORRENT_ASSERT(int(recv_buffer.size()) >= protocol);
787 
788 			if (failed)
789 			{
790 				received_bytes(0, int(recv_buffer.size()));
791 #ifndef TORRENT_DISABLE_LOGGING
792 				if (should_log(peer_log_alert::info))
793 				{
794 					peer_log(peer_log_alert::info, "RECEIVE_BYTES"
795 						, "%*s", int(recv_buffer.size()), recv_buffer.data());
796 				}
797 #endif
798 				disconnect(errors::http_parse_error, operation_t::bittorrent, peer_error);
799 				return;
800 			}
801 
802 			TORRENT_ASSERT(recv_buffer.empty() || recv_buffer[0] == 'H');
803 			TORRENT_ASSERT(int(recv_buffer.size()) <= m_recv_buffer.packet_size());
804 
805 			// this means the entire status line hasn't been received yet
806 			if (m_parser.status_code() == -1)
807 			{
808 				TORRENT_ASSERT(payload == 0);
809 				break;
810 			}
811 
812 			if (!m_parser.header_finished())
813 			{
814 				TORRENT_ASSERT(payload == 0);
815 				break;
816 			}
817 
818 			m_body_start = m_parser.body_start();
819 			m_received_body = 0;
820 		}
821 
822 		// we just completed reading the header
823 		if (!header_finished)
824 		{
825 			++m_num_responses;
826 
827 			if (m_parser.connection_close())
828 			{
829 				incoming_choke();
830 				if (m_num_responses == 1)
831 					m_web->supports_keepalive = false;
832 			}
833 
834 #ifndef TORRENT_DISABLE_LOGGING
835 			if (should_log(peer_log_alert::info))
836 			{
837 				peer_log(peer_log_alert::info, "STATUS"
838 					, "%d %s", m_parser.status_code(), m_parser.message().c_str());
839 				std::multimap<std::string, std::string> const& headers = m_parser.headers();
840 				for (auto const &i : headers)
841 					peer_log(peer_log_alert::info, "STATUS", "   %s: %s", i.first.c_str(), i.second.c_str());
842 			}
843 #endif
844 
845 			// if the status code is not one of the accepted ones, abort
846 			if (!is_ok_status(m_parser.status_code()))
847 			{
848 				if (!m_file_requests.empty())
849 				{
850 					file_request_t const& file_req = m_file_requests.front();
851 					m_web->have_files.resize(t->torrent_file().num_files(), true);
852 					m_web->have_files.clear_bit(file_req.file_index);
853 
854 #ifndef TORRENT_DISABLE_LOGGING
855 					peer_log(peer_log_alert::info, "MISSING_FILE", "http-code: %d | file: %d"
856 						, m_parser.status_code(), static_cast<int>(file_req.file_index));
857 #endif
858 				}
859 				handle_error(int(recv_buffer.size()));
860 				return;
861 			}
862 
863 			if (is_redirect(m_parser.status_code()))
864 			{
865 				handle_redirect(int(recv_buffer.size()));
866 				return;
867 			}
868 
869 			m_server_string = get_peer_name(m_parser, m_host);
870 
871 			recv_buffer = recv_buffer.subspan(m_body_start);
872 
873 			m_body_start = m_parser.body_start();
874 			m_received_body = 0;
875 		}
876 
877 		// we only received the header, no data
878 		if (recv_buffer.empty()) break;
879 
880 		// ===================================
881 		// ======= RESPONSE BYTE RANGE =======
882 		// ===================================
883 
884 		// despite the HTTP range being inclusive, range_start and range_end are
885 		// exclusive to fit better into C++. i.e. range_end points one byte past
886 		// the end of the payload
887 		std::int64_t range_start;
888 		std::int64_t range_end;
889 		error_code ec;
890 		std::tie(range_start, range_end) = get_range(m_parser, ec);
891 		if (ec)
892 		{
893 			received_bytes(0, int(recv_buffer.size()));
894 			// we should not try this server again.
895 			t->remove_web_seed_conn(this, ec, operation_t::bittorrent, peer_error);
896 			m_web = nullptr;
897 			TORRENT_ASSERT(is_disconnecting());
898 			return;
899 		}
900 
901 		TORRENT_ASSERT(!m_file_requests.empty());
902 		file_request_t const& file_req = m_file_requests.front();
903 		if (range_start != file_req.start
904 			|| range_end != file_req.start + file_req.length)
905 		{
906 			// the byte range in the http response is different what we expected
907 			received_bytes(0, int(recv_buffer.size()));
908 
909 #ifndef TORRENT_DISABLE_LOGGING
910 			if (should_log(peer_log_alert::incoming))
911 			{
912 				peer_log(peer_log_alert::incoming, "INVALID HTTP RESPONSE"
913 					, "in=(%d, %" PRId64 "-%" PRId64 ") expected=(%d, %" PRId64 "-%" PRId64 ") ]"
914 					, static_cast<int>(file_req.file_index), range_start, range_end
915 					, static_cast<int>(file_req.file_index), file_req.start, file_req.start + file_req.length - 1);
916 			}
917 #endif
918 			disconnect(errors::invalid_range, operation_t::bittorrent, peer_error);
919 			return;
920 		}
921 
922 		if (m_parser.chunked_encoding())
923 		{
924 
925 			// =========================
926 			// === CHUNKED ENCODING  ===
927 			// =========================
928 
929 			while (m_chunk_pos >= 0 && !recv_buffer.empty())
930 			{
931 				// first deliver any payload we have in the buffer so far, ahead of
932 				// the next chunk header.
933 				if (m_chunk_pos > 0)
934 				{
935 					int const copy_size = std::min(m_chunk_pos, int(recv_buffer.size()));
936 					TORRENT_ASSERT(copy_size > 0);
937 
938 					if (m_received_body + copy_size > file_req.length)
939 					{
940 						// the byte range in the http response is different what we expected
941 						received_bytes(0, int(recv_buffer.size()));
942 
943 #ifndef TORRENT_DISABLE_LOGGING
944 						peer_log(peer_log_alert::incoming, "INVALID HTTP RESPONSE"
945 							, "received body: %d request size: %d"
946 							, m_received_body, file_req.length);
947 #endif
948 						disconnect(errors::invalid_range, operation_t::bittorrent, peer_error);
949 						return;
950 					}
951 					incoming_payload(recv_buffer.data(), copy_size);
952 
953 					recv_buffer = recv_buffer.subspan(copy_size);
954 					m_chunk_pos -= copy_size;
955 
956 					if (recv_buffer.empty()) goto done;
957 				}
958 
959 				TORRENT_ASSERT(m_chunk_pos == 0);
960 
961 				int header_size = 0;
962 				std::int64_t chunk_size = 0;
963 				span<char const> chunk_start = recv_buffer.subspan(m_chunk_pos);
964 				TORRENT_ASSERT(chunk_start[0] == '\r'
965 					|| aux::is_hex({chunk_start.data(), 1}));
966 				bool const ret = m_parser.parse_chunk_header(chunk_start, &chunk_size, &header_size);
967 				if (!ret)
968 				{
969 					received_bytes(0, int(chunk_start.size()) - m_partial_chunk_header);
970 					m_partial_chunk_header = int(chunk_start.size());
971 					goto done;
972 				}
973 #ifndef TORRENT_DISABLE_LOGGING
974 				peer_log(peer_log_alert::info, "CHUNKED_ENCODING"
975 					, "parsed chunk: %" PRId64 " header_size: %d"
976 					, chunk_size, header_size);
977 #endif
978 				received_bytes(0, header_size - m_partial_chunk_header);
979 				m_partial_chunk_header = 0;
980 				TORRENT_ASSERT(chunk_size != 0
981 					|| int(chunk_start.size()) <= header_size || chunk_start[header_size] == 'H');
982 				TORRENT_ASSERT(m_body_start + m_chunk_pos < INT_MAX);
983 				m_chunk_pos += int(chunk_size);
984 				recv_buffer = recv_buffer.subspan(header_size);
985 
986 				// a chunk size of zero means the request is complete. Make sure the
987 				// number of payload bytes we've received matches the number we
988 				// requested. If that's not the case, we got an invalid response.
989 				if (chunk_size == 0)
990 				{
991 					TORRENT_ASSERT_VAL(m_chunk_pos == 0, m_chunk_pos);
992 
993 #if TORRENT_USE_ASSERTS
994 					span<char const> chunk = recv_buffer.subspan(m_chunk_pos);
995 					TORRENT_ASSERT(chunk.size() == 0 || chunk[0] == 'H');
996 #endif
997 					m_chunk_pos = -1;
998 
999 					TORRENT_ASSERT(m_received_body <= file_req.length);
1000 					if (m_received_body != file_req.length)
1001 					{
1002 						// the byte range in the http response is different what we expected
1003 						received_bytes(0, int(recv_buffer.size()));
1004 
1005 #ifndef TORRENT_DISABLE_LOGGING
1006 						peer_log(peer_log_alert::incoming, "INVALID HTTP RESPONSE"
1007 							, "received body: %d request size: %d"
1008 							, m_received_body, file_req.length);
1009 #endif
1010 						disconnect(errors::invalid_range, operation_t::bittorrent, peer_error);
1011 						return;
1012 					}
1013 					// we just completed an HTTP file request. pop it from m_file_requests
1014 					m_file_requests.pop_front();
1015 					m_parser.reset();
1016 					m_body_start = 0;
1017 					m_received_body = 0;
1018 					m_chunk_pos = 0;
1019 					m_partial_chunk_header = 0;
1020 
1021 					// in between each file request, there may be an implicit
1022 					// pad-file request
1023 					handle_padfile();
1024 					break;
1025 				}
1026 
1027 				// if all of the receive buffer was just consumed as chunk
1028 				// header, we're done
1029 				if (recv_buffer.empty()) goto done;
1030 			}
1031 		}
1032 		else
1033 		{
1034 			// this is the simple case, where we don't have chunked encoding
1035 			TORRENT_ASSERT(m_received_body <= file_req.length);
1036 			int const copy_size = std::min(file_req.length - m_received_body
1037 				, int(recv_buffer.size()));
1038 			incoming_payload(recv_buffer.data(), copy_size);
1039 			recv_buffer = recv_buffer.subspan(copy_size);
1040 
1041 			TORRENT_ASSERT(m_received_body <= file_req.length);
1042 			if (m_received_body == file_req.length)
1043 			{
1044 				// we just completed an HTTP file request. pop it from m_file_requests
1045 				m_file_requests.pop_front();
1046 				m_parser.reset();
1047 				m_body_start = 0;
1048 				m_received_body = 0;
1049 				m_chunk_pos = 0;
1050 				m_partial_chunk_header = 0;
1051 
1052 				// in between each file request, there may be an implicit
1053 				// pad-file request
1054 				handle_padfile();
1055 			}
1056 		}
1057 
1058 		if (recv_buffer.empty()) break;
1059 	}
1060 done:
1061 
1062 	// now, remove all the bytes we've processed from the receive buffer
1063 	m_recv_buffer.cut(int(recv_buffer.data() - m_recv_buffer.get().begin())
1064 		, t->block_size() + request_size_overhead);
1065 }
1066 
incoming_payload(char const * buf,int len)1067 void web_peer_connection::incoming_payload(char const* buf, int len)
1068 {
1069 	received_bytes(len, 0);
1070 	m_received_body += len;
1071 
1072 	if (is_disconnecting()) return;
1073 
1074 #ifndef TORRENT_DISABLE_LOGGING
1075 	peer_log(peer_log_alert::incoming_message, "INCOMING_PAYLOAD", "%d bytes", len);
1076 #endif
1077 
1078 	// deliver all complete bittorrent requests to the bittorrent engine
1079 	while (len > 0)
1080 	{
1081 		if (m_requests.empty()) return;
1082 
1083 		TORRENT_ASSERT(!m_requests.empty());
1084 		peer_request const& front_request = m_requests.front();
1085 		int const piece_size = int(m_piece.size());
1086 		int const copy_size = std::min(front_request.length - piece_size, len);
1087 
1088 		// m_piece may not hold more than the response to the next BT request
1089 		TORRENT_ASSERT(front_request.length > piece_size);
1090 
1091 		// copy_size is the number of bytes we need to add to the end of m_piece
1092 		// to not exceed the size of the next bittorrent request to be delivered.
1093 		// m_piece can only hold the response for a single BT request at a time
1094 		m_piece.resize(piece_size + copy_size);
1095 		std::memcpy(m_piece.data() + piece_size, buf, aux::numeric_cast<std::size_t>(copy_size));
1096 		len -= copy_size;
1097 		buf += copy_size;
1098 
1099 		// keep peer stats up-to-date
1100 		incoming_piece_fragment(copy_size);
1101 
1102 		TORRENT_ASSERT(front_request.length >= piece_size);
1103 		if (int(m_piece.size()) == front_request.length)
1104 		{
1105 			std::shared_ptr<torrent> t = associated_torrent().lock();
1106 			TORRENT_ASSERT(t);
1107 
1108 #ifndef TORRENT_DISABLE_LOGGING
1109 			peer_log(peer_log_alert::incoming_message, "POP_REQUEST"
1110 				, "piece: %d start: %d len: %d"
1111 				, static_cast<int>(front_request.piece), front_request.start, front_request.length);
1112 #endif
1113 
1114 			// Make a copy of the request and pop it off the queue before calling
1115 			// incoming_piece because that may lead to a call to disconnect()
1116 			// which will clear the request queue and invalidate any references
1117 			// to the request
1118 			peer_request const front_request_copy = front_request;
1119 			m_requests.pop_front();
1120 
1121 			incoming_piece(front_request_copy, m_piece.data());
1122 
1123 			m_piece.clear();
1124 		}
1125 	}
1126 }
1127 
incoming_zeroes(int len)1128 void web_peer_connection::incoming_zeroes(int len)
1129 {
1130 #ifndef TORRENT_DISABLE_LOGGING
1131 	peer_log(peer_log_alert::incoming_message, "INCOMING_ZEROES", "%d bytes", len);
1132 #endif
1133 
1134 	// deliver all complete bittorrent requests to the bittorrent engine
1135 	while (len > 0)
1136 	{
1137 		TORRENT_ASSERT(!m_requests.empty());
1138 		peer_request const& front_request = m_requests.front();
1139 		int const piece_size = int(m_piece.size());
1140 		int const copy_size = std::min(front_request.length - piece_size, len);
1141 
1142 		// m_piece may not hold more than the response to the next BT request
1143 		TORRENT_ASSERT(front_request.length > piece_size);
1144 
1145 		// copy_size is the number of bytes we need to add to the end of m_piece
1146 		// to not exceed the size of the next bittorrent request to be delivered.
1147 		// m_piece can only hold the response for a single BT request at a time
1148 		m_piece.resize(piece_size + copy_size, 0);
1149 		len -= copy_size;
1150 
1151 		// keep peer stats up-to-date
1152 		incoming_piece_fragment(copy_size);
1153 
1154 		maybe_harvest_piece();
1155 	}
1156 }
1157 
maybe_harvest_piece()1158 void web_peer_connection::maybe_harvest_piece()
1159 {
1160 	peer_request const& front_request = m_requests.front();
1161 	TORRENT_ASSERT(front_request.length >= int(m_piece.size()));
1162 	if (int(m_piece.size()) != front_request.length) return;
1163 
1164 	std::shared_ptr<torrent> t = associated_torrent().lock();
1165 	TORRENT_ASSERT(t);
1166 
1167 #ifndef TORRENT_DISABLE_LOGGING
1168 	peer_log(peer_log_alert::incoming_message, "POP_REQUEST"
1169 		, "piece: %d start: %d len: %d"
1170 		, static_cast<int>(front_request.piece)
1171 		, front_request.start, front_request.length);
1172 #endif
1173 	m_requests.pop_front();
1174 
1175 	incoming_piece(front_request, m_piece.data());
1176 	m_piece.clear();
1177 }
1178 
get_specific_peer_info(peer_info & p) const1179 void web_peer_connection::get_specific_peer_info(peer_info& p) const
1180 {
1181 	web_connection_base::get_specific_peer_info(p);
1182 	p.flags |= peer_info::local_connection;
1183 	p.connection_type = peer_info::web_seed;
1184 }
1185 
handle_padfile()1186 void web_peer_connection::handle_padfile()
1187 {
1188 	if (m_file_requests.empty()) return;
1189 	if (m_requests.empty()) return;
1190 
1191 	std::shared_ptr<torrent> t = associated_torrent().lock();
1192 	TORRENT_ASSERT(t);
1193 	torrent_info const& info = t->torrent_file();
1194 
1195 	while (!m_file_requests.empty()
1196 		&& info.orig_files().pad_file_at(m_file_requests.front().file_index))
1197 	{
1198 		// the next file is a pad file. We didn't actually send
1199 		// a request for this since it most likely doesn't exist on
1200 		// the web server anyway. Just pretend that we received a
1201 		// bunch of zeroes here and pop it again
1202 		std::int64_t file_size = m_file_requests.front().length;
1203 
1204 		// in theory the pad file can span multiple bocks, hence the loop
1205 		while (file_size > 0)
1206 		{
1207 			peer_request const front_request = m_requests.front();
1208 			TORRENT_ASSERT(int(m_piece.size()) < front_request.length);
1209 
1210 			int pad_size = int(std::min(file_size
1211 					, front_request.length - std::int64_t(m_piece.size())));
1212 			TORRENT_ASSERT(pad_size > 0);
1213 			file_size -= pad_size;
1214 
1215 			incoming_zeroes(pad_size);
1216 
1217 #ifndef TORRENT_DISABLE_LOGGING
1218 			if (should_log(peer_log_alert::info))
1219 			{
1220 				peer_log(peer_log_alert::info, "HANDLE_PADFILE"
1221 					, "file: %d start: %" PRId64 " len: %d"
1222 					, static_cast<int>(m_file_requests.front().file_index)
1223 					, m_file_requests.front().start
1224 					, m_file_requests.front().length);
1225 			}
1226 #endif
1227 		}
1228 
1229 		m_file_requests.pop_front();
1230 	}
1231 }
1232 
1233 } // libtorrent namespace
1234