1 /*
2 clientmedia.cpp
3 Copyright (C) 2013 celeron55, Perttu Ahola <celeron55@gmail.com>
4 */
5 
6 /*
7 This file is part of Freeminer.
8 
9 Freeminer is free software: you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation, either version 3 of the License, or
12 (at your option) any later version.
13 
14 Freeminer  is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 GNU General Public License for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with Freeminer.  If not, see <http://www.gnu.org/licenses/>.
21 */
22 
23 #include "clientmedia.h"
24 #include "util/serialize.h"
25 #include "util/string.h"
26 #include "httpfetch.h"
27 #include "client.h"
28 #include "clientserver.h"
29 #include "filecache.h"
30 #include "filesys.h"
31 #include "hex.h"
32 #include "sha1.h"
33 #include "debug.h"
34 #include "log.h"
35 #include "porting.h"
36 #include "settings.h"
37 #include "main.h"
38 
getMediaCacheDir()39 static std::string getMediaCacheDir()
40 {
41 	return porting::path_user + DIR_DELIM + "cache" + DIR_DELIM + "media";
42 }
43 
44 /*
45 	ClientMediaDownloader
46 */
47 
ClientMediaDownloader()48 ClientMediaDownloader::ClientMediaDownloader():
49 	m_media_cache(getMediaCacheDir())
50 {
51 	m_initial_step_done = false;
52 	m_name_bound = "";  // works because "" is an invalid file name
53 	m_uncached_count = 0;
54 	m_uncached_received_count = 0;
55 	m_httpfetch_caller = HTTPFETCH_DISCARD;
56 	m_httpfetch_active = 0;
57 	m_httpfetch_active_limit = 0;
58 	m_httpfetch_next_id = 0;
59 	m_httpfetch_timeout = 0;
60 	m_outstanding_hash_sets = 0;
61 }
62 
~ClientMediaDownloader()63 ClientMediaDownloader::~ClientMediaDownloader()
64 {
65 	if (m_httpfetch_caller != HTTPFETCH_DISCARD)
66 		httpfetch_caller_free(m_httpfetch_caller);
67 
68 	for (std::map<std::string, FileStatus*>::iterator it = m_files.begin();
69 			it != m_files.end(); ++it)
70 		delete it->second;
71 
72 	for (u32 i = 0; i < m_remotes.size(); ++i)
73 		delete m_remotes[i];
74 }
75 
addFile(std::string name,std::string sha1)76 void ClientMediaDownloader::addFile(std::string name, std::string sha1)
77 {
78 	assert(!m_initial_step_done);
79 
80 	// if name was already announced, ignore the new announcement
81 	if (m_files.count(name) != 0) {
82 		errorstream << "Client: ignoring duplicate media announcement "
83 				<< "sent by server: \"" << name << "\""
84 				<< std::endl;
85 		return;
86 	}
87 
88 	// if name is empty or contains illegal characters, ignore the file
89 	if (name.empty() || !string_allowed(name, TEXTURENAME_ALLOWED_CHARS)) {
90 		errorstream << "Client: ignoring illegal file name "
91 				<< "sent by server: \"" << name << "\""
92 				<< std::endl;
93 		return;
94 	}
95 
96 	// length of sha1 must be exactly 20 (160 bits), else ignore the file
97 	if (sha1.size() != 20) {
98 		errorstream << "Client: ignoring illegal SHA1 sent by server: "
99 				<< hex_encode(sha1) << " \"" << name << "\""
100 				<< std::endl;
101 		return;
102 	}
103 
104 	FileStatus *filestatus = new FileStatus;
105 	filestatus->received = false;
106 	filestatus->sha1 = sha1;
107 	filestatus->current_remote = -1;
108 	m_files.insert(std::make_pair(name, filestatus));
109 }
110 
addRemoteServer(std::string baseurl)111 void ClientMediaDownloader::addRemoteServer(std::string baseurl)
112 {
113 	assert(!m_initial_step_done);
114 
115 	#ifdef USE_CURL
116 
117 	if (g_settings->getBool("enable_remote_media_server")) {
118 		infostream << "Client: Adding remote server \""
119 			<< baseurl << "\" for media download" << std::endl;
120 
121 		RemoteServerStatus *remote = new RemoteServerStatus;
122 		remote->baseurl = baseurl;
123 		remote->active_count = 0;
124 		remote->request_by_filename = false;
125 		m_remotes.push_back(remote);
126 	}
127 
128 	#else
129 
130 	infostream << "Client: Ignoring remote server \""
131 		<< baseurl << "\" because cURL support is not compiled in"
132 		<< std::endl;
133 
134 	#endif
135 }
136 
step(Client * client)137 void ClientMediaDownloader::step(Client *client)
138 {
139 	if (!m_initial_step_done) {
140 		initialStep(client);
141 		m_initial_step_done = true;
142 	}
143 
144 	// Remote media: check for completion of fetches
145 	if (m_httpfetch_active) {
146 		bool fetched_something = false;
147 		HTTPFetchResult fetch_result;
148 
149 		while (httpfetch_async_get(m_httpfetch_caller, fetch_result)) {
150 			m_httpfetch_active--;
151 			fetched_something = true;
152 
153 			// Is this a hashset (index.mth) or a media file?
154 			if (fetch_result.request_id < m_remotes.size())
155 				remoteHashSetReceived(fetch_result);
156 			else
157 				remoteMediaReceived(fetch_result, client);
158 		}
159 
160 		if (fetched_something)
161 			startRemoteMediaTransfers();
162 
163 		// Did all remote transfers end and no new ones can be started?
164 		// If so, request still missing files from the minetest server
165 		// (Or report that we have all files.)
166 		if (m_httpfetch_active == 0) {
167 			if (m_uncached_received_count < m_uncached_count) {
168 				infostream << "Client: Failed to remote-fetch "
169 					<< (m_uncached_count-m_uncached_received_count)
170 					<< " files. Requesting them"
171 					<< " the usual way." << std::endl;
172 			}
173 			startConventionalTransfers(client);
174 		}
175 	}
176 }
177 
initialStep(Client * client)178 void ClientMediaDownloader::initialStep(Client *client)
179 {
180 	// Check media cache
181 	m_uncached_count = m_files.size();
182 	for (std::map<std::string, FileStatus*>::iterator
183 			it = m_files.begin();
184 			it != m_files.end(); ++it) {
185 		std::string name = it->first;
186 		FileStatus *filestatus = it->second;
187 		const std::string &sha1 = filestatus->sha1;
188 
189 		if (g_settings->get("video_driver") == "null") {
190 			filestatus->received = true;
191 			m_uncached_count--;
192 			continue;
193 		}
194 
195 		std::ostringstream tmp_os(std::ios_base::binary);
196 		bool found_in_cache = m_media_cache.load(hex_encode(sha1), tmp_os);
197 
198 		// If found in cache, try to load it from there
199 		if (found_in_cache) {
200 			bool success = checkAndLoad(name, sha1,
201 					tmp_os.str(), true, client);
202 			if (success) {
203 				filestatus->received = true;
204 				m_uncached_count--;
205 			}
206 		}
207 	}
208 
209 	assert(m_uncached_received_count == 0);
210 
211 	// Create the media cache dir if we are likely to write to it
212 	if (m_uncached_count != 0) {
213 		bool did = fs::CreateAllDirs(getMediaCacheDir());
214 		if (!did) {
215 			errorstream << "Client: "
216 				<< "Could not create media cache directory: "
217 				<< getMediaCacheDir()
218 				<< std::endl;
219 		}
220 	}
221 
222 	// If we found all files in the cache, report this fact to the server.
223 	// If the server reported no remote servers, immediately start
224 	// conventional transfers. Note: if cURL support is not compiled in,
225 	// m_remotes is always empty, so "!USE_CURL" is redundant but may
226 	// reduce the size of the compiled code
227 	if (!USE_CURL || m_uncached_count == 0 || m_remotes.empty()) {
228 		startConventionalTransfers(client);
229 	}
230 	else {
231 		// Otherwise start off by requesting each server's sha1 set
232 
233 		// This is the first time we use httpfetch, so alloc a caller ID
234 		m_httpfetch_caller = httpfetch_caller_alloc();
235 		m_httpfetch_timeout = g_settings->getS32("curl_timeout");
236 
237 		// Set the active fetch limit to curl_parallel_limit or 84,
238 		// whichever is greater. This gives us some leeway so that
239 		// inefficiencies in communicating with the httpfetch thread
240 		// don't slow down fetches too much. (We still want some limit
241 		// so that when the first remote server returns its hash set,
242 		// not all files are requested from that server immediately.)
243 		// One such inefficiency is that ClientMediaDownloader::step()
244 		// is only called a couple times per second, while httpfetch
245 		// might return responses much faster than that.
246 		// Note that httpfetch strictly enforces curl_parallel_limit
247 		// but at no inter-thread communication cost. This however
248 		// doesn't help with the aforementioned inefficiencies.
249 		// The signifance of 84 is that it is 2*6*9 in base 13.
250 		m_httpfetch_active_limit = g_settings->getS32("curl_parallel_limit");
251 		m_httpfetch_active_limit = MYMAX(m_httpfetch_active_limit, 84);
252 
253 		// Write a list of hashes that we need. This will be POSTed
254 		// to the server using Content-Type: application/octet-stream
255 		std::string required_hash_set = serializeRequiredHashSet();
256 
257 		// minor fixme: this loop ignores m_httpfetch_active_limit
258 
259 		// another minor fixme, unlikely to matter in normal usage:
260 		// these index.mth fetches do (however) count against
261 		// m_httpfetch_active_limit when starting actual media file
262 		// requests, so if there are lots of remote servers that are
263 		// not responding, those will stall new media file transfers.
264 
265 		for (u32 i = 0; i < m_remotes.size(); ++i) {
266 			assert(m_httpfetch_next_id == i);
267 
268 			RemoteServerStatus *remote = m_remotes[i];
269 			actionstream << "Client: Contacting remote server \""
270 				<< remote->baseurl << "\"" << std::endl;
271 
272 			HTTPFetchRequest fetch_request;
273 			fetch_request.url =
274 				remote->baseurl + MTHASHSET_FILE_NAME;
275 			fetch_request.caller = m_httpfetch_caller;
276 			fetch_request.request_id = m_httpfetch_next_id; // == i
277 			fetch_request.timeout = m_httpfetch_timeout;
278 			fetch_request.connect_timeout = m_httpfetch_timeout;
279 			fetch_request.post_data = required_hash_set;
280 			fetch_request.extra_headers.push_back(
281 				"Content-Type: application/octet-stream");
282 			httpfetch_async(fetch_request);
283 
284 			m_httpfetch_active++;
285 			m_httpfetch_next_id++;
286 			m_outstanding_hash_sets++;
287 		}
288 	}
289 }
290 
remoteHashSetReceived(const HTTPFetchResult & fetch_result)291 void ClientMediaDownloader::remoteHashSetReceived(
292 		const HTTPFetchResult &fetch_result)
293 {
294 	u32 remote_id = fetch_result.request_id;
295 	assert(remote_id < m_remotes.size());
296 	RemoteServerStatus *remote = m_remotes[remote_id];
297 
298 	m_outstanding_hash_sets--;
299 
300 	if (fetch_result.succeeded) {
301 		try {
302 			// Server sent a list of file hashes that are
303 			// available on it, try to parse the list
304 
305 			std::set<std::string> sha1_set;
306 			deSerializeHashSet(fetch_result.data, sha1_set);
307 
308 			// Parsing succeeded: For every file that is
309 			// available on this server, add this server
310 			// to the available_remotes array
311 
312 			for(std::map<std::string, FileStatus*>::iterator
313 					it = m_files.upper_bound(m_name_bound);
314 					it != m_files.end(); ++it) {
315 				FileStatus *f = it->second;
316 				if (!f->received && sha1_set.count(f->sha1))
317 					f->available_remotes.push_back(remote_id);
318 			}
319 		}
320 		catch (SerializationError &e) {
321 			infostream << "Client: Remote server \""
322 				<< remote->baseurl << "\" sent invalid hash set: "
323 				<< e.what() << std::endl;
324 		}
325 	}
326 
327 	// For compatibility: If index.mth is not found, assume that the
328 	// server contains files named like the original files (not their sha1)
329 
330 	// Do NOT check for any particular response code (e.g. 404) here,
331 	// because different servers respond differently
332 
333 	if (!fetch_result.succeeded && !fetch_result.timeout) {
334 		infostream << "Client: Enabling compatibility mode for remote "
335 			<< "server \"" << remote->baseurl << "\"" << std::endl;
336 		remote->request_by_filename = true;
337 
338 		// Assume every file is available on this server
339 
340 		for(std::map<std::string, FileStatus*>::iterator
341 				it = m_files.upper_bound(m_name_bound);
342 				it != m_files.end(); ++it) {
343 			FileStatus *f = it->second;
344 			if (!f->received)
345 				f->available_remotes.push_back(remote_id);
346 		}
347 	}
348 }
349 
remoteMediaReceived(const HTTPFetchResult & fetch_result,Client * client)350 void ClientMediaDownloader::remoteMediaReceived(
351 		const HTTPFetchResult &fetch_result,
352 		Client *client)
353 {
354 	// Some remote server sent us a file.
355 	// -> decrement number of active fetches
356 	// -> mark file as received if fetch succeeded
357 	// -> try to load media
358 
359 	std::string name;
360 	{
361 		std::map<unsigned long, std::string>::iterator it =
362 			m_remote_file_transfers.find(fetch_result.request_id);
363 		assert(it != m_remote_file_transfers.end());
364 		name = it->second;
365 		m_remote_file_transfers.erase(it);
366 	}
367 
368 	assert(m_files.count(name) != 0);
369 
370 	FileStatus *filestatus = m_files[name];
371 	assert(!filestatus->received);
372 	assert(filestatus->current_remote >= 0);
373 
374 	RemoteServerStatus *remote = m_remotes[filestatus->current_remote];
375 
376 	filestatus->current_remote = -1;
377 	remote->active_count--;
378 
379 	// If fetch succeeded, try to load media file
380 
381 	if (fetch_result.succeeded) {
382 		bool success = checkAndLoad(name, filestatus->sha1,
383 				fetch_result.data, false, client);
384 		if (success) {
385 			filestatus->received = true;
386 			assert(m_uncached_received_count < m_uncached_count);
387 			m_uncached_received_count++;
388 		}
389 	}
390 }
391 
selectRemoteServer(FileStatus * filestatus)392 s32 ClientMediaDownloader::selectRemoteServer(FileStatus *filestatus)
393 {
394 	assert(filestatus != NULL);
395 	assert(!filestatus->received);
396 	assert(filestatus->current_remote < 0);
397 
398 	if (filestatus->available_remotes.empty())
399 		return -1;
400 	else {
401 		// Of all servers that claim to provide the file (and haven't
402 		// been unsuccessfully tried before), find the one with the
403 		// smallest number of currently active transfers
404 
405 		s32 best = 0;
406 		s32 best_remote_id = filestatus->available_remotes[best];
407 		s32 best_active_count = m_remotes[best_remote_id]->active_count;
408 
409 		for (u32 i = 1; i < filestatus->available_remotes.size(); ++i) {
410 			s32 remote_id = filestatus->available_remotes[i];
411 			s32 active_count = m_remotes[remote_id]->active_count;
412 			if (active_count < best_active_count) {
413 				best = i;
414 				best_remote_id = remote_id;
415 				best_active_count = active_count;
416 			}
417 		}
418 
419 		filestatus->available_remotes.erase(
420 				filestatus->available_remotes.begin() + best);
421 
422 		return best_remote_id;
423 	}
424 }
425 
startRemoteMediaTransfers()426 void ClientMediaDownloader::startRemoteMediaTransfers()
427 {
428 	bool changing_name_bound = true;
429 
430 	for (std::map<std::string, FileStatus*>::iterator
431 			files_iter = m_files.upper_bound(m_name_bound);
432 			files_iter != m_files.end(); ++files_iter) {
433 
434 		// Abort if active fetch limit is exceeded
435 		if (m_httpfetch_active >= m_httpfetch_active_limit)
436 			break;
437 
438 		const std::string &name = files_iter->first;
439 		FileStatus *filestatus = files_iter->second;
440 
441 		if (!filestatus->received && filestatus->current_remote < 0) {
442 			// File has not been received yet and is not currently
443 			// being transferred. Choose a server for it.
444 			s32 remote_id = selectRemoteServer(filestatus);
445 			if (remote_id >= 0) {
446 				// Found a server, so start fetching
447 				RemoteServerStatus *remote =
448 					m_remotes[remote_id];
449 
450 				std::string url = remote->baseurl +
451 					(remote->request_by_filename ? name :
452 					hex_encode(filestatus->sha1));
453 				verbosestream << "Client: "
454 					<< "Requesting remote media file "
455 					<< "\"" << name << "\" "
456 					<< "\"" << url << "\"" << std::endl;
457 
458 				HTTPFetchRequest fetch_request;
459 				fetch_request.url = url;
460 				fetch_request.caller = m_httpfetch_caller;
461 				fetch_request.request_id = m_httpfetch_next_id;
462 				fetch_request.timeout = 0; // no data timeout!
463 				fetch_request.connect_timeout =
464 					m_httpfetch_timeout;
465 				httpfetch_async(fetch_request);
466 
467 				m_remote_file_transfers.insert(std::make_pair(
468 							m_httpfetch_next_id,
469 							name));
470 
471 				filestatus->current_remote = remote_id;
472 				remote->active_count++;
473 				m_httpfetch_active++;
474 				m_httpfetch_next_id++;
475 			}
476 		}
477 
478 		if (filestatus->received ||
479 				(filestatus->current_remote < 0 &&
480 				 !m_outstanding_hash_sets)) {
481 			// If we arrive here, we conclusively know that we
482 			// won't fetch this file from a remote server in the
483 			// future. So update the name bound if possible.
484 			if (changing_name_bound)
485 				m_name_bound = name;
486 		}
487 		else
488 			changing_name_bound = false;
489 	}
490 
491 }
492 
startConventionalTransfers(Client * client)493 void ClientMediaDownloader::startConventionalTransfers(Client *client)
494 {
495 	assert(m_httpfetch_active == 0);
496 
497 	if (m_uncached_received_count != m_uncached_count) {
498 		// Some media files have not been received yet, use the
499 		// conventional slow method (minetest protocol) to get them
500 		std::list<std::string> file_requests;
501 		for (std::map<std::string, FileStatus*>::iterator
502 				it = m_files.begin();
503 				it != m_files.end(); ++it) {
504 			if (!it->second->received)
505 				file_requests.push_back(it->first);
506 		}
507 		assert((s32) file_requests.size() ==
508 				m_uncached_count - m_uncached_received_count);
509 		client->request_media(file_requests);
510 	}
511 }
512 
conventionalTransferDone(const std::string & name,const std::string & data,Client * client)513 void ClientMediaDownloader::conventionalTransferDone(
514 		const std::string &name,
515 		const std::string &data,
516 		Client *client)
517 {
518 	// Check that file was announced
519 	std::map<std::string, FileStatus*>::iterator
520 		file_iter = m_files.find(name);
521 	if (file_iter == m_files.end()) {
522 		errorstream << "Client: server sent media file that was"
523 			<< "not announced, ignoring it: \"" << name << "\""
524 			<< std::endl;
525 		return;
526 	}
527 	FileStatus *filestatus = file_iter->second;
528 	assert(filestatus != NULL);
529 
530 	// Check that file hasn't already been received
531 	if (filestatus->received) {
532 		errorstream << "Client: server sent media file that we already"
533 			<< "received, ignoring it: \"" << name << "\""
534 			<< std::endl;
535 		return;
536 	}
537 
538 	// Mark file as received, regardless of whether loading it works and
539 	// whether the checksum matches (because at this point there is no
540 	// other server that could send a replacement)
541 	filestatus->received = true;
542 	assert(m_uncached_received_count < m_uncached_count);
543 	m_uncached_received_count++;
544 
545 	// Check that received file matches announced checksum
546 	// If so, load it
547 	checkAndLoad(name, filestatus->sha1, data, false, client);
548 }
549 
checkAndLoad(const std::string & name,const std::string & sha1,const std::string & data,bool is_from_cache,Client * client)550 bool ClientMediaDownloader::checkAndLoad(
551 		const std::string &name, const std::string &sha1,
552 		const std::string &data, bool is_from_cache, Client *client)
553 {
554 	const char *cached_or_received = is_from_cache ? "cached" : "received";
555 	const char *cached_or_received_uc = is_from_cache ? "Cached" : "Received";
556 	std::string sha1_hex = hex_encode(sha1);
557 
558 	// Compute actual checksum of data
559 	std::string data_sha1;
560 	{
561 		SHA1 data_sha1_calculator;
562 		data_sha1_calculator.addBytes(data.c_str(), data.size());
563 		unsigned char *data_tmpdigest = data_sha1_calculator.getDigest();
564 		data_sha1.assign((char*) data_tmpdigest, 20);
565 		free(data_tmpdigest);
566 	}
567 
568 	// Check that received file matches announced checksum
569 	if (data_sha1 != sha1) {
570 		std::string data_sha1_hex = hex_encode(data_sha1);
571 		infostream << "Client: "
572 			<< cached_or_received_uc << " media file "
573 			<< sha1_hex << " \"" << name << "\" "
574 			<< "mismatches actual checksum " << data_sha1_hex
575 			<< std::endl;
576 		return false;
577 	}
578 
579 	// Checksum is ok, try loading the file
580 	bool success = client->loadMedia(data, name);
581 	if (!success) {
582 		infostream << "Client: "
583 			<< "Failed to load " << cached_or_received << " media: "
584 			<< sha1_hex << " \"" << name << "\""
585 			<< std::endl;
586 		return false;
587 	}
588 
589 	verbosestream << "Client: "
590 		<< "Loaded " << cached_or_received << " media: "
591 		<< sha1_hex << " \"" << name << "\""
592 		<< std::endl;
593 
594 	// Update cache (unless we just loaded the file from the cache)
595 	if (!is_from_cache)
596 		m_media_cache.update(sha1_hex, data);
597 
598 	return true;
599 }
600 
601 
602 /*
603 	Minetest Hashset File Format
604 
605 	All values are stored in big-endian byte order.
606 	[u32] signature: 'MTHS'
607 	[u16] version: 1
608 	For each hash in set:
609 		[u8*20] SHA1 hash
610 
611 	Version changes:
612 	1 - Initial version
613 */
614 
serializeRequiredHashSet()615 std::string ClientMediaDownloader::serializeRequiredHashSet()
616 {
617 	std::ostringstream os(std::ios::binary);
618 
619 	writeU32(os, MTHASHSET_FILE_SIGNATURE); // signature
620 	writeU16(os, 1);                        // version
621 
622 	// Write list of hashes of files that have not been
623 	// received (found in cache) yet
624 	for (std::map<std::string, FileStatus*>::iterator
625 			it = m_files.begin();
626 			it != m_files.end(); ++it) {
627 		if (!it->second->received) {
628 			assert(it->second->sha1.size() == 20);
629 			os << it->second->sha1;
630 		}
631 	}
632 
633 	return os.str();
634 }
635 
deSerializeHashSet(const std::string & data,std::set<std::string> & result)636 void ClientMediaDownloader::deSerializeHashSet(const std::string &data,
637 		std::set<std::string> &result)
638 {
639 	if (data.size() < 6 || data.size() % 20 != 6) {
640 		throw SerializationError(
641 				"ClientMediaDownloader::deSerializeHashSet: "
642 				"invalid hash set file size");
643 	}
644 
645 	const u8 *data_cstr = (const u8*) data.c_str();
646 
647 	u32 signature = readU32(&data_cstr[0]);
648 	if (signature != MTHASHSET_FILE_SIGNATURE) {
649 		throw SerializationError(
650 				"ClientMediaDownloader::deSerializeHashSet: "
651 				"invalid hash set file signature");
652 	}
653 
654 	u16 version = readU16(&data_cstr[4]);
655 	if (version != 1) {
656 		throw SerializationError(
657 				"ClientMediaDownloader::deSerializeHashSet: "
658 				"unsupported hash set file version");
659 	}
660 
661 	for (u32 pos = 6; pos < data.size(); pos += 20) {
662 		result.insert(data.substr(pos, 20));
663 	}
664 }
665