1 //
2 // Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2021
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 //
7 #include "td/telegram/files/FileDownloader.h"
8
9 #include "td/telegram/FileReferenceManager.h"
10 #include "td/telegram/files/FileLoaderUtils.h"
11 #include "td/telegram/files/FileType.h"
12 #include "td/telegram/Global.h"
13 #include "td/telegram/net/DcId.h"
14 #include "td/telegram/SecureStorage.h"
15 #include "td/telegram/UniqueId.h"
16
17 #include "td/utils/as.h"
18 #include "td/utils/base64.h"
19 #include "td/utils/buffer.h"
20 #include "td/utils/common.h"
21 #include "td/utils/crypto.h"
22 #include "td/utils/format.h"
23 #include "td/utils/logging.h"
24 #include "td/utils/misc.h"
25 #include "td/utils/port/path.h"
26 #include "td/utils/port/Stat.h"
27 #include "td/utils/ScopeGuard.h"
28 #include "td/utils/SliceBuilder.h"
29 #include "td/utils/UInt.h"
30
31 #include <tuple>
32
33 namespace td {
34
FileDownloader(const FullRemoteFileLocation & remote,const LocalFileLocation & local,int64 size,string name,const FileEncryptionKey & encryption_key,bool is_small,bool search_file,int64 offset,int64 limit,unique_ptr<Callback> callback)35 FileDownloader::FileDownloader(const FullRemoteFileLocation &remote, const LocalFileLocation &local, int64 size,
36 string name, const FileEncryptionKey &encryption_key, bool is_small, bool search_file,
37 int64 offset, int64 limit, unique_ptr<Callback> callback)
38 : remote_(remote)
39 , local_(local)
40 , size_(size)
41 , name_(std::move(name))
42 , encryption_key_(encryption_key)
43 , callback_(std::move(callback))
44 , is_small_(is_small)
45 , search_file_(search_file)
46 , offset_(offset)
47 , limit_(limit) {
48 if (encryption_key.is_secret()) {
49 set_ordered_flag(true);
50 }
51 if (!encryption_key.empty()) {
52 CHECK(offset_ == 0);
53 }
54 }
55
init()56 Result<FileLoader::FileInfo> FileDownloader::init() {
57 SCOPE_EXIT {
58 try_release_fd();
59 };
60 if (local_.type() == LocalFileLocation::Type::Full) {
61 return Status::Error("File is already downloaded");
62 }
63 if (encryption_key_.is_secure() && !encryption_key_.has_value_hash()) {
64 LOG(ERROR) << "Can't download Secure file with unknown value_hash";
65 }
66 if (remote_.file_type_ == FileType::Secure) {
67 size_ = 0;
68 }
69 int32 part_size = 0;
70 Bitmask bitmask{Bitmask::Ones{}, 0};
71 if (local_.type() == LocalFileLocation::Type::Partial) {
72 const auto &partial = local_.partial();
73 path_ = partial.path_;
74 auto result_fd = FileFd::open(path_, FileFd::Write | FileFd::Read);
75 // TODO: check timestamps..
76 if (result_fd.is_ok()) {
77 bitmask = Bitmask(Bitmask::Decode{}, partial.ready_bitmask_);
78 if (encryption_key_.is_secret()) {
79 LOG_CHECK(partial.iv_.size() == 32) << partial.iv_.size();
80 encryption_key_.mutable_iv() = as<UInt256>(partial.iv_.data());
81 next_part_ = narrow_cast<int32>(bitmask.get_ready_parts(0));
82 }
83 fd_ = result_fd.move_as_ok();
84 part_size = partial.part_size_;
85 }
86 }
87 if (search_file_ && fd_.empty() && size_ > 0 && size_ < 1000 * (1 << 20) && encryption_key_.empty() &&
88 !remote_.is_web()) {
89 [&] {
90 TRY_RESULT(path, search_file(get_files_dir(remote_.file_type_), name_, size_));
91 TRY_RESULT(fd, FileFd::open(path, FileFd::Read));
92 LOG(INFO) << "Check hash of local file " << path;
93 path_ = std::move(path);
94 fd_ = std::move(fd);
95 need_check_ = true;
96 only_check_ = true;
97 part_size = 32 * (1 << 10);
98 bitmask = Bitmask{Bitmask::Ones{}, (size_ + part_size - 1) / part_size};
99 return Status::OK();
100 }();
101 }
102
103 FileInfo res;
104 res.size = size_;
105 res.is_size_final = true;
106 res.part_size = part_size;
107 res.ready_parts = bitmask.as_vector();
108 res.use_part_count_limit = false;
109 res.only_check = only_check_;
110 auto file_type = remote_.file_type_;
111 res.need_delay =
112 !is_small_ &&
113 (file_type == FileType::VideoNote || file_type == FileType::Document || file_type == FileType::DocumentAsFile ||
114 file_type == FileType::VoiceNote || file_type == FileType::Audio || file_type == FileType::Video ||
115 file_type == FileType::Animation || (file_type == FileType::Encrypted && size_ > (1 << 20)));
116 res.offset = offset_;
117 res.limit = limit_;
118 return res;
119 }
120
on_ok(int64 size)121 Status FileDownloader::on_ok(int64 size) {
122 auto dir = get_files_dir(remote_.file_type_);
123
124 std::string path;
125 fd_.close();
126 if (encryption_key_.is_secure()) {
127 TRY_RESULT(file_path, open_temp_file(remote_.file_type_));
128 string tmp_path;
129 std::tie(std::ignore, tmp_path) = std::move(file_path);
130 TRY_STATUS(secure_storage::decrypt_file(encryption_key_.secret(), encryption_key_.value_hash(), path_, tmp_path));
131 unlink(path_).ignore();
132 path_ = std::move(tmp_path);
133 TRY_RESULT(path_stat, stat(path_));
134 size = path_stat.size_;
135 }
136 if (only_check_) {
137 path = path_;
138 } else {
139 TRY_RESULT_ASSIGN(path, create_from_temp(path_, dir, name_));
140 }
141 callback_->on_ok(FullLocalFileLocation(remote_.file_type_, std::move(path), 0), size, !only_check_);
142 return Status::OK();
143 }
144
on_error(Status status)145 void FileDownloader::on_error(Status status) {
146 fd_.close();
147 callback_->on_error(std::move(status));
148 }
149
should_restart_part(Part part,NetQueryPtr & net_query)150 Result<bool> FileDownloader::should_restart_part(Part part, NetQueryPtr &net_query) {
151 // Check if we should use CDN or reupload file to CDN
152
153 if (net_query->is_error()) {
154 if (net_query->error().message() == "FILE_TOKEN_INVALID") {
155 use_cdn_ = false;
156 return true;
157 }
158 if (net_query->error().message() == "REQUEST_TOKEN_INVALID") {
159 return true;
160 }
161 return false;
162 }
163
164 switch (narrow_cast<QueryType>(UniqueId::extract_key(net_query->id()))) {
165 case QueryType::Default: {
166 if (net_query->ok_tl_constructor() == telegram_api::upload_fileCdnRedirect::ID) {
167 TRY_RESULT(file_base, fetch_result<telegram_api::upload_getFile>(net_query->ok()));
168 CHECK(file_base->get_id() == telegram_api::upload_fileCdnRedirect::ID);
169 auto file = move_tl_object_as<telegram_api::upload_fileCdnRedirect>(file_base);
170 LOG(DEBUG) << part.id << " got REDIRECT " << to_string(file);
171
172 auto new_cdn_file_token = file->file_token_.as_slice();
173 if (cdn_file_token_ == new_cdn_file_token) {
174 return true;
175 }
176
177 use_cdn_ = true;
178 need_check_ = true;
179 cdn_file_token_generation_++;
180 cdn_file_token_ = new_cdn_file_token.str();
181 cdn_dc_id_ = DcId::external(file->dc_id_);
182 cdn_encryption_key_ = file->encryption_key_.as_slice().str();
183 cdn_encryption_iv_ = file->encryption_iv_.as_slice().str();
184 add_hash_info(file->file_hashes_);
185 if (cdn_encryption_iv_.size() != 16 || cdn_encryption_key_.size() != 32) {
186 return Status::Error("Wrong ctr key or iv size");
187 }
188
189 return true;
190 }
191 return false;
192 }
193 case QueryType::ReuploadCDN: {
194 TRY_RESULT(file_hashes, fetch_result<telegram_api::upload_reuploadCdnFile>(net_query->ok()));
195 add_hash_info(file_hashes);
196 LOG(DEBUG) << part.id << " got REUPLOAD_OK";
197 return true;
198 }
199 case QueryType::CDN: {
200 if (net_query->ok_tl_constructor() == telegram_api::upload_cdnFileReuploadNeeded::ID) {
201 TRY_RESULT(file_base, fetch_result<telegram_api::upload_getCdnFile>(net_query->ok()));
202 CHECK(file_base->get_id() == telegram_api::upload_cdnFileReuploadNeeded::ID);
203 auto file = move_tl_object_as<telegram_api::upload_cdnFileReuploadNeeded>(file_base);
204 LOG(DEBUG) << part.id << " got REUPLOAD " << to_string(file);
205 cdn_part_reupload_token_[part.id] = file->request_token_.as_slice().str();
206 return true;
207 }
208 auto it = cdn_part_file_token_generation_.find(part.id);
209 CHECK(it != cdn_part_file_token_generation_.end());
210 if (it->second != cdn_file_token_generation_) {
211 LOG(DEBUG) << part.id << " got part with old file_token";
212 return true;
213 }
214 return false;
215 }
216 default:
217 UNREACHABLE();
218 }
219
220 return false;
221 }
222
start_part(Part part,int32 part_count,int64 streaming_offset)223 Result<std::pair<NetQueryPtr, bool>> FileDownloader::start_part(Part part, int32 part_count, int64 streaming_offset) {
224 if (encryption_key_.is_secret()) {
225 part.size = (part.size + 15) & ~15; // fix for last part
226 }
227 // auto size = part.size;
228 //// sometimes we can ask more than server has, just to check size
229 // if (size < get_part_size()) {
230 // size = min(size + 16, get_part_size());
231 // LOG(INFO) << "Ask " << size << " instead of " << part.size;
232 //}
233 auto size = get_part_size();
234 CHECK(part.size <= size);
235
236 callback_->on_start_download();
237
238 auto net_query_type = is_small_ ? NetQuery::Type::DownloadSmall : NetQuery::Type::Download;
239 NetQueryPtr net_query;
240 if (!use_cdn_) {
241 int32 flags = 0;
242 #if !TD_EMSCRIPTEN
243 // CDN is supported, unless we use domains instead of IPs from a browser
244 if (streaming_offset == 0) {
245 flags |= telegram_api::upload_getFile::CDN_SUPPORTED_MASK;
246 }
247 #endif
248 DcId dc_id = remote_.is_web() ? G()->get_webfile_dc_id() : remote_.get_dc_id();
249 auto id = UniqueId::next(UniqueId::Type::Default, static_cast<uint8>(QueryType::Default));
250 net_query = remote_.is_web()
251 ? G()->net_query_creator().create(
252 id,
253 telegram_api::upload_getWebFile(remote_.as_input_web_file_location(),
254 static_cast<int32>(part.offset), static_cast<int32>(size)),
255 dc_id, net_query_type, NetQuery::AuthFlag::On)
256 : G()->net_query_creator().create(
257 id,
258 telegram_api::upload_getFile(flags, false /*ignored*/, false /*ignored*/,
259 remote_.as_input_file_location(),
260 static_cast<int32>(part.offset), static_cast<int32>(size)),
261 dc_id, net_query_type, NetQuery::AuthFlag::On);
262 } else {
263 if (remote_.is_web()) {
264 return Status::Error("Can't download web file from CDN");
265 }
266 auto it = cdn_part_reupload_token_.find(part.id);
267 if (it == cdn_part_reupload_token_.end()) {
268 auto query = telegram_api::upload_getCdnFile(BufferSlice(cdn_file_token_), static_cast<int32>(part.offset),
269 static_cast<int32>(size));
270 cdn_part_file_token_generation_[part.id] = cdn_file_token_generation_;
271 LOG(DEBUG) << part.id << " " << to_string(query);
272 net_query =
273 G()->net_query_creator().create(UniqueId::next(UniqueId::Type::Default, static_cast<uint8>(QueryType::CDN)),
274 query, cdn_dc_id_, net_query_type, NetQuery::AuthFlag::Off);
275 } else {
276 auto query = telegram_api::upload_reuploadCdnFile(BufferSlice(cdn_file_token_), BufferSlice(it->second));
277 LOG(DEBUG) << part.id << " " << to_string(query);
278 net_query = G()->net_query_creator().create(
279 UniqueId::next(UniqueId::Type::Default, static_cast<uint8>(QueryType::ReuploadCDN)), query,
280 remote_.get_dc_id(), net_query_type, NetQuery::AuthFlag::On);
281 cdn_part_reupload_token_.erase(it);
282 }
283 }
284 net_query->file_type_ = narrow_cast<int32>(remote_.file_type_);
285 return std::make_pair(std::move(net_query), false);
286 }
287
check_net_query(NetQueryPtr & net_query)288 Status FileDownloader::check_net_query(NetQueryPtr &net_query) {
289 if (net_query->is_error()) {
290 auto error = net_query->move_as_error();
291 if (FileReferenceManager::is_file_reference_error(error)) {
292 VLOG(file_references) << "Receive " << error << " for being downloaded file";
293 error = Status::Error(error.code(),
294 PSLICE() << error.message() << "#BASE64" << base64_encode(remote_.get_file_reference()));
295 }
296 return error;
297 }
298 return Status::OK();
299 }
300
process_part(Part part,NetQueryPtr net_query)301 Result<size_t> FileDownloader::process_part(Part part, NetQueryPtr net_query) {
302 TRY_STATUS(check_net_query(net_query));
303
304 BufferSlice bytes;
305 bool need_cdn_decrypt = false;
306 auto query_type = narrow_cast<QueryType>(UniqueId::extract_key(net_query->id()));
307 switch (query_type) {
308 case QueryType::Default: {
309 if (remote_.is_web()) {
310 TRY_RESULT(file, fetch_result<telegram_api::upload_getWebFile>(net_query->ok()));
311 bytes = std::move(file->bytes_);
312 } else {
313 TRY_RESULT(file_base, fetch_result<telegram_api::upload_getFile>(net_query->ok()));
314 CHECK(file_base->get_id() == telegram_api::upload_file::ID);
315 auto file = move_tl_object_as<telegram_api::upload_file>(file_base);
316 LOG(DEBUG) << part.id << " upload_getFile result " << to_string(file);
317 bytes = std::move(file->bytes_);
318 }
319 break;
320 }
321 case QueryType::CDN: {
322 TRY_RESULT(file_base, fetch_result<telegram_api::upload_getCdnFile>(net_query->ok()));
323 CHECK(file_base->get_id() == telegram_api::upload_cdnFile::ID);
324 auto file = move_tl_object_as<telegram_api::upload_cdnFile>(file_base);
325 LOG(DEBUG) << part.id << " upload_getCdnFile result " << to_string(file);
326 bytes = std::move(file->bytes_);
327 need_cdn_decrypt = true;
328 break;
329 }
330 default:
331 UNREACHABLE();
332 }
333
334 auto padded_size = part.size;
335 if (encryption_key_.is_secret()) {
336 padded_size = (part.size + 15) & ~15;
337 }
338 if (bytes.size() > padded_size) {
339 return Status::Error("Part size is more than requested");
340 }
341 if (bytes.empty()) {
342 return 0;
343 }
344
345 // Encryption
346 if (need_cdn_decrypt) {
347 CHECK(part.offset % 16 == 0);
348 auto offset = narrow_cast<uint32>(part.offset / 16);
349 offset =
350 ((offset & 0xff) << 24) | ((offset & 0xff00) << 8) | ((offset & 0xff0000) >> 8) | ((offset & 0xff000000) >> 24);
351
352 AesCtrState ctr_state;
353 string iv = cdn_encryption_iv_;
354 as<uint32>(&iv[12]) = offset;
355 ctr_state.init(cdn_encryption_key_, iv);
356 ctr_state.decrypt(bytes.as_slice(), bytes.as_slice());
357 }
358 if (encryption_key_.is_secret()) {
359 LOG_CHECK(next_part_ == part.id) << tag("expected part.id", next_part_) << "!=" << tag("part.id", part.id);
360 CHECK(!next_part_stop_);
361 next_part_++;
362 if (part.size % 16 != 0) {
363 next_part_stop_ = true;
364 }
365 aes_ige_decrypt(as_slice(encryption_key_.key()), as_slice(encryption_key_.mutable_iv()), bytes.as_slice(),
366 bytes.as_slice());
367 }
368
369 auto slice = bytes.as_slice().substr(0, part.size);
370 TRY_STATUS(acquire_fd());
371 LOG(INFO) << "Got " << slice.size() << " bytes at offset " << part.offset << " for \"" << path_ << '"';
372 TRY_RESULT(written, fd_.pwrite(slice, part.offset));
373 LOG(INFO) << "Written " << written << " bytes";
374 // may write less than part.size, when size of downloadable file is unknown
375 if (written != slice.size()) {
376 return Status::Error("Failed to save file part to the file");
377 }
378 return written;
379 }
380
on_progress(Progress progress)381 void FileDownloader::on_progress(Progress progress) {
382 if (progress.is_ready) {
383 // do not send partial location. will lead to wrong local_size
384 return;
385 }
386 if (progress.ready_size == 0 || path_.empty()) {
387 return;
388 }
389 if (encryption_key_.empty() || encryption_key_.is_secure()) {
390 callback_->on_partial_download(
391 PartialLocalFileLocation{remote_.file_type_, progress.part_size, path_, "", std::move(progress.ready_bitmask)},
392 progress.ready_size, progress.size);
393 } else if (encryption_key_.is_secret()) {
394 UInt256 iv;
395 if (progress.ready_part_count == next_part_) {
396 iv = encryption_key_.mutable_iv();
397 } else {
398 LOG(FATAL) << tag("ready_part_count", progress.ready_part_count) << tag("next_part", next_part_);
399 }
400 callback_->on_partial_download(PartialLocalFileLocation{remote_.file_type_, progress.part_size, path_,
401 as_slice(iv).str(), std::move(progress.ready_bitmask)},
402 progress.ready_size, progress.size);
403 } else {
404 UNREACHABLE();
405 }
406 }
407
get_callback()408 FileLoader::Callback *FileDownloader::get_callback() {
409 return static_cast<FileLoader::Callback *>(callback_.get());
410 }
411
process_check_query(NetQueryPtr net_query)412 Status FileDownloader::process_check_query(NetQueryPtr net_query) {
413 has_hash_query_ = false;
414 TRY_STATUS(check_net_query(net_query));
415 TRY_RESULT(file_hashes, fetch_result<telegram_api::upload_getCdnFileHashes>(std::move(net_query)));
416 add_hash_info(file_hashes);
417 return Status::OK();
418 }
419
check_loop(int64 checked_prefix_size,int64 ready_prefix_size,bool is_ready)420 Result<FileLoader::CheckInfo> FileDownloader::check_loop(int64 checked_prefix_size, int64 ready_prefix_size,
421 bool is_ready) {
422 if (!need_check_) {
423 return CheckInfo{};
424 }
425 SCOPE_EXIT {
426 try_release_fd();
427 };
428 CheckInfo info;
429 while (checked_prefix_size < ready_prefix_size) {
430 //LOG(ERROR) << "NEED TO CHECK: " << checked_prefix_size << "->" << ready_prefix_size - checked_prefix_size;
431 HashInfo search_info;
432 search_info.offset = checked_prefix_size;
433 auto it = hash_info_.upper_bound(search_info);
434 if (it != hash_info_.begin()) {
435 --it;
436 }
437 if (it != hash_info_.end() && it->offset <= checked_prefix_size &&
438 it->offset + narrow_cast<int64>(it->size) > checked_prefix_size) {
439 int64 begin_offset = it->offset;
440 int64 end_offset = it->offset + narrow_cast<int64>(it->size);
441 if (ready_prefix_size < end_offset) {
442 if (!is_ready) {
443 break;
444 }
445 end_offset = ready_prefix_size;
446 }
447 auto size = narrow_cast<size_t>(end_offset - begin_offset);
448 auto slice = BufferSlice(size);
449 TRY_STATUS(acquire_fd());
450 TRY_RESULT(read_size, fd_.pread(slice.as_slice(), begin_offset));
451 if (size != read_size) {
452 return Status::Error("Failed to read file to check hash");
453 }
454 string hash(32, ' ');
455 sha256(slice.as_slice(), hash);
456
457 if (hash != it->hash) {
458 if (only_check_) {
459 return Status::Error("FILE_DOWNLOAD_RESTART");
460 }
461 return Status::Error("Hash mismatch");
462 }
463
464 checked_prefix_size = end_offset;
465 info.changed = true;
466 continue;
467 }
468 if (!has_hash_query_) {
469 has_hash_query_ = true;
470 auto query =
471 telegram_api::upload_getFileHashes(remote_.as_input_file_location(), narrow_cast<int32>(checked_prefix_size));
472 auto net_query_type = is_small_ ? NetQuery::Type::DownloadSmall : NetQuery::Type::Download;
473 auto net_query = G()->net_query_creator().create(query, remote_.get_dc_id(), net_query_type);
474 info.queries.push_back(std::move(net_query));
475 break;
476 }
477 // Should fail?
478 break;
479 }
480 info.need_check = need_check_;
481 info.checked_prefix_size = checked_prefix_size;
482 return std::move(info);
483 }
484
add_hash_info(const std::vector<telegram_api::object_ptr<telegram_api::fileHash>> & hashes)485 void FileDownloader::add_hash_info(const std::vector<telegram_api::object_ptr<telegram_api::fileHash>> &hashes) {
486 for (auto &hash : hashes) {
487 //LOG(ERROR) << "ADD HASH " << hash->offset_ << "->" << hash->limit_;
488 HashInfo hash_info;
489 hash_info.size = hash->limit_;
490 hash_info.offset = hash->offset_;
491 hash_info.hash = hash->hash_.as_slice().str();
492 hash_info_.insert(std::move(hash_info));
493 }
494 }
495
keep_fd_flag(bool keep_fd)496 void FileDownloader::keep_fd_flag(bool keep_fd) {
497 keep_fd_ = keep_fd;
498 try_release_fd();
499 }
500
try_release_fd()501 void FileDownloader::try_release_fd() {
502 if (!keep_fd_ && !fd_.empty()) {
503 fd_.close();
504 }
505 }
506
acquire_fd()507 Status FileDownloader::acquire_fd() {
508 if (fd_.empty()) {
509 if (path_.empty()) {
510 TRY_RESULT_ASSIGN(std::tie(fd_, path_), open_temp_file(remote_.file_type_));
511 } else {
512 TRY_RESULT_ASSIGN(fd_, FileFd::open(path_, (only_check_ ? 0 : FileFd::Write) | FileFd::Read));
513 }
514 }
515 return Status::OK();
516 }
517
518 } // namespace td
519