1 //
2 // Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2021
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 //
7 #include "td/telegram/files/FileDownloader.h"
8 
9 #include "td/telegram/FileReferenceManager.h"
10 #include "td/telegram/files/FileLoaderUtils.h"
11 #include "td/telegram/files/FileType.h"
12 #include "td/telegram/Global.h"
13 #include "td/telegram/net/DcId.h"
14 #include "td/telegram/SecureStorage.h"
15 #include "td/telegram/UniqueId.h"
16 
17 #include "td/utils/as.h"
18 #include "td/utils/base64.h"
19 #include "td/utils/buffer.h"
20 #include "td/utils/common.h"
21 #include "td/utils/crypto.h"
22 #include "td/utils/format.h"
23 #include "td/utils/logging.h"
24 #include "td/utils/misc.h"
25 #include "td/utils/port/path.h"
26 #include "td/utils/port/Stat.h"
27 #include "td/utils/ScopeGuard.h"
28 #include "td/utils/SliceBuilder.h"
29 #include "td/utils/UInt.h"
30 
31 #include <tuple>
32 
33 namespace td {
34 
FileDownloader(const FullRemoteFileLocation & remote,const LocalFileLocation & local,int64 size,string name,const FileEncryptionKey & encryption_key,bool is_small,bool search_file,int64 offset,int64 limit,unique_ptr<Callback> callback)35 FileDownloader::FileDownloader(const FullRemoteFileLocation &remote, const LocalFileLocation &local, int64 size,
36                                string name, const FileEncryptionKey &encryption_key, bool is_small, bool search_file,
37                                int64 offset, int64 limit, unique_ptr<Callback> callback)
38     : remote_(remote)
39     , local_(local)
40     , size_(size)
41     , name_(std::move(name))
42     , encryption_key_(encryption_key)
43     , callback_(std::move(callback))
44     , is_small_(is_small)
45     , search_file_(search_file)
46     , offset_(offset)
47     , limit_(limit) {
48   if (encryption_key.is_secret()) {
49     set_ordered_flag(true);
50   }
51   if (!encryption_key.empty()) {
52     CHECK(offset_ == 0);
53   }
54 }
55 
// Prepares the download and describes it to the generic loader.
//
// Returns an error if the local location is already Full. Otherwise tries to resume from a Partial
// local file and, when search_file_ is set and nothing was opened, tries to reuse an existing
// unencrypted local file with the same name and size, which then only needs its hashes checked.
//
// The returned FileInfo carries the size, part size, bitmask of ready parts, offset/limit and
// the only_check/need_delay flags.
Result<FileLoader::FileInfo> FileDownloader::init() {
  // try_release_fd() is invoked when this function is left, regardless of the exit path
  SCOPE_EXIT {
    try_release_fd();
  };
  if (local_.type() == LocalFileLocation::Type::Full) {
    return Status::Error("File is already downloaded");
  }
  // only logged here; the value_hash is used later by on_ok() to decrypt the file
  if (encryption_key_.is_secure() && !encryption_key_.has_value_hash()) {
    LOG(ERROR) << "Can't download Secure file with unknown value_hash";
  }
  // the size passed to the constructor is discarded for Secure files
  if (remote_.file_type_ == FileType::Secure) {
    size_ = 0;
  }
  // defaults used when there is nothing to resume from
  int32 part_size = 0;
  Bitmask bitmask{Bitmask::Ones{}, 0};
  if (local_.type() == LocalFileLocation::Type::Partial) {
    const auto &partial = local_.partial();
    path_ = partial.path_;
    auto result_fd = FileFd::open(path_, FileFd::Write | FileFd::Read);
    // TODO: check timestamps..
    // the saved progress is used only if the partial file can still be opened
    if (result_fd.is_ok()) {
      bitmask = Bitmask(Bitmask::Decode{}, partial.ready_bitmask_);
      if (encryption_key_.is_secret()) {
        // restore the decryption state saved by on_progress(): the IV and the next expected part
        LOG_CHECK(partial.iv_.size() == 32) << partial.iv_.size();
        encryption_key_.mutable_iv() = as<UInt256>(partial.iv_.data());
        next_part_ = narrow_cast<int32>(bitmask.get_ready_parts(0));
      }
      fd_ = result_fd.move_as_ok();
      part_size = partial.part_size_;
    }
  }
  // try to reuse an already existing local file instead of downloading it again;
  // only for unencrypted non-web files of known size below 1000 MiB
  if (search_file_ && fd_.empty() && size_ > 0 && size_ < 1000 * (1 << 20) && encryption_key_.empty() &&
      !remote_.is_web()) {
    // the lambda lets TRY_RESULT return early without failing init(); the returned Status is dropped
    [&] {
      TRY_RESULT(path, search_file(get_files_dir(remote_.file_type_), name_, size_));
      TRY_RESULT(fd, FileFd::open(path, FileFd::Read));
      LOG(INFO) << "Check hash of local file " << path;
      path_ = std::move(path);
      fd_ = std::move(fd);
      // the found file is only verified in check_loop(), nothing is written to it
      need_check_ = true;
      only_check_ = true;
      part_size = 32 * (1 << 10);
      // mark all ceil(size_ / part_size) parts as ready
      bitmask = Bitmask{Bitmask::Ones{}, (size_ + part_size - 1) / part_size};
      return Status::OK();
    }();
  }

  FileInfo res;
  res.size = size_;
  res.is_size_final = true;
  res.part_size = part_size;
  res.ready_parts = bitmask.as_vector();
  res.use_part_count_limit = false;
  res.only_check = only_check_;
  auto file_type = remote_.file_type_;
  // need_delay is requested for non-small files of the listed types;
  // Encrypted files additionally must be bigger than 1 MiB
  res.need_delay =
      !is_small_ &&
      (file_type == FileType::VideoNote || file_type == FileType::Document || file_type == FileType::DocumentAsFile ||
       file_type == FileType::VoiceNote || file_type == FileType::Audio || file_type == FileType::Video ||
       file_type == FileType::Animation || (file_type == FileType::Encrypted && size_ > (1 << 20)));
  res.offset = offset_;
  res.limit = limit_;
  return res;
}
120 
on_ok(int64 size)121 Status FileDownloader::on_ok(int64 size) {
122   auto dir = get_files_dir(remote_.file_type_);
123 
124   std::string path;
125   fd_.close();
126   if (encryption_key_.is_secure()) {
127     TRY_RESULT(file_path, open_temp_file(remote_.file_type_));
128     string tmp_path;
129     std::tie(std::ignore, tmp_path) = std::move(file_path);
130     TRY_STATUS(secure_storage::decrypt_file(encryption_key_.secret(), encryption_key_.value_hash(), path_, tmp_path));
131     unlink(path_).ignore();
132     path_ = std::move(tmp_path);
133     TRY_RESULT(path_stat, stat(path_));
134     size = path_stat.size_;
135   }
136   if (only_check_) {
137     path = path_;
138   } else {
139     TRY_RESULT_ASSIGN(path, create_from_temp(path_, dir, name_));
140   }
141   callback_->on_ok(FullLocalFileLocation(remote_.file_type_, std::move(path), 0), size, !only_check_);
142   return Status::OK();
143 }
144 
// Handles a failed download: closes the file descriptor and forwards the error to the callback.
void FileDownloader::on_error(Status status) {
  fd_.close();
  callback_->on_error(std::move(status));
}
149 
// Inspects the answer for a part and tells whether the part has to be requested again
// (the name suggests that `true` means "restart"; the caller is outside of this file).
//
// Handles the CDN-related control answers: invalid tokens, a redirect to a CDN, a successful
// reupload to the CDN and a request to reupload a part. Returns an error if the answer can't be
// parsed or if the CDN redirect contains a key or an IV of an unexpected size.
Result<bool> FileDownloader::should_restart_part(Part part, NetQueryPtr &net_query) {
  // Check if we should use CDN or reupload file to CDN

  if (net_query->is_error()) {
    if (net_query->error().message() == "FILE_TOKEN_INVALID") {
      // the CDN file token is rejected; go back to requests without CDN
      use_cdn_ = false;
      return true;
    }
    if (net_query->error().message() == "REQUEST_TOKEN_INVALID") {
      return true;
    }
    // other errors are not handled here
    return false;
  }

  // the query type was stored in the query identifier by start_part()
  switch (narrow_cast<QueryType>(UniqueId::extract_key(net_query->id()))) {
    case QueryType::Default: {
      if (net_query->ok_tl_constructor() == telegram_api::upload_fileCdnRedirect::ID) {
        TRY_RESULT(file_base, fetch_result<telegram_api::upload_getFile>(net_query->ok()));
        CHECK(file_base->get_id() == telegram_api::upload_fileCdnRedirect::ID);
        auto file = move_tl_object_as<telegram_api::upload_fileCdnRedirect>(file_base);
        LOG(DEBUG) << part.id << " got REDIRECT " << to_string(file);

        auto new_cdn_file_token = file->file_token_.as_slice();
        if (cdn_file_token_ == new_cdn_file_token) {
          // this redirect is already applied; just request the part again
          return true;
        }

        // switch to the CDN; downloaded data has to be verified against hashes from now on
        use_cdn_ = true;
        need_check_ = true;
        // a new generation lets the CDN case below detect parts requested with an older token
        cdn_file_token_generation_++;
        cdn_file_token_ = new_cdn_file_token.str();
        cdn_dc_id_ = DcId::external(file->dc_id_);
        cdn_encryption_key_ = file->encryption_key_.as_slice().str();
        cdn_encryption_iv_ = file->encryption_iv_.as_slice().str();
        add_hash_info(file->file_hashes_);
        // process_part() relies on these sizes when it patches the last 4 bytes of the IV
        if (cdn_encryption_iv_.size() != 16 || cdn_encryption_key_.size() != 32) {
          return Status::Error("Wrong ctr key or iv size");
        }

        return true;
      }
      return false;
    }
    case QueryType::ReuploadCDN: {
      // the reupload succeeded; remember the received hashes and request the part from the CDN again
      TRY_RESULT(file_hashes, fetch_result<telegram_api::upload_reuploadCdnFile>(net_query->ok()));
      add_hash_info(file_hashes);
      LOG(DEBUG) << part.id << " got REUPLOAD_OK";
      return true;
    }
    case QueryType::CDN: {
      if (net_query->ok_tl_constructor() == telegram_api::upload_cdnFileReuploadNeeded::ID) {
        TRY_RESULT(file_base, fetch_result<telegram_api::upload_getCdnFile>(net_query->ok()));
        CHECK(file_base->get_id() == telegram_api::upload_cdnFileReuploadNeeded::ID);
        auto file = move_tl_object_as<telegram_api::upload_cdnFileReuploadNeeded>(file_base);
        LOG(DEBUG) << part.id << " got REUPLOAD " << to_string(file);
        // start_part() sends a reupload query for parts that have a stored request token
        cdn_part_reupload_token_[part.id] = file->request_token_.as_slice().str();
        return true;
      }
      // start_part() records the token generation of every CDN query, so the entry must exist
      auto it = cdn_part_file_token_generation_.find(part.id);
      CHECK(it != cdn_part_file_token_generation_.end());
      if (it->second != cdn_file_token_generation_) {
        LOG(DEBUG) << part.id << " got part with old file_token";
        return true;
      }
      return false;
    }
    default:
      UNREACHABLE();
  }

  return false;
}
222 
// Creates the network query that downloads the given part.
//
// Depending on the current state this is a regular upload_getFile/upload_getWebFile query,
// an upload_getCdnFile query, or an upload_reuploadCdnFile query for a part that the CDN asked
// to reupload. The query type is encoded into the query identifier, where
// should_restart_part() and process_part() read it back.
//
// Always requests get_part_size() bytes, even if part.size is smaller.
// Returns the query paired with `false`; the meaning of the flag is defined by the caller
// (not visible in this file). Fails for web files once CDN usage is enabled.
Result<std::pair<NetQueryPtr, bool>> FileDownloader::start_part(Part part, int32 part_count, int64 streaming_offset) {
  if (encryption_key_.is_secret()) {
    // round the size up to a multiple of 16
    part.size = (part.size + 15) & ~15;  // fix for last part
  }
  // auto size = part.size;
  //// sometimes we can ask more than server has, just to check size
  // if (size < get_part_size()) {
  // size = min(size + 16, get_part_size());
  // LOG(INFO) << "Ask " << size << " instead of " << part.size;
  //}
  auto size = get_part_size();
  CHECK(part.size <= size);

  callback_->on_start_download();

  auto net_query_type = is_small_ ? NetQuery::Type::DownloadSmall : NetQuery::Type::Download;
  NetQueryPtr net_query;
  if (!use_cdn_) {
    int32 flags = 0;
#if !TD_EMSCRIPTEN
    // CDN is supported, unless we use domains instead of IPs from a browser
    if (streaming_offset == 0) {
      flags |= telegram_api::upload_getFile::CDN_SUPPORTED_MASK;
    }
#endif
    DcId dc_id = remote_.is_web() ? G()->get_webfile_dc_id() : remote_.get_dc_id();
    auto id = UniqueId::next(UniqueId::Type::Default, static_cast<uint8>(QueryType::Default));
    // web files are requested with upload_getWebFile, all other files with upload_getFile
    net_query = remote_.is_web()
                    ? G()->net_query_creator().create(
                          id,
                          telegram_api::upload_getWebFile(remote_.as_input_web_file_location(),
                                                          static_cast<int32>(part.offset), static_cast<int32>(size)),
                          dc_id, net_query_type, NetQuery::AuthFlag::On)
                    : G()->net_query_creator().create(
                          id,
                          telegram_api::upload_getFile(flags, false /*ignored*/, false /*ignored*/,
                                                       remote_.as_input_file_location(),
                                                       static_cast<int32>(part.offset), static_cast<int32>(size)),
                          dc_id, net_query_type, NetQuery::AuthFlag::On);
  } else {
    if (remote_.is_web()) {
      return Status::Error("Can't download web file from CDN");
    }
    // a stored reupload token means that the CDN asked to reupload this part first
    auto it = cdn_part_reupload_token_.find(part.id);
    if (it == cdn_part_reupload_token_.end()) {
      auto query = telegram_api::upload_getCdnFile(BufferSlice(cdn_file_token_), static_cast<int32>(part.offset),
                                                   static_cast<int32>(size));
      // remember the token generation to let should_restart_part() detect answers for an old token
      cdn_part_file_token_generation_[part.id] = cdn_file_token_generation_;
      LOG(DEBUG) << part.id << " " << to_string(query);
      // the query is sent to the CDN data center with AuthFlag::Off
      net_query =
          G()->net_query_creator().create(UniqueId::next(UniqueId::Type::Default, static_cast<uint8>(QueryType::CDN)),
                                          query, cdn_dc_id_, net_query_type, NetQuery::AuthFlag::Off);
    } else {
      auto query = telegram_api::upload_reuploadCdnFile(BufferSlice(cdn_file_token_), BufferSlice(it->second));
      LOG(DEBUG) << part.id << " " << to_string(query);
      // the reupload is requested from the data center of the file, not from the CDN
      net_query = G()->net_query_creator().create(
          UniqueId::next(UniqueId::Type::Default, static_cast<uint8>(QueryType::ReuploadCDN)), query,
          remote_.get_dc_id(), net_query_type, NetQuery::AuthFlag::On);
      // the token is used only once
      cdn_part_reupload_token_.erase(it);
    }
  }
  net_query->file_type_ = narrow_cast<int32>(remote_.file_type_);
  return std::make_pair(std::move(net_query), false);
}
287 
check_net_query(NetQueryPtr & net_query)288 Status FileDownloader::check_net_query(NetQueryPtr &net_query) {
289   if (net_query->is_error()) {
290     auto error = net_query->move_as_error();
291     if (FileReferenceManager::is_file_reference_error(error)) {
292       VLOG(file_references) << "Receive " << error << " for being downloaded file";
293       error = Status::Error(error.code(),
294                             PSLICE() << error.message() << "#BASE64" << base64_encode(remote_.get_file_reference()));
295     }
296     return error;
297   }
298   return Status::OK();
299 }
300 
// Handles a successfully received part: extracts the bytes from the answer, decrypts them if
// needed and writes them to the local file at part.offset.
//
// Returns the number of written bytes, or 0 if the server returned no data.
// Fails if the query failed, the answer can't be parsed, the server returned more data than
// requested, the file can't be opened, or the data can't be written completely.
Result<size_t> FileDownloader::process_part(Part part, NetQueryPtr net_query) {
  TRY_STATUS(check_net_query(net_query));

  BufferSlice bytes;
  bool need_cdn_decrypt = false;
  // the query type was stored in the query identifier by start_part()
  auto query_type = narrow_cast<QueryType>(UniqueId::extract_key(net_query->id()));
  switch (query_type) {
    case QueryType::Default: {
      if (remote_.is_web()) {
        TRY_RESULT(file, fetch_result<telegram_api::upload_getWebFile>(net_query->ok()));
        bytes = std::move(file->bytes_);
      } else {
        TRY_RESULT(file_base, fetch_result<telegram_api::upload_getFile>(net_query->ok()));
        // a CDN redirect must have been handled by should_restart_part() already
        CHECK(file_base->get_id() == telegram_api::upload_file::ID);
        auto file = move_tl_object_as<telegram_api::upload_file>(file_base);
        LOG(DEBUG) << part.id << " upload_getFile result " << to_string(file);
        bytes = std::move(file->bytes_);
      }
      break;
    }
    case QueryType::CDN: {
      TRY_RESULT(file_base, fetch_result<telegram_api::upload_getCdnFile>(net_query->ok()));
      CHECK(file_base->get_id() == telegram_api::upload_cdnFile::ID);
      auto file = move_tl_object_as<telegram_api::upload_cdnFile>(file_base);
      LOG(DEBUG) << part.id << " upload_getCdnFile result " << to_string(file);
      bytes = std::move(file->bytes_);
      need_cdn_decrypt = true;
      break;
    }
    default:
      // ReuploadCDN answers are never passed to this function
      UNREACHABLE();
  }

  // for secret keys the expected size is rounded up to a multiple of 16, as in start_part()
  auto padded_size = part.size;
  if (encryption_key_.is_secret()) {
    padded_size = (part.size + 15) & ~15;
  }
  if (bytes.size() > padded_size) {
    return Status::Error("Part size is more than requested");
  }
  if (bytes.empty()) {
    return 0;
  }

  // Encryption
  if (need_cdn_decrypt) {
    CHECK(part.offset % 16 == 0);
    // index of the first 16-byte block of the part ...
    auto offset = narrow_cast<uint32>(part.offset / 16);
    // ... with its 4 bytes stored in the reversed order
    offset =
        ((offset & 0xff) << 24) | ((offset & 0xff00) << 8) | ((offset & 0xff0000) >> 8) | ((offset & 0xff000000) >> 24);

    AesCtrState ctr_state;
    // the last 4 bytes of a copy of the 16-byte CDN IV are replaced with the block index
    string iv = cdn_encryption_iv_;
    as<uint32>(&iv[12]) = offset;
    ctr_state.init(cdn_encryption_key_, iv);
    // decrypt in place
    ctr_state.decrypt(bytes.as_slice(), bytes.as_slice());
  }
  if (encryption_key_.is_secret()) {
    // parts of secret files must be processed strictly one after another
    LOG_CHECK(next_part_ == part.id) << tag("expected part.id", next_part_) << "!=" << tag("part.id", part.id);
    CHECK(!next_part_stop_);
    next_part_++;
    // no more parts are expected after a part whose size isn't a multiple of 16
    if (part.size % 16 != 0) {
      next_part_stop_ = true;
    }
    // decrypt in place; the IV is passed as a mutable slice, and on_progress() saves it afterwards
    // (presumably the call advances the IV for the next part - TODO confirm)
    aes_ige_decrypt(as_slice(encryption_key_.key()), as_slice(encryption_key_.mutable_iv()), bytes.as_slice(),
                    bytes.as_slice());
  }

  // only the first part.size bytes are written; anything beyond that is dropped
  auto slice = bytes.as_slice().substr(0, part.size);
  TRY_STATUS(acquire_fd());
  LOG(INFO) << "Got " << slice.size() << " bytes at offset " << part.offset << " for \"" << path_ << '"';
  TRY_RESULT(written, fd_.pwrite(slice, part.offset));
  LOG(INFO) << "Written " << written << " bytes";
  // may write less than part.size, when size of downloadable file is unknown
  if (written != slice.size()) {
    return Status::Error("Failed to save file part to the file");
  }
  return written;
}
380 
on_progress(Progress progress)381 void FileDownloader::on_progress(Progress progress) {
382   if (progress.is_ready) {
383     // do not send partial location. will lead to wrong local_size
384     return;
385   }
386   if (progress.ready_size == 0 || path_.empty()) {
387     return;
388   }
389   if (encryption_key_.empty() || encryption_key_.is_secure()) {
390     callback_->on_partial_download(
391         PartialLocalFileLocation{remote_.file_type_, progress.part_size, path_, "", std::move(progress.ready_bitmask)},
392         progress.ready_size, progress.size);
393   } else if (encryption_key_.is_secret()) {
394     UInt256 iv;
395     if (progress.ready_part_count == next_part_) {
396       iv = encryption_key_.mutable_iv();
397     } else {
398       LOG(FATAL) << tag("ready_part_count", progress.ready_part_count) << tag("next_part", next_part_);
399     }
400     callback_->on_partial_download(PartialLocalFileLocation{remote_.file_type_, progress.part_size, path_,
401                                                             as_slice(iv).str(), std::move(progress.ready_bitmask)},
402                                    progress.ready_size, progress.size);
403   } else {
404     UNREACHABLE();
405   }
406 }
407 
// Returns the stored callback as a pointer to the base FileLoader::Callback; ownership is not transferred.
FileLoader::Callback *FileDownloader::get_callback() {
  return static_cast<FileLoader::Callback *>(callback_.get());
}
411 
process_check_query(NetQueryPtr net_query)412 Status FileDownloader::process_check_query(NetQueryPtr net_query) {
413   has_hash_query_ = false;
414   TRY_STATUS(check_net_query(net_query));
415   TRY_RESULT(file_hashes, fetch_result<telegram_api::upload_getCdnFileHashes>(std::move(net_query)));
416   add_hash_info(file_hashes);
417   return Status::OK();
418 }
419 
// Verifies the downloaded prefix of the file against the known SHA-256 hashes.
//
// checked_prefix_size - number of bytes from the beginning of the file that are already verified;
// ready_prefix_size   - number of bytes from the beginning of the file that are available locally;
// is_ready            - when true, a hash block that extends past ready_prefix_size is checked
//                       against the truncated data instead of waiting for more data.
//
// Returns a CheckInfo with the new checked prefix size, whether it changed, and at most one new
// hash query if hashes for the next block are unknown. Returns an empty CheckInfo if no check is
// needed. Fails if the file can't be opened or read, or if a hash doesn't match; for a file that
// is only checked the mismatch is reported as "FILE_DOWNLOAD_RESTART".
Result<FileLoader::CheckInfo> FileDownloader::check_loop(int64 checked_prefix_size, int64 ready_prefix_size,
                                                         bool is_ready) {
  if (!need_check_) {
    return CheckInfo{};
  }
  // try_release_fd() is invoked when this function is left, regardless of the exit path
  SCOPE_EXIT {
    try_release_fd();
  };
  CheckInfo info;
  while (checked_prefix_size < ready_prefix_size) {
    //LOG(ERROR) << "NEED TO CHECK: " << checked_prefix_size << "->" << ready_prefix_size - checked_prefix_size;
    // look for the hash block that contains the first unchecked byte
    // (hash_info_ is presumably ordered by offset - TODO confirm against the HashInfo comparator)
    HashInfo search_info;
    search_info.offset = checked_prefix_size;
    auto it = hash_info_.upper_bound(search_info);
    if (it != hash_info_.begin()) {
      --it;
    }
    if (it != hash_info_.end() && it->offset <= checked_prefix_size &&
        it->offset + narrow_cast<int64>(it->size) > checked_prefix_size) {
      int64 begin_offset = it->offset;
      int64 end_offset = it->offset + narrow_cast<int64>(it->size);
      if (ready_prefix_size < end_offset) {
        if (!is_ready) {
          // the block isn't fully downloaded yet; wait for more data
          break;
        }
        end_offset = ready_prefix_size;
      }
      // read the block back from the disk and compare its SHA-256 with the expected one
      auto size = narrow_cast<size_t>(end_offset - begin_offset);
      auto slice = BufferSlice(size);
      TRY_STATUS(acquire_fd());
      TRY_RESULT(read_size, fd_.pread(slice.as_slice(), begin_offset));
      if (size != read_size) {
        return Status::Error("Failed to read file to check hash");
      }
      string hash(32, ' ');
      sha256(slice.as_slice(), hash);

      if (hash != it->hash) {
        if (only_check_) {
          // the file found by init() doesn't match the remote file
          return Status::Error("FILE_DOWNLOAD_RESTART");
        }
        return Status::Error("Hash mismatch");
      }

      checked_prefix_size = end_offset;
      info.changed = true;
      continue;
    }
    // no known hash covers the current position; request hashes unless a request is already pending
    if (!has_hash_query_) {
      has_hash_query_ = true;
      auto query =
          telegram_api::upload_getFileHashes(remote_.as_input_file_location(), narrow_cast<int32>(checked_prefix_size));
      auto net_query_type = is_small_ ? NetQuery::Type::DownloadSmall : NetQuery::Type::Download;
      auto net_query = G()->net_query_creator().create(query, remote_.get_dc_id(), net_query_type);
      info.queries.push_back(std::move(net_query));
      break;
    }
    // Should fail?
    break;
  }
  info.need_check = need_check_;
  info.checked_prefix_size = checked_prefix_size;
  return std::move(info);
}
484 
add_hash_info(const std::vector<telegram_api::object_ptr<telegram_api::fileHash>> & hashes)485 void FileDownloader::add_hash_info(const std::vector<telegram_api::object_ptr<telegram_api::fileHash>> &hashes) {
486   for (auto &hash : hashes) {
487     //LOG(ERROR) << "ADD HASH " << hash->offset_ << "->" << hash->limit_;
488     HashInfo hash_info;
489     hash_info.size = hash->limit_;
490     hash_info.offset = hash->offset_;
491     hash_info.hash = hash->hash_.as_slice().str();
492     hash_info_.insert(std::move(hash_info));
493   }
494 }
495 
// Sets whether the file descriptor should stay open between operations and applies the new
// value immediately: an open descriptor is closed by try_release_fd() if keep_fd is false.
void FileDownloader::keep_fd_flag(bool keep_fd) {
  keep_fd_ = keep_fd;
  try_release_fd();
}
500 
try_release_fd()501 void FileDownloader::try_release_fd() {
502   if (!keep_fd_ && !fd_.empty()) {
503     fd_.close();
504   }
505 }
506 
acquire_fd()507 Status FileDownloader::acquire_fd() {
508   if (fd_.empty()) {
509     if (path_.empty()) {
510       TRY_RESULT_ASSIGN(std::tie(fd_, path_), open_temp_file(remote_.file_type_));
511     } else {
512       TRY_RESULT_ASSIGN(fd_, FileFd::open(path_, (only_check_ ? 0 : FileFd::Write) | FileFd::Read));
513     }
514   }
515   return Status::OK();
516 }
517 
518 }  // namespace td
519