1 use crate::manifest::{FileHash, Manifest};
2 use rayon::prelude::*;
3 use sha2::{Digest, Sha256};
4 use std::collections::{HashMap, HashSet};
5 use std::error::Error;
6 use std::fs::File;
7 use std::io::BufReader;
8 use std::path::{Path, PathBuf};
9 use std::sync::Mutex;
10 use std::time::Instant;
11 
/// Accumulates file checksums for the manifest, optionally backed by an
/// on-disk JSON cache so hashes survive between runs.
pub(crate) struct Checksums {
    /// Location of the cache file, taken from the
    /// `BUILD_MANIFEST_CHECKSUM_CACHE` environment variable. `None` means no
    /// cache is loaded or stored.
    cache_path: Option<PathBuf>,
    /// Hex-encoded SHA-256 digests keyed by file path (entries added by this
    /// module use canonicalized paths). Guarded by a mutex because hashing
    /// runs on rayon worker threads.
    collected: Mutex<HashMap<PathBuf, String>>,
}
16 
17 impl Checksums {
new() -> Result<Self, Box<dyn Error>>18     pub(crate) fn new() -> Result<Self, Box<dyn Error>> {
19         let cache_path = std::env::var_os("BUILD_MANIFEST_CHECKSUM_CACHE").map(PathBuf::from);
20 
21         let mut collected = HashMap::new();
22         if let Some(path) = &cache_path {
23             if path.is_file() {
24                 collected = serde_json::from_slice(&std::fs::read(path)?)?;
25             }
26         }
27 
28         Ok(Checksums { cache_path, collected: Mutex::new(collected) })
29     }
30 
store_cache(&self) -> Result<(), Box<dyn Error>>31     pub(crate) fn store_cache(&self) -> Result<(), Box<dyn Error>> {
32         if let Some(path) = &self.cache_path {
33             std::fs::write(path, &serde_json::to_vec(&self.collected)?)?;
34         }
35         Ok(())
36     }
37 
fill_missing_checksums(&mut self, manifest: &mut Manifest)38     pub(crate) fn fill_missing_checksums(&mut self, manifest: &mut Manifest) {
39         let need_checksums = self.find_missing_checksums(manifest);
40         if !need_checksums.is_empty() {
41             self.collect_checksums(&need_checksums);
42         }
43         self.replace_checksums(manifest);
44     }
45 
find_missing_checksums(&mut self, manifest: &mut Manifest) -> HashSet<PathBuf>46     fn find_missing_checksums(&mut self, manifest: &mut Manifest) -> HashSet<PathBuf> {
47         let collected = self.collected.lock().unwrap();
48         let mut need_checksums = HashSet::new();
49         crate::manifest::visit_file_hashes(manifest, |file_hash| {
50             if let FileHash::Missing(path) = file_hash {
51                 let path = std::fs::canonicalize(path).unwrap();
52                 if !collected.contains_key(&path) {
53                     need_checksums.insert(path);
54                 }
55             }
56         });
57         need_checksums
58     }
59 
replace_checksums(&mut self, manifest: &mut Manifest)60     fn replace_checksums(&mut self, manifest: &mut Manifest) {
61         let collected = self.collected.lock().unwrap();
62         crate::manifest::visit_file_hashes(manifest, |file_hash| {
63             if let FileHash::Missing(path) = file_hash {
64                 let path = std::fs::canonicalize(path).unwrap();
65                 match collected.get(&path) {
66                     Some(hash) => *file_hash = FileHash::Present(hash.clone()),
67                     None => panic!("missing hash for file {}", path.display()),
68                 }
69             }
70         });
71     }
72 
collect_checksums(&mut self, files: &HashSet<PathBuf>)73     fn collect_checksums(&mut self, files: &HashSet<PathBuf>) {
74         let collection_start = Instant::now();
75         println!(
76             "collecting hashes for {} tarballs across {} threads",
77             files.len(),
78             rayon::current_num_threads().min(files.len()),
79         );
80 
81         files.par_iter().for_each(|path| match hash(path) {
82             Ok(hash) => {
83                 self.collected.lock().unwrap().insert(path.clone(), hash);
84             }
85             Err(err) => eprintln!("error while fetching the hash for {}: {}", path.display(), err),
86         });
87 
88         println!("collected {} hashes in {:.2?}", files.len(), collection_start.elapsed());
89     }
90 }
91 
hash(path: &Path) -> Result<String, Box<dyn Error>>92 fn hash(path: &Path) -> Result<String, Box<dyn Error>> {
93     let mut file = BufReader::new(File::open(path)?);
94     let mut sha256 = Sha256::default();
95     std::io::copy(&mut file, &mut sha256)?;
96     Ok(hex::encode(sha256.finalize()))
97 }
98