1 pub mod decompress;
2 pub mod ffmpeg;
3 pub mod pandoc;
4 pub mod pdfpages;
5 pub mod poppler;
6 pub mod spawning;
7 pub mod sqlite;
8 pub mod tar;
9 pub mod tesseract;
10 pub mod zip;
11 use crate::matching::*;
12 use crate::preproc::PreprocConfig;
13 use failure::*;
14 use log::*;
15 use regex::Regex;
16 use std::borrow::Cow;
17 use std::collections::HashMap;
18 use std::io::prelude::*;
19 use std::iter::Iterator;
20 use std::path::Path;
21 use std::rc::Rc;
22 
23 pub struct AdapterMeta {
24     /// unique short name of this adapter (a-z0-9 only)
25     pub name: String,
26     /// version identifier. used to key cache entries, change if your output format changes
27     pub version: i32,
28     pub description: String,
29     /// indicates whether this adapter can descend (=call rga_preproc again). if true, the cache key needs to include the list of active adapters
30     pub recurses: bool,
31     /// list of matchers (interpreted as a OR b OR ...)
32     pub fast_matchers: Vec<FastMatcher>,
33     /// list of matchers when we have mime type detection active (interpreted as ORed)
34     /// warning: this *overrides* the fast matchers
35     pub slow_matchers: Option<Vec<SlowMatcher>>,
36 }
37 impl AdapterMeta {
38     // todo: this is pretty ugly
get_matchers<'a>( &'a self, slow: bool, ) -> Box<dyn Iterator<Item = Cow<SlowMatcher>> + 'a>39     pub fn get_matchers<'a>(
40         &'a self,
41         slow: bool,
42     ) -> Box<dyn Iterator<Item = Cow<SlowMatcher>> + 'a> {
43         match (slow, &self.slow_matchers) {
44             (true, Some(ref sm)) => Box::new(sm.iter().map(|e| Cow::Borrowed(e))),
45             (_, _) => Box::new(
46                 self.fast_matchers
47                     .iter()
48                     .map(|e| Cow::Owned(SlowMatcher::Fast(e.clone()))),
49             ),
50         }
51     }
52 }
53 
54 pub trait GetMetadata {
metadata(&self) -> &AdapterMeta55     fn metadata(&self) -> &AdapterMeta;
56 }
57 pub trait FileAdapter: GetMetadata {
58     /// adapt a file.
59     ///
60     /// detection_reason is the Matcher that was used to identify this file. Unless --rga-accurate was given, it is always a FastMatcher
adapt(&self, a: AdaptInfo, detection_reason: &SlowMatcher) -> Fallible<()>61     fn adapt(&self, a: AdaptInfo, detection_reason: &SlowMatcher) -> Fallible<()>;
62 }
63 pub struct AdaptInfo<'a> {
64     /// file path. May not be an actual file on the file system (e.g. in an archive). Used for matching file extensions.
65     pub filepath_hint: &'a Path,
66     /// true if filepath_hint is an actual file on the file system
67     pub is_real_file: bool,
68     /// depth at which this file is in archives. 0 for real filesystem
69     pub archive_recursion_depth: i32,
70     /// stream to read the file from. can be from a file or from some decoder
71     pub inp: &'a mut dyn Read,
72     /// stream to write to. will be written to from a different thread
73     pub oup: &'a mut (dyn Write + Send),
74     /// prefix every output line with this string to better indicate the file's location if it is in some archive
75     pub line_prefix: &'a str,
76     pub config: PreprocConfig<'a>,
77 }
78 
79 /// (enabledAdapters, disabledAdapters)
80 type AdaptersTuple = (Vec<Rc<dyn FileAdapter>>, Vec<Rc<dyn FileAdapter>>);
81 
get_all_adapters() -> AdaptersTuple82 pub fn get_all_adapters() -> AdaptersTuple {
83     // order in descending priority
84     let enabled_adapters: Vec<Rc<dyn FileAdapter>> = vec![
85         Rc::new(ffmpeg::FFmpegAdapter::new()),
86         Rc::new(pandoc::PandocAdapter::new()),
87         Rc::new(poppler::PopplerAdapter::new()),
88         Rc::new(zip::ZipAdapter::new()),
89         Rc::new(decompress::DecompressAdapter::new()),
90         Rc::new(tar::TarAdapter::new()),
91         Rc::new(sqlite::SqliteAdapter::new()),
92     ];
93     let disabled_adapters: Vec<Rc<dyn FileAdapter>> = vec![
94         Rc::new(pdfpages::PdfPagesAdapter::new()),
95         Rc::new(tesseract::TesseractAdapter::new()),
96     ];
97     (enabled_adapters, disabled_adapters)
98 }
99 
100 /**
101  * filter adapters by given names:
102  *
103  *  - "" means use default enabled adapter list
104  *  - "a,b" means use adapters a,b
105  *  - "-a,b" means use default list except for a and b
106  *  - "+a,b" means use default list but also a and b (a,b will be prepended to the list so given higher priority)
107  */
get_adapters_filtered<T: AsRef<str>>( adapter_names: &[T], ) -> Fallible<Vec<Rc<dyn FileAdapter>>>108 pub fn get_adapters_filtered<T: AsRef<str>>(
109     adapter_names: &[T],
110 ) -> Fallible<Vec<Rc<dyn FileAdapter>>> {
111     let (def_enabled_adapters, def_disabled_adapters) = get_all_adapters();
112     let adapters = if !adapter_names.is_empty() {
113         let adapters_map: HashMap<_, _> = def_enabled_adapters
114             .iter()
115             .chain(def_disabled_adapters.iter())
116             .map(|e| (e.metadata().name.clone(), e.clone()))
117             .collect();
118         let mut adapters = vec![];
119         let mut subtractive = false;
120         let mut additive = false;
121         for (i, name) in adapter_names.iter().enumerate() {
122             let mut name = name.as_ref();
123             if i == 0 && (name.starts_with('-')) {
124                 subtractive = true;
125                 name = &name[1..];
126                 adapters = def_enabled_adapters.clone();
127             } else if i == 0 && (name.starts_with('+')) {
128                 name = &name[1..];
129                 adapters = def_enabled_adapters.clone();
130                 additive = true;
131             }
132             if subtractive {
133                 let inx = adapters
134                     .iter()
135                     .position(|a| a.metadata().name == name)
136                     .ok_or_else(|| format_err!("Could not remove {}: Not in list", name))?;
137                 adapters.remove(inx);
138             } else {
139                 let adapter = adapters_map
140                     .get(name)
141                     .ok_or_else(|| format_err!("Unknown adapter: \"{}\"", name))?
142                     .clone();
143                 if additive {
144                     adapters.insert(0, adapter);
145                 } else {
146                     adapters.push(adapter);
147                 }
148             }
149         }
150         adapters
151     } else {
152         def_enabled_adapters
153     };
154     debug!(
155         "Chosen adapters: {}",
156         adapters
157             .iter()
158             .map(|a| a.metadata().name.clone())
159             .collect::<Vec<String>>()
160             .join(",")
161     );
162     Ok(adapters)
163 }
164