1 use std::borrow::Cow;
2 use std::fs;
3 use std::io;
4 use std::path::{Path, PathBuf};
5 use std::str;
6 use std::thread;
7 use std::time;
8 
9 use csv;
10 use docopt::Docopt;
11 use num_cpus;
12 use serde::de::{Deserializer, Deserialize, DeserializeOwned, Error};
13 
14 use CliResult;
15 use config::{Config, Delimiter};
16 
num_cpus() -> usize17 pub fn num_cpus() -> usize {
18     num_cpus::get()
19 }
20 
/// Builds the `MAJOR.MINOR.PATCH` version string from the
/// `CARGO_PKG_VERSION_*` variables captured at compile time.
///
/// Returns an empty string if any of the three variables was not set when
/// the crate was compiled.
pub fn version() -> String {
    let major = option_env!("CARGO_PKG_VERSION_MAJOR");
    let minor = option_env!("CARGO_PKG_VERSION_MINOR");
    let patch = option_env!("CARGO_PKG_VERSION_PATCH");

    if let (Some(major), Some(minor), Some(patch)) = (major, minor, patch) {
        return [major, minor, patch].join(".");
    }
    String::new()
}
33 
get_args<T>(usage: &str, argv: &[&str]) -> CliResult<T> where T: DeserializeOwned34 pub fn get_args<T>(usage: &str, argv: &[&str]) -> CliResult<T>
35         where T: DeserializeOwned {
36     Docopt::new(usage)
37            .and_then(|d| d.argv(argv.iter().map(|&x| x))
38                           .version(Some(version()))
39                           .deserialize())
40            .map_err(From::from)
41 }
42 
many_configs(inps: &[String], delim: Option<Delimiter>, no_headers: bool) -> Result<Vec<Config>, String>43 pub fn many_configs(inps: &[String], delim: Option<Delimiter>,
44                     no_headers: bool) -> Result<Vec<Config>, String> {
45     let mut inps = inps.to_vec();
46     if inps.is_empty() {
47         inps.push("-".to_owned()); // stdin
48     }
49     let confs = inps.into_iter()
50                     .map(|p| Config::new(&Some(p))
51                                     .delimiter(delim)
52                                     .no_headers(no_headers))
53                     .collect::<Vec<_>>();
54     errif_greater_one_stdin(&*confs)?;
55     Ok(confs)
56 }
57 
errif_greater_one_stdin(inps: &[Config]) -> Result<(), String>58 pub fn errif_greater_one_stdin(inps: &[Config]) -> Result<(), String> {
59     let nstd = inps.iter().filter(|inp| inp.is_std()).count();
60     if nstd > 1 {
61         return Err("At most one <stdin> input is allowed.".to_owned());
62     }
63     Ok(())
64 }
65 
/// Computes how many items each job should handle when `nitems` items are
/// divided among `njobs` jobs.
///
/// * If there are fewer items than jobs, the result is `nitems`.
/// * Otherwise the result is `nitems / njobs` (integer division).
/// * `njobs == 0` is treated as "no splitting" and yields `nitems` rather
///   than panicking on a division by zero.
pub fn chunk_size(nitems: usize, njobs: usize) -> usize {
    // The `njobs == 0` guard must come first: without it, `0 < 0` is false
    // and the division below would panic.
    if njobs == 0 || nitems < njobs {
        nitems
    } else {
        nitems / njobs
    }
}
73 
/// Returns how many chunks of `chunk_size` items are needed to cover
/// `nitems` items, counting a trailing partial chunk as one.
///
/// A `chunk_size` of zero returns `nitems` unchanged.
pub fn num_of_chunks(nitems: usize, chunk_size: usize) -> usize {
    match chunk_size {
        0 => nitems,
        size => {
            let full_chunks = nitems / size;
            let has_partial_chunk = nitems % size != 0;
            if has_partial_chunk {
                full_chunks + 1
            } else {
                full_chunks
            }
        }
    }
}
84 
last_modified(md: &fs::Metadata) -> u6485 pub fn last_modified(md: &fs::Metadata) -> u64 {
86     use filetime::FileTime;
87     FileTime::from_last_modification_time(md).seconds_relative_to_1970()
88 }
89 
/// Shortens `val` to at most `n` units followed by `"..."`.
///
/// * `n == None`: `val` is returned untouched.
/// * `val` is valid UTF-8: the unit is a `char`; values with at most `n`
///   chars are returned untouched, longer ones are cut to the first `n`
///   chars and `"..."` is appended.
/// * `val` is not valid UTF-8: the unit is a byte, with the same rules.
///
/// The input `Cow` is handed back as-is whenever no truncation happens.
pub fn condense<'a>(val: Cow<'a, [u8]>, n: Option<usize>) -> Cow<'a, [u8]> {
    let limit = match n {
        Some(limit) => limit,
        None => return val,
    };

    // `None` means "already short enough"; `Some` carries the shortened copy.
    let shortened: Option<Vec<u8>> = match str::from_utf8(&val) {
        Ok(text) => {
            if limit >= text.chars().count() {
                None
            } else {
                let mut cut: String = text.chars().take(limit).collect();
                cut.push_str("...");
                Some(cut.into_bytes())
            }
        }
        Err(_) => {
            // This is a non-Unicode string, so we just trim on bytes.
            if limit >= val.len() {
                None
            } else {
                let mut cut = val[..limit].to_vec();
                cut.extend_from_slice(b"...");
                Some(cut)
            }
        }
    };

    match shortened {
        Some(bytes) => Cow::Owned(bytes),
        None => val,
    }
}
115 
/// Returns `csv_path` with `.idx` appended to it (e.g. `data.csv` becomes
/// `data.csv.idx`).
///
/// The suffix is appended at the `OsString` level, so paths that are not
/// valid UTF-8 are handled without panicking.
pub fn idx_path(csv_path: &Path) -> PathBuf {
    let mut p = csv_path.to_path_buf().into_os_string();
    p.push(".idx");
    PathBuf::from(p)
}
121 
/// An optional index/count supplied on the command line.
pub type Idx = Option<usize>;

/// Resolves the `--start`, `--end`, `--len` and `--index` options into a
/// half-open `(start, end)` pair.
///
/// * Only `index` given: `(i, i + 1)`.
/// * `index` combined with any other option: error.
/// * Both `end` and `len` given: error.
/// * Neither `end` nor `len`: `(start or 0, usize::MAX)`.
/// * `end` given: `(start or 0, end)`; error if start is greater than end.
/// * `len` given: `(start or 0, start + len)`.
///
/// Additions saturate at `usize::MAX` instead of overflowing, so very
/// large `--len`/`--index` values cannot panic or wrap around into a
/// nonsensical range.
pub fn range(start: Idx, end: Idx, len: Idx, index: Idx)
            -> Result<(usize, usize), String> {
    match (start, end, len, index) {
        (None, None, None, Some(i)) => Ok((i, i.saturating_add(1))),
        (_, _, _, Some(_)) =>
            Err("--index cannot be used with --start, --end or --len".to_owned()),
        (_, Some(_), Some(_), None) =>
            Err("--end and --len cannot be used at the same time.".to_owned()),
        (_, None, None, None) => Ok((start.unwrap_or(0), ::std::usize::MAX)),
        (_, Some(e), None, None) => {
            let s = start.unwrap_or(0);
            if s > e {
                Err(format!("The end of the range ({}) must be greater than or\n\
                             equal to the start of the range ({}).", e, s))
            } else {
                Ok((s, e))
            }
        }
        (_, None, Some(l), None) => {
            let s = start.unwrap_or(0);
            // Saturate: `s + l` may exceed `usize::MAX` for user-supplied values.
            Ok((s, s.saturating_add(l)))
        }
    }
}
148 
/// Recursively creates `path`, working around the race conditions fixed by
/// https://github.com/rust-lang/rust/pull/39799.
///
/// An `AlreadyExists` error from `fs::create_dir_all` is treated as a lost
/// race and retried after a short pause; any other outcome (success or a
/// different error) is returned immediately. Once the retries are used up,
/// one final attempt is made and its result is returned as-is.
fn create_dir_all_threadsafe(path: &Path) -> io::Result<()> {
    // This shouldn't theoretically need to be any larger than the number
    // of nested directories we need to create.
    const MAX_RETRIES: usize = 20;
    const BACKOFF_MILLIS: u64 = 25;

    let mut attempts = 0;
    while attempts < MAX_RETRIES {
        let outcome = fs::create_dir_all(path);
        // `AlreadyExists` shows up when a directory in `path` was missing
        // when tested for, and another thread created it before we could.
        let lost_race = match outcome {
            Err(ref err) => err.kind() == io::ErrorKind::AlreadyExists,
            Ok(()) => false,
        };
        if !lost_race {
            return outcome;
        }
        // Sleeping is probably unnecessary since the intermediate directory
        // now exists, but back off a little so the other thread can finish.
        thread::sleep(time::Duration::from_millis(BACKOFF_MILLIS));
        attempts += 1;
    }
    // Final attempt: report whatever happens.
    fs::create_dir_all(path)
}
169 
/// Represents a filename template of the form `"{}.csv"`, where `"{}"` is
/// the place to insert the part of the filename generated by `xsv`.
#[derive(Clone, Debug)]
pub struct FilenameTemplate {
    /// Text of the template that precedes the `"{}"` placeholder.
    prefix: String,
    /// Text of the template that follows the `"{}"` placeholder.
    suffix: String,
}
177 
178 impl FilenameTemplate {
179     /// Generate a new filename using `unique_value` to replace the `"{}"`
180     /// in the template.
filename(&self, unique_value: &str) -> String181     pub fn filename(&self, unique_value: &str) -> String {
182         format!("{}{}{}", &self.prefix, unique_value, &self.suffix)
183     }
184 
185     /// Create a new, writable file in directory `path` with a filename
186     /// using `unique_value` to replace the `"{}"` in the template.  Note
187     /// that we do not output headers; the caller must do that if
188     /// desired.
writer<P>(&self, path: P, unique_value: &str) -> io::Result<csv::Writer<Box<io::Write+'static>>> where P: AsRef<Path>189     pub fn writer<P>(&self, path: P, unique_value: &str)
190                  -> io::Result<csv::Writer<Box<io::Write+'static>>>
191         where P: AsRef<Path>
192     {
193         let filename = self.filename(unique_value);
194         let full_path = path.as_ref().join(filename);
195         if let Some(parent) = full_path.parent() {
196             // We may be called concurrently, especially by parallel `xsv
197             // split`, so be careful to avoid the `create_dir_all` race
198             // condition.
199             create_dir_all_threadsafe(parent)?;
200         }
201         let spath = Some(full_path.display().to_string());
202         Config::new(&spath).writer()
203     }
204 }
205 
206 impl<'de> Deserialize<'de> for FilenameTemplate {
deserialize<D: Deserializer<'de>>( d: D, ) -> Result<FilenameTemplate, D::Error>207     fn deserialize<D: Deserializer<'de>>(
208         d: D,
209     ) -> Result<FilenameTemplate, D::Error> {
210         let raw = String::deserialize(d)?;
211         let chunks = raw.split("{}").collect::<Vec<_>>();
212         if chunks.len() == 2 {
213             Ok(FilenameTemplate {
214                 prefix: chunks[0].to_owned(),
215                 suffix: chunks[1].to_owned(),
216             })
217         } else {
218             Err(D::Error::custom(
219                 "The --filename argument must contain one '{}'."))
220         }
221     }
222 }
223