1 // This Source Code Form is subject to the terms of the Mozilla Public
2 // License, v. 2.0. If a copy of the MPL was not distributed with this
3 // file, You can obtain one at http://mozilla.org/MPL/2.0/.
4 //
5 // Copyright © 2018 Corporation for Digital Scholarship
6 
7 use cfg_if::cfg_if;
// Select the process-wide allocator at compile time: jemalloc when the
// `jemalloc` cargo feature is enabled, otherwise the standard library's
// system allocator.
cfg_if! {
    if #[cfg(feature="jemalloc")] {
        use jemallocator::Jemalloc;
        #[global_allocator]
        static A: Jemalloc = Jemalloc;
    } else {
        use std::alloc::System;
        #[global_allocator]
        static A: System = System;
    }
}
19 
20 use citeproc::input::Reference;
21 use clap::{App, Arg, SubCommand};
22 use directories::ProjectDirs;
23 use std::fs;
24 use std::path::PathBuf;
25 use std::str::FromStr;
26 use std::sync::Arc;
27 
28 mod error;
29 mod pandoc;
30 use pandoc_types::definition::{Inline, MetaValue, Pandoc as PandocDocument};
31 
32 use citeproc::{LocaleFetchError, LocaleFetcher, Processor};
33 use csl::{Lang, Locale};
34 
main()35 fn main() {
36     // heuristically determine if we're running as an external pandoc filter
37     // TODO: work out earliest pandoc that sets PANDOC_VERSION
38     let not_a_tty = !atty::is(atty::Stream::Stdin) && !atty::is(atty::Stream::Stdout);
39     if std::env::var("PANDOC_VERSION").is_ok() && not_a_tty {
40         do_pandoc();
41         return;
42     }
43 
44     let matches = App::new("citeproc")
45         .version("0.0.0")
46         .author("Cormac Relf")
47         .about("Processes citations")
48         .subcommand(
49             SubCommand::with_name("parse-locale")
50                 .about("Parses a locale file (without performing fallback)")
51                 .arg(
52                     Arg::with_name("lang")
53                         .short("l")
54                         .long("lang")
55                         .takes_value(true),
56                 ),
57         )
58         .subcommand(SubCommand::with_name("pandoc").about(
59             "Force Pandoc JSON filter mode. Operates on stdin > stdout.\
60              \nNormally, you can just use `pandoc -F citeproc-rs`.",
61         ))
62         .subcommand(
63             SubCommand::with_name("disamb-index")
64                 .about("Prints the inverted disambiguation index for the reference library"),
65         )
66         // .arg(
67         //     Arg::with_name("format")
68         //         .short("f")
69         //         .long("format")
70         //         .value_name("FORMAT")
71         //         .takes_value(true),
72         // )
73         .arg(
74             Arg::with_name("library")
75                 .short("l")
76                 .long("library")
77                 .value_name("FILE.json")
78                 .help("A CSL-JSON file")
79                 .takes_value(true),
80         )
81         .arg(
82             Arg::with_name("csl")
83                 .short("c")
84                 .long("csl")
85                 .value_name("FILE")
86                 .help("A CSL style")
87                 .takes_value(true),
88         )
89         .arg(
90             Arg::with_name("key")
91                 .short("k")
92                 .long("key")
93                 .value_name("CITEKEY")
94                 .help("Run against a specific citekey")
95                 .takes_value(true),
96         )
97         .arg(
98             Arg::with_name("locales-dir")
99                 .long("locales-dir")
100                 .value_name("DIR")
101                 .help("Directory with locales-xx-XX.xml files in it")
102                 .takes_value(true),
103         )
104         .get_matches();
105 
106     let lib_text = r#"
107     [
108         {
109             "id": "quagmire2018",
110             "type": "legal_case",
111             "volume": "4",
112             "edition": "128th & 7-9, 17th",
113             "page": "1-5",
114             "container-title": "TASCC",
115             "title": "Solomon v Garrity",
116             "author": [
117                 {"family": "Beethoven", "dropping-particle": "van", "given": "Ludwig"}
118             ],
119             "editor": [
120                 {"family": "Paul", "given": "John", "suffix": "II"},
121                 {"family": "Mozart", "given": "Wolfgang Amadeus"},
122                 {"family": "Beethoven", "dropping-particle": "van", "given": "Ludwig"}
123             ],
124             "issued": {"raw": "1995-03-01"}
125         }
126     ]
127     "#;
128 
129     let filesystem_fetcher = {
130         let locales_dir = matches
131             .value_of("locales-dir")
132             .map(PathBuf::from)
133             .unwrap_or_else(|| {
134                 let pd = ProjectDirs::from("net", "cormacrelf", "citeproc-rs")
135                     .expect("No home directory found.");
136                 let mut locales_dir = pd.cache_dir().to_owned();
137                 locales_dir.push("locales");
138                 locales_dir
139             });
140         if matches.subcommand_matches("parse-locale").is_some() {
141             let locales_dir = locales_dir.clone();
142             dbg!(locales_dir);
143         }
144         Arc::new(Filesystem::new(locales_dir))
145     };
146 
147     if let Some(matches) = matches.subcommand_matches("parse-locale") {
148         let lang = if let Some(lan) = matches.value_of("lang") {
149             if let Ok(l) = Lang::from_str(lan) {
150                 l
151             } else {
152                 eprintln!(
153                     "`{}` is not a valid language",
154                     matches.value_of("lang").unwrap_or("")
155                 );
156                 return;
157             }
158         } else {
159             Lang::en_us()
160         };
161         fn fetch_cli(fetcher: &Filesystem, lang: &Lang) -> Option<Locale> {
162             let string = match fetcher.fetch_string(lang) {
163                 Ok(opt) => opt?,
164                 Err(e) => panic!("failed to read locale file, exiting\n{:?}", e),
165             };
166             let with_errors = |s: &str| Ok(Locale::from_str(s)?);
167             match with_errors(&string) {
168                 Ok(l) => Some(l),
169                 Err(e) => {
170                     self::error::file_diagnostics(&e, "input", &string);
171                     None
172                 }
173             }
174         }
175         let locale = fetch_cli(&filesystem_fetcher, &lang);
176         dbg!(locale);
177         return;
178     }
179 
180     // if let Some(_) = matches.subcommand_matches("disamb-index") {
181     //     let mut db = Processor::new(filesystem_fetcher);
182     //     db.insert_references(refs);
183     //     for (tok, ids) in db.inverted_index().iter() {
184     //         // if ids.len() > 1 {
185     //         let token = tok.clone();
186     //         let citekeys: Vec<_> = ids.iter().map(|atom| atom.to_string()).collect();
187     //         dbg!((token, citekeys));
188     //         // }
189     //     }
190     //     return;
191     // }
192 
193     if let Some(csl_path) = matches.value_of("csl") {
194         let key = matches
195             .value_of("key")
196             .map(citeproc::Atom::from)
197             .unwrap_or("quagmire2018".into());
198 
199         let text = fs::read_to_string(&csl_path).expect("No CSL file found at that path");
200 
201         match Processor::new(&text, filesystem_fetcher) {
202             Ok(mut db) => {
203                 let refs = if let Some(library_path) = matches.value_of("library") {
204                     expect_refs(library_path)
205                 } else {
206                     serde_json::from_str(&lib_text).expect("sample lib_text not parseable")
207                 };
208 
209                 db.init_clusters(vec![citeproc::input::Cluster {
210                     id: 0,
211                     note_number: 1,
212                     cites: vec![citeproc::input::Cite::basic(key)],
213                 }]);
214                 db.inssert_references(refs);
215 
216                 let inlines = db.get_cluster(0).to_vec();
217 
218                 use pandoc_types::definition::{Block, Meta, Pandoc};
219                 let doc = Pandoc(Meta::null(), vec![Block::Para(inlines)]);
220                 let out = serde_json::to_string(&doc).unwrap();
221                 println!("{}", out);
222             }
223             Err(e) => {
224                 self::error::file_diagnostics(&e, &csl_path, &text);
225             }
226         }
227     }
228 }
229 
pandoc_meta_str<'a>(doc: &'a PandocDocument, key: &str) -> Option<&'a str>230 fn pandoc_meta_str<'a>(doc: &'a PandocDocument, key: &str) -> Option<&'a str> {
231     doc.0.lookup(key).and_then(|value| match value {
232         // for metadata passed through the command line
233         // --metadata csl=my-style.csl
234         MetaValue::MetaString(s) => Some(s.as_str()),
235         MetaValue::MetaInlines(inlines) => match &inlines[..] {
236             // for inline paths with no spaces (otherwise they get split with
237             // Inline::Space)
238             // csl: "my-style.csl"
239             &[Inline::Str(ref s)] => Some(s.as_str()),
240             // for inline paths with spaces
241             // csl: "`my style.csl`{=raw}"
242             &[Inline::RawInline(_, ref s)] => Some(s.as_str()),
243             _ => None,
244         },
245         _ => None,
246     })
247 }
248 
do_pandoc()249 fn do_pandoc() {
250     let filter_args = App::new("pandoc_filter")
251         .arg(Arg::with_name("output_format").required(false).index(1))
252         .get_matches();
253     let _output_format = filter_args.value_of("output_format").unwrap_or("none");
254     let input = std::io::stdin();
255     // already LineWriter buffered, but we're only writing one line of JSON so not a problem
256     let output = std::io::stdout();
257 
258     let mut doc: PandocDocument =
259         serde_json::from_reader(input).expect("could not parse pandoc json");
260 
261     let csl_path = pandoc_meta_str(&doc, "csl").expect("No csl path provided through metadata");
262     let text = fs::read_to_string(&csl_path).expect("No CSL file found at that path");
263 
264     match Processor::new(&text, Arc::new(Filesystem::default())) {
265         Ok(mut db) => {
266             if let Some(library_path) = pandoc_meta_str(&doc, "bibliography") {
267                 db.reset_references(expect_refs(library_path));
268             }
269             db.init_clusters(pandoc::get_clusters(&mut doc));
270             db.compute();
271             pandoc::write_clusters(&mut doc, &db);
272             serde_json::to_writer(output, &doc).expect("could not write pandoc json");
273         }
274         Err(e) => {
275             self::error::file_diagnostics(&e, &csl_path, &text);
276         }
277     }
278 }
279 
/// Locale fetcher that reads locale XML files from a directory on disk.
pub struct Filesystem {
    /// Directory that is searched for `locales-<lang>.xml` files.
    root: PathBuf,
}
283 
284 impl Default for Filesystem {
default() -> Self285     fn default() -> Self {
286         let locales_dir = None
287             // TODO: read metadata
288             .unwrap_or_else(|| {
289                 let pd = ProjectDirs::from("net", "cormacrelf", "citeproc-rs")
290                     .expect("No home directory found.");
291                 let mut locales_dir = pd.cache_dir().to_owned();
292                 locales_dir.push("locales");
293                 locales_dir
294             });
295         Filesystem::new(locales_dir)
296     }
297 }
298 
299 impl Filesystem {
new(repo_dir: impl Into<PathBuf>) -> Self300     pub fn new(repo_dir: impl Into<PathBuf>) -> Self {
301         Filesystem {
302             root: repo_dir.into(),
303         }
304     }
305 }
306 
307 use std::io;
308 
309 impl LocaleFetcher for Filesystem {
fetch_string(&self, lang: &Lang) -> Result<Option<String>, LocaleFetchError>310     fn fetch_string(&self, lang: &Lang) -> Result<Option<String>, LocaleFetchError> {
311         let mut path = self.root.clone();
312         path.push(&format!("locales-{}.xml", lang));
313         let read = fs::read_to_string(path);
314         match read {
315             Ok(string) => Ok(Some(string)),
316             Err(e) => match e.kind() {
317                 io::ErrorKind::NotFound => Ok(None),
318                 _ => Err(LocaleFetchError::Io(e)),
319             },
320         }
321     }
322 }
323 
expect_refs(library_path: &str) -> Vec<Reference>324 fn expect_refs(library_path: &str) -> Vec<Reference> {
325     use std::fs::File;
326     use std::io::BufReader;
327     let file = File::open(&library_path).expect("No library found at that path");
328     let reader = BufReader::new(file);
329     serde_json::from_reader(reader).expect("Could not parse JSON")
330 }
331