1 // This Source Code Form is subject to the terms of the Mozilla Public
2 // License, v. 2.0. If a copy of the MPL was not distributed with this
3 // file, You can obtain one at http://mozilla.org/MPL/2.0/.
4 //
5 // Copyright © 2018 Corporation for Digital Scholarship
6
7 use cfg_if::cfg_if;
// Select the process-wide allocator at compile time: jemalloc when the
// `jemalloc` cargo feature is enabled, the system allocator otherwise.
cfg_if! {
    if #[cfg(feature="jemalloc")] {
        use jemallocator::Jemalloc;
        #[global_allocator]
        static A: Jemalloc = Jemalloc;
    } else {
        use std::alloc::System;
        #[global_allocator]
        static A: System = System;
    }
}
19
20 use citeproc::input::Reference;
21 use clap::{App, Arg, SubCommand};
22 use directories::ProjectDirs;
23 use std::fs;
24 use std::path::PathBuf;
25 use std::str::FromStr;
26 use std::sync::Arc;
27
28 mod error;
29 mod pandoc;
30 use pandoc_types::definition::{Inline, MetaValue, Pandoc as PandocDocument};
31
32 use citeproc::{LocaleFetchError, LocaleFetcher, Processor};
33 use csl::{Lang, Locale};
34
main()35 fn main() {
36 // heuristically determine if we're running as an external pandoc filter
37 // TODO: work out earliest pandoc that sets PANDOC_VERSION
38 let not_a_tty = !atty::is(atty::Stream::Stdin) && !atty::is(atty::Stream::Stdout);
39 if std::env::var("PANDOC_VERSION").is_ok() && not_a_tty {
40 do_pandoc();
41 return;
42 }
43
44 let matches = App::new("citeproc")
45 .version("0.0.0")
46 .author("Cormac Relf")
47 .about("Processes citations")
48 .subcommand(
49 SubCommand::with_name("parse-locale")
50 .about("Parses a locale file (without performing fallback)")
51 .arg(
52 Arg::with_name("lang")
53 .short("l")
54 .long("lang")
55 .takes_value(true),
56 ),
57 )
58 .subcommand(SubCommand::with_name("pandoc").about(
59 "Force Pandoc JSON filter mode. Operates on stdin > stdout.\
60 \nNormally, you can just use `pandoc -F citeproc-rs`.",
61 ))
62 .subcommand(
63 SubCommand::with_name("disamb-index")
64 .about("Prints the inverted disambiguation index for the reference library"),
65 )
66 // .arg(
67 // Arg::with_name("format")
68 // .short("f")
69 // .long("format")
70 // .value_name("FORMAT")
71 // .takes_value(true),
72 // )
73 .arg(
74 Arg::with_name("library")
75 .short("l")
76 .long("library")
77 .value_name("FILE.json")
78 .help("A CSL-JSON file")
79 .takes_value(true),
80 )
81 .arg(
82 Arg::with_name("csl")
83 .short("c")
84 .long("csl")
85 .value_name("FILE")
86 .help("A CSL style")
87 .takes_value(true),
88 )
89 .arg(
90 Arg::with_name("key")
91 .short("k")
92 .long("key")
93 .value_name("CITEKEY")
94 .help("Run against a specific citekey")
95 .takes_value(true),
96 )
97 .arg(
98 Arg::with_name("locales-dir")
99 .long("locales-dir")
100 .value_name("DIR")
101 .help("Directory with locales-xx-XX.xml files in it")
102 .takes_value(true),
103 )
104 .get_matches();
105
106 let lib_text = r#"
107 [
108 {
109 "id": "quagmire2018",
110 "type": "legal_case",
111 "volume": "4",
112 "edition": "128th & 7-9, 17th",
113 "page": "1-5",
114 "container-title": "TASCC",
115 "title": "Solomon v Garrity",
116 "author": [
117 {"family": "Beethoven", "dropping-particle": "van", "given": "Ludwig"}
118 ],
119 "editor": [
120 {"family": "Paul", "given": "John", "suffix": "II"},
121 {"family": "Mozart", "given": "Wolfgang Amadeus"},
122 {"family": "Beethoven", "dropping-particle": "van", "given": "Ludwig"}
123 ],
124 "issued": {"raw": "1995-03-01"}
125 }
126 ]
127 "#;
128
129 let filesystem_fetcher = {
130 let locales_dir = matches
131 .value_of("locales-dir")
132 .map(PathBuf::from)
133 .unwrap_or_else(|| {
134 let pd = ProjectDirs::from("net", "cormacrelf", "citeproc-rs")
135 .expect("No home directory found.");
136 let mut locales_dir = pd.cache_dir().to_owned();
137 locales_dir.push("locales");
138 locales_dir
139 });
140 if matches.subcommand_matches("parse-locale").is_some() {
141 let locales_dir = locales_dir.clone();
142 dbg!(locales_dir);
143 }
144 Arc::new(Filesystem::new(locales_dir))
145 };
146
147 if let Some(matches) = matches.subcommand_matches("parse-locale") {
148 let lang = if let Some(lan) = matches.value_of("lang") {
149 if let Ok(l) = Lang::from_str(lan) {
150 l
151 } else {
152 eprintln!(
153 "`{}` is not a valid language",
154 matches.value_of("lang").unwrap_or("")
155 );
156 return;
157 }
158 } else {
159 Lang::en_us()
160 };
161 fn fetch_cli(fetcher: &Filesystem, lang: &Lang) -> Option<Locale> {
162 let string = match fetcher.fetch_string(lang) {
163 Ok(opt) => opt?,
164 Err(e) => panic!("failed to read locale file, exiting\n{:?}", e),
165 };
166 let with_errors = |s: &str| Ok(Locale::from_str(s)?);
167 match with_errors(&string) {
168 Ok(l) => Some(l),
169 Err(e) => {
170 self::error::file_diagnostics(&e, "input", &string);
171 None
172 }
173 }
174 }
175 let locale = fetch_cli(&filesystem_fetcher, &lang);
176 dbg!(locale);
177 return;
178 }
179
180 // if let Some(_) = matches.subcommand_matches("disamb-index") {
181 // let mut db = Processor::new(filesystem_fetcher);
182 // db.insert_references(refs);
183 // for (tok, ids) in db.inverted_index().iter() {
184 // // if ids.len() > 1 {
185 // let token = tok.clone();
186 // let citekeys: Vec<_> = ids.iter().map(|atom| atom.to_string()).collect();
187 // dbg!((token, citekeys));
188 // // }
189 // }
190 // return;
191 // }
192
193 if let Some(csl_path) = matches.value_of("csl") {
194 let key = matches
195 .value_of("key")
196 .map(citeproc::Atom::from)
197 .unwrap_or("quagmire2018".into());
198
199 let text = fs::read_to_string(&csl_path).expect("No CSL file found at that path");
200
201 match Processor::new(&text, filesystem_fetcher) {
202 Ok(mut db) => {
203 let refs = if let Some(library_path) = matches.value_of("library") {
204 expect_refs(library_path)
205 } else {
206 serde_json::from_str(&lib_text).expect("sample lib_text not parseable")
207 };
208
209 db.init_clusters(vec![citeproc::input::Cluster {
210 id: 0,
211 note_number: 1,
212 cites: vec![citeproc::input::Cite::basic(key)],
213 }]);
214 db.inssert_references(refs);
215
216 let inlines = db.get_cluster(0).to_vec();
217
218 use pandoc_types::definition::{Block, Meta, Pandoc};
219 let doc = Pandoc(Meta::null(), vec![Block::Para(inlines)]);
220 let out = serde_json::to_string(&doc).unwrap();
221 println!("{}", out);
222 }
223 Err(e) => {
224 self::error::file_diagnostics(&e, &csl_path, &text);
225 }
226 }
227 }
228 }
229
pandoc_meta_str<'a>(doc: &'a PandocDocument, key: &str) -> Option<&'a str>230 fn pandoc_meta_str<'a>(doc: &'a PandocDocument, key: &str) -> Option<&'a str> {
231 doc.0.lookup(key).and_then(|value| match value {
232 // for metadata passed through the command line
233 // --metadata csl=my-style.csl
234 MetaValue::MetaString(s) => Some(s.as_str()),
235 MetaValue::MetaInlines(inlines) => match &inlines[..] {
236 // for inline paths with no spaces (otherwise they get split with
237 // Inline::Space)
238 // csl: "my-style.csl"
239 &[Inline::Str(ref s)] => Some(s.as_str()),
240 // for inline paths with spaces
241 // csl: "`my style.csl`{=raw}"
242 &[Inline::RawInline(_, ref s)] => Some(s.as_str()),
243 _ => None,
244 },
245 _ => None,
246 })
247 }
248
do_pandoc()249 fn do_pandoc() {
250 let filter_args = App::new("pandoc_filter")
251 .arg(Arg::with_name("output_format").required(false).index(1))
252 .get_matches();
253 let _output_format = filter_args.value_of("output_format").unwrap_or("none");
254 let input = std::io::stdin();
255 // already LineWriter buffered, but we're only writing one line of JSON so not a problem
256 let output = std::io::stdout();
257
258 let mut doc: PandocDocument =
259 serde_json::from_reader(input).expect("could not parse pandoc json");
260
261 let csl_path = pandoc_meta_str(&doc, "csl").expect("No csl path provided through metadata");
262 let text = fs::read_to_string(&csl_path).expect("No CSL file found at that path");
263
264 match Processor::new(&text, Arc::new(Filesystem::default())) {
265 Ok(mut db) => {
266 if let Some(library_path) = pandoc_meta_str(&doc, "bibliography") {
267 db.reset_references(expect_refs(library_path));
268 }
269 db.init_clusters(pandoc::get_clusters(&mut doc));
270 db.compute();
271 pandoc::write_clusters(&mut doc, &db);
272 serde_json::to_writer(output, &doc).expect("could not write pandoc json");
273 }
274 Err(e) => {
275 self::error::file_diagnostics(&e, &csl_path, &text);
276 }
277 }
278 }
279
/// Locale fetcher that reads `locales-<lang>.xml` files from a directory on
/// the local filesystem.
#[derive(Debug, Clone)]
pub struct Filesystem {
    /// Directory that contains the locale XML files.
    root: PathBuf,
}
283
284 impl Default for Filesystem {
default() -> Self285 fn default() -> Self {
286 let locales_dir = None
287 // TODO: read metadata
288 .unwrap_or_else(|| {
289 let pd = ProjectDirs::from("net", "cormacrelf", "citeproc-rs")
290 .expect("No home directory found.");
291 let mut locales_dir = pd.cache_dir().to_owned();
292 locales_dir.push("locales");
293 locales_dir
294 });
295 Filesystem::new(locales_dir)
296 }
297 }
298
299 impl Filesystem {
new(repo_dir: impl Into<PathBuf>) -> Self300 pub fn new(repo_dir: impl Into<PathBuf>) -> Self {
301 Filesystem {
302 root: repo_dir.into(),
303 }
304 }
305 }
306
307 use std::io;
308
309 impl LocaleFetcher for Filesystem {
fetch_string(&self, lang: &Lang) -> Result<Option<String>, LocaleFetchError>310 fn fetch_string(&self, lang: &Lang) -> Result<Option<String>, LocaleFetchError> {
311 let mut path = self.root.clone();
312 path.push(&format!("locales-{}.xml", lang));
313 let read = fs::read_to_string(path);
314 match read {
315 Ok(string) => Ok(Some(string)),
316 Err(e) => match e.kind() {
317 io::ErrorKind::NotFound => Ok(None),
318 _ => Err(LocaleFetchError::Io(e)),
319 },
320 }
321 }
322 }
323
expect_refs(library_path: &str) -> Vec<Reference>324 fn expect_refs(library_path: &str) -> Vec<Reference> {
325 use std::fs::File;
326 use std::io::BufReader;
327 let file = File::open(&library_path).expect("No library found at that path");
328 let reader = BufReader::new(file);
329 serde_json::from_reader(reader).expect("Could not parse JSON")
330 }
331