1 // This Source Code Form is subject to the terms of the Mozilla Public
2 // License, v. 2.0. If a copy of the MPL was not distributed with this
3 // file, You can obtain one at http://mozilla.org/MPL/2.0/.
4 //
5 // Copyright © 2019 Corporation for Digital Scholarship
6 
7 use super::{Format, Mode, TestCase};
8 
9 use citeproc::prelude::*;
10 use citeproc::string_id::{Cluster as ClusterStr};
11 use citeproc_io::{Cite, Locators, Reference, Suppression, SmartString};
12 
13 use lazy_static::lazy_static;
14 use std::mem;
15 use std::str::FromStr;
16 
17 /// Techincally reference IDs are allowed to be numbers.
get_ref_id<'de, D>(d: D) -> Result<String, D::Error> where D: Deserializer<'de>,18 fn get_ref_id<'de, D>(d: D) -> Result<String, D::Error>
19 where
20     D: Deserializer<'de>,
21 {
22     use citeproc_io::NumberLike;
23     let s = NumberLike::deserialize(d)?;
24     Ok(s.into_string())
25 }
26 
27 #[derive(Deserialize, Clone, Debug, PartialEq)]
28 #[serde(untagged)]
29 pub enum CitationItem {
30     Array(Vec<CiteprocJsCite>),
31     Map { cites: Vec<CiteprocJsCite> },
32 }
33 
34 impl CitationItem {
to_note_cluster(self, index: u32) -> ClusterStr<Markup>35     pub fn to_note_cluster(self, index: u32) -> ClusterStr<Markup> {
36         let v = match self {
37             CitationItem::Array(v) => v,
38             CitationItem::Map { cites } => cites,
39         };
40         let cites = v.iter().map(CiteprocJsCite::to_cite).collect();
41         ClusterStr {
42             id: index.to_string().into(),
43             cites,
44         }
45     }
46 }
47 
48 #[derive(Deserialize, Clone, Debug, PartialEq)]
49 #[serde(rename_all = "kebab-case")]
50 pub struct CiteprocJsCite {
51     #[serde(deserialize_with = "get_ref_id")]
52     id: String,
53 
54     #[serde(default, flatten)]
55     locators: Option<Locators>,
56 
57     #[serde(default)]
58     prefix: Option<String>,
59     #[serde(default)]
60     suffix: Option<String>,
61     #[serde(default)]
62     suppress_author: bool,
63     #[serde(default)]
64     author_only: bool,
65 }
66 
67 impl CiteprocJsCite {
to_cite(&self) -> Cite<Markup>68     fn to_cite(&self) -> Cite<Markup> {
69         Cite {
70             ref_id: csl::Atom::from(self.id.as_str()),
71             prefix: self.prefix.as_ref().map(SmartString::from),
72             suffix: self.suffix.as_ref().map(SmartString::from),
73             locators: self.locators.clone(),
74             suppression: match (self.suppress_author, self.author_only) {
75                 (false, true) => Some(Suppression::InText),
76                 (true, false) => Some(Suppression::Rest),
77                 (false, false) => None,
78                 _ => panic!("multiple citation modes passed to CiteprocJsCite"),
79             },
80         }
81     }
82 }
83 
/// Marker found at the start of each line of a citation-mode result.
#[derive(Debug, PartialEq)]
enum ResultKind {
    /// The line started with `..`.
    Dots,
    /// The line started with `>>`.
    Arrows,
}
89 #[derive(Debug, PartialEq)]
90 pub struct CiteResult {
91     kind: ResultKind,
92     // id: u32,
93     note: ClusterNumber,
94     text: String,
95 }
96 #[derive(Debug, PartialEq)]
97 pub struct Results(pub Vec<CiteResult>);
98 
99 impl Results {
output_independent(&self) -> String100     pub fn output_independent(&self) -> String {
101         let mut output = String::new();
102         for (n, res) in self.0.iter().enumerate() {
103             // Whether or not something is recomputed is not part of the CSL spec. We will simply
104             // ignore this.
105             // output.push_str(if res.kind == ResultKind::Arrows {
106             //     ">>"
107             // } else {
108             //     ".."
109             // });
110             output.push_str("[");
111             output.push_str(&format!("{}", n));
112             output.push_str("] ");
113             output.push_str(&res.text);
114             output.push_str("\n");
115         }
116         output
117     }
118 }
119 
120 impl FromStr for Results {
121     type Err = ();
from_str(s: &str) -> Result<Self, Self::Err>122     fn from_str(s: &str) -> Result<Self, Self::Err> {
123         use nom::{
124             branch::alt,
125             bytes::complete::{tag, take_until},
126             character::complete::{char, digit1},
127             combinator::map,
128             multi::separated_list1,
129             sequence::{delimited, preceded, tuple},
130             IResult,
131         };
132         fn dots(inp: &str) -> IResult<&str, ResultKind> {
133             map(alt((tag(".."), tag(">>"))), |s| match s {
134                 ".." => ResultKind::Dots,
135                 ">>" => ResultKind::Arrows,
136                 _ => unreachable!(),
137             })(inp)
138         }
139         fn num(inp: &str) -> IResult<&str, u32> {
140             map(delimited(char('['), digit1, char(']')), |ds: &str| {
141                 u32::from_str(ds).unwrap()
142             })(inp)
143         }
144         fn formatted(inp: &str) -> IResult<&str, &str> {
145             preceded(char(' '), take_until("\n"))(inp)
146         }
147         fn total(inp: &str) -> IResult<&str, CiteResult> {
148             map(tuple((dots, num, formatted)), |(k, n, f)| CiteResult {
149                 kind: k,
150                 // id: n,
151                 // incorrect, but we don't actually know except by looking at the instructions what
152                 // the right note number is
153                 note: ClusterNumber::Note(IntraNote::Single(n)),
154                 text: crate::normalise_html(&f),
155             })(inp)
156         }
157         fn whole_thing(inp: &str) -> IResult<&str, Vec<CiteResult>> {
158             separated_list1(char('\n'), total)(inp)
159         }
160         Ok(Results(whole_thing(s).unwrap().1))
161     }
162 }
163 
164 use serde::de::{Deserialize, Deserializer};
165 
166 pub enum InstructionMode {
167     Composite,
168     AuthorOnly,
169     SuppressAuthor,
170 }
171 
172 impl<'de> Deserialize<'de> for InstructionMode {
deserialize<D>(deserializer: D) -> Result<Self, D::Error> where D: Deserializer<'de>,173     fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
174     where
175         D: Deserializer<'de>,
176     {
177         let s = String::deserialize(deserializer)?;
178         Ok(match s.as_str() {
179             "author-only" => InstructionMode::AuthorOnly,
180             "composite" => InstructionMode::Composite,
181             "suppress-author" => InstructionMode::SuppressAuthor,
182             _ => panic!("unrecognized instruction mode"),
183         })
184     }
185 }
186 
187 #[derive(Deserialize, Debug, Clone, PartialEq)]
188 #[serde(tag = "mode", rename = "kebab-case")]
189 pub enum ModeProperties {
190     Composite {
191         #[serde(default)]
192         infix: String,
193     },
194     AuthorOnly,
195     SuppressAuthor,
196 }
197 
198 #[derive(Deserialize, Debug, PartialEq, Clone)]
199 #[serde(rename_all = "camelCase")]
200 struct Properties {
201     #[serde(rename = "noteIndex", alias = "note")]
202     note_index: u32,
203     #[serde(default, flatten)]
204     mode: Option<ModeProperties>,
205 }
206 
207 #[derive(Deserialize, Debug, PartialEq, Clone)]
208 pub struct ClusterInstruction {
209     #[serde(rename = "citationID", alias = "id")]
210     cluster_id: SmartString,
211     #[serde(rename = "citationItems", alias = "cites")]
212     citation_items: Vec<CiteprocJsCite>,
213     properties: Properties,
214 }
215 
216 #[derive(Deserialize, Debug, PartialEq, Clone)]
217 pub struct PrePost(SmartString, u32);
218 
219 #[derive(Deserialize, Debug, PartialEq, Clone)]
220 pub struct CiteprocJsInstruction {
221     cluster: ClusterInstruction,
222     pre: Vec<PrePost>,
223     post: Vec<PrePost>,
224 }
225 
226 #[derive(Deserialize, Debug, PartialEq, Clone)]
227 #[serde(untagged)]
228 pub enum Instruction2 {
229     Map(CiteprocJsInstruction),
230     Array(ClusterInstruction, Vec<PrePost>, Vec<PrePost>),
231 }
232 
233 impl From<Instruction2> for CiteprocJsInstruction {
from(other: Instruction2) -> Self234     fn from(other: Instruction2) -> Self {
235         match other {
236             Instruction2::Map(i) => i,
237             Instruction2::Array(cluster, pre, post) => CiteprocJsInstruction { cluster, pre, post },
238         }
239     }
240 }
241 
242 use std::collections::HashMap;
243 
244 pub struct JsExecutor<'a> {
245     current_note_numbers: HashMap<ClusterId, ClusterNumber>,
246     proc: &'a mut Processor,
247 }
248 
249 impl JsExecutor<'_> {
new<'a>(proc: &'a mut Processor) -> JsExecutor<'a>250     pub fn new<'a>(proc: &'a mut Processor) -> JsExecutor<'a> {
251         JsExecutor {
252             current_note_numbers: HashMap::new(),
253             proc,
254         }
255     }
get_id(&mut self, string_id: &str) -> ClusterId256     fn get_id(&mut self, string_id: &str) -> ClusterId {
257         self.proc.new_cluster(string_id)
258     }
259 
get_results(&self) -> Results260     pub fn get_results(&self) -> Results {
261         let updates = self.proc.batched_updates();
262         let mut mod_clusters = HashMap::new();
263         let mut results = Vec::<CiteResult>::new();
264         for (id, text) in updates.clusters {
265             mod_clusters.insert(id, true);
266             let &note = self.current_note_numbers.get(&id).unwrap();
267             let text = (*text).clone();
268             results.push(CiteResult {
269                 kind: ResultKind::Arrows,
270                 // id,
271                 note,
272                 text: crate::normalise_html(&text),
273             })
274         }
275         // for &id in self.current_note_numbers.keys() {
276         //     if mod_clusters.contains_key(&id) {
277         //         continue;
278         //     }
279         //     let &note = self.current_note_numbers.get(&id).unwrap();
280         //     if let Some(text) = self.proc.get_cluster(id) {
281         //         results.push(CiteResult {
282         //             kind: ResultKind::Dots,
283         //             id,
284         //             note,
285         //             text: crate::normalise_html(&text),
286         //         })
287         //     }
288         // }
289         results.sort_by_key(|x| x.note);
290         Results(results)
291     }
292 
to_renumbering(&mut self, renum: &mut Vec<ClusterPosition>, prepost: &[PrePost])293     fn to_renumbering(&mut self, renum: &mut Vec<ClusterPosition>, prepost: &[PrePost]) {
294         for &PrePost(ref string_id, note_number) in prepost.iter() {
295             let id = self.get_id(string_id);
296             let note = if note_number == 0 {
297                 None
298             } else {
299                 Some(note_number)
300             };
301             renum.push(ClusterPosition { id, note })
302         }
303     }
304 
execute(&mut self, instructions: &[CiteprocJsInstruction])305     pub fn execute(&mut self, instructions: &[CiteprocJsInstruction]) {
306         self.proc.drain();
307         let mut renum = Vec::new();
308         for CiteprocJsInstruction { cluster, pre, post } in instructions {
309             let id = &cluster.cluster_id;
310             let note = cluster.properties.note_index;
311 
312             let mut cites = Vec::new();
313             for cite_item in cluster.citation_items.iter() {
314                 cites.push(cite_item.to_cite());
315             }
316 
317             renum.clear();
318             self.to_renumbering(&mut renum, pre);
319             self.to_renumbering(&mut renum, &[PrePost(cluster.cluster_id.clone(), note)]);
320             self.to_renumbering(&mut renum, post);
321             self.proc.insert_cites_str(id, &cites);
322             self.proc.set_cluster_order(&renum).unwrap();
323             for &ClusterPosition { id, .. } in &renum {
324                 if let Some(actual_note) = self.proc.get_cluster_note_number(id) {
325                     self.current_note_numbers.insert(id, actual_note);
326                 }
327             }
328         }
329     }
330 }
331 
/// One named section of a human-readable test file, holding the section's raw
/// text.
enum Chunk {
    // Required sections
    /// Name of the test mode.
    Mode(String),

    /// Expected output; interpretation depends on which mode you're using.
    ///
    /// https://github.com/citation-style-language/test-suite#result
    Result(String),

    /// XML CSL style.
    ///
    /// https://github.com/citation-style-language/test-suite#csl
    Csl(String),

    /// JSON `Reference[]` list.
    ///
    /// https://github.com/citation-style-language/test-suite#input
    Input(String),

    // Optional sections
    /// JSON list of lists of bibliography entries as item IDs.
    ///
    /// https://github.com/citation-style-language/test-suite#bibentries
    BibEntries(String),
    /// JSON input to bibliography mode for limiting bib output.
    ///
    /// https://github.com/citation-style-language/test-suite#bibsection
    BibSection(String),
    /// JSON list of lists of cites (ie `Cluster[].map(cl => cl.cites)`).
    ///
    /// https://github.com/citation-style-language/test-suite#citation-items
    CitationItems(String),
    /// JSON list of lists of objects that represent calls to
    /// `processCitationCluster`.
    ///
    /// https://github.com/citation-style-language/test-suite#citations
    Citations(String),
}
369 
370 // fn format_human_test(test_case: &TestCase) -> String {
371 //     let mut out = String::new();
372 //     out += ">>===== MODE =====>>";
373 //     out += match test_case.mode {
374 //         Mode::Citation => "citation",
375 //         Mode::Bibliography => "bibliography",
376 //     };
377 //     out += "<<===== MODE =====<<";
378 //     out += ">>===== INPUT =====>>";
379 //     // out += &serde_json::to_string_pretty(&test_case.input).unwrap();
380 //     out += "<<===== INPUT =====<<";
381 //     out
382 // }
383 
parse_human_test(contents: &str) -> TestCase384 pub fn parse_human_test(contents: &str) -> TestCase {
385     use regex::Regex;
386     lazy_static! {
387         static ref BEGIN: Regex = Regex::new(r">>=+ ([A-Z\-]+) =+>>").unwrap();
388     }
389     lazy_static! {
390         static ref END: Regex = Regex::new(r"<<=+ ([A-Z\-]+) =+<<").unwrap();
391     }
392     let mut state = None;
393     let mut chunks = vec![];
394     // some of the files use two or four equals signs, most use five.
395     for line in contents.lines() {
396         if END.is_match(line) {
397             if state.is_some() {
398                 let mut chunk = None;
399                 mem::swap(&mut state, &mut chunk);
400                 chunks.push(chunk.unwrap());
401             }
402         } else if let Some(caps) = BEGIN.captures(line) {
403             state = match caps.get(1).unwrap().as_str() {
404                 "MODE" => Some(Chunk::Mode(String::new())),
405                 "CSL" => Some(Chunk::Csl(String::new())),
406                 "INPUT" => Some(Chunk::Input(String::new())),
407                 "RESULT" => Some(Chunk::Result(String::new())),
408                 "BIBENTRIES" => Some(Chunk::BibEntries(String::new())),
409                 "BIBSECTION" => Some(Chunk::BibSection(String::new())),
410                 "CITATION-ITEMS" => Some(Chunk::CitationItems(String::new())),
411                 "CITATIONS" => Some(Chunk::Citations(String::new())),
412                 x => panic!("unrecognized block: {}", x),
413             }
414         } else {
415             if let Some(ref mut state) = state {
416                 match state {
417                     Chunk::Mode(ref mut s)
418                     | Chunk::Csl(ref mut s)
419                     | Chunk::Input(ref mut s)
420                     | Chunk::Result(ref mut s)
421                     | Chunk::BibSection(ref mut s)
422                     | Chunk::BibEntries(ref mut s)
423                     | Chunk::CitationItems(ref mut s)
424                     | Chunk::Citations(ref mut s) => {
425                         if !s.is_empty() {
426                             s.push_str("\n");
427                         }
428                         s.push_str(line);
429                     }
430                 }
431             }
432             // otherwise, it's a comment
433         }
434     }
435 
436     let mut mode = None;
437     let mut csl = None;
438     let mut input: Option<Vec<Reference>> = None;
439     let mut result = None;
440 
441     // TODO
442     let mut bib_entries = None;
443     let mut bib_section = None;
444     let mut citation_items = None;
445     let mut process_citation_clusters: Option<Vec<Instruction2>> = None;
446 
447     for chunk in chunks {
448         match chunk {
449             Chunk::Mode(m) => {
450                 mode = mode.or_else(|| match m.as_str() {
451                     "citation" => Some((Mode::Citation, SupportedFormat::TestHtml, false)),
452                     "bibliography" => Some((Mode::Bibliography, SupportedFormat::TestHtml, false)),
453                     "bibliography-nosort" => Some((Mode::Bibliography, SupportedFormat::TestHtml, true)),
454                     "citation-rtf" => Some((Mode::Citation, SupportedFormat::Rtf, false)),
455                     "bibliography-rtf" => Some((Mode::Bibliography, SupportedFormat::Rtf, false)),
456                     _ => panic!("unknown mode {}", m),
457                 })
458             }
459             Chunk::Csl(s) => csl = csl.or_else(|| Some(s)),
460             Chunk::Input(s) => {
461                 input = input.or_else(|| {
462                     Some(
463                         serde_json::from_str(&s)
464                             .expect("could not parse references in INPUT section"),
465                     )
466                 })
467             }
468             Chunk::Result(s) => result = result.or_else(|| Some(s)),
469             Chunk::BibEntries(s) => bib_entries = bib_entries.or_else(|| Some(s)),
470             Chunk::BibSection(s) => bib_section = bib_section.or_else(|| Some(s)),
471             Chunk::CitationItems(s) => {
472                 citation_items = citation_items.or_else(|| {
473                     Some(serde_json::from_str(&s).expect("could not parse CITATION-ITEMS"))
474                 })
475             }
476             Chunk::Citations(s) => {
477                 process_citation_clusters = process_citation_clusters
478                     .or_else(|| Some(serde_json::from_str(&s).expect("could not parse CITATIONS")))
479             }
480         }
481     }
482 
483     TestCase::new(
484         mode.map(|(m, _, _)| m).unwrap_or(Mode::Citation),
485         mode.map(|(_, f, _)| Format(f))
486             .unwrap_or(Format(SupportedFormat::TestHtml)),
487         mode.map_or(false, |(_, _, nosort)| nosort),
488         csl.expect("test case without a CSL section"),
489         input.expect("test case without an INPUT section"),
490         result
491             .map(|x| crate::normalise_html(&x))
492             .expect("test case without a RESULT section"),
493         citation_items.map(|items: Vec<CitationItem>| {
494             items
495                 .into_iter()
496                 .enumerate()
497                 .map(|(n, c_item): (usize, CitationItem)| c_item.to_note_cluster(n as u32 + 1u32))
498                 .collect()
499         }),
500         process_citation_clusters.map(|inst2s| {
501             inst2s
502                 .into_iter()
503                 .map(|x| CiteprocJsInstruction::from(x))
504                 .collect()
505         }),
506     )
507 }
508