1 // This Source Code Form is subject to the terms of the Mozilla Public
2 // License, v. 2.0. If a copy of the MPL was not distributed with this
3 // file, You can obtain one at http://mozilla.org/MPL/2.0/.
4 //
5 // Copyright © 2019 Corporation for Digital Scholarship
6
7 use super::{Format, Mode, TestCase};
8
9 use citeproc::prelude::*;
10 use citeproc::string_id::{Cluster as ClusterStr};
11 use citeproc_io::{Cite, Locators, Reference, Suppression, SmartString};
12
13 use lazy_static::lazy_static;
14 use std::mem;
15 use std::str::FromStr;
16
17 /// Techincally reference IDs are allowed to be numbers.
get_ref_id<'de, D>(d: D) -> Result<String, D::Error> where D: Deserializer<'de>,18 fn get_ref_id<'de, D>(d: D) -> Result<String, D::Error>
19 where
20 D: Deserializer<'de>,
21 {
22 use citeproc_io::NumberLike;
23 let s = NumberLike::deserialize(d)?;
24 Ok(s.into_string())
25 }
26
27 #[derive(Deserialize, Clone, Debug, PartialEq)]
28 #[serde(untagged)]
29 pub enum CitationItem {
30 Array(Vec<CiteprocJsCite>),
31 Map { cites: Vec<CiteprocJsCite> },
32 }
33
34 impl CitationItem {
to_note_cluster(self, index: u32) -> ClusterStr<Markup>35 pub fn to_note_cluster(self, index: u32) -> ClusterStr<Markup> {
36 let v = match self {
37 CitationItem::Array(v) => v,
38 CitationItem::Map { cites } => cites,
39 };
40 let cites = v.iter().map(CiteprocJsCite::to_cite).collect();
41 ClusterStr {
42 id: index.to_string().into(),
43 cites,
44 }
45 }
46 }
47
48 #[derive(Deserialize, Clone, Debug, PartialEq)]
49 #[serde(rename_all = "kebab-case")]
50 pub struct CiteprocJsCite {
51 #[serde(deserialize_with = "get_ref_id")]
52 id: String,
53
54 #[serde(default, flatten)]
55 locators: Option<Locators>,
56
57 #[serde(default)]
58 prefix: Option<String>,
59 #[serde(default)]
60 suffix: Option<String>,
61 #[serde(default)]
62 suppress_author: bool,
63 #[serde(default)]
64 author_only: bool,
65 }
66
67 impl CiteprocJsCite {
to_cite(&self) -> Cite<Markup>68 fn to_cite(&self) -> Cite<Markup> {
69 Cite {
70 ref_id: csl::Atom::from(self.id.as_str()),
71 prefix: self.prefix.as_ref().map(SmartString::from),
72 suffix: self.suffix.as_ref().map(SmartString::from),
73 locators: self.locators.clone(),
74 suppression: match (self.suppress_author, self.author_only) {
75 (false, true) => Some(Suppression::InText),
76 (true, false) => Some(Suppression::Rest),
77 (false, false) => None,
78 _ => panic!("multiple citation modes passed to CiteprocJsCite"),
79 },
80 }
81 }
82 }
83
84 #[derive(Debug, PartialEq)]
85 enum ResultKind {
86 Dots,
87 Arrows,
88 }
89 #[derive(Debug, PartialEq)]
90 pub struct CiteResult {
91 kind: ResultKind,
92 // id: u32,
93 note: ClusterNumber,
94 text: String,
95 }
96 #[derive(Debug, PartialEq)]
97 pub struct Results(pub Vec<CiteResult>);
98
99 impl Results {
output_independent(&self) -> String100 pub fn output_independent(&self) -> String {
101 let mut output = String::new();
102 for (n, res) in self.0.iter().enumerate() {
103 // Whether or not something is recomputed is not part of the CSL spec. We will simply
104 // ignore this.
105 // output.push_str(if res.kind == ResultKind::Arrows {
106 // ">>"
107 // } else {
108 // ".."
109 // });
110 output.push_str("[");
111 output.push_str(&format!("{}", n));
112 output.push_str("] ");
113 output.push_str(&res.text);
114 output.push_str("\n");
115 }
116 output
117 }
118 }
119
120 impl FromStr for Results {
121 type Err = ();
from_str(s: &str) -> Result<Self, Self::Err>122 fn from_str(s: &str) -> Result<Self, Self::Err> {
123 use nom::{
124 branch::alt,
125 bytes::complete::{tag, take_until},
126 character::complete::{char, digit1},
127 combinator::map,
128 multi::separated_list1,
129 sequence::{delimited, preceded, tuple},
130 IResult,
131 };
132 fn dots(inp: &str) -> IResult<&str, ResultKind> {
133 map(alt((tag(".."), tag(">>"))), |s| match s {
134 ".." => ResultKind::Dots,
135 ">>" => ResultKind::Arrows,
136 _ => unreachable!(),
137 })(inp)
138 }
139 fn num(inp: &str) -> IResult<&str, u32> {
140 map(delimited(char('['), digit1, char(']')), |ds: &str| {
141 u32::from_str(ds).unwrap()
142 })(inp)
143 }
144 fn formatted(inp: &str) -> IResult<&str, &str> {
145 preceded(char(' '), take_until("\n"))(inp)
146 }
147 fn total(inp: &str) -> IResult<&str, CiteResult> {
148 map(tuple((dots, num, formatted)), |(k, n, f)| CiteResult {
149 kind: k,
150 // id: n,
151 // incorrect, but we don't actually know except by looking at the instructions what
152 // the right note number is
153 note: ClusterNumber::Note(IntraNote::Single(n)),
154 text: crate::normalise_html(&f),
155 })(inp)
156 }
157 fn whole_thing(inp: &str) -> IResult<&str, Vec<CiteResult>> {
158 separated_list1(char('\n'), total)(inp)
159 }
160 Ok(Results(whole_thing(s).unwrap().1))
161 }
162 }
163
164 use serde::de::{Deserialize, Deserializer};
165
/// Citation mode named by the `mode` string of a citeproc-js instruction.
///
/// The string forms accepted by this type's `Deserialize` impl are noted on each
/// variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InstructionMode {
    /// `"composite"`
    Composite,
    /// `"author-only"`
    AuthorOnly,
    /// `"suppress-author"`
    SuppressAuthor,
}
171
172 impl<'de> Deserialize<'de> for InstructionMode {
deserialize<D>(deserializer: D) -> Result<Self, D::Error> where D: Deserializer<'de>,173 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
174 where
175 D: Deserializer<'de>,
176 {
177 let s = String::deserialize(deserializer)?;
178 Ok(match s.as_str() {
179 "author-only" => InstructionMode::AuthorOnly,
180 "composite" => InstructionMode::Composite,
181 "suppress-author" => InstructionMode::SuppressAuthor,
182 _ => panic!("unrecognized instruction mode"),
183 })
184 }
185 }
186
187 #[derive(Deserialize, Debug, Clone, PartialEq)]
188 #[serde(tag = "mode", rename = "kebab-case")]
189 pub enum ModeProperties {
190 Composite {
191 #[serde(default)]
192 infix: String,
193 },
194 AuthorOnly,
195 SuppressAuthor,
196 }
197
198 #[derive(Deserialize, Debug, PartialEq, Clone)]
199 #[serde(rename_all = "camelCase")]
200 struct Properties {
201 #[serde(rename = "noteIndex", alias = "note")]
202 note_index: u32,
203 #[serde(default, flatten)]
204 mode: Option<ModeProperties>,
205 }
206
207 #[derive(Deserialize, Debug, PartialEq, Clone)]
208 pub struct ClusterInstruction {
209 #[serde(rename = "citationID", alias = "id")]
210 cluster_id: SmartString,
211 #[serde(rename = "citationItems", alias = "cites")]
212 citation_items: Vec<CiteprocJsCite>,
213 properties: Properties,
214 }
215
216 #[derive(Deserialize, Debug, PartialEq, Clone)]
217 pub struct PrePost(SmartString, u32);
218
219 #[derive(Deserialize, Debug, PartialEq, Clone)]
220 pub struct CiteprocJsInstruction {
221 cluster: ClusterInstruction,
222 pre: Vec<PrePost>,
223 post: Vec<PrePost>,
224 }
225
226 #[derive(Deserialize, Debug, PartialEq, Clone)]
227 #[serde(untagged)]
228 pub enum Instruction2 {
229 Map(CiteprocJsInstruction),
230 Array(ClusterInstruction, Vec<PrePost>, Vec<PrePost>),
231 }
232
233 impl From<Instruction2> for CiteprocJsInstruction {
from(other: Instruction2) -> Self234 fn from(other: Instruction2) -> Self {
235 match other {
236 Instruction2::Map(i) => i,
237 Instruction2::Array(cluster, pre, post) => CiteprocJsInstruction { cluster, pre, post },
238 }
239 }
240 }
241
242 use std::collections::HashMap;
243
244 pub struct JsExecutor<'a> {
245 current_note_numbers: HashMap<ClusterId, ClusterNumber>,
246 proc: &'a mut Processor,
247 }
248
249 impl JsExecutor<'_> {
new<'a>(proc: &'a mut Processor) -> JsExecutor<'a>250 pub fn new<'a>(proc: &'a mut Processor) -> JsExecutor<'a> {
251 JsExecutor {
252 current_note_numbers: HashMap::new(),
253 proc,
254 }
255 }
get_id(&mut self, string_id: &str) -> ClusterId256 fn get_id(&mut self, string_id: &str) -> ClusterId {
257 self.proc.new_cluster(string_id)
258 }
259
get_results(&self) -> Results260 pub fn get_results(&self) -> Results {
261 let updates = self.proc.batched_updates();
262 let mut mod_clusters = HashMap::new();
263 let mut results = Vec::<CiteResult>::new();
264 for (id, text) in updates.clusters {
265 mod_clusters.insert(id, true);
266 let ¬e = self.current_note_numbers.get(&id).unwrap();
267 let text = (*text).clone();
268 results.push(CiteResult {
269 kind: ResultKind::Arrows,
270 // id,
271 note,
272 text: crate::normalise_html(&text),
273 })
274 }
275 // for &id in self.current_note_numbers.keys() {
276 // if mod_clusters.contains_key(&id) {
277 // continue;
278 // }
279 // let ¬e = self.current_note_numbers.get(&id).unwrap();
280 // if let Some(text) = self.proc.get_cluster(id) {
281 // results.push(CiteResult {
282 // kind: ResultKind::Dots,
283 // id,
284 // note,
285 // text: crate::normalise_html(&text),
286 // })
287 // }
288 // }
289 results.sort_by_key(|x| x.note);
290 Results(results)
291 }
292
to_renumbering(&mut self, renum: &mut Vec<ClusterPosition>, prepost: &[PrePost])293 fn to_renumbering(&mut self, renum: &mut Vec<ClusterPosition>, prepost: &[PrePost]) {
294 for &PrePost(ref string_id, note_number) in prepost.iter() {
295 let id = self.get_id(string_id);
296 let note = if note_number == 0 {
297 None
298 } else {
299 Some(note_number)
300 };
301 renum.push(ClusterPosition { id, note })
302 }
303 }
304
execute(&mut self, instructions: &[CiteprocJsInstruction])305 pub fn execute(&mut self, instructions: &[CiteprocJsInstruction]) {
306 self.proc.drain();
307 let mut renum = Vec::new();
308 for CiteprocJsInstruction { cluster, pre, post } in instructions {
309 let id = &cluster.cluster_id;
310 let note = cluster.properties.note_index;
311
312 let mut cites = Vec::new();
313 for cite_item in cluster.citation_items.iter() {
314 cites.push(cite_item.to_cite());
315 }
316
317 renum.clear();
318 self.to_renumbering(&mut renum, pre);
319 self.to_renumbering(&mut renum, &[PrePost(cluster.cluster_id.clone(), note)]);
320 self.to_renumbering(&mut renum, post);
321 self.proc.insert_cites_str(id, &cites);
322 self.proc.set_cluster_order(&renum).unwrap();
323 for &ClusterPosition { id, .. } in &renum {
324 if let Some(actual_note) = self.proc.get_cluster_note_number(id) {
325 self.current_note_numbers.insert(id, actual_note);
326 }
327 }
328 }
329 }
330 }
331
332 enum Chunk {
333 // Required sections
334 Mode(String),
335
336 /// Interpretation depends on which mode you're using
337 ///
338 /// https://github.com/citation-style-language/test-suite#result
339 Result(String),
340
341 /// XML CSL style
342 ///
343 /// https://github.com/citation-style-language/test-suite#csl
344 Csl(String),
345
346 /// JSON Reference[] list
347 ///
348 /// https://github.com/citation-style-language/test-suite#input
349 Input(String),
350
351 // Optional sections
352 /// JSON LIST of LISTS of bibliography entries as item IDs
353 ///
354 /// https://github.com/citation-style-language/test-suite#bibentries
355 BibEntries(String),
356 /// JSON input to bibliography mode for limiting bib output
357 ///
358 /// https://github.com/citation-style-language/test-suite#bibsection
359 BibSection(String),
360 /// JSON list of lists of cites (ie Cluster[].map(cl => cl.cites))
361 ///
362 /// https://github.com/citation-style-language/test-suite#citation-items
363 CitationItems(String),
364 /// JSON list of lists of objects that represent calls to processCitationCluster
365 ///
366 /// https://github.com/citation-style-language/test-suite#citations
367 Citations(String),
368 }
369
370 // fn format_human_test(test_case: &TestCase) -> String {
371 // let mut out = String::new();
372 // out += ">>===== MODE =====>>";
373 // out += match test_case.mode {
374 // Mode::Citation => "citation",
375 // Mode::Bibliography => "bibliography",
376 // };
377 // out += "<<===== MODE =====<<";
378 // out += ">>===== INPUT =====>>";
379 // // out += &serde_json::to_string_pretty(&test_case.input).unwrap();
380 // out += "<<===== INPUT =====<<";
381 // out
382 // }
383
parse_human_test(contents: &str) -> TestCase384 pub fn parse_human_test(contents: &str) -> TestCase {
385 use regex::Regex;
386 lazy_static! {
387 static ref BEGIN: Regex = Regex::new(r">>=+ ([A-Z\-]+) =+>>").unwrap();
388 }
389 lazy_static! {
390 static ref END: Regex = Regex::new(r"<<=+ ([A-Z\-]+) =+<<").unwrap();
391 }
392 let mut state = None;
393 let mut chunks = vec![];
394 // some of the files use two or four equals signs, most use five.
395 for line in contents.lines() {
396 if END.is_match(line) {
397 if state.is_some() {
398 let mut chunk = None;
399 mem::swap(&mut state, &mut chunk);
400 chunks.push(chunk.unwrap());
401 }
402 } else if let Some(caps) = BEGIN.captures(line) {
403 state = match caps.get(1).unwrap().as_str() {
404 "MODE" => Some(Chunk::Mode(String::new())),
405 "CSL" => Some(Chunk::Csl(String::new())),
406 "INPUT" => Some(Chunk::Input(String::new())),
407 "RESULT" => Some(Chunk::Result(String::new())),
408 "BIBENTRIES" => Some(Chunk::BibEntries(String::new())),
409 "BIBSECTION" => Some(Chunk::BibSection(String::new())),
410 "CITATION-ITEMS" => Some(Chunk::CitationItems(String::new())),
411 "CITATIONS" => Some(Chunk::Citations(String::new())),
412 x => panic!("unrecognized block: {}", x),
413 }
414 } else {
415 if let Some(ref mut state) = state {
416 match state {
417 Chunk::Mode(ref mut s)
418 | Chunk::Csl(ref mut s)
419 | Chunk::Input(ref mut s)
420 | Chunk::Result(ref mut s)
421 | Chunk::BibSection(ref mut s)
422 | Chunk::BibEntries(ref mut s)
423 | Chunk::CitationItems(ref mut s)
424 | Chunk::Citations(ref mut s) => {
425 if !s.is_empty() {
426 s.push_str("\n");
427 }
428 s.push_str(line);
429 }
430 }
431 }
432 // otherwise, it's a comment
433 }
434 }
435
436 let mut mode = None;
437 let mut csl = None;
438 let mut input: Option<Vec<Reference>> = None;
439 let mut result = None;
440
441 // TODO
442 let mut bib_entries = None;
443 let mut bib_section = None;
444 let mut citation_items = None;
445 let mut process_citation_clusters: Option<Vec<Instruction2>> = None;
446
447 for chunk in chunks {
448 match chunk {
449 Chunk::Mode(m) => {
450 mode = mode.or_else(|| match m.as_str() {
451 "citation" => Some((Mode::Citation, SupportedFormat::TestHtml, false)),
452 "bibliography" => Some((Mode::Bibliography, SupportedFormat::TestHtml, false)),
453 "bibliography-nosort" => Some((Mode::Bibliography, SupportedFormat::TestHtml, true)),
454 "citation-rtf" => Some((Mode::Citation, SupportedFormat::Rtf, false)),
455 "bibliography-rtf" => Some((Mode::Bibliography, SupportedFormat::Rtf, false)),
456 _ => panic!("unknown mode {}", m),
457 })
458 }
459 Chunk::Csl(s) => csl = csl.or_else(|| Some(s)),
460 Chunk::Input(s) => {
461 input = input.or_else(|| {
462 Some(
463 serde_json::from_str(&s)
464 .expect("could not parse references in INPUT section"),
465 )
466 })
467 }
468 Chunk::Result(s) => result = result.or_else(|| Some(s)),
469 Chunk::BibEntries(s) => bib_entries = bib_entries.or_else(|| Some(s)),
470 Chunk::BibSection(s) => bib_section = bib_section.or_else(|| Some(s)),
471 Chunk::CitationItems(s) => {
472 citation_items = citation_items.or_else(|| {
473 Some(serde_json::from_str(&s).expect("could not parse CITATION-ITEMS"))
474 })
475 }
476 Chunk::Citations(s) => {
477 process_citation_clusters = process_citation_clusters
478 .or_else(|| Some(serde_json::from_str(&s).expect("could not parse CITATIONS")))
479 }
480 }
481 }
482
483 TestCase::new(
484 mode.map(|(m, _, _)| m).unwrap_or(Mode::Citation),
485 mode.map(|(_, f, _)| Format(f))
486 .unwrap_or(Format(SupportedFormat::TestHtml)),
487 mode.map_or(false, |(_, _, nosort)| nosort),
488 csl.expect("test case without a CSL section"),
489 input.expect("test case without an INPUT section"),
490 result
491 .map(|x| crate::normalise_html(&x))
492 .expect("test case without a RESULT section"),
493 citation_items.map(|items: Vec<CitationItem>| {
494 items
495 .into_iter()
496 .enumerate()
497 .map(|(n, c_item): (usize, CitationItem)| c_item.to_note_cluster(n as u32 + 1u32))
498 .collect()
499 }),
500 process_citation_clusters.map(|inst2s| {
501 inst2s
502 .into_iter()
503 .map(|x| CiteprocJsInstruction::from(x))
504 .collect()
505 }),
506 )
507 }
508