1 use crate::disamb::names::{replace_single_child, NameIR};
2 use crate::names::NameToken;
3 use crate::prelude::*;
4 use citeproc_io::Cite;
5 use std::mem;
6 use std::sync::Arc;
7
8 /////////////////////////////////
9 // capitalize start of cluster //
10 /////////////////////////////////
11
12 impl<O: OutputFormat> IR<O> {
capitalize_first_term_of_cluster(root: NodeId, arena: &mut IrArena<O>, fmt: &O)13 pub fn capitalize_first_term_of_cluster(root: NodeId, arena: &mut IrArena<O>, fmt: &O) {
14 if let Some(node) = IR::find_term_rendered_first(root, arena) {
15 let trf = match arena.get_mut(node).unwrap().get_mut().0 {
16 IR::Rendered(Some(CiteEdgeData::Term(ref mut b)))
17 | IR::Rendered(Some(CiteEdgeData::LocatorLabel(ref mut b)))
18 | IR::Rendered(Some(CiteEdgeData::FrnnLabel(ref mut b))) => b,
19 _ => return,
20 };
21 fmt.apply_text_case(
22 trf,
23 &IngestOptions {
24 text_case: TextCase::CapitalizeFirst,
25 ..Default::default()
26 },
27 );
28 }
29 }
30 // Gotta find a a CiteEdgeData::Term/LocatorLabel/FrnnLabel
31 // (the latter two are also terms, but a different kind for disambiguation).
find_term_rendered_first(node: NodeId, arena: &IrArena<O>) -> Option<NodeId>32 fn find_term_rendered_first(node: NodeId, arena: &IrArena<O>) -> Option<NodeId> {
33 match arena.get(node)?.get().0 {
34 IR::Rendered(Some(CiteEdgeData::Term(_)))
35 | IR::Rendered(Some(CiteEdgeData::LocatorLabel(_)))
36 | IR::Rendered(Some(CiteEdgeData::FrnnLabel(_))) => Some(node),
37 IR::ConditionalDisamb(_) | IR::Seq(_) => node
38 .children(arena)
39 .next()
40 .and_then(|child| IR::find_term_rendered_first(child, arena)),
41 _ => None,
42 }
43 }
44 }
45
46 ////////////////////////
47 // second-field-align //
48 ////////////////////////
49
50 impl<O: OutputFormat> IR<O> {
51 // If returns Some(id), that ID is the new root node of the whole tree.
split_first_field(node: NodeId, arena: &mut IrArena<O>) -> Option<NodeId>52 pub fn split_first_field(node: NodeId, arena: &mut IrArena<O>) -> Option<NodeId> {
53 // Pull off the first field of self -> [first, ...rest]
54
55 if node.children(arena).take(2).count() != 2 {
56 return None;
57 }
58
59 // Steal the top seq's IrSeq configuration
60 let orig_top = if let (IR::Seq(s), gv) = arena.get_mut(node)?.get_mut() {
61 (mem::take(s), *gv)
62 } else {
63 return None;
64 };
65
66 // Detach the first child
67 let first = node.children(arena).next().unwrap();
68 first.detach(arena);
69 let rest = node;
70
71 let (afpre, afsuf) = {
72 // Keep this mutable ref inside {}
73 // Split the affixes into two sets with empty inside.
74 orig_top
75 .0
76 .affixes
77 .map(|mine| {
78 (
79 Some(Affixes {
80 prefix: mine.prefix,
81 suffix: "".into(),
82 }),
83 Some(Affixes {
84 prefix: "".into(),
85 suffix: mine.suffix,
86 }),
87 )
88 })
89 .unwrap_or((None, None))
90 };
91
92 let left_gv = arena.get(first)?.get().1;
93 let left = arena.new_node((
94 IR::Seq(IrSeq {
95 display: Some(DisplayMode::LeftMargin),
96 affixes: afpre,
97 ..Default::default()
98 }),
99 left_gv,
100 ));
101 left.append(first, arena);
102
103 let right_config = (
104 IR::Seq(IrSeq {
105 display: Some(DisplayMode::RightInline),
106 affixes: afsuf,
107 ..Default::default()
108 }),
109 GroupVars::Important,
110 );
111
112 // Take the IrSeq that configured the original top-level.
113 // Replace the configuration for rest with right_config.
114 // This is because we want to move all of the rest node's children to the right
115 // half, so the node is the thing that has to move.
116 *arena.get_mut(rest)?.get_mut() = right_config;
117 let top_seq = (
118 IR::Seq(IrSeq {
119 display: None,
120 affixes: None,
121 dropped_gv: None,
122 ..orig_top.0
123 }),
124 orig_top.1,
125 );
126
127 // Twist it all into place.
128 // We make sure rest is detached, even though ATM it's definitely a detached node.
129 let new_toplevel = arena.new_node(top_seq);
130 rest.detach(arena);
131 new_toplevel.append(left, arena);
132 new_toplevel.append(rest, arena);
133 return Some(new_toplevel);
134 }
135 }
136
137 ////////////////////////////////
138 // Cite Grouping & Collapsing //
139 ////////////////////////////////
140
141 impl<O: OutputFormat> IR<O> {
first_name_block(node: NodeId, arena: &IrArena<O>) -> Option<NodeId>142 pub fn first_name_block(node: NodeId, arena: &IrArena<O>) -> Option<NodeId> {
143 match arena.get(node)?.get().0 {
144 IR::Name(_) => Some(node),
145 IR::ConditionalDisamb(_) | IR::Seq(_) => {
146 // assumes it's the first one that appears
147 node.children(arena)
148 .find_map(|child| IR::first_name_block(child, arena))
149 }
150 _ => None,
151 }
152 }
153
find_locator(node: NodeId, arena: &IrArena<O>) -> Option<NodeId>154 fn find_locator(node: NodeId, arena: &IrArena<O>) -> Option<NodeId> {
155 match arena.get(node)?.get().0 {
156 IR::Rendered(Some(CiteEdgeData::Locator(_))) => Some(node),
157 IR::ConditionalDisamb(_) | IR::Seq(_) => {
158 // Search backwards because it's likely to be near the end
159 node.reverse_children(arena)
160 .find_map(|child| IR::find_locator(child, arena))
161 }
162 _ => None,
163 }
164 }
165
find_first_year(node: NodeId, arena: &IrArena<O>) -> Option<NodeId>166 fn find_first_year(node: NodeId, arena: &IrArena<O>) -> Option<NodeId> {
167 match &arena.get(node)?.get().0 {
168 IR::Rendered(Some(CiteEdgeData::Year(_b))) => Some(node),
169 IR::Seq(_) | IR::ConditionalDisamb(_) => node
170 .children(arena)
171 .find_map(|child| IR::find_first_year(child, arena)),
172 _ => None,
173 }
174 }
175
find_year_suffix(node: NodeId, arena: &IrArena<O>) -> Option<u32>176 pub fn find_year_suffix(node: NodeId, arena: &IrArena<O>) -> Option<u32> {
177 IR::has_explicit_year_suffix(node, arena)
178 .or_else(|| IR::has_implicit_year_suffix(node, arena))
179 }
180
find_first_year_and_suffix(node: NodeId, arena: &IrArena<O>) -> Option<(NodeId, u32)>181 fn find_first_year_and_suffix(node: NodeId, arena: &IrArena<O>) -> Option<(NodeId, u32)> {
182 // if let Some(fy) = IR::find_first_year(node, arena) {
183 // debug!("fy, {:?}", arena.get(fy).unwrap().get().0);
184 // }
185 // if let Some(ys) = IR::find_year_suffix(node, arena) {
186 // debug!("ys, {:?}", ys);
187 // }
188 Some((
189 IR::find_first_year(node, arena)?,
190 IR::find_year_suffix(node, arena)?,
191 ))
192 }
193
194 /// Rest of the name: "if it has a year suffix"
suppress_first_year( node: NodeId, arena: &mut IrArena<O>, has_explicit: bool, ) -> Option<NodeId>195 fn suppress_first_year(
196 node: NodeId,
197 arena: &mut IrArena<O>,
198 has_explicit: bool,
199 ) -> Option<NodeId> {
200 match arena.get(node)?.get().0 {
201 IR::Rendered(Some(CiteEdgeData::Year(_))) => {
202 arena.get_mut(node)?.get_mut().0 = IR::Rendered(None);
203 Some(node)
204 }
205 IR::ConditionalDisamb(_) => {
206 // Not sure why this result is thrown away
207 IR::suppress_first_year(node, arena, has_explicit);
208 None
209 }
210 IR::Seq(_) => {
211 let mut iter = node.children(arena).fuse();
212 let first_two = (iter.next(), iter.next());
213 // Check for the exact explicit year suffix IR output
214 let mut found = if iter.next().is_some() {
215 None
216 } else if let (Some(first), Some(second)) = first_two {
217 match arena.get(second).unwrap().get() {
218 (IR::YearSuffix(_), GroupVars::Unresolved) if has_explicit => {
219 IR::suppress_first_year(first, arena, has_explicit)
220 }
221 (IR::YearSuffix(_), GroupVars::Important)
222 if !has_explicit && !IR::is_empty(second, arena) =>
223 {
224 IR::suppress_first_year(first, arena, has_explicit)
225 }
226 _ => None,
227 }
228 } else {
229 None
230 };
231
232 // Otherwise keep looking in subtrees etc
233 if found.is_none() {
234 let child_ids: Vec<_> = node.children(arena).collect();
235 for child in child_ids {
236 found = IR::suppress_first_year(child, arena, has_explicit);
237 if found.is_some() {
238 break;
239 }
240 }
241 }
242 found
243 }
244 _ => None,
245 }
246 }
247
has_implicit_year_suffix(node: NodeId, arena: &IrArena<O>) -> Option<u32>248 pub fn has_implicit_year_suffix(node: NodeId, arena: &IrArena<O>) -> Option<u32> {
249 match arena.get(node)?.get().0 {
250 IR::YearSuffix(YearSuffix {
251 hook: YearSuffixHook::Plain,
252 suffix_num: Some(n),
253 ..
254 }) if !IR::is_empty(node, arena) => Some(n),
255
256 IR::ConditionalDisamb(_) | IR::Seq(_) => {
257 // assumes it's the first one that appears
258 node.children(arena)
259 .find_map(|child| IR::has_implicit_year_suffix(child, arena))
260 }
261 _ => None,
262 }
263 }
264
has_explicit_year_suffix(node: NodeId, arena: &IrArena<O>) -> Option<u32>265 pub fn has_explicit_year_suffix(node: NodeId, arena: &IrArena<O>) -> Option<u32> {
266 match arena.get(node)?.get().0 {
267 IR::YearSuffix(YearSuffix {
268 hook: YearSuffixHook::Explicit(_),
269 suffix_num: Some(n),
270 ..
271 }) if !IR::is_empty(node, arena) => Some(n),
272
273 IR::ConditionalDisamb(_) | IR::Seq(_) => {
274 // assumes it's the first one that appears
275 node.children(arena)
276 .find_map(|child| IR::has_explicit_year_suffix(child, arena))
277 }
278 _ => None,
279 }
280 }
281
suppress_names(node: NodeId, arena: &mut IrArena<O>)282 pub fn suppress_names(node: NodeId, arena: &mut IrArena<O>) {
283 if let Some(fnb) = IR::first_name_block(node, arena) {
284 // TODO: check interaction of this with GroupVars of the parent seq
285 fnb.remove_subtree(arena);
286 }
287 }
288
suppress_year(node: NodeId, arena: &mut IrArena<O>)289 pub fn suppress_year(node: NodeId, arena: &mut IrArena<O>) {
290 let has_explicit = IR::has_explicit_year_suffix(node, arena).is_some();
291 let has_implicit = IR::has_implicit_year_suffix(node, arena).is_some();
292 if !has_explicit && !has_implicit {
293 return;
294 }
295 IR::suppress_first_year(node, arena, has_explicit);
296 }
297 }
298
299 impl<O: OutputFormat<Output = SmartString>> IR<O> {
collapse_to_cnum(node: NodeId, arena: &IrArena<O>, fmt: &O) -> Option<u32>300 pub fn collapse_to_cnum(node: NodeId, arena: &IrArena<O>, fmt: &O) -> Option<u32> {
301 match &arena.get(node)?.get().0 {
302 IR::Rendered(Some(CiteEdgeData::CitationNumber(build))) => {
303 // TODO: just get it from the database
304 fmt.output(build.clone(), false).parse().ok()
305 }
306 IR::ConditionalDisamb(_) => node
307 .children(arena)
308 .find_map(|child| IR::collapse_to_cnum(child, arena, fmt)),
309 IR::Seq(_) => {
310 // assumes it's the first one that appears
311 if node.children(arena).count() != 1 {
312 None
313 } else {
314 node.children(arena)
315 .next()
316 .and_then(|child| IR::collapse_to_cnum(child, arena, fmt))
317 }
318 }
319 _ => None,
320 }
321 }
322 }
323
324 use crate::db::IrGen;
325 use csl::Collapse;
326 use std::collections::HashMap;
327
/// A number to be collapsed into ranges, tagged with the position it came from.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub struct CnumIx {
    /// The number itself: a citation number, or a year suffix in its numeric form.
    pub cnum: u32,
    /// Caller-supplied index; `group_and_collapse` stores the cite's position in its list.
    pub ix: usize,
    /// When set, this entry is never merged into a range by `RangePiece::attempt_append`.
    pub force_single: bool,
}

impl CnumIx {
    /// Creates an entry that is allowed to take part in a range (`force_single == false`).
    fn new(c: u32, ix: usize) -> Self {
        CnumIx {
            cnum: c,
            ix,
            force_single: false,
        }
    }
}

/// One piece of a collapsed list of numbers.
#[derive(Debug, PartialEq, Eq)]
pub enum RangePiece {
    /// If the length of the range is only two, it should be rendered with a comma anyway
    Range(CnumIx, CnumIx),
    /// A number standing on its own.
    Single(CnumIx),
}

impl RangePiece {
    /// Tries to extend this piece with `nxt`.
    ///
    /// `nxt` is absorbed (and `None` returned) when it is not `force_single` and its number
    /// is exactly one more than the last number of this piece. Otherwise `self` becomes
    /// `Single(nxt)` and the previous value is returned, to be emitted.
    ///
    /// The successor test uses `checked_add`, so `nxt.cnum == 0` can neither underflow
    /// (a panic in debug builds) nor be treated as following `u32::MAX`.
    fn attempt_append(&mut self, nxt: CnumIx) -> Option<RangePiece> {
        let extended = match self {
            _ if nxt.force_single => None,
            RangePiece::Single(prv) if prv.cnum.checked_add(1) == Some(nxt.cnum) => {
                Some(RangePiece::Range(*prv, nxt))
            }
            RangePiece::Range(start, end) if end.cnum.checked_add(1) == Some(nxt.cnum) => {
                Some(RangePiece::Range(*start, nxt))
            }
            _ => None,
        };
        match extended {
            Some(piece) => {
                *self = piece;
                None
            }
            None => Some(std::mem::replace(self, RangePiece::Single(nxt))),
        }
    }
}
367
#[test]
fn range_append() {
    // A directly following number is absorbed: nothing is emitted and a range is formed.
    let mut piece = RangePiece::Single(CnumIx::new(1, 1));
    let emitted = piece.attempt_append(CnumIx::new(2, 2));
    assert_eq!(emitted, None);
    assert_eq!(
        piece,
        RangePiece::Range(CnumIx::new(1, 1), CnumIx::new(2, 2))
    );

    // A gap emits the old piece and starts over with the new number.
    let mut piece = RangePiece::Single(CnumIx::new(1, 1));
    let emitted = piece.attempt_append(CnumIx::new(3, 2));
    assert_eq!(emitted, Some(RangePiece::Single(CnumIx::new(1, 1))));
    assert_eq!(piece, RangePiece::Single(CnumIx::new(3, 2)));
}
389
collapse_ranges(nums: &[CnumIx]) -> Vec<RangePiece>390 pub fn collapse_ranges(nums: &[CnumIx]) -> Vec<RangePiece> {
391 let mut pieces = Vec::new();
392 if let Some(init) = nums.first() {
393 let mut wip = RangePiece::Single(*init);
394 for &num in &nums[1..] {
395 if let Some(emit) = wip.attempt_append(num) {
396 pieces.push(emit);
397 }
398 }
399 pieces.push(wip);
400 }
401 pieces
402 }
403
#[test]
fn range_collapse() {
    // Helper: an entry whose index equals its number.
    let at = |cnum: u32| CnumIx::new(cnum, cnum as usize);

    // Three consecutive numbers become one range.
    let all_consecutive = collapse_ranges(&[at(1), at(2), at(3)]);
    assert_eq!(all_consecutive, vec![RangePiece::Range(at(1), at(3))]);

    // A gap after the second number leaves a trailing single.
    let four = CnumIx::new(4, 3);
    let with_gap = collapse_ranges(&[at(1), at(2), four]);
    let expected = vec![RangePiece::Range(at(1), at(2)), RangePiece::Single(four)];
    assert_eq!(with_gap, expected);
}
419
/// The build type produced by the `Markup` output format.
type MarkupBuild = <Markup as OutputFormat>::Build;

/// Per-cite working state that `group_and_collapse` reads and mutates.
///
/// The flag fields all start out `false`/empty in `Unnamed3::new` and are filled in by
/// `group_and_collapse`.
pub struct Unnamed3<O: OutputFormat> {
    /// The cite this entry belongs to; its prefix, suffix and locators are consulted.
    pub cite: Arc<Cite<O>>,
    /// Citation number of the cite, if known; used when collapsing by citation number.
    pub cnum: Option<u32>,
    /// The IR for this cite (a root node plus its arena). Cloned on write via
    /// `Arc::make_mut` when names or years are suppressed.
    pub gen4: Arc<IrGen>,
    /// So we can look for punctuation at the end and use the format's quoting abilities
    pub prefix_parsed: Option<MarkupBuild>,
    /// First of a group of cites with the same name
    pub is_first: bool,
    /// Subsequent in a group of cites with the same name
    pub should_collapse: bool,
    /// First of a group of cites with the same year, all with suffixes
    /// (same name implied)
    pub first_of_ys: bool,
    /// Subsequent in a group of cites with the same year, all with suffixes
    /// (same name implied)
    pub collapse_ys: bool,

    /// Year suffix of this cite in its numeric form, when one was found in the IR.
    pub year_suffix: Option<u32>,

    /// Ranges of year suffixes (not alphabetic, in its base u32 form)
    pub collapsed_year_suffixes: Vec<RangePiece>,

    /// Ranges of citation numbers
    pub collapsed_ranges: Vec<RangePiece>,

    /// Tagging removed cites is cheaper than memmoving the rest of the Vec
    pub vanished: bool,

    /// True when the cite has locators and a rendered locator was found in its IR.
    pub has_locator: bool,
}
451
452 use std::fmt::{Debug, Formatter};
453
454 impl<O: OutputFormat<Output = SmartString>> Debug for Unnamed3<O> {
fmt(&self, f: &mut Formatter) -> std::fmt::Result455 fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
456 let fmt = &Markup::default();
457 f.debug_struct("Unnamed3")
458 .field("cite", &self.cite)
459 .field("cnum", &self.cnum)
460 .field(
461 "gen4",
462 &IR::flatten(self.gen4.root, &self.gen4.arena, fmt, None)
463 .map(|x| fmt.output(x, false)),
464 )
465 .field("prefix_parsed", &self.prefix_parsed)
466 .field("has_locator", &self.has_locator)
467 .field("is_first", &self.is_first)
468 .field("should_collapse", &self.should_collapse)
469 .field("first_of_ys", &self.first_of_ys)
470 .field("collapse_ys", &self.collapse_ys)
471 .field("year_suffix", &self.year_suffix)
472 .field("collapsed_year_suffixes", &self.collapsed_year_suffixes)
473 .field("collapsed_ranges", &self.collapsed_ranges)
474 .field("vanished", &self.vanished)
475 .field("gen4_full", &self.gen4)
476 .finish()
477 }
478 }
479
480 impl Unnamed3<Markup> {
new(cite: Arc<Cite<Markup>>, cnum: Option<u32>, gen4: Arc<IrGen>, fmt: &Markup) -> Self481 pub fn new(cite: Arc<Cite<Markup>>, cnum: Option<u32>, gen4: Arc<IrGen>, fmt: &Markup) -> Self {
482 let prefix_parsed = cite.prefix.as_opt_str().map(|p| {
483 fmt.ingest(
484 p,
485 &IngestOptions {
486 is_external: true,
487 ..Default::default()
488 },
489 )
490 });
491 Unnamed3 {
492 has_locator: cite.locators.is_some()
493 && IR::find_locator(gen4.root, &gen4.arena).is_some(),
494 cite,
495 gen4,
496 prefix_parsed,
497 cnum,
498 is_first: false,
499 should_collapse: false,
500 first_of_ys: false,
501 collapse_ys: false,
502 year_suffix: None,
503 collapsed_year_suffixes: Vec::new(),
504 collapsed_ranges: Vec::new(),
505 vanished: false,
506 }
507 }
508 }
509
group_and_collapse<O: OutputFormat<Output = SmartString>>( fmt: &Markup, collapse: Option<Collapse>, cites: &mut Vec<Unnamed3<O>>, )510 pub fn group_and_collapse<O: OutputFormat<Output = SmartString>>(
511 fmt: &Markup,
512 collapse: Option<Collapse>,
513 cites: &mut Vec<Unnamed3<O>>,
514 ) {
515 // Neat trick: same_names[None] tracks cites without a cs:names block, which helps with styles
516 // that only include a year. (What kind of style is that?
517 // magic_ImplicitYearSuffixExplicitDelimiter.txt, I guess that's the only possible reason, but
518 // ok.)
519 let mut same_names: HashMap<Option<SmartString>, (usize, bool)> = HashMap::new();
520 let mut same_years: HashMap<SmartString, (usize, bool)> = HashMap::new();
521
522 // First, group cites with the same name
523 for ix in 0..cites.len() {
524 let gen4 = &cites[ix].gen4;
525 let rendered = IR::first_name_block(gen4.root, &gen4.arena)
526 .and_then(|fnb| IR::flatten(fnb, &gen4.arena, fmt, None))
527 .map(|flat| fmt.output(flat, false));
528 same_names
529 .entry(rendered)
530 .and_modify(|(oix, seen_once)| {
531 // Keep cites separated by affixes together
532 if cites.get(*oix).map_or(false, |u| u.cite.has_suffix())
533 || cites.get(*oix + 1).map_or(false, |u| u.cite.has_prefix())
534 || cites.get(ix - 1).map_or(false, |u| u.cite.has_suffix())
535 || cites.get(ix).map_or(false, |u| u.cite.has_affix())
536 {
537 *oix = ix;
538 *seen_once = false;
539 return;
540 }
541 if *oix < ix {
542 if !*seen_once {
543 cites[*oix].is_first = true;
544 }
545 *seen_once = true;
546 cites[ix].should_collapse = true;
547 let rotation = &mut cites[*oix + 1..ix + 1];
548 rotation.rotate_right(1);
549 *oix += 1;
550 }
551 })
552 .or_insert((ix, false));
553 }
554
555 if collapse.map_or(false, |c| {
556 c == Collapse::YearSuffixRanged || c == Collapse::YearSuffix
557 }) {
558 let mut top_ix = 0;
559 while top_ix < cites.len() {
560 if cites[top_ix].is_first {
561 let mut moved = 0;
562 let mut ix = top_ix;
563 while ix < cites.len() {
564 if ix != top_ix && !cites[ix].should_collapse {
565 break;
566 }
567 moved += 1;
568 let year_and_suf =
569 IR::find_first_year_and_suffix(cites[ix].gen4.root, &cites[ix].gen4.arena)
570 .and_then(|(ys_node, suf)| {
571 let flat = IR::flatten(ys_node, &cites[ix].gen4.arena, fmt, None)?;
572 Some((fmt.output(flat, false), suf))
573 });
574 if let Some((y, suf)) = year_and_suf {
575 cites[ix].year_suffix = Some(suf);
576 same_years
577 .entry(y)
578 .and_modify(|(oix, seen_once)| {
579 if *oix == ix - 1 {
580 if !*seen_once {
581 cites[*oix].first_of_ys = true;
582 }
583 cites[ix].collapse_ys = true;
584 *seen_once = true;
585 } else {
586 *seen_once = false;
587 }
588 *oix = ix;
589 })
590 .or_insert((ix, false));
591 }
592 ix += 1;
593 }
594 top_ix += moved;
595 }
596 top_ix += 1;
597 }
598 }
599
600 if collapse == Some(Collapse::CitationNumber) {
601 // XXX: Gotta factor in that some might have prefixes and suffixes
602 if let Some((first, rest)) = cites.split_first_mut() {
603 first.is_first = true;
604 for r in rest {
605 r.should_collapse = true;
606 }
607 }
608 }
609
610 if let Some(collapse) = collapse {
611 match collapse {
612 Collapse::CitationNumber => {
613 let mut ix = 0;
614 while ix < cites.len() {
615 let slice = &mut cites[ix..];
616 if let Some((u, rest)) = slice.split_first_mut() {
617 if u.is_first {
618 let following = rest.iter_mut().take_while(|u| u.should_collapse);
619
620 let mut cnums = Vec::new();
621 if let Some(cnum) = u.cnum {
622 cnums.push(CnumIx::new(cnum, ix));
623 }
624 let mut count = 0;
625 for (nix, cite) in following.enumerate() {
626 if let Some(cnum) = cite.cnum {
627 cnums.push(CnumIx {
628 cnum,
629 ix: ix + nix + 1,
630 force_single: cite.has_locator,
631 })
632 }
633 cite.vanished = true;
634 count += 1;
635 }
636 ix += count;
637 u.collapsed_ranges = collapse_ranges(&cnums);
638 }
639 }
640 ix += 1;
641 }
642 }
643 Collapse::Year => {
644 let mut ix = 0;
645 while ix < cites.len() {
646 let slice = &mut cites[ix..];
647 if let Some((u, rest)) = slice.split_first_mut() {
648 if u.is_first {
649 let following = rest.iter_mut().take_while(|u| u.should_collapse);
650 let mut count = 0;
651 for cite in following {
652 let gen4 = Arc::make_mut(&mut cite.gen4);
653 IR::suppress_names(gen4.root, &mut gen4.arena);
654 count += 1;
655 }
656 ix += count;
657 }
658 }
659 ix += 1;
660 }
661 }
662 Collapse::YearSuffixRanged | Collapse::YearSuffix => {
663 let mut ix = 0;
664 while ix < cites.len() {
665 let slice = &mut cites[ix..];
666 if let Some((u, rest)) = slice.split_first_mut() {
667 if u.is_first {
668 let following = rest.iter_mut().take_while(|u| u.should_collapse);
669 for cite in following {
670 let gen4 = Arc::make_mut(&mut cite.gen4);
671 IR::suppress_names(gen4.root, &mut gen4.arena)
672 }
673 }
674 if u.first_of_ys {
675 let following = rest.iter_mut().take_while(|u| u.collapse_ys);
676
677 if collapse == Collapse::YearSuffixRanged {
678 // Potentially confusing; 'cnums' here are year suffixes in u32 form.
679 let mut cnums = Vec::new();
680 if let Some(cnum) = u.year_suffix {
681 cnums.push(CnumIx::new(cnum, ix));
682 }
683 for (nix, cite) in following.enumerate() {
684 if let Some(cnum) = cite.year_suffix {
685 cnums.push(CnumIx {
686 cnum,
687 ix: ix + nix + 1,
688 force_single: cite.has_locator,
689 });
690 }
691 cite.vanished = true;
692 if !cite.has_locator {
693 let gen4 = Arc::make_mut(&mut cite.gen4);
694 IR::suppress_year(gen4.root, &mut gen4.arena);
695 }
696 }
697 u.collapsed_year_suffixes = collapse_ranges(&cnums);
698 } else {
699 if let Some(cnum) = u.year_suffix {
700 u.collapsed_year_suffixes
701 .push(RangePiece::Single(CnumIx::new(cnum, ix)));
702 }
703 for (nix, cite) in following.enumerate() {
704 if let Some(cnum) = cite.year_suffix {
705 u.collapsed_year_suffixes.push(RangePiece::Single(
706 CnumIx {
707 cnum,
708 ix: ix + nix + 1,
709 force_single: cite.has_locator,
710 },
711 ));
712 }
713 cite.vanished = true;
714 let gen4 = Arc::make_mut(&mut cite.gen4);
715 IR::suppress_year(gen4.root, &mut gen4.arena);
716 }
717 }
718 }
719 }
720 ix += 1;
721 }
722 }
723 }
724 }
725 }
726
//////////////////////////////////
// Subsequent Author Substitute //
//////////////////////////////////
730
731 use crate::disamb::names::DisambNameRatchet;
732 use citeproc_io::PersonName;
733 use csl::SubsequentAuthorSubstituteRule as SasRule;
734
/// A borrowed, comparable form of a `NameToken`, produced by `ReducedNameToken::from_token`.
///
/// `NameToken::Name` indices are resolved against a slice of `DisambNameRatchet`s, so that
/// tokens coming from two different name blocks can be compared with `==`.
#[derive(Eq, PartialEq, Clone)]
pub enum ReducedNameToken<'a, B> {
    /// A personal name, borrowed from a `DisambNameRatchet::Person`.
    Name(&'a PersonName),
    /// A literal name, borrowed from a `DisambNameRatchet::Literal`.
    Literal(&'a B),
    /// Corresponds to `NameToken::EtAl`.
    EtAl,
    /// Corresponds to `NameToken::Ellipsis`.
    Ellipsis,
    /// Corresponds to `NameToken::Delimiter`.
    Delimiter,
    /// Corresponds to `NameToken::And`.
    And,
    /// Corresponds to `NameToken::Space`.
    Space,
}
745
746 impl<'a, T: Debug> Debug for ReducedNameToken<'a, T> {
fmt(&self, f: &mut Formatter) -> std::fmt::Result747 fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
748 match self {
749 ReducedNameToken::Name(p) => write!(f, "{:?}", p.family),
750 ReducedNameToken::Literal(b) => write!(f, "{:?}", b),
751 ReducedNameToken::EtAl => write!(f, "EtAl"),
752 ReducedNameToken::Ellipsis => write!(f, "Ellipsis"),
753 ReducedNameToken::Delimiter => write!(f, "Delimiter"),
754 ReducedNameToken::And => write!(f, "And"),
755 ReducedNameToken::Space => write!(f, "Space"),
756 }
757 }
758 }
759
760 impl<'a, T> ReducedNameToken<'a, T> {
from_token(token: &NameToken, names: &'a [DisambNameRatchet<T>]) -> Self761 fn from_token(token: &NameToken, names: &'a [DisambNameRatchet<T>]) -> Self {
762 match token {
763 NameToken::Name(dnr_index) => match &names[*dnr_index] {
764 DisambNameRatchet::Person(p) => ReducedNameToken::Name(&p.data.value),
765 DisambNameRatchet::Literal { literal, .. } => ReducedNameToken::Literal(literal),
766 },
767 NameToken::Ellipsis => ReducedNameToken::Ellipsis,
768 NameToken::EtAl(..) => ReducedNameToken::EtAl,
769 NameToken::Space => ReducedNameToken::Space,
770 NameToken::Delimiter => ReducedNameToken::Delimiter,
771 NameToken::And => ReducedNameToken::And,
772 }
773 }
relevant(&self) -> bool774 fn relevant(&self) -> bool {
775 match self {
776 ReducedNameToken::Name(_) | ReducedNameToken::Literal(_) => true,
777 _ => false,
778 }
779 }
780 }
781
782 #[allow(dead_code)]
783 impl<O: OutputFormat> IR<O> {
unwrap_name_ir(&self) -> &NameIR<O>784 pub(crate) fn unwrap_name_ir(&self) -> &NameIR<O> {
785 match self {
786 IR::Name(nir) => nir,
787 _ => panic!("Called unwrap_name_ir on a {:?}", self),
788 }
789 }
unwrap_name_ir_mut(&mut self) -> &mut NameIR<O>790 pub(crate) fn unwrap_name_ir_mut(&mut self) -> &mut NameIR<O> {
791 match self {
792 IR::Name(nir) => nir,
793 _ => panic!("Called unwrap_name_ir_mut on a {:?}", self),
794 }
795 }
unwrap_year_suffix(&self) -> &YearSuffix796 pub(crate) fn unwrap_year_suffix(&self) -> &YearSuffix {
797 match self {
798 IR::YearSuffix(ys) => ys,
799 _ => panic!("Called unwrap_year_suffix on a {:?}", self),
800 }
801 }
unwrap_year_suffix_mut(&mut self) -> &mut YearSuffix802 pub(crate) fn unwrap_year_suffix_mut(&mut self) -> &mut YearSuffix {
803 match self {
804 IR::YearSuffix(ys) => ys,
805 _ => panic!("Called unwrap_year_suffix_mut on a {:?}", self),
806 }
807 }
808 #[allow(dead_code)]
unwrap_cond_disamb(&self) -> &ConditionalDisambIR809 pub(crate) fn unwrap_cond_disamb(&self) -> &ConditionalDisambIR {
810 match self {
811 IR::ConditionalDisamb(cond) => cond,
812 _ => panic!("Called unwrap_cond_disamb on a {:?}", self),
813 }
814 }
unwrap_cond_disamb_mut(&mut self) -> &mut ConditionalDisambIR815 pub(crate) fn unwrap_cond_disamb_mut(&mut self) -> &mut ConditionalDisambIR {
816 match self {
817 IR::ConditionalDisamb(cond) => cond,
818 _ => panic!("Called unwrap_cond_disamb_mut on a {:?}", self),
819 }
820 }
821 }
822
subsequent_author_substitute<O: OutputFormat>( fmt: &O, previous: &NameIR<O>, current_id: NodeId, arena: &mut IrArena<O>, sas: &str, sas_rule: SasRule, ) -> bool823 pub fn subsequent_author_substitute<O: OutputFormat>(
824 fmt: &O,
825 previous: &NameIR<O>,
826 current_id: NodeId,
827 arena: &mut IrArena<O>,
828 sas: &str,
829 sas_rule: SasRule,
830 ) -> bool {
831 let pre_tokens = previous.iter_bib_rendered_names(fmt);
832 let pre_reduced = pre_tokens
833 .iter()
834 .map(|tok| ReducedNameToken::from_token(tok, &previous.disamb_names))
835 .filter(|x| x.relevant());
836
837 let cur = arena.get(current_id).unwrap().get().0.unwrap_name_ir();
838 let label_after_name = cur
839 .names_inheritance
840 .label
841 .as_ref()
842 .map_or(false, |l| l.after_name);
843 let built_label = cur.built_label.clone();
844
845 let cur_tokens = cur.iter_bib_rendered_names(fmt);
846 let cur_reduced = cur_tokens
847 .iter()
848 .map(|tok| ReducedNameToken::from_token(tok, &cur.disamb_names))
849 .filter(|x| x.relevant());
850 debug!(
851 "{:?} vs {:?}",
852 pre_reduced.clone().collect::<Vec<_>>(),
853 cur_reduced.clone().collect::<Vec<_>>()
854 );
855
856 match sas_rule {
857 SasRule::CompleteAll | SasRule::CompleteEach => {
858 if Iterator::eq(pre_reduced, cur_reduced) {
859 let (current_ir, _current_gv) = arena.get_mut(current_id).unwrap().get_mut();
860 if sas_rule == SasRule::CompleteEach {
861 let current_nir = current_ir.unwrap_name_ir_mut();
862 // let nir handle it
863 // u32::MAX so ALL names get --- treatment
864 if let Some(rebuilt) =
865 current_nir.subsequent_author_substitute(fmt, std::u32::MAX, sas)
866 {
867 let node = NameIR::rendered_ntbs_to_node(
868 rebuilt,
869 arena,
870 false,
871 label_after_name,
872 built_label.as_ref(),
873 );
874 replace_single_child(current_id, node, arena);
875 }
876 } else if sas.is_empty() {
877 let empty_node = arena.new_node((IR::Rendered(None), GroupVars::Important));
878 replace_single_child(current_id, empty_node, arena);
879 } else {
880 // Remove all children
881 let children: Vec<_> = current_id.children(arena).collect();
882 children.into_iter().for_each(|ch| ch.remove_subtree(arena));
883
884 // Add the sas ---
885 let sas_ir = arena.new_node((
886 IR::Rendered(Some(CiteEdgeData::Output(fmt.plain(sas)))),
887 GroupVars::Important,
888 ));
889 current_id.append(sas_ir, arena);
890
891 // Add a name label
892 if let Some(label) = built_label.as_ref() {
893 let label_node = arena.new_node((
894 IR::Rendered(Some(CiteEdgeData::Output(label.clone()))),
895 GroupVars::Plain,
896 ));
897 if label_after_name {
898 current_id.append(label_node, arena)
899 } else {
900 current_id.prepend(label_node, arena)
901 }
902 }
903 };
904 return true;
905 }
906 }
907 SasRule::PartialEach => {
908 let count = pre_reduced
909 .zip(cur_reduced)
910 .take_while(|(p, c)| p == c)
911 .count();
912 let current = arena.get_mut(current_id).unwrap().get_mut();
913 let current_nir = current.0.unwrap_name_ir_mut();
914 if let Some(rebuilt) = current_nir.subsequent_author_substitute(fmt, count as u32, sas)
915 {
916 let node = NameIR::rendered_ntbs_to_node(
917 rebuilt,
918 arena,
919 false,
920 label_after_name,
921 built_label.as_ref(),
922 );
923 replace_single_child(current_id, node, arena);
924 }
925 }
926 SasRule::PartialFirst => {
927 let count = pre_reduced
928 .zip(cur_reduced)
929 .take_while(|(p, c)| p == c)
930 .count();
931 if count > 0 {
932 let current = arena.get_mut(current_id).unwrap().get_mut();
933 let current_nir = current.0.unwrap_name_ir_mut();
934 if let Some(rebuilt) = current_nir.subsequent_author_substitute(fmt, 1, sas) {
935 let node = NameIR::rendered_ntbs_to_node(
936 rebuilt,
937 arena,
938 false,
939 label_after_name,
940 built_label.as_ref(),
941 );
942 replace_single_child(current_id, node, arena);
943 }
944 }
945 }
946 }
947 false
948 }
949
950 ///////////////////////
951 // MixedNumericStyle //
952 ///////////////////////
953
style_is_mixed_numeric( style: &csl::Style, cite_or_bib: CiteOrBib, ) -> Option<(&Element, Option<&str>)>954 pub fn style_is_mixed_numeric(
955 style: &csl::Style,
956 cite_or_bib: CiteOrBib,
957 ) -> Option<(&Element, Option<&str>)> {
958 use csl::style::{Element as El, TextSource as TS, *};
959 use csl::variables::{NumberVariable::CitationNumber, StandardVariable as SV};
960 fn cnum_renders_first<'a>(
961 els: &'a [El],
962 maybe_delim: Option<&'a str>,
963 ) -> Option<(&'a Element, Option<&'a str>)> {
964 for el in els {
965 match el {
966 El::Text(TextElement {
967 source: TS::Variable(SV::Number(CitationNumber), _),
968 ..
969 }) => return Some((el, maybe_delim)),
970 El::Number(NumberElement {
971 variable: CitationNumber,
972 ..
973 }) => return Some((el, maybe_delim)),
974 El::Group(Group {
975 elements,
976 delimiter,
977 ..
978 }) => {
979 return cnum_renders_first(elements, delimiter.as_opt_str());
980 }
981 El::Choose(c) => {
982 let Choose(if_, ifthens_, else_) = c.as_ref();
983
984 // You could have a citation number appear first in the bibliography in an else
985 // block. You wouldn't, but you could.
986 let either = cnum_renders_first(&if_.1, maybe_delim).or_else(|| {
987 ifthens_
988 .iter()
989 .find_map(|ifthen| cnum_renders_first(&ifthen.1, maybe_delim))
990 });
991 if either.is_some() {
992 return either;
993 } else if else_.0.is_empty() {
994 // No else block? The choose could be empty.
995 continue;
996 } else {
997 let else_found = cnum_renders_first(&else_.0, maybe_delim);
998 if else_found.is_some() {
999 return else_found;
1000 }
1001 }
1002 }
1003 _ => break,
1004 }
1005 }
1006 None
1007 }
1008 style
1009 .get_layout(cite_or_bib)
1010 .and_then(|layout| cnum_renders_first(&layout.elements, None))
1011 }
1012
/// Exercises `style_is_mixed_numeric` against bibliography layouts in which the citation
/// number comes first inside a group and/or a choose block.
#[test]
fn test_mixed_numeric() {
    use csl::style::{Element as El, TextSource as TS, *};
    use csl::variables::{NumberVariable::CitationNumber, StandardVariable as SV};
    // Wraps a bibliography layout fragment in a minimal style and parses it.
    let mk = |layout: &str| {
        let txt = format!(
            r#"
            <style class="in-text" version="1.0">
                <citation><layout></layout></citation>
                <bibliography><layout>
                    {}
                </layout></bibliography>
            </style>
        "#,
            layout
        );
        Style::parse_for_test(&txt).unwrap()
    };
    // Case 1: citation number as the only element of a group with a delimiter.
    let style = mk(r#"<group delimiter=". "> <text variable="citation-number" /> </group>"#);
    let found = style_is_mixed_numeric(&style, CiteOrBib::Bibliography);
    // The element every `<text variable="citation-number" />` is expected to parse into.
    let model_el = El::Text(TextElement {
        source: TS::Variable(SV::Number(CitationNumber), VariableForm::Long),
        ..Default::default()
    });
    assert_eq!(found, Some((&model_el, Some(". "))));
    // Case 2: the number is first inside a choose branch nested in the group; the group's
    // delimiter is still reported.
    let style = mk(r#"
        <group delimiter=". ">
            <choose>
                <if type="book">
                    <text variable="citation-number" />
                    <text variable="title" />
                </if>
            </choose>
        </group>"#);
    let found = style_is_mixed_numeric(&style, CiteOrBib::Bibliography);
    assert_eq!(found, Some((&model_el, Some(". "))));
    // Case 3: the group sits inside the choose branch, followed by another element.
    let style = mk(r#"
        <choose>
            <if type="book">
                <group delimiter=". ">
                    <text variable="citation-number" />
                </group>
            </if>
        </choose>
        <text variable="title" />
    "#);
    let found = style_is_mixed_numeric(&style, CiteOrBib::Bibliography);
    assert_eq!(found, Some((&model_el, Some(". "))));
    // Case 4: a `<number>` element instead of `<text>`; only the delimiter is checked.
    let style = mk(r#"
        <choose>
            <if type="book">
                <group delimiter=". ">
                    <number variable="citation-number" />
                    <text variable="title" />
                </group>
            </if>
        </choose>
    "#);
    let found = style_is_mixed_numeric(&style, CiteOrBib::Bibliography);
    assert!(matches!(found, Some((_, Some(". ")))));
}
1074
1075 ////////////////////////////////////////////////////
1076 // Layout affixes inside left-margin/right-inline //
1077 ////////////////////////////////////////////////////
1078
/// The nodes involved when a layout's affixes need to be pushed down into its
/// left-margin / right-inline children.
#[derive(Debug, PartialEq)]
struct LeftRightLayout {
    /// The layout's first child, if that child is a `Seq` displayed as `LeftMargin`.
    left: Option<NodeId>,
    /// The layout's last child, if that child is a `Seq` displayed as `RightInline`.
    right: Option<NodeId>,
    /// The layout node itself.
    layout: NodeId,
}
1085
find_left_right_layout<O: OutputFormat>( root: NodeId, arena: &IrArena<O>, ) -> Option<LeftRightLayout>1086 fn find_left_right_layout<O: OutputFormat>(
1087 root: NodeId,
1088 arena: &IrArena<O>,
1089 ) -> Option<LeftRightLayout> {
1090 let node = arena.get(root)?;
1091 match &node.get().0 {
1092 IR::Seq(seq)
1093 if seq.is_layout
1094 && seq
1095 .affixes
1096 .as_ref()
1097 .map_or(false, |af| !af.prefix.is_empty() || !af.suffix.is_empty()) =>
1098 {
1099 let left = node.first_child()
1100 .filter(|c| matches!(arena.get(*c).map(|x| &x.get().0), Some(IR::Seq(IrSeq { display: Some(DisplayMode::LeftMargin), .. }))));
1101 let right = node.last_child()
1102 .filter(|c| matches!(arena.get(*c).map(|x| &x.get().0), Some(IR::Seq(IrSeq { display: Some(DisplayMode::RightInline), .. }))));
1103 Some(LeftRightLayout {
1104 left,
1105 right,
1106 layout: root,
1107 })
1108 }
1109 _ => None,
1110 }
1111 }
1112
fix_left_right_layout_affixes<O: OutputFormat>( root: NodeId, arena: &mut IrArena<O>, )1113 pub fn fix_left_right_layout_affixes<O: OutputFormat>(
1114 root: NodeId,
1115 arena: &mut IrArena<O>,
1116 ) {
1117 let LeftRightLayout {
1118 left,
1119 right,
1120 layout,
1121 } = match find_left_right_layout(root, arena) {
1122 Some(lrl) => lrl,
1123 None => return,
1124 };
1125
1126 fn get_af<O: OutputFormat>(node_id: NodeId, suf: bool, arena: &IrArena<O>) -> &str {
1127 match &arena[node_id].get().0 {
1128 IR::Seq(s) => s
1129 .affixes
1130 .as_ref()
1131 .map(|af| if suf { &af.suffix } else { &af.prefix })
1132 .map_or("", |af| af.as_str()),
1133 _ => "",
1134 }
1135 }
1136 fn write_af<O: OutputFormat>(
1137 node_id: NodeId,
1138 suf: bool,
1139 content: SmartString,
1140 arena: &mut IrArena<O>,
1141 ) {
1142 match &mut arena[node_id].get_mut().0 {
1143 IR::Seq(s) => match &mut s.affixes {
1144 Some(af) => {
1145 let which = if suf { &mut af.suffix } else { &mut af.prefix };
1146 *which = content;
1147 if af.prefix.is_empty() && af.suffix.is_empty() {
1148 s.affixes = None;
1149 }
1150 }
1151 None if !content.is_empty() => {
1152 let mut af = Affixes::default();
1153 let which = if suf { &mut af.suffix } else { &mut af.prefix };
1154 *which = content;
1155 s.affixes = Some(af);
1156 }
1157 _ => {}
1158 },
1159 _ => {}
1160 }
1161 }
1162
1163 if let Some(left) = left {
1164 let layout_prefix = get_af(layout, false, arena);
1165 if !layout_prefix.is_empty() {
1166 let left_prefix = get_af(left, false, arena);
1167 let mut new_prefix = SmartString::new();
1168 new_prefix.push_str(layout_prefix);
1169 new_prefix.push_str(left_prefix);
1170 write_af(left, false, new_prefix, arena);
1171 write_af(layout, false, "".into(), arena);
1172 }
1173 }
1174 if let Some(right) = right {
1175 let layout_suffix = get_af(layout, true, arena);
1176 if !layout_suffix.is_empty() {
1177 let right_suffix = get_af(right, true, arena);
1178 let mut new_suffix = SmartString::new();
1179 new_suffix.push_str(right_suffix);
1180 new_suffix.push_str(layout_suffix);
1181 write_af(right, true, new_suffix, arena);
1182 write_af(layout, true, "".into(), arena);
1183 }
1184 }
1185 }
1186
#[test]
fn test_left_right_layout() {
    let mut arena = IrArena::<Markup>::new();
    let fmt = Markup::html();

    // Left-margin block holding only the citation number.
    let left = arena.seq(
        IrSeq {
            display: Some(DisplayMode::LeftMargin),
            ..Default::default()
        },
        |tree, parent| {
            let number = tree.blob(
                CiteEdgeData::CitationNumber(fmt.plain("2. ")),
                GroupVars::Important,
            );
            parent.append(number, tree);
        },
    );

    // Right-inline block holding the title.
    let right = arena.seq(
        IrSeq {
            display: Some(DisplayMode::RightInline),
            ..Default::default()
        },
        |tree, parent| {
            let title = tree.blob(
                CiteEdgeData::Output(fmt.plain("title")),
                GroupVars::Important,
            );
            parent.append(title, tree);
        },
    );

    // Layout with a suffix only, wrapping both blocks.
    let layout = arena.seq(
        IrSeq {
            is_layout: true,
            affixes: Some(Affixes {
                prefix: "".into(),
                suffix: ".".into(),
            }),
            ..Default::default()
        },
        |tree, parent| {
            parent.append(left, tree);
            parent.append(right, tree);
        },
    );

    let mut irgen = IrGen::new(layout, arena, IrState::new());
    dbg!(&irgen);

    let expected = || {
        Some(LeftRightLayout {
            left: Some(left),
            right: Some(right),
            layout,
        })
    };

    assert_eq!(find_left_right_layout(layout, &irgen.arena), expected());

    // Detection looks only at the first and last child, so an extra node between the two
    // blocks must not change the result.
    let middle = irgen
        .arena
        .blob(CiteEdgeData::Output(fmt.plain("blob")), GroupVars::Plain);
    right.insert_before(middle, &mut irgen.arena);

    dbg!(&irgen);

    assert_eq!(find_left_right_layout(layout, &irgen.arena), expected());

    fix_left_right_layout_affixes(layout, &mut irgen.arena);

    // The layout's "." suffix must end up inside the right-inline block.
    let flat = IR::flatten(layout, &irgen.arena, &fmt, None).unwrap();
    let rendered = fmt.output(flat, false);
    assert_eq!(
        &rendered,
        r#"<div class="csl-left-margin">2. </div>blob<div class="csl-right-inline">title.</div>"#
    );
}
1272
/// Test-only conveniences for assembling IR trees by hand.
#[cfg(test)]
trait ArenaExtensions<O: OutputFormat> {
    /// Creates a node from rendered content `edge` and group-vars state `gv`, returning its id.
    fn blob(&mut self, edge: CiteEdgeData<O>, gv: GroupVars) -> NodeId;
    /// Creates a sequence node from `seq_tmpl`, then calls `f` with the arena and the new
    /// node's id (e.g. so children can be appended), and returns the new node's id.
    fn seq<F: FnOnce(&mut Self, NodeId)>(&mut self, seq_tmpl: IrSeq, f: F) -> NodeId;
}
1278
#[cfg(test)]
impl<O: OutputFormat> ArenaExtensions<O> for IrArena<O> {
    /// Allocates an `IR::Rendered` node wrapping `edge`, paired with `gv`.
    fn blob(&mut self, edge: CiteEdgeData<O>, gv: GroupVars) -> NodeId {
        let rendered = IR::Rendered(Some(edge));
        self.new_node((rendered, gv))
    }

    /// Allocates an `IR::Seq` node (always `GroupVars::Important`) and passes it to `f`
    /// before returning its id.
    fn seq<F: FnOnce(&mut Self, NodeId)>(&mut self, seq_tmpl: IrSeq, f: F) -> NodeId {
        let id = self.new_node((IR::Seq(seq_tmpl), GroupVars::Important));
        f(self, id);
        id
    }
}
1290