1 //! Based on a set of rules, validate a token stream and collect the
2 //! tokens by type.
3 //!
//! See the "rules" module for definitions of keyword types and
//! per-keyword rules.
6 //!
7 //! The key types in this module are SectionRules, which explains how to
8 //! validate and partition a stream of Item, and Section, which contains
9 //! a validated set of Item, ready to be interpreted.
10 //!
11 //! # Example
12 //!
13 //! (This is an internal API, so see the routerdesc.rs source for an
14 //! example of use.)
15 
16 use crate::parse::keyword::Keyword;
17 use crate::parse::rules::*;
18 use crate::parse::tokenize::*;
19 use crate::{Error, Result};
20 
21 /// Describe the rules for one section of a document.
22 ///
23 /// The rules are represented as a mapping from token index to
24 /// rules::TokenFmt.
25 #[derive(Clone)]
26 pub(crate) struct SectionRules<T: Keyword> {
27     /// A set of rules for decoding a series of tokens into a Section
28     /// object.  Each element of this array corresponds to the
29     /// token with the corresponding index values.
30     ///
31     /// When an array element is None, the corresponding keyword is
32     /// not allowed in this kind section.  Otherwise, the array
33     /// element is a TokenFmt describing how many of the corresponding
34     /// token may appear, and what they need to look like.
35     rules: Vec<Option<TokenFmt<T>>>,
36 }
37 
38 /// The entry or entries for a particular keyword within a document.
39 #[derive(Clone)]
40 enum TokVal<'a, K: Keyword> {
41     /// No value has been found.
42     None,
43     /// A single value has been found; we're storing it in place.
44     ///
45     /// We use a one-element array here so that we can return a slice
46     /// of the array.
47     Some([Item<'a, K>; 1]),
48     /// Multiple values have been found; they go in a vector.
49     Multi(Vec<Item<'a, K>>),
50 }
51 impl<'a, K: Keyword> TokVal<'a, K> {
52     /// Return the number of Items for this value.
count(&self) -> usize53     fn count(&self) -> usize {
54         match self {
55             TokVal::None => 0,
56             TokVal::Some(_) => 1,
57             TokVal::Multi(v) => v.len(),
58         }
59     }
60     /// Return the first Item for this value, or None if there wasn't one.
first(&self) -> Option<&Item<'a, K>>61     fn first(&self) -> Option<&Item<'a, K>> {
62         match self {
63             TokVal::None => None,
64             TokVal::Some([t]) => Some(t),
65             TokVal::Multi(v) => Some(&v[0]),
66         }
67     }
68     /// Return the Item for this value, if there is exactly one.
singleton(&self) -> Option<&Item<'a, K>>69     fn singleton(&self) -> Option<&Item<'a, K>> {
70         match self {
71             TokVal::None => None,
72             TokVal::Some([t]) => Some(t),
73             TokVal::Multi(_) => None,
74         }
75     }
76     /// Return all the Items for this value, as a slice.
as_slice(&self) -> &[Item<'a, K>]77     fn as_slice(&self) -> &[Item<'a, K>] {
78         match self {
79             TokVal::None => &[],
80             TokVal::Some(t) => &t[..],
81             TokVal::Multi(v) => &v[..],
82         }
83     }
84     /// Return the last Item for this value, if any.
last(&self) -> Option<&Item<'a, K>>85     fn last(&self) -> Option<&Item<'a, K>> {
86         match self {
87             TokVal::None => None,
88             TokVal::Some([t]) => Some(t),
89             TokVal::Multi(v) => Some(&v[v.len() - 1]),
90         }
91     }
92 }
93 
94 /// A Section is the result of sorting a document's entries by keyword.
95 ///
96 /// TODO: I'd rather have this be pub(crate), but I haven't figured out
97 /// how to make that work.
98 pub struct Section<'a, T: Keyword> {
99     /// Map from Keyword index to TokVal
100     v: Vec<TokVal<'a, T>>,
101     /// The keyword that appeared first in this section.  This will
102     /// be set if `v` is nonempty.
103     first: Option<T>,
104     /// The keyword that appeared last in this section.  This will
105     /// be set if `v` is nonempty.
106     last: Option<T>,
107 }
108 
109 impl<'a, T: Keyword> Section<'a, T> {
110     /// Make a new empty Section.
new() -> Self111     fn new() -> Self {
112         let n = T::n_vals();
113         let mut v = Vec::with_capacity(n);
114         v.resize(n, TokVal::None);
115         Section {
116             v,
117             first: None,
118             last: None,
119         }
120     }
121     /// Helper: return the tokval for some Keyword.
tokval(&self, t: T) -> &TokVal<'a, T>122     fn tokval(&self, t: T) -> &TokVal<'a, T> {
123         let idx = t.idx();
124         &self.v[idx]
125     }
126     /// Return all the Items for some Keyword, as a slice.
slice(&self, t: T) -> &[Item<'a, T>]127     pub(crate) fn slice(&self, t: T) -> &[Item<'a, T>] {
128         self.tokval(t).as_slice()
129     }
130     /// Return a single Item for some Keyword, if there is exactly one.
get(&self, t: T) -> Option<&Item<'a, T>>131     pub(crate) fn get(&self, t: T) -> Option<&Item<'a, T>> {
132         self.tokval(t).singleton()
133     }
134     /// Return a single Item for some Keyword, giving an error if there
135     /// is not exactly one.
136     ///
137     /// It is usually a mistake to use this function on a Keyword that is
138     /// not required.
required(&self, t: T) -> Result<&Item<'a, T>>139     pub(crate) fn required(&self, t: T) -> Result<&Item<'a, T>> {
140         self.get(t).ok_or_else(|| Error::MissingToken(t.to_str()))
141     }
142     /// Return a proxy MaybeItem object for some keyword.
143     //
144     /// A MaybeItem is used to represent an object that might or might
145     /// not be there.
maybe<'b>(&'b self, t: T) -> MaybeItem<'b, 'a, T>146     pub(crate) fn maybe<'b>(&'b self, t: T) -> MaybeItem<'b, 'a, T> {
147         MaybeItem::from_option(self.get(t))
148     }
149     /// Return the first item that was accepted for this section, or None
150     /// if no items were accepted for this section.
first_item(&self) -> Option<&Item<'a, T>>151     pub(crate) fn first_item(&self) -> Option<&Item<'a, T>> {
152         match self.first {
153             None => None,
154             Some(t) => self.tokval(t).first(),
155         }
156     }
157     /// Return the last item that was accepted for this section, or None
158     /// if no items were accepted for this section.
last_item(&self) -> Option<&Item<'a, T>>159     pub(crate) fn last_item(&self) -> Option<&Item<'a, T>> {
160         match self.last {
161             None => None,
162             Some(t) => self.tokval(t).last(),
163         }
164     }
165     /// Insert an `item`.
166     ///
167     /// The `item` must have parsed Keyword `t`.
add_tok(&mut self, t: T, item: Item<'a, T>)168     fn add_tok(&mut self, t: T, item: Item<'a, T>) {
169         let idx = Keyword::idx(t);
170         if idx >= self.v.len() {
171             self.v.resize(idx + 1, TokVal::None);
172         }
173         let m = &mut self.v[idx];
174 
175         match m {
176             TokVal::None => *m = TokVal::Some([item]),
177             TokVal::Some([x]) => {
178                 *m = TokVal::Multi(vec![x.clone(), item]);
179             }
180             TokVal::Multi(ref mut v) => {
181                 v.push(item);
182             }
183         };
184         if self.first.is_none() {
185             self.first = Some(t);
186         }
187         self.last = Some(t);
188     }
189 }
190 
191 impl<T: Keyword> SectionRules<T> {
192     /// Create a new SectionRules with no rules.
193     ///
194     /// By default, no Keyword is allowed by this SectionRules.
new() -> Self195     pub(crate) fn new() -> Self {
196         let n = T::n_vals();
197         let mut rules = Vec::with_capacity(n);
198         rules.resize(n, None);
199         SectionRules { rules }
200     }
201 
202     /// Add a rule to this SectionRules, based on a TokenFmtBuilder.
203     ///
204     /// Requires that no rule yet exists for the provided keyword.
add(&mut self, t: TokenFmtBuilder<T>)205     pub(crate) fn add(&mut self, t: TokenFmtBuilder<T>) {
206         let rule: TokenFmt<_> = t.into();
207         let idx = rule.kwd().idx();
208         assert!(self.rules[idx].is_none());
209         self.rules[idx] = Some(rule);
210     }
211 
212     /// Parse a stream of tokens into a Section object without (fully)
213     /// verifying them.
214     ///
215     /// Some errors are detected early, but others only show up later
216     /// when we validate more carefully.
parse_unverified<'a, I>(&self, tokens: &mut I, section: &mut Section<'a, T>) -> Result<()> where I: Iterator<Item = Result<Item<'a, T>>>,217     fn parse_unverified<'a, I>(&self, tokens: &mut I, section: &mut Section<'a, T>) -> Result<()>
218     where
219         I: Iterator<Item = Result<Item<'a, T>>>,
220     {
221         for item in tokens {
222             let item = item?;
223 
224             let tok = item.kwd();
225             let tok_idx = tok.idx();
226             if let Some(rule) = &self.rules[tok_idx] {
227                 // we want this token.
228                 assert!(rule.kwd() == tok);
229                 section.add_tok(tok, item);
230                 rule.check_multiplicity(section.v[tok_idx].as_slice())?;
231             } else {
232                 // We don't have a rule for this token.
233                 return Err(Error::UnexpectedToken(tok.to_str(), item.pos()));
234             }
235         }
236         Ok(())
237     }
238 
239     /// Check whether the tokens in a section we've parsed conform to
240     /// these rules.
validate<'a>(&self, s: &Section<'a, T>) -> Result<()>241     fn validate<'a>(&self, s: &Section<'a, T>) -> Result<()> {
242         // These vectors are both generated from T::n_vals().
243         assert_eq!(s.v.len(), self.rules.len());
244 
245         // Iterate over every item, and make sure it matches the
246         // corresponding rule.
247         for (rule, t) in self.rules.iter().zip(s.v.iter()) {
248             match rule {
249                 None => {
250                     // We aren't supposed to have any of these.
251                     if t.count() > 0 {
252                         unreachable!(
253                             "This item should have been rejected earlier, in parse_unverified()"
254                         );
255                     }
256                 }
257                 Some(rule) => {
258                     // We're allowed to have this. Is the number right?
259                     rule.check_multiplicity(t.as_slice())?;
260                     // The number is right. Check each individual item.
261                     for item in t.as_slice() {
262                         rule.check_item(item)?;
263                     }
264                 }
265             }
266         }
267 
268         Ok(())
269     }
270 
271     /// Check all the base64-encoded objects on a given keyword.
272     ///
273     /// We use this to validate objects on unrecognized items, since
274     /// otherwise nothing would check that they are well-formed.
validate_objects<'a>(&self, s: &Section<'a, T>, kwd: T) -> Result<()>275     fn validate_objects<'a>(&self, s: &Section<'a, T>, kwd: T) -> Result<()> {
276         for item in s.slice(kwd).iter() {
277             let _ = item.obj_raw()?;
278         }
279         Ok(())
280     }
281 
282     /// Parse a stream of tokens into a validated section.
parse<'a, I>(&self, tokens: &mut I) -> Result<Section<'a, T>> where I: Iterator<Item = Result<Item<'a, T>>>,283     pub(crate) fn parse<'a, I>(&self, tokens: &mut I) -> Result<Section<'a, T>>
284     where
285         I: Iterator<Item = Result<Item<'a, T>>>,
286     {
287         let mut section = Section::new();
288         self.parse_unverified(tokens, &mut section)?;
289         self.validate(&section)?;
290         self.validate_objects(&section, T::unrecognized())?;
291         self.validate_objects(&section, T::ann_unrecognized())?;
292         Ok(section)
293     }
294 }
295 
#[cfg(test)]
mod test {
    #![allow(clippy::unwrap_used)]
    use super::SectionRules;
    use crate::parse::keyword::Keyword;
    use crate::parse::macros::test::Fruit;
    use crate::parse::tokenize::{Item, NetDocReader};
    use crate::{Error, Result};
    use once_cell::sync::Lazy;

    /// Rules for the fruit-themed documents used by the tests below.
    static FRUIT_SALAD: Lazy<SectionRules<Fruit>> = Lazy::new(|| {
        use Fruit::*;
        let mut salad = SectionRules::new();
        salad.add(ANN_TASTY.rule().required().args(1..=1));
        salad.add(ORANGE.rule().args(1..));
        salad.add(STONEFRUIT.rule().may_repeat());
        salad.add(GUAVA.rule().obj_optional());
        salad.add(LEMON.rule().no_args().obj_required());
        salad
    });

    /// A well-formed document parses, and every accessor on the
    /// resulting section reports what was in the input.
    #[test]
    fn parse_section() -> Result<()> {
        use Fruit::*;
        let doc = "\
@tasty yes
orange soda
cherry cobbler
cherry pie
plum compote
guava fresh from 7 trees
-----BEGIN GUAVA MANIFESTO-----
VGhlIGd1YXZhIGVtb2ppIGlzIG5vdCBjdXJyZW50bHkgc3VwcG9ydGVkIGluI
HVuaWNvZGUgMTMuMC4gTGV0J3MgZmlnaHQgYWdhaW5zdCBhbnRpLWd1YXZhIG
JpYXMu
-----END GUAVA MANIFESTO-----
lemon
-----BEGIN LEMON-----
8J+Niw==
-----END LEMON-----
";
        let mut reader: NetDocReader<'_, Fruit> = NetDocReader::new(doc);
        let section = FRUIT_SALAD.parse(&mut reader.iter()).unwrap();

        // A required keyword with a single argument.
        assert_eq!(section.required(ANN_TASTY)?.arg(0), Some("yes"));

        // An optional keyword that appears exactly once.
        assert!(section.get(ORANGE).is_some());
        assert_eq!(section.get(ORANGE).unwrap().args_as_str(), "soda");

        // A repeatable keyword comes back as a slice, in input order.
        let stonefruit = section.slice(STONEFRUIT);
        assert_eq!(stonefruit.len(), 3);
        let names: Vec<&str> = stonefruit.iter().map(Item::kwd_str).collect();
        assert_eq!(names, &["cherry", "cherry", "plum"]);

        // Access through the MaybeItem proxy.
        let guava = section.maybe(GUAVA);
        assert_eq!(guava.args_as_str(), Some("fresh from 7 trees"));
        assert_eq!(section.maybe(GUAVA).parse_arg::<u32>(2).unwrap(), Some(7));
        assert!(section.maybe(GUAVA).parse_arg::<u32>(1).is_err());

        // The attached object decodes to the expected bytes.
        assert_eq!(
            section.get(GUAVA).unwrap().obj("GUAVA MANIFESTO").unwrap(),
            &b"The guava emoji is not currently supported in unicode 13.0. Let's fight against anti-guava bias."[..]
        );

        // The first and last items are the very same objects that the
        // keyword lookups return.
        assert!(std::ptr::eq(
            section.get(ANN_TASTY).unwrap(),
            section.first_item().unwrap()
        ));
        assert!(std::ptr::eq(
            section.get(LEMON).unwrap(),
            section.last_item().unwrap()
        ));

        Ok(())
    }

    /// Malformed documents are rejected with the expected error.
    #[test]
    fn rejected() {
        use crate::Pos;

        /// Parse `s` and require that it fails with exactly `e`.
        fn check(s: &str, e: &Error) {
            let mut reader: NetDocReader<'_, Fruit> = NetDocReader::new(s);
            let outcome = FRUIT_SALAD.parse(&mut reader.iter());
            assert!(outcome.is_err());
            assert_eq!(&outcome.err().unwrap().within(s), e);
        }

        let cases: [(&str, Error); 6] = [
            // unrecognized tokens aren't allowed here
            (
                "orange foo\nfoobar x\n@tasty yes\n",
                Error::UnexpectedToken("<unrecognized>", Pos::from_line(2, 1)),
            ),
            // Only one orange per customer.
            (
                "@tasty yes\norange foo\norange bar\n",
                Error::DuplicateToken("orange", Pos::from_line(3, 1)),
            ),
            // There needs to be a declaration of tastiness.
            ("orange foo\n", Error::MissingToken("@tasty")),
            // You can't have an orange without an argument.
            (
                "@tasty nope\norange\n",
                Error::TooFewArguments("orange", Pos::from_line(2, 1)),
            ),
            // You can't have more than one argument on "tasty".
            (
                "@tasty yup indeed\norange normal\n",
                Error::TooManyArguments("@tasty", Pos::from_line(1, 1)),
            ),
            // Every lemon needs an object
            (
                "@tasty yes\nlemon\norange no\n",
                Error::MissingObject("lemon", Pos::from_line(2, 1)),
            ),
        ];

        for (input, expected) in cases.iter() {
            check(input, expected);
        }
    }
}
414