1 //! Based on a set of rules, validate a token stream and collect the 2 //! tokens by type. 3 //! 4 //! See the "rules" module for definitions of keywords types and 5 //! per-keyword rules. 6 //! 7 //! The key types in this module are SectionRules, which explains how to 8 //! validate and partition a stream of Item, and Section, which contains 9 //! a validated set of Item, ready to be interpreted. 10 //! 11 //! # Example 12 //! 13 //! (This is an internal API, so see the routerdesc.rs source for an 14 //! example of use.) 15 16 use crate::parse::keyword::Keyword; 17 use crate::parse::rules::*; 18 use crate::parse::tokenize::*; 19 use crate::{Error, Result}; 20 21 /// Describe the rules for one section of a document. 22 /// 23 /// The rules are represented as a mapping from token index to 24 /// rules::TokenFmt. 25 #[derive(Clone)] 26 pub(crate) struct SectionRules<T: Keyword> { 27 /// A set of rules for decoding a series of tokens into a Section 28 /// object. Each element of this array corresponds to the 29 /// token with the corresponding index values. 30 /// 31 /// When an array element is None, the corresponding keyword is 32 /// not allowed in this kind section. Otherwise, the array 33 /// element is a TokenFmt describing how many of the corresponding 34 /// token may appear, and what they need to look like. 35 rules: Vec<Option<TokenFmt<T>>>, 36 } 37 38 /// The entry or entries for a particular keyword within a document. 39 #[derive(Clone)] 40 enum TokVal<'a, K: Keyword> { 41 /// No value has been found. 42 None, 43 /// A single value has been found; we're storing it in place. 44 /// 45 /// We use a one-element array here so that we can return a slice 46 /// of the array. 47 Some([Item<'a, K>; 1]), 48 /// Multiple values have been found; they go in a vector. 49 Multi(Vec<Item<'a, K>>), 50 } 51 impl<'a, K: Keyword> TokVal<'a, K> { 52 /// Return the number of Items for this value. count(&self) -> usize53 fn count(&self) -> usize { 54 match self { 55 TokVal::None => 0, 56 TokVal::Some(_) => 1, 57 TokVal::Multi(v) => v.len(), 58 } 59 } 60 /// Return the first Item for this value, or None if there wasn't one. first(&self) -> Option<&Item<'a, K>>61 fn first(&self) -> Option<&Item<'a, K>> { 62 match self { 63 TokVal::None => None, 64 TokVal::Some([t]) => Some(t), 65 TokVal::Multi(v) => Some(&v[0]), 66 } 67 } 68 /// Return the Item for this value, if there is exactly one. singleton(&self) -> Option<&Item<'a, K>>69 fn singleton(&self) -> Option<&Item<'a, K>> { 70 match self { 71 TokVal::None => None, 72 TokVal::Some([t]) => Some(t), 73 TokVal::Multi(_) => None, 74 } 75 } 76 /// Return all the Items for this value, as a slice. as_slice(&self) -> &[Item<'a, K>]77 fn as_slice(&self) -> &[Item<'a, K>] { 78 match self { 79 TokVal::None => &[], 80 TokVal::Some(t) => &t[..], 81 TokVal::Multi(v) => &v[..], 82 } 83 } 84 /// Return the last Item for this value, if any. last(&self) -> Option<&Item<'a, K>>85 fn last(&self) -> Option<&Item<'a, K>> { 86 match self { 87 TokVal::None => None, 88 TokVal::Some([t]) => Some(t), 89 TokVal::Multi(v) => Some(&v[v.len() - 1]), 90 } 91 } 92 } 93 94 /// A Section is the result of sorting a document's entries by keyword. 95 /// 96 /// TODO: I'd rather have this be pub(crate), but I haven't figured out 97 /// how to make that work. 98 pub struct Section<'a, T: Keyword> { 99 /// Map from Keyword index to TokVal 100 v: Vec<TokVal<'a, T>>, 101 /// The keyword that appeared first in this section. This will 102 /// be set if `v` is nonempty. 103 first: Option<T>, 104 /// The keyword that appeared last in this section. This will 105 /// be set if `v` is nonempty. 106 last: Option<T>, 107 } 108 109 impl<'a, T: Keyword> Section<'a, T> { 110 /// Make a new empty Section. new() -> Self111 fn new() -> Self { 112 let n = T::n_vals(); 113 let mut v = Vec::with_capacity(n); 114 v.resize(n, TokVal::None); 115 Section { 116 v, 117 first: None, 118 last: None, 119 } 120 } 121 /// Helper: return the tokval for some Keyword. tokval(&self, t: T) -> &TokVal<'a, T>122 fn tokval(&self, t: T) -> &TokVal<'a, T> { 123 let idx = t.idx(); 124 &self.v[idx] 125 } 126 /// Return all the Items for some Keyword, as a slice. slice(&self, t: T) -> &[Item<'a, T>]127 pub(crate) fn slice(&self, t: T) -> &[Item<'a, T>] { 128 self.tokval(t).as_slice() 129 } 130 /// Return a single Item for some Keyword, if there is exactly one. get(&self, t: T) -> Option<&Item<'a, T>>131 pub(crate) fn get(&self, t: T) -> Option<&Item<'a, T>> { 132 self.tokval(t).singleton() 133 } 134 /// Return a single Item for some Keyword, giving an error if there 135 /// is not exactly one. 136 /// 137 /// It is usually a mistake to use this function on a Keyword that is 138 /// not required. required(&self, t: T) -> Result<&Item<'a, T>>139 pub(crate) fn required(&self, t: T) -> Result<&Item<'a, T>> { 140 self.get(t).ok_or_else(|| Error::MissingToken(t.to_str())) 141 } 142 /// Return a proxy MaybeItem object for some keyword. 143 // 144 /// A MaybeItem is used to represent an object that might or might 145 /// not be there. maybe<'b>(&'b self, t: T) -> MaybeItem<'b, 'a, T>146 pub(crate) fn maybe<'b>(&'b self, t: T) -> MaybeItem<'b, 'a, T> { 147 MaybeItem::from_option(self.get(t)) 148 } 149 /// Return the first item that was accepted for this section, or None 150 /// if no items were accepted for this section. first_item(&self) -> Option<&Item<'a, T>>151 pub(crate) fn first_item(&self) -> Option<&Item<'a, T>> { 152 match self.first { 153 None => None, 154 Some(t) => self.tokval(t).first(), 155 } 156 } 157 /// Return the last item that was accepted for this section, or None 158 /// if no items were accepted for this section. last_item(&self) -> Option<&Item<'a, T>>159 pub(crate) fn last_item(&self) -> Option<&Item<'a, T>> { 160 match self.last { 161 None => None, 162 Some(t) => self.tokval(t).last(), 163 } 164 } 165 /// Insert an `item`. 166 /// 167 /// The `item` must have parsed Keyword `t`. add_tok(&mut self, t: T, item: Item<'a, T>)168 fn add_tok(&mut self, t: T, item: Item<'a, T>) { 169 let idx = Keyword::idx(t); 170 if idx >= self.v.len() { 171 self.v.resize(idx + 1, TokVal::None); 172 } 173 let m = &mut self.v[idx]; 174 175 match m { 176 TokVal::None => *m = TokVal::Some([item]), 177 TokVal::Some([x]) => { 178 *m = TokVal::Multi(vec![x.clone(), item]); 179 } 180 TokVal::Multi(ref mut v) => { 181 v.push(item); 182 } 183 }; 184 if self.first.is_none() { 185 self.first = Some(t); 186 } 187 self.last = Some(t); 188 } 189 } 190 191 impl<T: Keyword> SectionRules<T> { 192 /// Create a new SectionRules with no rules. 193 /// 194 /// By default, no Keyword is allowed by this SectionRules. new() -> Self195 pub(crate) fn new() -> Self { 196 let n = T::n_vals(); 197 let mut rules = Vec::with_capacity(n); 198 rules.resize(n, None); 199 SectionRules { rules } 200 } 201 202 /// Add a rule to this SectionRules, based on a TokenFmtBuilder. 203 /// 204 /// Requires that no rule yet exists for the provided keyword. add(&mut self, t: TokenFmtBuilder<T>)205 pub(crate) fn add(&mut self, t: TokenFmtBuilder<T>) { 206 let rule: TokenFmt<_> = t.into(); 207 let idx = rule.kwd().idx(); 208 assert!(self.rules[idx].is_none()); 209 self.rules[idx] = Some(rule); 210 } 211 212 /// Parse a stream of tokens into a Section object without (fully) 213 /// verifying them. 214 /// 215 /// Some errors are detected early, but others only show up later 216 /// when we validate more carefully. parse_unverified<'a, I>(&self, tokens: &mut I, section: &mut Section<'a, T>) -> Result<()> where I: Iterator<Item = Result<Item<'a, T>>>,217 fn parse_unverified<'a, I>(&self, tokens: &mut I, section: &mut Section<'a, T>) -> Result<()> 218 where 219 I: Iterator<Item = Result<Item<'a, T>>>, 220 { 221 for item in tokens { 222 let item = item?; 223 224 let tok = item.kwd(); 225 let tok_idx = tok.idx(); 226 if let Some(rule) = &self.rules[tok_idx] { 227 // we want this token. 228 assert!(rule.kwd() == tok); 229 section.add_tok(tok, item); 230 rule.check_multiplicity(section.v[tok_idx].as_slice())?; 231 } else { 232 // We don't have a rule for this token. 233 return Err(Error::UnexpectedToken(tok.to_str(), item.pos())); 234 } 235 } 236 Ok(()) 237 } 238 239 /// Check whether the tokens in a section we've parsed conform to 240 /// these rules. validate<'a>(&self, s: &Section<'a, T>) -> Result<()>241 fn validate<'a>(&self, s: &Section<'a, T>) -> Result<()> { 242 // These vectors are both generated from T::n_vals(). 243 assert_eq!(s.v.len(), self.rules.len()); 244 245 // Iterate over every item, and make sure it matches the 246 // corresponding rule. 247 for (rule, t) in self.rules.iter().zip(s.v.iter()) { 248 match rule { 249 None => { 250 // We aren't supposed to have any of these. 251 if t.count() > 0 { 252 unreachable!( 253 "This item should have been rejected earlier, in parse_unverified()" 254 ); 255 } 256 } 257 Some(rule) => { 258 // We're allowed to have this. Is the number right? 259 rule.check_multiplicity(t.as_slice())?; 260 // The number is right. Check each individual item. 261 for item in t.as_slice() { 262 rule.check_item(item)?; 263 } 264 } 265 } 266 } 267 268 Ok(()) 269 } 270 271 /// Check all the base64-encoded objects on a given keyword. 272 /// 273 /// We use this to validate objects on unrecognized items, since 274 /// otherwise nothing would check that they are well-formed. validate_objects<'a>(&self, s: &Section<'a, T>, kwd: T) -> Result<()>275 fn validate_objects<'a>(&self, s: &Section<'a, T>, kwd: T) -> Result<()> { 276 for item in s.slice(kwd).iter() { 277 let _ = item.obj_raw()?; 278 } 279 Ok(()) 280 } 281 282 /// Parse a stream of tokens into a validated section. parse<'a, I>(&self, tokens: &mut I) -> Result<Section<'a, T>> where I: Iterator<Item = Result<Item<'a, T>>>,283 pub(crate) fn parse<'a, I>(&self, tokens: &mut I) -> Result<Section<'a, T>> 284 where 285 I: Iterator<Item = Result<Item<'a, T>>>, 286 { 287 let mut section = Section::new(); 288 self.parse_unverified(tokens, &mut section)?; 289 self.validate(§ion)?; 290 self.validate_objects(§ion, T::unrecognized())?; 291 self.validate_objects(§ion, T::ann_unrecognized())?; 292 Ok(section) 293 } 294 } 295 296 #[cfg(test)] 297 mod test { 298 #![allow(clippy::unwrap_used)] 299 use super::SectionRules; 300 use crate::parse::keyword::Keyword; 301 use crate::parse::macros::test::Fruit; 302 use crate::parse::tokenize::{Item, NetDocReader}; 303 use crate::{Error, Result}; 304 use once_cell::sync::Lazy; 305 306 /// Rules for parsing a set of router annotations. 307 static FRUIT_SALAD: Lazy<SectionRules<Fruit>> = Lazy::new(|| { 308 use Fruit::*; 309 let mut rules = SectionRules::new(); 310 rules.add(ANN_TASTY.rule().required().args(1..=1)); 311 rules.add(ORANGE.rule().args(1..)); 312 rules.add(STONEFRUIT.rule().may_repeat()); 313 rules.add(GUAVA.rule().obj_optional()); 314 rules.add(LEMON.rule().no_args().obj_required()); 315 rules 316 }); 317 318 #[test] parse_section() -> Result<()>319 fn parse_section() -> Result<()> { 320 use Fruit::*; 321 let s = "\ 322 @tasty yes 323 orange soda 324 cherry cobbler 325 cherry pie 326 plum compote 327 guava fresh from 7 trees 328 -----BEGIN GUAVA MANIFESTO----- 329 VGhlIGd1YXZhIGVtb2ppIGlzIG5vdCBjdXJyZW50bHkgc3VwcG9ydGVkIGluI 330 HVuaWNvZGUgMTMuMC4gTGV0J3MgZmlnaHQgYWdhaW5zdCBhbnRpLWd1YXZhIG 331 JpYXMu 332 -----END GUAVA MANIFESTO----- 333 lemon 334 -----BEGIN LEMON----- 335 8J+Niw== 336 -----END LEMON----- 337 "; 338 let mut r: NetDocReader<'_, Fruit> = NetDocReader::new(s); 339 let sec = FRUIT_SALAD.parse(&mut r.iter()).unwrap(); 340 341 assert_eq!(sec.required(ANN_TASTY)?.arg(0), Some("yes")); 342 343 assert!(sec.get(ORANGE).is_some()); 344 assert_eq!(sec.get(ORANGE).unwrap().args_as_str(), "soda"); 345 346 let stonefruit_slice = sec.slice(STONEFRUIT); 347 assert_eq!(stonefruit_slice.len(), 3); 348 let kwds: Vec<&str> = stonefruit_slice.iter().map(Item::kwd_str).collect(); 349 assert_eq!(kwds, &["cherry", "cherry", "plum"]); 350 351 assert_eq!(sec.maybe(GUAVA).args_as_str(), Some("fresh from 7 trees")); 352 assert_eq!(sec.maybe(GUAVA).parse_arg::<u32>(2).unwrap(), Some(7)); 353 assert!(sec.maybe(GUAVA).parse_arg::<u32>(1).is_err()); 354 355 assert_eq!(sec.get(GUAVA).unwrap().obj("GUAVA MANIFESTO").unwrap(), 356 &b"The guava emoji is not currently supported in unicode 13.0. Let's fight against anti-guava bias."[..]); 357 358 assert_eq!( 359 sec.get(ANN_TASTY).unwrap() as *const Item<'_, _>, 360 sec.first_item().unwrap() as *const Item<'_, _> 361 ); 362 363 assert_eq!( 364 sec.get(LEMON).unwrap() as *const Item<'_, _>, 365 sec.last_item().unwrap() as *const Item<'_, _> 366 ); 367 368 Ok(()) 369 } 370 371 #[test] rejected()372 fn rejected() { 373 use crate::Pos; 374 fn check(s: &str, e: &Error) { 375 let mut r: NetDocReader<'_, Fruit> = NetDocReader::new(s); 376 let res = FRUIT_SALAD.parse(&mut r.iter()); 377 assert!(res.is_err()); 378 assert_eq!(&res.err().unwrap().within(s), e); 379 } 380 381 // unrecognized tokens aren't allowed here 382 check( 383 "orange foo\nfoobar x\n@tasty yes\n", 384 &Error::UnexpectedToken("<unrecognized>", Pos::from_line(2, 1)), 385 ); 386 387 // Only one orange per customer. 388 check( 389 "@tasty yes\norange foo\norange bar\n", 390 &Error::DuplicateToken("orange", Pos::from_line(3, 1)), 391 ); 392 393 // There needs to be a declaration of tastiness. 394 check("orange foo\n", &Error::MissingToken("@tasty")); 395 396 // You can't have an orange without an argument. 397 check( 398 "@tasty nope\norange\n", 399 &Error::TooFewArguments("orange", Pos::from_line(2, 1)), 400 ); 401 // You can't have an more than one argument on "tasty". 402 check( 403 "@tasty yup indeed\norange normal\n", 404 &Error::TooManyArguments("@tasty", Pos::from_line(1, 1)), 405 ); 406 407 // Every lemon needs an object 408 check( 409 "@tasty yes\nlemon\norange no\n", 410 &Error::MissingObject("lemon", Pos::from_line(2, 1)), 411 ); 412 } 413 } 414