1 // compose-filter.h 2 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // Copyright 2005-2010 Google, Inc. 16 // Author: riley@google.com (Michael Riley) 17 // 18 // \file 19 // Classes for filtering the composition matches, e.g. for correct epsilon 20 // handling. 21 22 #ifndef FST_LIB_COMPOSE_FILTER_H__ 23 #define FST_LIB_COMPOSE_FILTER_H__ 24 25 #include <fst/fst.h> 26 #include <fst/fst-decl.h> // For optional argument declarations 27 #include <fst/filter-state.h> 28 #include <fst/matcher.h> 29 30 31 namespace fst { 32 33 34 // COMPOSITION FILTERS - these determine which matches are allowed to 35 // proceed. The filter's state is represented by the type 36 // ComposeFilter::FilterState. The basic filters handle correct 37 // epsilon matching. Their interface is: 38 // 39 // template <class M1, class M2> 40 // class ComposeFilter { 41 // public: 42 // typedef typename M1::FST1 FST1; 43 // typedef typename M1::FST2 FST2; 44 // typedef typename FST1::Arc Arc; 45 // typedef ... FilterState; 46 // typedef ... Matcher1; 47 // typedef ... Matcher2; 48 // 49 // // Required constructors. 50 // ComposeFilter(const FST1 &fst1, const FST2 &fst2, 51 // // M1 *matcher1 = 0, M2 *matcher2 = 0); 52 // // If safe=true, the copy is thread-safe. See Fst<>::Copy() 53 // // for further doc. 54 // ComposeFilter(const ComposeFilter<M1, M2> &filter, 55 // // bool safe = false); 56 // // Return start state of filter. 57 // FilterState Start() const; 58 // // Specifies current composition state. 59 // void SetState(StateId s1, StateId s2, const FilterState &f); 60 // 61 // // Apply filter at current composition state to these transitions. 62 // // If an arc label to be matched is kNolabel, then that side 63 // // does not consume a symbol. Returns the new filter state or, 64 // // if disallowed, FilterState::NoState(). The filter is permitted to 65 // // modify its inputs, e.g. for optimizations. 66 // FilterState FilterArc(Arc *arc1, Arc *arc2) const; 67 68 // // Apply filter at current composition state to these final weights 69 // // (cf. superfinal transitions). The filter may modify its inputs, 70 // // e.g. for optimizations. 71 // void FilterFinal(Weight *final1, Weight *final2) const; 72 // 73 // // Return resp matchers. Ownership stays with filter. These 74 // // methods allow the filter to access and possibly modify 75 // // the composition matchers (useful e.g. with lookahead). 76 // Matcher1 *GetMatcher1(); 77 // Matcher2 *GetMatcher2(); 78 // 79 // // This specifies how the filter affects the composition result 80 // // properties. It takes as argument the properties that would 81 // // apply with a trivial composition fitler. 82 // uint64 Properties(uint64 props) const; 83 // }; 84 85 86 // This filter allows only exact matching of symbols from FST1 with on 87 // FST2; e.g. no special interpretation of epsilons. (Template arg 88 // default declared in fst-decl.h.) 89 template <class M1, class M2 /* = M1 */> 90 class NullComposeFilter { 91 public: 92 typedef typename M1::FST FST1; 93 typedef typename M2::FST FST2; 94 typedef typename FST1::Arc Arc; 95 typedef CharFilterState FilterState; 96 typedef M1 Matcher1; 97 typedef M2 Matcher2; 98 99 typedef typename Arc::StateId StateId; 100 typedef typename Arc::Label Label; 101 typedef typename Arc::Weight Weight; 102 103 NullComposeFilter(const FST1 &fst1, const FST2 &fst2, 104 M1 *matcher1 = 0, M2 *matcher2 = 0) 105 : matcher1_(matcher1 ? matcher1 : new M1(fst1, MATCH_OUTPUT)), 106 matcher2_(matcher2 ? matcher2 : new M2(fst2, MATCH_INPUT)), 107 fst1_(matcher1_->GetFst()), 108 fst2_(matcher2_->GetFst()) {} 109 110 NullComposeFilter(const NullComposeFilter<M1, M2> &filter, 111 bool safe = false) 112 : matcher1_(filter.matcher1_->Copy(safe)), 113 matcher2_(filter.matcher2_->Copy(safe)), 114 fst1_(matcher1_->GetFst()), 115 fst2_(matcher2_->GetFst()) {} 116 ~NullComposeFilter()117 ~NullComposeFilter() { 118 delete matcher1_; 119 delete matcher2_; 120 } 121 Start()122 FilterState Start() const { return FilterState(0); } 123 SetState(StateId,StateId,const FilterState &)124 void SetState(StateId, StateId, const FilterState &) { } 125 FilterArc(Arc * arc1,Arc * arc2)126 FilterState FilterArc(Arc *arc1, Arc *arc2) const { 127 return (arc1->olabel == kNoLabel || arc2->ilabel == kNoLabel) ? 128 FilterState::NoState() : FilterState(0); 129 } 130 FilterFinal(Weight *,Weight *)131 void FilterFinal(Weight *, Weight *) const {} 132 133 // Return resp matchers. Ownership stays with filter. GetMatcher1()134 Matcher1 *GetMatcher1() { return matcher1_; } GetMatcher2()135 Matcher2 *GetMatcher2() { return matcher2_; } 136 Properties(uint64 props)137 uint64 Properties(uint64 props) const { return props; } 138 139 private: 140 Matcher1 *matcher1_; 141 Matcher2 *matcher2_; 142 const FST1 &fst1_; 143 const FST2 &fst2_; 144 145 void operator=(const NullComposeFilter<M1, M2> &); // disallow 146 }; 147 148 149 // This filter requires epsilons on FST1 to be read before epsilons on FST2. 150 // (Template arg default declared in fst-decl.h.) 151 template <class M1, class M2 /* = M1 */> 152 class SequenceComposeFilter { 153 public: 154 typedef typename M1::FST FST1; 155 typedef typename M2::FST FST2; 156 typedef typename FST1::Arc Arc; 157 typedef CharFilterState FilterState; 158 typedef M1 Matcher1; 159 typedef M2 Matcher2; 160 161 typedef typename Arc::StateId StateId; 162 typedef typename Arc::Label Label; 163 typedef typename Arc::Weight Weight; 164 165 SequenceComposeFilter(const FST1 &fst1, const FST2 &fst2, 166 M1 *matcher1 = 0, M2 *matcher2 = 0) 167 : matcher1_(matcher1 ? matcher1 : new M1(fst1, MATCH_OUTPUT)), 168 matcher2_(matcher2 ? matcher2 : new M2(fst2, MATCH_INPUT)), 169 fst1_(matcher1_->GetFst()), 170 s1_(kNoStateId), 171 s2_(kNoStateId), 172 f_(kNoStateId) {} 173 174 SequenceComposeFilter(const SequenceComposeFilter<M1, M2> &filter, 175 bool safe = false) 176 : matcher1_(filter.matcher1_->Copy(safe)), 177 matcher2_(filter.matcher2_->Copy(safe)), 178 fst1_(matcher1_->GetFst()), 179 s1_(kNoStateId), 180 s2_(kNoStateId), 181 f_(kNoStateId) {} 182 ~SequenceComposeFilter()183 ~SequenceComposeFilter() { 184 delete matcher1_; 185 delete matcher2_; 186 } 187 Start()188 FilterState Start() const { return FilterState(0); } 189 SetState(StateId s1,StateId s2,const FilterState & f)190 void SetState(StateId s1, StateId s2, const FilterState &f) { 191 if (s1_ == s1 && s2_ == s2 && f == f_) 192 return; 193 s1_ = s1; 194 s2_ = s2; 195 f_ = f; 196 size_t na1 = internal::NumArcs(fst1_, s1); 197 size_t ne1 = internal::NumOutputEpsilons(fst1_, s1); 198 bool fin1 = internal::Final(fst1_, s1) != Weight::Zero(); 199 alleps1_ = na1 == ne1 && !fin1; 200 noeps1_ = ne1 == 0; 201 } 202 FilterArc(Arc * arc1,Arc * arc2)203 FilterState FilterArc(Arc *arc1, Arc *arc2) const { 204 if (arc1->olabel == kNoLabel) 205 return alleps1_ ? FilterState::NoState() : 206 noeps1_ ? FilterState(0) : FilterState(1); 207 else if (arc2->ilabel == kNoLabel) 208 return f_ != FilterState(0) ? FilterState::NoState() : FilterState(0); 209 else 210 return arc1->olabel == 0 ? FilterState::NoState() : FilterState(0); 211 } 212 FilterFinal(Weight *,Weight *)213 void FilterFinal(Weight *, Weight *) const {} 214 215 // Return resp matchers. Ownership stays with filter. GetMatcher1()216 Matcher1 *GetMatcher1() { return matcher1_; } GetMatcher2()217 Matcher2 *GetMatcher2() { return matcher2_; } 218 Properties(uint64 props)219 uint64 Properties(uint64 props) const { return props; } 220 221 private: 222 Matcher1 *matcher1_; 223 Matcher2 *matcher2_; 224 const FST1 &fst1_; 225 StateId s1_; // Current fst1_ state; 226 StateId s2_; // Current fst2_ state; 227 FilterState f_; // Current filter state 228 bool alleps1_; // Only epsilons (and non-final) leaving s1_? 229 bool noeps1_; // No epsilons leaving s1_? 230 231 void operator=(const SequenceComposeFilter<M1, M2> &); // disallow 232 }; 233 234 235 // This filter requires epsilons on FST2 to be read before epsilons on FST1. 236 // (Template arg default declared in fst-decl.h.) 237 template <class M1, class M2 /* = M1 */> 238 class AltSequenceComposeFilter { 239 public: 240 typedef typename M1::FST FST1; 241 typedef typename M2::FST FST2; 242 typedef typename FST1::Arc Arc; 243 typedef CharFilterState FilterState; 244 typedef M1 Matcher1; 245 typedef M2 Matcher2; 246 247 typedef typename Arc::StateId StateId; 248 typedef typename Arc::Label Label; 249 typedef typename Arc::Weight Weight; 250 251 AltSequenceComposeFilter(const FST1 &fst1, const FST2 &fst2, 252 M1 *matcher1 = 0, M2 *matcher2 = 0) 253 : matcher1_(matcher1 ? matcher1 : new M1(fst1, MATCH_OUTPUT)), 254 matcher2_(matcher2 ? matcher2 : new M2(fst2, MATCH_INPUT)), 255 fst2_(matcher2_->GetFst()), 256 s1_(kNoStateId), 257 s2_(kNoStateId), 258 f_(kNoStateId) {} 259 260 AltSequenceComposeFilter(const AltSequenceComposeFilter<M1, M2> &filter, 261 bool safe = false) 262 : matcher1_(filter.matcher1_->Copy(safe)), 263 matcher2_(filter.matcher2_->Copy(safe)), 264 fst2_(matcher2_->GetFst()), 265 s1_(kNoStateId), 266 s2_(kNoStateId), 267 f_(kNoStateId) {} 268 ~AltSequenceComposeFilter()269 ~AltSequenceComposeFilter() { 270 delete matcher1_; 271 delete matcher2_; 272 } 273 Start()274 FilterState Start() const { return FilterState(0); } 275 SetState(StateId s1,StateId s2,const FilterState & f)276 void SetState(StateId s1, StateId s2, const FilterState &f) { 277 if (s1_ == s1 && s2_ == s2 && f == f_) 278 return; 279 s1_ = s1; 280 s2_ = s2; 281 f_ = f; 282 size_t na2 = internal::NumArcs(fst2_, s2); 283 size_t ne2 = internal::NumInputEpsilons(fst2_, s2); 284 bool fin2 = internal::Final(fst2_, s2) != Weight::Zero(); 285 alleps2_ = na2 == ne2 && !fin2; 286 noeps2_ = ne2 == 0; 287 } 288 FilterArc(Arc * arc1,Arc * arc2)289 FilterState FilterArc(Arc *arc1, Arc *arc2) const { 290 if (arc2->ilabel == kNoLabel) 291 return alleps2_ ? FilterState::NoState() : 292 noeps2_ ? FilterState(0) : FilterState(1); 293 else if (arc1->olabel == kNoLabel) 294 return f_ == FilterState(1) ? FilterState::NoState() : FilterState(0); 295 else 296 return arc1->olabel == 0 ? FilterState::NoState() : FilterState(0); 297 } 298 FilterFinal(Weight *,Weight *)299 void FilterFinal(Weight *, Weight *) const {} 300 301 // Return resp matchers. Ownership stays with filter. GetMatcher1()302 Matcher1 *GetMatcher1() { return matcher1_; } GetMatcher2()303 Matcher2 *GetMatcher2() { return matcher2_; } 304 Properties(uint64 props)305 uint64 Properties(uint64 props) const { return props; } 306 307 private: 308 Matcher1 *matcher1_; 309 Matcher2 *matcher2_; 310 const FST2 &fst2_; 311 StateId s1_; // Current fst1_ state; 312 StateId s2_; // Current fst2_ state; 313 FilterState f_; // Current filter state 314 bool alleps2_; // Only epsilons (and non-final) leaving s2_? 315 bool noeps2_; // No epsilons leaving s2_? 316 317 void operator=(const AltSequenceComposeFilter<M1, M2> &); // disallow 318 }; 319 320 321 // This filter requires epsilons on FST1 to be matched with epsilons on FST2 322 // whenever possible. (Template arg default declared in fst-decl.h.) 323 template <class M1, class M2 /* = M1 */> 324 class MatchComposeFilter { 325 public: 326 typedef typename M1::FST FST1; 327 typedef typename M2::FST FST2; 328 typedef typename FST1::Arc Arc; 329 typedef CharFilterState FilterState; 330 typedef M1 Matcher1; 331 typedef M2 Matcher2; 332 333 typedef typename Arc::StateId StateId; 334 typedef typename Arc::Label Label; 335 typedef typename Arc::Weight Weight; 336 337 MatchComposeFilter(const FST1 &fst1, const FST2 &fst2, 338 M1 *matcher1 = 0, M2 *matcher2 = 0) 339 : matcher1_(matcher1 ? matcher1 : new M1(fst1, MATCH_OUTPUT)), 340 matcher2_(matcher2 ? matcher2 : new M2(fst2, MATCH_INPUT)), 341 fst1_(matcher1_->GetFst()), 342 fst2_(matcher2_->GetFst()), 343 s1_(kNoStateId), 344 s2_(kNoStateId), 345 f_(kNoStateId) {} 346 347 MatchComposeFilter(const MatchComposeFilter<M1, M2> &filter, 348 bool safe = false) 349 : matcher1_(filter.matcher1_->Copy(safe)), 350 matcher2_(filter.matcher2_->Copy(safe)), 351 fst1_(matcher1_->GetFst()), 352 fst2_(matcher2_->GetFst()), 353 s1_(kNoStateId), 354 s2_(kNoStateId), 355 f_(kNoStateId) {} 356 ~MatchComposeFilter()357 ~MatchComposeFilter() { 358 delete matcher1_; 359 delete matcher2_; 360 } 361 Start()362 FilterState Start() const { return FilterState(0); } 363 SetState(StateId s1,StateId s2,const FilterState & f)364 void SetState(StateId s1, StateId s2, const FilterState &f) { 365 if (s1_ == s1 && s2_ == s2 && f == f_) 366 return; 367 s1_ = s1; 368 s2_ = s2; 369 f_ = f; 370 size_t na1 = internal::NumArcs(fst1_, s1); 371 size_t ne1 = internal::NumOutputEpsilons(fst1_, s1); 372 bool f1 = internal::Final(fst1_, s1) != Weight::Zero(); 373 alleps1_ = na1 == ne1 && !f1; 374 noeps1_ = ne1 == 0; 375 size_t na2 = internal::NumArcs(fst2_, s2); 376 size_t ne2 = internal::NumInputEpsilons(fst2_, s2); 377 bool f2 = internal::Final(fst2_, s2) != Weight::Zero(); 378 alleps2_ = na2 == ne2 && !f2; 379 noeps2_ = ne2 == 0; 380 } 381 FilterArc(Arc * arc1,Arc * arc2)382 FilterState FilterArc(Arc *arc1, Arc *arc2) const { 383 if (arc2->ilabel == kNoLabel) // Epsilon on Fst1 384 return f_ == FilterState(0) ? 385 (noeps2_ ? FilterState(0) : 386 (alleps2_ ? FilterState::NoState(): FilterState(1))) : 387 (f_ == FilterState(1) ? FilterState(1) : FilterState::NoState()); 388 else if (arc1->olabel == kNoLabel) // Epsilon on Fst2 389 return f_ == FilterState(0) ? 390 (noeps1_ ? FilterState(0) : 391 (alleps1_ ? FilterState::NoState() : FilterState(2))) : 392 (f_ == FilterState(2) ? FilterState(2) : FilterState::NoState()); 393 else if (arc1->olabel == 0) // Epsilon on both 394 return f_ == FilterState(0) ? FilterState(0) : FilterState::NoState(); 395 else // Both are non-epsilons 396 return FilterState(0); 397 } 398 FilterFinal(Weight *,Weight *)399 void FilterFinal(Weight *, Weight *) const {} 400 401 // Return resp matchers. Ownership stays with filter. GetMatcher1()402 Matcher1 *GetMatcher1() { return matcher1_; } GetMatcher2()403 Matcher2 *GetMatcher2() { return matcher2_; } 404 Properties(uint64 props)405 uint64 Properties(uint64 props) const { return props; } 406 407 private: 408 Matcher1 *matcher1_; 409 Matcher2 *matcher2_; 410 const FST1 &fst1_; 411 const FST2 &fst2_; 412 StateId s1_; // Current fst1_ state; 413 StateId s2_; // Current fst2_ state; 414 FilterState f_; // Current filter state ID 415 bool alleps1_, alleps2_; // Only epsilons (and non-final) leaving s1, s2? 416 bool noeps1_, noeps2_; // No epsilons leaving s1, s2? 417 418 void operator=(const MatchComposeFilter<M1, M2> &); // disallow 419 }; 420 421 422 // This filter works with the MultiEpsMatcher to determine if 423 // 'multi-epsilons' are preserved in the composition output 424 // (rather than rewritten as 0) and ensures correct properties. 425 template <class F> 426 class MultiEpsFilter { 427 public: 428 typedef typename F::FST1 FST1; 429 typedef typename F::FST2 FST2; 430 typedef typename F::Arc Arc; 431 typedef typename F::Matcher1 Matcher1; 432 typedef typename F::Matcher2 Matcher2; 433 typedef typename F::FilterState FilterState; 434 typedef MultiEpsFilter<F> Filter; 435 436 typedef typename Arc::StateId StateId; 437 typedef typename Arc::Label Label; 438 typedef typename Arc::Weight Weight; 439 440 MultiEpsFilter(const FST1 &fst1, const FST2 &fst2, 441 Matcher1 *matcher1 = 0, Matcher2 *matcher2 = 0, 442 bool keep_multi_eps = false) filter_(fst1,fst2,matcher1,matcher2)443 : filter_(fst1, fst2, matcher1, matcher2), 444 keep_multi_eps_(keep_multi_eps) {} 445 446 MultiEpsFilter(const Filter &filter, bool safe = false) 447 : filter_(filter.filter_, safe), 448 keep_multi_eps_(filter.keep_multi_eps_) {} 449 Start()450 FilterState Start() const { return filter_.Start(); } 451 SetState(StateId s1,StateId s2,const FilterState & f)452 void SetState(StateId s1, StateId s2, const FilterState &f) { 453 return filter_.SetState(s1, s2, f); 454 } 455 FilterArc(Arc * arc1,Arc * arc2)456 FilterState FilterArc(Arc *arc1, Arc *arc2) const { 457 FilterState f = filter_.FilterArc(arc1, arc2); 458 if (keep_multi_eps_) { 459 if (arc1->olabel == kNoLabel) 460 arc1->ilabel = arc2->ilabel; 461 if (arc2->ilabel == kNoLabel) 462 arc2->olabel = arc1->olabel; 463 } 464 return f; 465 } 466 FilterFinal(Weight * w1,Weight * w2)467 void FilterFinal(Weight *w1, Weight *w2) const { 468 return filter_.FilterFinal(w1, w2); 469 } 470 471 // Return resp matchers. Ownership stays with filter. GetMatcher1()472 Matcher1 *GetMatcher1() { return filter_.GetMatcher1(); } GetMatcher2()473 Matcher2 *GetMatcher2() { return filter_.GetMatcher2(); } 474 Properties(uint64 iprops)475 uint64 Properties(uint64 iprops) const { 476 uint64 oprops = filter_.Properties(iprops); 477 return oprops & kILabelInvariantProperties & kOLabelInvariantProperties; 478 } 479 480 private: 481 F filter_; 482 bool keep_multi_eps_; 483 }; 484 485 } // namespace fst 486 487 488 #endif // FST_LIB_COMPOSE_FILTER_H__ 489