1 // compose-filter.h
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Classes for filtering the composition matches, e.g. for correct epsilon
20 // handling.
21 
22 #ifndef FST_LIB_COMPOSE_FILTER_H__
23 #define FST_LIB_COMPOSE_FILTER_H__
24 
25 #include <fst/fst.h>
26 #include <fst/fst-decl.h>  // For optional argument declarations
27 #include <fst/filter-state.h>
28 #include <fst/matcher.h>
29 
30 
31 namespace fst {
32 
33 
34 // COMPOSITION FILTERS - these determine which matches are allowed to
35 // proceed. The filter's state is represented by the type
36 // ComposeFilter::FilterState. The basic filters handle correct
37 // epsilon matching.  Their interface is:
38 //
39 // template <class M1, class M2>
40 // class ComposeFilter {
41 //  public:
42 //   typedef typename M1::FST1 FST1;
43 //   typedef typename M1::FST2 FST2;
44 //   typedef typename FST1::Arc Arc;
45 //   typedef ... FilterState;
46 //   typedef ... Matcher1;
47 //   typedef ... Matcher2;
48 //
49 //   // Required constructors.
50 //   ComposeFilter(const FST1 &fst1, const FST2 &fst2,
51 //   //            M1 *matcher1 = 0, M2 *matcher2 = 0);
52 //   // If safe=true, the copy is thread-safe. See Fst<>::Copy()
53 //   // for further doc.
54 //   ComposeFilter(const ComposeFilter<M1, M2> &filter,
55 //   //            bool safe = false);
56 //   // Return start state of filter.
57 //   FilterState Start() const;
58 //   // Specifies current composition state.
59 //   void SetState(StateId s1, StateId s2, const FilterState &f);
60 //
61 //   // Apply filter at current composition state to these transitions.
62 //   // If an arc label to be matched is kNolabel, then that side
63 //   // does not consume a symbol. Returns the new filter state or,
64 //   // if disallowed, FilterState::NoState(). The filter is permitted to
65 //   // modify its inputs, e.g. for optimizations.
66 //   FilterState FilterArc(Arc *arc1, Arc *arc2) const;
67 
68 //   // Apply filter at current composition state to these final weights
69 //   // (cf. superfinal transitions). The filter may modify its inputs,
70 //   // e.g. for optimizations.
71 //   void FilterFinal(Weight *final1, Weight *final2) const;
72 //
73 //   // Return resp matchers. Ownership stays with filter. These
74 //   // methods allow the filter to access and possibly modify
75 //   // the composition matchers (useful e.g. with lookahead).
76 //   Matcher1 *GetMatcher1();
77 //   Matcher2 *GetMatcher2();
78 //
79 //   // This specifies how the filter affects the composition result
80 //   // properties. It takes as argument the properties that would
81 //   // apply with a trivial composition fitler.
82 //   uint64 Properties(uint64 props) const;
83 // };
84 
85 
86 // This filter allows only exact matching of symbols from FST1 with on
87 // FST2; e.g. no special interpretation of epsilons.  (Template arg
88 // default declared in fst-decl.h.)
89 template <class M1, class M2 /* = M1 */>
90 class NullComposeFilter {
91  public:
92   typedef typename M1::FST FST1;
93   typedef typename M2::FST FST2;
94   typedef typename FST1::Arc Arc;
95   typedef CharFilterState FilterState;
96   typedef M1 Matcher1;
97   typedef M2 Matcher2;
98 
99   typedef typename Arc::StateId StateId;
100   typedef typename Arc::Label Label;
101   typedef typename Arc::Weight Weight;
102 
103   NullComposeFilter(const FST1 &fst1, const FST2 &fst2,
104                          M1 *matcher1 = 0, M2 *matcher2 = 0)
105       : matcher1_(matcher1 ? matcher1 : new M1(fst1, MATCH_OUTPUT)),
106         matcher2_(matcher2 ? matcher2 : new M2(fst2, MATCH_INPUT)),
107         fst1_(matcher1_->GetFst()),
108         fst2_(matcher2_->GetFst()) {}
109 
110   NullComposeFilter(const NullComposeFilter<M1, M2> &filter,
111                          bool safe = false)
112       : matcher1_(filter.matcher1_->Copy(safe)),
113         matcher2_(filter.matcher2_->Copy(safe)),
114         fst1_(matcher1_->GetFst()),
115         fst2_(matcher2_->GetFst()) {}
116 
~NullComposeFilter()117   ~NullComposeFilter() {
118     delete matcher1_;
119     delete matcher2_;
120   }
121 
Start()122   FilterState Start() const { return FilterState(0); }
123 
SetState(StateId,StateId,const FilterState &)124   void SetState(StateId, StateId, const FilterState &) { }
125 
FilterArc(Arc * arc1,Arc * arc2)126   FilterState FilterArc(Arc *arc1, Arc *arc2) const {
127     return (arc1->olabel == kNoLabel || arc2->ilabel == kNoLabel) ?
128         FilterState::NoState() : FilterState(0);
129   }
130 
FilterFinal(Weight *,Weight *)131   void FilterFinal(Weight *, Weight *) const {}
132 
133   // Return resp matchers. Ownership stays with filter.
GetMatcher1()134   Matcher1 *GetMatcher1() { return matcher1_; }
GetMatcher2()135   Matcher2 *GetMatcher2() { return matcher2_; }
136 
Properties(uint64 props)137   uint64 Properties(uint64 props) const { return props; }
138 
139  private:
140   Matcher1 *matcher1_;
141   Matcher2 *matcher2_;
142   const FST1 &fst1_;
143   const FST2 &fst2_;
144 
145   void operator=(const NullComposeFilter<M1, M2> &);  // disallow
146 };
147 
148 
149 // This filter requires epsilons on FST1 to be read before epsilons on FST2.
150 // (Template arg default declared in fst-decl.h.)
151 template <class M1, class M2 /* = M1 */>
152 class SequenceComposeFilter {
153  public:
154   typedef typename M1::FST FST1;
155   typedef typename M2::FST FST2;
156   typedef typename FST1::Arc Arc;
157   typedef CharFilterState FilterState;
158   typedef M1 Matcher1;
159   typedef M2 Matcher2;
160 
161   typedef typename Arc::StateId StateId;
162   typedef typename Arc::Label Label;
163   typedef typename Arc::Weight Weight;
164 
165   SequenceComposeFilter(const FST1 &fst1, const FST2 &fst2,
166                         M1 *matcher1 = 0, M2 *matcher2 = 0)
167       : matcher1_(matcher1 ? matcher1 : new M1(fst1, MATCH_OUTPUT)),
168         matcher2_(matcher2 ? matcher2 : new M2(fst2, MATCH_INPUT)),
169         fst1_(matcher1_->GetFst()),
170         s1_(kNoStateId),
171         s2_(kNoStateId),
172         f_(kNoStateId) {}
173 
174   SequenceComposeFilter(const SequenceComposeFilter<M1, M2> &filter,
175                         bool safe = false)
176       : matcher1_(filter.matcher1_->Copy(safe)),
177         matcher2_(filter.matcher2_->Copy(safe)),
178         fst1_(matcher1_->GetFst()),
179         s1_(kNoStateId),
180         s2_(kNoStateId),
181         f_(kNoStateId) {}
182 
~SequenceComposeFilter()183   ~SequenceComposeFilter() {
184     delete matcher1_;
185     delete matcher2_;
186   }
187 
Start()188   FilterState Start() const { return FilterState(0); }
189 
SetState(StateId s1,StateId s2,const FilterState & f)190   void SetState(StateId s1, StateId s2, const FilterState &f) {
191     if (s1_ == s1 && s2_ == s2 && f == f_)
192       return;
193     s1_ = s1;
194     s2_ = s2;
195     f_ = f;
196     size_t na1 = internal::NumArcs(fst1_, s1);
197     size_t ne1 = internal::NumOutputEpsilons(fst1_, s1);
198     bool fin1 = internal::Final(fst1_, s1) != Weight::Zero();
199     alleps1_ = na1 == ne1 && !fin1;
200     noeps1_ = ne1 == 0;
201   }
202 
FilterArc(Arc * arc1,Arc * arc2)203   FilterState FilterArc(Arc *arc1, Arc *arc2) const {
204     if (arc1->olabel == kNoLabel)
205       return alleps1_ ? FilterState::NoState() :
206         noeps1_ ? FilterState(0) : FilterState(1);
207     else if (arc2->ilabel == kNoLabel)
208       return f_ != FilterState(0) ? FilterState::NoState() : FilterState(0);
209     else
210       return arc1->olabel == 0 ? FilterState::NoState() : FilterState(0);
211   }
212 
FilterFinal(Weight *,Weight *)213   void FilterFinal(Weight *, Weight *) const {}
214 
215   // Return resp matchers. Ownership stays with filter.
GetMatcher1()216   Matcher1 *GetMatcher1() { return matcher1_; }
GetMatcher2()217   Matcher2 *GetMatcher2() { return matcher2_; }
218 
Properties(uint64 props)219   uint64 Properties(uint64 props) const { return props; }
220 
221  private:
222   Matcher1 *matcher1_;
223   Matcher2 *matcher2_;
224   const FST1 &fst1_;
225   StateId s1_;     // Current fst1_ state;
226   StateId s2_;     // Current fst2_ state;
227   FilterState f_;  // Current filter state
228   bool alleps1_;   // Only epsilons (and non-final) leaving s1_?
229   bool noeps1_;    // No epsilons leaving s1_?
230 
231   void operator=(const SequenceComposeFilter<M1, M2> &);  // disallow
232 };
233 
234 
235 // This filter requires epsilons on FST2 to be read before epsilons on FST1.
236 // (Template arg default declared in fst-decl.h.)
237 template <class M1, class M2 /* = M1 */>
238 class AltSequenceComposeFilter {
239  public:
240   typedef typename M1::FST FST1;
241   typedef typename M2::FST FST2;
242   typedef typename FST1::Arc Arc;
243   typedef CharFilterState FilterState;
244   typedef M1 Matcher1;
245   typedef M2 Matcher2;
246 
247   typedef typename Arc::StateId StateId;
248   typedef typename Arc::Label Label;
249   typedef typename Arc::Weight Weight;
250 
251   AltSequenceComposeFilter(const FST1 &fst1, const FST2 &fst2,
252                         M1 *matcher1 = 0, M2 *matcher2 = 0)
253       : matcher1_(matcher1 ? matcher1 : new M1(fst1, MATCH_OUTPUT)),
254         matcher2_(matcher2 ? matcher2 : new M2(fst2, MATCH_INPUT)),
255         fst2_(matcher2_->GetFst()),
256         s1_(kNoStateId),
257         s2_(kNoStateId),
258         f_(kNoStateId) {}
259 
260   AltSequenceComposeFilter(const AltSequenceComposeFilter<M1, M2> &filter,
261                            bool safe = false)
262       : matcher1_(filter.matcher1_->Copy(safe)),
263         matcher2_(filter.matcher2_->Copy(safe)),
264         fst2_(matcher2_->GetFst()),
265         s1_(kNoStateId),
266         s2_(kNoStateId),
267         f_(kNoStateId) {}
268 
~AltSequenceComposeFilter()269   ~AltSequenceComposeFilter() {
270     delete matcher1_;
271     delete matcher2_;
272   }
273 
Start()274   FilterState Start() const { return FilterState(0); }
275 
SetState(StateId s1,StateId s2,const FilterState & f)276   void SetState(StateId s1, StateId s2, const FilterState &f) {
277     if (s1_ == s1 && s2_ == s2 && f == f_)
278       return;
279     s1_ = s1;
280     s2_ = s2;
281     f_ = f;
282     size_t na2 = internal::NumArcs(fst2_, s2);
283     size_t ne2 = internal::NumInputEpsilons(fst2_, s2);
284     bool fin2 = internal::Final(fst2_, s2) != Weight::Zero();
285     alleps2_ = na2 == ne2 && !fin2;
286     noeps2_ = ne2 == 0;
287   }
288 
FilterArc(Arc * arc1,Arc * arc2)289   FilterState FilterArc(Arc *arc1, Arc *arc2) const {
290     if (arc2->ilabel == kNoLabel)
291       return alleps2_ ? FilterState::NoState() :
292         noeps2_ ? FilterState(0) : FilterState(1);
293     else if (arc1->olabel == kNoLabel)
294       return f_ == FilterState(1) ? FilterState::NoState() : FilterState(0);
295     else
296       return arc1->olabel == 0 ? FilterState::NoState() : FilterState(0);
297   }
298 
FilterFinal(Weight *,Weight *)299   void FilterFinal(Weight *, Weight *) const {}
300 
301   // Return resp matchers. Ownership stays with filter.
GetMatcher1()302   Matcher1 *GetMatcher1() { return matcher1_; }
GetMatcher2()303   Matcher2 *GetMatcher2() { return matcher2_; }
304 
Properties(uint64 props)305   uint64 Properties(uint64 props) const { return props; }
306 
307  private:
308   Matcher1 *matcher1_;
309   Matcher2 *matcher2_;
310   const FST2 &fst2_;
311   StateId s1_;     // Current fst1_ state;
312   StateId s2_;     // Current fst2_ state;
313   FilterState f_;  // Current filter state
314   bool alleps2_;   // Only epsilons (and non-final) leaving s2_?
315   bool noeps2_;    // No epsilons leaving s2_?
316 
317 void operator=(const AltSequenceComposeFilter<M1, M2> &);  // disallow
318 };
319 
320 
321 // This filter requires epsilons on FST1 to be matched with epsilons on FST2
322 // whenever possible. (Template arg default declared in fst-decl.h.)
323 template <class M1, class M2 /* = M1 */>
324 class MatchComposeFilter {
325  public:
326   typedef typename M1::FST FST1;
327   typedef typename M2::FST FST2;
328   typedef typename FST1::Arc Arc;
329   typedef CharFilterState FilterState;
330   typedef M1 Matcher1;
331   typedef M2 Matcher2;
332 
333   typedef typename Arc::StateId StateId;
334   typedef typename Arc::Label Label;
335   typedef typename Arc::Weight Weight;
336 
337   MatchComposeFilter(const FST1 &fst1, const FST2 &fst2,
338                      M1 *matcher1 = 0, M2 *matcher2 = 0)
339       : matcher1_(matcher1 ? matcher1 : new M1(fst1, MATCH_OUTPUT)),
340         matcher2_(matcher2 ? matcher2 : new M2(fst2, MATCH_INPUT)),
341         fst1_(matcher1_->GetFst()),
342         fst2_(matcher2_->GetFst()),
343         s1_(kNoStateId),
344         s2_(kNoStateId),
345         f_(kNoStateId) {}
346 
347   MatchComposeFilter(const MatchComposeFilter<M1, M2> &filter,
348                      bool safe = false)
349       : matcher1_(filter.matcher1_->Copy(safe)),
350         matcher2_(filter.matcher2_->Copy(safe)),
351         fst1_(matcher1_->GetFst()),
352         fst2_(matcher2_->GetFst()),
353         s1_(kNoStateId),
354         s2_(kNoStateId),
355         f_(kNoStateId) {}
356 
~MatchComposeFilter()357   ~MatchComposeFilter() {
358     delete matcher1_;
359     delete matcher2_;
360   }
361 
Start()362   FilterState Start() const { return FilterState(0); }
363 
SetState(StateId s1,StateId s2,const FilterState & f)364   void SetState(StateId s1, StateId s2, const FilterState &f) {
365     if (s1_ == s1 && s2_ == s2 && f == f_)
366       return;
367     s1_ = s1;
368     s2_ = s2;
369     f_ = f;
370     size_t na1 = internal::NumArcs(fst1_, s1);
371     size_t ne1 = internal::NumOutputEpsilons(fst1_, s1);
372     bool f1 = internal::Final(fst1_, s1) != Weight::Zero();
373     alleps1_ = na1 == ne1 && !f1;
374     noeps1_ = ne1 == 0;
375     size_t na2 = internal::NumArcs(fst2_, s2);
376     size_t ne2 = internal::NumInputEpsilons(fst2_, s2);
377     bool f2 = internal::Final(fst2_, s2) != Weight::Zero();
378     alleps2_ = na2 == ne2 && !f2;
379     noeps2_ = ne2 == 0;
380   }
381 
FilterArc(Arc * arc1,Arc * arc2)382   FilterState FilterArc(Arc *arc1, Arc *arc2) const {
383     if (arc2->ilabel == kNoLabel)  // Epsilon on Fst1
384       return f_ == FilterState(0) ?
385           (noeps2_ ? FilterState(0) :
386            (alleps2_ ? FilterState::NoState(): FilterState(1))) :
387           (f_ == FilterState(1) ? FilterState(1) : FilterState::NoState());
388     else if (arc1->olabel == kNoLabel)  // Epsilon on Fst2
389       return f_ == FilterState(0) ?
390           (noeps1_ ? FilterState(0) :
391            (alleps1_ ? FilterState::NoState() : FilterState(2))) :
392           (f_ == FilterState(2) ? FilterState(2) : FilterState::NoState());
393     else if (arc1->olabel == 0)  // Epsilon on both
394       return f_ == FilterState(0) ? FilterState(0) : FilterState::NoState();
395     else  // Both are non-epsilons
396       return FilterState(0);
397   }
398 
FilterFinal(Weight *,Weight *)399   void FilterFinal(Weight *, Weight *) const {}
400 
401   // Return resp matchers. Ownership stays with filter.
GetMatcher1()402   Matcher1 *GetMatcher1() { return matcher1_; }
GetMatcher2()403   Matcher2 *GetMatcher2() { return matcher2_; }
404 
Properties(uint64 props)405   uint64 Properties(uint64 props) const { return props; }
406 
407  private:
408   Matcher1 *matcher1_;
409   Matcher2 *matcher2_;
410   const FST1 &fst1_;
411   const FST2 &fst2_;
412   StateId s1_;              // Current fst1_ state;
413   StateId s2_;              // Current fst2_ state;
414   FilterState f_;           // Current filter state ID
415   bool alleps1_, alleps2_;  // Only epsilons (and non-final) leaving s1, s2?
416   bool noeps1_, noeps2_;    // No epsilons leaving s1, s2?
417 
418   void operator=(const MatchComposeFilter<M1, M2> &);  // disallow
419 };
420 
421 
422 // This filter works with the MultiEpsMatcher to determine if
423 // 'multi-epsilons' are preserved in the composition output
424 // (rather than rewritten as 0) and ensures correct properties.
425 template <class F>
426 class MultiEpsFilter {
427  public:
428   typedef typename F::FST1 FST1;
429   typedef typename F::FST2 FST2;
430   typedef typename F::Arc Arc;
431   typedef typename F::Matcher1 Matcher1;
432   typedef typename F::Matcher2 Matcher2;
433   typedef typename F::FilterState FilterState;
434   typedef MultiEpsFilter<F> Filter;
435 
436   typedef typename Arc::StateId StateId;
437   typedef typename Arc::Label Label;
438   typedef typename Arc::Weight Weight;
439 
440   MultiEpsFilter(const FST1 &fst1, const FST2 &fst2,
441                  Matcher1 *matcher1 = 0,  Matcher2 *matcher2 = 0,
442                  bool keep_multi_eps = false)
filter_(fst1,fst2,matcher1,matcher2)443       : filter_(fst1, fst2, matcher1, matcher2),
444         keep_multi_eps_(keep_multi_eps) {}
445 
446   MultiEpsFilter(const Filter &filter, bool safe = false)
447       : filter_(filter.filter_, safe),
448         keep_multi_eps_(filter.keep_multi_eps_) {}
449 
Start()450   FilterState Start() const { return filter_.Start(); }
451 
SetState(StateId s1,StateId s2,const FilterState & f)452   void SetState(StateId s1, StateId s2, const FilterState &f) {
453     return filter_.SetState(s1, s2, f);
454   }
455 
FilterArc(Arc * arc1,Arc * arc2)456   FilterState FilterArc(Arc *arc1, Arc *arc2) const {
457     FilterState f = filter_.FilterArc(arc1, arc2);
458     if (keep_multi_eps_) {
459       if (arc1->olabel == kNoLabel)
460         arc1->ilabel = arc2->ilabel;
461       if (arc2->ilabel == kNoLabel)
462         arc2->olabel = arc1->olabel;
463     }
464     return f;
465   }
466 
FilterFinal(Weight * w1,Weight * w2)467   void FilterFinal(Weight *w1, Weight *w2) const {
468     return filter_.FilterFinal(w1, w2);
469   }
470 
471   // Return resp matchers. Ownership stays with filter.
GetMatcher1()472   Matcher1 *GetMatcher1() { return filter_.GetMatcher1(); }
GetMatcher2()473   Matcher2 *GetMatcher2() { return filter_.GetMatcher2(); }
474 
Properties(uint64 iprops)475   uint64 Properties(uint64 iprops) const {
476     uint64 oprops = filter_.Properties(iprops);
477     return oprops & kILabelInvariantProperties & kOLabelInvariantProperties;
478   }
479 
480  private:
481   F filter_;
482   bool keep_multi_eps_;
483 };
484 
485 }  // namespace fst
486 
487 
488 #endif  // FST_LIB_COMPOSE_FILTER_H__
489