1 // compose.h
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Class to compute the composition of two FSTs
20 
21 #ifndef FST_LIB_COMPOSE_H__
22 #define FST_LIB_COMPOSE_H__
23 
24 #include <algorithm>
25 #include <string>
26 #include <vector>
27 using std::vector;
28 
29 #include <fst/cache.h>
30 #include <fst/compose-filter.h>
31 #include <fst/fst-decl.h>  // For optional argument declarations
32 #include <fst/lookahead-filter.h>
33 #include <fst/matcher.h>
34 #include <fst/state-table.h>
35 #include <fst/test-properties.h>
36 
37 
38 namespace fst {
39 
40 // Delayed composition options templated on the arc type, the matcher,
41 // the composition filter, and the composition state table.  By
42 // default, the matchers, filter, and state table are constructed by
43 // composition. If set below, the user can instead pass in these
44 // objects; in that case, ComposeFst takes their ownership. This
45 // version controls composition implemented between generic Fst<Arc>
46 // types and a shared matcher type M for Fst<Arc>. This should be
47 // adequate for most applications, giving a reasonable tradeoff
48 // between efficiency and code sharing (but see ComposeFstImplOptions).
49 template <class A,
50           class M = Matcher<Fst<A> >,
51           class F = SequenceComposeFilter<M>,
52           class T = GenericComposeStateTable<A, typename F::FilterState> >
53 struct ComposeFstOptions : public CacheOptions {
54   M *matcher1;      // FST1 matcher (see matcher.h)
55   M *matcher2;      // FST2 matcher
56   F *filter;        // Composition filter (see compose-filter.h)
57   T *state_table;   // Composition state table (see compose-state-table.h)
58 
59   explicit ComposeFstOptions(const CacheOptions &opts,
60                              M *mat1 = 0, M *mat2 = 0,
61                              F *filt = 0, T *sttable= 0)
CacheOptionsComposeFstOptions62       : CacheOptions(opts), matcher1(mat1), matcher2(mat2),
63         filter(filt), state_table(sttable) {}
64 
ComposeFstOptionsComposeFstOptions65   ComposeFstOptions() : matcher1(0), matcher2(0), filter(0), state_table(0) {}
66 };
67 
68 
69 // Delayed composition options templated on the two matcher types, the
70 // composition filter, the composition state table and the cache
71 // store.  By default, the matchers, filter, state table and cache
72 // store are constructed by composition. If set below, the user can
73 // instead pass in these objects; in that case, ComposeFst takes their
74 // ownership. This version controls composition implemented using
75 // arbitrary matchers (of the same Arc type but otherwise arbitrary
76 // Fst type). The user must ensure the matchers are compatible. These
77 // options permit the most efficient use, but shares the least
78 // code. This is for advanced use only in the most demanding or
79 // specialized applications that can benefit from it (o.w. prefer
80 // ComposeFstOptions).
81 template <class M1, class M2,
82           class F = SequenceComposeFilter<M1, M2>,
83           class T = GenericComposeStateTable<typename M1::Arc,
84                                              typename F::FilterState>,
85           class C = DefaultCacheStore<typename M1::Arc> >
86 struct ComposeFstImplOptions : public CacheImplOptions<C> {
87   M1 *matcher1;     // FST1 matcher (see matcher.h)
88   M2 *matcher2;     // FST2 matcher
89   F *filter;        // Composition filter (see compose-filter.h)
90   T *state_table;   // Composition state table (see compose-state-table.h)
91 
92   explicit ComposeFstImplOptions(const CacheOptions &opts,
93                                  M1 *mat1 = 0, M2 *mat2 = 0,
94                                  F *filt = 0, T *sttable= 0)
95       : CacheImplOptions<C>(opts), matcher1(mat1), matcher2(mat2),
96         filter(filt), state_table(sttable) {}
97 
98   explicit ComposeFstImplOptions(const CacheImplOptions<C> &opts,
99                                  M1 *mat1 = 0, M2 *mat2 = 0,
100                                  F *filt = 0, T *sttable= 0)
101       : CacheImplOptions<C>(opts), matcher1(mat1), matcher2(mat2),
102         filter(filt), state_table(sttable) {}
103 
ComposeFstImplOptionsComposeFstImplOptions104   ComposeFstImplOptions()
105       : matcher1(0), matcher2(0), filter(0), state_table(0) {}
106 };
107 
108 
109 // Implementation of delayed composition. This base class is
110 // common to the variants with different matchers, composition filters
111 // and state tables.
112 template <class A, class C = DefaultCacheStore<A> >
113 class ComposeFstImplBase : public CacheBaseImpl<typename C::State, C> {
114  public:
115   typedef typename A::Label Label;
116   typedef typename A::Weight Weight;
117   typedef typename A::StateId StateId;
118   typedef typename C::State State;
119   typedef CacheBaseImpl<State, C> CImpl;
120 
121   using FstImpl<A>::SetType;
122   using FstImpl<A>::SetProperties;
123   using FstImpl<A>::Properties;
124   using FstImpl<A>::SetInputSymbols;
125   using FstImpl<A>::SetOutputSymbols;
126 
127   using CImpl::HasStart;
128   using CImpl::HasFinal;
129   using CImpl::HasArcs;
130   using CImpl::SetFinal;
131   using CImpl::SetStart;
132 
ComposeFstImplBase(const Fst<A> & fst1,const Fst<A> & fst2,const CacheImplOptions<C> & opts)133   ComposeFstImplBase(const Fst<A> &fst1, const Fst<A> &fst2,
134                      const CacheImplOptions<C> &opts)
135       : CImpl(opts) {
136     InitBase(fst1, fst2);
137   }
138 
ComposeFstImplBase(const Fst<A> & fst1,const Fst<A> & fst2,const CacheOptions & opts)139   ComposeFstImplBase(const Fst<A> &fst1, const Fst<A> &fst2,
140                      const CacheOptions &opts)
141       : CImpl(opts) {
142     InitBase(fst1, fst2);
143   }
144 
ComposeFstImplBase(const ComposeFstImplBase<A,C> & impl)145   ComposeFstImplBase(const ComposeFstImplBase<A, C> &impl)
146       : CImpl(impl, true) {
147     SetType(impl.Type());
148     SetProperties(impl.Properties(), kCopyProperties);
149     SetInputSymbols(impl.InputSymbols());
150     SetOutputSymbols(impl.OutputSymbols());
151   }
152 
153   virtual ComposeFstImplBase<A, C> *Copy() = 0;
154 
~ComposeFstImplBase()155   virtual ~ComposeFstImplBase() {}
156 
Start()157   StateId Start() {
158     if (!HasStart()) {
159       StateId start = ComputeStart();
160       if (start != kNoStateId) {
161         SetStart(start);
162       }
163     }
164     return CImpl::Start();
165   }
166 
Final(StateId s)167   Weight Final(StateId s) {
168     if (!HasFinal(s)) {
169       Weight final = ComputeFinal(s);
170       SetFinal(s, final);
171     }
172     return CImpl::Final(s);
173   }
174 
175   virtual void Expand(StateId s) = 0;
176 
NumArcs(StateId s)177   size_t NumArcs(StateId s) {
178     if (!HasArcs(s))
179       Expand(s);
180     return CImpl::NumArcs(s);
181   }
182 
NumInputEpsilons(StateId s)183   size_t NumInputEpsilons(StateId s) {
184     if (!HasArcs(s))
185       Expand(s);
186     return CImpl::NumInputEpsilons(s);
187   }
188 
NumOutputEpsilons(StateId s)189   size_t NumOutputEpsilons(StateId s) {
190     if (!HasArcs(s))
191       Expand(s);
192     return CImpl::NumOutputEpsilons(s);
193   }
194 
InitArcIterator(StateId s,ArcIteratorData<A> * data)195   void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
196     if (!HasArcs(s))
197       Expand(s);
198     CImpl::InitArcIterator(s, data);
199   }
200 
InitMatcher(const ComposeFst<A,C> & fst,MatchType match_type)201   virtual MatcherBase<A> *InitMatcher(const ComposeFst<A, C> &fst,
202                                       MatchType match_type) const {
203     // Use the default matcher if no override is provided.
204     return 0;
205   }
206 
207  protected:
208   virtual StateId ComputeStart() = 0;
209   virtual Weight ComputeFinal(StateId s) = 0;
210 
211 
InitBase(const Fst<A> & fst1,const Fst<A> & fst2)212   void InitBase(const Fst<A> &fst1, const Fst<A> &fst2) {
213     VLOG(2) << "ComposeFst(" << this << "): Begin";
214     SetType("compose");
215 
216     if (!CompatSymbols(fst2.InputSymbols(), fst1.OutputSymbols())) {
217       FSTERROR() << "ComposeFst: output symbol table of 1st argument "
218                  << "does not match input symbol table of 2nd argument";
219       SetProperties(kError, kError);
220     }
221 
222     SetInputSymbols(fst1.InputSymbols());
223     SetOutputSymbols(fst2.OutputSymbols());
224   }
225 };
226 
227 // Forward declaration of ComposeFstMatcher
228 template <class C, class F, class T>
229 class ComposeFstMatcher;
230 
231 // Implementaion of delayed composition templated on the matchers (see
232 // matcher.h), composition filter (see compose-filter-inl.h) and
233 // the composition state table (see compose-state-table.h).
234 template <class C, class F, class T>
235 class ComposeFstImpl : public ComposeFstImplBase<typename C::Arc, C> {
236   typedef typename F::Matcher1 M1;
237   typedef typename F::Matcher2 M2;
238   friend class ComposeFstMatcher<C, F, T>;
239 
240   typedef typename M1::FST FST1;
241   typedef typename M2::FST FST2;
242   typedef typename M1::Arc Arc;
243   typedef typename Arc::StateId StateId;
244   typedef typename Arc::Label Label;
245   typedef typename Arc::Weight Weight;
246   typedef typename F::FilterState FilterState;
247   typedef typename C::State State;
248   typedef CacheBaseImpl<State, C> CImpl;
249   typedef typename T::StateTuple StateTuple;
250 
251   using FstImpl<Arc>::SetType;
252   using FstImpl<Arc>::SetProperties;
253 
254  public:
255   template <class Mat1, class Mat2>
256   ComposeFstImpl(const FST1 &fst1, const FST2 &fst2,
257                  const ComposeFstImplOptions<Mat1, Mat2, F, T, C> &opts);
258 
ComposeFstImpl(const ComposeFstImpl<C,F,T> & impl)259   ComposeFstImpl(const ComposeFstImpl<C, F, T> &impl)
260       : ComposeFstImplBase<Arc, C>(impl),
261         filter_(new F(*impl.filter_, true)),
262         matcher1_(filter_->GetMatcher1()),
263         matcher2_(filter_->GetMatcher2()),
264         fst1_(matcher1_->GetFst()),
265         fst2_(matcher2_->GetFst()),
266         state_table_(new T(*impl.state_table_)),
267         match_type_(impl.match_type_) {}
268 
~ComposeFstImpl()269   ~ComposeFstImpl() {
270     VLOG(2) << "ComposeFst(" << this
271             << "): End: # of visited states: " << state_table_->Size();
272 
273     delete filter_;
274     delete state_table_;
275   }
276 
Copy()277   virtual ComposeFstImpl<C, F, T> *Copy() {
278     return new ComposeFstImpl<C, F, T>(*this);
279   }
280 
Properties()281   uint64 Properties() const { return Properties(kFstProperties); }
282 
283   // Set error if found; return FST impl properties.
Properties(uint64 mask)284   uint64 Properties(uint64 mask) const {
285     if ((mask & kError) &&
286         (fst1_.Properties(kError, false) ||
287          fst2_.Properties(kError, false) ||
288          (matcher1_->Properties(0) & kError) ||
289          (matcher2_->Properties(0) & kError) |
290          (filter_->Properties(0) & kError) ||
291          state_table_->Error())) {
292       SetProperties(kError, kError);
293     }
294     return FstImpl<Arc>::Properties(mask);
295   }
296 
297   // Arranges it so that the first arg to OrderedExpand is the Fst
298   // that will be matched on.
Expand(StateId s)299   void Expand(StateId s) {
300     const StateTuple &tuple = state_table_->Tuple(s);
301     const StateId s1 = tuple.StateId1();
302     const StateId s2 = tuple.StateId2();
303     filter_->SetState(s1, s2, tuple.GetFilterState());
304     if (MatchInput(s1, s2)) {
305       OrderedExpand(s, fst2_, s2, fst1_, s1, matcher2_, true);
306     } else {
307       OrderedExpand(s, fst1_, s1, fst2_, s2, matcher1_, false);
308     }
309   }
310 
GetFst1()311   const FST1 &GetFst1() const { return fst1_; }
GetFst2()312   const FST2 &GetFst2() const { return fst2_; }
313 
GetMatcher1()314   const M1 *GetMatcher1() const { return matcher1_; }
GetMatcher1()315   M1 *GetMatcher1() { return matcher1_; }
316 
GetMatcher2()317   const M2 *GetMatcher2() const { return matcher2_; }
GetMatcher2()318   M2 *GetMatcher2() { return matcher2_; }
319 
GetFilter()320   const F *GetFilter() const { return filter_; }
GetFilter()321   F *GetFilter() { return filter_; }
322 
GetStateTable()323   const T *GetStateTable() const { return state_table_; }
GetStateTable()324   T *GetStateTable() { return state_table_; }
325 
InitMatcher(const ComposeFst<Arc,C> & fst,MatchType match_type)326   virtual MatcherBase<Arc> *InitMatcher(const ComposeFst<Arc, C> &fst,
327                                         MatchType match_type) const {
328     uint64 test_props = match_type == MATCH_INPUT ?
329         kFstProperties & ~kILabelInvariantProperties :
330         kFstProperties & ~kOLabelInvariantProperties;
331     // If both matchers support 'match_type' and we have
332     // a guarantee that a call to 'filter_->FilterArc(arc1, arc2)' will
333     // not modify the ilabel of arc1 when MATCH_INPUT or the olabel
334     // or arc2 when MATCH_OUTPUT, then ComposeFstMatcher can be used.
335     if ((matcher1_->Type(false) == match_type) &&
336         (matcher2_->Type(false) == match_type) &&
337         (filter_->Properties(test_props) == test_props)) {
338       VLOG(1) << "Creating ComposeFstMatcher";
339       return new ComposeFstMatcher<C, F, T>(fst, this, match_type);
340     }
341     return 0;
342   }
343 
344  private:
345   // This does that actual matching of labels in the composition. The
346   // arguments are ordered so matching is called on state 'sa' of
347   // 'fsta' for each arc leaving state 'sb' of 'fstb'. The 'match_input' arg
348   // determines whether the input or output label of arcs at 'sb' is
349   // the one to match on.
350   template <class FST, class Matcher>
OrderedExpand(StateId s,const Fst<Arc> &,StateId sa,const FST & fstb,StateId sb,Matcher * matchera,bool match_input)351   void OrderedExpand(StateId s, const Fst<Arc> &, StateId sa,
352                      const FST &fstb, StateId sb,
353                      Matcher *matchera,  bool match_input) {
354     matchera->SetState(sa);
355 
356     // First process non-consuming symbols (e.g., epsilons) on FSTA.
357     Arc loop(match_input ? 0 : kNoLabel, match_input ? kNoLabel : 0,
358            Weight::One(), sb);
359     MatchArc(s, matchera, loop, match_input);
360 
361     // Then process matches on FSTB.
362     for (ArcIterator<FST> iterb(fstb, sb); !iterb.Done(); iterb.Next())
363       MatchArc(s, matchera, iterb.Value(), match_input);
364 
365     CImpl::SetArcs(s);
366   }
367 
368   // Matches a single transition from 'fstb' against 'fata' at 's'.
369   template <class Matcher>
MatchArc(StateId s,Matcher * matchera,const Arc & arc,bool match_input)370   void MatchArc(StateId s, Matcher *matchera,
371                 const Arc &arc, bool match_input) {
372     if (matchera->Find(match_input ? arc.olabel : arc.ilabel)) {
373       for (; !matchera->Done(); matchera->Next()) {
374         Arc arca = matchera->Value();
375         Arc arcb = arc;
376         if (match_input) {
377           const FilterState &f = filter_->FilterArc(&arcb, &arca);
378           if (f != FilterState::NoState())
379             AddArc(s, arcb, arca, f);
380         } else {
381           const FilterState &f = filter_->FilterArc(&arca, &arcb);
382           if (f != FilterState::NoState())
383             AddArc(s, arca, arcb, f);
384         }
385       }
386     }
387   }
388 
389   // Add a matching transition at 's'.
AddArc(StateId s,const Arc & arc1,const Arc & arc2,const FilterState & f)390    void AddArc(StateId s, const Arc &arc1, const Arc &arc2,
391                const FilterState &f) {
392     StateTuple tuple(arc1.nextstate, arc2.nextstate, f);
393     Arc oarc(arc1.ilabel, arc2.olabel, Times(arc1.weight, arc2.weight),
394            state_table_->FindState(tuple));
395     CImpl::PushArc(s, oarc);
396   }
397 
ComputeStart()398   StateId ComputeStart() {
399     StateId s1 = fst1_.Start();
400     if (s1 == kNoStateId)
401       return kNoStateId;
402 
403     StateId s2 = fst2_.Start();
404     if (s2 == kNoStateId)
405       return kNoStateId;
406 
407     const FilterState &f = filter_->Start();
408     StateTuple tuple(s1, s2, f);
409     return state_table_->FindState(tuple);
410   }
411 
ComputeFinal(StateId s)412   Weight ComputeFinal(StateId s) {
413     const StateTuple &tuple = state_table_->Tuple(s);
414     const StateId s1 = tuple.StateId1();
415     Weight final1 = internal::Final(fst1_, s1);
416     if (final1 == Weight::Zero())
417       return final1;
418 
419     const StateId s2 = tuple.StateId2();
420     Weight final2 = internal::Final(fst2_, s2);
421     if (final2 == Weight::Zero())
422       return final2;
423 
424     filter_->SetState(s1, s2, tuple.GetFilterState());
425     filter_->FilterFinal(&final1, &final2);
426     return Times(final1, final2);
427   }
428 
429   // Determines which side to match on per composition state.
MatchInput(StateId s1,StateId s2)430   bool MatchInput(StateId s1, StateId s2) {
431     switch (match_type_) {
432       case MATCH_INPUT:
433         return true;
434       case MATCH_OUTPUT:
435         return false;
436       default:  // MATCH_BOTH
437         ssize_t priority1 = matcher1_->Priority(s1);
438         ssize_t priority2 = matcher2_->Priority(s2);
439 
440         if (priority1 == kRequirePriority && priority2 == kRequirePriority) {
441           FSTERROR() << "ComposeFst: both sides can't require match";
442           SetProperties(kError, kError);
443           return true;
444         }
445 
446         if (priority1 == kRequirePriority)
447           return false;
448         if (priority2 == kRequirePriority) {
449           return true;
450         }
451         return priority1 <= priority2;
452     }
453   }
454 
455   // Identifies and verifies the capabilities of the matcher to be used for
456   // composition.
457   void SetMatchType();
458 
459   F *filter_;
460   M1 *matcher1_;
461   M2 *matcher2_;
462   const FST1 &fst1_;
463   const FST2 &fst2_;
464   T *state_table_;
465 
466   MatchType match_type_;
467 
468   void operator=(const ComposeFstImpl<C, F, T> &);  // disallow
469 };
470 
471 template <class C, class F, class T>
472 template <class Mat1, class Mat2>
ComposeFstImpl(const FST1 & fst1,const FST2 & fst2,const ComposeFstImplOptions<Mat1,Mat2,F,T,C> & opts)473 ComposeFstImpl<C, F, T>::ComposeFstImpl(
474     const FST1 &fst1, const FST2 &fst2,
475     const ComposeFstImplOptions<Mat1, Mat2, F, T, C> &opts)
476     : ComposeFstImplBase<Arc, C>(fst1, fst2, opts),
477       filter_(opts.filter ? opts.filter :
478               new F(fst1, fst2, opts.matcher1, opts.matcher2)),
479       matcher1_(filter_->GetMatcher1()),
480       matcher2_(filter_->GetMatcher2()),
481       fst1_(matcher1_->GetFst()),
482       fst2_(matcher2_->GetFst()),
483       state_table_(opts.state_table ? opts.state_table :
484                    new T(fst1_, fst2_)) {
485   SetMatchType();
486   if (match_type_ == MATCH_NONE)
487     SetProperties(kError, kError);
488   VLOG(2) << "ComposeFst(" << this << "): Match type: "
489           << (match_type_ == MATCH_OUTPUT ? "output" :
490               (match_type_ == MATCH_INPUT ? "input" :
491                (match_type_ == MATCH_BOTH ? "both" :
492                 (match_type_ == MATCH_NONE ? "none" : "unknown"))));
493 
494   uint64 fprops1 = fst1.Properties(kFstProperties, false);
495   uint64 fprops2 = fst2.Properties(kFstProperties, false);
496   uint64 mprops1 = matcher1_->Properties(fprops1);
497   uint64 mprops2 = matcher2_->Properties(fprops2);
498   uint64 cprops = ComposeProperties(mprops1, mprops2);
499   SetProperties(filter_->Properties(cprops), kCopyProperties);
500   if (state_table_->Error()) SetProperties(kError, kError);
501   VLOG(2) << "ComposeFst(" << this << "): Initialized";
502 }
503 
504 template <class C, class F, class T>
SetMatchType()505 void ComposeFstImpl<C, F, T>::SetMatchType() {
506   // Ensures any required matching is possible and known.
507   if ((matcher1_->Flags() & kRequireMatch) &&
508       matcher1_->Type(true) != MATCH_OUTPUT) {
509     FSTERROR() << "ComposeFst: 1st argument requires matching but cannot.";
510     match_type_ = MATCH_NONE;
511     return;
512   }
513   if ((matcher2_->Flags() & kRequireMatch) &&
514       matcher2_->Type(true) != MATCH_INPUT) {
515     FSTERROR() << "ComposeFst: 2nd argument requires matching but cannot.";
516     match_type_ = MATCH_NONE;
517     return;
518   }
519 
520   // Finds which sides to match on (favoring minimal testing of capabilities).
521   MatchType type1 = matcher1_->Type(false);
522   MatchType type2 = matcher2_->Type(false);
523   if (type1 == MATCH_OUTPUT && type2  == MATCH_INPUT) {
524     match_type_ = MATCH_BOTH;
525   } else if (type1 == MATCH_OUTPUT) {
526     match_type_ = MATCH_OUTPUT;
527   } else if (type2 == MATCH_INPUT) {
528     match_type_ = MATCH_INPUT;
529   } else if (matcher1_->Type(true) == MATCH_OUTPUT) {
530     match_type_ = MATCH_OUTPUT;
531   } else if (matcher2_->Type(true) == MATCH_INPUT) {
532     match_type_ = MATCH_INPUT;
533   } else {
534     FSTERROR() << "ComposeFst: 1st argument cannot match on output labels "
535                << "and 2nd argument cannot match on input labels (sort?).";
536     match_type_ = MATCH_NONE;
537   }
538 }
539 
540 // Computes the composition of two transducers. This version is a
541 // delayed Fst. If FST1 transduces string x to y with weight a and FST2
542 // transduces y to z with weight b, then their composition transduces
543 // string x to z with weight Times(x, z).
544 //
545 // The output labels of the first transducer or the input labels of
546 // the second transducer must be sorted (with the default matcher).
547 // The weights need to form a commutative semiring (valid for
548 // TropicalWeight and LogWeight).
549 //
550 // Complexity:
551 // Assuming the first FST is unsorted and the second is sorted:
552 // - Time: O(v1 v2 d1 (log d2 + m2)),
553 // - Space: O(v1 v2)
554 // where vi = # of states visited, di = maximum out-degree, and mi the
555 // maximum multiplicity of the states visited for the ith
556 // FST. Constant time and space to visit an input state or arc is
557 // assumed and exclusive of caching.
558 //
559 // Caveats:
560 // - ComposeFst does not trim its output (since it is a delayed operation).
561 // - The efficiency of composition can be strongly affected by several factors:
562 //   - the choice of which transducer is sorted - prefer sorting the FST
563 //     that has the greater average out-degree.
564 //   - the amount of non-determinism
565 //   - the presence and location of epsilon transitions - avoid epsilon
566 //     transitions on the output side of the first transducer or
567 //     the input side of the second transducer or prefer placing
568 //     them later in a path since they delay matching and can
569 //     introduce non-coaccessible states and transitions.
570 //
571 // This class attaches interface to implementation and handles
572 // reference counting, delegating most methods to ImplToFst.
573 // The type C specifies the cache store (default declared in fst-decl.h).
574 template <class A, class C /* = DefaultCacheStore<A> */>
575 class ComposeFst : public ImplToFst< ComposeFstImplBase<A, C> > {
576  public:
577   friend class ArcIterator< ComposeFst<A, C> >;
578   friend class StateIterator< ComposeFst<A, C> >;
579 
580   typedef A Arc;
581   typedef C Store;
582   typedef typename A::Weight Weight;
583   typedef typename A::StateId StateId;
584   typedef typename C::State State;
585 
586   typedef ComposeFstImplBase<A, C> Impl;
587 
588   using ImplToFst<Impl>::SetImpl;
589 
590   // Compose specifying only caching options.
591   ComposeFst(const Fst<A> &fst1, const Fst<A> &fst2,
592              const CacheOptions &opts = CacheOptions())
CreateBase(fst1,fst2,opts)593       : ImplToFst<Impl>(CreateBase(fst1, fst2, opts)) {}
594 
595   // Compose specifying one shared matcher type M.  Requires input
596   // Fsts and matcher FST type (M::FST) be Fst<A>. Recommended for
597   // best code-sharing and matcher compatiblity.
598   template <class M, class F, class T>
ComposeFst(const Fst<A> & fst1,const Fst<A> & fst2,const ComposeFstOptions<A,M,F,T> & opts)599   ComposeFst(const Fst<A> &fst1, const Fst<A> &fst2,
600              const ComposeFstOptions<A, M, F, T> &opts)
601       : ImplToFst<Impl>(CreateBase1(fst1, fst2, opts)) {}
602 
603   // Compose specifying two matcher types M1 and M2.  Requires input
604   // Fsts (of the same Arc type but o.w. arbitrary) match the
605   // corresponding matcher FST types (M1::FST, M2::FST). Recommended
606   // only for advanced use in demanding or specialized applications
607   // due to potential code bloat and matcher incompatibilities.
608   template <class M1, class M2, class F, class T>
ComposeFst(const typename M1::FST & fst1,const typename M2::FST & fst2,const ComposeFstImplOptions<M1,M2,F,T,C> & opts)609   ComposeFst(const typename M1::FST &fst1, const typename M2::FST &fst2,
610              const ComposeFstImplOptions<M1, M2, F, T, C> &opts)
611       : ImplToFst<Impl>(CreateBase2(fst1, fst2, opts)) {}
612 
613   // See Fst<>::Copy() for doc.
614   ComposeFst(const ComposeFst<A, C> &fst, bool safe = false)
615       : ImplToFst<Impl>() {
616     if (safe)
617       SetImpl(fst.GetImpl()->Copy());
618     else
619       SetImpl(fst.GetImpl(), false);
620   }
621 
622   // Get a copy of this ComposeFst. See Fst<>::Copy() for further doc.
623   virtual ComposeFst<A, C> *Copy(bool safe = false) const {
624     return new ComposeFst<A, C>(*this, safe);
625   }
626 
627   virtual inline void InitStateIterator(StateIteratorData<A> *data) const;
628 
InitArcIterator(StateId s,ArcIteratorData<A> * data)629   virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
630     GetImpl()->InitArcIterator(s, data);
631   }
632 
InitMatcher(MatchType match_type)633   virtual MatcherBase<A> *InitMatcher(MatchType match_type) const {
634     return GetImpl()->InitMatcher(*this, match_type);
635   }
636 
637  protected:
ComposeFst()638   ComposeFst() {}
639 
640   // Create compose implementation specifying two matcher types.
641   template <class M1, class M2, class F, class T>
CreateBase2(const typename M1::FST & fst1,const typename M2::FST & fst2,const ComposeFstImplOptions<M1,M2,F,T,C> & opts)642   static Impl *CreateBase2(
643       const typename M1::FST &fst1, const typename M2::FST &fst2,
644       const ComposeFstImplOptions<M1, M2, F, T, C> &opts) {
645     Impl *impl = new ComposeFstImpl<C, F, T>(fst1, fst2, opts);
646     if (!(Weight::Properties() & kCommutative)) {
647       int64 props1 = fst1.Properties(kUnweighted, true);
648       int64 props2 = fst2.Properties(kUnweighted, true);
649       if (!(props1 & kUnweighted) && !(props2 & kUnweighted)) {
650         FSTERROR() << "ComposeFst: Weights must be a commutative semiring: "
651                    << Weight::Type();
652         impl->SetProperties(kError, kError);
653       }
654     }
655     return impl;
656   }
657 
658   // Create compose implementation specifying one matcher type.
659   //  Requires input Fsts and matcher FST type (M::FST) be Fst<A>
660   template <class M, class F, class T>
CreateBase1(const Fst<A> & fst1,const Fst<A> & fst2,const ComposeFstOptions<A,M,F,T> & opts)661   static Impl *CreateBase1(const Fst<A> &fst1, const Fst<A> &fst2,
662                            const ComposeFstOptions<A, M, F, T> &opts) {
663     ComposeFstImplOptions<M, M, F, T, C> nopts(opts, opts.matcher1,
664                                                opts.matcher2,
665                                                opts.filter, opts.state_table);
666     return CreateBase2(fst1, fst2, nopts);
667   }
668 
669   // Create compose implementation specifying no matcher type.
CreateBase(const Fst<A> & fst1,const Fst<A> & fst2,const CacheOptions & opts)670   static Impl *CreateBase(const Fst<A> &fst1, const Fst<A> &fst2,
671                           const CacheOptions &opts) {
672     switch (LookAheadMatchType(fst1, fst2)) {  // Check for lookahead matchers
673       default:
674       case MATCH_NONE: {     // Default composition (no look-ahead)
675         VLOG(2) << "ComposeFst: Default composition (no look-ahead)";
676         ComposeFstOptions<Arc> nopts(opts);
677         return CreateBase1(fst1, fst2, nopts);
678       }
679       case MATCH_OUTPUT: {   // Lookahead on fst1
680         VLOG(2) << "ComposeFst: Lookahead on fst1";
681         typedef typename DefaultLookAhead<Arc, MATCH_OUTPUT>::FstMatcher M;
682         typedef typename DefaultLookAhead<Arc, MATCH_OUTPUT>::ComposeFilter F;
683         ComposeFstOptions<Arc, M, F> nopts(opts);
684         return CreateBase1(fst1, fst2, nopts);
685       }
686       case MATCH_INPUT: {    // Lookahead on fst2
687         VLOG(2) << "ComposeFst: Lookahead on fst2";
688         typedef typename DefaultLookAhead<Arc, MATCH_INPUT>::FstMatcher M;
689         typedef typename DefaultLookAhead<Arc, MATCH_INPUT>::ComposeFilter F;
690         ComposeFstOptions<Arc, M, F> nopts(opts);
691         return CreateBase1(fst1, fst2, nopts);
692       }
693     }
694   }
695 
696  private:
697   // Makes visible to friends.
GetImpl()698   Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
699 
700   void operator=(const ComposeFst<A, C> &fst);  // disallow
701 };
702 
703 
704 // Specialization for ComposeFst.
705 template <class A, class C>
706 class StateIterator< ComposeFst<A, C> >
707     : public CacheStateIterator<ComposeFst<A, C> > {
708  public:
StateIterator(const ComposeFst<A,C> & fst)709   explicit StateIterator(const ComposeFst<A, C> &fst)
710       : CacheStateIterator<ComposeFst<A, C> >(fst, fst.GetImpl()) {}
711 };
712 
713 
714 // Specialization for ComposeFst.
715 template <class A, class C>
716 class ArcIterator< ComposeFst<A, C> >
717     : public CacheArcIterator<ComposeFst<A, C> > {
718  public:
719   typedef typename A::StateId StateId;
720 
ArcIterator(const ComposeFst<A,C> & fst,StateId s)721   ArcIterator(const ComposeFst<A, C> &fst, StateId s)
722       : CacheArcIterator<ComposeFst<A, C> >(fst.GetImpl(), s) {
723     if (!fst.GetImpl()->HasArcs(s))
724       fst.GetImpl()->Expand(s);
725   }
726 
727  private:
728   DISALLOW_COPY_AND_ASSIGN(ArcIterator);
729 };
730 
731 template <class A, class C> inline
InitStateIterator(StateIteratorData<A> * data)732 void ComposeFst<A, C>::InitStateIterator(StateIteratorData<A> *data) const {
733   data->base = new StateIterator< ComposeFst<A, C> >(*this);
734 }
735 
736 
737 // Specialized matcher for ComposeFst.
738 // Supports MATCH_INPUT (resp. MATCH_OUTPUT) iff the underlying
739 // matchers for the two Fsts being composed support
740 // MATCH_INPUT (resp. MATCH_OUTPUT)
741 template <class C, class F, class T>
742 class ComposeFstMatcher : public MatcherBase<typename C::Arc> {
743  public:
744   typedef typename C::Arc Arc;
745   typedef typename Arc::Label Label;
746   typedef typename Arc::StateId StateId;
747   typedef typename F::FilterState FilterState;
748   typedef typename F::Matcher1 Matcher1;
749   typedef typename F::Matcher2 Matcher2;
750   typedef typename T::StateTuple StateTuple;
751 
ComposeFstMatcher(const ComposeFst<Arc,C> & fst,const ComposeFstImpl<C,F,T> * impl,MatchType match_type)752   ComposeFstMatcher(const ComposeFst<Arc, C> &fst,
753                     const ComposeFstImpl<C, F, T> *impl,
754                     MatchType match_type)
755       : fst_(fst),
756         impl_(impl),
757         s_(kNoStateId),
758         match_type_(match_type),
759         matcher1_(impl->matcher1_->Copy()),
760         matcher2_(impl->matcher2_->Copy()),
761         current_loop_(false),
762         loop_(kNoLabel, 0, Arc::Weight::One(), kNoStateId),
763         error_(false) {
764     if (match_type == MATCH_OUTPUT)
765       swap(loop_.ilabel, loop_.olabel);
766   }
767 
768   ComposeFstMatcher(const ComposeFstMatcher<C, F, T> &matcher,
769                     bool safe = false)
770       : fst_(matcher.fst_),
771         impl_(matcher.impl_),
772         s_(fst::kNoStateId),
773         match_type_(matcher.match_type_),
774         matcher1_(matcher.matcher1_->Copy(safe)),
775         matcher2_(matcher.matcher2_->Copy(safe)),
776         current_loop_(false),
777         loop_(fst::kNoLabel, 0, Arc::Weight::One(), fst::kNoStateId),
778         error_(matcher.error_) {
779     if (safe == true) {
780       FSTERROR() << "ComposeFstMatcher: safe copy not supported";
781       error_ = true;
782     }
783     if (match_type_ == MATCH_OUTPUT)
784       swap(loop_.ilabel, loop_.olabel);
785   }
786 
787   virtual ComposeFstMatcher<C, F, T> *Copy(bool safe = false) const {
788     return new ComposeFstMatcher<C, F, T>(*this, safe);
789   }
790 
~ComposeFstMatcher()791   virtual ~ComposeFstMatcher() {
792     delete matcher1_;
793     delete matcher2_;
794   }
795 
Type(bool test)796   virtual MatchType Type(bool test) const {
797     if ((matcher1_->Type(test) == MATCH_NONE) ||
798         (matcher2_->Type(test) == MATCH_NONE)) {
799       return MATCH_NONE;
800     }
801     if (((matcher1_->Type(test) == MATCH_UNKNOWN)  &&
802          (matcher2_->Type(test) == MATCH_UNKNOWN)) ||
803         ((matcher1_->Type(test) == MATCH_UNKNOWN)  &&
804          (matcher2_->Type(test) == match_type_)) ||
805         ((matcher1_->Type(test) == match_type_)  &&
806          (matcher2_->Type(test) == MATCH_UNKNOWN))) {
807       return MATCH_UNKNOWN;
808     }
809     if ((matcher1_->Type(test) == match_type_)  &&
810         (matcher2_->Type(test) == match_type_)) {
811       return match_type_;
812     }
813     return MATCH_NONE;
814   }
815 
GetFst()816   virtual const Fst<Arc> &GetFst() const {
817     return fst_;
818   }
819 
Properties(uint64 inprops)820   virtual uint64 Properties(uint64 inprops) const {
821     uint64 outprops = inprops;
822     if (error_) outprops |= kError;
823     return outprops;
824   }
825 
826   // Processes a match with the filter and creates resulting arc.
MatchArc(StateId s,Arc arc1,Arc arc2)827   bool MatchArc(StateId s, Arc arc1, Arc arc2) {
828     const FilterState &f = impl_->filter_->FilterArc(&arc1, &arc2);
829     if (f == FilterState::NoState())
830       return false;
831     StateTuple tuple(arc1.nextstate, arc2.nextstate, f);
832     arc_.ilabel = arc1.ilabel;
833     arc_.olabel = arc2.olabel;
834     arc_.weight = Times(arc1.weight, arc2.weight);
835     arc_.nextstate = impl_->state_table_->FindState(tuple);
836     return true;
837   }
838 
839   // Finds the first match allowed by the filter.
840   template <class MatcherA, class MatcherB>
FindLabel(Label label,MatcherA * matchera,MatcherB * matcherb)841   bool FindLabel(Label label, MatcherA *matchera, MatcherB *matcherb) {
842     if (matchera->Find(label)) {
843       matcherb->Find(match_type_ == MATCH_INPUT ?
844                      matchera->Value().olabel : matchera->Value().ilabel);
845       return FindNext(matchera, matcherb);
846     }
847     return false;
848   }
849 
850   // Finds the next match allowed by the filter:
851   // Returns true if such a match is found.
852   template <class MatcherA, class MatcherB>
FindNext(MatcherA * matchera,MatcherB * matcherb)853   bool FindNext(MatcherA *matchera, MatcherB *matcherb) {
854     // State when entering this function:
855     // 'matchera' is pointed to a match (x,y) for label x, and a match for y was
856     // requested on 'matcherb'.
857     while (!matchera->Done() || !matcherb->Done()) {
858       if (matcherb->Done()) {
859         // If no more matches for y on 'matcherb'
860         // move forward on 'matchera' until a match (x,y') is found
861         // such that there is a match for y' on 'matcherb'.
862         matchera->Next();
863         while (!matchera->Done() &&
864                !matcherb->Find(
865                    match_type_ == MATCH_INPUT ?
866                    matchera->Value().olabel : matchera->Value().ilabel)) {
867           matchera->Next();
868         }
869       }
870       while (!matcherb->Done()) {
871         // 'matchera' is pointing to a match (x,y') ('arca') and
872         // 'matcherb' is pointing to a match (y',z') ('arcb').
873         // If combining these two arcs is allowed by the filter
874         // (hence resulting in an arc (x,z')) return true.
875         // Position 'matcherb' on the next potential match for y' before
876         // returning.
877         const Arc &arca = matchera->Value();
878         const Arc &arcb = matcherb->Value();
879         // Position 'matcherb' on the next potential match for y'.
880         matcherb->Next();
881         // If combining these two arcs is allowed by the filter
882         // (hence resulting in an arc (x,z')) return true.
883         // returning. Otherwise consider next match for y' on 'matcherb'.
884         if (MatchArc(s_,
885                      match_type_ == MATCH_INPUT ? arca : arcb,
886                      match_type_ == MATCH_INPUT ? arcb : arca)) {
887           return true;
888         }
889       }
890     }
891     // Both 'matchera' and 'matcherb' are done, no more match to analyse.
892     return false;
893   }
894 
895  private:
SetState_(StateId s)896   virtual void SetState_(StateId s) {
897     if (s_ == s) return;
898     s_ = s;
899     StateTuple tuple = impl_->state_table_->Tuple(s);
900     matcher1_->SetState(tuple.StateId1());
901     matcher2_->SetState(tuple.StateId2());
902     loop_.nextstate = s_;
903   }
904 
Find_(Label label)905   virtual bool Find_(Label label) {
906     bool found = false;
907     current_loop_ = false;
908     if (label == 0) {
909       current_loop_ = true;
910       found = true;
911     }
912     if (match_type_ == MATCH_INPUT)
913       found = found || FindLabel(label, matcher1_, matcher2_);
914     else  // match_type_ == MATCH_OUTPUT
915       found = found || FindLabel(label, matcher2_, matcher1_);
916     return found;
917   }
918 
Done_()919   virtual bool Done_() const {
920     return !current_loop_ &&  matcher1_->Done() && matcher2_->Done();
921   }
922 
Value_()923   virtual const Arc &Value_() const {
924     return current_loop_ ? loop_ : arc_;
925   }
926 
Next_()927   virtual void Next_() {
928     if (current_loop_)
929       current_loop_ = false;
930     else if (match_type_ == MATCH_INPUT)
931       FindNext(matcher1_, matcher2_);
932     else  // match_type_ == MATCH_OUTPUT
933       FindNext(matcher2_, matcher1_);
934   }
935 
Priority_(StateId s)936   virtual ssize_t Priority_(StateId s) { return fst_.NumArcs(s); }
937 
938  private:
939   const ComposeFst<Arc, C> &fst_;
940   const ComposeFstImpl<C, F, T> *impl_;
941   StateId s_;
942   MatchType match_type_;
943   Matcher1 *matcher1_;
944   Matcher2 *matcher2_;
945   bool current_loop_;
946   Arc loop_;
947   Arc arc_;
948   bool error_;
949 };
950 
951 
952 // Useful alias when using StdArc.
953 typedef ComposeFst<StdArc> StdComposeFst;
954 
955 enum ComposeFilter { AUTO_FILTER, NULL_FILTER, SEQUENCE_FILTER,
956                      ALT_SEQUENCE_FILTER, MATCH_FILTER };
957 
958 struct ComposeOptions {
959   bool connect;  // Connect output
960   ComposeFilter filter_type;  // Which pre-defined filter to use
961 
962   ComposeOptions(bool c, ComposeFilter ft = AUTO_FILTER)
connectComposeOptions963       : connect(c), filter_type(ft) {}
ComposeOptionsComposeOptions964   ComposeOptions() : connect(true), filter_type(AUTO_FILTER) {}
965 };
966 
967 // Computes the composition of two transducers. This version writes
968 // the composed FST into a MutableFst. If FST1 transduces string x to
969 // y with weight a and FST2 transduces y to z with weight b, then
970 // their composition transduces string x to z with weight
971 // Times(x, z).
972 //
973 // The output labels of the first transducer or the input labels of
974 // the second transducer must be sorted.  The weights need to form a
975 // commutative semiring (valid for TropicalWeight and LogWeight).
976 //
977 // Complexity:
978 // Assuming the first FST is unsorted and the second is sorted:
979 // - Time: O(V1 V2 D1 (log D2 + M2)),
980 // - Space: O(V1 V2 D1 M2)
981 // where Vi = # of states, Di = maximum out-degree, and Mi is
982 // the maximum multiplicity for the ith FST.
983 //
984 // Caveats:
985 // - Compose trims its output.
986 // - The efficiency of composition can be strongly affected by several factors:
987 //   - the choice of which transducer is sorted - prefer sorting the FST
988 //     that has the greater average out-degree.
989 //   - the amount of non-determinism
990 //   - the presence and location of epsilon transitions - avoid epsilon
991 //     transitions on the output side of the first transducer or
992 //     the input side of the second transducer or prefer placing
993 //     them later in a path since they delay matching and can
994 //     introduce non-coaccessible states and transitions.
995 template <class Arc>
996 void Compose(const Fst<Arc> &ifst1, const Fst<Arc> &ifst2,
997              MutableFst<Arc> *ofst,
998              const ComposeOptions &opts = ComposeOptions()) {
999   typedef Matcher< Fst<Arc> > M;
1000 
1001   if (opts.filter_type == AUTO_FILTER) {
1002      CacheOptions nopts;
1003      nopts.gc_limit = 0;  // Cache only the last state for fastest copy.
1004      *ofst = ComposeFst<Arc>(ifst1, ifst2, nopts);
1005   } else if (opts.filter_type == NULL_FILTER) {
1006     ComposeFstOptions<Arc, M,  NullComposeFilter<M> > copts;
1007     copts.gc_limit = 0;  // Cache only the last state for fastest copy.
1008     *ofst = ComposeFst<Arc>(ifst1, ifst2, copts);
1009   } else if (opts.filter_type == SEQUENCE_FILTER) {
1010     ComposeFstOptions<Arc, M,  SequenceComposeFilter<M> > copts;
1011     copts.gc_limit = 0;  // Cache only the last state for fastest copy.
1012     *ofst = ComposeFst<Arc>(ifst1, ifst2, copts);
1013   } else if (opts.filter_type == ALT_SEQUENCE_FILTER) {
1014     ComposeFstOptions<Arc, M,  AltSequenceComposeFilter<M> > copts;
1015     copts.gc_limit = 0;  // Cache only the last state for fastest copy.
1016     *ofst = ComposeFst<Arc>(ifst1, ifst2, copts);
1017   } else if (opts.filter_type == MATCH_FILTER) {
1018     ComposeFstOptions<Arc, M,  MatchComposeFilter<M> > copts;
1019     copts.gc_limit = 0;  // Cache only the last state for fastest copy.
1020     *ofst = ComposeFst<Arc>(ifst1, ifst2, copts);
1021   }
1022 
1023   if (opts.connect)
1024     Connect(ofst);
1025 }
1026 
1027 }  // namespace fst
1028 
1029 #endif  // FST_LIB_COMPOSE_H__
1030