1 // compose.h
2
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Class to compute the composition of two FSTs
20
21 #ifndef FST_LIB_COMPOSE_H__
22 #define FST_LIB_COMPOSE_H__
23
24 #include <algorithm>
25 #include <string>
26 #include <vector>
27 using std::vector;
28
29 #include <fst/cache.h>
30 #include <fst/compose-filter.h>
31 #include <fst/fst-decl.h> // For optional argument declarations
32 #include <fst/lookahead-filter.h>
33 #include <fst/matcher.h>
34 #include <fst/state-table.h>
35 #include <fst/test-properties.h>
36
37
38 namespace fst {
39
40 // Delayed composition options templated on the arc type, the matcher,
41 // the composition filter, and the composition state table. By
42 // default, the matchers, filter, and state table are constructed by
43 // composition. If set below, the user can instead pass in these
44 // objects; in that case, ComposeFst takes their ownership. This
45 // version controls composition implemented between generic Fst<Arc>
46 // types and a shared matcher type M for Fst<Arc>. This should be
47 // adequate for most applications, giving a reasonable tradeoff
48 // between efficiency and code sharing (but see ComposeFstImplOptions).
49 template <class A,
50 class M = Matcher<Fst<A> >,
51 class F = SequenceComposeFilter<M>,
52 class T = GenericComposeStateTable<A, typename F::FilterState> >
53 struct ComposeFstOptions : public CacheOptions {
54 M *matcher1; // FST1 matcher (see matcher.h)
55 M *matcher2; // FST2 matcher
56 F *filter; // Composition filter (see compose-filter.h)
57 T *state_table; // Composition state table (see compose-state-table.h)
58
59 explicit ComposeFstOptions(const CacheOptions &opts,
60 M *mat1 = 0, M *mat2 = 0,
61 F *filt = 0, T *sttable= 0)
CacheOptionsComposeFstOptions62 : CacheOptions(opts), matcher1(mat1), matcher2(mat2),
63 filter(filt), state_table(sttable) {}
64
ComposeFstOptionsComposeFstOptions65 ComposeFstOptions() : matcher1(0), matcher2(0), filter(0), state_table(0) {}
66 };
67
68
69 // Delayed composition options templated on the two matcher types, the
70 // composition filter, the composition state table and the cache
71 // store. By default, the matchers, filter, state table and cache
72 // store are constructed by composition. If set below, the user can
73 // instead pass in these objects; in that case, ComposeFst takes their
74 // ownership. This version controls composition implemented using
75 // arbitrary matchers (of the same Arc type but otherwise arbitrary
76 // Fst type). The user must ensure the matchers are compatible. These
77 // options permit the most efficient use, but shares the least
78 // code. This is for advanced use only in the most demanding or
79 // specialized applications that can benefit from it (o.w. prefer
80 // ComposeFstOptions).
81 template <class M1, class M2,
82 class F = SequenceComposeFilter<M1, M2>,
83 class T = GenericComposeStateTable<typename M1::Arc,
84 typename F::FilterState>,
85 class C = DefaultCacheStore<typename M1::Arc> >
86 struct ComposeFstImplOptions : public CacheImplOptions<C> {
87 M1 *matcher1; // FST1 matcher (see matcher.h)
88 M2 *matcher2; // FST2 matcher
89 F *filter; // Composition filter (see compose-filter.h)
90 T *state_table; // Composition state table (see compose-state-table.h)
91
92 explicit ComposeFstImplOptions(const CacheOptions &opts,
93 M1 *mat1 = 0, M2 *mat2 = 0,
94 F *filt = 0, T *sttable= 0)
95 : CacheImplOptions<C>(opts), matcher1(mat1), matcher2(mat2),
96 filter(filt), state_table(sttable) {}
97
98 explicit ComposeFstImplOptions(const CacheImplOptions<C> &opts,
99 M1 *mat1 = 0, M2 *mat2 = 0,
100 F *filt = 0, T *sttable= 0)
101 : CacheImplOptions<C>(opts), matcher1(mat1), matcher2(mat2),
102 filter(filt), state_table(sttable) {}
103
ComposeFstImplOptionsComposeFstImplOptions104 ComposeFstImplOptions()
105 : matcher1(0), matcher2(0), filter(0), state_table(0) {}
106 };
107
108
109 // Implementation of delayed composition. This base class is
110 // common to the variants with different matchers, composition filters
111 // and state tables.
112 template <class A, class C = DefaultCacheStore<A> >
113 class ComposeFstImplBase : public CacheBaseImpl<typename C::State, C> {
114 public:
115 typedef typename A::Label Label;
116 typedef typename A::Weight Weight;
117 typedef typename A::StateId StateId;
118 typedef typename C::State State;
119 typedef CacheBaseImpl<State, C> CImpl;
120
121 using FstImpl<A>::SetType;
122 using FstImpl<A>::SetProperties;
123 using FstImpl<A>::Properties;
124 using FstImpl<A>::SetInputSymbols;
125 using FstImpl<A>::SetOutputSymbols;
126
127 using CImpl::HasStart;
128 using CImpl::HasFinal;
129 using CImpl::HasArcs;
130 using CImpl::SetFinal;
131 using CImpl::SetStart;
132
ComposeFstImplBase(const Fst<A> & fst1,const Fst<A> & fst2,const CacheImplOptions<C> & opts)133 ComposeFstImplBase(const Fst<A> &fst1, const Fst<A> &fst2,
134 const CacheImplOptions<C> &opts)
135 : CImpl(opts) {
136 InitBase(fst1, fst2);
137 }
138
ComposeFstImplBase(const Fst<A> & fst1,const Fst<A> & fst2,const CacheOptions & opts)139 ComposeFstImplBase(const Fst<A> &fst1, const Fst<A> &fst2,
140 const CacheOptions &opts)
141 : CImpl(opts) {
142 InitBase(fst1, fst2);
143 }
144
ComposeFstImplBase(const ComposeFstImplBase<A,C> & impl)145 ComposeFstImplBase(const ComposeFstImplBase<A, C> &impl)
146 : CImpl(impl, true) {
147 SetType(impl.Type());
148 SetProperties(impl.Properties(), kCopyProperties);
149 SetInputSymbols(impl.InputSymbols());
150 SetOutputSymbols(impl.OutputSymbols());
151 }
152
153 virtual ComposeFstImplBase<A, C> *Copy() = 0;
154
~ComposeFstImplBase()155 virtual ~ComposeFstImplBase() {}
156
Start()157 StateId Start() {
158 if (!HasStart()) {
159 StateId start = ComputeStart();
160 if (start != kNoStateId) {
161 SetStart(start);
162 }
163 }
164 return CImpl::Start();
165 }
166
Final(StateId s)167 Weight Final(StateId s) {
168 if (!HasFinal(s)) {
169 Weight final = ComputeFinal(s);
170 SetFinal(s, final);
171 }
172 return CImpl::Final(s);
173 }
174
175 virtual void Expand(StateId s) = 0;
176
NumArcs(StateId s)177 size_t NumArcs(StateId s) {
178 if (!HasArcs(s))
179 Expand(s);
180 return CImpl::NumArcs(s);
181 }
182
NumInputEpsilons(StateId s)183 size_t NumInputEpsilons(StateId s) {
184 if (!HasArcs(s))
185 Expand(s);
186 return CImpl::NumInputEpsilons(s);
187 }
188
NumOutputEpsilons(StateId s)189 size_t NumOutputEpsilons(StateId s) {
190 if (!HasArcs(s))
191 Expand(s);
192 return CImpl::NumOutputEpsilons(s);
193 }
194
InitArcIterator(StateId s,ArcIteratorData<A> * data)195 void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
196 if (!HasArcs(s))
197 Expand(s);
198 CImpl::InitArcIterator(s, data);
199 }
200
InitMatcher(const ComposeFst<A,C> & fst,MatchType match_type)201 virtual MatcherBase<A> *InitMatcher(const ComposeFst<A, C> &fst,
202 MatchType match_type) const {
203 // Use the default matcher if no override is provided.
204 return 0;
205 }
206
207 protected:
208 virtual StateId ComputeStart() = 0;
209 virtual Weight ComputeFinal(StateId s) = 0;
210
211
InitBase(const Fst<A> & fst1,const Fst<A> & fst2)212 void InitBase(const Fst<A> &fst1, const Fst<A> &fst2) {
213 VLOG(2) << "ComposeFst(" << this << "): Begin";
214 SetType("compose");
215
216 if (!CompatSymbols(fst2.InputSymbols(), fst1.OutputSymbols())) {
217 FSTERROR() << "ComposeFst: output symbol table of 1st argument "
218 << "does not match input symbol table of 2nd argument";
219 SetProperties(kError, kError);
220 }
221
222 SetInputSymbols(fst1.InputSymbols());
223 SetOutputSymbols(fst2.OutputSymbols());
224 }
225 };
226
227 // Forward declaration of ComposeFstMatcher
228 template <class C, class F, class T>
229 class ComposeFstMatcher;
230
231 // Implementaion of delayed composition templated on the matchers (see
232 // matcher.h), composition filter (see compose-filter-inl.h) and
233 // the composition state table (see compose-state-table.h).
234 template <class C, class F, class T>
235 class ComposeFstImpl : public ComposeFstImplBase<typename C::Arc, C> {
236 typedef typename F::Matcher1 M1;
237 typedef typename F::Matcher2 M2;
238 friend class ComposeFstMatcher<C, F, T>;
239
240 typedef typename M1::FST FST1;
241 typedef typename M2::FST FST2;
242 typedef typename M1::Arc Arc;
243 typedef typename Arc::StateId StateId;
244 typedef typename Arc::Label Label;
245 typedef typename Arc::Weight Weight;
246 typedef typename F::FilterState FilterState;
247 typedef typename C::State State;
248 typedef CacheBaseImpl<State, C> CImpl;
249 typedef typename T::StateTuple StateTuple;
250
251 using FstImpl<Arc>::SetType;
252 using FstImpl<Arc>::SetProperties;
253
254 public:
255 template <class Mat1, class Mat2>
256 ComposeFstImpl(const FST1 &fst1, const FST2 &fst2,
257 const ComposeFstImplOptions<Mat1, Mat2, F, T, C> &opts);
258
ComposeFstImpl(const ComposeFstImpl<C,F,T> & impl)259 ComposeFstImpl(const ComposeFstImpl<C, F, T> &impl)
260 : ComposeFstImplBase<Arc, C>(impl),
261 filter_(new F(*impl.filter_, true)),
262 matcher1_(filter_->GetMatcher1()),
263 matcher2_(filter_->GetMatcher2()),
264 fst1_(matcher1_->GetFst()),
265 fst2_(matcher2_->GetFst()),
266 state_table_(new T(*impl.state_table_)),
267 match_type_(impl.match_type_) {}
268
~ComposeFstImpl()269 ~ComposeFstImpl() {
270 VLOG(2) << "ComposeFst(" << this
271 << "): End: # of visited states: " << state_table_->Size();
272
273 delete filter_;
274 delete state_table_;
275 }
276
Copy()277 virtual ComposeFstImpl<C, F, T> *Copy() {
278 return new ComposeFstImpl<C, F, T>(*this);
279 }
280
Properties()281 uint64 Properties() const { return Properties(kFstProperties); }
282
283 // Set error if found; return FST impl properties.
Properties(uint64 mask)284 uint64 Properties(uint64 mask) const {
285 if ((mask & kError) &&
286 (fst1_.Properties(kError, false) ||
287 fst2_.Properties(kError, false) ||
288 (matcher1_->Properties(0) & kError) ||
289 (matcher2_->Properties(0) & kError) |
290 (filter_->Properties(0) & kError) ||
291 state_table_->Error())) {
292 SetProperties(kError, kError);
293 }
294 return FstImpl<Arc>::Properties(mask);
295 }
296
297 // Arranges it so that the first arg to OrderedExpand is the Fst
298 // that will be matched on.
Expand(StateId s)299 void Expand(StateId s) {
300 const StateTuple &tuple = state_table_->Tuple(s);
301 const StateId s1 = tuple.StateId1();
302 const StateId s2 = tuple.StateId2();
303 filter_->SetState(s1, s2, tuple.GetFilterState());
304 if (MatchInput(s1, s2)) {
305 OrderedExpand(s, fst2_, s2, fst1_, s1, matcher2_, true);
306 } else {
307 OrderedExpand(s, fst1_, s1, fst2_, s2, matcher1_, false);
308 }
309 }
310
GetFst1()311 const FST1 &GetFst1() const { return fst1_; }
GetFst2()312 const FST2 &GetFst2() const { return fst2_; }
313
GetMatcher1()314 const M1 *GetMatcher1() const { return matcher1_; }
GetMatcher1()315 M1 *GetMatcher1() { return matcher1_; }
316
GetMatcher2()317 const M2 *GetMatcher2() const { return matcher2_; }
GetMatcher2()318 M2 *GetMatcher2() { return matcher2_; }
319
GetFilter()320 const F *GetFilter() const { return filter_; }
GetFilter()321 F *GetFilter() { return filter_; }
322
GetStateTable()323 const T *GetStateTable() const { return state_table_; }
GetStateTable()324 T *GetStateTable() { return state_table_; }
325
InitMatcher(const ComposeFst<Arc,C> & fst,MatchType match_type)326 virtual MatcherBase<Arc> *InitMatcher(const ComposeFst<Arc, C> &fst,
327 MatchType match_type) const {
328 uint64 test_props = match_type == MATCH_INPUT ?
329 kFstProperties & ~kILabelInvariantProperties :
330 kFstProperties & ~kOLabelInvariantProperties;
331 // If both matchers support 'match_type' and we have
332 // a guarantee that a call to 'filter_->FilterArc(arc1, arc2)' will
333 // not modify the ilabel of arc1 when MATCH_INPUT or the olabel
334 // or arc2 when MATCH_OUTPUT, then ComposeFstMatcher can be used.
335 if ((matcher1_->Type(false) == match_type) &&
336 (matcher2_->Type(false) == match_type) &&
337 (filter_->Properties(test_props) == test_props)) {
338 VLOG(1) << "Creating ComposeFstMatcher";
339 return new ComposeFstMatcher<C, F, T>(fst, this, match_type);
340 }
341 return 0;
342 }
343
344 private:
345 // This does that actual matching of labels in the composition. The
346 // arguments are ordered so matching is called on state 'sa' of
347 // 'fsta' for each arc leaving state 'sb' of 'fstb'. The 'match_input' arg
348 // determines whether the input or output label of arcs at 'sb' is
349 // the one to match on.
350 template <class FST, class Matcher>
OrderedExpand(StateId s,const Fst<Arc> &,StateId sa,const FST & fstb,StateId sb,Matcher * matchera,bool match_input)351 void OrderedExpand(StateId s, const Fst<Arc> &, StateId sa,
352 const FST &fstb, StateId sb,
353 Matcher *matchera, bool match_input) {
354 matchera->SetState(sa);
355
356 // First process non-consuming symbols (e.g., epsilons) on FSTA.
357 Arc loop(match_input ? 0 : kNoLabel, match_input ? kNoLabel : 0,
358 Weight::One(), sb);
359 MatchArc(s, matchera, loop, match_input);
360
361 // Then process matches on FSTB.
362 for (ArcIterator<FST> iterb(fstb, sb); !iterb.Done(); iterb.Next())
363 MatchArc(s, matchera, iterb.Value(), match_input);
364
365 CImpl::SetArcs(s);
366 }
367
368 // Matches a single transition from 'fstb' against 'fata' at 's'.
369 template <class Matcher>
MatchArc(StateId s,Matcher * matchera,const Arc & arc,bool match_input)370 void MatchArc(StateId s, Matcher *matchera,
371 const Arc &arc, bool match_input) {
372 if (matchera->Find(match_input ? arc.olabel : arc.ilabel)) {
373 for (; !matchera->Done(); matchera->Next()) {
374 Arc arca = matchera->Value();
375 Arc arcb = arc;
376 if (match_input) {
377 const FilterState &f = filter_->FilterArc(&arcb, &arca);
378 if (f != FilterState::NoState())
379 AddArc(s, arcb, arca, f);
380 } else {
381 const FilterState &f = filter_->FilterArc(&arca, &arcb);
382 if (f != FilterState::NoState())
383 AddArc(s, arca, arcb, f);
384 }
385 }
386 }
387 }
388
389 // Add a matching transition at 's'.
AddArc(StateId s,const Arc & arc1,const Arc & arc2,const FilterState & f)390 void AddArc(StateId s, const Arc &arc1, const Arc &arc2,
391 const FilterState &f) {
392 StateTuple tuple(arc1.nextstate, arc2.nextstate, f);
393 Arc oarc(arc1.ilabel, arc2.olabel, Times(arc1.weight, arc2.weight),
394 state_table_->FindState(tuple));
395 CImpl::PushArc(s, oarc);
396 }
397
ComputeStart()398 StateId ComputeStart() {
399 StateId s1 = fst1_.Start();
400 if (s1 == kNoStateId)
401 return kNoStateId;
402
403 StateId s2 = fst2_.Start();
404 if (s2 == kNoStateId)
405 return kNoStateId;
406
407 const FilterState &f = filter_->Start();
408 StateTuple tuple(s1, s2, f);
409 return state_table_->FindState(tuple);
410 }
411
ComputeFinal(StateId s)412 Weight ComputeFinal(StateId s) {
413 const StateTuple &tuple = state_table_->Tuple(s);
414 const StateId s1 = tuple.StateId1();
415 Weight final1 = internal::Final(fst1_, s1);
416 if (final1 == Weight::Zero())
417 return final1;
418
419 const StateId s2 = tuple.StateId2();
420 Weight final2 = internal::Final(fst2_, s2);
421 if (final2 == Weight::Zero())
422 return final2;
423
424 filter_->SetState(s1, s2, tuple.GetFilterState());
425 filter_->FilterFinal(&final1, &final2);
426 return Times(final1, final2);
427 }
428
429 // Determines which side to match on per composition state.
MatchInput(StateId s1,StateId s2)430 bool MatchInput(StateId s1, StateId s2) {
431 switch (match_type_) {
432 case MATCH_INPUT:
433 return true;
434 case MATCH_OUTPUT:
435 return false;
436 default: // MATCH_BOTH
437 ssize_t priority1 = matcher1_->Priority(s1);
438 ssize_t priority2 = matcher2_->Priority(s2);
439
440 if (priority1 == kRequirePriority && priority2 == kRequirePriority) {
441 FSTERROR() << "ComposeFst: both sides can't require match";
442 SetProperties(kError, kError);
443 return true;
444 }
445
446 if (priority1 == kRequirePriority)
447 return false;
448 if (priority2 == kRequirePriority) {
449 return true;
450 }
451 return priority1 <= priority2;
452 }
453 }
454
455 // Identifies and verifies the capabilities of the matcher to be used for
456 // composition.
457 void SetMatchType();
458
459 F *filter_;
460 M1 *matcher1_;
461 M2 *matcher2_;
462 const FST1 &fst1_;
463 const FST2 &fst2_;
464 T *state_table_;
465
466 MatchType match_type_;
467
468 void operator=(const ComposeFstImpl<C, F, T> &); // disallow
469 };
470
471 template <class C, class F, class T>
472 template <class Mat1, class Mat2>
ComposeFstImpl(const FST1 & fst1,const FST2 & fst2,const ComposeFstImplOptions<Mat1,Mat2,F,T,C> & opts)473 ComposeFstImpl<C, F, T>::ComposeFstImpl(
474 const FST1 &fst1, const FST2 &fst2,
475 const ComposeFstImplOptions<Mat1, Mat2, F, T, C> &opts)
476 : ComposeFstImplBase<Arc, C>(fst1, fst2, opts),
477 filter_(opts.filter ? opts.filter :
478 new F(fst1, fst2, opts.matcher1, opts.matcher2)),
479 matcher1_(filter_->GetMatcher1()),
480 matcher2_(filter_->GetMatcher2()),
481 fst1_(matcher1_->GetFst()),
482 fst2_(matcher2_->GetFst()),
483 state_table_(opts.state_table ? opts.state_table :
484 new T(fst1_, fst2_)) {
485 SetMatchType();
486 if (match_type_ == MATCH_NONE)
487 SetProperties(kError, kError);
488 VLOG(2) << "ComposeFst(" << this << "): Match type: "
489 << (match_type_ == MATCH_OUTPUT ? "output" :
490 (match_type_ == MATCH_INPUT ? "input" :
491 (match_type_ == MATCH_BOTH ? "both" :
492 (match_type_ == MATCH_NONE ? "none" : "unknown"))));
493
494 uint64 fprops1 = fst1.Properties(kFstProperties, false);
495 uint64 fprops2 = fst2.Properties(kFstProperties, false);
496 uint64 mprops1 = matcher1_->Properties(fprops1);
497 uint64 mprops2 = matcher2_->Properties(fprops2);
498 uint64 cprops = ComposeProperties(mprops1, mprops2);
499 SetProperties(filter_->Properties(cprops), kCopyProperties);
500 if (state_table_->Error()) SetProperties(kError, kError);
501 VLOG(2) << "ComposeFst(" << this << "): Initialized";
502 }
503
504 template <class C, class F, class T>
SetMatchType()505 void ComposeFstImpl<C, F, T>::SetMatchType() {
506 // Ensures any required matching is possible and known.
507 if ((matcher1_->Flags() & kRequireMatch) &&
508 matcher1_->Type(true) != MATCH_OUTPUT) {
509 FSTERROR() << "ComposeFst: 1st argument requires matching but cannot.";
510 match_type_ = MATCH_NONE;
511 return;
512 }
513 if ((matcher2_->Flags() & kRequireMatch) &&
514 matcher2_->Type(true) != MATCH_INPUT) {
515 FSTERROR() << "ComposeFst: 2nd argument requires matching but cannot.";
516 match_type_ = MATCH_NONE;
517 return;
518 }
519
520 // Finds which sides to match on (favoring minimal testing of capabilities).
521 MatchType type1 = matcher1_->Type(false);
522 MatchType type2 = matcher2_->Type(false);
523 if (type1 == MATCH_OUTPUT && type2 == MATCH_INPUT) {
524 match_type_ = MATCH_BOTH;
525 } else if (type1 == MATCH_OUTPUT) {
526 match_type_ = MATCH_OUTPUT;
527 } else if (type2 == MATCH_INPUT) {
528 match_type_ = MATCH_INPUT;
529 } else if (matcher1_->Type(true) == MATCH_OUTPUT) {
530 match_type_ = MATCH_OUTPUT;
531 } else if (matcher2_->Type(true) == MATCH_INPUT) {
532 match_type_ = MATCH_INPUT;
533 } else {
534 FSTERROR() << "ComposeFst: 1st argument cannot match on output labels "
535 << "and 2nd argument cannot match on input labels (sort?).";
536 match_type_ = MATCH_NONE;
537 }
538 }
539
540 // Computes the composition of two transducers. This version is a
541 // delayed Fst. If FST1 transduces string x to y with weight a and FST2
542 // transduces y to z with weight b, then their composition transduces
543 // string x to z with weight Times(x, z).
544 //
545 // The output labels of the first transducer or the input labels of
546 // the second transducer must be sorted (with the default matcher).
547 // The weights need to form a commutative semiring (valid for
548 // TropicalWeight and LogWeight).
549 //
550 // Complexity:
551 // Assuming the first FST is unsorted and the second is sorted:
552 // - Time: O(v1 v2 d1 (log d2 + m2)),
553 // - Space: O(v1 v2)
554 // where vi = # of states visited, di = maximum out-degree, and mi the
555 // maximum multiplicity of the states visited for the ith
556 // FST. Constant time and space to visit an input state or arc is
557 // assumed and exclusive of caching.
558 //
559 // Caveats:
560 // - ComposeFst does not trim its output (since it is a delayed operation).
561 // - The efficiency of composition can be strongly affected by several factors:
562 // - the choice of which transducer is sorted - prefer sorting the FST
563 // that has the greater average out-degree.
564 // - the amount of non-determinism
565 // - the presence and location of epsilon transitions - avoid epsilon
566 // transitions on the output side of the first transducer or
567 // the input side of the second transducer or prefer placing
568 // them later in a path since they delay matching and can
569 // introduce non-coaccessible states and transitions.
570 //
571 // This class attaches interface to implementation and handles
572 // reference counting, delegating most methods to ImplToFst.
573 // The type C specifies the cache store (default declared in fst-decl.h).
574 template <class A, class C /* = DefaultCacheStore<A> */>
575 class ComposeFst : public ImplToFst< ComposeFstImplBase<A, C> > {
576 public:
577 friend class ArcIterator< ComposeFst<A, C> >;
578 friend class StateIterator< ComposeFst<A, C> >;
579
580 typedef A Arc;
581 typedef C Store;
582 typedef typename A::Weight Weight;
583 typedef typename A::StateId StateId;
584 typedef typename C::State State;
585
586 typedef ComposeFstImplBase<A, C> Impl;
587
588 using ImplToFst<Impl>::SetImpl;
589
590 // Compose specifying only caching options.
591 ComposeFst(const Fst<A> &fst1, const Fst<A> &fst2,
592 const CacheOptions &opts = CacheOptions())
CreateBase(fst1,fst2,opts)593 : ImplToFst<Impl>(CreateBase(fst1, fst2, opts)) {}
594
595 // Compose specifying one shared matcher type M. Requires input
596 // Fsts and matcher FST type (M::FST) be Fst<A>. Recommended for
597 // best code-sharing and matcher compatiblity.
598 template <class M, class F, class T>
ComposeFst(const Fst<A> & fst1,const Fst<A> & fst2,const ComposeFstOptions<A,M,F,T> & opts)599 ComposeFst(const Fst<A> &fst1, const Fst<A> &fst2,
600 const ComposeFstOptions<A, M, F, T> &opts)
601 : ImplToFst<Impl>(CreateBase1(fst1, fst2, opts)) {}
602
603 // Compose specifying two matcher types M1 and M2. Requires input
604 // Fsts (of the same Arc type but o.w. arbitrary) match the
605 // corresponding matcher FST types (M1::FST, M2::FST). Recommended
606 // only for advanced use in demanding or specialized applications
607 // due to potential code bloat and matcher incompatibilities.
608 template <class M1, class M2, class F, class T>
ComposeFst(const typename M1::FST & fst1,const typename M2::FST & fst2,const ComposeFstImplOptions<M1,M2,F,T,C> & opts)609 ComposeFst(const typename M1::FST &fst1, const typename M2::FST &fst2,
610 const ComposeFstImplOptions<M1, M2, F, T, C> &opts)
611 : ImplToFst<Impl>(CreateBase2(fst1, fst2, opts)) {}
612
613 // See Fst<>::Copy() for doc.
614 ComposeFst(const ComposeFst<A, C> &fst, bool safe = false)
615 : ImplToFst<Impl>() {
616 if (safe)
617 SetImpl(fst.GetImpl()->Copy());
618 else
619 SetImpl(fst.GetImpl(), false);
620 }
621
622 // Get a copy of this ComposeFst. See Fst<>::Copy() for further doc.
623 virtual ComposeFst<A, C> *Copy(bool safe = false) const {
624 return new ComposeFst<A, C>(*this, safe);
625 }
626
627 virtual inline void InitStateIterator(StateIteratorData<A> *data) const;
628
InitArcIterator(StateId s,ArcIteratorData<A> * data)629 virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
630 GetImpl()->InitArcIterator(s, data);
631 }
632
InitMatcher(MatchType match_type)633 virtual MatcherBase<A> *InitMatcher(MatchType match_type) const {
634 return GetImpl()->InitMatcher(*this, match_type);
635 }
636
637 protected:
ComposeFst()638 ComposeFst() {}
639
640 // Create compose implementation specifying two matcher types.
641 template <class M1, class M2, class F, class T>
CreateBase2(const typename M1::FST & fst1,const typename M2::FST & fst2,const ComposeFstImplOptions<M1,M2,F,T,C> & opts)642 static Impl *CreateBase2(
643 const typename M1::FST &fst1, const typename M2::FST &fst2,
644 const ComposeFstImplOptions<M1, M2, F, T, C> &opts) {
645 Impl *impl = new ComposeFstImpl<C, F, T>(fst1, fst2, opts);
646 if (!(Weight::Properties() & kCommutative)) {
647 int64 props1 = fst1.Properties(kUnweighted, true);
648 int64 props2 = fst2.Properties(kUnweighted, true);
649 if (!(props1 & kUnweighted) && !(props2 & kUnweighted)) {
650 FSTERROR() << "ComposeFst: Weights must be a commutative semiring: "
651 << Weight::Type();
652 impl->SetProperties(kError, kError);
653 }
654 }
655 return impl;
656 }
657
658 // Create compose implementation specifying one matcher type.
659 // Requires input Fsts and matcher FST type (M::FST) be Fst<A>
660 template <class M, class F, class T>
CreateBase1(const Fst<A> & fst1,const Fst<A> & fst2,const ComposeFstOptions<A,M,F,T> & opts)661 static Impl *CreateBase1(const Fst<A> &fst1, const Fst<A> &fst2,
662 const ComposeFstOptions<A, M, F, T> &opts) {
663 ComposeFstImplOptions<M, M, F, T, C> nopts(opts, opts.matcher1,
664 opts.matcher2,
665 opts.filter, opts.state_table);
666 return CreateBase2(fst1, fst2, nopts);
667 }
668
669 // Create compose implementation specifying no matcher type.
CreateBase(const Fst<A> & fst1,const Fst<A> & fst2,const CacheOptions & opts)670 static Impl *CreateBase(const Fst<A> &fst1, const Fst<A> &fst2,
671 const CacheOptions &opts) {
672 switch (LookAheadMatchType(fst1, fst2)) { // Check for lookahead matchers
673 default:
674 case MATCH_NONE: { // Default composition (no look-ahead)
675 VLOG(2) << "ComposeFst: Default composition (no look-ahead)";
676 ComposeFstOptions<Arc> nopts(opts);
677 return CreateBase1(fst1, fst2, nopts);
678 }
679 case MATCH_OUTPUT: { // Lookahead on fst1
680 VLOG(2) << "ComposeFst: Lookahead on fst1";
681 typedef typename DefaultLookAhead<Arc, MATCH_OUTPUT>::FstMatcher M;
682 typedef typename DefaultLookAhead<Arc, MATCH_OUTPUT>::ComposeFilter F;
683 ComposeFstOptions<Arc, M, F> nopts(opts);
684 return CreateBase1(fst1, fst2, nopts);
685 }
686 case MATCH_INPUT: { // Lookahead on fst2
687 VLOG(2) << "ComposeFst: Lookahead on fst2";
688 typedef typename DefaultLookAhead<Arc, MATCH_INPUT>::FstMatcher M;
689 typedef typename DefaultLookAhead<Arc, MATCH_INPUT>::ComposeFilter F;
690 ComposeFstOptions<Arc, M, F> nopts(opts);
691 return CreateBase1(fst1, fst2, nopts);
692 }
693 }
694 }
695
696 private:
697 // Makes visible to friends.
GetImpl()698 Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
699
700 void operator=(const ComposeFst<A, C> &fst); // disallow
701 };
702
703
704 // Specialization for ComposeFst.
705 template <class A, class C>
706 class StateIterator< ComposeFst<A, C> >
707 : public CacheStateIterator<ComposeFst<A, C> > {
708 public:
StateIterator(const ComposeFst<A,C> & fst)709 explicit StateIterator(const ComposeFst<A, C> &fst)
710 : CacheStateIterator<ComposeFst<A, C> >(fst, fst.GetImpl()) {}
711 };
712
713
714 // Specialization for ComposeFst.
715 template <class A, class C>
716 class ArcIterator< ComposeFst<A, C> >
717 : public CacheArcIterator<ComposeFst<A, C> > {
718 public:
719 typedef typename A::StateId StateId;
720
ArcIterator(const ComposeFst<A,C> & fst,StateId s)721 ArcIterator(const ComposeFst<A, C> &fst, StateId s)
722 : CacheArcIterator<ComposeFst<A, C> >(fst.GetImpl(), s) {
723 if (!fst.GetImpl()->HasArcs(s))
724 fst.GetImpl()->Expand(s);
725 }
726
727 private:
728 DISALLOW_COPY_AND_ASSIGN(ArcIterator);
729 };
730
731 template <class A, class C> inline
InitStateIterator(StateIteratorData<A> * data)732 void ComposeFst<A, C>::InitStateIterator(StateIteratorData<A> *data) const {
733 data->base = new StateIterator< ComposeFst<A, C> >(*this);
734 }
735
736
737 // Specialized matcher for ComposeFst.
738 // Supports MATCH_INPUT (resp. MATCH_OUTPUT) iff the underlying
739 // matchers for the two Fsts being composed support
740 // MATCH_INPUT (resp. MATCH_OUTPUT)
741 template <class C, class F, class T>
742 class ComposeFstMatcher : public MatcherBase<typename C::Arc> {
743 public:
744 typedef typename C::Arc Arc;
745 typedef typename Arc::Label Label;
746 typedef typename Arc::StateId StateId;
747 typedef typename F::FilterState FilterState;
748 typedef typename F::Matcher1 Matcher1;
749 typedef typename F::Matcher2 Matcher2;
750 typedef typename T::StateTuple StateTuple;
751
ComposeFstMatcher(const ComposeFst<Arc,C> & fst,const ComposeFstImpl<C,F,T> * impl,MatchType match_type)752 ComposeFstMatcher(const ComposeFst<Arc, C> &fst,
753 const ComposeFstImpl<C, F, T> *impl,
754 MatchType match_type)
755 : fst_(fst),
756 impl_(impl),
757 s_(kNoStateId),
758 match_type_(match_type),
759 matcher1_(impl->matcher1_->Copy()),
760 matcher2_(impl->matcher2_->Copy()),
761 current_loop_(false),
762 loop_(kNoLabel, 0, Arc::Weight::One(), kNoStateId),
763 error_(false) {
764 if (match_type == MATCH_OUTPUT)
765 swap(loop_.ilabel, loop_.olabel);
766 }
767
768 ComposeFstMatcher(const ComposeFstMatcher<C, F, T> &matcher,
769 bool safe = false)
770 : fst_(matcher.fst_),
771 impl_(matcher.impl_),
772 s_(fst::kNoStateId),
773 match_type_(matcher.match_type_),
774 matcher1_(matcher.matcher1_->Copy(safe)),
775 matcher2_(matcher.matcher2_->Copy(safe)),
776 current_loop_(false),
777 loop_(fst::kNoLabel, 0, Arc::Weight::One(), fst::kNoStateId),
778 error_(matcher.error_) {
779 if (safe == true) {
780 FSTERROR() << "ComposeFstMatcher: safe copy not supported";
781 error_ = true;
782 }
783 if (match_type_ == MATCH_OUTPUT)
784 swap(loop_.ilabel, loop_.olabel);
785 }
786
787 virtual ComposeFstMatcher<C, F, T> *Copy(bool safe = false) const {
788 return new ComposeFstMatcher<C, F, T>(*this, safe);
789 }
790
~ComposeFstMatcher()791 virtual ~ComposeFstMatcher() {
792 delete matcher1_;
793 delete matcher2_;
794 }
795
Type(bool test)796 virtual MatchType Type(bool test) const {
797 if ((matcher1_->Type(test) == MATCH_NONE) ||
798 (matcher2_->Type(test) == MATCH_NONE)) {
799 return MATCH_NONE;
800 }
801 if (((matcher1_->Type(test) == MATCH_UNKNOWN) &&
802 (matcher2_->Type(test) == MATCH_UNKNOWN)) ||
803 ((matcher1_->Type(test) == MATCH_UNKNOWN) &&
804 (matcher2_->Type(test) == match_type_)) ||
805 ((matcher1_->Type(test) == match_type_) &&
806 (matcher2_->Type(test) == MATCH_UNKNOWN))) {
807 return MATCH_UNKNOWN;
808 }
809 if ((matcher1_->Type(test) == match_type_) &&
810 (matcher2_->Type(test) == match_type_)) {
811 return match_type_;
812 }
813 return MATCH_NONE;
814 }
815
GetFst()816 virtual const Fst<Arc> &GetFst() const {
817 return fst_;
818 }
819
Properties(uint64 inprops)820 virtual uint64 Properties(uint64 inprops) const {
821 uint64 outprops = inprops;
822 if (error_) outprops |= kError;
823 return outprops;
824 }
825
826 // Processes a match with the filter and creates resulting arc.
MatchArc(StateId s,Arc arc1,Arc arc2)827 bool MatchArc(StateId s, Arc arc1, Arc arc2) {
828 const FilterState &f = impl_->filter_->FilterArc(&arc1, &arc2);
829 if (f == FilterState::NoState())
830 return false;
831 StateTuple tuple(arc1.nextstate, arc2.nextstate, f);
832 arc_.ilabel = arc1.ilabel;
833 arc_.olabel = arc2.olabel;
834 arc_.weight = Times(arc1.weight, arc2.weight);
835 arc_.nextstate = impl_->state_table_->FindState(tuple);
836 return true;
837 }
838
839 // Finds the first match allowed by the filter.
840 template <class MatcherA, class MatcherB>
FindLabel(Label label,MatcherA * matchera,MatcherB * matcherb)841 bool FindLabel(Label label, MatcherA *matchera, MatcherB *matcherb) {
842 if (matchera->Find(label)) {
843 matcherb->Find(match_type_ == MATCH_INPUT ?
844 matchera->Value().olabel : matchera->Value().ilabel);
845 return FindNext(matchera, matcherb);
846 }
847 return false;
848 }
849
850 // Finds the next match allowed by the filter:
851 // Returns true if such a match is found.
852 template <class MatcherA, class MatcherB>
FindNext(MatcherA * matchera,MatcherB * matcherb)853 bool FindNext(MatcherA *matchera, MatcherB *matcherb) {
854 // State when entering this function:
855 // 'matchera' is pointed to a match (x,y) for label x, and a match for y was
856 // requested on 'matcherb'.
857 while (!matchera->Done() || !matcherb->Done()) {
858 if (matcherb->Done()) {
859 // If no more matches for y on 'matcherb'
860 // move forward on 'matchera' until a match (x,y') is found
861 // such that there is a match for y' on 'matcherb'.
862 matchera->Next();
863 while (!matchera->Done() &&
864 !matcherb->Find(
865 match_type_ == MATCH_INPUT ?
866 matchera->Value().olabel : matchera->Value().ilabel)) {
867 matchera->Next();
868 }
869 }
870 while (!matcherb->Done()) {
871 // 'matchera' is pointing to a match (x,y') ('arca') and
872 // 'matcherb' is pointing to a match (y',z') ('arcb').
873 // If combining these two arcs is allowed by the filter
874 // (hence resulting in an arc (x,z')) return true.
875 // Position 'matcherb' on the next potential match for y' before
876 // returning.
877 const Arc &arca = matchera->Value();
878 const Arc &arcb = matcherb->Value();
879 // Position 'matcherb' on the next potential match for y'.
880 matcherb->Next();
881 // If combining these two arcs is allowed by the filter
882 // (hence resulting in an arc (x,z')) return true.
883 // returning. Otherwise consider next match for y' on 'matcherb'.
884 if (MatchArc(s_,
885 match_type_ == MATCH_INPUT ? arca : arcb,
886 match_type_ == MATCH_INPUT ? arcb : arca)) {
887 return true;
888 }
889 }
890 }
891 // Both 'matchera' and 'matcherb' are done, no more match to analyse.
892 return false;
893 }
894
895 private:
SetState_(StateId s)896 virtual void SetState_(StateId s) {
897 if (s_ == s) return;
898 s_ = s;
899 StateTuple tuple = impl_->state_table_->Tuple(s);
900 matcher1_->SetState(tuple.StateId1());
901 matcher2_->SetState(tuple.StateId2());
902 loop_.nextstate = s_;
903 }
904
Find_(Label label)905 virtual bool Find_(Label label) {
906 bool found = false;
907 current_loop_ = false;
908 if (label == 0) {
909 current_loop_ = true;
910 found = true;
911 }
912 if (match_type_ == MATCH_INPUT)
913 found = found || FindLabel(label, matcher1_, matcher2_);
914 else // match_type_ == MATCH_OUTPUT
915 found = found || FindLabel(label, matcher2_, matcher1_);
916 return found;
917 }
918
Done_()919 virtual bool Done_() const {
920 return !current_loop_ && matcher1_->Done() && matcher2_->Done();
921 }
922
Value_()923 virtual const Arc &Value_() const {
924 return current_loop_ ? loop_ : arc_;
925 }
926
Next_()927 virtual void Next_() {
928 if (current_loop_)
929 current_loop_ = false;
930 else if (match_type_ == MATCH_INPUT)
931 FindNext(matcher1_, matcher2_);
932 else // match_type_ == MATCH_OUTPUT
933 FindNext(matcher2_, matcher1_);
934 }
935
Priority_(StateId s)936 virtual ssize_t Priority_(StateId s) { return fst_.NumArcs(s); }
937
938 private:
939 const ComposeFst<Arc, C> &fst_;
940 const ComposeFstImpl<C, F, T> *impl_;
941 StateId s_;
942 MatchType match_type_;
943 Matcher1 *matcher1_;
944 Matcher2 *matcher2_;
945 bool current_loop_;
946 Arc loop_;
947 Arc arc_;
948 bool error_;
949 };
950
951
952 // Useful alias when using StdArc.
953 typedef ComposeFst<StdArc> StdComposeFst;
954
955 enum ComposeFilter { AUTO_FILTER, NULL_FILTER, SEQUENCE_FILTER,
956 ALT_SEQUENCE_FILTER, MATCH_FILTER };
957
958 struct ComposeOptions {
959 bool connect; // Connect output
960 ComposeFilter filter_type; // Which pre-defined filter to use
961
962 ComposeOptions(bool c, ComposeFilter ft = AUTO_FILTER)
connectComposeOptions963 : connect(c), filter_type(ft) {}
ComposeOptionsComposeOptions964 ComposeOptions() : connect(true), filter_type(AUTO_FILTER) {}
965 };
966
967 // Computes the composition of two transducers. This version writes
968 // the composed FST into a MutableFst. If FST1 transduces string x to
969 // y with weight a and FST2 transduces y to z with weight b, then
970 // their composition transduces string x to z with weight
971 // Times(x, z).
972 //
973 // The output labels of the first transducer or the input labels of
974 // the second transducer must be sorted. The weights need to form a
975 // commutative semiring (valid for TropicalWeight and LogWeight).
976 //
977 // Complexity:
978 // Assuming the first FST is unsorted and the second is sorted:
979 // - Time: O(V1 V2 D1 (log D2 + M2)),
980 // - Space: O(V1 V2 D1 M2)
981 // where Vi = # of states, Di = maximum out-degree, and Mi is
982 // the maximum multiplicity for the ith FST.
983 //
984 // Caveats:
985 // - Compose trims its output.
986 // - The efficiency of composition can be strongly affected by several factors:
987 // - the choice of which transducer is sorted - prefer sorting the FST
988 // that has the greater average out-degree.
989 // - the amount of non-determinism
990 // - the presence and location of epsilon transitions - avoid epsilon
991 // transitions on the output side of the first transducer or
992 // the input side of the second transducer or prefer placing
993 // them later in a path since they delay matching and can
994 // introduce non-coaccessible states and transitions.
995 template <class Arc>
996 void Compose(const Fst<Arc> &ifst1, const Fst<Arc> &ifst2,
997 MutableFst<Arc> *ofst,
998 const ComposeOptions &opts = ComposeOptions()) {
999 typedef Matcher< Fst<Arc> > M;
1000
1001 if (opts.filter_type == AUTO_FILTER) {
1002 CacheOptions nopts;
1003 nopts.gc_limit = 0; // Cache only the last state for fastest copy.
1004 *ofst = ComposeFst<Arc>(ifst1, ifst2, nopts);
1005 } else if (opts.filter_type == NULL_FILTER) {
1006 ComposeFstOptions<Arc, M, NullComposeFilter<M> > copts;
1007 copts.gc_limit = 0; // Cache only the last state for fastest copy.
1008 *ofst = ComposeFst<Arc>(ifst1, ifst2, copts);
1009 } else if (opts.filter_type == SEQUENCE_FILTER) {
1010 ComposeFstOptions<Arc, M, SequenceComposeFilter<M> > copts;
1011 copts.gc_limit = 0; // Cache only the last state for fastest copy.
1012 *ofst = ComposeFst<Arc>(ifst1, ifst2, copts);
1013 } else if (opts.filter_type == ALT_SEQUENCE_FILTER) {
1014 ComposeFstOptions<Arc, M, AltSequenceComposeFilter<M> > copts;
1015 copts.gc_limit = 0; // Cache only the last state for fastest copy.
1016 *ofst = ComposeFst<Arc>(ifst1, ifst2, copts);
1017 } else if (opts.filter_type == MATCH_FILTER) {
1018 ComposeFstOptions<Arc, M, MatchComposeFilter<M> > copts;
1019 copts.gc_limit = 0; // Cache only the last state for fastest copy.
1020 *ofst = ComposeFst<Arc>(ifst1, ifst2, copts);
1021 }
1022
1023 if (opts.connect)
1024 Connect(ofst);
1025 }
1026
1027 } // namespace fst
1028
1029 #endif // FST_LIB_COMPOSE_H__
1030