1 // const-fst.h
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Simple concrete immutable FST whose states and arcs are each stored
20 // in single arrays.
21 
22 #ifndef FST_LIB_CONST_FST_H__
23 #define FST_LIB_CONST_FST_H__
24 
25 #include <string>
26 #include <vector>
27 using std::vector;
28 
29 #include <fst/expanded-fst.h>
30 #include <fst/fst-decl.h>  // For optional argument declarations
31 #include <fst/mapped-file.h>
32 #include <fst/test-properties.h>
33 #include <fst/util.h>
34 
35 
36 namespace fst {
37 
38 template <class A, class U> class ConstFst;
39 template <class F, class G> void Cast(const F &, G *);
40 
41 // States and arcs each implemented by single arrays, templated on the
42 // Arc definition. The unsigned type U is used to represent indices into
43 // the arc array.
44 template <class A, class U>
45 class ConstFstImpl : public FstImpl<A> {
46  public:
47   using FstImpl<A>::SetInputSymbols;
48   using FstImpl<A>::SetOutputSymbols;
49   using FstImpl<A>::SetType;
50   using FstImpl<A>::SetProperties;
51   using FstImpl<A>::Properties;
52 
53   typedef A Arc;
54   typedef typename A::Weight Weight;
55   typedef typename A::StateId StateId;
56   typedef U Unsigned;
57 
ConstFstImpl()58   ConstFstImpl()
59       : states_region_(0), arcs_region_(0), states_(0), arcs_(0), nstates_(0),
60         narcs_(0), start_(kNoStateId) {
61     string type = "const";
62     if (sizeof(U) != sizeof(uint32)) {
63       string size;
64       Int64ToStr(8 * sizeof(U), &size);
65       type += size;
66     }
67     SetType(type);
68     SetProperties(kNullProperties | kStaticProperties);
69   }
70 
71   explicit ConstFstImpl(const Fst<A> &fst);
72 
~ConstFstImpl()73   ~ConstFstImpl() {
74     delete arcs_region_;
75     delete states_region_;
76   }
77 
Start()78   StateId Start() const { return start_; }
79 
Final(StateId s)80   Weight Final(StateId s) const { return states_[s].final; }
81 
NumStates()82   StateId NumStates() const { return nstates_; }
83 
NumArcs(StateId s)84   size_t NumArcs(StateId s) const { return states_[s].narcs; }
85 
NumInputEpsilons(StateId s)86   size_t NumInputEpsilons(StateId s) const { return states_[s].niepsilons; }
87 
NumOutputEpsilons(StateId s)88   size_t NumOutputEpsilons(StateId s) const { return states_[s].noepsilons; }
89 
90   static ConstFstImpl<A, U> *Read(istream &strm, const FstReadOptions &opts);
91 
Arcs(StateId s)92   A *Arcs(StateId s) { return arcs_ + states_[s].pos; }
93 
94   // Provide information needed for generic state iterator
InitStateIterator(StateIteratorData<A> * data)95   void InitStateIterator(StateIteratorData<A> *data) const {
96     data->base = 0;
97     data->nstates = nstates_;
98   }
99 
100   // Provide information needed for the generic arc iterator
InitArcIterator(StateId s,ArcIteratorData<A> * data)101   void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
102     data->base = 0;
103     data->arcs = arcs_ + states_[s].pos;
104     data->narcs = states_[s].narcs;
105     data->ref_count = 0;
106   }
107 
108  private:
109   friend class ConstFst<A, U>;  // Allow finding narcs_, nstates_ during Write
110 
111   // States implemented by array *states_ below, arcs by (single) *arcs_.
112   struct State {
113     Weight final;                // Final weight
114     Unsigned pos;                // Start of state's arcs in *arcs_
115     Unsigned narcs;              // Number of arcs (per state)
116     Unsigned niepsilons;         // # of input epsilons
117     Unsigned noepsilons;         // # of output epsilons
StateState118     State() : final(Weight::Zero()), niepsilons(0), noepsilons(0) {}
119   };
120 
121   // Properties always true of this Fst class
122   static const uint64 kStaticProperties = kExpanded;
123   // Current unaligned file format version. The unaligned version was added and
124   // made the default since the aligned version does not work on pipes.
125   static const int kFileVersion = 2;
126   // Current aligned file format version
127   static const int kAlignedFileVersion = 1;
128   // Minimum file format version supported
129   static const int kMinFileVersion = 1;
130 
131   MappedFile *states_region_;    // Mapped file for states
132   MappedFile *arcs_region_;      // Mapped file for arcs
133   State *states_;                // States represenation
134   A *arcs_;                      // Arcs representation
135   StateId nstates_;              // Number of states
136   size_t narcs_;                 // Number of arcs (per FST)
137   StateId start_;                // Initial state
138 
139   DISALLOW_COPY_AND_ASSIGN(ConstFstImpl);
140 };
141 
142 template <class A, class U>
143 const uint64 ConstFstImpl<A, U>::kStaticProperties;
144 template <class A, class U>
145 const int ConstFstImpl<A, U>::kFileVersion;
146 template <class A, class U>
147 const int ConstFstImpl<A, U>::kAlignedFileVersion;
148 template <class A, class U>
149 const int ConstFstImpl<A, U>::kMinFileVersion;
150 
151 
152 template<class A, class U>
ConstFstImpl(const Fst<A> & fst)153 ConstFstImpl<A, U>::ConstFstImpl(const Fst<A> &fst) : nstates_(0), narcs_(0) {
154   string type = "const";
155   if (sizeof(U) != sizeof(uint32)) {
156     string size;
157     Int64ToStr(sizeof(U) * 8, &size);
158     type += size;
159   }
160   SetType(type);
161   SetInputSymbols(fst.InputSymbols());
162   SetOutputSymbols(fst.OutputSymbols());
163   start_ = fst.Start();
164 
165   // Count # of states and arcs.
166   for (StateIterator< Fst<A> > siter(fst);
167        !siter.Done();
168        siter.Next()) {
169     ++nstates_;
170     StateId s = siter.Value();
171     for (ArcIterator< Fst<A> > aiter(fst, s);
172          !aiter.Done();
173          aiter.Next())
174       ++narcs_;
175   }
176   states_region_ = MappedFile::Allocate(nstates_ * sizeof(*states_));
177   arcs_region_ = MappedFile::Allocate(narcs_ * sizeof(*arcs_));
178   states_ = reinterpret_cast<State*>(states_region_->mutable_data());
179   arcs_ = reinterpret_cast<A*>(arcs_region_->mutable_data());
180   size_t pos = 0;
181   for (StateId s = 0; s < nstates_; ++s) {
182     states_[s].final = fst.Final(s);
183     states_[s].pos = pos;
184     states_[s].narcs = 0;
185     states_[s].niepsilons = 0;
186     states_[s].noepsilons = 0;
187     for (ArcIterator< Fst<A> > aiter(fst, s);
188          !aiter.Done();
189          aiter.Next()) {
190       const A &arc = aiter.Value();
191       ++states_[s].narcs;
192       if (arc.ilabel == 0)
193         ++states_[s].niepsilons;
194       if (arc.olabel == 0)
195         ++states_[s].noepsilons;
196       arcs_[pos++] = arc;
197     }
198   }
199   SetProperties(fst.Properties(kCopyProperties, true) | kStaticProperties);
200 }
201 
202 
203 template<class A, class U>
Read(istream & strm,const FstReadOptions & opts)204 ConstFstImpl<A, U> *ConstFstImpl<A, U>::Read(istream &strm,
205                                              const FstReadOptions &opts) {
206   ConstFstImpl<A, U> *impl = new ConstFstImpl<A, U>;
207   FstHeader hdr;
208   if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) {
209     delete impl;
210     return 0;
211   }
212   impl->start_ = hdr.Start();
213   impl->nstates_ = hdr.NumStates();
214   impl->narcs_ = hdr.NumArcs();
215 
216   // Ensures compatibility
217   if (hdr.Version() == kAlignedFileVersion)
218     hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED);
219 
220   if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
221     LOG(ERROR) << "ConstFst::Read: Alignment failed: " << opts.source;
222     delete impl;
223     return 0;
224   }
225 
226   size_t b = impl->nstates_ * sizeof(typename ConstFstImpl<A, U>::State);
227   impl->states_region_ = MappedFile::Map(
228       &strm, opts.mode == FstReadOptions::MAP, opts.source, b);
229   if (!strm || impl->states_region_ == NULL) {
230     LOG(ERROR) << "ConstFst::Read: Read failed: " << opts.source;
231     delete impl;
232     return 0;
233   }
234   impl->states_ = reinterpret_cast<State*>(
235       impl->states_region_->mutable_data());
236   if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
237     LOG(ERROR) << "ConstFst::Read: Alignment failed: " << opts.source;
238     delete impl;
239     return 0;
240   }
241 
242   b = impl->narcs_ * sizeof(A);
243   impl->arcs_region_ = MappedFile::Map(
244       &strm, opts.mode == FstReadOptions::MAP, opts.source, b);
245   if (!strm || impl->arcs_region_ == NULL) {
246     LOG(ERROR) << "ConstFst::Read: Read failed: " << opts.source;
247     delete impl;
248     return 0;
249   }
250   impl->arcs_ = reinterpret_cast<A*>(impl->arcs_region_->mutable_data());
251   return impl;
252 }
253 
254 // Simple concrete immutable FST.  This class attaches interface to
255 // implementation and handles reference counting, delegating most
256 // methods to ImplToExpandedFst. The unsigned type U is used to
257 // represent indices into the arc array (default declared
258 // in fst-decl.h).
259 template <class A, class U /* = uint32 */>
260 class ConstFst : public ImplToExpandedFst< ConstFstImpl<A, U> > {
261  public:
262   friend class StateIterator< ConstFst<A, U> >;
263   friend class ArcIterator< ConstFst<A, U> >;
264   template <class F, class G> void friend Cast(const F &, G *);
265 
266   typedef A Arc;
267   typedef typename A::StateId StateId;
268   typedef ConstFstImpl<A, U> Impl;
269   typedef U Unsigned;
270 
ConstFst()271   ConstFst() : ImplToExpandedFst<Impl>(new Impl()) {}
272 
ConstFst(const Fst<A> & fst)273   explicit ConstFst(const Fst<A> &fst)
274       : ImplToExpandedFst<Impl>(new Impl(fst)) {}
275 
ConstFst(const ConstFst<A,U> & fst)276   ConstFst(const ConstFst<A, U> &fst) : ImplToExpandedFst<Impl>(fst) {}
277 
278   // Get a copy of this ConstFst. See Fst<>::Copy() for further doc.
279   virtual ConstFst<A, U> *Copy(bool safe = false) const {
280     return new ConstFst<A, U>(*this);
281   }
282 
283   // Read a ConstFst from an input stream; return NULL on error
Read(istream & strm,const FstReadOptions & opts)284   static ConstFst<A, U> *Read(istream &strm, const FstReadOptions &opts) {
285     Impl* impl = Impl::Read(strm, opts);
286     return impl ? new ConstFst<A, U>(impl) : 0;
287   }
288 
289   // Read a ConstFst from a file; return NULL on error
290   // Empty filename reads from standard input
Read(const string & filename)291   static ConstFst<A, U> *Read(const string &filename) {
292     Impl* impl = ImplToExpandedFst<Impl>::Read(filename);
293     return impl ? new ConstFst<A, U>(impl) : 0;
294   }
295 
Write(ostream & strm,const FstWriteOptions & opts)296   virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
297     return WriteFst(*this, strm, opts);
298   }
299 
Write(const string & filename)300   virtual bool Write(const string &filename) const {
301     return Fst<A>::WriteFile(filename);
302   }
303 
304   template <class F>
305   static bool WriteFst(const F &fst, ostream &strm,
306                        const FstWriteOptions &opts);
307 
InitStateIterator(StateIteratorData<Arc> * data)308   virtual void InitStateIterator(StateIteratorData<Arc> *data) const {
309     GetImpl()->InitStateIterator(data);
310   }
311 
InitArcIterator(StateId s,ArcIteratorData<Arc> * data)312   virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const {
313     GetImpl()->InitArcIterator(s, data);
314   }
315 
316  private:
ConstFst(Impl * impl)317   explicit ConstFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {}
318 
319   // Makes visible to friends.
GetImpl()320   Impl *GetImpl() const { return ImplToFst<Impl, ExpandedFst<A> >::GetImpl(); }
321 
322   void SetImpl(Impl *impl, bool own_impl = true) {
323     ImplToFst< Impl, ExpandedFst<A> >::SetImpl(impl, own_impl);
324   }
325 
326   // Use overloading to extract the type of the argument.
GetImplIfConstFst(const ConstFst & const_fst)327   static Impl* GetImplIfConstFst(const ConstFst &const_fst) {
328     return const_fst.GetImpl();
329   }
330 
331   // Note that this does not give privileged treatment to subtypes of ConstFst.
332   template<typename NonConstFst>
GetImplIfConstFst(const NonConstFst & fst)333   static Impl* GetImplIfConstFst(const NonConstFst& fst) {
334     return NULL;
335   }
336 
337   void operator=(const ConstFst<A, U> &fst);  // disallow
338 };
339 
340 // Writes Fst in Const format, potentially with a pass over the machine
341 // before writing to compute number of states and arcs.
342 //
343 template <class A, class U>
344 template <class F>
WriteFst(const F & fst,ostream & strm,const FstWriteOptions & opts)345 bool ConstFst<A, U>::WriteFst(const F &fst, ostream &strm,
346                               const FstWriteOptions &opts) {
347   int file_version = opts.align ? ConstFstImpl<A, U>::kAlignedFileVersion :
348       ConstFstImpl<A, U>::kFileVersion;
349   size_t num_arcs = -1, num_states = -1;
350   size_t start_offset = 0;
351   bool update_header = true;
352   if (Impl* impl = GetImplIfConstFst(fst)) {
353     num_arcs = impl->narcs_;
354     num_states = impl->nstates_;
355     update_header = false;
356   } else if ((start_offset = strm.tellp()) == -1) {
357     // precompute values needed for header when we cannot seek to rewrite it.
358     num_arcs = 0;
359     num_states = 0;
360     for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
361       num_arcs += fst.NumArcs(siter.Value());
362       ++num_states;
363     }
364     update_header = false;
365   }
366   FstHeader hdr;
367   hdr.SetStart(fst.Start());
368   hdr.SetNumStates(num_states);
369   hdr.SetNumArcs(num_arcs);
370   string type = "const";
371   if (sizeof(U) != sizeof(uint32)) {
372     string size;
373     Int64ToStr(8 * sizeof(U), &size);
374     type += size;
375   }
376   uint64 properties = fst.Properties(kCopyProperties, true) |
377       ConstFstImpl<A, U>::kStaticProperties;
378   FstImpl<A>::WriteFstHeader(fst, strm, opts, file_version, type, properties,
379                              &hdr);
380   if (opts.align && !AlignOutput(strm)) {
381     LOG(ERROR) << "Could not align file during write after header";
382     return false;
383   }
384   size_t pos = 0, states = 0;
385   typename ConstFstImpl<A, U>::State state;
386   for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
387     state.final = fst.Final(siter.Value());
388     state.pos = pos;
389     state.narcs = fst.NumArcs(siter.Value());
390     state.niepsilons = fst.NumInputEpsilons(siter.Value());
391     state.noepsilons = fst.NumOutputEpsilons(siter.Value());
392     strm.write(reinterpret_cast<const char *>(&state), sizeof(state));
393     pos += state.narcs;
394     ++states;
395   }
396   hdr.SetNumStates(states);
397   hdr.SetNumArcs(pos);
398   if (opts.align && !AlignOutput(strm)) {
399     LOG(ERROR) << "Could not align file during write after writing states";
400   }
401   for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
402     StateId s = siter.Value();
403     for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) {
404       const A &arc = aiter.Value();
405       strm.write(reinterpret_cast<const char *>(&arc), sizeof(arc));
406     }
407   }
408   strm.flush();
409   if (!strm) {
410     LOG(ERROR) << "ConstFst Write write failed: " << opts.source;
411     return false;
412   }
413   if (update_header) {
414     return FstImpl<A>::UpdateFstHeader(fst, strm, opts, file_version, type,
415                                        properties, &hdr, start_offset);
416   } else {
417     if (hdr.NumStates() != num_states) {
418       LOG(ERROR) << "Inconsistent number of states observed during write";
419       return false;
420     }
421     if (hdr.NumArcs() != num_arcs) {
422       LOG(ERROR) << "Inconsistent number of arcs observed during write";
423       return false;
424     }
425   }
426   return true;
427 }
428 
429 // Specialization for ConstFst; see generic version in fst.h
430 // for sample usage (but use the ConstFst type!). This version
431 // should inline.
432 template <class A, class U>
433 class StateIterator< ConstFst<A, U> > {
434  public:
435   typedef typename A::StateId StateId;
436 
StateIterator(const ConstFst<A,U> & fst)437   explicit StateIterator(const ConstFst<A, U> &fst)
438       : nstates_(fst.GetImpl()->NumStates()), s_(0) {}
439 
Done()440   bool Done() const { return s_ >= nstates_; }
441 
Value()442   StateId Value() const { return s_; }
443 
Next()444   void Next() { ++s_; }
445 
Reset()446   void Reset() { s_ = 0; }
447 
448  private:
449   StateId nstates_;
450   StateId s_;
451 
452   DISALLOW_COPY_AND_ASSIGN(StateIterator);
453 };
454 
455 
456 // Specialization for ConstFst; see generic version in fst.h
457 // for sample usage (but use the ConstFst type!). This version
458 // should inline.
459 template <class A, class U>
460 class ArcIterator< ConstFst<A, U> > {
461  public:
462   typedef typename A::StateId StateId;
463 
ArcIterator(const ConstFst<A,U> & fst,StateId s)464   ArcIterator(const ConstFst<A, U> &fst, StateId s)
465       : arcs_(fst.GetImpl()->Arcs(s)),
466         narcs_(fst.GetImpl()->NumArcs(s)), i_(0) {}
467 
Done()468   bool Done() const { return i_ >= narcs_; }
469 
Value()470   const A& Value() const { return arcs_[i_]; }
471 
Next()472   void Next() { ++i_; }
473 
Position()474   size_t Position() const { return i_; }
475 
Reset()476   void Reset() { i_ = 0; }
477 
Seek(size_t a)478   void Seek(size_t a) { i_ = a; }
479 
Flags()480   uint32 Flags() const {
481     return kArcValueFlags;
482   }
483 
SetFlags(uint32 f,uint32 m)484   void SetFlags(uint32 f, uint32 m) {}
485 
486  private:
487   const A *arcs_;
488   size_t narcs_;
489   size_t i_;
490 
491   DISALLOW_COPY_AND_ASSIGN(ArcIterator);
492 };
493 
494 // A useful alias when using StdArc.
495 typedef ConstFst<StdArc> StdConstFst;
496 
497 }  // namespace fst
498 
499 #endif  // FST_LIB_CONST_FST_H__
500