1 // fst.h
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Finite-State Transducer (FST) - abstract base class definition,
20 // state and arc iterator interface, and suggested base implementation.
21 //
22 
23 #ifndef FST_LIB_FST_H__
24 #define FST_LIB_FST_H__
25 
26 #include <stddef.h>
27 #include <sys/types.h>
28 #include <cmath>
29 #include <string>
30 
31 #include <fst/compat.h>
32 #include <fst/types.h>
33 
34 #include <fst/arc.h>
35 #include <fst/memory.h>
36 #include <fst/properties.h>
37 #include <fst/register.h>
38 #include <iostream>
39 #include <fstream>
40 #include <sstream>
41 #include <fst/symbol-table.h>
42 #include <fst/util.h>
43 
44 
45 DECLARE_bool(fst_align);
46 
47 namespace fst {
48 
49 bool IsFstHeader(istream &, const string &);
50 
51 class FstHeader;
52 template <class A> class StateIteratorData;
53 template <class A> class ArcIteratorData;
54 template <class A> class MatcherBase;
55 
56 struct FstReadOptions {
57   // FileReadMode(s) are advisory, there are many conditions than prevent a
58   // file from being mapped, READ mode will be selected in these cases with
59   // a warning indicating why it was chosen.
60   enum FileReadMode { READ, MAP };
61 
62   string source;                // Where you're reading from
63   const FstHeader *header;      // Pointer to Fst header. If non-zero, use
64                                 // this info (don't read a stream header)
65   const SymbolTable* isymbols;  // Pointer to input symbols. If non-zero, use
66                                 // this info (read and skip stream isymbols)
67   const SymbolTable* osymbols;  // Pointer to output symbols. If non-zero, use
68                                 // this info (read and skip stream osymbols)
69   FileReadMode mode;            // Read or map files (advisory, if possible)
70   bool read_isymbols;           // Read isymbols, if any, default true
71   bool read_osymbols;           // Read osymbols, if any, default true
72 
73   explicit FstReadOptions(const string& src = "<unspecified>",
74                           const FstHeader *hdr = 0,
75                           const SymbolTable* isym = 0,
76                           const SymbolTable* osym = 0);
77 
78   explicit FstReadOptions(const string& src,
79                           const SymbolTable* isym,
80                           const SymbolTable* osym = 0);
81 
82   // Helper function to convert strings FileReadModes into their enum value.
83   static FileReadMode ReadMode(const string &mode);
84 };
85 
86 struct FstWriteOptions {
87   string source;                 // Where you're writing to
88   bool write_header;             // Write the header?
89   bool write_isymbols;           // Write input symbols?
90   bool write_osymbols;           // Write output symbols?
91   bool align;                    // Write data aligned where appropriate;
92                                  // this may fail on pipes
93 
94   explicit FstWriteOptions(const string& src = "<unspecifed>",
95                            bool hdr = true, bool isym = true,
96                            bool osym = true, bool alig = FLAGS_fst_align)
sourceFstWriteOptions97       : source(src), write_header(hdr),
98         write_isymbols(isym), write_osymbols(osym), align(alig) {}
99 };
100 
101 //
102 // Fst HEADER CLASS
103 //
104 // This is the recommended Fst file header representation.
105 //
106 class FstHeader {
107  public:
108   enum {
109     HAS_ISYMBOLS = 0x1,          // Has input symbol table
110     HAS_OSYMBOLS = 0x2,          // Has output symbol table
111     IS_ALIGNED   = 0x4,          // Memory-aligned (where appropriate)
112   } Flags;
113 
FstHeader()114   FstHeader() : version_(0), flags_(0), properties_(0), start_(-1),
115                 numstates_(0), numarcs_(0) {}
FstType()116   const string &FstType() const { return fsttype_; }
ArcType()117   const string &ArcType() const { return arctype_; }
Version()118   int32 Version() const { return version_; }
GetFlags()119   int32 GetFlags() const { return flags_; }
Properties()120   uint64 Properties() const { return properties_; }
Start()121   int64 Start() const { return start_; }
NumStates()122   int64 NumStates() const { return numstates_; }
NumArcs()123   int64 NumArcs() const { return numarcs_; }
124 
SetFstType(const string & type)125   void SetFstType(const string& type) { fsttype_ = type; }
SetArcType(const string & type)126   void SetArcType(const string& type) { arctype_ = type; }
SetVersion(int32 version)127   void SetVersion(int32 version) { version_ = version; }
SetFlags(int32 flags)128   void SetFlags(int32 flags) { flags_ = flags; }
SetProperties(uint64 properties)129   void SetProperties(uint64 properties) { properties_ = properties; }
SetStart(int64 start)130   void SetStart(int64 start) { start_ = start; }
SetNumStates(int64 numstates)131   void SetNumStates(int64 numstates) { numstates_ = numstates; }
SetNumArcs(int64 numarcs)132   void SetNumArcs(int64 numarcs) { numarcs_ = numarcs; }
133 
134   bool Read(istream &strm, const string &source, bool rewind = false);
135   bool Write(ostream &strm, const string &source) const;
136 
137  private:
138 
139   string fsttype_;                   // E.g. "vector"
140   string arctype_;                   // E.g. "standard"
141   int32 version_;                    // Type version #
142   int32 flags_;                      // File format bits
143   uint64 properties_;                // FST property bits
144   int64 start_;                      // Start state
145   int64 numstates_;                  // # of states
146   int64 numarcs_;                    // # of arcs
147 };
148 
149 
150 // Specifies matcher action.
151 enum MatchType { MATCH_INPUT,      // Match input label.
152                  MATCH_OUTPUT,     // Match output label.
153                  MATCH_BOTH,       // Match input or output label.
154                  MATCH_NONE,       // Match nothing.
155                  MATCH_UNKNOWN };  // Match type unknown.
156 
157 //
158 // Fst INTERFACE CLASS DEFINITION
159 //
160 
161 // A generic FST, templated on the arc definition, with
162 // common-demoninator methods (use StateIterator and ArcIterator to
163 // iterate over its states and arcs).
164 template <class A>
165 class Fst {
166  public:
167   typedef A Arc;
168   typedef typename A::Weight Weight;
169   typedef typename A::StateId StateId;
170 
~Fst()171   virtual ~Fst() {}
172 
173   virtual StateId Start() const = 0;          // Initial state
174 
175   virtual Weight Final(StateId) const = 0;    // State's final weight
176 
177   virtual size_t NumArcs(StateId) const = 0;  // State's arc count
178 
179   virtual size_t NumInputEpsilons(StateId)
180       const = 0;                              // State's input epsilon count
181 
182   virtual size_t NumOutputEpsilons(StateId)
183       const = 0;                              // State's output epsilon count
184 
185   // If test=false, return stored properties bits for mask (some poss. unknown)
186   // If test=true, return property bits for mask (computing o.w. unknown)
187   virtual uint64 Properties(uint64 mask, bool test)
188       const = 0;  // Property bits
189 
190   virtual const string& Type() const = 0;    // Fst type name
191 
192   // Get a copy of this Fst. The copying behaves as follows:
193   //
194   // (1) The copying is constant time if safe = false or if safe = true
195   // and is on an otherwise unaccessed Fst.
196   //
197   // (2) If safe = true, the copy is thread-safe in that the original
198   // and copy can be safely accessed (but not necessarily mutated) by
199   // separate threads. For some Fst types, 'Copy(true)' should only be
200   // called on an Fst that has not otherwise been accessed. Its behavior
201   // is undefined otherwise.
202   //
203   // (3) If a MutableFst is copied and then mutated, then the original is
204   // unmodified and vice versa (often by a copy-on-write on the initial
205   // mutation, which may not be constant time).
206   virtual Fst<A> *Copy(bool safe = false) const = 0;
207 
208   // Read an Fst from an input stream; returns NULL on error
Read(istream & strm,const FstReadOptions & opts)209   static Fst<A> *Read(istream &strm, const FstReadOptions &opts) {
210     FstReadOptions ropts(opts);
211     FstHeader hdr;
212     if (ropts.header)
213       hdr = *opts.header;
214     else {
215       if (!hdr.Read(strm, opts.source))
216         return 0;
217       ropts.header = &hdr;
218     }
219     FstRegister<A> *registr = FstRegister<A>::GetRegister();
220     const typename FstRegister<A>::Reader reader =
221       registr->GetReader(hdr.FstType());
222     if (!reader) {
223       LOG(ERROR) << "Fst::Read: Unknown FST type \"" << hdr.FstType()
224                  << "\" (arc type = \"" << A::Type()
225                  << "\"): " << ropts.source;
226       return 0;
227     }
228     return reader(strm, ropts);
229   }
230 
231   // Read an Fst from a file; return NULL on error
232   // Empty filename reads from standard input
Read(const string & filename)233   static Fst<A> *Read(const string &filename) {
234     if (!filename.empty()) {
235       ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
236       if (!strm) {
237         LOG(ERROR) << "Fst::Read: Can't open file: " << filename;
238         return 0;
239       }
240       return Read(strm, FstReadOptions(filename));
241     } else {
242       return Read(cin, FstReadOptions("standard input"));
243     }
244   }
245 
246   // Write an Fst to an output stream; return false on error
Write(ostream & strm,const FstWriteOptions & opts)247   virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
248     LOG(ERROR) << "Fst::Write: No write stream method for " << Type()
249                << " Fst type";
250     return false;
251   }
252 
253   // Write an Fst to a file; return false on error
254   // Empty filename writes to standard output
Write(const string & filename)255   virtual bool Write(const string &filename) const {
256     LOG(ERROR) << "Fst::Write: No write filename method for " << Type()
257                << " Fst type";
258     return false;
259   }
260 
261   // Return input label symbol table; return NULL if not specified
262   virtual const SymbolTable* InputSymbols() const = 0;
263 
264   // Return output label symbol table; return NULL if not specified
265   virtual const SymbolTable* OutputSymbols() const = 0;
266 
267   // For generic state iterator construction; not normally called
268   // directly by users.
269   virtual void InitStateIterator(StateIteratorData<A> *) const = 0;
270 
271   // For generic arc iterator construction; not normally called
272   // directly by users.
273   virtual void InitArcIterator(StateId s, ArcIteratorData<A> *) const = 0;
274 
275   // For generic matcher construction; not normally called
276   // directly by users.
277   virtual MatcherBase<A> *InitMatcher(MatchType match_type) const;
278 
279  protected:
WriteFile(const string & filename)280   bool WriteFile(const string &filename) const {
281     if (!filename.empty()) {
282       ofstream strm(filename.c_str(), ofstream::out | ofstream::binary);
283       if (!strm) {
284         LOG(ERROR) << "Fst::Write: Can't open file: " << filename;
285         return false;
286       }
287       return Write(strm, FstWriteOptions(filename));
288     } else {
289       return Write(cout, FstWriteOptions("standard output"));
290     }
291   }
292 };
293 
294 
295 //
296 // STATE and ARC ITERATOR DEFINITIONS
297 //
298 
299 // State iterator interface templated on the Arc definition; used
300 // for StateIterator specializations returned by the InitStateIterator
301 // Fst method.
302 template <class A>
303 class StateIteratorBase {
304  public:
305   typedef A Arc;
306   typedef typename A::StateId StateId;
307 
~StateIteratorBase()308   virtual ~StateIteratorBase() {}
309 
Done()310   bool Done() const { return Done_(); }       // End of iterator?
Value()311   StateId Value() const { return Value_(); }  // Current state (when !Done)
Next()312   void Next() { Next_(); }      // Advance to next state (when !Done)
Reset()313   void Reset() { Reset_(); }    // Return to initial condition
314 
315  private:
316   // This allows base class virtual access to non-virtual derived-
317   // class members of the same name. It makes the derived class more
318   // efficient to use but unsafe to further derive.
319   virtual bool Done_() const = 0;
320   virtual StateId Value_() const = 0;
321   virtual void Next_() = 0;
322   virtual void Reset_() = 0;
323 };
324 
325 
326 // StateIterator initialization data
327 
328 template <class A> struct StateIteratorData {
329   StateIteratorBase<A> *base;   // Specialized iterator if non-zero
330   typename A::StateId nstates;  // O.w. total # of states
331 };
332 
333 
334 // Generic state iterator, templated on the FST definition
335 // - a wrapper around pointer to specific one.
336 // Here is a typical use: \code
337 //   for (StateIterator<StdFst> siter(fst);
338 //        !siter.Done();
339 //        siter.Next()) {
340 //     StateId s = siter.Value();
341 //     ...
342 //   } \endcode
343 template <class F>
344 class StateIterator {
345  public:
346   typedef F FST;
347   typedef typename F::Arc Arc;
348   typedef typename Arc::StateId StateId;
349 
StateIterator(const F & fst)350   explicit StateIterator(const F &fst) : s_(0) {
351     fst.InitStateIterator(&data_);
352   }
353 
~StateIterator()354   ~StateIterator() { if (data_.base) delete data_.base; }
355 
Done()356   bool Done() const {
357     return data_.base ? data_.base->Done() : s_ >= data_.nstates;
358   }
359 
Value()360   StateId Value() const { return data_.base ? data_.base->Value() : s_; }
361 
Next()362   void Next() {
363     if (data_.base)
364       data_.base->Next();
365     else
366       ++s_;
367   }
368 
Reset()369   void Reset() {
370     if (data_.base)
371       data_.base->Reset();
372     else
373       s_ = 0;
374   }
375 
376  private:
377   StateIteratorData<Arc> data_;
378   StateId s_;
379 
380   DISALLOW_COPY_AND_ASSIGN(StateIterator);
381 };
382 
383 
384 // Flags to control the behavior on an arc iterator:
385 static const uint32 kArcILabelValue    = 0x0001;  // Value() gives valid ilabel
386 static const uint32 kArcOLabelValue    = 0x0002;  //  "       "     "    olabel
387 static const uint32 kArcWeightValue    = 0x0004;  //  "       "     "    weight
388 static const uint32 kArcNextStateValue = 0x0008;  //  "       "     " nextstate
389 static const uint32 kArcNoCache   = 0x0010;       // No need to cache arcs
390 
391 static const uint32 kArcValueFlags =
392                   kArcILabelValue | kArcOLabelValue |
393                   kArcWeightValue | kArcNextStateValue;
394 
395 static const uint32 kArcFlags = kArcValueFlags | kArcNoCache;
396 
397 
398 // Arc iterator interface, templated on the Arc definition; used
399 // for Arc iterator specializations that are returned by the InitArcIterator
400 // Fst method.
401 template <class A>
402 class ArcIteratorBase {
403  public:
404   typedef A Arc;
405   typedef typename A::StateId StateId;
406 
~ArcIteratorBase()407   virtual ~ArcIteratorBase() {}
408 
Done()409   bool Done() const { return Done_(); }            // End of iterator?
Value()410   const A& Value() const { return Value_(); }      // Current arc (when !Done)
Next()411   void Next() { Next_(); }           // Advance to next arc (when !Done)
Position()412   size_t Position() const { return Position_(); }  // Return current position
Reset()413   void Reset() { Reset_(); }         // Return to initial condition
Seek(size_t a)414   void Seek(size_t a) { Seek_(a); }  // Random arc access by position
Flags()415   uint32 Flags() const { return Flags_(); }  // Return current behavorial flags
SetFlags(uint32 flags,uint32 mask)416   void SetFlags(uint32 flags, uint32 mask) {  // Set behavorial flags
417     SetFlags_(flags, mask);
418   }
419 
420  private:
421   // This allows base class virtual access to non-virtual derived-
422   // class members of the same name. It makes the derived class more
423   // efficient to use but unsafe to further derive.
424   virtual bool Done_() const = 0;
425   virtual const A& Value_() const = 0;
426   virtual void Next_() = 0;
427   virtual size_t Position_() const = 0;
428   virtual void Reset_() = 0;
429   virtual void Seek_(size_t a) = 0;
430   virtual uint32 Flags_() const = 0;
431   virtual void SetFlags_(uint32 flags, uint32 mask) = 0;
432 };
433 
434 
435 // ArcIterator initialization data
436 template <class A> struct ArcIteratorData {
437   ArcIteratorBase<A> *base;  // Specialized iterator if non-zero
438   const A *arcs;             // O.w. arcs pointer
439   size_t narcs;              // ... and arc count
440   int *ref_count;            // ... and reference count if non-zero
441 };
442 
443 // Generic arc iterator, templated on the FST definition
444 // - a wrapper around pointer to specific one.
445 // Here is a typical use: \code
446 //   for (ArcIterator<StdFst> aiter(fst, s));
447 //        !aiter.Done();
448 //         aiter.Next()) {
449 //     StdArc &arc = aiter.Value();
450 //     ...
451 //   } \endcode
452 template <class F>
453 class ArcIterator {
454    public:
455   typedef F FST;
456   typedef typename F::Arc Arc;
457   typedef typename Arc::StateId StateId;
458 
ArcIterator(const F & fst,StateId s)459   ArcIterator(const F &fst, StateId s) : i_(0) {
460     fst.InitArcIterator(s, &data_);
461   }
462 
ArcIterator(const ArcIteratorData<Arc> & data)463   explicit ArcIterator(const ArcIteratorData<Arc> &data) : data_(data), i_(0) {
464     if (data_.ref_count)
465       ++(*data_.ref_count);
466   }
467 
~ArcIterator()468   ~ArcIterator() {
469     if (data_.base)
470       delete data_.base;
471     else if (data_.ref_count)
472       --(*data_.ref_count);
473   }
474 
Done()475   bool Done() const {
476     return data_.base ?  data_.base->Done() : i_ >= data_.narcs;
477   }
478 
Value()479   const Arc& Value() const {
480     return data_.base ? data_.base->Value() : data_.arcs[i_];
481   }
482 
Next()483   void Next() {
484     if (data_.base)
485       data_.base->Next();
486     else
487       ++i_;
488   }
489 
Reset()490   void Reset() {
491     if (data_.base)
492       data_.base->Reset();
493     else
494       i_ = 0;
495   }
496 
Seek(size_t a)497   void Seek(size_t a) {
498     if (data_.base)
499       data_.base->Seek(a);
500     else
501       i_ = a;
502   }
503 
Position()504   size_t Position() const {
505     return data_.base ? data_.base->Position() : i_;
506   }
507 
Flags()508   uint32 Flags() const {
509     if (data_.base)
510       return data_.base->Flags();
511     else
512       return kArcValueFlags;
513   }
514 
SetFlags(uint32 flags,uint32 mask)515   void SetFlags(uint32 flags, uint32 mask) {
516     if (data_.base)
517       data_.base->SetFlags(flags, mask);
518   }
519 
520  private:
521   ArcIteratorData<Arc> data_;
522   size_t i_;
523   DISALLOW_COPY_AND_ASSIGN(ArcIterator);
524 };
525 
526 }  // namespace fst
527 
528 // ArcIterator placement operator new and destroy function.
529 // new needs to be in the global namespace
530 
531 template <class F>
new(size_t size,fst::MemoryPool<fst::ArcIterator<F>> * pool)532 void* operator new(size_t size,
533                    fst::MemoryPool< fst::ArcIterator<F> > *pool) {
534   return pool->Allocate();
535 }
536 
537 namespace fst {
538 
539 template <class F>
Destroy(fst::ArcIterator<F> * aiter,fst::MemoryPool<fst::ArcIterator<F>> * pool)540 inline void Destroy(fst::ArcIterator<F> *aiter,
541                     fst::MemoryPool< fst::ArcIterator<F> > *pool) {
542   if (aiter) {
543     aiter->~ArcIterator<F>();
544     pool->Free(aiter);
545   }
546 }
547 
548 
549 //
550 // MATCHER DEFINITIONS
551 //
552 
553 template <class A>
InitMatcher(MatchType match_type)554 MatcherBase<A> *Fst<A>::InitMatcher(MatchType match_type) const {
555   return 0;  // Use the default matcher
556 }
557 
558 
559 //
560 // FST ACCESSORS - Useful functions in high-performance cases.
561 //
562 
563 namespace internal {
564 
565 // General case - requires non-abstract, 'final' methods. Use for inlining.
566 template <class F> inline
Final(const F & fst,typename F::Arc::StateId s)567 typename F::Arc::Weight Final(const F &fst, typename F::Arc::StateId s) {
568   return fst.F::Final(s);
569 }
570 
571 template <class F> inline
NumArcs(const F & fst,typename F::Arc::StateId s)572 ssize_t NumArcs(const F &fst, typename F::Arc::StateId s) {
573   return fst.F::NumArcs(s);
574 }
575 
576 template <class F> inline
NumInputEpsilons(const F & fst,typename F::Arc::StateId s)577 ssize_t NumInputEpsilons(const F &fst, typename F::Arc::StateId s) {
578   return fst.F::NumInputEpsilons(s);
579 }
580 
581 template <class F> inline
NumOutputEpsilons(const F & fst,typename F::Arc::StateId s)582 ssize_t NumOutputEpsilons(const F &fst, typename F::Arc::StateId s) {
583   return fst.F::NumOutputEpsilons(s);
584 }
585 
586 
587 //  Fst<A> case - abstract methods.
588 template <class A> inline
Final(const Fst<A> & fst,typename A::StateId s)589 typename A::Weight Final(const Fst<A> &fst, typename A::StateId s) {
590   return fst.Final(s);
591 }
592 
593 template <class A> inline
NumArcs(const Fst<A> & fst,typename A::StateId s)594 ssize_t NumArcs(const Fst<A> &fst, typename A::StateId s) {
595   return fst.NumArcs(s);
596 }
597 
598 template <class A> inline
NumInputEpsilons(const Fst<A> & fst,typename A::StateId s)599 ssize_t NumInputEpsilons(const Fst<A> &fst, typename A::StateId s) {
600   return fst.NumInputEpsilons(s);
601 }
602 
603 template <class A> inline
NumOutputEpsilons(const Fst<A> & fst,typename A::StateId s)604 ssize_t NumOutputEpsilons(const Fst<A> &fst, typename A::StateId s) {
605   return fst.NumOutputEpsilons(s);
606 }
607 
608 }  // namespace internal
609 
610 // A useful alias when using StdArc.
611 typedef Fst<StdArc> StdFst;
612 
613 
614 //
615 //  CONSTANT DEFINITIONS
616 //
617 
618 const int kNoStateId   =  -1;  // Not a valid state ID
619 const int kNoLabel     =  -1;  // Not a valid label
620 
621 //
622 // Fst IMPLEMENTATION BASE
623 //
624 // This is the recommended Fst implementation base class. It will
625 // handle reference counts, property bits, type information and symbols.
626 //
627 
628 template <class A> class FstImpl {
629  public:
630   typedef typename A::Weight Weight;
631   typedef typename A::StateId StateId;
632 
FstImpl()633   FstImpl()
634       : properties_(0), type_("null"), isymbols_(0), osymbols_(0) {}
635 
FstImpl(const FstImpl<A> & impl)636   FstImpl(const FstImpl<A> &impl)
637       : properties_(impl.properties_), type_(impl.type_),
638         isymbols_(impl.isymbols_ ? impl.isymbols_->Copy() : 0),
639         osymbols_(impl.osymbols_ ? impl.osymbols_->Copy() : 0) {}
640 
~FstImpl()641   virtual ~FstImpl() {
642     delete isymbols_;
643     delete osymbols_;
644   }
645 
Type()646   const string& Type() const { return type_; }
647 
SetType(const string & type)648   void SetType(const string &type) { type_ = type; }
649 
Properties()650   virtual uint64 Properties() const { return properties_; }
651 
Properties(uint64 mask)652   virtual uint64 Properties(uint64 mask) const { return properties_ & mask; }
653 
SetProperties(uint64 props)654   void SetProperties(uint64 props) {
655     properties_ &= kError;          // kError can't be cleared
656     properties_ |= props;
657   }
658 
SetProperties(uint64 props,uint64 mask)659   void SetProperties(uint64 props, uint64 mask) {
660     properties_ &= ~mask | kError;  // kError can't be cleared
661     properties_ |= props & mask;
662   }
663 
664   // Allows (only) setting error bit on const FST impls
SetProperties(uint64 props,uint64 mask)665   void SetProperties(uint64 props, uint64 mask) const {
666     if (mask != kError)
667       FSTERROR() << "FstImpl::SetProperties() const: can only set kError";
668     properties_ |= kError;
669   }
670 
InputSymbols()671   const SymbolTable* InputSymbols() const { return isymbols_; }
672 
OutputSymbols()673   const SymbolTable* OutputSymbols() const { return osymbols_; }
674 
InputSymbols()675   SymbolTable* InputSymbols() { return isymbols_; }
676 
OutputSymbols()677   SymbolTable* OutputSymbols() { return osymbols_; }
678 
SetInputSymbols(const SymbolTable * isyms)679   void SetInputSymbols(const SymbolTable* isyms) {
680     if (isymbols_) delete isymbols_;
681     isymbols_ = isyms ? isyms->Copy() : 0;
682   }
683 
SetOutputSymbols(const SymbolTable * osyms)684   void SetOutputSymbols(const SymbolTable* osyms) {
685     if (osymbols_) delete osymbols_;
686     osymbols_ = osyms ? osyms->Copy() : 0;
687   }
688 
RefCount()689   int RefCount() const {
690     return ref_count_.count();
691   }
692 
IncrRefCount()693   int IncrRefCount() {
694     return ref_count_.Incr();
695   }
696 
DecrRefCount()697   int DecrRefCount() {
698     return ref_count_.Decr();
699   }
700 
701   // Read-in header and symbols from input stream, initialize Fst, and
702   // return the header.  If opts.header is non-null, skip read-in and
703   // use the option value.  If opts.[io]symbols is non-null, read-in
704   // (if present), but use the option value.
705   bool ReadHeader(istream &strm, const FstReadOptions& opts,
706                   int min_version, FstHeader *hdr);
707 
708   // Write-out header and symbols from output stream.
709   // If a opts.header is false, skip writing header.
710   // If opts.[io]symbols is false, skip writing those symbols.
711   // This method is needed for Impl's that implement Write methods.
WriteHeader(ostream & strm,const FstWriteOptions & opts,int version,FstHeader * hdr)712   void WriteHeader(ostream &strm, const FstWriteOptions& opts,
713                    int version, FstHeader *hdr) const {
714     if (opts.write_header) {
715       hdr->SetFstType(type_);
716       hdr->SetArcType(A::Type());
717       hdr->SetVersion(version);
718       hdr->SetProperties(properties_);
719       int32 file_flags = 0;
720       if (isymbols_ && opts.write_isymbols)
721         file_flags |= FstHeader::HAS_ISYMBOLS;
722       if (osymbols_ && opts.write_osymbols)
723         file_flags |= FstHeader::HAS_OSYMBOLS;
724       if (opts.align)
725         file_flags |= FstHeader::IS_ALIGNED;
726       hdr->SetFlags(file_flags);
727       hdr->Write(strm, opts.source);
728     }
729     if (isymbols_ && opts.write_isymbols) isymbols_->Write(strm);
730     if (osymbols_ && opts.write_osymbols) osymbols_->Write(strm);
731   }
732 
733   // Write-out header and symbols to output stream.
734   // If a opts.header is false, skip writing header.
735   // If opts.[io]symbols is false, skip writing those symbols.
736   // type is the Fst type being written.
737   // This method is used in the cross-type serialization methods Fst::WriteFst.
WriteFstHeader(const Fst<A> & fst,ostream & strm,const FstWriteOptions & opts,int version,const string & type,uint64 properties,FstHeader * hdr)738   static void WriteFstHeader(const Fst<A> &fst, ostream &strm,
739                              const FstWriteOptions& opts, int version,
740                              const string &type, uint64 properties,
741                              FstHeader *hdr) {
742     if (opts.write_header) {
743       hdr->SetFstType(type);
744       hdr->SetArcType(A::Type());
745       hdr->SetVersion(version);
746       hdr->SetProperties(properties);
747       int32 file_flags = 0;
748       if (fst.InputSymbols() && opts.write_isymbols)
749         file_flags |= FstHeader::HAS_ISYMBOLS;
750       if (fst.OutputSymbols() && opts.write_osymbols)
751         file_flags |= FstHeader::HAS_OSYMBOLS;
752       if (opts.align)
753         file_flags |= FstHeader::IS_ALIGNED;
754       hdr->SetFlags(file_flags);
755       hdr->Write(strm, opts.source);
756     }
757     if (fst.InputSymbols() && opts.write_isymbols) {
758       fst.InputSymbols()->Write(strm);
759     }
760     if (fst.OutputSymbols() && opts.write_osymbols) {
761       fst.OutputSymbols()->Write(strm);
762     }
763   }
764 
765   // In serialization routines where the header cannot be written until after
766   // the machine has been serialized, this routine can be called to seek to
767   // the beginning of the file an rewrite the header with updated fields.
768   // It repositions the file pointer back at the end of the file.
769   // returns true on success, false on failure.
UpdateFstHeader(const Fst<A> & fst,ostream & strm,const FstWriteOptions & opts,int version,const string & type,uint64 properties,FstHeader * hdr,size_t header_offset)770   static bool UpdateFstHeader(const Fst<A> &fst, ostream &strm,
771                               const FstWriteOptions& opts, int version,
772                               const string &type, uint64 properties,
773                               FstHeader *hdr, size_t header_offset) {
774     strm.seekp(header_offset);
775     if (!strm) {
776       LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
777       return false;
778     }
779     WriteFstHeader(fst, strm, opts, version, type, properties, hdr);
780     if (!strm) {
781       LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
782       return false;
783     }
784     strm.seekp(0, ios_base::end);
785     if (!strm) {
786       LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
787       return false;
788     }
789     return true;
790   }
791 
792  protected:
793   mutable uint64 properties_;           // Property bits
794 
795  private:
796   string type_;                 // Unique name of Fst class
797   SymbolTable *isymbols_;       // Ilabel symbol table
798   SymbolTable *osymbols_;       // Olabel symbol table
799   RefCounter ref_count_;        // Reference count
800 
801   void operator=(const FstImpl<A> &impl);  // disallow
802 };
803 
804 template <class A> inline
ReadHeader(istream & strm,const FstReadOptions & opts,int min_version,FstHeader * hdr)805 bool FstImpl<A>::ReadHeader(istream &strm, const FstReadOptions& opts,
806                             int min_version, FstHeader *hdr) {
807   if (opts.header)
808     *hdr = *opts.header;
809   else if (!hdr->Read(strm, opts.source))
810     return false;
811 
812   if (FLAGS_v >= 2) {
813     LOG(INFO) << "FstImpl::ReadHeader: source: " << opts.source
814               << ", fst_type: " << hdr->FstType()
815               << ", arc_type: " << A::Type()
816               << ", version: " << hdr->Version()
817               << ", flags: " << hdr->GetFlags();
818   }
819 
820   if (hdr->FstType() != type_) {
821     LOG(ERROR) << "FstImpl::ReadHeader: Fst not of type \"" << type_
822                << "\": " << opts.source;
823     return false;
824   }
825   if (hdr->ArcType() != A::Type()) {
826     LOG(ERROR) << "FstImpl::ReadHeader: Arc not of type \"" << A::Type()
827                << "\": " << opts.source;
828     return false;
829   }
830   if (hdr->Version() < min_version) {
831     LOG(ERROR) << "FstImpl::ReadHeader: Obsolete " << type_
832                << " Fst version: " << opts.source;
833     return false;
834   }
835   properties_ = hdr->Properties();
836   if (hdr->GetFlags() & FstHeader::HAS_ISYMBOLS)
837     isymbols_ = SymbolTable::Read(strm, opts.source);
838   // Input symbol table not wanted; delete
839   if (!opts.read_isymbols)
840     SetInputSymbols(0);
841   if (hdr->GetFlags() & FstHeader::HAS_OSYMBOLS)
842     osymbols_ = SymbolTable::Read(strm, opts.source);
843   // Output symbol table not wanted; delete
844   if (!opts.read_osymbols)
845     SetOutputSymbols(0);
846   if (opts.isymbols) {
847     delete isymbols_;
848     isymbols_ = opts.isymbols->Copy();
849   }
850   if (opts.osymbols) {
851     delete osymbols_;
852     osymbols_ = opts.osymbols->Copy();
853   }
854   return true;
855 }
856 
857 
858 template<class Arc>
859 uint64 TestProperties(const Fst<Arc> &fst, uint64 mask, uint64 *known);
860 
861 
862 // This is a helper class template useful for attaching an Fst interface to
863 // its implementation, handling reference counting.
864 template < class I, class F = Fst<typename I::Arc> >
865 class ImplToFst : public F {
866  public:
867   typedef typename I::Arc Arc;
868   typedef typename Arc::Weight Weight;
869   typedef typename Arc::StateId StateId;
870 
~ImplToFst()871   virtual ~ImplToFst() { if (!impl_->DecrRefCount()) delete impl_;  }
872 
Start()873   virtual StateId Start() const { return impl_->Start(); }
874 
Final(StateId s)875   virtual Weight Final(StateId s) const { return impl_->Final(s); }
876 
NumArcs(StateId s)877   virtual size_t NumArcs(StateId s) const { return impl_->NumArcs(s); }
878 
NumInputEpsilons(StateId s)879   virtual size_t NumInputEpsilons(StateId s) const {
880     return impl_->NumInputEpsilons(s);
881   }
882 
NumOutputEpsilons(StateId s)883   virtual size_t NumOutputEpsilons(StateId s) const {
884     return impl_->NumOutputEpsilons(s);
885   }
886 
Properties(uint64 mask,bool test)887   virtual uint64 Properties(uint64 mask, bool test) const {
888     if (test) {
889       uint64 knownprops, testprops = TestProperties(*this, mask, &knownprops);
890       impl_->SetProperties(testprops, knownprops);
891       return testprops & mask;
892     } else {
893       return impl_->Properties(mask);
894     }
895   }
896 
Type()897   virtual const string& Type() const { return impl_->Type(); }
898 
InputSymbols()899   virtual const SymbolTable* InputSymbols() const {
900     return impl_->InputSymbols();
901   }
902 
OutputSymbols()903   virtual const SymbolTable* OutputSymbols() const {
904     return impl_->OutputSymbols();
905   }
906 
907  protected:
ImplToFst()908   ImplToFst() : impl_(0) {}
909 
ImplToFst(I * impl)910   ImplToFst(I *impl) : impl_(impl) {}
911 
ImplToFst(const ImplToFst<I,F> & fst)912   ImplToFst(const ImplToFst<I, F> &fst) {
913     impl_ = fst.impl_;
914     impl_->IncrRefCount();
915   }
916 
917   // This constructor presumes there is a copy constructor for the
918   // implementation.
ImplToFst(const ImplToFst<I,F> & fst,bool safe)919   ImplToFst(const ImplToFst<I, F> &fst, bool safe) {
920     if (safe) {
921       impl_ = new I(*(fst.impl_));
922     } else {
923       impl_ = fst.impl_;
924       impl_->IncrRefCount();
925     }
926   }
927 
GetImpl()928   I *GetImpl() const { return impl_; }
929 
930   // Change Fst implementation pointer. If 'own_impl' is true,
931   // ownership of the input implementation is given to this
932   // object; otherwise, the input implementation's reference count
933   // should be incremented.
934   void SetImpl(I *impl, bool own_impl = true) {
935     if (!own_impl)
936       impl->IncrRefCount();
937     if (impl_ && !impl_->DecrRefCount()) delete impl_;
938     impl_ = impl;
939   }
940 
941  private:
942   // Disallow
943   ImplToFst<I, F> &operator=(const ImplToFst<I, F> &fst);
944 
945   ImplToFst<I, F> &operator=(const Fst<Arc> &fst) {
946     FSTERROR() << "ImplToFst: Assignment operator disallowed";
947     GetImpl()->SetProperties(kError, kError);
948     return *this;
949   }
950 
951   I *impl_;
952 };
953 
954 
955 // Converts FSTs by casting their implementations, where this makes
956 // sense (which excludes implementations with weight-dependent virtual
957 // methods). Must be a friend of the Fst classes involved (currently
958 // the concrete Fsts: VectorFst, ConstFst, CompactFst).
Cast(const F & ifst,G * ofst)959 template<class F, class G> void Cast(const F &ifst, G *ofst) {
960   ofst->SetImpl(reinterpret_cast<typename G::Impl *>(ifst.GetImpl()), false);
961 }
962 
963 // Fst Serialization
964 template <class A>
FstToString(const Fst<A> & fst,string * result)965 void FstToString(const Fst<A> &fst, string *result) {
966   ostringstream ostrm;
967   fst.Write(ostrm, FstWriteOptions("FstToString"));
968   *result = ostrm.str();
969 }
970 
971 template <class A>
FstToString(const Fst<A> & fst,string * result,const FstWriteOptions & options)972 void FstToString(const Fst<A> &fst, string *result,
973                  const FstWriteOptions& options) {
974   ostringstream ostrm;
975   fst.Write(ostrm, options);
976   *result = ostrm.str();
977 }
978 
979 template <class A>
StringToFst(const string & s)980 Fst<A> *StringToFst(const string &s) {
981   istringstream istrm(s);
982   return Fst<A>::Read(istrm, FstReadOptions("StringToFst"));
983 }
984 
985 }  // namespace fst
986 
987 
988 
989 #endif  // FST_LIB_FST_H__
990