1 // fst.h
2
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Finite-State Transducer (FST) - abstract base class definition,
20 // state and arc iterator interface, and suggested base implementation.
21 //
22
23 #ifndef FST_LIB_FST_H__
24 #define FST_LIB_FST_H__
25
26 #include <stddef.h>
27 #include <sys/types.h>
28 #include <cmath>
29 #include <string>
30
31 #include <fst/compat.h>
32 #include <fst/types.h>
33
34 #include <fst/arc.h>
35 #include <fst/memory.h>
36 #include <fst/properties.h>
37 #include <fst/register.h>
38 #include <iostream>
39 #include <fstream>
40 #include <sstream>
41 #include <fst/symbol-table.h>
42 #include <fst/util.h>
43
44
45 DECLARE_bool(fst_align);
46
47 namespace fst {
48
49 bool IsFstHeader(istream &, const string &);
50
51 class FstHeader;
52 template <class A> class StateIteratorData;
53 template <class A> class ArcIteratorData;
54 template <class A> class MatcherBase;
55
56 struct FstReadOptions {
57 // FileReadMode(s) are advisory, there are many conditions than prevent a
58 // file from being mapped, READ mode will be selected in these cases with
59 // a warning indicating why it was chosen.
60 enum FileReadMode { READ, MAP };
61
62 string source; // Where you're reading from
63 const FstHeader *header; // Pointer to Fst header. If non-zero, use
64 // this info (don't read a stream header)
65 const SymbolTable* isymbols; // Pointer to input symbols. If non-zero, use
66 // this info (read and skip stream isymbols)
67 const SymbolTable* osymbols; // Pointer to output symbols. If non-zero, use
68 // this info (read and skip stream osymbols)
69 FileReadMode mode; // Read or map files (advisory, if possible)
70 bool read_isymbols; // Read isymbols, if any, default true
71 bool read_osymbols; // Read osymbols, if any, default true
72
73 explicit FstReadOptions(const string& src = "<unspecified>",
74 const FstHeader *hdr = 0,
75 const SymbolTable* isym = 0,
76 const SymbolTable* osym = 0);
77
78 explicit FstReadOptions(const string& src,
79 const SymbolTable* isym,
80 const SymbolTable* osym = 0);
81
82 // Helper function to convert strings FileReadModes into their enum value.
83 static FileReadMode ReadMode(const string &mode);
84 };
85
86 struct FstWriteOptions {
87 string source; // Where you're writing to
88 bool write_header; // Write the header?
89 bool write_isymbols; // Write input symbols?
90 bool write_osymbols; // Write output symbols?
91 bool align; // Write data aligned where appropriate;
92 // this may fail on pipes
93
94 explicit FstWriteOptions(const string& src = "<unspecifed>",
95 bool hdr = true, bool isym = true,
96 bool osym = true, bool alig = FLAGS_fst_align)
sourceFstWriteOptions97 : source(src), write_header(hdr),
98 write_isymbols(isym), write_osymbols(osym), align(alig) {}
99 };
100
101 //
102 // Fst HEADER CLASS
103 //
104 // This is the recommended Fst file header representation.
105 //
106 class FstHeader {
107 public:
108 enum {
109 HAS_ISYMBOLS = 0x1, // Has input symbol table
110 HAS_OSYMBOLS = 0x2, // Has output symbol table
111 IS_ALIGNED = 0x4, // Memory-aligned (where appropriate)
112 } Flags;
113
FstHeader()114 FstHeader() : version_(0), flags_(0), properties_(0), start_(-1),
115 numstates_(0), numarcs_(0) {}
FstType()116 const string &FstType() const { return fsttype_; }
ArcType()117 const string &ArcType() const { return arctype_; }
Version()118 int32 Version() const { return version_; }
GetFlags()119 int32 GetFlags() const { return flags_; }
Properties()120 uint64 Properties() const { return properties_; }
Start()121 int64 Start() const { return start_; }
NumStates()122 int64 NumStates() const { return numstates_; }
NumArcs()123 int64 NumArcs() const { return numarcs_; }
124
SetFstType(const string & type)125 void SetFstType(const string& type) { fsttype_ = type; }
SetArcType(const string & type)126 void SetArcType(const string& type) { arctype_ = type; }
SetVersion(int32 version)127 void SetVersion(int32 version) { version_ = version; }
SetFlags(int32 flags)128 void SetFlags(int32 flags) { flags_ = flags; }
SetProperties(uint64 properties)129 void SetProperties(uint64 properties) { properties_ = properties; }
SetStart(int64 start)130 void SetStart(int64 start) { start_ = start; }
SetNumStates(int64 numstates)131 void SetNumStates(int64 numstates) { numstates_ = numstates; }
SetNumArcs(int64 numarcs)132 void SetNumArcs(int64 numarcs) { numarcs_ = numarcs; }
133
134 bool Read(istream &strm, const string &source, bool rewind = false);
135 bool Write(ostream &strm, const string &source) const;
136
137 private:
138
139 string fsttype_; // E.g. "vector"
140 string arctype_; // E.g. "standard"
141 int32 version_; // Type version #
142 int32 flags_; // File format bits
143 uint64 properties_; // FST property bits
144 int64 start_; // Start state
145 int64 numstates_; // # of states
146 int64 numarcs_; // # of arcs
147 };
148
149
150 // Specifies matcher action.
151 enum MatchType { MATCH_INPUT, // Match input label.
152 MATCH_OUTPUT, // Match output label.
153 MATCH_BOTH, // Match input or output label.
154 MATCH_NONE, // Match nothing.
155 MATCH_UNKNOWN }; // Match type unknown.
156
157 //
158 // Fst INTERFACE CLASS DEFINITION
159 //
160
161 // A generic FST, templated on the arc definition, with
162 // common-demoninator methods (use StateIterator and ArcIterator to
163 // iterate over its states and arcs).
164 template <class A>
165 class Fst {
166 public:
167 typedef A Arc;
168 typedef typename A::Weight Weight;
169 typedef typename A::StateId StateId;
170
~Fst()171 virtual ~Fst() {}
172
173 virtual StateId Start() const = 0; // Initial state
174
175 virtual Weight Final(StateId) const = 0; // State's final weight
176
177 virtual size_t NumArcs(StateId) const = 0; // State's arc count
178
179 virtual size_t NumInputEpsilons(StateId)
180 const = 0; // State's input epsilon count
181
182 virtual size_t NumOutputEpsilons(StateId)
183 const = 0; // State's output epsilon count
184
185 // If test=false, return stored properties bits for mask (some poss. unknown)
186 // If test=true, return property bits for mask (computing o.w. unknown)
187 virtual uint64 Properties(uint64 mask, bool test)
188 const = 0; // Property bits
189
190 virtual const string& Type() const = 0; // Fst type name
191
192 // Get a copy of this Fst. The copying behaves as follows:
193 //
194 // (1) The copying is constant time if safe = false or if safe = true
195 // and is on an otherwise unaccessed Fst.
196 //
197 // (2) If safe = true, the copy is thread-safe in that the original
198 // and copy can be safely accessed (but not necessarily mutated) by
199 // separate threads. For some Fst types, 'Copy(true)' should only be
200 // called on an Fst that has not otherwise been accessed. Its behavior
201 // is undefined otherwise.
202 //
203 // (3) If a MutableFst is copied and then mutated, then the original is
204 // unmodified and vice versa (often by a copy-on-write on the initial
205 // mutation, which may not be constant time).
206 virtual Fst<A> *Copy(bool safe = false) const = 0;
207
208 // Read an Fst from an input stream; returns NULL on error
Read(istream & strm,const FstReadOptions & opts)209 static Fst<A> *Read(istream &strm, const FstReadOptions &opts) {
210 FstReadOptions ropts(opts);
211 FstHeader hdr;
212 if (ropts.header)
213 hdr = *opts.header;
214 else {
215 if (!hdr.Read(strm, opts.source))
216 return 0;
217 ropts.header = &hdr;
218 }
219 FstRegister<A> *registr = FstRegister<A>::GetRegister();
220 const typename FstRegister<A>::Reader reader =
221 registr->GetReader(hdr.FstType());
222 if (!reader) {
223 LOG(ERROR) << "Fst::Read: Unknown FST type \"" << hdr.FstType()
224 << "\" (arc type = \"" << A::Type()
225 << "\"): " << ropts.source;
226 return 0;
227 }
228 return reader(strm, ropts);
229 }
230
231 // Read an Fst from a file; return NULL on error
232 // Empty filename reads from standard input
Read(const string & filename)233 static Fst<A> *Read(const string &filename) {
234 if (!filename.empty()) {
235 ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
236 if (!strm) {
237 LOG(ERROR) << "Fst::Read: Can't open file: " << filename;
238 return 0;
239 }
240 return Read(strm, FstReadOptions(filename));
241 } else {
242 return Read(cin, FstReadOptions("standard input"));
243 }
244 }
245
246 // Write an Fst to an output stream; return false on error
Write(ostream & strm,const FstWriteOptions & opts)247 virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
248 LOG(ERROR) << "Fst::Write: No write stream method for " << Type()
249 << " Fst type";
250 return false;
251 }
252
253 // Write an Fst to a file; return false on error
254 // Empty filename writes to standard output
Write(const string & filename)255 virtual bool Write(const string &filename) const {
256 LOG(ERROR) << "Fst::Write: No write filename method for " << Type()
257 << " Fst type";
258 return false;
259 }
260
261 // Return input label symbol table; return NULL if not specified
262 virtual const SymbolTable* InputSymbols() const = 0;
263
264 // Return output label symbol table; return NULL if not specified
265 virtual const SymbolTable* OutputSymbols() const = 0;
266
267 // For generic state iterator construction; not normally called
268 // directly by users.
269 virtual void InitStateIterator(StateIteratorData<A> *) const = 0;
270
271 // For generic arc iterator construction; not normally called
272 // directly by users.
273 virtual void InitArcIterator(StateId s, ArcIteratorData<A> *) const = 0;
274
275 // For generic matcher construction; not normally called
276 // directly by users.
277 virtual MatcherBase<A> *InitMatcher(MatchType match_type) const;
278
279 protected:
WriteFile(const string & filename)280 bool WriteFile(const string &filename) const {
281 if (!filename.empty()) {
282 ofstream strm(filename.c_str(), ofstream::out | ofstream::binary);
283 if (!strm) {
284 LOG(ERROR) << "Fst::Write: Can't open file: " << filename;
285 return false;
286 }
287 return Write(strm, FstWriteOptions(filename));
288 } else {
289 return Write(cout, FstWriteOptions("standard output"));
290 }
291 }
292 };
293
294
295 //
296 // STATE and ARC ITERATOR DEFINITIONS
297 //
298
299 // State iterator interface templated on the Arc definition; used
300 // for StateIterator specializations returned by the InitStateIterator
301 // Fst method.
302 template <class A>
303 class StateIteratorBase {
304 public:
305 typedef A Arc;
306 typedef typename A::StateId StateId;
307
~StateIteratorBase()308 virtual ~StateIteratorBase() {}
309
Done()310 bool Done() const { return Done_(); } // End of iterator?
Value()311 StateId Value() const { return Value_(); } // Current state (when !Done)
Next()312 void Next() { Next_(); } // Advance to next state (when !Done)
Reset()313 void Reset() { Reset_(); } // Return to initial condition
314
315 private:
316 // This allows base class virtual access to non-virtual derived-
317 // class members of the same name. It makes the derived class more
318 // efficient to use but unsafe to further derive.
319 virtual bool Done_() const = 0;
320 virtual StateId Value_() const = 0;
321 virtual void Next_() = 0;
322 virtual void Reset_() = 0;
323 };
324
325
326 // StateIterator initialization data
327
328 template <class A> struct StateIteratorData {
329 StateIteratorBase<A> *base; // Specialized iterator if non-zero
330 typename A::StateId nstates; // O.w. total # of states
331 };
332
333
334 // Generic state iterator, templated on the FST definition
335 // - a wrapper around pointer to specific one.
336 // Here is a typical use: \code
337 // for (StateIterator<StdFst> siter(fst);
338 // !siter.Done();
339 // siter.Next()) {
340 // StateId s = siter.Value();
341 // ...
342 // } \endcode
343 template <class F>
344 class StateIterator {
345 public:
346 typedef F FST;
347 typedef typename F::Arc Arc;
348 typedef typename Arc::StateId StateId;
349
StateIterator(const F & fst)350 explicit StateIterator(const F &fst) : s_(0) {
351 fst.InitStateIterator(&data_);
352 }
353
~StateIterator()354 ~StateIterator() { if (data_.base) delete data_.base; }
355
Done()356 bool Done() const {
357 return data_.base ? data_.base->Done() : s_ >= data_.nstates;
358 }
359
Value()360 StateId Value() const { return data_.base ? data_.base->Value() : s_; }
361
Next()362 void Next() {
363 if (data_.base)
364 data_.base->Next();
365 else
366 ++s_;
367 }
368
Reset()369 void Reset() {
370 if (data_.base)
371 data_.base->Reset();
372 else
373 s_ = 0;
374 }
375
376 private:
377 StateIteratorData<Arc> data_;
378 StateId s_;
379
380 DISALLOW_COPY_AND_ASSIGN(StateIterator);
381 };
382
383
384 // Flags to control the behavior on an arc iterator:
385 static const uint32 kArcILabelValue = 0x0001; // Value() gives valid ilabel
386 static const uint32 kArcOLabelValue = 0x0002; // " " " olabel
387 static const uint32 kArcWeightValue = 0x0004; // " " " weight
388 static const uint32 kArcNextStateValue = 0x0008; // " " " nextstate
389 static const uint32 kArcNoCache = 0x0010; // No need to cache arcs
390
391 static const uint32 kArcValueFlags =
392 kArcILabelValue | kArcOLabelValue |
393 kArcWeightValue | kArcNextStateValue;
394
395 static const uint32 kArcFlags = kArcValueFlags | kArcNoCache;
396
397
398 // Arc iterator interface, templated on the Arc definition; used
399 // for Arc iterator specializations that are returned by the InitArcIterator
400 // Fst method.
401 template <class A>
402 class ArcIteratorBase {
403 public:
404 typedef A Arc;
405 typedef typename A::StateId StateId;
406
~ArcIteratorBase()407 virtual ~ArcIteratorBase() {}
408
Done()409 bool Done() const { return Done_(); } // End of iterator?
Value()410 const A& Value() const { return Value_(); } // Current arc (when !Done)
Next()411 void Next() { Next_(); } // Advance to next arc (when !Done)
Position()412 size_t Position() const { return Position_(); } // Return current position
Reset()413 void Reset() { Reset_(); } // Return to initial condition
Seek(size_t a)414 void Seek(size_t a) { Seek_(a); } // Random arc access by position
Flags()415 uint32 Flags() const { return Flags_(); } // Return current behavorial flags
SetFlags(uint32 flags,uint32 mask)416 void SetFlags(uint32 flags, uint32 mask) { // Set behavorial flags
417 SetFlags_(flags, mask);
418 }
419
420 private:
421 // This allows base class virtual access to non-virtual derived-
422 // class members of the same name. It makes the derived class more
423 // efficient to use but unsafe to further derive.
424 virtual bool Done_() const = 0;
425 virtual const A& Value_() const = 0;
426 virtual void Next_() = 0;
427 virtual size_t Position_() const = 0;
428 virtual void Reset_() = 0;
429 virtual void Seek_(size_t a) = 0;
430 virtual uint32 Flags_() const = 0;
431 virtual void SetFlags_(uint32 flags, uint32 mask) = 0;
432 };
433
434
435 // ArcIterator initialization data
436 template <class A> struct ArcIteratorData {
437 ArcIteratorBase<A> *base; // Specialized iterator if non-zero
438 const A *arcs; // O.w. arcs pointer
439 size_t narcs; // ... and arc count
440 int *ref_count; // ... and reference count if non-zero
441 };
442
443 // Generic arc iterator, templated on the FST definition
444 // - a wrapper around pointer to specific one.
445 // Here is a typical use: \code
446 // for (ArcIterator<StdFst> aiter(fst, s));
447 // !aiter.Done();
448 // aiter.Next()) {
449 // StdArc &arc = aiter.Value();
450 // ...
451 // } \endcode
452 template <class F>
453 class ArcIterator {
454 public:
455 typedef F FST;
456 typedef typename F::Arc Arc;
457 typedef typename Arc::StateId StateId;
458
ArcIterator(const F & fst,StateId s)459 ArcIterator(const F &fst, StateId s) : i_(0) {
460 fst.InitArcIterator(s, &data_);
461 }
462
ArcIterator(const ArcIteratorData<Arc> & data)463 explicit ArcIterator(const ArcIteratorData<Arc> &data) : data_(data), i_(0) {
464 if (data_.ref_count)
465 ++(*data_.ref_count);
466 }
467
~ArcIterator()468 ~ArcIterator() {
469 if (data_.base)
470 delete data_.base;
471 else if (data_.ref_count)
472 --(*data_.ref_count);
473 }
474
Done()475 bool Done() const {
476 return data_.base ? data_.base->Done() : i_ >= data_.narcs;
477 }
478
Value()479 const Arc& Value() const {
480 return data_.base ? data_.base->Value() : data_.arcs[i_];
481 }
482
Next()483 void Next() {
484 if (data_.base)
485 data_.base->Next();
486 else
487 ++i_;
488 }
489
Reset()490 void Reset() {
491 if (data_.base)
492 data_.base->Reset();
493 else
494 i_ = 0;
495 }
496
Seek(size_t a)497 void Seek(size_t a) {
498 if (data_.base)
499 data_.base->Seek(a);
500 else
501 i_ = a;
502 }
503
Position()504 size_t Position() const {
505 return data_.base ? data_.base->Position() : i_;
506 }
507
Flags()508 uint32 Flags() const {
509 if (data_.base)
510 return data_.base->Flags();
511 else
512 return kArcValueFlags;
513 }
514
SetFlags(uint32 flags,uint32 mask)515 void SetFlags(uint32 flags, uint32 mask) {
516 if (data_.base)
517 data_.base->SetFlags(flags, mask);
518 }
519
520 private:
521 ArcIteratorData<Arc> data_;
522 size_t i_;
523 DISALLOW_COPY_AND_ASSIGN(ArcIterator);
524 };
525
526 } // namespace fst
527
528 // ArcIterator placement operator new and destroy function.
529 // new needs to be in the global namespace
530
531 template <class F>
new(size_t size,fst::MemoryPool<fst::ArcIterator<F>> * pool)532 void* operator new(size_t size,
533 fst::MemoryPool< fst::ArcIterator<F> > *pool) {
534 return pool->Allocate();
535 }
536
537 namespace fst {
538
539 template <class F>
Destroy(fst::ArcIterator<F> * aiter,fst::MemoryPool<fst::ArcIterator<F>> * pool)540 inline void Destroy(fst::ArcIterator<F> *aiter,
541 fst::MemoryPool< fst::ArcIterator<F> > *pool) {
542 if (aiter) {
543 aiter->~ArcIterator<F>();
544 pool->Free(aiter);
545 }
546 }
547
548
549 //
550 // MATCHER DEFINITIONS
551 //
552
553 template <class A>
InitMatcher(MatchType match_type)554 MatcherBase<A> *Fst<A>::InitMatcher(MatchType match_type) const {
555 return 0; // Use the default matcher
556 }
557
558
559 //
560 // FST ACCESSORS - Useful functions in high-performance cases.
561 //
562
563 namespace internal {
564
565 // General case - requires non-abstract, 'final' methods. Use for inlining.
566 template <class F> inline
Final(const F & fst,typename F::Arc::StateId s)567 typename F::Arc::Weight Final(const F &fst, typename F::Arc::StateId s) {
568 return fst.F::Final(s);
569 }
570
571 template <class F> inline
NumArcs(const F & fst,typename F::Arc::StateId s)572 ssize_t NumArcs(const F &fst, typename F::Arc::StateId s) {
573 return fst.F::NumArcs(s);
574 }
575
576 template <class F> inline
NumInputEpsilons(const F & fst,typename F::Arc::StateId s)577 ssize_t NumInputEpsilons(const F &fst, typename F::Arc::StateId s) {
578 return fst.F::NumInputEpsilons(s);
579 }
580
581 template <class F> inline
NumOutputEpsilons(const F & fst,typename F::Arc::StateId s)582 ssize_t NumOutputEpsilons(const F &fst, typename F::Arc::StateId s) {
583 return fst.F::NumOutputEpsilons(s);
584 }
585
586
587 // Fst<A> case - abstract methods.
588 template <class A> inline
Final(const Fst<A> & fst,typename A::StateId s)589 typename A::Weight Final(const Fst<A> &fst, typename A::StateId s) {
590 return fst.Final(s);
591 }
592
593 template <class A> inline
NumArcs(const Fst<A> & fst,typename A::StateId s)594 ssize_t NumArcs(const Fst<A> &fst, typename A::StateId s) {
595 return fst.NumArcs(s);
596 }
597
598 template <class A> inline
NumInputEpsilons(const Fst<A> & fst,typename A::StateId s)599 ssize_t NumInputEpsilons(const Fst<A> &fst, typename A::StateId s) {
600 return fst.NumInputEpsilons(s);
601 }
602
603 template <class A> inline
NumOutputEpsilons(const Fst<A> & fst,typename A::StateId s)604 ssize_t NumOutputEpsilons(const Fst<A> &fst, typename A::StateId s) {
605 return fst.NumOutputEpsilons(s);
606 }
607
608 } // namespace internal
609
610 // A useful alias when using StdArc.
611 typedef Fst<StdArc> StdFst;
612
613
614 //
615 // CONSTANT DEFINITIONS
616 //
617
618 const int kNoStateId = -1; // Not a valid state ID
619 const int kNoLabel = -1; // Not a valid label
620
621 //
622 // Fst IMPLEMENTATION BASE
623 //
624 // This is the recommended Fst implementation base class. It will
625 // handle reference counts, property bits, type information and symbols.
626 //
627
628 template <class A> class FstImpl {
629 public:
630 typedef typename A::Weight Weight;
631 typedef typename A::StateId StateId;
632
FstImpl()633 FstImpl()
634 : properties_(0), type_("null"), isymbols_(0), osymbols_(0) {}
635
FstImpl(const FstImpl<A> & impl)636 FstImpl(const FstImpl<A> &impl)
637 : properties_(impl.properties_), type_(impl.type_),
638 isymbols_(impl.isymbols_ ? impl.isymbols_->Copy() : 0),
639 osymbols_(impl.osymbols_ ? impl.osymbols_->Copy() : 0) {}
640
~FstImpl()641 virtual ~FstImpl() {
642 delete isymbols_;
643 delete osymbols_;
644 }
645
Type()646 const string& Type() const { return type_; }
647
SetType(const string & type)648 void SetType(const string &type) { type_ = type; }
649
Properties()650 virtual uint64 Properties() const { return properties_; }
651
Properties(uint64 mask)652 virtual uint64 Properties(uint64 mask) const { return properties_ & mask; }
653
SetProperties(uint64 props)654 void SetProperties(uint64 props) {
655 properties_ &= kError; // kError can't be cleared
656 properties_ |= props;
657 }
658
SetProperties(uint64 props,uint64 mask)659 void SetProperties(uint64 props, uint64 mask) {
660 properties_ &= ~mask | kError; // kError can't be cleared
661 properties_ |= props & mask;
662 }
663
664 // Allows (only) setting error bit on const FST impls
SetProperties(uint64 props,uint64 mask)665 void SetProperties(uint64 props, uint64 mask) const {
666 if (mask != kError)
667 FSTERROR() << "FstImpl::SetProperties() const: can only set kError";
668 properties_ |= kError;
669 }
670
InputSymbols()671 const SymbolTable* InputSymbols() const { return isymbols_; }
672
OutputSymbols()673 const SymbolTable* OutputSymbols() const { return osymbols_; }
674
InputSymbols()675 SymbolTable* InputSymbols() { return isymbols_; }
676
OutputSymbols()677 SymbolTable* OutputSymbols() { return osymbols_; }
678
SetInputSymbols(const SymbolTable * isyms)679 void SetInputSymbols(const SymbolTable* isyms) {
680 if (isymbols_) delete isymbols_;
681 isymbols_ = isyms ? isyms->Copy() : 0;
682 }
683
SetOutputSymbols(const SymbolTable * osyms)684 void SetOutputSymbols(const SymbolTable* osyms) {
685 if (osymbols_) delete osymbols_;
686 osymbols_ = osyms ? osyms->Copy() : 0;
687 }
688
RefCount()689 int RefCount() const {
690 return ref_count_.count();
691 }
692
IncrRefCount()693 int IncrRefCount() {
694 return ref_count_.Incr();
695 }
696
DecrRefCount()697 int DecrRefCount() {
698 return ref_count_.Decr();
699 }
700
701 // Read-in header and symbols from input stream, initialize Fst, and
702 // return the header. If opts.header is non-null, skip read-in and
703 // use the option value. If opts.[io]symbols is non-null, read-in
704 // (if present), but use the option value.
705 bool ReadHeader(istream &strm, const FstReadOptions& opts,
706 int min_version, FstHeader *hdr);
707
708 // Write-out header and symbols from output stream.
709 // If a opts.header is false, skip writing header.
710 // If opts.[io]symbols is false, skip writing those symbols.
711 // This method is needed for Impl's that implement Write methods.
WriteHeader(ostream & strm,const FstWriteOptions & opts,int version,FstHeader * hdr)712 void WriteHeader(ostream &strm, const FstWriteOptions& opts,
713 int version, FstHeader *hdr) const {
714 if (opts.write_header) {
715 hdr->SetFstType(type_);
716 hdr->SetArcType(A::Type());
717 hdr->SetVersion(version);
718 hdr->SetProperties(properties_);
719 int32 file_flags = 0;
720 if (isymbols_ && opts.write_isymbols)
721 file_flags |= FstHeader::HAS_ISYMBOLS;
722 if (osymbols_ && opts.write_osymbols)
723 file_flags |= FstHeader::HAS_OSYMBOLS;
724 if (opts.align)
725 file_flags |= FstHeader::IS_ALIGNED;
726 hdr->SetFlags(file_flags);
727 hdr->Write(strm, opts.source);
728 }
729 if (isymbols_ && opts.write_isymbols) isymbols_->Write(strm);
730 if (osymbols_ && opts.write_osymbols) osymbols_->Write(strm);
731 }
732
733 // Write-out header and symbols to output stream.
734 // If a opts.header is false, skip writing header.
735 // If opts.[io]symbols is false, skip writing those symbols.
736 // type is the Fst type being written.
737 // This method is used in the cross-type serialization methods Fst::WriteFst.
WriteFstHeader(const Fst<A> & fst,ostream & strm,const FstWriteOptions & opts,int version,const string & type,uint64 properties,FstHeader * hdr)738 static void WriteFstHeader(const Fst<A> &fst, ostream &strm,
739 const FstWriteOptions& opts, int version,
740 const string &type, uint64 properties,
741 FstHeader *hdr) {
742 if (opts.write_header) {
743 hdr->SetFstType(type);
744 hdr->SetArcType(A::Type());
745 hdr->SetVersion(version);
746 hdr->SetProperties(properties);
747 int32 file_flags = 0;
748 if (fst.InputSymbols() && opts.write_isymbols)
749 file_flags |= FstHeader::HAS_ISYMBOLS;
750 if (fst.OutputSymbols() && opts.write_osymbols)
751 file_flags |= FstHeader::HAS_OSYMBOLS;
752 if (opts.align)
753 file_flags |= FstHeader::IS_ALIGNED;
754 hdr->SetFlags(file_flags);
755 hdr->Write(strm, opts.source);
756 }
757 if (fst.InputSymbols() && opts.write_isymbols) {
758 fst.InputSymbols()->Write(strm);
759 }
760 if (fst.OutputSymbols() && opts.write_osymbols) {
761 fst.OutputSymbols()->Write(strm);
762 }
763 }
764
765 // In serialization routines where the header cannot be written until after
766 // the machine has been serialized, this routine can be called to seek to
767 // the beginning of the file an rewrite the header with updated fields.
768 // It repositions the file pointer back at the end of the file.
769 // returns true on success, false on failure.
UpdateFstHeader(const Fst<A> & fst,ostream & strm,const FstWriteOptions & opts,int version,const string & type,uint64 properties,FstHeader * hdr,size_t header_offset)770 static bool UpdateFstHeader(const Fst<A> &fst, ostream &strm,
771 const FstWriteOptions& opts, int version,
772 const string &type, uint64 properties,
773 FstHeader *hdr, size_t header_offset) {
774 strm.seekp(header_offset);
775 if (!strm) {
776 LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
777 return false;
778 }
779 WriteFstHeader(fst, strm, opts, version, type, properties, hdr);
780 if (!strm) {
781 LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
782 return false;
783 }
784 strm.seekp(0, ios_base::end);
785 if (!strm) {
786 LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
787 return false;
788 }
789 return true;
790 }
791
792 protected:
793 mutable uint64 properties_; // Property bits
794
795 private:
796 string type_; // Unique name of Fst class
797 SymbolTable *isymbols_; // Ilabel symbol table
798 SymbolTable *osymbols_; // Olabel symbol table
799 RefCounter ref_count_; // Reference count
800
801 void operator=(const FstImpl<A> &impl); // disallow
802 };
803
804 template <class A> inline
ReadHeader(istream & strm,const FstReadOptions & opts,int min_version,FstHeader * hdr)805 bool FstImpl<A>::ReadHeader(istream &strm, const FstReadOptions& opts,
806 int min_version, FstHeader *hdr) {
807 if (opts.header)
808 *hdr = *opts.header;
809 else if (!hdr->Read(strm, opts.source))
810 return false;
811
812 if (FLAGS_v >= 2) {
813 LOG(INFO) << "FstImpl::ReadHeader: source: " << opts.source
814 << ", fst_type: " << hdr->FstType()
815 << ", arc_type: " << A::Type()
816 << ", version: " << hdr->Version()
817 << ", flags: " << hdr->GetFlags();
818 }
819
820 if (hdr->FstType() != type_) {
821 LOG(ERROR) << "FstImpl::ReadHeader: Fst not of type \"" << type_
822 << "\": " << opts.source;
823 return false;
824 }
825 if (hdr->ArcType() != A::Type()) {
826 LOG(ERROR) << "FstImpl::ReadHeader: Arc not of type \"" << A::Type()
827 << "\": " << opts.source;
828 return false;
829 }
830 if (hdr->Version() < min_version) {
831 LOG(ERROR) << "FstImpl::ReadHeader: Obsolete " << type_
832 << " Fst version: " << opts.source;
833 return false;
834 }
835 properties_ = hdr->Properties();
836 if (hdr->GetFlags() & FstHeader::HAS_ISYMBOLS)
837 isymbols_ = SymbolTable::Read(strm, opts.source);
838 // Input symbol table not wanted; delete
839 if (!opts.read_isymbols)
840 SetInputSymbols(0);
841 if (hdr->GetFlags() & FstHeader::HAS_OSYMBOLS)
842 osymbols_ = SymbolTable::Read(strm, opts.source);
843 // Output symbol table not wanted; delete
844 if (!opts.read_osymbols)
845 SetOutputSymbols(0);
846 if (opts.isymbols) {
847 delete isymbols_;
848 isymbols_ = opts.isymbols->Copy();
849 }
850 if (opts.osymbols) {
851 delete osymbols_;
852 osymbols_ = opts.osymbols->Copy();
853 }
854 return true;
855 }
856
857
858 template<class Arc>
859 uint64 TestProperties(const Fst<Arc> &fst, uint64 mask, uint64 *known);
860
861
862 // This is a helper class template useful for attaching an Fst interface to
863 // its implementation, handling reference counting.
864 template < class I, class F = Fst<typename I::Arc> >
865 class ImplToFst : public F {
866 public:
867 typedef typename I::Arc Arc;
868 typedef typename Arc::Weight Weight;
869 typedef typename Arc::StateId StateId;
870
~ImplToFst()871 virtual ~ImplToFst() { if (!impl_->DecrRefCount()) delete impl_; }
872
Start()873 virtual StateId Start() const { return impl_->Start(); }
874
Final(StateId s)875 virtual Weight Final(StateId s) const { return impl_->Final(s); }
876
NumArcs(StateId s)877 virtual size_t NumArcs(StateId s) const { return impl_->NumArcs(s); }
878
NumInputEpsilons(StateId s)879 virtual size_t NumInputEpsilons(StateId s) const {
880 return impl_->NumInputEpsilons(s);
881 }
882
NumOutputEpsilons(StateId s)883 virtual size_t NumOutputEpsilons(StateId s) const {
884 return impl_->NumOutputEpsilons(s);
885 }
886
Properties(uint64 mask,bool test)887 virtual uint64 Properties(uint64 mask, bool test) const {
888 if (test) {
889 uint64 knownprops, testprops = TestProperties(*this, mask, &knownprops);
890 impl_->SetProperties(testprops, knownprops);
891 return testprops & mask;
892 } else {
893 return impl_->Properties(mask);
894 }
895 }
896
Type()897 virtual const string& Type() const { return impl_->Type(); }
898
InputSymbols()899 virtual const SymbolTable* InputSymbols() const {
900 return impl_->InputSymbols();
901 }
902
OutputSymbols()903 virtual const SymbolTable* OutputSymbols() const {
904 return impl_->OutputSymbols();
905 }
906
907 protected:
ImplToFst()908 ImplToFst() : impl_(0) {}
909
ImplToFst(I * impl)910 ImplToFst(I *impl) : impl_(impl) {}
911
ImplToFst(const ImplToFst<I,F> & fst)912 ImplToFst(const ImplToFst<I, F> &fst) {
913 impl_ = fst.impl_;
914 impl_->IncrRefCount();
915 }
916
917 // This constructor presumes there is a copy constructor for the
918 // implementation.
ImplToFst(const ImplToFst<I,F> & fst,bool safe)919 ImplToFst(const ImplToFst<I, F> &fst, bool safe) {
920 if (safe) {
921 impl_ = new I(*(fst.impl_));
922 } else {
923 impl_ = fst.impl_;
924 impl_->IncrRefCount();
925 }
926 }
927
GetImpl()928 I *GetImpl() const { return impl_; }
929
930 // Change Fst implementation pointer. If 'own_impl' is true,
931 // ownership of the input implementation is given to this
932 // object; otherwise, the input implementation's reference count
933 // should be incremented.
934 void SetImpl(I *impl, bool own_impl = true) {
935 if (!own_impl)
936 impl->IncrRefCount();
937 if (impl_ && !impl_->DecrRefCount()) delete impl_;
938 impl_ = impl;
939 }
940
941 private:
942 // Disallow
943 ImplToFst<I, F> &operator=(const ImplToFst<I, F> &fst);
944
945 ImplToFst<I, F> &operator=(const Fst<Arc> &fst) {
946 FSTERROR() << "ImplToFst: Assignment operator disallowed";
947 GetImpl()->SetProperties(kError, kError);
948 return *this;
949 }
950
951 I *impl_;
952 };
953
954
955 // Converts FSTs by casting their implementations, where this makes
956 // sense (which excludes implementations with weight-dependent virtual
957 // methods). Must be a friend of the Fst classes involved (currently
958 // the concrete Fsts: VectorFst, ConstFst, CompactFst).
Cast(const F & ifst,G * ofst)959 template<class F, class G> void Cast(const F &ifst, G *ofst) {
960 ofst->SetImpl(reinterpret_cast<typename G::Impl *>(ifst.GetImpl()), false);
961 }
962
963 // Fst Serialization
964 template <class A>
FstToString(const Fst<A> & fst,string * result)965 void FstToString(const Fst<A> &fst, string *result) {
966 ostringstream ostrm;
967 fst.Write(ostrm, FstWriteOptions("FstToString"));
968 *result = ostrm.str();
969 }
970
971 template <class A>
FstToString(const Fst<A> & fst,string * result,const FstWriteOptions & options)972 void FstToString(const Fst<A> &fst, string *result,
973 const FstWriteOptions& options) {
974 ostringstream ostrm;
975 fst.Write(ostrm, options);
976 *result = ostrm.str();
977 }
978
979 template <class A>
StringToFst(const string & s)980 Fst<A> *StringToFst(const string &s) {
981 istringstream istrm(s);
982 return Fst<A>::Read(istrm, FstReadOptions("StringToFst"));
983 }
984
985 } // namespace fst
986
987
988
989 #endif // FST_LIB_FST_H__
990