1 // const-fst.h
2
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Simple concrete immutable FST whose states and arcs are each stored
20 // in single arrays.
21
22 #ifndef FST_LIB_CONST_FST_H__
23 #define FST_LIB_CONST_FST_H__
24
25 #include <string>
26 #include <vector>
27 using std::vector;
28
29 #include <fst/expanded-fst.h>
30 #include <fst/fst-decl.h> // For optional argument declarations
31 #include <fst/mapped-file.h>
32 #include <fst/test-properties.h>
33 #include <fst/util.h>
34
35
36 namespace fst {
37
38 template <class A, class U> class ConstFst;
39 template <class F, class G> void Cast(const F &, G *);
40
41 // States and arcs each implemented by single arrays, templated on the
42 // Arc definition. The unsigned type U is used to represent indices into
43 // the arc array.
44 template <class A, class U>
45 class ConstFstImpl : public FstImpl<A> {
46 public:
47 using FstImpl<A>::SetInputSymbols;
48 using FstImpl<A>::SetOutputSymbols;
49 using FstImpl<A>::SetType;
50 using FstImpl<A>::SetProperties;
51 using FstImpl<A>::Properties;
52
53 typedef A Arc;
54 typedef typename A::Weight Weight;
55 typedef typename A::StateId StateId;
56 typedef U Unsigned;
57
ConstFstImpl()58 ConstFstImpl()
59 : states_region_(0), arcs_region_(0), states_(0), arcs_(0), nstates_(0),
60 narcs_(0), start_(kNoStateId) {
61 string type = "const";
62 if (sizeof(U) != sizeof(uint32)) {
63 string size;
64 Int64ToStr(8 * sizeof(U), &size);
65 type += size;
66 }
67 SetType(type);
68 SetProperties(kNullProperties | kStaticProperties);
69 }
70
71 explicit ConstFstImpl(const Fst<A> &fst);
72
~ConstFstImpl()73 ~ConstFstImpl() {
74 delete arcs_region_;
75 delete states_region_;
76 }
77
Start()78 StateId Start() const { return start_; }
79
Final(StateId s)80 Weight Final(StateId s) const { return states_[s].final; }
81
NumStates()82 StateId NumStates() const { return nstates_; }
83
NumArcs(StateId s)84 size_t NumArcs(StateId s) const { return states_[s].narcs; }
85
NumInputEpsilons(StateId s)86 size_t NumInputEpsilons(StateId s) const { return states_[s].niepsilons; }
87
NumOutputEpsilons(StateId s)88 size_t NumOutputEpsilons(StateId s) const { return states_[s].noepsilons; }
89
90 static ConstFstImpl<A, U> *Read(istream &strm, const FstReadOptions &opts);
91
Arcs(StateId s)92 A *Arcs(StateId s) { return arcs_ + states_[s].pos; }
93
94 // Provide information needed for generic state iterator
InitStateIterator(StateIteratorData<A> * data)95 void InitStateIterator(StateIteratorData<A> *data) const {
96 data->base = 0;
97 data->nstates = nstates_;
98 }
99
100 // Provide information needed for the generic arc iterator
InitArcIterator(StateId s,ArcIteratorData<A> * data)101 void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
102 data->base = 0;
103 data->arcs = arcs_ + states_[s].pos;
104 data->narcs = states_[s].narcs;
105 data->ref_count = 0;
106 }
107
108 private:
109 friend class ConstFst<A, U>; // Allow finding narcs_, nstates_ during Write
110
111 // States implemented by array *states_ below, arcs by (single) *arcs_.
112 struct State {
113 Weight final; // Final weight
114 Unsigned pos; // Start of state's arcs in *arcs_
115 Unsigned narcs; // Number of arcs (per state)
116 Unsigned niepsilons; // # of input epsilons
117 Unsigned noepsilons; // # of output epsilons
StateState118 State() : final(Weight::Zero()), niepsilons(0), noepsilons(0) {}
119 };
120
121 // Properties always true of this Fst class
122 static const uint64 kStaticProperties = kExpanded;
123 // Current unaligned file format version. The unaligned version was added and
124 // made the default since the aligned version does not work on pipes.
125 static const int kFileVersion = 2;
126 // Current aligned file format version
127 static const int kAlignedFileVersion = 1;
128 // Minimum file format version supported
129 static const int kMinFileVersion = 1;
130
131 MappedFile *states_region_; // Mapped file for states
132 MappedFile *arcs_region_; // Mapped file for arcs
133 State *states_; // States represenation
134 A *arcs_; // Arcs representation
135 StateId nstates_; // Number of states
136 size_t narcs_; // Number of arcs (per FST)
137 StateId start_; // Initial state
138
139 DISALLOW_COPY_AND_ASSIGN(ConstFstImpl);
140 };
141
142 template <class A, class U>
143 const uint64 ConstFstImpl<A, U>::kStaticProperties;
144 template <class A, class U>
145 const int ConstFstImpl<A, U>::kFileVersion;
146 template <class A, class U>
147 const int ConstFstImpl<A, U>::kAlignedFileVersion;
148 template <class A, class U>
149 const int ConstFstImpl<A, U>::kMinFileVersion;
150
151
152 template<class A, class U>
ConstFstImpl(const Fst<A> & fst)153 ConstFstImpl<A, U>::ConstFstImpl(const Fst<A> &fst) : nstates_(0), narcs_(0) {
154 string type = "const";
155 if (sizeof(U) != sizeof(uint32)) {
156 string size;
157 Int64ToStr(sizeof(U) * 8, &size);
158 type += size;
159 }
160 SetType(type);
161 SetInputSymbols(fst.InputSymbols());
162 SetOutputSymbols(fst.OutputSymbols());
163 start_ = fst.Start();
164
165 // Count # of states and arcs.
166 for (StateIterator< Fst<A> > siter(fst);
167 !siter.Done();
168 siter.Next()) {
169 ++nstates_;
170 StateId s = siter.Value();
171 for (ArcIterator< Fst<A> > aiter(fst, s);
172 !aiter.Done();
173 aiter.Next())
174 ++narcs_;
175 }
176 states_region_ = MappedFile::Allocate(nstates_ * sizeof(*states_));
177 arcs_region_ = MappedFile::Allocate(narcs_ * sizeof(*arcs_));
178 states_ = reinterpret_cast<State*>(states_region_->mutable_data());
179 arcs_ = reinterpret_cast<A*>(arcs_region_->mutable_data());
180 size_t pos = 0;
181 for (StateId s = 0; s < nstates_; ++s) {
182 states_[s].final = fst.Final(s);
183 states_[s].pos = pos;
184 states_[s].narcs = 0;
185 states_[s].niepsilons = 0;
186 states_[s].noepsilons = 0;
187 for (ArcIterator< Fst<A> > aiter(fst, s);
188 !aiter.Done();
189 aiter.Next()) {
190 const A &arc = aiter.Value();
191 ++states_[s].narcs;
192 if (arc.ilabel == 0)
193 ++states_[s].niepsilons;
194 if (arc.olabel == 0)
195 ++states_[s].noepsilons;
196 arcs_[pos++] = arc;
197 }
198 }
199 SetProperties(fst.Properties(kCopyProperties, true) | kStaticProperties);
200 }
201
202
203 template<class A, class U>
Read(istream & strm,const FstReadOptions & opts)204 ConstFstImpl<A, U> *ConstFstImpl<A, U>::Read(istream &strm,
205 const FstReadOptions &opts) {
206 ConstFstImpl<A, U> *impl = new ConstFstImpl<A, U>;
207 FstHeader hdr;
208 if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) {
209 delete impl;
210 return 0;
211 }
212 impl->start_ = hdr.Start();
213 impl->nstates_ = hdr.NumStates();
214 impl->narcs_ = hdr.NumArcs();
215
216 // Ensures compatibility
217 if (hdr.Version() == kAlignedFileVersion)
218 hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED);
219
220 if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
221 LOG(ERROR) << "ConstFst::Read: Alignment failed: " << opts.source;
222 delete impl;
223 return 0;
224 }
225
226 size_t b = impl->nstates_ * sizeof(typename ConstFstImpl<A, U>::State);
227 impl->states_region_ = MappedFile::Map(
228 &strm, opts.mode == FstReadOptions::MAP, opts.source, b);
229 if (!strm || impl->states_region_ == NULL) {
230 LOG(ERROR) << "ConstFst::Read: Read failed: " << opts.source;
231 delete impl;
232 return 0;
233 }
234 impl->states_ = reinterpret_cast<State*>(
235 impl->states_region_->mutable_data());
236 if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
237 LOG(ERROR) << "ConstFst::Read: Alignment failed: " << opts.source;
238 delete impl;
239 return 0;
240 }
241
242 b = impl->narcs_ * sizeof(A);
243 impl->arcs_region_ = MappedFile::Map(
244 &strm, opts.mode == FstReadOptions::MAP, opts.source, b);
245 if (!strm || impl->arcs_region_ == NULL) {
246 LOG(ERROR) << "ConstFst::Read: Read failed: " << opts.source;
247 delete impl;
248 return 0;
249 }
250 impl->arcs_ = reinterpret_cast<A*>(impl->arcs_region_->mutable_data());
251 return impl;
252 }
253
254 // Simple concrete immutable FST. This class attaches interface to
255 // implementation and handles reference counting, delegating most
256 // methods to ImplToExpandedFst. The unsigned type U is used to
257 // represent indices into the arc array (default declared
258 // in fst-decl.h).
259 template <class A, class U /* = uint32 */>
260 class ConstFst : public ImplToExpandedFst< ConstFstImpl<A, U> > {
261 public:
262 friend class StateIterator< ConstFst<A, U> >;
263 friend class ArcIterator< ConstFst<A, U> >;
264 template <class F, class G> void friend Cast(const F &, G *);
265
266 typedef A Arc;
267 typedef typename A::StateId StateId;
268 typedef ConstFstImpl<A, U> Impl;
269 typedef U Unsigned;
270
ConstFst()271 ConstFst() : ImplToExpandedFst<Impl>(new Impl()) {}
272
ConstFst(const Fst<A> & fst)273 explicit ConstFst(const Fst<A> &fst)
274 : ImplToExpandedFst<Impl>(new Impl(fst)) {}
275
ConstFst(const ConstFst<A,U> & fst)276 ConstFst(const ConstFst<A, U> &fst) : ImplToExpandedFst<Impl>(fst) {}
277
278 // Get a copy of this ConstFst. See Fst<>::Copy() for further doc.
279 virtual ConstFst<A, U> *Copy(bool safe = false) const {
280 return new ConstFst<A, U>(*this);
281 }
282
283 // Read a ConstFst from an input stream; return NULL on error
Read(istream & strm,const FstReadOptions & opts)284 static ConstFst<A, U> *Read(istream &strm, const FstReadOptions &opts) {
285 Impl* impl = Impl::Read(strm, opts);
286 return impl ? new ConstFst<A, U>(impl) : 0;
287 }
288
289 // Read a ConstFst from a file; return NULL on error
290 // Empty filename reads from standard input
Read(const string & filename)291 static ConstFst<A, U> *Read(const string &filename) {
292 Impl* impl = ImplToExpandedFst<Impl>::Read(filename);
293 return impl ? new ConstFst<A, U>(impl) : 0;
294 }
295
Write(ostream & strm,const FstWriteOptions & opts)296 virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
297 return WriteFst(*this, strm, opts);
298 }
299
Write(const string & filename)300 virtual bool Write(const string &filename) const {
301 return Fst<A>::WriteFile(filename);
302 }
303
304 template <class F>
305 static bool WriteFst(const F &fst, ostream &strm,
306 const FstWriteOptions &opts);
307
InitStateIterator(StateIteratorData<Arc> * data)308 virtual void InitStateIterator(StateIteratorData<Arc> *data) const {
309 GetImpl()->InitStateIterator(data);
310 }
311
InitArcIterator(StateId s,ArcIteratorData<Arc> * data)312 virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const {
313 GetImpl()->InitArcIterator(s, data);
314 }
315
316 private:
ConstFst(Impl * impl)317 explicit ConstFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {}
318
319 // Makes visible to friends.
GetImpl()320 Impl *GetImpl() const { return ImplToFst<Impl, ExpandedFst<A> >::GetImpl(); }
321
322 void SetImpl(Impl *impl, bool own_impl = true) {
323 ImplToFst< Impl, ExpandedFst<A> >::SetImpl(impl, own_impl);
324 }
325
326 // Use overloading to extract the type of the argument.
GetImplIfConstFst(const ConstFst & const_fst)327 static Impl* GetImplIfConstFst(const ConstFst &const_fst) {
328 return const_fst.GetImpl();
329 }
330
331 // Note that this does not give privileged treatment to subtypes of ConstFst.
332 template<typename NonConstFst>
GetImplIfConstFst(const NonConstFst & fst)333 static Impl* GetImplIfConstFst(const NonConstFst& fst) {
334 return NULL;
335 }
336
337 void operator=(const ConstFst<A, U> &fst); // disallow
338 };
339
340 // Writes Fst in Const format, potentially with a pass over the machine
341 // before writing to compute number of states and arcs.
342 //
343 template <class A, class U>
344 template <class F>
WriteFst(const F & fst,ostream & strm,const FstWriteOptions & opts)345 bool ConstFst<A, U>::WriteFst(const F &fst, ostream &strm,
346 const FstWriteOptions &opts) {
347 int file_version = opts.align ? ConstFstImpl<A, U>::kAlignedFileVersion :
348 ConstFstImpl<A, U>::kFileVersion;
349 size_t num_arcs = -1, num_states = -1;
350 size_t start_offset = 0;
351 bool update_header = true;
352 if (Impl* impl = GetImplIfConstFst(fst)) {
353 num_arcs = impl->narcs_;
354 num_states = impl->nstates_;
355 update_header = false;
356 } else if ((start_offset = strm.tellp()) == -1) {
357 // precompute values needed for header when we cannot seek to rewrite it.
358 num_arcs = 0;
359 num_states = 0;
360 for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
361 num_arcs += fst.NumArcs(siter.Value());
362 ++num_states;
363 }
364 update_header = false;
365 }
366 FstHeader hdr;
367 hdr.SetStart(fst.Start());
368 hdr.SetNumStates(num_states);
369 hdr.SetNumArcs(num_arcs);
370 string type = "const";
371 if (sizeof(U) != sizeof(uint32)) {
372 string size;
373 Int64ToStr(8 * sizeof(U), &size);
374 type += size;
375 }
376 uint64 properties = fst.Properties(kCopyProperties, true) |
377 ConstFstImpl<A, U>::kStaticProperties;
378 FstImpl<A>::WriteFstHeader(fst, strm, opts, file_version, type, properties,
379 &hdr);
380 if (opts.align && !AlignOutput(strm)) {
381 LOG(ERROR) << "Could not align file during write after header";
382 return false;
383 }
384 size_t pos = 0, states = 0;
385 typename ConstFstImpl<A, U>::State state;
386 for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
387 state.final = fst.Final(siter.Value());
388 state.pos = pos;
389 state.narcs = fst.NumArcs(siter.Value());
390 state.niepsilons = fst.NumInputEpsilons(siter.Value());
391 state.noepsilons = fst.NumOutputEpsilons(siter.Value());
392 strm.write(reinterpret_cast<const char *>(&state), sizeof(state));
393 pos += state.narcs;
394 ++states;
395 }
396 hdr.SetNumStates(states);
397 hdr.SetNumArcs(pos);
398 if (opts.align && !AlignOutput(strm)) {
399 LOG(ERROR) << "Could not align file during write after writing states";
400 }
401 for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
402 StateId s = siter.Value();
403 for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) {
404 const A &arc = aiter.Value();
405 strm.write(reinterpret_cast<const char *>(&arc), sizeof(arc));
406 }
407 }
408 strm.flush();
409 if (!strm) {
410 LOG(ERROR) << "ConstFst Write write failed: " << opts.source;
411 return false;
412 }
413 if (update_header) {
414 return FstImpl<A>::UpdateFstHeader(fst, strm, opts, file_version, type,
415 properties, &hdr, start_offset);
416 } else {
417 if (hdr.NumStates() != num_states) {
418 LOG(ERROR) << "Inconsistent number of states observed during write";
419 return false;
420 }
421 if (hdr.NumArcs() != num_arcs) {
422 LOG(ERROR) << "Inconsistent number of arcs observed during write";
423 return false;
424 }
425 }
426 return true;
427 }
428
429 // Specialization for ConstFst; see generic version in fst.h
430 // for sample usage (but use the ConstFst type!). This version
431 // should inline.
432 template <class A, class U>
433 class StateIterator< ConstFst<A, U> > {
434 public:
435 typedef typename A::StateId StateId;
436
StateIterator(const ConstFst<A,U> & fst)437 explicit StateIterator(const ConstFst<A, U> &fst)
438 : nstates_(fst.GetImpl()->NumStates()), s_(0) {}
439
Done()440 bool Done() const { return s_ >= nstates_; }
441
Value()442 StateId Value() const { return s_; }
443
Next()444 void Next() { ++s_; }
445
Reset()446 void Reset() { s_ = 0; }
447
448 private:
449 StateId nstates_;
450 StateId s_;
451
452 DISALLOW_COPY_AND_ASSIGN(StateIterator);
453 };
454
455
456 // Specialization for ConstFst; see generic version in fst.h
457 // for sample usage (but use the ConstFst type!). This version
458 // should inline.
459 template <class A, class U>
460 class ArcIterator< ConstFst<A, U> > {
461 public:
462 typedef typename A::StateId StateId;
463
ArcIterator(const ConstFst<A,U> & fst,StateId s)464 ArcIterator(const ConstFst<A, U> &fst, StateId s)
465 : arcs_(fst.GetImpl()->Arcs(s)),
466 narcs_(fst.GetImpl()->NumArcs(s)), i_(0) {}
467
Done()468 bool Done() const { return i_ >= narcs_; }
469
Value()470 const A& Value() const { return arcs_[i_]; }
471
Next()472 void Next() { ++i_; }
473
Position()474 size_t Position() const { return i_; }
475
Reset()476 void Reset() { i_ = 0; }
477
Seek(size_t a)478 void Seek(size_t a) { i_ = a; }
479
Flags()480 uint32 Flags() const {
481 return kArcValueFlags;
482 }
483
SetFlags(uint32 f,uint32 m)484 void SetFlags(uint32 f, uint32 m) {}
485
486 private:
487 const A *arcs_;
488 size_t narcs_;
489 size_t i_;
490
491 DISALLOW_COPY_AND_ASSIGN(ArcIterator);
492 };
493
494 // A useful alias when using StdArc.
495 typedef ConstFst<StdArc> StdConstFst;
496
497 } // namespace fst
498
499 #endif // FST_LIB_CONST_FST_H__
500