1 // fst.cc
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // FST definitions.
20 
21 #include <fst/fst.h>
22 
23 // Include these so they are registered
24 #include <fst/compact-fst.h>
25 #include <fst/const-fst.h>
26 #include <fst/matcher-fst.h>
27 #include <fst/vector-fst.h>
28 #include <fst/edit-fst.h>
29 
30 // FST flag definitions
31 
32 DEFINE_bool(fst_verify_properties, false,
33             "Verify fst properties queried by TestProperties");
34 
35 DEFINE_string(fst_weight_separator, ",",
36               "Character separator between printed composite weights; "
37               "must be a single character");
38 
39 DEFINE_string(fst_weight_parentheses, "",
40               "Characters enclosing the first weight of a printed composite "
41               "weight (e.g. pair weight, tuple weight and derived classes) to "
42               "ensure proper I/O of nested composite weights; "
43               "must have size 0 (none) or 2 (open and close parenthesis)");
44 
45 DEFINE_bool(fst_default_cache_gc, true, "Enable garbage collection of cache");
46 
47 DEFINE_int64(fst_default_cache_gc_limit, 1<<20LL,
48              "Cache byte size that triggers garbage collection");
49 
50 DEFINE_bool(fst_align, false, "Write FST data aligned where appropriate");
51 
52 DEFINE_string(save_relabel_ipairs, "",  "Save input relabel pairs to file");
53 DEFINE_string(save_relabel_opairs, "",  "Save output relabel pairs to file");
54 
55 DEFINE_string(fst_read_mode, "read",
56               "Default file reading mode for mappable files");
57 
58 namespace fst {
59 
60 // Register VectorFst, ConstFst and EditFst for common arcs types
61 REGISTER_FST(VectorFst, StdArc);
62 REGISTER_FST(VectorFst, LogArc);
63 REGISTER_FST(VectorFst, Log64Arc);
64 REGISTER_FST(ConstFst, StdArc);
65 REGISTER_FST(ConstFst, LogArc);
66 REGISTER_FST(ConstFst, Log64Arc);
67 REGISTER_FST(EditFst, StdArc);
68 REGISTER_FST(EditFst, LogArc);
69 REGISTER_FST(EditFst, Log64Arc);
70 
71 // Register CompactFst for common arcs with the default (uint32) size type
72 static FstRegisterer<
73   CompactFst<StdArc, StringCompactor<StdArc> > >
74 CompactFst_StdArc_StringCompactor_registerer;
75 static FstRegisterer<
76   CompactFst<LogArc, StringCompactor<LogArc> > >
77 CompactFst_LogArc_StringCompactor_registerer;
78 static FstRegisterer<
79   CompactFst<StdArc, WeightedStringCompactor<StdArc> > >
80 CompactFst_StdArc_WeightedStringCompactor_registerer;
81 static FstRegisterer<
82   CompactFst<LogArc, WeightedStringCompactor<LogArc> > >
83 CompactFst_LogArc_WeightedStringCompactor_registerer;
84 static FstRegisterer<
85   CompactFst<StdArc, AcceptorCompactor<StdArc> > >
86 CompactFst_StdArc_AcceptorCompactor_registerer;
87 static FstRegisterer<
88   CompactFst<LogArc, AcceptorCompactor<LogArc> > >
89 CompactFst_LogArc_AcceptorCompactor_registerer;
90 static FstRegisterer<
91   CompactFst<StdArc, UnweightedCompactor<StdArc> > >
92 CompactFst_StdArc_UnweightedCompactor_registerer;
93 static FstRegisterer<
94   CompactFst<LogArc, UnweightedCompactor<LogArc> > >
95 CompactFst_LogArc_UnweightedCompactor_registerer;
96 static FstRegisterer<
97   CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> > >
98 CompactFst_StdArc_UnweightedAcceptorCompactor_registerer;
99 static FstRegisterer<
100   CompactFst<LogArc, UnweightedAcceptorCompactor<LogArc> > >
101 CompactFst_LogArc_UnweightedAcceptorCompactor_registerer;
102 
// Type-name strings for the lookahead Fsts. They are declared 'extern' so
// that these const arrays get external linkage.
extern const char arc_lookahead_fst_type[]    = "arc_lookahead";
extern const char ilabel_lookahead_fst_type[] = "ilabel_lookahead";
extern const char olabel_lookahead_fst_type[] = "olabel_lookahead";
107 
108 // Identifies stream data as an FST (and its endianity)
109 static const int32 kFstMagicNumber = 2125659606;
110 
111 // Check for Fst magic number in stream, to indicate
112 // caller function that the stream content is an Fst header;
IsFstHeader(istream & strm,const string & source)113 bool IsFstHeader(istream &strm, const string &source) {
114   int64 pos = strm.tellg();
115   bool match = true;
116   int32 magic_number = 0;
117   ReadType(strm, &magic_number);
118   if (magic_number != kFstMagicNumber
119       ) {
120     match = false;
121   }
122   strm.seekg(pos);
123   return match;
124 }
125 
126 // Check Fst magic number and read in Fst header.
127 // If rewind = true, reposition stream to before call (if possible).
Read(istream & strm,const string & source,bool rewind)128 bool FstHeader::Read(istream &strm, const string &source, bool rewind) {
129   int64 pos = 0;
130   if (rewind) pos = strm.tellg();
131   int32 magic_number = 0;
132   ReadType(strm, &magic_number);
133   if (magic_number != kFstMagicNumber
134       ) {
135     LOG(ERROR) << "FstHeader::Read: Bad FST header: " << source;
136     if (rewind) strm.seekg(pos);
137     return false;
138   }
139 
140   ReadType(strm, &fsttype_);
141   ReadType(strm, &arctype_);
142   ReadType(strm, &version_);
143   ReadType(strm, &flags_);
144   ReadType(strm, &properties_);
145   ReadType(strm, &start_);
146   ReadType(strm, &numstates_);
147   ReadType(strm, &numarcs_);
148   if (!strm) {
149     LOG(ERROR) << "FstHeader::Read: read failed: " << source;
150     return false;
151   }
152   if (rewind) strm.seekg(pos);
153   return true;
154 }
155 
156 // Write Fst magic number and Fst header.
Write(ostream & strm,const string & source) const157 bool FstHeader::Write(ostream &strm, const string &source) const {
158   WriteType(strm, kFstMagicNumber);
159   WriteType(strm, fsttype_);
160   WriteType(strm, arctype_);
161   WriteType(strm, version_);
162   WriteType(strm, flags_);
163   WriteType(strm, properties_);
164   WriteType(strm, start_);
165   WriteType(strm, numstates_);
166   WriteType(strm, numarcs_);
167   return true;
168 }
169 
FstReadOptions(const string & src,const FstHeader * hdr,const SymbolTable * isym,const SymbolTable * osym)170 FstReadOptions::FstReadOptions(const string& src, const FstHeader *hdr,
171                                const SymbolTable* isym, const SymbolTable* osym)
172   : source(src), header(hdr), isymbols(isym), osymbols(osym),
173     read_isymbols(true), read_osymbols(true) {
174   mode = ReadMode(FLAGS_fst_read_mode);
175 }
176 
FstReadOptions(const string & src,const SymbolTable * isym,const SymbolTable * osym)177 FstReadOptions::FstReadOptions(const string& src, const SymbolTable* isym,
178                                const SymbolTable* osym)
179   : source(src), header(0), isymbols(isym), osymbols(osym),
180     read_isymbols(true), read_osymbols(true) {
181   mode = ReadMode(FLAGS_fst_read_mode);
182 }
183 
ReadMode(const string & mode)184 FstReadOptions::FileReadMode FstReadOptions::ReadMode(const string &mode) {
185   if (mode == "read") {
186     return READ;
187   }
188   if (mode == "map") {
189     return MAP;
190   }
191   LOG(ERROR) << "Unknown file read mode " << mode;
192   return READ;
193 }
194 
195 }  // namespace fst
196