1 // info.h
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Class to compute various information about FSTs, helper class for fstinfo.cc
20 
21 #ifndef FST_SCRIPT_INFO_IMPL_H_
22 #define FST_SCRIPT_INFO_IMPL_H_
23 
24 #include <map>
25 #include <string>
26 #include <vector>
27 using std::vector;
28 
29 #include <fst/connect.h>
30 #include <fst/dfs-visit.h>
31 #include <fst/fst.h>
32 #include <fst/lookahead-matcher.h>
33 #include <fst/matcher.h>
34 #include <fst/queue.h>
35 #include <fst/test-properties.h>
36 #include <fst/verify.h>
37 #include <fst/visit.h>
38 
39 namespace fst {
40 
41 // Compute various information about FSTs, helper class for fstinfo.cc.
42 // WARNING: Stand-alone use of this class is not recommended, most code
43 // should call directly the relevant library functions: Fst<A>::NumStates,
44 // Fst<A>::NumArcs, TestProperties, ...
45 template <class A> class FstInfo {
46  public:
47   typedef A Arc;
48   typedef typename A::StateId StateId;
49   typedef typename A::Label Label;
50   typedef typename A::Weight Weight;
51 
52   // When info_type is "short" (or "auto" and not an ExpandedFst)
53   // then only minimal info is computed and can be requested.
54   FstInfo(const Fst<A> &fst, bool test_properties,
55           const string &arc_filter_type = "any",
56           string info_type = "auto", bool verify = true)
57       : fst_type_(fst.Type()),
58         input_symbols_(fst.InputSymbols() ?
59                        fst.InputSymbols()->Name() : "none"),
60         output_symbols_(fst.OutputSymbols() ?
61                         fst.OutputSymbols()->Name() : "none"),
62         nstates_(0), narcs_(0), start_(kNoStateId), nfinal_(0),
63         nepsilons_(0), niepsilons_(0), noepsilons_(0),
64         ilabel_mult_(0.0), olabel_mult_(0.0),
65         naccess_(0), ncoaccess_(0), nconnect_(0), ncc_(0), nscc_(0),
66         input_match_type_(MATCH_NONE), output_match_type_(MATCH_NONE),
67         input_lookahead_(false), output_lookahead_(false),
68         properties_(0), arc_filter_type_(arc_filter_type), long_info_(true) {
69     if (info_type == "long") {
70       long_info_ = true;
71     } else if (info_type == "short") {
72       long_info_ = false;
73     } else if (info_type == "auto") {
74       long_info_ = fst.Properties(kExpanded, false);
75     } else {
76       FSTERROR() << "Bad info type: " << info_type;
77       return;
78     }
79 
80     if (!long_info_)
81       return;
82 
83     // If the FST is not sane, we return.
84     if (verify && !Verify(fst)) {
85       FSTERROR() << "FstInfo: Verify: FST not well-formed.";
86       return;
87     }
88 
89     start_ = fst.Start();
90     properties_ = fst.Properties(kFstProperties, test_properties);
91 
92     for (StateIterator< Fst<A> > siter(fst);
93          !siter.Done();
94          siter.Next()) {
95       ++nstates_;
96       StateId s = siter.Value();
97       if (fst.Final(s) != Weight::Zero())
98         ++nfinal_;
99       map<Label, int64> ilabel_count;
100       map<Label, int64> olabel_count;
101       for (ArcIterator< Fst<A> > aiter(fst, s);
102            !aiter.Done();
103            aiter.Next()) {
104         const A &arc = aiter.Value();
105         ++narcs_;
106         if (arc.ilabel == 0 && arc.olabel == 0)
107           ++nepsilons_;
108         if (arc.ilabel == 0)
109           ++niepsilons_;
110         if (arc.olabel == 0)
111           ++noepsilons_;
112         ++ilabel_count[arc.ilabel];
113         ++olabel_count[arc.olabel];
114       }
115       for (typename map<Label, int64>::iterator it = ilabel_count.begin();
116            it != ilabel_count.end();
117            ++it) {
118         ilabel_mult_ += it->second * it->second;
119       }
120       for (typename map<Label, int64>::iterator it = olabel_count.begin();
121            it != olabel_count.end();
122            ++it) {
123         olabel_mult_ += it->second * it->second;
124       }
125     }
126 
127     if (narcs_ > 0) {
128       ilabel_mult_ /= narcs_;
129       olabel_mult_ /= narcs_;
130     }
131 
132     {
133       vector<StateId> cc;
134       CcVisitor<Arc> cc_visitor(&cc);
135       FifoQueue<StateId> fifo_queue;
136       if (arc_filter_type == "any") {
137         Visit(fst, &cc_visitor, &fifo_queue);
138       } else if (arc_filter_type == "epsilon") {
139         Visit(fst, &cc_visitor, &fifo_queue, EpsilonArcFilter<Arc>());
140       } else if (arc_filter_type == "iepsilon") {
141         Visit(fst, &cc_visitor, &fifo_queue, InputEpsilonArcFilter<Arc>());
142       } else if (arc_filter_type == "oepsilon") {
143         Visit(fst, &cc_visitor, &fifo_queue, OutputEpsilonArcFilter<Arc>());
144       } else {
145         FSTERROR() << "Bad arc filter type: " << arc_filter_type;
146         return;
147       }
148 
149       for (StateId s = 0; s < cc.size(); ++s) {
150         if (cc[s] >= ncc_)
151           ncc_ = cc[s] + 1;
152       }
153     }
154 
155     {
156       vector<StateId> scc;
157       vector<bool> access, coaccess;
158       uint64 props = 0;
159       SccVisitor<Arc> scc_visitor(&scc, &access, &coaccess, &props);
160       if (arc_filter_type == "any") {
161         DfsVisit(fst, &scc_visitor);
162       } else if (arc_filter_type == "epsilon") {
163         DfsVisit(fst, &scc_visitor, EpsilonArcFilter<Arc>());
164       } else if (arc_filter_type == "iepsilon") {
165         DfsVisit(fst, &scc_visitor, InputEpsilonArcFilter<Arc>());
166       } else if (arc_filter_type == "oepsilon") {
167         DfsVisit(fst, &scc_visitor, OutputEpsilonArcFilter<Arc>());
168       } else {
169         FSTERROR() << "Bad arc filter type: " << arc_filter_type;
170         return;
171       }
172 
173       for (StateId s = 0; s < scc.size(); ++s) {
174         if (access[s])
175           ++naccess_;
176         if (coaccess[s])
177           ++ncoaccess_;
178         if (access[s] && coaccess[s])
179           ++nconnect_;
180         if (scc[s] >= nscc_)
181           nscc_ = scc[s] + 1;
182       }
183     }
184 
185     LookAheadMatcher< Fst<A> > imatcher(fst, MATCH_INPUT);
186     input_match_type_ =  imatcher.Type(test_properties);
187     input_lookahead_ =  imatcher.Flags() & kInputLookAheadMatcher;
188 
189     LookAheadMatcher< Fst<A> > omatcher(fst, MATCH_OUTPUT);
190     output_match_type_ =  omatcher.Type(test_properties);
191     output_lookahead_ =  omatcher.Flags() & kOutputLookAheadMatcher;
192   }
193 
194   // Short info
FstType()195   const string& FstType() const { return fst_type_; }
ArcType()196   const string& ArcType() const { return A::Type(); }
InputSymbols()197   const string& InputSymbols() const { return input_symbols_; }
OutputSymbols()198   const string& OutputSymbols() const { return output_symbols_; }
LongInfo()199   bool LongInfo() const { return long_info_; }
ArcFilterType()200   const string& ArcFilterType() const { return arc_filter_type_; }
201 
202   // Long info
InputMatchType()203   MatchType InputMatchType() const { CheckLong(); return input_match_type_; }
OutputMatchType()204   MatchType OutputMatchType() const { CheckLong(); return output_match_type_; }
InputLookAhead()205   bool InputLookAhead() const { CheckLong(); return input_lookahead_; }
OutputLookAhead()206   bool OutputLookAhead() const { CheckLong();  return output_lookahead_; }
NumStates()207   int64 NumStates() const { CheckLong();  return nstates_; }
NumArcs()208   int64 NumArcs() const { CheckLong();  return narcs_; }
Start()209   int64 Start() const { CheckLong();  return start_; }
NumFinal()210   int64 NumFinal() const { CheckLong();  return nfinal_; }
NumEpsilons()211   int64 NumEpsilons() const { CheckLong();  return nepsilons_; }
NumInputEpsilons()212   int64 NumInputEpsilons() const { CheckLong(); return niepsilons_; }
NumOutputEpsilons()213   int64 NumOutputEpsilons() const { CheckLong(); return noepsilons_; }
InputLabelMultiplicity()214   double InputLabelMultiplicity() const { CheckLong(); return ilabel_mult_; }
OutputLabelMultiplicity()215   double OutputLabelMultiplicity() const { CheckLong(); return olabel_mult_; }
216 
NumAccessible()217   int64 NumAccessible() const { CheckLong(); return naccess_; }
NumCoAccessible()218   int64 NumCoAccessible() const { CheckLong(); return ncoaccess_; }
NumConnected()219   int64 NumConnected() const { CheckLong(); return nconnect_; }
NumCc()220   int64 NumCc() const { CheckLong(); return ncc_; }
NumScc()221   int64 NumScc() const { CheckLong(); return nscc_; }
Properties()222   uint64 Properties() const { CheckLong(); return properties_; }
223 
224  private:
CheckLong()225   void CheckLong() const {
226     if (!long_info_)
227       FSTERROR() << "FstInfo: method only available with long info version";
228   }
229 
230   string fst_type_;
231   string input_symbols_;
232   string output_symbols_;
233   int64 nstates_;
234   int64 narcs_;
235   int64 start_;
236   int64 nfinal_;
237   int64 nepsilons_;
238   int64 niepsilons_;
239   int64 noepsilons_;
240   double ilabel_mult_;
241   double olabel_mult_;
242   int64 naccess_;
243   int64 ncoaccess_;
244   int64 nconnect_;
245   int64 ncc_;
246   int64 nscc_;
247   MatchType input_match_type_;
248   MatchType output_match_type_;
249   bool input_lookahead_;
250   bool output_lookahead_;
251   uint64 properties_;
252   string arc_filter_type_;
253   bool long_info_;
254   DISALLOW_COPY_AND_ASSIGN(FstInfo);
255 };
256 
257 template <class A>
258 void PrintFstInfo(const FstInfo<A> &fstinfo, bool pipe = false) {
259   ostream &os = pipe ? cerr : cout;
260 
261   ios_base::fmtflags old = os.setf(ios::left);
262   os.width(50);
263   os << "fst type" <<  fstinfo.FstType() << endl;
264   os.width(50);
265   os << "arc type" << fstinfo.ArcType() << endl;
266   os.width(50);
267   os << "input symbol table" << fstinfo.InputSymbols() << endl;
268   os.width(50);
269   os << "output symbol table" << fstinfo.OutputSymbols() << endl;
270 
271   if (!fstinfo.LongInfo()) {
272     os.setf(old);
273     return;
274   }
275 
276   os.width(50);
277   os << "# of states" << fstinfo.NumStates() << endl;
278   os.width(50);
279   os << "# of arcs" << fstinfo.NumArcs() << endl;
280   os.width(50);
281   os << "initial state" << fstinfo.Start() << endl;
282   os.width(50);
283   os << "# of final states" << fstinfo.NumFinal() << endl;
284   os.width(50);
285   os << "# of input/output epsilons" << fstinfo.NumEpsilons() << endl;
286   os.width(50);
287   os << "# of input epsilons" << fstinfo.NumInputEpsilons() << endl;
288   os.width(50);
289   os << "# of output epsilons" << fstinfo.NumOutputEpsilons() << endl;
290   os.width(50);
291   os << "input label multiplicity" << fstinfo.InputLabelMultiplicity() << endl;
292   os.width(50);
293   os << "output label multiplicity" << fstinfo.OutputLabelMultiplicity() << endl;
294   os.width(50);
295 
296   string arc_type = "";
297   if (fstinfo.ArcFilterType() == "epsilon")
298     arc_type = "epsilon ";
299   else if (fstinfo.ArcFilterType() == "iepsilon")
300     arc_type = "input-epsilon ";
301   else if (fstinfo.ArcFilterType() == "oepsilon")
302     arc_type = "output-epsilon ";
303 
304   string accessible_label = "# of " +  arc_type + "accessible states";
305   os.width(50);
306   os << accessible_label << fstinfo.NumAccessible() << endl;
307   string coaccessible_label = "# of " +  arc_type + "coaccessible states";
308   os.width(50);
309   os << coaccessible_label << fstinfo.NumCoAccessible() << endl;
310   string connected_label = "# of " +  arc_type + "connected states";
311   os.width(50);
312   os << connected_label << fstinfo.NumConnected() << endl;
313   string numcc_label = "# of " +  arc_type + "connected components";
314   os.width(50);
315   os << numcc_label << fstinfo.NumCc() << endl;
316   string numscc_label = "# of " +  arc_type + "strongly conn components";
317   os.width(50);
318   os << numscc_label << fstinfo.NumScc() << endl;
319 
320   os.width(50);
321   os << "input matcher"
322      << (fstinfo.InputMatchType() == MATCH_INPUT ? 'y' :
323          fstinfo.InputMatchType() == MATCH_NONE ? 'n' : '?') << endl;
324   os.width(50);
325   os << "output matcher"
326      << (fstinfo.OutputMatchType() == MATCH_OUTPUT ? 'y' :
327          fstinfo.OutputMatchType() == MATCH_NONE ? 'n' : '?') << endl;
328   os.width(50);
329   os << "input lookahead"
330      << (fstinfo.InputLookAhead() ? 'y' : 'n') << endl;
331   os.width(50);
332   os << "output lookahead"
333      << (fstinfo.OutputLookAhead() ? 'y' : 'n') << endl;
334 
335   uint64 prop = 1;
336   for (int i = 0; i < 64; ++i, prop <<= 1) {
337     if (prop & kBinaryProperties) {
338       char value = 'n';
339       if (fstinfo.Properties() & prop) value = 'y';
340       os.width(50);
341       os << PropertyNames[i] << value << endl;
342     } else if (prop & kPosTrinaryProperties) {
343       char value = '?';
344       if (fstinfo.Properties() & prop) value = 'y';
345       else if (fstinfo.Properties() & prop << 1) value = 'n';
346       os.width(50);
347       os << PropertyNames[i] << value << endl;
348     }
349   }
350   os.setf(old);
351 }
352 
353 }  // namespace fst
354 
355 #endif  // FST_SCRIPT_INFO_IMPL_H_
356