1 // info.h 2 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // Copyright 2005-2010 Google, Inc. 16 // Author: riley@google.com (Michael Riley) 17 // 18 // \file 19 // Class to compute various information about FSTs, helper class for fstinfo.cc 20 21 #ifndef FST_SCRIPT_INFO_IMPL_H_ 22 #define FST_SCRIPT_INFO_IMPL_H_ 23 24 #include <map> 25 #include <string> 26 #include <vector> 27 using std::vector; 28 29 #include <fst/connect.h> 30 #include <fst/dfs-visit.h> 31 #include <fst/fst.h> 32 #include <fst/lookahead-matcher.h> 33 #include <fst/matcher.h> 34 #include <fst/queue.h> 35 #include <fst/test-properties.h> 36 #include <fst/verify.h> 37 #include <fst/visit.h> 38 39 namespace fst { 40 41 // Compute various information about FSTs, helper class for fstinfo.cc. 42 // WARNING: Stand-alone use of this class is not recommended, most code 43 // should call directly the relevant library functions: Fst<A>::NumStates, 44 // Fst<A>::NumArcs, TestProperties, ... 45 template <class A> class FstInfo { 46 public: 47 typedef A Arc; 48 typedef typename A::StateId StateId; 49 typedef typename A::Label Label; 50 typedef typename A::Weight Weight; 51 52 // When info_type is "short" (or "auto" and not an ExpandedFst) 53 // then only minimal info is computed and can be requested. 54 FstInfo(const Fst<A> &fst, bool test_properties, 55 const string &arc_filter_type = "any", 56 string info_type = "auto", bool verify = true) 57 : fst_type_(fst.Type()), 58 input_symbols_(fst.InputSymbols() ? 59 fst.InputSymbols()->Name() : "none"), 60 output_symbols_(fst.OutputSymbols() ? 61 fst.OutputSymbols()->Name() : "none"), 62 nstates_(0), narcs_(0), start_(kNoStateId), nfinal_(0), 63 nepsilons_(0), niepsilons_(0), noepsilons_(0), 64 ilabel_mult_(0.0), olabel_mult_(0.0), 65 naccess_(0), ncoaccess_(0), nconnect_(0), ncc_(0), nscc_(0), 66 input_match_type_(MATCH_NONE), output_match_type_(MATCH_NONE), 67 input_lookahead_(false), output_lookahead_(false), 68 properties_(0), arc_filter_type_(arc_filter_type), long_info_(true) { 69 if (info_type == "long") { 70 long_info_ = true; 71 } else if (info_type == "short") { 72 long_info_ = false; 73 } else if (info_type == "auto") { 74 long_info_ = fst.Properties(kExpanded, false); 75 } else { 76 FSTERROR() << "Bad info type: " << info_type; 77 return; 78 } 79 80 if (!long_info_) 81 return; 82 83 // If the FST is not sane, we return. 84 if (verify && !Verify(fst)) { 85 FSTERROR() << "FstInfo: Verify: FST not well-formed."; 86 return; 87 } 88 89 start_ = fst.Start(); 90 properties_ = fst.Properties(kFstProperties, test_properties); 91 92 for (StateIterator< Fst<A> > siter(fst); 93 !siter.Done(); 94 siter.Next()) { 95 ++nstates_; 96 StateId s = siter.Value(); 97 if (fst.Final(s) != Weight::Zero()) 98 ++nfinal_; 99 map<Label, int64> ilabel_count; 100 map<Label, int64> olabel_count; 101 for (ArcIterator< Fst<A> > aiter(fst, s); 102 !aiter.Done(); 103 aiter.Next()) { 104 const A &arc = aiter.Value(); 105 ++narcs_; 106 if (arc.ilabel == 0 && arc.olabel == 0) 107 ++nepsilons_; 108 if (arc.ilabel == 0) 109 ++niepsilons_; 110 if (arc.olabel == 0) 111 ++noepsilons_; 112 ++ilabel_count[arc.ilabel]; 113 ++olabel_count[arc.olabel]; 114 } 115 for (typename map<Label, int64>::iterator it = ilabel_count.begin(); 116 it != ilabel_count.end(); 117 ++it) { 118 ilabel_mult_ += it->second * it->second; 119 } 120 for (typename map<Label, int64>::iterator it = olabel_count.begin(); 121 it != olabel_count.end(); 122 ++it) { 123 olabel_mult_ += it->second * it->second; 124 } 125 } 126 127 if (narcs_ > 0) { 128 ilabel_mult_ /= narcs_; 129 olabel_mult_ /= narcs_; 130 } 131 132 { 133 vector<StateId> cc; 134 CcVisitor<Arc> cc_visitor(&cc); 135 FifoQueue<StateId> fifo_queue; 136 if (arc_filter_type == "any") { 137 Visit(fst, &cc_visitor, &fifo_queue); 138 } else if (arc_filter_type == "epsilon") { 139 Visit(fst, &cc_visitor, &fifo_queue, EpsilonArcFilter<Arc>()); 140 } else if (arc_filter_type == "iepsilon") { 141 Visit(fst, &cc_visitor, &fifo_queue, InputEpsilonArcFilter<Arc>()); 142 } else if (arc_filter_type == "oepsilon") { 143 Visit(fst, &cc_visitor, &fifo_queue, OutputEpsilonArcFilter<Arc>()); 144 } else { 145 FSTERROR() << "Bad arc filter type: " << arc_filter_type; 146 return; 147 } 148 149 for (StateId s = 0; s < cc.size(); ++s) { 150 if (cc[s] >= ncc_) 151 ncc_ = cc[s] + 1; 152 } 153 } 154 155 { 156 vector<StateId> scc; 157 vector<bool> access, coaccess; 158 uint64 props = 0; 159 SccVisitor<Arc> scc_visitor(&scc, &access, &coaccess, &props); 160 if (arc_filter_type == "any") { 161 DfsVisit(fst, &scc_visitor); 162 } else if (arc_filter_type == "epsilon") { 163 DfsVisit(fst, &scc_visitor, EpsilonArcFilter<Arc>()); 164 } else if (arc_filter_type == "iepsilon") { 165 DfsVisit(fst, &scc_visitor, InputEpsilonArcFilter<Arc>()); 166 } else if (arc_filter_type == "oepsilon") { 167 DfsVisit(fst, &scc_visitor, OutputEpsilonArcFilter<Arc>()); 168 } else { 169 FSTERROR() << "Bad arc filter type: " << arc_filter_type; 170 return; 171 } 172 173 for (StateId s = 0; s < scc.size(); ++s) { 174 if (access[s]) 175 ++naccess_; 176 if (coaccess[s]) 177 ++ncoaccess_; 178 if (access[s] && coaccess[s]) 179 ++nconnect_; 180 if (scc[s] >= nscc_) 181 nscc_ = scc[s] + 1; 182 } 183 } 184 185 LookAheadMatcher< Fst<A> > imatcher(fst, MATCH_INPUT); 186 input_match_type_ = imatcher.Type(test_properties); 187 input_lookahead_ = imatcher.Flags() & kInputLookAheadMatcher; 188 189 LookAheadMatcher< Fst<A> > omatcher(fst, MATCH_OUTPUT); 190 output_match_type_ = omatcher.Type(test_properties); 191 output_lookahead_ = omatcher.Flags() & kOutputLookAheadMatcher; 192 } 193 194 // Short info FstType()195 const string& FstType() const { return fst_type_; } ArcType()196 const string& ArcType() const { return A::Type(); } InputSymbols()197 const string& InputSymbols() const { return input_symbols_; } OutputSymbols()198 const string& OutputSymbols() const { return output_symbols_; } LongInfo()199 bool LongInfo() const { return long_info_; } ArcFilterType()200 const string& ArcFilterType() const { return arc_filter_type_; } 201 202 // Long info InputMatchType()203 MatchType InputMatchType() const { CheckLong(); return input_match_type_; } OutputMatchType()204 MatchType OutputMatchType() const { CheckLong(); return output_match_type_; } InputLookAhead()205 bool InputLookAhead() const { CheckLong(); return input_lookahead_; } OutputLookAhead()206 bool OutputLookAhead() const { CheckLong(); return output_lookahead_; } NumStates()207 int64 NumStates() const { CheckLong(); return nstates_; } NumArcs()208 int64 NumArcs() const { CheckLong(); return narcs_; } Start()209 int64 Start() const { CheckLong(); return start_; } NumFinal()210 int64 NumFinal() const { CheckLong(); return nfinal_; } NumEpsilons()211 int64 NumEpsilons() const { CheckLong(); return nepsilons_; } NumInputEpsilons()212 int64 NumInputEpsilons() const { CheckLong(); return niepsilons_; } NumOutputEpsilons()213 int64 NumOutputEpsilons() const { CheckLong(); return noepsilons_; } InputLabelMultiplicity()214 double InputLabelMultiplicity() const { CheckLong(); return ilabel_mult_; } OutputLabelMultiplicity()215 double OutputLabelMultiplicity() const { CheckLong(); return olabel_mult_; } 216 NumAccessible()217 int64 NumAccessible() const { CheckLong(); return naccess_; } NumCoAccessible()218 int64 NumCoAccessible() const { CheckLong(); return ncoaccess_; } NumConnected()219 int64 NumConnected() const { CheckLong(); return nconnect_; } NumCc()220 int64 NumCc() const { CheckLong(); return ncc_; } NumScc()221 int64 NumScc() const { CheckLong(); return nscc_; } Properties()222 uint64 Properties() const { CheckLong(); return properties_; } 223 224 private: CheckLong()225 void CheckLong() const { 226 if (!long_info_) 227 FSTERROR() << "FstInfo: method only available with long info version"; 228 } 229 230 string fst_type_; 231 string input_symbols_; 232 string output_symbols_; 233 int64 nstates_; 234 int64 narcs_; 235 int64 start_; 236 int64 nfinal_; 237 int64 nepsilons_; 238 int64 niepsilons_; 239 int64 noepsilons_; 240 double ilabel_mult_; 241 double olabel_mult_; 242 int64 naccess_; 243 int64 ncoaccess_; 244 int64 nconnect_; 245 int64 ncc_; 246 int64 nscc_; 247 MatchType input_match_type_; 248 MatchType output_match_type_; 249 bool input_lookahead_; 250 bool output_lookahead_; 251 uint64 properties_; 252 string arc_filter_type_; 253 bool long_info_; 254 DISALLOW_COPY_AND_ASSIGN(FstInfo); 255 }; 256 257 template <class A> 258 void PrintFstInfo(const FstInfo<A> &fstinfo, bool pipe = false) { 259 ostream &os = pipe ? cerr : cout; 260 261 ios_base::fmtflags old = os.setf(ios::left); 262 os.width(50); 263 os << "fst type" << fstinfo.FstType() << endl; 264 os.width(50); 265 os << "arc type" << fstinfo.ArcType() << endl; 266 os.width(50); 267 os << "input symbol table" << fstinfo.InputSymbols() << endl; 268 os.width(50); 269 os << "output symbol table" << fstinfo.OutputSymbols() << endl; 270 271 if (!fstinfo.LongInfo()) { 272 os.setf(old); 273 return; 274 } 275 276 os.width(50); 277 os << "# of states" << fstinfo.NumStates() << endl; 278 os.width(50); 279 os << "# of arcs" << fstinfo.NumArcs() << endl; 280 os.width(50); 281 os << "initial state" << fstinfo.Start() << endl; 282 os.width(50); 283 os << "# of final states" << fstinfo.NumFinal() << endl; 284 os.width(50); 285 os << "# of input/output epsilons" << fstinfo.NumEpsilons() << endl; 286 os.width(50); 287 os << "# of input epsilons" << fstinfo.NumInputEpsilons() << endl; 288 os.width(50); 289 os << "# of output epsilons" << fstinfo.NumOutputEpsilons() << endl; 290 os.width(50); 291 os << "input label multiplicity" << fstinfo.InputLabelMultiplicity() << endl; 292 os.width(50); 293 os << "output label multiplicity" << fstinfo.OutputLabelMultiplicity() << endl; 294 os.width(50); 295 296 string arc_type = ""; 297 if (fstinfo.ArcFilterType() == "epsilon") 298 arc_type = "epsilon "; 299 else if (fstinfo.ArcFilterType() == "iepsilon") 300 arc_type = "input-epsilon "; 301 else if (fstinfo.ArcFilterType() == "oepsilon") 302 arc_type = "output-epsilon "; 303 304 string accessible_label = "# of " + arc_type + "accessible states"; 305 os.width(50); 306 os << accessible_label << fstinfo.NumAccessible() << endl; 307 string coaccessible_label = "# of " + arc_type + "coaccessible states"; 308 os.width(50); 309 os << coaccessible_label << fstinfo.NumCoAccessible() << endl; 310 string connected_label = "# of " + arc_type + "connected states"; 311 os.width(50); 312 os << connected_label << fstinfo.NumConnected() << endl; 313 string numcc_label = "# of " + arc_type + "connected components"; 314 os.width(50); 315 os << numcc_label << fstinfo.NumCc() << endl; 316 string numscc_label = "# of " + arc_type + "strongly conn components"; 317 os.width(50); 318 os << numscc_label << fstinfo.NumScc() << endl; 319 320 os.width(50); 321 os << "input matcher" 322 << (fstinfo.InputMatchType() == MATCH_INPUT ? 'y' : 323 fstinfo.InputMatchType() == MATCH_NONE ? 'n' : '?') << endl; 324 os.width(50); 325 os << "output matcher" 326 << (fstinfo.OutputMatchType() == MATCH_OUTPUT ? 'y' : 327 fstinfo.OutputMatchType() == MATCH_NONE ? 'n' : '?') << endl; 328 os.width(50); 329 os << "input lookahead" 330 << (fstinfo.InputLookAhead() ? 'y' : 'n') << endl; 331 os.width(50); 332 os << "output lookahead" 333 << (fstinfo.OutputLookAhead() ? 'y' : 'n') << endl; 334 335 uint64 prop = 1; 336 for (int i = 0; i < 64; ++i, prop <<= 1) { 337 if (prop & kBinaryProperties) { 338 char value = 'n'; 339 if (fstinfo.Properties() & prop) value = 'y'; 340 os.width(50); 341 os << PropertyNames[i] << value << endl; 342 } else if (prop & kPosTrinaryProperties) { 343 char value = '?'; 344 if (fstinfo.Properties() & prop) value = 'y'; 345 else if (fstinfo.Properties() & prop << 1) value = 'n'; 346 os.width(50); 347 os << PropertyNames[i] << value << endl; 348 } 349 } 350 os.setf(old); 351 } 352 353 } // namespace fst 354 355 #endif // FST_SCRIPT_INFO_IMPL_H_ 356