1 // info.h
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Prints information about a PDT.
20 
21 #ifndef FST_EXTENSIONS_PDT_INFO_H__
22 #define FST_EXTENSIONS_PDT_INFO_H__
23 
24 #include <unordered_map>
25 using std::unordered_map;
26 using std::unordered_multimap;
27 #include <unordered_set>
28 using std::unordered_set;
29 using std::unordered_multiset;
30 #include <vector>
31 using std::vector;
32 
33 #include <fst/fst.h>
34 #include <fst/extensions/pdt/pdt.h>
35 
36 namespace fst {
37 
38 // Compute various information about PDTs, helper class for pdtinfo.cc.
39 template <class A> class PdtInfo {
40 public:
41   typedef A Arc;
42   typedef typename A::StateId StateId;
43   typedef typename A::Label Label;
44   typedef typename A::Weight Weight;
45 
46   PdtInfo(const Fst<A> &fst,
47           const vector<pair<typename A::Label,
48           typename A::Label> > &parens);
49 
FstType()50   const string& FstType() const { return fst_type_; }
ArcType()51   const string& ArcType() const { return A::Type(); }
52 
NumStates()53   int64 NumStates() const { return nstates_; }
NumArcs()54   int64 NumArcs() const { return narcs_; }
NumOpenParens()55   int64 NumOpenParens() const { return nopen_parens_; }
NumCloseParens()56   int64 NumCloseParens() const { return nclose_parens_; }
NumUniqueOpenParens()57   int64 NumUniqueOpenParens() const { return nuniq_open_parens_; }
NumUniqueCloseParens()58   int64 NumUniqueCloseParens() const { return nuniq_close_parens_; }
NumOpenParenStates()59   int64 NumOpenParenStates() const { return nopen_paren_states_; }
NumCloseParenStates()60   int64 NumCloseParenStates() const { return nclose_paren_states_; }
61 
62  private:
63   string fst_type_;
64   int64 nstates_;
65   int64 narcs_;
66   int64 nopen_parens_;
67   int64 nclose_parens_;
68   int64 nuniq_open_parens_;
69   int64 nuniq_close_parens_;
70   int64 nopen_paren_states_;
71   int64 nclose_paren_states_;
72 
73   DISALLOW_COPY_AND_ASSIGN(PdtInfo);
74 };
75 
76 template <class A>
PdtInfo(const Fst<A> & fst,const vector<pair<typename A::Label,typename A::Label>> & parens)77 PdtInfo<A>::PdtInfo(const Fst<A> &fst,
78                  const vector<pair<typename A::Label,
79                                    typename A::Label> > &parens)
80   : fst_type_(fst.Type()),
81     nstates_(0),
82     narcs_(0),
83     nopen_parens_(0),
84     nclose_parens_(0),
85     nuniq_open_parens_(0),
86     nuniq_close_parens_(0),
87     nopen_paren_states_(0),
88     nclose_paren_states_(0) {
89   unordered_map<Label, size_t> paren_map;
90   unordered_set<Label> paren_set;
91   unordered_set<StateId> open_paren_state_set;
92   unordered_set<StateId> close_paren_state_set;
93 
94   for (size_t i = 0; i < parens.size(); ++i) {
95     const pair<Label, Label>  &p = parens[i];
96     paren_map[p.first] = i;
97     paren_map[p.second] = i;
98   }
99 
100   for (StateIterator< Fst<A> > siter(fst);
101        !siter.Done();
102        siter.Next()) {
103     ++nstates_;
104     StateId s = siter.Value();
105     for (ArcIterator< Fst<A> > aiter(fst, s);
106          !aiter.Done();
107          aiter.Next()) {
108       const A &arc = aiter.Value();
109       ++narcs_;
110       typename unordered_map<Label, size_t>::const_iterator pit
111         = paren_map.find(arc.ilabel);
112       if (pit != paren_map.end()) {
113         Label open_paren =  parens[pit->second].first;
114         Label close_paren =  parens[pit->second].second;
115         if (arc.ilabel == open_paren) {
116           ++nopen_parens_;
117           if (!paren_set.count(open_paren)) {
118             ++nuniq_open_parens_;
119             paren_set.insert(open_paren);
120           }
121           if (!open_paren_state_set.count(arc.nextstate)) {
122             ++nopen_paren_states_;
123             open_paren_state_set.insert(arc.nextstate);
124           }
125         } else {
126           ++nclose_parens_;
127           if (!paren_set.count(close_paren)) {
128             ++nuniq_close_parens_;
129             paren_set.insert(close_paren);
130           }
131           if (!close_paren_state_set.count(s)) {
132             ++nclose_paren_states_;
133             close_paren_state_set.insert(s);
134           }
135 
136         }
137       }
138     }
139   }
140 }
141 
142 
143 template <class A>
PrintPdtInfo(const PdtInfo<A> & pdtinfo)144 void PrintPdtInfo(const PdtInfo<A> &pdtinfo) {
145   ios_base::fmtflags old = cout.setf(ios::left);
146   cout.width(50);
147   cout << "fst type" << pdtinfo.FstType().c_str() << endl;
148   cout.width(50);
149   cout << "arc type" << pdtinfo.ArcType().c_str() << endl;
150   cout.width(50);
151   cout << "# of states" << pdtinfo.NumStates() << endl;
152   cout.width(50);
153   cout << "# of arcs" << pdtinfo.NumArcs() << endl;
154   cout.width(50);
155   cout << "# of open parentheses" << pdtinfo.NumOpenParens() << endl;
156   cout.width(50);
157   cout << "# of close parentheses" << pdtinfo.NumCloseParens() << endl;
158   cout.width(50);
159   cout << "# of unique open parentheses"
160        << pdtinfo.NumUniqueOpenParens() << endl;
161   cout.width(50);
162   cout << "# of unique close parentheses"
163        << pdtinfo.NumUniqueCloseParens() << endl;
164   cout.width(50);
165   cout << "# of open parenthesis dest. states"
166        << pdtinfo.NumOpenParenStates() << endl;
167   cout.width(50);
168   cout << "# of close parenthesis source states"
169        << pdtinfo.NumCloseParenStates() << endl;
170   cout.setf(old);
171 }
172 
173 }  // namespace fst
174 
175 #endif  // FST_EXTENSIONS_PDT_INFO_H__
176