1 
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 //
6 //     http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 //
14 // Copyright 2005-2010 Google, Inc.
15 // Author: sorenj@google.com (Jeffrey Sorensen)
16 
17 #include <fst/symbol-table-ops.h>
18 
19 namespace fst {
20 
MergeSymbolTable(const SymbolTable & left,const SymbolTable & right,bool * right_relabel_output)21 SymbolTable *MergeSymbolTable(const SymbolTable &left, const SymbolTable &right,
22                               bool *right_relabel_output) {
23   // MergeSymbolTable detects several special cases.  It will return a reference
24   // copied version of SymbolTable of left or right if either symbol table is
25   // a superset of the other.
26   SymbolTable *merged = new SymbolTable("merge_" + left.Name() + "_" +
27                                         right.Name());
28   // copy everything from the left symbol table
29   bool left_has_all = true, right_has_all = true, relabel = false;
30   SymbolTableIterator liter(left);
31   for (; !liter.Done(); liter.Next()) {
32     merged->AddSymbol(liter.Symbol(), liter.Value());
33     if (right_has_all) {
34       int64 key = right.Find(liter.Symbol());
35       if (key == -1) {
36         right_has_all = false;
37       } else if (!relabel && key != liter.Value()) {
38         relabel = true;
39       }
40     }
41   }
42   if (right_has_all) {
43     delete merged;
44     if (right_relabel_output != NULL) {
45       *right_relabel_output = relabel;
46     }
47     return right.Copy();
48   }
49   // add all symbols we can from right symbol table
50   vector<string> conflicts;
51   SymbolTableIterator riter(right);
52   for (; !riter.Done(); riter.Next()) {
53     int64 key = merged->Find(riter.Symbol());
54     if (key != -1) {
55       // Symbol already exists, maybe with different value
56       if (key != riter.Value()) {
57         relabel = true;
58       }
59       continue;
60     }
61     // Symbol doesn't exist from left
62     left_has_all = false;
63     if (!merged->Find(riter.Value()).empty()) {
64       // we can't add this where we want to, add it later, in order
65       conflicts.push_back(riter.Symbol());
66       continue;
67     }
68     // there is a hole and we can add this symbol with its id
69     merged->AddSymbol(riter.Symbol(), riter.Value());
70   }
71   if (right_relabel_output != NULL) {
72     *right_relabel_output = relabel;
73   }
74   if (left_has_all) {
75     delete merged;
76     return left.Copy();
77   }
78   // Add all symbols that conflicted, in order
79   for (int i= 0; i < conflicts.size(); ++i) {
80     merged->AddSymbol(conflicts[i]);
81   }
82   return merged;
83 }
84 
// Returns a new symbol table, named "<syms name>_compact", that contains the
// symbols of `syms` renumbered 0, 1, 2, ... in increasing order of their
// original values.  The caller receives the newly allocated table.
SymbolTable *CompactSymbolTable(const SymbolTable &syms) {
  // Order the symbols by their original value.  The key type must be the
  // full 64-bit value type: a plain int would truncate large values, which
  // can misorder them or make distinct symbols collide on the same key.
  map<int64, string> sorted;
  for (SymbolTableIterator stiter(syms); !stiter.Done(); stiter.Next()) {
    sorted[stiter.Value()] = stiter.Symbol();
  }
  SymbolTable *compact = new SymbolTable(syms.Name() + "_compact");
  // Hand out consecutive keys following the sorted order.
  int64 newkey = 0;
  for (map<int64, string>::const_iterator si = sorted.begin();
       si != sorted.end(); ++si) {
    compact->AddSymbol(si->second, newkey++);
  }
  return compact;
}
99 
FstReadSymbols(const string & filename,bool input_symbols)100 SymbolTable *FstReadSymbols(const string &filename, bool input_symbols) {
101   ifstream in(filename.c_str(), ifstream::in | ifstream::binary);
102   if (!in) {
103     LOG(ERROR) << "FstReadSymbols: Can't open file " << filename;
104     return NULL;
105   }
106   FstHeader hdr;
107   if (!hdr.Read(in, filename)) {
108     LOG(ERROR) << "FstReadSymbols: Couldn't read header from " << filename;
109     return NULL;
110   }
111   if (hdr.GetFlags() & FstHeader::HAS_ISYMBOLS) {
112     SymbolTable *isymbols = SymbolTable::Read(in, filename);
113     if (isymbols == NULL) {
114       LOG(ERROR) << "FstReadSymbols: Could not read input symbols from "
115                  << filename;
116       return NULL;
117     }
118     if (input_symbols) {
119       return isymbols;
120     }
121     delete isymbols;
122   }
123   if (hdr.GetFlags() & FstHeader::HAS_OSYMBOLS) {
124     SymbolTable *osymbols = SymbolTable::Read(in, filename);
125     if (osymbols == NULL) {
126       LOG(ERROR) << "FstReadSymbols: Could not read output symbols from "
127                  << filename;
128       return NULL;
129     }
130     if (!input_symbols) {
131       return osymbols;
132     }
133     delete osymbols;
134   }
135   LOG(ERROR) << "FstReadSymbols: The file " << filename
136              << " doesn't contain the requested symbols";
137   return NULL;
138 }
139 
140 }  // namespace fst
141