1 // properties.cc
2
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Functions for updating property bits for various FST operations and
20 // string names of the properties.
21
22 #include <fst/properties.h>
23
24 #include <stddef.h>
25 #include <vector>
26 using std::vector;
27
28 namespace fst {
29
30 // These functions determine the properties associated with the FST
31 // result of various finite-state operations. The property arguments
32 // correspond to the operation's FST arguments. The properties
33 // returned assume the operation modifies its first argument.
34 // Bitwise-and this result with kCopyProperties for the case when a
35 // new (possibly delayed) FST is instead constructed.
36
37 // Properties for a concatenatively-closed FST.
ClosureProperties(uint64 inprops,bool star,bool delayed)38 uint64 ClosureProperties(uint64 inprops, bool star, bool delayed) {
39 uint64 outprops = (kError | kAcceptor | kUnweighted | kAccessible) & inprops;
40 if (!delayed)
41 outprops |= (kExpanded | kMutable | kCoAccessible |
42 kNotTopSorted | kNotString) & inprops;
43 if (!delayed || inprops & kAccessible)
44 outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
45 kNotILabelSorted | kNotOLabelSorted | kWeighted |
46 kNotAccessible | kNotCoAccessible) & inprops;
47 return outprops;
48 }
49
50 // Properties for a complemented FST.
ComplementProperties(uint64 inprops)51 uint64 ComplementProperties(uint64 inprops) {
52 uint64 outprops = kAcceptor | kUnweighted | kNoEpsilons |
53 kNoIEpsilons | kNoOEpsilons |
54 kIDeterministic | kODeterministic | kAccessible;
55 outprops |= (kError | kILabelSorted | kOLabelSorted | kInitialCyclic) &
56 inprops;
57 if (inprops & kAccessible)
58 outprops |= kNotILabelSorted | kNotOLabelSorted | kCyclic;
59 return outprops;
60 }
61
62 // Properties for a composed FST.
ComposeProperties(uint64 inprops1,uint64 inprops2)63 uint64 ComposeProperties(uint64 inprops1, uint64 inprops2) {
64 uint64 outprops = kError & (inprops1 | inprops2);
65 if (inprops1 & kAcceptor && inprops2 & kAcceptor) {
66 outprops |= kAcceptor | kAccessible;
67 outprops |= (kNoEpsilons | kNoIEpsilons | kNoOEpsilons | kAcyclic |
68 kInitialAcyclic) & inprops1 & inprops2;
69 if (kNoIEpsilons & inprops1 & inprops2)
70 outprops |= (kIDeterministic | kODeterministic) & inprops1 & inprops2;
71 } else {
72 outprops |= kAccessible;
73 outprops |= (kAcceptor | kNoIEpsilons | kAcyclic | kInitialAcyclic) &
74 inprops1 & inprops2;
75 if (kNoIEpsilons & inprops1 & inprops2)
76 outprops |= kIDeterministic & inprops1 & inprops2;
77 }
78 return outprops;
79 }
80
81 // Properties for a concatenated FST.
ConcatProperties(uint64 inprops1,uint64 inprops2,bool delayed)82 uint64 ConcatProperties(uint64 inprops1, uint64 inprops2, bool delayed) {
83 uint64 outprops =
84 (kAcceptor | kUnweighted | kAcyclic) & inprops1 & inprops2;
85 outprops |= kError & (inprops1 | inprops2);
86
87 bool empty1 = delayed; // Can fst1 be the empty machine?
88 bool empty2 = delayed; // Can fst2 be the empty machine?
89
90 if (!delayed) {
91 outprops |= (kExpanded | kMutable | kNotTopSorted | kNotString) & inprops1;
92 outprops |= (kNotTopSorted | kNotString) & inprops2;
93 }
94 if (!empty1)
95 outprops |= (kInitialAcyclic | kInitialCyclic) & inprops1;
96 if (!delayed || inprops1 & kAccessible)
97 outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
98 kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
99 kNotOLabelSorted | kWeighted | kCyclic |
100 kNotAccessible | kNotCoAccessible) & inprops1;
101 if ((inprops1 & (kAccessible | kCoAccessible)) ==
102 (kAccessible | kCoAccessible) && !empty1) {
103 outprops |= kAccessible & inprops2;
104 if (!empty2)
105 outprops |= kCoAccessible & inprops2;
106 if (!delayed || inprops2 & kAccessible)
107 outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
108 kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
109 kNotOLabelSorted | kWeighted | kCyclic |
110 kNotAccessible | kNotCoAccessible) & inprops2;
111 }
112 return outprops;
113 }
114
115 // Properties for a determinized FST.
DeterminizeProperties(uint64 inprops,bool has_subsequential_label)116 uint64 DeterminizeProperties(uint64 inprops, bool has_subsequential_label) {
117 uint64 outprops = kAccessible;
118 if (((kAcceptor | kNoIEpsilons) & inprops) || has_subsequential_label)
119 outprops |= kIDeterministic;
120 outprops |= (kError | kAcceptor | kAcyclic |
121 kInitialAcyclic | kCoAccessible | kString) & inprops;
122 if (inprops & kNoIEpsilons)
123 outprops |= kNoEpsilons & inprops;
124 if (inprops & kAccessible)
125 outprops |= (kNotAcceptor | kEpsilons | kIEpsilons | kOEpsilons |
126 kCyclic) & inprops;
127 if (inprops & kAcceptor)
128 outprops |= (kNoIEpsilons | kNoOEpsilons) & inprops;
129 if ((inprops & kNoIEpsilons) && has_subsequential_label)
130 outprops |= kNoIEpsilons;
131 return outprops;
132 }
133
134 // Properties for factored weight FST.
FactorWeightProperties(uint64 inprops)135 uint64 FactorWeightProperties(uint64 inprops) {
136 uint64 outprops = (kExpanded | kMutable | kError | kAcceptor |
137 kAcyclic | kAccessible | kCoAccessible) & inprops;
138 if (inprops & kAccessible)
139 outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
140 kEpsilons | kIEpsilons | kOEpsilons | kCyclic |
141 kNotILabelSorted | kNotOLabelSorted)
142 & inprops;
143 return outprops;
144 }
145
146 // Properties for an inverted FST.
InvertProperties(uint64 inprops)147 uint64 InvertProperties(uint64 inprops) {
148 uint64 outprops = (kExpanded | kMutable | kError | kAcceptor | kNotAcceptor |
149 kEpsilons | kNoEpsilons | kWeighted | kUnweighted |
150 kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic |
151 kTopSorted | kNotTopSorted |
152 kAccessible | kNotAccessible |
153 kCoAccessible | kNotCoAccessible |
154 kString | kNotString) & inprops;
155 if (kIDeterministic & inprops)
156 outprops |= kODeterministic;
157 if (kNonIDeterministic & inprops)
158 outprops |= kNonODeterministic;
159 if (kODeterministic & inprops)
160 outprops |= kIDeterministic;
161 if (kNonODeterministic & inprops)
162 outprops |= kNonIDeterministic;
163
164 if (kIEpsilons & inprops)
165 outprops |= kOEpsilons;
166 if (kNoIEpsilons & inprops)
167 outprops |= kNoOEpsilons;
168 if (kOEpsilons & inprops)
169 outprops |= kIEpsilons;
170 if (kNoOEpsilons & inprops)
171 outprops |= kNoIEpsilons;
172
173 if (kILabelSorted & inprops)
174 outprops |= kOLabelSorted;
175 if (kNotILabelSorted & inprops)
176 outprops |= kNotOLabelSorted;
177 if (kOLabelSorted & inprops)
178 outprops |= kILabelSorted;
179 if (kNotOLabelSorted & inprops)
180 outprops |= kNotILabelSorted;
181 return outprops;
182 }
183
184 // Properties for a projected FST.
ProjectProperties(uint64 inprops,bool project_input)185 uint64 ProjectProperties(uint64 inprops, bool project_input) {
186 uint64 outprops = kAcceptor;
187 outprops |= (kExpanded | kMutable | kError | kWeighted | kUnweighted |
188 kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic |
189 kTopSorted | kNotTopSorted | kAccessible | kNotAccessible |
190 kCoAccessible | kNotCoAccessible |
191 kString | kNotString) & inprops;
192 if (project_input) {
193 outprops |= (kIDeterministic | kNonIDeterministic |
194 kIEpsilons | kNoIEpsilons |
195 kILabelSorted | kNotILabelSorted) & inprops;
196
197 if (kIDeterministic & inprops)
198 outprops |= kODeterministic;
199 if (kNonIDeterministic & inprops)
200 outprops |= kNonODeterministic;
201
202 if (kIEpsilons & inprops)
203 outprops |= kOEpsilons | kEpsilons;
204 if (kNoIEpsilons & inprops)
205 outprops |= kNoOEpsilons | kNoEpsilons;
206
207 if (kILabelSorted & inprops)
208 outprops |= kOLabelSorted;
209 if (kNotILabelSorted & inprops)
210 outprops |= kNotOLabelSorted;
211 } else {
212 outprops |= (kODeterministic | kNonODeterministic |
213 kOEpsilons | kNoOEpsilons |
214 kOLabelSorted | kNotOLabelSorted) & inprops;
215
216 if (kODeterministic & inprops)
217 outprops |= kIDeterministic;
218 if (kNonODeterministic & inprops)
219 outprops |= kNonIDeterministic;
220
221 if (kOEpsilons & inprops)
222 outprops |= kIEpsilons | kEpsilons;
223 if (kNoOEpsilons & inprops)
224 outprops |= kNoIEpsilons | kNoEpsilons;
225
226 if (kOLabelSorted & inprops)
227 outprops |= kILabelSorted;
228 if (kNotOLabelSorted & inprops)
229 outprops |= kNotILabelSorted;
230 }
231 return outprops;
232 }
233
234 // Properties for a randgen FST.
RandGenProperties(uint64 inprops,bool weighted)235 uint64 RandGenProperties(uint64 inprops, bool weighted) {
236 uint64 outprops = kAcyclic | kInitialAcyclic | kAccessible;
237 outprops |= inprops & kError;
238 if (weighted) {
239 outprops |= kTopSorted;
240 outprops |= (kAcceptor | kNoEpsilons |
241 kNoIEpsilons | kNoOEpsilons |
242 kIDeterministic | kODeterministic |
243 kILabelSorted | kOLabelSorted) & inprops;
244 } else {
245 outprops |= kUnweighted;
246 outprops |= (kAcceptor | kILabelSorted | kOLabelSorted) & inprops;
247 }
248 return outprops;
249 }
250
251 // Properties for a replace FST.
ReplaceProperties(const vector<uint64> & inprops,ssize_t root,bool epsilon_on_call,bool epsilon_on_return,bool replace_transducer,bool no_empty_fsts)252 uint64 ReplaceProperties(const vector<uint64>& inprops,
253 ssize_t root,
254 bool epsilon_on_call,
255 bool epsilon_on_return,
256 bool replace_transducer,
257 bool no_empty_fsts) {
258 if (inprops.size() == 0)
259 return kNullProperties;
260 uint64 outprops = 0;
261 for (size_t i = 0; i < inprops.size(); ++i)
262 outprops |= kError & inprops[i];
263 uint64 access_props = no_empty_fsts ? kAccessible | kCoAccessible : 0;
264 for (size_t i = 0; i < inprops.size(); ++i)
265 access_props &= (inprops[i] & (kAccessible | kCoAccessible));
266 if (access_props == (kAccessible | kCoAccessible)) {
267 outprops |= access_props;
268 if (inprops[root] & kInitialCyclic)
269 outprops |= kInitialCyclic;
270 uint64 props = 0;
271 bool string = true;
272 for (size_t i = 0; i < inprops.size(); ++i) {
273 if (replace_transducer)
274 props |= kNotAcceptor & inprops[i];
275 props |= (kNonIDeterministic | kNonODeterministic | kEpsilons |
276 kIEpsilons | kOEpsilons | kWeighted | kCyclic |
277 kNotTopSorted | kNotString) & inprops[i];
278 if (!(inprops[i] & kString))
279 string = false;
280 }
281 outprops |= props;
282 if (string)
283 outprops |= kString;
284 }
285 bool acceptor = !replace_transducer;
286 bool ideterministic = !epsilon_on_call && epsilon_on_return;
287 bool no_iepsilons = !epsilon_on_call && !epsilon_on_return;
288 bool acyclic = true;
289 bool unweighted = true;
290 for (size_t i = 0; i < inprops.size(); ++i) {
291 if (!(inprops[i] & kAcceptor))
292 acceptor = false;
293 if (!(inprops[i] & kIDeterministic))
294 ideterministic = false;
295 if (!(inprops[i] & kNoIEpsilons))
296 no_iepsilons = false;
297 if (!(inprops[i] & kAcyclic))
298 acyclic = false;
299 if (!(inprops[i] & kUnweighted))
300 unweighted = false;
301 if (i != root && !(inprops[i] & kNoIEpsilons))
302 ideterministic = false;
303 }
304 if (acceptor)
305 outprops |= kAcceptor;
306 if (ideterministic)
307 outprops |= kIDeterministic;
308 if (no_iepsilons)
309 outprops |= kNoIEpsilons;
310 if (acyclic)
311 outprops |= kAcyclic;
312 if (unweighted)
313 outprops |= kUnweighted;
314 if (inprops[root] & kInitialAcyclic)
315 outprops |= kInitialAcyclic;
316 return outprops;
317 }
318
319 // Properties for a relabeled FST.
RelabelProperties(uint64 inprops)320 uint64 RelabelProperties(uint64 inprops) {
321 uint64 outprops = (kExpanded | kMutable | kError |
322 kWeighted | kUnweighted |
323 kCyclic | kAcyclic |
324 kInitialCyclic | kInitialAcyclic |
325 kTopSorted | kNotTopSorted |
326 kAccessible | kNotAccessible |
327 kCoAccessible | kNotCoAccessible |
328 kString | kNotString) & inprops;
329 return outprops;
330 }
331
332 // Properties for a reversed FST. (the superinitial state limits this set)
ReverseProperties(uint64 inprops,bool has_superinitial)333 uint64 ReverseProperties(uint64 inprops, bool has_superinitial) {
334 uint64 outprops =
335 (kExpanded | kMutable | kError | kAcceptor | kNotAcceptor | kEpsilons |
336 kIEpsilons | kOEpsilons | kUnweighted | kCyclic | kAcyclic) & inprops;
337 if (has_superinitial)
338 outprops |= kWeighted & inprops;
339 return outprops;
340 }
341
342 // Properties for re-weighted FST.
ReweightProperties(uint64 inprops)343 uint64 ReweightProperties(uint64 inprops) {
344 uint64 outprops = inprops & kWeightInvariantProperties;
345 outprops = outprops & ~kCoAccessible;
346 return outprops;
347 }
348
349 // Properties for an epsilon-removed FST.
RmEpsilonProperties(uint64 inprops,bool delayed)350 uint64 RmEpsilonProperties(uint64 inprops, bool delayed) {
351 uint64 outprops = kNoEpsilons;
352 outprops |= (kError | kAcceptor | kAcyclic | kInitialAcyclic) & inprops;
353 if (inprops & kAcceptor)
354 outprops |= kNoIEpsilons | kNoOEpsilons;
355 if (!delayed) {
356 outprops |= kExpanded | kMutable;
357 outprops |= kTopSorted & inprops;
358 }
359 if (!delayed || inprops & kAccessible)
360 outprops |= kNotAcceptor & inprops;
361 return outprops;
362 }
363
364 // Properties for shortest path. This function computes how the properties
365 // of the output of shortest path need to be updated, given that 'props' is
366 // already known.
ShortestPathProperties(uint64 props)367 uint64 ShortestPathProperties(uint64 props) {
368 return props | kAcyclic | kInitialAcyclic | kAccessible | kCoAccessible;
369 }
370
371 // Properties for a synchronized FST.
SynchronizeProperties(uint64 inprops)372 uint64 SynchronizeProperties(uint64 inprops) {
373 uint64 outprops = (kError | kAcceptor | kAcyclic | kAccessible |
374 kCoAccessible | kUnweighted) & inprops;
375 if (inprops & kAccessible)
376 outprops |= (kCyclic | kNotCoAccessible | kWeighted) & inprops;
377 return outprops;
378 }
379
380 // Properties for a unioned FST.
UnionProperties(uint64 inprops1,uint64 inprops2,bool delayed)381 uint64 UnionProperties(uint64 inprops1, uint64 inprops2, bool delayed) {
382 uint64 outprops = (kAcceptor | kUnweighted | kAcyclic | kAccessible)
383 & inprops1 & inprops2;
384 outprops |= kError & (inprops1 | inprops2);
385 outprops |= kInitialAcyclic;
386
387 bool empty1 = delayed; // Can fst1 be the empty machine?
388 bool empty2 = delayed; // Can fst2 be the empty machine?
389 if (!delayed) {
390 outprops |= (kExpanded | kMutable | kNotTopSorted) & inprops1;
391 outprops |= kNotTopSorted & inprops2;
392 }
393 if (!empty1 && !empty2) {
394 outprops |= kEpsilons | kIEpsilons | kOEpsilons;
395 outprops |= kCoAccessible & inprops1 & inprops2;
396 }
397 // Note kNotCoAccessible does not hold because of kInitialAcyclic opt.
398 if (!delayed || inprops1 & kAccessible)
399 outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
400 kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
401 kNotOLabelSorted | kWeighted | kCyclic |
402 kNotAccessible) & inprops1;
403 if (!delayed || inprops2 & kAccessible)
404 outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
405 kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
406 kNotOLabelSorted | kWeighted | kCyclic |
407 kNotAccessible | kNotCoAccessible) & inprops2;
408 return outprops;
409 }
410
411
412 // Property string names (indexed by bit position).
413 const char *PropertyNames[] = {
414 // binary
415 "expanded", "mutable", "error", "", "", "", "", "",
416 "", "", "", "", "", "", "", "",
417 // trinary
418 "acceptor", "not acceptor",
419 "input deterministic", "non input deterministic",
420 "output deterministic", "non output deterministic",
421 "input/output epsilons", "no input/output epsilons",
422 "input epsilons", "no input epsilons",
423 "output epsilons", "no output epsilons",
424 "input label sorted", "not input label sorted",
425 "output label sorted", "not output label sorted",
426 "weighted", "unweighted",
427 "cyclic", "acyclic",
428 "cyclic at initial state", "acyclic at initial state",
429 "top sorted", "not top sorted",
430 "accessible", "not accessible",
431 "coaccessible", "not coaccessible",
432 "string", "not string",
433 };
434
435 } // namespace fst
436