1 // properties.cc
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Functions for updating property bits for various FST operations and
20 // string names of the properties.
21 
22 #include <fst/properties.h>
23 
24 #include <stddef.h>
25 #include <vector>
26 using std::vector;
27 
28 namespace fst {
29 
30 // These functions determine the properties associated with the FST
31 // result of various finite-state operations. The property arguments
32 // correspond to the operation's FST arguments. The properties
33 // returned assume the operation modifies its first argument.
34 // Bitwise-and this result with kCopyProperties for the case when a
35 // new (possibly delayed) FST is instead constructed.
36 
37 // Properties for a concatenatively-closed FST.
ClosureProperties(uint64 inprops,bool star,bool delayed)38 uint64 ClosureProperties(uint64 inprops, bool star, bool delayed) {
39   uint64 outprops = (kError | kAcceptor | kUnweighted | kAccessible) & inprops;
40   if (!delayed)
41        outprops |= (kExpanded | kMutable | kCoAccessible |
42                     kNotTopSorted | kNotString) & inprops;
43   if (!delayed || inprops & kAccessible)
44     outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
45                  kNotILabelSorted | kNotOLabelSorted | kWeighted |
46                  kNotAccessible | kNotCoAccessible) & inprops;
47   return outprops;
48 }
49 
50 // Properties for a complemented FST.
ComplementProperties(uint64 inprops)51 uint64 ComplementProperties(uint64 inprops) {
52   uint64 outprops = kAcceptor | kUnweighted | kNoEpsilons |
53                     kNoIEpsilons | kNoOEpsilons |
54                     kIDeterministic | kODeterministic | kAccessible;
55   outprops |= (kError | kILabelSorted | kOLabelSorted | kInitialCyclic) &
56       inprops;
57   if (inprops & kAccessible)
58     outprops |= kNotILabelSorted | kNotOLabelSorted | kCyclic;
59   return outprops;
60 }
61 
62 // Properties for a composed FST.
ComposeProperties(uint64 inprops1,uint64 inprops2)63 uint64 ComposeProperties(uint64 inprops1, uint64 inprops2) {
64   uint64 outprops = kError & (inprops1 | inprops2);
65   if (inprops1 & kAcceptor && inprops2 & kAcceptor) {
66     outprops |= kAcceptor | kAccessible;
67     outprops |= (kNoEpsilons | kNoIEpsilons | kNoOEpsilons | kAcyclic |
68                  kInitialAcyclic) & inprops1 & inprops2;
69     if (kNoIEpsilons & inprops1 & inprops2)
70       outprops |= (kIDeterministic | kODeterministic) & inprops1 & inprops2;
71   } else {
72     outprops |= kAccessible;
73     outprops |= (kAcceptor | kNoIEpsilons | kAcyclic | kInitialAcyclic) &
74         inprops1 & inprops2;
75     if (kNoIEpsilons & inprops1 & inprops2)
76       outprops |= kIDeterministic & inprops1 & inprops2;
77   }
78   return outprops;
79 }
80 
81 // Properties for a concatenated FST.
ConcatProperties(uint64 inprops1,uint64 inprops2,bool delayed)82 uint64 ConcatProperties(uint64 inprops1, uint64 inprops2, bool delayed) {
83   uint64 outprops =
84     (kAcceptor | kUnweighted | kAcyclic) & inprops1 & inprops2;
85   outprops |= kError & (inprops1 | inprops2);
86 
87   bool empty1 = delayed;  // Can fst1 be the empty machine?
88   bool empty2 = delayed;  // Can fst2 be the empty machine?
89 
90   if (!delayed) {
91     outprops |= (kExpanded | kMutable | kNotTopSorted | kNotString) & inprops1;
92     outprops |= (kNotTopSorted | kNotString) & inprops2;
93   }
94   if (!empty1)
95     outprops |= (kInitialAcyclic | kInitialCyclic) & inprops1;
96   if (!delayed || inprops1 & kAccessible)
97     outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
98                  kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
99                  kNotOLabelSorted | kWeighted | kCyclic |
100                  kNotAccessible | kNotCoAccessible) & inprops1;
101   if ((inprops1 & (kAccessible | kCoAccessible)) ==
102       (kAccessible | kCoAccessible) && !empty1) {
103     outprops |= kAccessible & inprops2;
104     if (!empty2)
105       outprops |= kCoAccessible & inprops2;
106     if (!delayed || inprops2 & kAccessible)
107       outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
108                    kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
109                    kNotOLabelSorted | kWeighted | kCyclic |
110                    kNotAccessible | kNotCoAccessible) & inprops2;
111   }
112   return outprops;
113 }
114 
115 // Properties for a determinized FST.
DeterminizeProperties(uint64 inprops,bool has_subsequential_label)116 uint64 DeterminizeProperties(uint64 inprops, bool has_subsequential_label) {
117   uint64 outprops = kAccessible;
118   if (((kAcceptor | kNoIEpsilons) & inprops) || has_subsequential_label)
119     outprops |= kIDeterministic;
120   outprops |= (kError | kAcceptor | kAcyclic |
121                kInitialAcyclic | kCoAccessible | kString) & inprops;
122   if (inprops & kNoIEpsilons)
123     outprops |= kNoEpsilons & inprops;
124   if (inprops & kAccessible)
125      outprops |= (kNotAcceptor | kEpsilons | kIEpsilons | kOEpsilons |
126                   kCyclic) & inprops;
127   if (inprops & kAcceptor)
128     outprops |= (kNoIEpsilons | kNoOEpsilons) & inprops;
129   if ((inprops & kNoIEpsilons) && has_subsequential_label)
130     outprops |= kNoIEpsilons;
131   return outprops;
132 }
133 
134 // Properties for factored weight FST.
FactorWeightProperties(uint64 inprops)135 uint64 FactorWeightProperties(uint64 inprops) {
136   uint64 outprops = (kExpanded | kMutable | kError | kAcceptor |
137                      kAcyclic | kAccessible | kCoAccessible) & inprops;
138   if (inprops & kAccessible)
139     outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
140                  kEpsilons | kIEpsilons | kOEpsilons | kCyclic |
141                  kNotILabelSorted | kNotOLabelSorted)
142         & inprops;
143   return outprops;
144 }
145 
146 // Properties for an inverted FST.
InvertProperties(uint64 inprops)147 uint64 InvertProperties(uint64 inprops) {
148   uint64 outprops = (kExpanded | kMutable | kError | kAcceptor | kNotAcceptor |
149                      kEpsilons | kNoEpsilons | kWeighted | kUnweighted |
150                      kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic |
151                      kTopSorted | kNotTopSorted |
152                      kAccessible | kNotAccessible |
153                      kCoAccessible | kNotCoAccessible |
154                      kString | kNotString) & inprops;
155   if (kIDeterministic & inprops)
156     outprops |= kODeterministic;
157   if (kNonIDeterministic & inprops)
158     outprops |= kNonODeterministic;
159   if (kODeterministic & inprops)
160     outprops |= kIDeterministic;
161   if (kNonODeterministic & inprops)
162     outprops |= kNonIDeterministic;
163 
164   if (kIEpsilons & inprops)
165     outprops |= kOEpsilons;
166   if (kNoIEpsilons & inprops)
167     outprops |= kNoOEpsilons;
168   if (kOEpsilons & inprops)
169     outprops |= kIEpsilons;
170   if (kNoOEpsilons & inprops)
171     outprops |= kNoIEpsilons;
172 
173   if (kILabelSorted & inprops)
174     outprops |= kOLabelSorted;
175   if (kNotILabelSorted & inprops)
176     outprops |= kNotOLabelSorted;
177   if (kOLabelSorted & inprops)
178     outprops |= kILabelSorted;
179   if (kNotOLabelSorted & inprops)
180     outprops |= kNotILabelSorted;
181   return outprops;
182 }
183 
184 // Properties for a projected FST.
ProjectProperties(uint64 inprops,bool project_input)185 uint64 ProjectProperties(uint64 inprops, bool project_input) {
186   uint64 outprops = kAcceptor;
187   outprops |= (kExpanded | kMutable | kError | kWeighted | kUnweighted |
188                kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic |
189                kTopSorted | kNotTopSorted | kAccessible | kNotAccessible |
190                kCoAccessible | kNotCoAccessible |
191                kString | kNotString) & inprops;
192   if (project_input) {
193     outprops |= (kIDeterministic | kNonIDeterministic |
194                  kIEpsilons | kNoIEpsilons |
195                  kILabelSorted | kNotILabelSorted) & inprops;
196 
197     if (kIDeterministic & inprops)
198       outprops |= kODeterministic;
199     if (kNonIDeterministic & inprops)
200       outprops |= kNonODeterministic;
201 
202     if (kIEpsilons & inprops)
203       outprops |= kOEpsilons | kEpsilons;
204     if (kNoIEpsilons & inprops)
205       outprops |= kNoOEpsilons | kNoEpsilons;
206 
207     if (kILabelSorted & inprops)
208       outprops |= kOLabelSorted;
209     if (kNotILabelSorted & inprops)
210       outprops |= kNotOLabelSorted;
211   } else {
212     outprops |= (kODeterministic | kNonODeterministic |
213                  kOEpsilons | kNoOEpsilons |
214                  kOLabelSorted | kNotOLabelSorted) & inprops;
215 
216     if (kODeterministic & inprops)
217       outprops |= kIDeterministic;
218     if (kNonODeterministic & inprops)
219       outprops |= kNonIDeterministic;
220 
221     if (kOEpsilons & inprops)
222       outprops |= kIEpsilons | kEpsilons;
223     if (kNoOEpsilons & inprops)
224       outprops |= kNoIEpsilons | kNoEpsilons;
225 
226     if (kOLabelSorted & inprops)
227       outprops |= kILabelSorted;
228     if (kNotOLabelSorted & inprops)
229       outprops |= kNotILabelSorted;
230   }
231   return outprops;
232 }
233 
234 // Properties for a randgen FST.
RandGenProperties(uint64 inprops,bool weighted)235 uint64 RandGenProperties(uint64 inprops, bool weighted) {
236   uint64 outprops = kAcyclic | kInitialAcyclic | kAccessible;
237   outprops |= inprops & kError;
238   if (weighted) {
239     outprops |= kTopSorted;
240     outprops |= (kAcceptor | kNoEpsilons |
241                  kNoIEpsilons | kNoOEpsilons |
242                  kIDeterministic | kODeterministic |
243                  kILabelSorted | kOLabelSorted) & inprops;
244   } else {
245     outprops |= kUnweighted;
246     outprops |= (kAcceptor | kILabelSorted | kOLabelSorted) & inprops;
247   }
248   return outprops;
249 }
250 
251 // Properties for a replace FST.
ReplaceProperties(const vector<uint64> & inprops,ssize_t root,bool epsilon_on_call,bool epsilon_on_return,bool replace_transducer,bool no_empty_fsts)252 uint64 ReplaceProperties(const vector<uint64>& inprops,
253                          ssize_t root,
254                          bool epsilon_on_call,
255                          bool epsilon_on_return,
256                          bool replace_transducer,
257                          bool no_empty_fsts) {
258   if (inprops.size() == 0)
259     return kNullProperties;
260   uint64 outprops = 0;
261   for (size_t i = 0; i < inprops.size(); ++i)
262     outprops |= kError & inprops[i];
263   uint64 access_props = no_empty_fsts ? kAccessible | kCoAccessible : 0;
264   for (size_t i = 0; i < inprops.size(); ++i)
265     access_props &= (inprops[i] & (kAccessible | kCoAccessible));
266   if (access_props == (kAccessible | kCoAccessible)) {
267     outprops |= access_props;
268     if (inprops[root] & kInitialCyclic)
269       outprops |= kInitialCyclic;
270     uint64 props =  0;
271     bool string = true;
272     for (size_t i = 0; i < inprops.size(); ++i) {
273       if (replace_transducer)
274         props |= kNotAcceptor & inprops[i];
275       props |= (kNonIDeterministic | kNonODeterministic | kEpsilons |
276                 kIEpsilons | kOEpsilons | kWeighted | kCyclic |
277                 kNotTopSorted | kNotString) & inprops[i];
278       if (!(inprops[i] & kString))
279         string = false;
280     }
281     outprops |= props;
282     if (string)
283       outprops |= kString;
284   }
285   bool acceptor = !replace_transducer;
286   bool ideterministic = !epsilon_on_call && epsilon_on_return;
287   bool no_iepsilons = !epsilon_on_call && !epsilon_on_return;
288   bool acyclic = true;
289   bool unweighted = true;
290   for (size_t i = 0; i < inprops.size(); ++i) {
291     if (!(inprops[i] & kAcceptor))
292       acceptor = false;
293     if (!(inprops[i] & kIDeterministic))
294       ideterministic = false;
295     if (!(inprops[i] & kNoIEpsilons))
296       no_iepsilons = false;
297     if (!(inprops[i] & kAcyclic))
298       acyclic = false;
299     if (!(inprops[i] & kUnweighted))
300       unweighted = false;
301     if (i != root && !(inprops[i] & kNoIEpsilons))
302       ideterministic = false;
303   }
304   if (acceptor)
305     outprops |= kAcceptor;
306   if (ideterministic)
307     outprops |= kIDeterministic;
308   if (no_iepsilons)
309     outprops |= kNoIEpsilons;
310   if (acyclic)
311     outprops |= kAcyclic;
312   if (unweighted)
313     outprops |= kUnweighted;
314   if (inprops[root] & kInitialAcyclic)
315       outprops |= kInitialAcyclic;
316   return outprops;
317 }
318 
319 // Properties for a relabeled FST.
RelabelProperties(uint64 inprops)320 uint64 RelabelProperties(uint64 inprops) {
321   uint64 outprops = (kExpanded | kMutable | kError |
322                      kWeighted | kUnweighted |
323                      kCyclic | kAcyclic |
324                      kInitialCyclic | kInitialAcyclic |
325                      kTopSorted | kNotTopSorted |
326                      kAccessible | kNotAccessible |
327                      kCoAccessible | kNotCoAccessible |
328                      kString | kNotString) & inprops;
329   return outprops;
330 }
331 
332 // Properties for a reversed FST. (the superinitial state limits this set)
ReverseProperties(uint64 inprops,bool has_superinitial)333 uint64 ReverseProperties(uint64 inprops, bool has_superinitial) {
334   uint64 outprops =
335     (kExpanded | kMutable | kError | kAcceptor | kNotAcceptor | kEpsilons |
336      kIEpsilons | kOEpsilons | kUnweighted | kCyclic | kAcyclic) & inprops;
337   if (has_superinitial)
338     outprops |= kWeighted & inprops;
339   return outprops;
340 }
341 
342 // Properties for re-weighted FST.
ReweightProperties(uint64 inprops)343 uint64 ReweightProperties(uint64 inprops) {
344   uint64 outprops = inprops & kWeightInvariantProperties;
345   outprops = outprops & ~kCoAccessible;
346   return outprops;
347 }
348 
349 // Properties for an epsilon-removed FST.
RmEpsilonProperties(uint64 inprops,bool delayed)350 uint64 RmEpsilonProperties(uint64 inprops, bool delayed) {
351   uint64 outprops = kNoEpsilons;
352   outprops |= (kError | kAcceptor | kAcyclic | kInitialAcyclic) & inprops;
353   if (inprops & kAcceptor)
354     outprops |= kNoIEpsilons | kNoOEpsilons;
355   if (!delayed) {
356     outprops |= kExpanded | kMutable;
357     outprops |= kTopSorted & inprops;
358   }
359   if (!delayed || inprops & kAccessible)
360     outprops |= kNotAcceptor & inprops;
361   return outprops;
362 }
363 
364 // Properties for shortest path. This function computes how the properties
365 // of the output of shortest path need to be updated, given that 'props' is
366 // already known.
ShortestPathProperties(uint64 props)367 uint64 ShortestPathProperties(uint64 props) {
368   return props | kAcyclic | kInitialAcyclic | kAccessible | kCoAccessible;
369 }
370 
371 // Properties for a synchronized FST.
SynchronizeProperties(uint64 inprops)372 uint64 SynchronizeProperties(uint64 inprops) {
373   uint64 outprops = (kError | kAcceptor | kAcyclic | kAccessible |
374                      kCoAccessible | kUnweighted) & inprops;
375   if (inprops & kAccessible)
376     outprops |= (kCyclic | kNotCoAccessible | kWeighted) & inprops;
377   return outprops;
378 }
379 
380 // Properties for a unioned FST.
UnionProperties(uint64 inprops1,uint64 inprops2,bool delayed)381 uint64 UnionProperties(uint64 inprops1, uint64 inprops2, bool delayed) {
382   uint64 outprops = (kAcceptor | kUnweighted | kAcyclic | kAccessible)
383                     & inprops1 & inprops2;
384   outprops |= kError & (inprops1 | inprops2);
385   outprops |= kInitialAcyclic;
386 
387   bool empty1 = delayed;  // Can fst1 be the empty machine?
388   bool empty2 = delayed;  // Can fst2 be the empty machine?
389   if (!delayed) {
390     outprops |= (kExpanded | kMutable | kNotTopSorted) & inprops1;
391     outprops |= kNotTopSorted & inprops2;
392   }
393   if (!empty1 && !empty2) {
394     outprops |= kEpsilons | kIEpsilons | kOEpsilons;
395     outprops |= kCoAccessible & inprops1 & inprops2;
396   }
397   // Note kNotCoAccessible does not hold because of kInitialAcyclic opt.
398   if (!delayed || inprops1 & kAccessible)
399     outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
400                  kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
401                  kNotOLabelSorted | kWeighted | kCyclic |
402                  kNotAccessible) & inprops1;
403   if (!delayed || inprops2 & kAccessible)
404     outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
405                  kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
406                  kNotOLabelSorted | kWeighted | kCyclic |
407                  kNotAccessible | kNotCoAccessible) & inprops2;
408   return outprops;
409 }
410 
411 
412 // Property string names (indexed by bit position).
413 const char *PropertyNames[] = {
414   // binary
415   "expanded", "mutable", "error", "", "", "", "", "",
416   "", "", "", "", "", "", "", "",
417   // trinary
418   "acceptor", "not acceptor",
419   "input deterministic", "non input deterministic",
420   "output deterministic", "non output deterministic",
421   "input/output epsilons", "no input/output epsilons",
422   "input epsilons", "no input epsilons",
423   "output epsilons", "no output epsilons",
424   "input label sorted", "not input label sorted",
425   "output label sorted", "not output label sorted",
426   "weighted", "unweighted",
427   "cyclic", "acyclic",
428   "cyclic at initial state", "acyclic at initial state",
429   "top sorted", "not top sorted",
430   "accessible", "not accessible",
431   "coaccessible", "not coaccessible",
432   "string", "not string",
433 };
434 
435 }  // namespace fst
436