1 // Copyright 2005-2020 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // Allocators for contiguous arrays of arcs.
19 
20 #ifndef FST_ARC_ARENA_H_
21 #define FST_ARC_ARENA_H_
22 
23 #include <deque>
24 #include <memory>
25 #include <utility>
26 
27 #include <fst/types.h>
28 #include <fst/fst.h>
29 #include <fst/memory.h>
30 #include <unordered_map>
31 
32 namespace fst {
33 
34 // ArcArena is used for fast allocation of contiguous arrays of arcs.
35 //
36 // To create an arc array:
37 //   for each state:
38 //     for each arc:
39 //       arena.PushArc();
40 //     // Commits these arcs and returns pointer to them.
41 //     Arc *arcs = arena.GetArcs();
42 //
43 //     OR
44 //
45 //     arena.DropArcs();  // Throws away current arcs, reuse the space.
46 //
47 // The arcs returned are guaranteed to be contiguous and the pointer returned
48 // will never be invalidated until the arena is cleared for reuse.
49 //
50 // The contents of the arena can be released with a call to arena.Clear() after
51 // which the arena will restart with an initial allocation capable of holding at
52 // least all of the arcs requested in the last usage before Clear() making
53 // subsequent uses of the Arena more efficient.
54 //
55 // The max_retained_size option can limit the amount of arc space requested on
56 // Clear() to avoid excess growth from intermittent high usage.
57 template <typename Arc>
58 class ArcArena {
59  public:
60   explicit ArcArena(size_t block_size = 256, size_t max_retained_size = 1e6)
block_size_(block_size)61       : block_size_(block_size), max_retained_size_(max_retained_size) {
62     blocks_.emplace_back(MakeSharedBlock(block_size_));
63     first_block_size_ = block_size_;
64     total_size_ = block_size_;
65     arcs_ = blocks_.back().get();
66     end_ = arcs_ + block_size_;
67     next_ = arcs_;
68   }
69 
ArcArena(const ArcArena & copy)70   ArcArena(const ArcArena &copy)
71       : arcs_(copy.arcs_),
72         next_(copy.next_),
73         end_(copy.end_),
74         block_size_(copy.block_size_),
75         first_block_size_(copy.first_block_size_),
76         total_size_(copy.total_size_),
77         max_retained_size_(copy.max_retained_size_),
78         blocks_(copy.blocks_) {
79     NewBlock(block_size_);
80   }
81 
ReserveArcs(size_t n)82   void ReserveArcs(size_t n) {
83     if (next_ + n < end_) return;
84     NewBlock(n);
85   }
86 
PushArc(const Arc & arc)87   void PushArc(const Arc &arc) {
88     if (next_ == end_) {
89       size_t length = next_ - arcs_;
90       NewBlock(length * 2);
91     }
92     *next_ = arc;
93     ++next_;
94   }
95 
GetArcs()96   const Arc *GetArcs() {
97     const auto *arcs = arcs_;
98     arcs_ = next_;
99     return arcs;
100   }
101 
DropArcs()102   void DropArcs() { next_ = arcs_; }
103 
Size()104   size_t Size() { return total_size_; }
105 
Clear()106   void Clear() {
107     blocks_.resize(1);
108     if (total_size_ > first_block_size_) {
109       first_block_size_ = std::min(max_retained_size_, total_size_);
110       blocks_.back() = MakeSharedBlock(first_block_size_);
111     }
112     total_size_ = first_block_size_;
113     arcs_ = blocks_.back().get();
114     end_ = arcs_ + first_block_size_;
115     next_ = arcs_;
116   }
117 
118  private:
119   // Allocates a new block with capacity of at least n or block_size,
120   // copying incomplete arc sequence from old block to new block.
NewBlock(size_t n)121   void NewBlock(size_t n) {
122     const auto length = next_ - arcs_;
123     const auto new_block_size = std::max(n, block_size_);
124     total_size_ += new_block_size;
125     blocks_.emplace_back(MakeSharedBlock(new_block_size));
126     std::copy(arcs_, next_, blocks_.back().get());
127     arcs_ = blocks_.back().get();
128     next_ = arcs_ + length;
129     end_ = arcs_ + new_block_size;
130   }
131 
MakeSharedBlock(size_t size)132   std::shared_ptr<Arc> MakeSharedBlock(size_t size) {
133     return std::shared_ptr<Arc>(new Arc[size], std::default_delete<Arc[]>());
134   }
135 
136   Arc *arcs_;
137   Arc *next_;
138   const Arc *end_;
139   size_t block_size_;
140   size_t first_block_size_;
141   size_t total_size_;
142   size_t max_retained_size_;
143   std::list<std::shared_ptr<Arc>> blocks_;
144 };
145 
146 // ArcArenaStateStore uses a resusable ArcArena to store arc arrays and does not
147 // require that the Expander call ReserveArcs first.
148 //
149 // TODO(tombagby): Make cache type configurable.
150 // TODO(tombagby): Provide ThreadLocal/Concurrent configuration.
151 template <class A>
152 class ArcArenaStateStore {
153  public:
154   using Arc = A;
155   using Weight = typename Arc::Weight;
156   using StateId = typename Arc::StateId;
157 
158   class State {
159    public:
Final()160     Weight Final() const { return final_weight_; }
161 
NumInputEpsilons()162     size_t NumInputEpsilons() const { return niepsilons_; }
163 
NumOutputEpsilons()164     size_t NumOutputEpsilons() const { return noepsilons_; }
165 
NumArcs()166     size_t NumArcs() const { return narcs_; }
167 
GetArc(size_t n)168     const Arc &GetArc(size_t n) const { return arcs_[n]; }
169 
Arcs()170     const Arc *Arcs() const { return arcs_; }
171 
MutableRefCount()172     int *MutableRefCount() const { return nullptr; }
173 
174    private:
State(Weight final_weight,int32 niepsilons,int32 noepsilons,int32 narcs,const Arc * arcs)175     State(Weight final_weight, int32 niepsilons, int32 noepsilons, int32 narcs,
176           const Arc *arcs)
177         : final_weight_(std::move(final_weight)),
178           niepsilons_(niepsilons),
179           noepsilons_(noepsilons),
180           narcs_(narcs),
181           arcs_(arcs) {}
182 
183     Weight final_weight_;
184     size_t niepsilons_;
185     size_t noepsilons_;
186     size_t narcs_;
187     const Arc *arcs_;
188 
189     friend class ArcArenaStateStore<Arc>;
190   };
191 
192   template <class Expander>
FindOrExpand(Expander & expander,StateId state_id)193   State *FindOrExpand(Expander &expander, StateId state_id) {
194     auto it = cache_.insert(std::pair<StateId, State *>(state_id, nullptr));
195     if (!it.second) return it.first->second;
196     // Needs a new state.
197     StateBuilder builder(&arena_);
198     expander.Expand(state_id, &builder);
199     const auto arcs = arena_.GetArcs();
200     size_t narcs = builder.narcs_;
201     size_t niepsilons = 0;
202     size_t noepsilons = 0;
203     for (size_t i = 0; i < narcs; ++i) {
204       if (arcs[i].ilabel == 0) ++niepsilons;
205       if (arcs[i].olabel == 0) ++noepsilons;
206     }
207     states_.emplace_back(
208         State(builder.final_weight_, niepsilons, noepsilons, narcs, arcs));
209     // Places it in the cache.
210     auto state = &states_.back();
211     it.first->second = state;
212     return state;
213   }
214 
Find(StateId state_id)215   State *Find(StateId state_id) const {
216     auto it = cache_.find(state_id);
217     return (it == cache_.end()) ? nullptr : it->second;
218   }
219 
220  private:
221   class StateBuilder {
222    public:
StateBuilder(ArcArena<Arc> * arena)223     explicit StateBuilder(ArcArena<Arc> *arena)
224         : arena_(arena), final_weight_(Weight::Zero()), narcs_(0) {}
225 
SetFinal(Weight weight)226     void SetFinal(Weight weight) { final_weight_ = std::move(weight); }
227 
ReserveArcs(size_t n)228     void ReserveArcs(size_t n) { arena_->ReserveArcs(n); }
229 
AddArc(const Arc & arc)230     void AddArc(const Arc &arc) {
231       ++narcs_;
232       arena_->PushArc(arc);
233     }
234 
235    private:
236     friend class ArcArenaStateStore<Arc>;
237 
238     ArcArena<Arc> *arena_;
239     Weight final_weight_;
240     size_t narcs_;
241   };
242 
243   std::unordered_map<StateId, State *> cache_;
244   std::deque<State> states_;
245   ArcArena<Arc> arena_;
246 };
247 
248 }  // namespace fst
249 
250 #endif  // FST_ARC_ARENA_H_
251