1 // Copyright 2005-2020 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the 'License'); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an 'AS IS' BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // See www.openfst.org for extensive documentation on this weighted 16 // finite-state transducer library. 17 // 18 // Allocators for contiguous arrays of arcs. 19 20 #ifndef FST_ARC_ARENA_H_ 21 #define FST_ARC_ARENA_H_ 22 23 #include <deque> 24 #include <memory> 25 #include <utility> 26 27 #include <fst/types.h> 28 #include <fst/fst.h> 29 #include <fst/memory.h> 30 #include <unordered_map> 31 32 namespace fst { 33 34 // ArcArena is used for fast allocation of contiguous arrays of arcs. 35 // 36 // To create an arc array: 37 // for each state: 38 // for each arc: 39 // arena.PushArc(); 40 // // Commits these arcs and returns pointer to them. 41 // Arc *arcs = arena.GetArcs(); 42 // 43 // OR 44 // 45 // arena.DropArcs(); // Throws away current arcs, reuse the space. 46 // 47 // The arcs returned are guaranteed to be contiguous and the pointer returned 48 // will never be invalidated until the arena is cleared for reuse. 49 // 50 // The contents of the arena can be released with a call to arena.Clear() after 51 // which the arena will restart with an initial allocation capable of holding at 52 // least all of the arcs requested in the last usage before Clear() making 53 // subsequent uses of the Arena more efficient. 54 // 55 // The max_retained_size option can limit the amount of arc space requested on 56 // Clear() to avoid excess growth from intermittent high usage. 57 template <typename Arc> 58 class ArcArena { 59 public: 60 explicit ArcArena(size_t block_size = 256, size_t max_retained_size = 1e6) block_size_(block_size)61 : block_size_(block_size), max_retained_size_(max_retained_size) { 62 blocks_.emplace_back(MakeSharedBlock(block_size_)); 63 first_block_size_ = block_size_; 64 total_size_ = block_size_; 65 arcs_ = blocks_.back().get(); 66 end_ = arcs_ + block_size_; 67 next_ = arcs_; 68 } 69 ArcArena(const ArcArena & copy)70 ArcArena(const ArcArena ©) 71 : arcs_(copy.arcs_), 72 next_(copy.next_), 73 end_(copy.end_), 74 block_size_(copy.block_size_), 75 first_block_size_(copy.first_block_size_), 76 total_size_(copy.total_size_), 77 max_retained_size_(copy.max_retained_size_), 78 blocks_(copy.blocks_) { 79 NewBlock(block_size_); 80 } 81 ReserveArcs(size_t n)82 void ReserveArcs(size_t n) { 83 if (next_ + n < end_) return; 84 NewBlock(n); 85 } 86 PushArc(const Arc & arc)87 void PushArc(const Arc &arc) { 88 if (next_ == end_) { 89 size_t length = next_ - arcs_; 90 NewBlock(length * 2); 91 } 92 *next_ = arc; 93 ++next_; 94 } 95 GetArcs()96 const Arc *GetArcs() { 97 const auto *arcs = arcs_; 98 arcs_ = next_; 99 return arcs; 100 } 101 DropArcs()102 void DropArcs() { next_ = arcs_; } 103 Size()104 size_t Size() { return total_size_; } 105 Clear()106 void Clear() { 107 blocks_.resize(1); 108 if (total_size_ > first_block_size_) { 109 first_block_size_ = std::min(max_retained_size_, total_size_); 110 blocks_.back() = MakeSharedBlock(first_block_size_); 111 } 112 total_size_ = first_block_size_; 113 arcs_ = blocks_.back().get(); 114 end_ = arcs_ + first_block_size_; 115 next_ = arcs_; 116 } 117 118 private: 119 // Allocates a new block with capacity of at least n or block_size, 120 // copying incomplete arc sequence from old block to new block. NewBlock(size_t n)121 void NewBlock(size_t n) { 122 const auto length = next_ - arcs_; 123 const auto new_block_size = std::max(n, block_size_); 124 total_size_ += new_block_size; 125 blocks_.emplace_back(MakeSharedBlock(new_block_size)); 126 std::copy(arcs_, next_, blocks_.back().get()); 127 arcs_ = blocks_.back().get(); 128 next_ = arcs_ + length; 129 end_ = arcs_ + new_block_size; 130 } 131 MakeSharedBlock(size_t size)132 std::shared_ptr<Arc> MakeSharedBlock(size_t size) { 133 return std::shared_ptr<Arc>(new Arc[size], std::default_delete<Arc[]>()); 134 } 135 136 Arc *arcs_; 137 Arc *next_; 138 const Arc *end_; 139 size_t block_size_; 140 size_t first_block_size_; 141 size_t total_size_; 142 size_t max_retained_size_; 143 std::list<std::shared_ptr<Arc>> blocks_; 144 }; 145 146 // ArcArenaStateStore uses a resusable ArcArena to store arc arrays and does not 147 // require that the Expander call ReserveArcs first. 148 // 149 // TODO(tombagby): Make cache type configurable. 150 // TODO(tombagby): Provide ThreadLocal/Concurrent configuration. 151 template <class A> 152 class ArcArenaStateStore { 153 public: 154 using Arc = A; 155 using Weight = typename Arc::Weight; 156 using StateId = typename Arc::StateId; 157 158 class State { 159 public: Final()160 Weight Final() const { return final_weight_; } 161 NumInputEpsilons()162 size_t NumInputEpsilons() const { return niepsilons_; } 163 NumOutputEpsilons()164 size_t NumOutputEpsilons() const { return noepsilons_; } 165 NumArcs()166 size_t NumArcs() const { return narcs_; } 167 GetArc(size_t n)168 const Arc &GetArc(size_t n) const { return arcs_[n]; } 169 Arcs()170 const Arc *Arcs() const { return arcs_; } 171 MutableRefCount()172 int *MutableRefCount() const { return nullptr; } 173 174 private: State(Weight final_weight,int32 niepsilons,int32 noepsilons,int32 narcs,const Arc * arcs)175 State(Weight final_weight, int32 niepsilons, int32 noepsilons, int32 narcs, 176 const Arc *arcs) 177 : final_weight_(std::move(final_weight)), 178 niepsilons_(niepsilons), 179 noepsilons_(noepsilons), 180 narcs_(narcs), 181 arcs_(arcs) {} 182 183 Weight final_weight_; 184 size_t niepsilons_; 185 size_t noepsilons_; 186 size_t narcs_; 187 const Arc *arcs_; 188 189 friend class ArcArenaStateStore<Arc>; 190 }; 191 192 template <class Expander> FindOrExpand(Expander & expander,StateId state_id)193 State *FindOrExpand(Expander &expander, StateId state_id) { 194 auto it = cache_.insert(std::pair<StateId, State *>(state_id, nullptr)); 195 if (!it.second) return it.first->second; 196 // Needs a new state. 197 StateBuilder builder(&arena_); 198 expander.Expand(state_id, &builder); 199 const auto arcs = arena_.GetArcs(); 200 size_t narcs = builder.narcs_; 201 size_t niepsilons = 0; 202 size_t noepsilons = 0; 203 for (size_t i = 0; i < narcs; ++i) { 204 if (arcs[i].ilabel == 0) ++niepsilons; 205 if (arcs[i].olabel == 0) ++noepsilons; 206 } 207 states_.emplace_back( 208 State(builder.final_weight_, niepsilons, noepsilons, narcs, arcs)); 209 // Places it in the cache. 210 auto state = &states_.back(); 211 it.first->second = state; 212 return state; 213 } 214 Find(StateId state_id)215 State *Find(StateId state_id) const { 216 auto it = cache_.find(state_id); 217 return (it == cache_.end()) ? nullptr : it->second; 218 } 219 220 private: 221 class StateBuilder { 222 public: StateBuilder(ArcArena<Arc> * arena)223 explicit StateBuilder(ArcArena<Arc> *arena) 224 : arena_(arena), final_weight_(Weight::Zero()), narcs_(0) {} 225 SetFinal(Weight weight)226 void SetFinal(Weight weight) { final_weight_ = std::move(weight); } 227 ReserveArcs(size_t n)228 void ReserveArcs(size_t n) { arena_->ReserveArcs(n); } 229 AddArc(const Arc & arc)230 void AddArc(const Arc &arc) { 231 ++narcs_; 232 arena_->PushArc(arc); 233 } 234 235 private: 236 friend class ArcArenaStateStore<Arc>; 237 238 ArcArena<Arc> *arena_; 239 Weight final_weight_; 240 size_t narcs_; 241 }; 242 243 std::unordered_map<StateId, State *> cache_; 244 std::deque<State> states_; 245 ArcArena<Arc> arena_; 246 }; 247 248 } // namespace fst 249 250 #endif // FST_ARC_ARENA_H_ 251