// Copyright 2005-2020 Google LLC // // Licensed under the Apache License, Version 2.0 (the 'License'); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an 'AS IS' BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // See www.openfst.org for extensive documentation on this weighted // finite-state transducer library. // // Allocators for contiguous arrays of arcs. #ifndef FST_ARC_ARENA_H_ #define FST_ARC_ARENA_H_ #include #include #include #include #include #include #include namespace fst { // ArcArena is used for fast allocation of contiguous arrays of arcs. // // To create an arc array: // for each state: // for each arc: // arena.PushArc(); // // Commits these arcs and returns pointer to them. // Arc *arcs = arena.GetArcs(); // // OR // // arena.DropArcs(); // Throws away current arcs, reuse the space. // // The arcs returned are guaranteed to be contiguous and the pointer returned // will never be invalidated until the arena is cleared for reuse. // // The contents of the arena can be released with a call to arena.Clear() after // which the arena will restart with an initial allocation capable of holding at // least all of the arcs requested in the last usage before Clear() making // subsequent uses of the Arena more efficient. // // The max_retained_size option can limit the amount of arc space requested on // Clear() to avoid excess growth from intermittent high usage. template class ArcArena { public: explicit ArcArena(size_t block_size = 256, size_t max_retained_size = 1e6) : block_size_(block_size), max_retained_size_(max_retained_size) { blocks_.emplace_back(MakeSharedBlock(block_size_)); first_block_size_ = block_size_; total_size_ = block_size_; arcs_ = blocks_.back().get(); end_ = arcs_ + block_size_; next_ = arcs_; } ArcArena(const ArcArena ©) : arcs_(copy.arcs_), next_(copy.next_), end_(copy.end_), block_size_(copy.block_size_), first_block_size_(copy.first_block_size_), total_size_(copy.total_size_), max_retained_size_(copy.max_retained_size_), blocks_(copy.blocks_) { NewBlock(block_size_); } void ReserveArcs(size_t n) { if (next_ + n < end_) return; NewBlock(n); } void PushArc(const Arc &arc) { if (next_ == end_) { size_t length = next_ - arcs_; NewBlock(length * 2); } *next_ = arc; ++next_; } const Arc *GetArcs() { const auto *arcs = arcs_; arcs_ = next_; return arcs; } void DropArcs() { next_ = arcs_; } size_t Size() { return total_size_; } void Clear() { blocks_.resize(1); if (total_size_ > first_block_size_) { first_block_size_ = std::min(max_retained_size_, total_size_); blocks_.back() = MakeSharedBlock(first_block_size_); } total_size_ = first_block_size_; arcs_ = blocks_.back().get(); end_ = arcs_ + first_block_size_; next_ = arcs_; } private: // Allocates a new block with capacity of at least n or block_size, // copying incomplete arc sequence from old block to new block. void NewBlock(size_t n) { const auto length = next_ - arcs_; const auto new_block_size = std::max(n, block_size_); total_size_ += new_block_size; blocks_.emplace_back(MakeSharedBlock(new_block_size)); std::copy(arcs_, next_, blocks_.back().get()); arcs_ = blocks_.back().get(); next_ = arcs_ + length; end_ = arcs_ + new_block_size; } std::shared_ptr MakeSharedBlock(size_t size) { return std::shared_ptr(new Arc[size], std::default_delete()); } Arc *arcs_; Arc *next_; const Arc *end_; size_t block_size_; size_t first_block_size_; size_t total_size_; size_t max_retained_size_; std::list> blocks_; }; // ArcArenaStateStore uses a resusable ArcArena to store arc arrays and does not // require that the Expander call ReserveArcs first. // // TODO(tombagby): Make cache type configurable. // TODO(tombagby): Provide ThreadLocal/Concurrent configuration. template class ArcArenaStateStore { public: using Arc = A; using Weight = typename Arc::Weight; using StateId = typename Arc::StateId; class State { public: Weight Final() const { return final_weight_; } size_t NumInputEpsilons() const { return niepsilons_; } size_t NumOutputEpsilons() const { return noepsilons_; } size_t NumArcs() const { return narcs_; } const Arc &GetArc(size_t n) const { return arcs_[n]; } const Arc *Arcs() const { return arcs_; } int *MutableRefCount() const { return nullptr; } private: State(Weight final_weight, int32 niepsilons, int32 noepsilons, int32 narcs, const Arc *arcs) : final_weight_(std::move(final_weight)), niepsilons_(niepsilons), noepsilons_(noepsilons), narcs_(narcs), arcs_(arcs) {} Weight final_weight_; size_t niepsilons_; size_t noepsilons_; size_t narcs_; const Arc *arcs_; friend class ArcArenaStateStore; }; template State *FindOrExpand(Expander &expander, StateId state_id) { auto it = cache_.insert(std::pair(state_id, nullptr)); if (!it.second) return it.first->second; // Needs a new state. StateBuilder builder(&arena_); expander.Expand(state_id, &builder); const auto arcs = arena_.GetArcs(); size_t narcs = builder.narcs_; size_t niepsilons = 0; size_t noepsilons = 0; for (size_t i = 0; i < narcs; ++i) { if (arcs[i].ilabel == 0) ++niepsilons; if (arcs[i].olabel == 0) ++noepsilons; } states_.emplace_back( State(builder.final_weight_, niepsilons, noepsilons, narcs, arcs)); // Places it in the cache. auto state = &states_.back(); it.first->second = state; return state; } State *Find(StateId state_id) const { auto it = cache_.find(state_id); return (it == cache_.end()) ? nullptr : it->second; } private: class StateBuilder { public: explicit StateBuilder(ArcArena *arena) : arena_(arena), final_weight_(Weight::Zero()), narcs_(0) {} void SetFinal(Weight weight) { final_weight_ = std::move(weight); } void ReserveArcs(size_t n) { arena_->ReserveArcs(n); } void AddArc(const Arc &arc) { ++narcs_; arena_->PushArc(arc); } private: friend class ArcArenaStateStore; ArcArena *arena_; Weight final_weight_; size_t narcs_; }; std::unordered_map cache_; std::deque states_; ArcArena arena_; }; } // namespace fst #endif // FST_ARC_ARENA_H_