1 #include "defs.h" 2 #include "determine.h" 3 #include "half_final_fsm.h" 4 5 namespace Pire { 6 size_t HalfFinalFsm::MaxCountDepth = 10; 7 MakeScanner()8 void HalfFinalFsm::MakeScanner() { 9 fsm.Canonize(); 10 bool allowHalfFinals = AllowHalfFinals(); 11 if (!allowHalfFinals) { 12 MakeHalfFinal(); 13 return; 14 } 15 DisconnectFinals(true); 16 } 17 AllowHalfFinals()18 bool HalfFinalFsm::AllowHalfFinals() { 19 fsm.Canonize(); 20 for (size_t state = 0; state < fsm.Size(); ++state) { 21 if (fsm.IsFinal(state)) { 22 for (const auto& let : fsm.Letters()) { 23 bool hasFinalTransition = fsm.Destinations(state, let.first).empty(); 24 for (const auto& to : fsm.Destinations(state, let.first)) { 25 if (fsm.IsFinal(to)) { 26 hasFinalTransition = true; 27 } 28 } 29 if (!hasFinalTransition) { 30 return false; 31 } 32 } 33 } 34 } 35 return true; 36 } 37 MakeHalfFinal()38 void HalfFinalFsm::MakeHalfFinal() { 39 fsm.Unsparse(); 40 const auto newFinal = fsm.Size(); 41 fsm.Resize(newFinal + 1); 42 for (unsigned letter = 0; letter < MaxChar; ++letter) { 43 if (letter != Epsilon) { 44 fsm.Connect(newFinal, newFinal, letter); 45 } 46 } 47 for (size_t state = 0; state < fsm.Size(); ++state) { 48 bool hasFinalTransitions = false; 49 for (const auto& to : fsm.Destinations(state, EndMark)) { 50 if (fsm.IsFinal(to)) { 51 hasFinalTransitions = true; 52 break; 53 } 54 } 55 if (hasFinalTransitions) { 56 Fsm::StatesSet destinations = fsm.Destinations(state, EndMark); 57 for (const auto& to : destinations) { 58 fsm.Disconnect(state, to, EndMark); 59 } 60 fsm.Connect(state, newFinal, EndMark); 61 } 62 } 63 fsm.ClearFinal(); 64 fsm.SetFinal(newFinal, true); 65 fsm.Sparse(); 66 } 67 DisconnectFinals(bool allowIntersects)68 void HalfFinalFsm::DisconnectFinals(bool allowIntersects) { 69 fsm.Unsparse(); 70 for (size_t state = 0; state != fsm.Size(); ++state) { 71 fsm.SetTag(state, 0); 72 if (fsm.IsFinal(state)) { 73 for (unsigned letter = 0; letter < MaxChar; ++letter) { 74 Fsm::StatesSet destinations = fsm.Destinations(state, letter); 75 for (const auto& to : destinations) { 76 fsm.Disconnect(state, to, letter); 77 } 78 } 79 if (!allowIntersects) { 80 fsm.Connect(state, fsm.Initial()); 81 } 82 } 83 } 84 if (allowIntersects) { 85 fsm.PrependAnything(); 86 } 87 fsm.Sparse(); 88 fsm.SetIsDetermined(false); 89 fsm.Canonize(); 90 } 91 MakeNonGreedyCounter(bool allowIntersects,bool simplify)92 void HalfFinalFsm::MakeNonGreedyCounter(bool allowIntersects /* = true */, bool simplify /* = true */) { 93 fsm.Canonize(); 94 fsm.PrependAnything(); 95 fsm.RemoveDeadEnds(); 96 fsm.Canonize(); 97 if (!allowIntersects || simplify) { 98 DisconnectFinals(allowIntersects); 99 } 100 } 101 MakeGreedyCounter(bool simplify)102 void HalfFinalFsm::MakeGreedyCounter(bool simplify /* = true */) { 103 fsm.Canonize(); 104 fsm.RemoveDeadEnds(); 105 size_t determineFactor = MaxCountDepth; 106 if (simplify) { 107 determineFactor = 1; 108 } 109 Determine(determineFactor); 110 if (simplify) { 111 fsm.Minimize(); 112 } 113 fsm.RemoveDeadEnds(); 114 } 115 116 namespace Impl { 117 118 class HalfFinalDetermineState { 119 public: HalfFinalDetermineState(const Fsm & fsm,bool initial=false,size_t lastFinalCount=0)120 HalfFinalDetermineState(const Fsm& fsm, bool initial = false, size_t lastFinalCount = 0) 121 : mFsm(fsm) 122 , ToAdd(0) 123 , LastFinalCount(lastFinalCount) 124 { 125 if (initial) { 126 FinishBuild(1); 127 } 128 } 129 Next(Char letter,size_t maxCount) const130 HalfFinalDetermineState Next(Char letter, size_t maxCount) const { 131 HalfFinalDetermineState next(mFsm, false, LastFinalCount); 132 for (const auto& state : States) { 133 for (const auto& nextState : mFsm.Destinations(state.State, letter)) { 134 next.AddState(nextState, state.Count, state.ReachedFinal); 135 } 136 } 137 next.FinishBuild(maxCount, States.back().Count); 138 if (letter == EndMark) { 139 next.ToAdd += next.LastFinalCount; 140 next.LastFinalCount = 0; 141 next.States.clear(); 142 next.AddState(mFsm.Initial(), 0, false, true); 143 return next; 144 } 145 return next; 146 } 147 CopyData(Fsm & newFsm,size_t index) const148 void CopyData(Fsm& newFsm, size_t index) const { 149 if (ToAdd) { 150 newFsm.SetFinal(index, true); 151 newFsm.SetTag(index, ToAdd); 152 } 153 } 154 operator <(const HalfFinalDetermineState & otherState) const155 bool operator<(const HalfFinalDetermineState& otherState) const { 156 if (ToAdd != otherState.ToAdd) { 157 return ToAdd < otherState.ToAdd; 158 } 159 if (LastFinalCount != otherState.LastFinalCount) { 160 return LastFinalCount < otherState.LastFinalCount; 161 } 162 return States < otherState.States; 163 } 164 165 struct StateHolder { 166 size_t State; 167 size_t Count; 168 bool ReachedFinal; 169 operator <Pire::Impl::HalfFinalDetermineState::StateHolder170 bool operator<(const StateHolder& other) const { 171 if (State != other.State) { 172 return State < other.State; 173 } 174 if (Count != other.Count) { 175 return Count < other.Count; 176 } 177 return ReachedFinal < other.ReachedFinal; 178 } 179 }; 180 181 private: 182 const Fsm& mFsm; 183 TVector<StateHolder> States; 184 size_t ToAdd; 185 size_t LastFinalCount; 186 AddState(size_t state,size_t count,bool reachedFinal,bool force=false)187 void AddState(size_t state, size_t count, bool reachedFinal, bool force = false) { 188 size_t newLastFinalCount = LastFinalCount; 189 if (mFsm.IsFinal(state) && !reachedFinal) { 190 ++count; 191 reachedFinal = true; 192 newLastFinalCount = count; 193 } 194 for (const auto& addedState : States) { 195 if (addedState.State == state) { 196 return; 197 } 198 } 199 if (States.empty() || !mFsm.IsFinal(States.back().State) || force) { 200 States.push_back({state, count, reachedFinal}); 201 LastFinalCount = newLastFinalCount; 202 } 203 } 204 FinishBuild(size_t maxCount,size_t lastCount=0)205 void FinishBuild(size_t maxCount, size_t lastCount = 0) { 206 if (!States.empty() && mFsm.IsFinal(States.back().State)) { 207 lastCount = States.back().Count; 208 } 209 AddState(mFsm.Initial(), lastCount, false, true); 210 LastFinalCount = std::min(LastFinalCount, maxCount); 211 size_t minCount = States[0].Count; 212 for (auto& state : States) { 213 if (state.Count > maxCount) { 214 state.Count = maxCount; 215 } 216 minCount = std::min(state.Count, minCount); 217 } 218 ToAdd = minCount; 219 for (auto& state : States) { 220 state.Count -= minCount; 221 } 222 LastFinalCount -= minCount; 223 } 224 }; 225 226 class HalfFinalDetermineTask { 227 public: 228 typedef HalfFinalDetermineState State; 229 typedef Fsm::LettersTbl LettersTbl; 230 typedef TMap<State, size_t> InvStates; 231 HalfFinalDetermineTask(const Fsm & fsm,size_t maxCount)232 HalfFinalDetermineTask(const Fsm& fsm, size_t maxCount) 233 : mFsm(fsm) 234 , MaxCount(maxCount) 235 { 236 size_t oldSize = mFsm.Size(); 237 mFsm.Import(fsm); 238 mFsm.Unsparse(); 239 for (size_t state = 0; state < mFsm.Size(); ++state) { 240 for (Char letter = 0; letter < MaxChar; ++letter) { 241 Fsm::StatesSet destinations = mFsm.Destinations(state, letter); 242 for (const auto destination : destinations) { 243 size_t newDestination = destination % oldSize; 244 if (letter == EndMark) { 245 newDestination += oldSize; 246 } 247 if (destination != newDestination) { 248 mFsm.Disconnect(state, destination, letter); 249 mFsm.Connect(state, newDestination, letter); 250 } 251 } 252 } 253 if (mFsm.Destinations(state, EndMark).size() == 0) { 254 mFsm.Connect(state, oldSize + mFsm.Initial(), EndMark); 255 } 256 } 257 mFsm.Sparse(); 258 } 259 Letters() const260 const LettersTbl& Letters() const { return mFsm.Letters(); } 261 Initial() const262 State Initial() const { 263 return State(mFsm, true); 264 } 265 Next(const State & state,Char letter) const266 State Next(const State& state, Char letter) const { 267 return state.Next(letter, MaxCount); 268 } 269 IsRequired(const State &) const270 bool IsRequired(const State& /*state*/) const { return true; } 271 AcceptStates(const TVector<State> & newStates)272 void AcceptStates(const TVector<State>& newStates) { 273 mNewFsm.Resize(newStates.size()); 274 mNewFsm.SetInitial(0); 275 mNewFsm.SetIsDetermined(true); 276 mNewFsm.letters = Letters(); 277 mNewFsm.ClearFinal(); 278 for (size_t i = 0; i < newStates.size(); i++) { 279 newStates[i].CopyData(mNewFsm, i); 280 } 281 } 282 Connect(size_t from,size_t to,Char letter)283 void Connect(size_t from, size_t to, Char letter) { 284 Y_ASSERT(mNewFsm.Destinations(from, letter).size() == 0); 285 mNewFsm.Connect(from, to, letter); 286 } 287 288 typedef bool Result; 289 Success()290 Result Success() { return true; } 291 Failure()292 Result Failure() { return false; } 293 Output()294 Fsm& Output() { return mNewFsm; } 295 SetMaxCount(size_t maxCount)296 void SetMaxCount(size_t maxCount) { 297 MaxCount = maxCount; 298 } 299 300 private: 301 Fsm mFsm; 302 size_t MaxCount; 303 Fsm mNewFsm; 304 }; 305 } 306 Determine(size_t depth)307 void HalfFinalFsm::Determine(size_t depth) { 308 static const unsigned MaxSize = 200000; 309 310 Impl::HalfFinalDetermineTask task(fsm, depth); 311 if (!Pire::Impl::Determine(task, MaxSize)) { 312 task.SetMaxCount(1); 313 Pire::Impl::Determine(task, MaxSize); 314 } 315 316 task.Output().Swap(fsm); 317 } 318 GetCount(size_t state) const319 size_t HalfFinalFsm::GetCount(size_t state) const { 320 if (fsm.IsFinal(state)) { 321 if (fsm.Tag(state)) { 322 return fsm.Tag(state); 323 } else { 324 return 1; 325 } 326 } 327 return 0; 328 } 329 GetTotalCount() const330 size_t HalfFinalFsm::GetTotalCount() const { 331 size_t count = 0; 332 for (size_t state = 0; state < fsm.Size(); ++state) { 333 count += GetCount(state); 334 } 335 return count; 336 } 337 } 338