1 #include "defs.h"
2 #include "determine.h"
3 #include "half_final_fsm.h"
4 
5 namespace Pire {
6 	size_t HalfFinalFsm::MaxCountDepth = 10;
7 
MakeScanner()8 	void HalfFinalFsm::MakeScanner() {
9 		fsm.Canonize();
10 		bool allowHalfFinals = AllowHalfFinals();
11 		if (!allowHalfFinals) {
12 			MakeHalfFinal();
13 			return;
14 		}
15 		DisconnectFinals(true);
16 	}
17 
AllowHalfFinals()18 	bool HalfFinalFsm::AllowHalfFinals() {
19 		fsm.Canonize();
20 		for (size_t state = 0; state < fsm.Size(); ++state) {
21 			if (fsm.IsFinal(state)) {
22 				for (const auto& let : fsm.Letters()) {
23 					bool hasFinalTransition = fsm.Destinations(state, let.first).empty();
24 					for (const auto& to : fsm.Destinations(state, let.first)) {
25 						if (fsm.IsFinal(to)) {
26 							hasFinalTransition = true;
27 						}
28 					}
29 					if (!hasFinalTransition) {
30 						return false;
31 					}
32 				}
33 			}
34 		}
35 		return true;
36 	}
37 
MakeHalfFinal()38 	void HalfFinalFsm::MakeHalfFinal() {
39 		fsm.Unsparse();
40 		const auto newFinal = fsm.Size();
41 		fsm.Resize(newFinal + 1);
42 		for (unsigned letter = 0; letter < MaxChar; ++letter) {
43 			if (letter != Epsilon) {
44 				fsm.Connect(newFinal, newFinal, letter);
45 			}
46 		}
47 		for (size_t state = 0; state < fsm.Size(); ++state) {
48 			bool hasFinalTransitions = false;
49 			for (const auto& to : fsm.Destinations(state, EndMark)) {
50 				if (fsm.IsFinal(to)) {
51 					hasFinalTransitions = true;
52 					break;
53 				}
54 			}
55 			if (hasFinalTransitions) {
56 				Fsm::StatesSet destinations = fsm.Destinations(state, EndMark);
57 				for (const auto& to : destinations) {
58 					fsm.Disconnect(state, to, EndMark);
59 				}
60 				fsm.Connect(state, newFinal, EndMark);
61 			}
62 		}
63 		fsm.ClearFinal();
64 		fsm.SetFinal(newFinal, true);
65 		fsm.Sparse();
66 	}
67 
DisconnectFinals(bool allowIntersects)68 	void HalfFinalFsm::DisconnectFinals(bool allowIntersects) {
69 		fsm.Unsparse();
70 		for (size_t state = 0; state != fsm.Size(); ++state) {
71 			fsm.SetTag(state, 0);
72 			if (fsm.IsFinal(state)) {
73 				for (unsigned letter = 0; letter < MaxChar; ++letter) {
74 					Fsm::StatesSet destinations = fsm.Destinations(state, letter);
75 					for (const auto& to : destinations) {
76 						fsm.Disconnect(state, to, letter);
77 					}
78 				}
79 				if (!allowIntersects) {
80 					fsm.Connect(state, fsm.Initial());
81 				}
82 			}
83 		}
84 		if (allowIntersects) {
85 			fsm.PrependAnything();
86 		}
87 		fsm.Sparse();
88 		fsm.SetIsDetermined(false);
89 		fsm.Canonize();
90 	}
91 
MakeNonGreedyCounter(bool allowIntersects,bool simplify)92 	void HalfFinalFsm::MakeNonGreedyCounter(bool allowIntersects /* = true */, bool simplify /* = true */) {
93 		fsm.Canonize();
94 		fsm.PrependAnything();
95 		fsm.RemoveDeadEnds();
96 		fsm.Canonize();
97 		if (!allowIntersects || simplify) {
98 			DisconnectFinals(allowIntersects);
99 		}
100 	}
101 
MakeGreedyCounter(bool simplify)102 	void HalfFinalFsm::MakeGreedyCounter(bool simplify /* = true */) {
103 		fsm.Canonize();
104 		fsm.RemoveDeadEnds();
105 		size_t determineFactor = MaxCountDepth;
106 		if (simplify) {
107 			determineFactor = 1;
108 		}
109 		Determine(determineFactor);
110 		if (simplify) {
111 			fsm.Minimize();
112 		}
113 		fsm.RemoveDeadEnds();
114 	}
115 
116 	namespace Impl {
117 
118 		class HalfFinalDetermineState {
119 		public:
HalfFinalDetermineState(const Fsm & fsm,bool initial=false,size_t lastFinalCount=0)120 			HalfFinalDetermineState(const Fsm& fsm, bool initial = false, size_t lastFinalCount = 0)
121 				: mFsm(fsm)
122 				, ToAdd(0)
123 				, LastFinalCount(lastFinalCount)
124 			{
125 				if (initial) {
126 					FinishBuild(1);
127 				}
128 			}
129 
Next(Char letter,size_t maxCount) const130 			HalfFinalDetermineState Next(Char letter, size_t maxCount) const {
131 				HalfFinalDetermineState next(mFsm, false, LastFinalCount);
132 				for (const auto& state : States) {
133 					for (const auto& nextState : mFsm.Destinations(state.State, letter)) {
134 						next.AddState(nextState, state.Count, state.ReachedFinal);
135 					}
136 				}
137 				next.FinishBuild(maxCount, States.back().Count);
138 				if (letter == EndMark) {
139 					next.ToAdd += next.LastFinalCount;
140 					next.LastFinalCount = 0;
141 					next.States.clear();
142 					next.AddState(mFsm.Initial(), 0, false, true);
143 					return next;
144 				}
145 				return next;
146 			}
147 
CopyData(Fsm & newFsm,size_t index) const148 			void CopyData(Fsm& newFsm, size_t index) const {
149 				if (ToAdd) {
150 					newFsm.SetFinal(index, true);
151 					newFsm.SetTag(index, ToAdd);
152 				}
153 			}
154 
operator <(const HalfFinalDetermineState & otherState) const155 			bool operator<(const HalfFinalDetermineState& otherState) const {
156 				if (ToAdd != otherState.ToAdd) {
157 					return ToAdd < otherState.ToAdd;
158 				}
159 				if (LastFinalCount != otherState.LastFinalCount) {
160 					return LastFinalCount < otherState.LastFinalCount;
161 				}
162 				return States < otherState.States;
163 			}
164 
165 			struct StateHolder {
166 				size_t State;
167 				size_t Count;
168 				bool ReachedFinal;
169 
operator <Pire::Impl::HalfFinalDetermineState::StateHolder170 				bool operator<(const StateHolder& other) const {
171 					if (State != other.State) {
172 						return State < other.State;
173 					}
174 					if (Count != other.Count) {
175 						return Count < other.Count;
176 					}
177 					return ReachedFinal < other.ReachedFinal;
178 				}
179 			};
180 
181 		private:
182 			const Fsm& mFsm;
183 			TVector<StateHolder> States;
184 			size_t ToAdd;
185 			size_t LastFinalCount;
186 
AddState(size_t state,size_t count,bool reachedFinal,bool force=false)187 			void AddState(size_t state, size_t count, bool reachedFinal, bool force = false) {
188 				size_t newLastFinalCount = LastFinalCount;
189 				if (mFsm.IsFinal(state) && !reachedFinal) {
190 					++count;
191 					reachedFinal = true;
192 					newLastFinalCount = count;
193 				}
194 				for (const auto& addedState : States) {
195 					if (addedState.State == state) {
196 						return;
197 					}
198 				}
199 				if (States.empty() || !mFsm.IsFinal(States.back().State) || force) {
200 					States.push_back({state, count, reachedFinal});
201 					LastFinalCount = newLastFinalCount;
202 				}
203 			}
204 
FinishBuild(size_t maxCount,size_t lastCount=0)205 			void FinishBuild(size_t maxCount, size_t lastCount = 0) {
206 				if (!States.empty() && mFsm.IsFinal(States.back().State)) {
207 					lastCount = States.back().Count;
208 				}
209 				AddState(mFsm.Initial(), lastCount, false, true);
210 				LastFinalCount = std::min(LastFinalCount, maxCount);
211 				size_t minCount = States[0].Count;
212 				for (auto& state : States) {
213 					if (state.Count > maxCount) {
214 						state.Count = maxCount;
215 					}
216 					minCount = std::min(state.Count, minCount);
217 				}
218 				ToAdd = minCount;
219 				for (auto& state : States) {
220 					state.Count -= minCount;
221 				}
222 				LastFinalCount -= minCount;
223 			}
224 		};
225 
226 		class HalfFinalDetermineTask {
227 		public:
228 			typedef HalfFinalDetermineState State;
229 			typedef Fsm::LettersTbl LettersTbl;
230 			typedef TMap<State, size_t> InvStates;
231 
HalfFinalDetermineTask(const Fsm & fsm,size_t maxCount)232 			HalfFinalDetermineTask(const Fsm& fsm, size_t maxCount)
233 				: mFsm(fsm)
234 				, MaxCount(maxCount)
235 			{
236 				size_t oldSize = mFsm.Size();
237 				mFsm.Import(fsm);
238 				mFsm.Unsparse();
239 				for (size_t state = 0; state < mFsm.Size(); ++state) {
240 					for (Char letter = 0; letter < MaxChar; ++letter) {
241 						Fsm::StatesSet destinations = mFsm.Destinations(state, letter);
242 						for (const auto destination : destinations) {
243 							size_t newDestination = destination % oldSize;
244 							if (letter == EndMark) {
245 								newDestination += oldSize;
246 							}
247 							if (destination != newDestination) {
248 								mFsm.Disconnect(state, destination, letter);
249 								mFsm.Connect(state, newDestination, letter);
250 							}
251 						}
252 					}
253 					if (mFsm.Destinations(state, EndMark).size() == 0) {
254 						mFsm.Connect(state, oldSize + mFsm.Initial(), EndMark);
255 					}
256 				}
257 				mFsm.Sparse();
258 			}
259 
Letters() const260 			const LettersTbl& Letters() const { return mFsm.Letters(); }
261 
Initial() const262 			State Initial() const {
263 				return State(mFsm, true);
264 			}
265 
Next(const State & state,Char letter) const266 			State Next(const State& state, Char letter) const {
267 				return state.Next(letter, MaxCount);
268 			}
269 
IsRequired(const State &) const270 			bool IsRequired(const State& /*state*/) const { return true; }
271 
AcceptStates(const TVector<State> & newStates)272 			void AcceptStates(const TVector<State>& newStates) {
273 				mNewFsm.Resize(newStates.size());
274 				mNewFsm.SetInitial(0);
275 				mNewFsm.SetIsDetermined(true);
276 				mNewFsm.letters = Letters();
277 				mNewFsm.ClearFinal();
278 				for (size_t i = 0; i < newStates.size(); i++) {
279 					newStates[i].CopyData(mNewFsm, i);
280 				}
281 			}
282 
Connect(size_t from,size_t to,Char letter)283 			void Connect(size_t from, size_t to, Char letter) {
284 				Y_ASSERT(mNewFsm.Destinations(from, letter).size() == 0);
285 				mNewFsm.Connect(from, to, letter);
286 			}
287 
288 			typedef bool Result;
289 
Success()290 			Result Success() { return true; }
291 
Failure()292 			Result Failure() { return false; }
293 
Output()294 			Fsm& Output() { return mNewFsm; }
295 
SetMaxCount(size_t maxCount)296 			void SetMaxCount(size_t maxCount) {
297 				MaxCount = maxCount;
298 			}
299 
300 		private:
301 			Fsm mFsm;
302 			size_t MaxCount;
303 			Fsm mNewFsm;
304 		};
305 	}
306 
Determine(size_t depth)307 	void HalfFinalFsm::Determine(size_t depth) {
308 		static const unsigned MaxSize = 200000;
309 
310 		Impl::HalfFinalDetermineTask task(fsm, depth);
311 		if (!Pire::Impl::Determine(task, MaxSize)) {
312 			task.SetMaxCount(1);
313 			Pire::Impl::Determine(task, MaxSize);
314 		}
315 
316 		task.Output().Swap(fsm);
317 	}
318 
GetCount(size_t state) const319 	size_t HalfFinalFsm::GetCount(size_t state) const {
320 		if (fsm.IsFinal(state)) {
321 			if (fsm.Tag(state)) {
322 				return fsm.Tag(state);
323 			} else {
324 				return 1;
325 			}
326 		}
327 		return 0;
328 	}
329 
GetTotalCount() const330 	size_t HalfFinalFsm::GetTotalCount() const {
331 		size_t count = 0;
332 		for (size_t state = 0; state < fsm.Size(); ++state) {
333 			count += GetCount(state);
334 		}
335 		return count;
336 	}
337 }
338