1 /* 2 * loaded.h -- a definition of the LoadedScanner 3 * 4 * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, 5 * Alexander Gololobov <agololobov@gmail.com> 6 * 7 * This file is part of Pire, the Perl Incompatible 8 * Regular Expressions library. 9 * 10 * Pire is free software: you can redistribute it and/or modify 11 * it under the terms of the GNU Lesser Public License as published by 12 * the Free Software Foundation, either version 3 of the License, or 13 * (at your option) any later version. 14 * 15 * Pire is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser Public License for more details. 19 * You should have received a copy of the GNU Lesser Public License 20 * along with Pire. If not, see <http://www.gnu.org/licenses>. 21 */ 22 23 24 #ifndef PIRE_SCANNERS_LOADED_H 25 #define PIRE_SCANNERS_LOADED_H 26 27 #include <string.h> 28 #include "common.h" 29 #include "../approx_matching.h" 30 #include "../fsm.h" 31 #include "../partition.h" 32 33 #ifdef PIRE_DEBUG 34 #include <iostream> 35 #endif 36 37 namespace Pire { 38 39 /** 40 * A loaded scanner -- the deterministic scanner having actions 41 * associated with states and transitions 42 * 43 * Not a complete scanner itself (hence abstract), this class provides 44 * infrastructure for regexp-based algorithms (e.g. counts or captures), 45 * supporting major part of scanner construction, (de)serialization, 46 * mmap()-ing, etc. 47 * 48 * It is a good idea to override copy ctor, operator= and swap() 49 * in subclasses to avoid mixing different scanner types in these methods. 50 * Also please note that subclasses should not have any data members of thier own. 51 */ 52 class LoadedScanner { 53 public: 54 typedef ui8 Letter; 55 typedef ui32 Action; 56 typedef ui8 Tag; 57 58 typedef size_t InternalState; 59 60 union Transition { 61 size_t raw; // alignment hint for compiler 62 struct { 63 ui32 shift; 64 Action action; 65 }; 66 }; 67 68 // Override in subclass, if neccessary 69 enum { 70 FinalFlag = 0, 71 DeadFlag = 0 72 }; 73 74 static const size_t MAX_RE_COUNT = 16; 75 76 protected: LoadedScanner()77 LoadedScanner() { Alias(Null()); } 78 LoadedScanner(const LoadedScanner & s)79 LoadedScanner(const LoadedScanner& s): m(s.m) 80 { 81 if (s.m_buffer) { 82 m_buffer = BufferType(new char [BufSize()]); 83 memcpy(m_buffer.get(), s.m_buffer.get(), BufSize()); 84 Markup(m_buffer.get()); 85 m.initial = (InternalState)m_jumps + (s.m.initial - (InternalState)s.m_jumps); 86 } else { 87 Alias(s); 88 } 89 } 90 Swap(LoadedScanner & s)91 void Swap(LoadedScanner& s) 92 { 93 DoSwap(m_buffer, s.m_buffer); 94 DoSwap(m.statesCount, s.m.statesCount); 95 DoSwap(m.lettersCount, s.m.lettersCount); 96 DoSwap(m.regexpsCount, s.m.regexpsCount); 97 DoSwap(m.initial, s.m.initial); 98 DoSwap(m_letters, s.m_letters); 99 DoSwap(m_jumps, s.m_jumps); 100 DoSwap(m_tags, s.m_tags); 101 } 102 103 LoadedScanner& operator = (const LoadedScanner& s) { LoadedScanner(s).Swap(*this); return *this; } LoadedScanner(LoadedScanner && other)104 LoadedScanner (LoadedScanner&& other) : LoadedScanner() { 105 Swap(other); 106 } 107 LoadedScanner& operator=(LoadedScanner&& other) { 108 Swap(other); 109 return *this; 110 } 111 112 public: Size()113 size_t Size() const { return m.statesCount; } 114 Empty()115 bool Empty() const { return m_jumps == Null().m_jumps; } 116 RegexpsCount()117 size_t RegexpsCount() const { return Empty() ? 0 : m.regexpsCount; } 118 LettersCount()119 size_t LettersCount() const { return m.lettersCount; } 120 Mmap(const void * ptr,size_t size)121 const void* Mmap(const void* ptr, size_t size) { 122 return Mmap(ptr, size, nullptr); 123 } 124 Mmap(const void * ptr,size_t size,ui32 * type)125 const void* Mmap(const void* ptr, size_t size, ui32* type) 126 { 127 Impl::CheckAlign(ptr); 128 LoadedScanner s; 129 const size_t* p = reinterpret_cast<const size_t*>(ptr); 130 Header header = Impl::ValidateHeader(p, size, ScannerIOTypes::LoadedScanner, sizeof(s.m)); 131 if (type) { 132 *type = header.Type; 133 } 134 135 Locals* locals; 136 Impl::MapPtr(locals, 1, p, size); 137 memcpy(&s.m, locals, sizeof(s.m)); 138 139 Impl::MapPtr(s.m_letters, MaxChar, p, size); 140 Impl::MapPtr(s.m_jumps, s.m.statesCount * s.m.lettersCount, p, size); 141 if (header.Version == Header::RE_VERSION_WITH_MACTIONS) { 142 Action* actions = 0; 143 Impl::MapPtr(actions, s.m.statesCount * s.m.lettersCount, p, size); 144 } 145 Impl::MapPtr(s.m_tags, s.m.statesCount, p, size); 146 147 s.m.initial += reinterpret_cast<size_t>(s.m_jumps); 148 Swap(s); 149 150 return (const void*) p; 151 } 152 153 void Save(yostream*, ui32 type) const; 154 void Save(yostream*) const; 155 void Load(yistream*, ui32* type); 156 void Load(yistream*); 157 158 template<class Eq> 159 void Init(size_t states, const Partition<Char, Eq>& letters, size_t startState, size_t regexpsCount = 1) 160 { 161 m.statesCount = states; 162 m.lettersCount = letters.Size(); 163 m.regexpsCount = regexpsCount; 164 m_buffer = BufferType(new char[BufSize()]); 165 memset(m_buffer.get(), 0, BufSize()); 166 Markup(m_buffer.get()); 167 168 m.initial = reinterpret_cast<size_t>(m_jumps + startState * m.lettersCount); 169 170 // Build letter translation table 171 Fill(m_letters, m_letters + sizeof(m_letters)/sizeof(*m_letters), 0); 172 for (auto&& letter : letters) 173 for (auto&& character : letter.second.second) 174 m_letters[character] = letter.second.first; 175 } 176 StateSize()177 size_t StateSize() const 178 { 179 return m.lettersCount * sizeof(*m_jumps); 180 } 181 TransitionIndex(size_t state,Char c)182 size_t TransitionIndex(size_t state, Char c) const 183 { 184 return state * m.lettersCount + m_letters[c]; 185 } 186 SetJump(size_t oldState,Char c,size_t newState,Action action)187 void SetJump(size_t oldState, Char c, size_t newState, Action action) 188 { 189 Y_ASSERT(m_buffer); 190 Y_ASSERT(oldState < m.statesCount); 191 Y_ASSERT(newState < m.statesCount); 192 193 size_t shift = (newState - oldState) * StateSize(); 194 Transition tr; 195 tr.shift = shift; 196 tr.action = action; 197 m_jumps[TransitionIndex(oldState, c)] = tr; 198 } 199 RemapAction(Action action)200 Action RemapAction(Action action) { return action; } 201 SetInitial(size_t state)202 void SetInitial(size_t state) { Y_ASSERT(m_buffer); m.initial = reinterpret_cast<size_t>(m_jumps + state * m.lettersCount); } SetTag(size_t state,Tag tag)203 void SetTag(size_t state, Tag tag) { Y_ASSERT(m_buffer); m_tags[state] = tag; } FinishBuild()204 void FinishBuild() {} 205 StateIdx(InternalState s)206 size_t StateIdx(InternalState s) const 207 { 208 return (reinterpret_cast<Transition*>(s) - m_jumps) / m.lettersCount; 209 } 210 SignExtend(i32 i)211 i64 SignExtend(i32 i) const { return i; } 212 BufSize()213 size_t BufSize() const 214 { 215 return 216 MaxChar * sizeof(*m_letters) 217 + m.statesCount * StateSize() 218 + m.statesCount * sizeof(*m_tags) 219 ; 220 } 221 222 protected: 223 224 static const Action IncrementMask = (1 << MAX_RE_COUNT) - 1; 225 static const Action ResetMask = IncrementMask << MAX_RE_COUNT; 226 227 // TODO: maybe, put fields in private section and provide data accessors 228 229 struct Locals { 230 ui32 statesCount; 231 ui32 lettersCount; 232 ui32 regexpsCount; 233 size_t initial; 234 } m; 235 236 using BufferType = std::unique_ptr<char[]>; 237 BufferType m_buffer; 238 239 Letter* m_letters; 240 Transition* m_jumps; 241 Tag* m_tags; 242 243 virtual ~LoadedScanner(); 244 245 private: 246 explicit LoadedScanner(Fsm& fsm, size_t distance = 0) 247 { 248 if (distance) { 249 fsm = CreateApproxFsm(fsm, distance); 250 } 251 fsm.Canonize(); 252 Init(fsm.Size(), fsm.Letters(), fsm.Initial()); 253 BuildScanner(fsm, *this); 254 } 255 256 // Only used to force Null() call during static initialization, when Null()::n can be 257 // initialized safely by compilers that don't support thread safe static local vars 258 // initialization 259 static const LoadedScanner* m_null; 260 Null()261 inline static const LoadedScanner& Null() 262 { 263 static const LoadedScanner n = Fsm::MakeFalse().Compile<LoadedScanner>(); 264 return n; 265 } 266 Markup(void * buf)267 void Markup(void* buf) 268 { 269 m_letters = reinterpret_cast<Letter*>(buf); 270 m_jumps = reinterpret_cast<Transition*>(m_letters + MaxChar); 271 m_tags = reinterpret_cast<Tag*>(m_jumps + m.statesCount * m.lettersCount); 272 } 273 Alias(const LoadedScanner & s)274 void Alias(const LoadedScanner& s) 275 { 276 memcpy(&m, &s.m, sizeof(m)); 277 m_buffer = 0; 278 m_letters = s.m_letters; 279 m_jumps = s.m_jumps; 280 m_tags = s.m_tags; 281 } 282 283 template<class Eq> 284 LoadedScanner(size_t states, const Partition<Char, Eq>& letters, size_t startState, size_t regexpsCount = 1) 285 { 286 Init(states, letters, startState, regexpsCount); 287 } 288 289 friend class Fsm; 290 }; 291 292 inline LoadedScanner::~LoadedScanner() = default; 293 294 } 295 296 297 #endif 298