------------------------------------------------------------------------------
--                                                                          --
--                            Matreshka Project                             --
--                                                                          --
--        Localization, Internationalization, Globalization for Ada         --
--                                                                          --
--                        Runtime Library Component                         --
--                                                                          --
------------------------------------------------------------------------------
--                                                                          --
-- Copyright © 2010, Vadim Godunko <vgodunko@gmail.com>                     --
-- All rights reserved.                                                     --
--                                                                          --
-- Redistribution and use in source and binary forms, with or without       --
-- modification, are permitted provided that the following conditions       --
-- are met:                                                                 --
--                                                                          --
--  * Redistributions of source code must retain the above copyright        --
--    notice, this list of conditions and the following disclaimer.         --
--                                                                          --
--  * Redistributions in binary form must reproduce the above copyright     --
--    notice, this list of conditions and the following disclaimer in the   --
--    documentation and/or other materials provided with the distribution.  --
--                                                                          --
--  * Neither the name of the Vadim Godunko, IE nor the names of its        --
--    contributors may be used to endorse or promote products derived from  --
--    this software without specific prior written permission.              --
--                                                                          --
-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS      --
-- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT        --
-- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR    --
-- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT     --
-- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,   --
-- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED --
-- TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR   --
-- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF   --
-- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     --
-- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS       --
-- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.             --
--                                                                          --
------------------------------------------------------------------------------
--  $Revision: 527 $ $Date: 2010-06-16 14:22:28 +0400 (Wed, 16 Jun 2010) $
------------------------------------------------------------------------------

package Matreshka.Internals.Regexps.Compiler is

   pragma Preelaborate;

   type YY_Errors is
     (No_Error,
      Unexpected_End_Of_Literal,
      Unexpected_End_Of_Character_Class,
      Unexpected_Character_in_Multiplicity_Specifier,
      Unexpected_End_Of_Multiplicity_Specifier,
      Unexpected_End_Of_Property_Specification,
      Unrecognized_Character_In_Property_Specification,
      Unescaped_Pattern_Syntax_Character,
      Expression_Syntax_Error);
   --  Kinds of errors which can be handed to YYError and stored in the
   --  YY_Error component of Compiler_State. No_Error is the value used by
   --  the default initialization of that component.

   type YY_Error_Information is record
      Error : YY_Errors;
      Index : Natural;
   end record;
   --  Pair of an error kind and an index.
   --  NOTE(review): Index is presumably the position in the expression at
   --  which the error was detected - confirm against the callers of YYError.

   type Property_Specification_Keyword is
     (
      --  Names of binary properties

      ASCII_Hex_Digit,
      Alphabetic,
      Bidi_Control,
--      Bidi_Mirrored,
      Cased,
      Case_Ignorable,
      Changes_When_Casefolded,
      Changes_When_Casemapped,
      Changes_When_NFKC_Casefolded,
      Changes_When_Lowercased,
      Changes_When_Titlecased,
      Changes_When_Uppercased,
      Composition_Exclusion,
      Full_Composition_Exclusion,
      Dash,
      Deprecated,
      Default_Ignorable_Code_Point,
      Diacritic,
      Extender,
      Grapheme_Base,
      Grapheme_Extend,
      Grapheme_Link,
      Hex_Digit,
      Hyphen,
      ID_Continue,
      Ideographic,
      ID_Start,
      IDS_Binary_Operator,
      IDS_Trinary_Operator,
      Join_Control,
      Logical_Order_Exception,
      Lowercase,
      Math,
      Noncharacter_Code_Point,
      Other_Alphabetic,
      Other_Default_Ignorable_Code_Point,
      Other_Grapheme_Extend,
      Other_ID_Continue,
      Other_ID_Start,
      Other_Lowercase,
      Other_Math,
      Other_Uppercase,
      Pattern_Syntax,
      Pattern_White_Space,
      Quotation_Mark,
      Radical,
      Soft_Dotted,
      STerm,
      Terminal_Punctuation,
      Unified_Ideograph,
      Uppercase,
      Variation_Selector,
      White_Space,
      XID_Continue,
      XID_Start,
      Expands_On_NFC,
      Expands_On_NFD,
      Expands_On_NFKC,
      Expands_On_NFKD,

      --  Values of the General Category

      Other,
      Control,
      Format,
      Unassigned,
      Private_Use,
      Surrogate,
      Letter,
      Cased_Letter,
      Lowercase_Letter,
      Modifier_Letter,
      Other_Letter,
      Titlecase_Letter,
      Uppercase_Letter,
      Mark,
      Spacing_Mark,
      Enclosing_Mark,
      Nonspacing_Mark,
      Number,
      Decimal_Number,
      Letter_Number,
      Other_Number,
      Punctuation,
      Connector_Punctuation,
      Dash_Punctuation,
      Close_Punctuation,
      Final_Punctuation,
      Initial_Punctuation,
      Other_Punctuation,
      Open_Punctuation,
      Symbol,
      Currency_Symbol,
      Modifier_Symbol,
      Math_Symbol,
      Other_Symbol,
      Separator,
      Line_Separator,
      Paragraph_Separator,
      Space_Separator);
   --  Keywords which can be carried by the Property_Keyword variant of
   --  YYSType. The first group of literals names binary properties, the
   --  second group names values of the General Category. The order of the
   --  literals is part of the type and must not be changed.

   type Kinds is
     (None,
      Match_Code_Point,
      Number,
      Property_Keyword,
      AST_Node);
   --  Discriminant values which select the variant of YYSType.

   type YYSType (Kind : Kinds := None) is record
      case Kind is
         when None =>
            null;

         when Match_Code_Point =>
            Code : Wide_Wide_Character;

         when Number =>
            Value : Natural;

         when Property_Keyword =>
            Keyword : Property_Specification_Keyword;

         when AST_Node =>
            Node : Positive;
      end case;
   end record;
   --  Variant record whose content is selected by Kind: nothing, a
   --  character, a natural number, a property keyword, or a node.
   --  The discriminant has the default None, so objects declared without
   --  an explicit discriminant start out without any component.

   type Token is
     (End_Of_Input,
      Error,
      Token_Code_Point,
      Token_Any_Code_Point,
      Token_Alternation,
      Token_Optional_Greedy,
      Token_Optional_Lazy,
      Token_Zero_Or_More_Greedy,
      Token_Zero_Or_More_Lazy,
      Token_One_Or_More_Greedy,
      Token_One_Or_More_Lazy,
      Token_Character_Class_Begin,
      Token_Character_Class_End,
      Token_Negate_Character_Class,
      Token_Character_Class_Range,
      Token_Multiplicity_Begin,
      Token_Multiplicity_End_Greedy,
      Token_Multiplicity_End_Lazy,
      Token_Multiplicity_Comma,
      Token_Multiplicity_Number,
      Token_Subexpression_Capture_Begin,
      Token_Subexpression_Begin,
      Token_Subexpression_End,
      Token_Property_Begin_Positive,
      Token_Property_Begin_Negative,
      Token_Property_End,
      Token_Property_Keyword,
      Token_Start_Of_Line,
      Token_End_Of_Line);
   --  Tokens of the regular expression syntax.
   --  NOTE(review): presumably produced by the scanner and consumed by the
   --  parser; neither is visible in this specification.

   --  Global state of the compiler is collected below. As the first stage
   --  of refactoring every global state variable must be moved here; later
   --  the variables are to be wrapped by a record type, so that several
   --  compilers can be used in different threads at the same time.

   type Compiler_State is record
      Data                 :
        Matreshka.Internals.Strings.Shared_String_Access;
      YY_Start_State       : Integer := 1;
      YY_Current_Position  :
        Matreshka.Internals.Utf16.Utf16_String_Index := 0;
      YY_Current_Index     : Positive := 1;
      YY_Error             : YY_Error_Information := (No_Error, 0);
      YYLVal, YYVal        : YYSType;
      Character_Class_Mode : Boolean := False;
      --  The Unicode property specification is recognized in a separate
      --  mode of the scanner; this component is used to switch back to the
      --  original mode.
   end record;

   procedure YYError
     (Self  : not null access Compiler_State;
      Error : YY_Errors;
      Index : Natural);
   --  Report error.

   procedure Attach
     (Pattern    : in out Shared_Pattern;
      Head, Node : Positive);
   --  Attach Node to the list of nodes, started by Head.

   function Compile
     (Expression : not null Matreshka.Internals.Strings.Shared_String_Access)
      return not null Shared_Pattern_Access;
   --  NOTE(review): presumably compiles Expression into a new shared
   --  pattern; the result is never null. How errors are reported is not
   --  visible here - confirm against the body.

   --  Subprograms which create nodes and class members in Pattern.
   --  NOTE(review): the Positive results and the Positive Class/Expression
   --  parameters are presumably indices of nodes inside Pattern - confirm
   --  against the body.

   function Create_Alternative
     (Pattern     : not null Shared_Pattern_Access;
      Prefered    : Positive;
      Alternative : Positive)
      return Positive;
   pragma Inline (Create_Alternative);

   function Create_Anchor_End_Of_Line
     (Pattern : not null Shared_Pattern_Access)
      return Positive;
   pragma Inline (Create_Anchor_End_Of_Line);

   function Create_Anchor_Start_Of_Line
     (Pattern : not null Shared_Pattern_Access)
      return Positive;
   pragma Inline (Create_Anchor_Start_Of_Line);

   function Create_Character_Class
     (Pattern : not null Shared_Pattern_Access)
      return Positive;
   pragma Inline (Create_Character_Class);

   function Create_Match_Any
     (Pattern : not null Shared_Pattern_Access)
      return Positive;
   pragma Inline (Create_Match_Any);

   function Create_Match_Character
     (Pattern   : not null Shared_Pattern_Access;
      Character : Matreshka.Internals.Unicode.Code_Point)
      return Positive;
   pragma Inline (Create_Match_Character);

   function Create_Match_Property
     (Pattern  : not null Shared_Pattern_Access;
      Value    : Matreshka.Internals.Unicode.Ucd.Boolean_Properties;
      Negative : Boolean)
      return Positive;
   pragma Inline (Create_Match_Property);
   --  Variant for a binary property.

   function Create_Match_Property
     (Pattern  : not null Shared_Pattern_Access;
      Value    : General_Category_Flags;
      Negative : Boolean)
      return Positive;
   pragma Inline (Create_Match_Property);
   --  Variant for general category flags.

   procedure Create_Member_Character
     (Pattern   : not null Shared_Pattern_Access;
      Class     : Positive;
      Character : Matreshka.Internals.Unicode.Code_Point);
   pragma Inline (Create_Member_Character);

   procedure Create_Member_Property
     (Pattern  : not null Shared_Pattern_Access;
      Class    : Positive;
      Value    : Matreshka.Internals.Unicode.Ucd.Boolean_Properties;
      Negative : Boolean);
   pragma Inline (Create_Member_Property);
   --  Variant for a binary property.

   procedure Create_Member_Property
     (Pattern  : not null Shared_Pattern_Access;
      Class    : Positive;
      Value    : General_Category_Flags;
      Negative : Boolean);
   pragma Inline (Create_Member_Property);
   --  Variant for general category flags.

   procedure Create_Member_Range
     (Pattern   : not null Shared_Pattern_Access;
      Class     : Positive;
      Low, High : Matreshka.Internals.Unicode.Code_Point);
   pragma Inline (Create_Member_Range);

   function Create_Repetition
     (Pattern      : not null Shared_Pattern_Access;
      Expression   : Positive;
      Lower, Upper : Natural;
      Greedy       : Boolean)
      return Positive;
   pragma Inline (Create_Repetition);

   function Create_Subexpression
     (Pattern    : not null Shared_Pattern_Access;
      Expression : Positive;
      Capture    : Boolean)
      return Positive;
   pragma Inline (Create_Subexpression);

   --  Getters for attributes of the node Node of Pattern.
   --  NOTE(review): all of them return Natural; zero presumably stands for
   --  "no such node" where the result is a node - confirm against the body.

   function Get_Preferred
     (Pattern : not null Shared_Pattern_Access;
      Node    : Positive)
      return Natural;
   pragma Inline (Get_Preferred);

   function Get_Fallback
     (Pattern : not null Shared_Pattern_Access;
      Node    : Positive)
      return Natural;
   pragma Inline (Get_Fallback);

   function Get_Members
     (Pattern : not null Shared_Pattern_Access;
      Node    : Positive)
      return Natural;
   pragma Inline (Get_Members);

   function Get_Expression
     (Pattern : not null Shared_Pattern_Access;
      Node    : Positive)
      return Natural;
   pragma Inline (Get_Expression);

   function Get_Lower_Bound
     (Pattern : not null Shared_Pattern_Access;
      Node    : Positive)
      return Natural;
   pragma Inline (Get_Lower_Bound);

   function Get_Upper_Bound
     (Pattern : not null Shared_Pattern_Access;
      Node    : Positive)
      return Natural;
   pragma Inline (Get_Upper_Bound);

   function Get_Previous_Sibling
     (Pattern : not null Shared_Pattern_Access;
      Node    : Positive)
      return Natural;
   pragma Inline (Get_Previous_Sibling);

   function Get_Next_Sibling
     (Pattern : not null Shared_Pattern_Access;
      Node    : Positive)
      return Natural;
   pragma Inline (Get_Next_Sibling);

end Matreshka.Internals.Regexps.Compiler;