1module m_sax_fsm 2! 3use m_sax_buffer 4use m_sax_dictionary 5use m_sax_charset 6use m_sax_entities 7use m_sax_elstack 8 9private 10 11type, public :: fsm_t 12 ! 13 ! Contains information about the "finite state machine" 14 ! Some of the components (marked *) could at this point be made into 15 ! saved module variables. 16 ! 17 ! 18 integer :: state 19 integer :: context 20 integer :: nbrackets !* 21 integer :: nlts !* 22 character(len=1) :: quote_char !* 23 type(buffer_t) :: buffer !* 24 type(buffer_t) :: tmpbuf !* 25 type(buffer_t) :: element_name 26 type(dictionary_t) :: attributes 27 type(buffer_t) :: pcdata 28 logical :: entities_in_pcdata 29 logical :: entities_in_attributes 30 type(elstack_t) :: element_stack 31 logical :: root_element_seen 32 type(buffer_t) :: root_element_name 33 character(len=150) :: action 34 logical :: debug 35end type fsm_t 36 37public :: init_fsm, reset_fsm, evolve_fsm 38 39! 40! State parameters 41! 42integer, parameter, public :: ERROR = -1 43integer, parameter, public :: INIT = 1 44integer, parameter, private :: START_TAG_MARKER = 2 45integer, parameter, private :: END_TAG_MARKER = 3 46integer, parameter, private :: IN_NAME = 4 47integer, parameter, private :: WHITESPACE_IN_TAG = 5 48integer, parameter, private :: IN_PCDATA = 6 49integer, parameter, private :: SINGLETAG_MARKER = 7 50integer, parameter, private :: CLOSINGTAG_MARKER = 8 51integer, parameter, private :: IN_COMMENT = 9 52integer, parameter, private :: IN_ATT_NAME = 10 53integer, parameter, private :: IN_ATT_VALUE = 11 54integer, parameter, private :: EQUAL = 12 55integer, parameter, private :: SPACE_AFTER_EQUAL = 13 56integer, parameter, private :: SPACE_BEFORE_EQUAL = 14 57integer, parameter, private :: START_QUOTE = 15 58integer, parameter, private :: END_QUOTE = 16 59integer, parameter, private :: BANG = 17 60integer, parameter, private :: BANG_HYPHEN = 18 61integer, parameter, private :: ONE_HYPHEN = 19 62integer, parameter, private :: TWO_HYPHEN = 20 63integer, parameter, private :: QUESTION_MARK = 21 64integer, parameter, private :: START_XML_DECLARATION = 22 65integer, parameter, private :: IN_SGML_DECLARATION = 23 66integer, parameter, private :: IN_CDATA_SECTION = 24 67integer, parameter, private :: ONE_BRACKET = 25 68integer, parameter, private :: TWO_BRACKET = 26 69integer, parameter, private :: CDATA_PREAMBLE = 27 70integer, parameter, private :: IN_PCDATA_AT_EOL = 30 71! 72! Context parameters 73! 74integer, parameter, public :: OPENING_TAG = 100 75integer, parameter, public :: CLOSING_TAG = 110 76integer, parameter, public :: SINGLE_TAG = 120 77integer, parameter, public :: COMMENT_TAG = 130 78integer, parameter, public :: XML_DECLARATION_TAG = 140 79integer, parameter, public :: SGML_DECLARATION_TAG = 150 80integer, parameter, public :: CDATA_SECTION_TAG = 160 81integer, parameter, public :: NULL_CONTEXT = 200 82! 83! Signal parameters 84! 85integer, parameter, public :: QUIET = 1000 86integer, parameter, public :: END_OF_TAG = 1100 87integer, parameter, public :: CHUNK_OF_PCDATA = 1200 88integer, parameter, public :: CHUNK_OF_CDATA_SECTION = 1300 89integer, parameter, public :: EXCEPTION = 1500 90 91CONTAINS 92 93!------------------------------------------------------------ 94! Initialize once and for all the derived types (Fortran90 restriction) 95! 96subroutine init_fsm(fx) 97type(fsm_t), intent(inout) :: fx 98 99 fx%state = INIT 100 call setup_xml_charsets() 101 fx%context = NULL_CONTEXT 102 call init_elstack(fx%element_stack) 103 fx%root_element_seen = .false. 104 fx%debug = .false. 105 fx%entities_in_pcdata = .false. 106 fx%entities_in_attributes = .false. 107 fx%action = "" 108 call init_buffer(fx%buffer) 109 call init_buffer(fx%element_name) 110 call init_buffer(fx%pcdata) 111 call init_buffer(fx%root_element_name) 112 call init_dict(fx%attributes) 113end subroutine init_fsm 114!------------------------------------------------------------ 115subroutine reset_fsm(fx) 116type(fsm_t), intent(inout) :: fx 117 118 fx%state = INIT 119 call setup_xml_charsets() 120 fx%context = NULL_CONTEXT 121 call reset_elstack(fx%element_stack) 122 fx%action = "" 123 fx%root_element_seen = .false. 124 call reset_buffer(fx%buffer) 125 call reset_buffer(fx%element_name) 126 call reset_buffer(fx%pcdata) 127 call reset_buffer(fx%root_element_name) 128 call reset_dict(fx%attributes) 129end subroutine reset_fsm 130 131!------------------------------------------------------------ 132subroutine evolve_fsm(fx,c,signal) 133! 134! Finite-state machine evolution rules for XML parsing. 135! 136type(fsm_t), intent(inout) :: fx ! Internal state 137character(len=1), intent(in) :: c 138integer, intent(out) :: signal 139 140! 141! Reset signal 142! 143signal = QUIET 144! 145 146if (.not. (c .in. valid_chars)) then 147! 148! Let it pass (in case the underlying encoding is UTF-8) 149! But this chars in a name will cause havoc 150! 151! signal = EXCEPTION 152! fx%state = ERROR 153! fx%action = trim("Not a valid character in simple encoding: "//c) 154! RETURN 155endif 156 157select case(fx%state) 158 159 case (INIT) 160 if (c == "<") then 161 fx%state = START_TAG_MARKER 162 if (fx%debug) fx%action = ("Starting tag") 163 else if (c == ">") then 164 fx%state = ERROR 165 fx%action = ("Ending tag without being in one!") 166 else 167 if (fx%debug) fx%action = ("Reading garbage chars") 168 endif 169 170 case (START_TAG_MARKER) 171 if (c == ">") then 172 fx%state = ERROR 173 fx%action = ("Tag empty!") 174 else if (c == "<") then 175 fx%state = ERROR 176 fx%action = ("Double opening of tag!!") 177 else if (c == "/") then 178 fx%state = CLOSINGTAG_MARKER 179 if (fx%debug) fx%action = ("Starting endtag: ") 180 fx%context = CLOSING_TAG 181 else if (c == "?") then 182 fx%state = START_XML_DECLARATION 183 if (fx%debug) fx%action = ("Starting XML declaration ") 184 fx%context = XML_DECLARATION_TAG 185 else if (c == "!") then 186 fx%state = BANG 187 if (fx%debug) fx%action = ("Saw ! -- comment or SGML declaration expected...") 188 else if (c .in. whitespace) then 189 fx%state = ERROR 190 fx%action = ("Cannot have whitespace after <") 191 else if (c .in. initial_name_chars) then 192 fx%context = OPENING_TAG 193 fx%state = IN_NAME 194 call add_to_buffer(c,fx%buffer) 195 if (fx%debug) fx%action = ("Starting to read name in tag") 196 else 197 fx%state = ERROR 198 fx%action = ("Illegal initial character for name") 199 endif 200 201 202 case (BANG) 203 if (c == "-") then 204 fx%state = BANG_HYPHEN 205 if (fx%debug) fx%action = ("Almost ready to start comment ") 206 else if (c .in. uppercase_chars) then 207 fx%state = IN_SGML_DECLARATION 208 fx%nlts = 0 209 fx%nbrackets = 0 210 if (fx%debug) fx%action = ("SGML declaration ") 211 fx%context = SGML_DECLARATION_TAG 212 call add_to_buffer(c,fx%buffer) 213 else if (c == "[") then 214 fx%state = CDATA_PREAMBLE 215 if (fx%debug) fx%action = ("Declaration with [ ") 216 fx%context = CDATA_SECTION_TAG 217 else 218 fx%state = ERROR 219 fx%action = ("Wrong character after ! ") 220 endif 221 222 case (CDATA_PREAMBLE) 223 ! We assume a CDATA[ is forthcoming, we do not check 224 if (c == "[") then 225 fx%state = IN_CDATA_SECTION 226 if (fx%debug) fx%action = ("About to start reading CDATA contents") 227 else if (c == "]") then 228 fx%state = ERROR 229 fx%action = ("Unexpected ] in CDATA preamble") 230 else 231 if (fx%debug) fx%action = ("Reading CDATA preamble") 232 endif 233 234 case (IN_CDATA_SECTION) 235 if (c == "]") then 236 fx%state = ONE_BRACKET 237 if (fx%debug) fx%action = ("Saw a ] in CDATA section") 238 else 239 ! Check whether we are close to the end of the buffer. 240 ! If so, make a chunk and reset the buffer 241 ! Note that we do not split in whitespace (there might not be) 242 call add_to_buffer(c,fx%buffer) 243 if (buffer_nearly_full(fx%buffer)) then 244 signal = CHUNK_OF_CDATA_SECTION 245 if (fx%debug) fx%action = ("Resetting almost full CDATA buffer") 246 fx%pcdata = fx%buffer 247 call reset_buffer(fx%buffer) 248 else 249 if (fx%debug) fx%action = ("Reading contents of CDATA section") 250 endif 251 endif 252 253 case (ONE_BRACKET) 254 if (c == "]") then 255 fx%state = TWO_BRACKET 256 if (fx%debug) fx%action = ("Maybe finish a CDATA section") 257 else 258 fx%state = IN_CDATA_SECTION 259 call add_to_buffer("]",fx%buffer) 260 if (fx%debug) fx%action = ("Continue reading contents of CDATA section") 261 endif 262 263 case (TWO_BRACKET) 264 if (c == ">") then 265 fx%state = END_TAG_MARKER 266 signal = CHUNK_OF_CDATA_SECTION 267 if (fx%debug) fx%action = ("End of CDATA section") 268 fx%pcdata = fx%buffer ! Not quite the same behavior 269 ! as pcdata... (not filtered) 270 call reset_buffer(fx%buffer) 271 else 272 fx%state = IN_CDATA_SECTION 273 call add_to_buffer("]",fx%buffer) 274 if (fx%debug) fx%action = ("Continue reading contents of CDATA section") 275 endif 276 277 case (IN_SGML_DECLARATION) 278 if (c == "<") then 279 fx%nlts = fx%nlts + 1 280 call add_to_buffer("<",fx%buffer) 281 fx%action = "Read an intermediate < in SGML declaration" 282 else if (c == "[") then 283 fx%nbrackets = fx%nbrackets + 1 284 call add_to_buffer("[",fx%buffer) 285 fx%action = "Read a [ in SGML declaration" 286 else if (c == "]") then 287 fx%nbrackets = fx%nbrackets - 1 288 call add_to_buffer("]",fx%buffer) 289 fx%action = "Read a ] in SGML declaration" 290 else if (c == ">") then 291 if (fx%nlts == 0) then 292 if (fx%nbrackets == 0) then 293 fx%state = END_TAG_MARKER 294 signal = END_OF_TAG 295 if (fx%debug) fx%action = ("Ending SGML declaration tag") 296 fx%pcdata = fx%buffer ! Same behavior as pcdata 297 call reset_buffer(fx%buffer) 298 else 299 fx%state = ERROR 300 fx%action = ("Unmatched ] in SGML declaration") 301 endif 302 else 303 fx%nlts = fx%nlts -1 304 call add_to_buffer(">",fx%buffer) 305 fx%action = "Read an intermediate > in SGML declaration" 306 endif 307 else 308 if (fx%debug) fx%action = ("Keep reading SGML declaration") 309 call add_to_buffer(c,fx%buffer) 310 endif 311 312 case (BANG_HYPHEN) 313 if (c == "-") then 314 fx%state = IN_COMMENT 315 fx%context = COMMENT_TAG 316 if (fx%debug) fx%action = ("In comment ") 317 else 318 fx%state = ERROR 319 fx%action = ("Wrong character after <!- ") 320 endif 321 322 case (START_XML_DECLARATION) 323 if (c .in. initial_name_chars) then 324 fx%state = IN_NAME 325 call add_to_buffer(c,fx%buffer) 326 if (fx%debug) fx%action = ("Starting to read name in XML declaration") 327 else 328 fx%state = ERROR 329 fx%action = "Wrong character after ? in start of XML declaration" 330 endif 331 332 case (CLOSINGTAG_MARKER) 333 if (c == ">") then 334 fx%state = ERROR 335 fx%action = ("Closing tag empty!") 336 else if (c == "<") then 337 fx%state = ERROR 338 fx%action = ("Double opening of closing tag!!") 339 else if (c == "/") then 340 fx%state = ERROR 341 fx%action = ("Syntax error (<//)") 342 else if (c .in. whitespace) then 343 fx%state = ERROR 344 fx%action = ("Cannot have whitespace after </") 345 else if (c .in. initial_name_chars) then 346 fx%state = IN_NAME 347 if (fx%debug) fx%action = ("Starting to read name inside endtag") 348 call add_to_buffer(c,fx%buffer) 349 else 350 fx%state = ERROR 351 fx%action = ("Illegal initial character for name") 352 endif 353 354 case (IN_NAME) 355 if (c == "<") then 356 fx%state = ERROR 357 fx%action = ("Starting tag within tag") 358 else if (c == ">") then 359 fx%state = END_TAG_MARKER 360 signal = END_OF_TAG 361 if (fx%debug) fx%action = ("Ending tag") 362! call set_element_name(fx%buffer,fx%element) 363 fx%element_name = fx%buffer 364 call reset_buffer(fx%buffer) 365 call reset_dict(fx%attributes) 366 else if (c == "/") then 367 if (fx%context /= OPENING_TAG) then 368 fx%state = ERROR 369 fx%action = ("Single tag did not open as start tag") 370 else 371 fx%state = SINGLETAG_MARKER 372 fx%context = SINGLE_TAG 373 if (fx%debug) fx%action = ("Almost ending single tag") 374! call set_element_name(fx%buffer,fx%element) 375 fx%element_name = fx%buffer 376 call reset_buffer(fx%buffer) 377 call reset_dict(fx%attributes) 378 endif 379 else if (c .in. whitespace) then 380 fx%state = WHITESPACE_IN_TAG 381 if (fx%debug) fx%action = ("Ending name chars") 382! call set_element_name(fx%buffer,fx%element) 383 fx%element_name = fx%buffer 384 call reset_buffer(fx%buffer) 385 call reset_dict(fx%attributes) 386 else if (c .in. name_chars) then 387 if (fx%debug) fx%action = ("Reading name chars in tag") 388 call add_to_buffer(c,fx%buffer) 389 else 390 fx%state = ERROR 391 fx%action = ("Illegal character for name") 392 endif 393 394 case (IN_ATT_NAME) 395 if (c == "<") then 396 fx%state = ERROR 397 fx%action = ("Starting tag within tag") 398 else if (c == ">") then 399 fx%state = ERROR 400 fx%action = ("Ending tag in the middle of an attribute") 401 else if (c == "/") then 402 fx%state = ERROR 403 fx%action = ("Ending tag in the middle of an attribute") 404 else if (c .in. whitespace) then 405 fx%state = SPACE_BEFORE_EQUAL 406 if (fx%debug) fx%action = ("Whitespace after attr. name (specs?)") 407 call add_key_to_dict(fx%buffer,fx%attributes) 408 call reset_buffer(fx%buffer) 409 else if ( c == "=" ) then 410 fx%state = EQUAL 411 if (fx%debug) fx%action = ("End of attr. name") 412 call add_key_to_dict(fx%buffer,fx%attributes) 413 call reset_buffer(fx%buffer) 414 else if (c .in. name_chars) then 415 if (fx%debug) fx%action = ("Reading attribute name chars") 416 call add_to_buffer(c,fx%buffer) 417 else 418 fx%state = ERROR 419 fx%action = ("Illegal character for attribute name") 420 endif 421 422 case (EQUAL) 423 if ( (c == """") .or. (c == "'") ) then 424 fx%state = START_QUOTE 425 if (fx%debug) fx%action = ("Found beginning quote") 426 fx%quote_char = c 427 else if (c .in. whitespace) then 428 fx%state = SPACE_AFTER_EQUAL 429 if (fx%debug) fx%action = ("Whitespace after equal sign...") 430 else 431 fx%state = ERROR 432 fx%action = ("Must use quotes for attribute values") 433 endif 434 435 case (SPACE_BEFORE_EQUAL) 436 if ( c == "=" ) then 437 fx%state = EQUAL 438 if (fx%debug) fx%action = ("Equal sign") 439 else if (c .in. whitespace) then 440 if (fx%debug) fx%action = ("More whitespace before equal sign...") 441 else 442 fx%state = ERROR 443 fx%action = ("Must use equal sign for attribute values") 444 endif 445 446 case (SPACE_AFTER_EQUAL) 447 if ( c == "=" ) then 448 fx%state = ERROR 449 fx%action = ("Duplicate Equal sign") 450 else if (c .in. whitespace) then 451 if (fx%debug) fx%action = ("More whitespace after equal sign...") 452 else if ( (c == """") .or. (c == "'") ) then 453 fx%state = START_QUOTE 454 fx%quote_char = c 455 if (fx%debug) fx%action = ("Found beginning quote") 456 else 457 fx%state = ERROR 458 fx%action = ("Must use quotes for attribute values") 459 endif 460 461 case (START_QUOTE) 462 if (c == fx%quote_char) then 463 fx%state = END_QUOTE 464 if (fx%debug) fx%action = ("Emtpy attribute value...") 465 if (fx%entities_in_attributes) then 466 call entity_filter(fx%buffer,fx%tmpbuf) 467 fx%entities_in_attributes = .false. 468 call add_value_to_dict(fx%tmpbuf,fx%attributes) 469 else 470 call add_value_to_dict(fx%buffer,fx%attributes) 471 endif 472 call reset_buffer(fx%buffer) 473 else if (c == "<") then 474 fx%state = ERROR 475 fx%action = ("Attribute value cannot contain <") 476 else ! actually allowed chars in att values... Specs: No "<" 477 fx%state = IN_ATT_VALUE 478 if (fx%debug) fx%action = ("Starting to read attribute value") 479 if (c == "&") fx%entities_in_attributes = .true. 480 call add_to_buffer(c,fx%buffer) 481 endif 482 483 case (IN_ATT_VALUE) 484 if (c == fx%quote_char) then 485 fx%state = END_QUOTE 486 if (fx%debug) fx%action = ("End of attribute value") 487 if (fx%entities_in_attributes) then 488 call entity_filter(fx%buffer,fx%tmpbuf) 489 fx%entities_in_attributes = .false. 490 call add_value_to_dict(fx%tmpbuf,fx%attributes) 491 else 492 call add_value_to_dict(fx%buffer,fx%attributes) 493 endif 494 call reset_buffer(fx%buffer) 495 else if (c == "<") then 496 fx%state = ERROR 497 fx%action = ("Attribute value cannot contain <") 498 else if ( (c == char(10)) ) then 499 fx%state = ERROR 500! 501! Aparently other whitespace is allowed... 502! 503 fx%action = ("No newline allowed in attr. value (specs?)") 504 else ! all other chars allowed in attr value 505 if (fx%debug) fx%action = ("Reading attribute value chars") 506 call add_to_buffer(c,fx%buffer) 507 if (c == "&") fx%entities_in_attributes = .true. 508 endif 509 510 case (END_QUOTE) 511 if ((c == """") .or. (c == "'")) then 512 fx%state = ERROR 513 fx%action = ("Duplicate end quote") 514 else if (c .in. whitespace) then 515 fx%state = WHITESPACE_IN_TAG 516 if (fx%debug) fx%action = ("Space in between attributes or to end of tag") 517 else if (c == "<") then 518 fx%state = ERROR 519 fx%action = ("Starting tag within tag") 520 else if (c == ">") then 521 if (fx%context == XML_DECLARATION_TAG) then 522 fx%state = ERROR 523 fx%action = "End of XML declaration without ?" 524 else 525 fx%state = END_TAG_MARKER 526 signal = END_OF_TAG 527 if (fx%debug) fx%action = ("Ending tag after some attributes") 528 endif 529 else if (c == "/") then 530 if (fx%context /= OPENING_TAG) then 531 fx%state = ERROR 532 fx%action = ("Single tag did not open as start tag") 533 else 534 fx%state = SINGLETAG_MARKER 535 fx%context = SINGLE_TAG 536 if (fx%debug) fx%action = ("Almost ending single tag after some attributes") 537 endif 538 else if (c == "?") then 539 if (fx%context /= XML_DECLARATION_TAG) then 540 fx%state = ERROR 541 fx%action = "Wrong lone ? in tag" 542 else 543 fx%state = QUESTION_MARK 544 if (fx%debug) fx%action = ("About to end XML declaration") 545 endif 546 else 547 fx%state = ERROR 548 fx%action = ("Must have some whitespace after att. value") 549 endif 550 551 552 case (WHITESPACE_IN_TAG) 553 if ( c .in. whitespace) then 554 if (fx%debug) fx%action = ("Reading whitespace in tag") 555 else if (c == "<") then 556 fx%state = ERROR 557 fx%action = ("Starting tag within tag") 558 else if (c == ">") then 559 if (fx%context == XML_DECLARATION_TAG) then 560 fx%state = ERROR 561 fx%action = "End of XML declaration without ?" 562 else 563 fx%state = END_TAG_MARKER 564 signal = END_OF_TAG 565 if (fx%debug) fx%action = ("End whitespace in tag") 566 endif 567 else if (c == "/") then 568 if (fx%context /= OPENING_TAG) then 569 fx%state = ERROR 570 fx%action = ("Single tag did not open as start tag") 571 else 572 fx%state = SINGLETAG_MARKER 573 fx%context = SINGLE_TAG 574 if (fx%debug) fx%action = ("End whitespace in single tag") 575 endif 576 else if (c .in. initial_name_chars) then 577 fx%state = IN_ATT_NAME 578 if (fx%debug) fx%action = ("Starting Attribute name in tag") 579 call add_to_buffer(c,fx%buffer) 580 else if (c == "?") then 581 if (fx%context /= XML_DECLARATION_TAG) then 582 fx%state = ERROR 583 fx%action = "Wrong lone ? in tag" 584 else 585 fx%state = QUESTION_MARK 586 if (fx%debug) fx%action = ("About to end XML declaration after whitespace") 587 endif 588 else 589 fx%state = ERROR 590 fx%action = ("Illegal initial character for attribute") 591 endif 592 593 case (QUESTION_MARK) 594 if (c == ">") then 595 fx%state = END_TAG_MARKER 596 signal = END_OF_TAG 597 if (fx%debug) fx%action = ("End of XML declaration tag") 598 else 599 fx%state = ERROR 600 fx%action = "No > after ? in XML declaration tag" 601 endif 602 603 case (IN_COMMENT) 604 ! 605 ! End of comment is "-->", and ">" can appear inside comments 606 ! 607 if (c == "-") then 608 fx%state = ONE_HYPHEN 609 if (fx%debug) fx%action = ("Saw - in Comment") 610 else 611 if (fx%debug) fx%action = ("Reading comment") 612 call add_to_buffer(c,fx%buffer) 613 endif 614 615 case (ONE_HYPHEN) 616 if (c == "-") then 617 fx%state = TWO_HYPHEN 618 if (fx%debug) fx%action = ("About to end comment") 619 else 620 fx%state = IN_COMMENT 621 if (fx%debug) fx%action = ("Keep reading comment after -: ") 622 call add_to_buffer("-",fx%buffer) 623 call add_to_buffer(c,fx%buffer) 624 endif 625 626 case (TWO_HYPHEN) 627 if (c == ">") then 628 fx%state = END_TAG_MARKER 629 signal = END_OF_TAG 630 if (fx%debug) fx%action = ("End of Comment") 631 fx%pcdata = fx%buffer ! Same behavior as pcdata 632 call reset_buffer(fx%buffer) 633 else 634 fx%state = ERROR 635 fx%action = ("Cannot have -- in comment") 636 endif 637 638 case (SINGLETAG_MARKER) 639 640 if (c == ">") then 641 fx%state = END_TAG_MARKER 642 signal = END_OF_TAG 643 if (fx%debug) fx%action = ("Ending tag") 644 ! We have to call begin_element AND end_element 645 else 646 fx%state = ERROR 647 fx%action = ("Wrong ending of single tag") 648 endif 649 650 case (IN_PCDATA) 651 if (c == "<") then 652 fx%state = START_TAG_MARKER 653 signal = CHUNK_OF_PCDATA 654 if (fx%debug) fx%action = ("End of pcdata -- Starting tag") 655 if (fx%entities_in_pcdata) then 656 call entity_filter(fx%buffer,fx%pcdata) 657 fx%entities_in_pcdata = .false. 658 else 659 fx%pcdata = fx%buffer 660 endif 661 call reset_buffer(fx%buffer) 662 else if (c == ">") then 663 fx%state = ERROR 664 fx%action = ("Ending tag without starting it!") 665 else if (c == char(10)) then 666 fx%state = IN_PCDATA_AT_EOL 667 signal = CHUNK_OF_PCDATA 668 if (fx%debug) fx%action = ("Resetting PCDATA buffer at newline") 669 call add_to_buffer(c,fx%buffer) 670 if (fx%entities_in_pcdata) then 671 call entity_filter(fx%buffer,fx%pcdata) 672 fx%entities_in_pcdata = .false. 673 else 674 fx%pcdata = fx%buffer 675 endif 676 call reset_buffer(fx%buffer) 677 else 678 call add_to_buffer(c,fx%buffer) 679 if (c=="&") fx%entities_in_pcdata = .true. 680 if (fx%debug) fx%action = ("Reading pcdata") 681 ! 682 ! Check whether we are close to the end of the buffer. 683 ! If so, make a chunk and reset the buffer 684 if (c .in. whitespace) then 685 if (buffer_nearly_full(fx%buffer)) then 686 signal = CHUNK_OF_PCDATA 687 if (fx%debug) fx%action = ("Resetting almost full PCDATA buffer") 688 if (fx%entities_in_pcdata) then 689 call entity_filter(fx%buffer,fx%pcdata) 690 fx%entities_in_pcdata = .false. 691 else 692 fx%pcdata = fx%buffer 693 endif 694 call reset_buffer(fx%buffer) 695 endif 696 endif 697 endif 698 699 case (IN_PCDATA_AT_EOL) 700 ! 701 ! Avoid triggering an extra pcdata event 702 ! 703 if (c == "<") then 704 fx%state = START_TAG_MARKER 705 if (fx%debug) fx%action = ("No more pcdata after eol-- Starting tag") 706 else if (c == ">") then 707 fx%state = ERROR 708 fx%action = ("Ending tag without starting it!") 709 else if (c == char(10)) then 710 fx%state = IN_PCDATA_AT_EOL 711 signal = CHUNK_OF_PCDATA 712 if (fx%debug) fx%action = ("Resetting PCDATA buffer at repeated newline") 713 call add_to_buffer(c,fx%buffer) 714 if (fx%entities_in_pcdata) then 715 call entity_filter(fx%buffer,fx%pcdata) 716 fx%entities_in_pcdata = .false. 717 else 718 fx%pcdata = fx%buffer 719 endif 720 call reset_buffer(fx%buffer) 721 else 722 fx%state = IN_PCDATA 723 call add_to_buffer(c,fx%buffer) 724 if (c=="&") fx%entities_in_pcdata = .true. 725 if (fx%debug) fx%action = ("Resuming reading pcdata after EOL") 726 ! 727 ! Check whether we are close to the end of the buffer. 728 ! If so, make a chunk and reset the buffer 729 if (c .in. whitespace) then 730 if (buffer_nearly_full(fx%buffer)) then 731 signal = CHUNK_OF_PCDATA 732 if (fx%debug) fx%action = ("Resetting almost full PCDATA buffer") 733 if (fx%entities_in_pcdata) then 734 call entity_filter(fx%buffer,fx%pcdata) 735 fx%entities_in_pcdata = .false. 736 else 737 fx%pcdata = fx%buffer 738 endif 739 call reset_buffer(fx%buffer) 740 endif 741 endif 742 endif 743 744 745 746 case (END_TAG_MARKER) 747! 748 if (c == "<") then 749 fx%state = START_TAG_MARKER 750 if (fx%debug) fx%action = ("Starting tag") 751 else if (c == ">") then 752 fx%state = ERROR 753 fx%action = ("Double ending of tag!") 754! 755! We should make this whitespace in general (maybe not? 756! how about indentation in text chunks?) 757! See specs. 758! 759 else if (c == char(10)) then 760 ! Ignoring LF after end of tag is probably non standard... 761 762 if (fx%debug) & 763 fx%action = ("---------Discarding newline after end of tag") 764 765 !!! New code for full compliance 766 ! fx%state = IN_PCDATA_AT_EOL 767 ! call add_to_buffer(c,fx%buffer) 768 ! if (fx%debug) & 769 ! fx%action = ("Found LF after end of tag. Emitting PCDATA event") 770 ! signal = CHUNK_OF_PCDATA 771 ! if (fx%entities_in_pcdata) then 772 ! call entity_filter(fx%buffer,fx%pcdata) 773 ! fx%entities_in_pcdata = .false. 774 ! else 775 ! fx%pcdata = fx%buffer 776 ! endif 777 ! call reset_buffer(fx%buffer) 778 else 779 fx%state = IN_PCDATA 780 call add_to_buffer(c,fx%buffer) 781 if (c=="&") fx%entities_in_pcdata = .true. 782 if (fx%debug) fx%action = ("End of Tag. Starting to read PCDATA") 783 endif 784 785 case (ERROR) 786 787 stop "Cannot continue after parsing errors!" 788 789 end select 790 791if (fx%state == ERROR) signal = EXCEPTION 792 793end subroutine evolve_fsm 794 795end module m_sax_fsm 796 797 798 799 800 801 802 803 804 805 806 807 808 809