1module m_fsm 2! 3use m_buffer 4use m_dictionary 5use m_charset 6use m_entities 7use m_elstack 8 9private 10 11type, public :: fsm_t 12 ! 13 ! Contains information about the "finite state machine" 14 ! Some of the components (marked *) could at this point be made into 15 ! saved module variables. 16 ! 17 ! 18 integer :: state 19 integer :: context 20 integer :: nbrackets !* 21 integer :: nlts !* 22 character(len=1) :: quote_char !* 23 type(buffer_t) :: buffer !* 24 type(buffer_t) :: tmpbuf !* 25 type(buffer_t) :: element_name 26 type(dictionary_t) :: attributes 27 type(buffer_t) :: pcdata 28 logical :: entities_in_pcdata 29 logical :: entities_in_attributes 30 type(elstack_t) :: element_stack 31 logical :: root_element_seen 32 type(buffer_t) :: root_element_name 33 character(len=150) :: action 34 logical :: debug 35end type fsm_t 36 37public :: init_fsm, reset_fsm, evolve_fsm 38 39! 40! State parameters 41! 42integer, parameter, public :: ERROR = -1 43integer, parameter, public :: INIT = 1 44integer, parameter, private :: START_TAG_MARKER = 2 45integer, parameter, private :: END_TAG_MARKER = 3 46integer, parameter, private :: IN_NAME = 4 47integer, parameter, private :: WHITESPACE_IN_TAG = 5 48integer, parameter, private :: IN_PCDATA = 6 49integer, parameter, private :: SINGLETAG_MARKER = 7 50integer, parameter, private :: CLOSINGTAG_MARKER = 8 51integer, parameter, private :: IN_COMMENT = 9 52integer, parameter, private :: IN_ATT_NAME = 10 53integer, parameter, private :: IN_ATT_VALUE = 11 54integer, parameter, private :: EQUAL = 12 55integer, parameter, private :: SPACE_AFTER_EQUAL = 13 56integer, parameter, private :: SPACE_BEFORE_EQUAL = 14 57integer, parameter, private :: START_QUOTE = 15 58integer, parameter, private :: END_QUOTE = 16 59integer, parameter, private :: BANG = 17 60integer, parameter, private :: BANG_HYPHEN = 18 61integer, parameter, private :: ONE_HYPHEN = 19 62integer, parameter, private :: TWO_HYPHEN = 20 63integer, parameter, private :: QUESTION_MARK = 21 64integer, parameter, private :: START_XML_DECLARATION = 22 65integer, parameter, private :: IN_SGML_DECLARATION = 23 66integer, parameter, private :: IN_CDATA_SECTION = 24 67integer, parameter, private :: ONE_BRACKET = 25 68integer, parameter, private :: TWO_BRACKET = 26 69integer, parameter, private :: CDATA_PREAMBLE = 27 70integer, parameter, private :: IN_PCDATA_AT_EOL = 30 71! 72! Context parameters 73! 74integer, parameter, public :: OPENING_TAG = 100 75integer, parameter, public :: CLOSING_TAG = 110 76integer, parameter, public :: SINGLE_TAG = 120 77integer, parameter, public :: COMMENT_TAG = 130 78integer, parameter, public :: XML_DECLARATION_TAG = 140 79integer, parameter, public :: SGML_DECLARATION_TAG = 150 80integer, parameter, public :: CDATA_SECTION_TAG = 160 81integer, parameter, public :: NULL_CONTEXT = 200 82! 83! Signal parameters 84! 85integer, parameter, public :: QUIET = 1000 86integer, parameter, public :: END_OF_TAG = 1100 87integer, parameter, public :: CHUNK_OF_PCDATA = 1200 88integer, parameter, public :: EXCEPTION = 1500 89 90CONTAINS 91 92!------------------------------------------------------------ 93! Initialize once and for all the derived types (Fortran90 restriction) 94! 95subroutine init_fsm(fx) 96type(fsm_t), intent(inout) :: fx 97 98 fx%state = INIT 99 call setup_xml_charsets() 100 fx%context = NULL_CONTEXT 101 call init_elstack(fx%element_stack) 102 fx%root_element_seen = .false. 103 fx%debug = .false. 104 fx%entities_in_pcdata = .false. 105 fx%entities_in_attributes = .false. 106 fx%action = "" 107 call init_buffer(fx%buffer) 108 call init_buffer(fx%element_name) 109 call init_buffer(fx%pcdata) 110 call init_buffer(fx%root_element_name) 111 call init_dict(fx%attributes) 112end subroutine init_fsm 113!------------------------------------------------------------ 114subroutine reset_fsm(fx) 115type(fsm_t), intent(inout) :: fx 116 117 fx%state = INIT 118 call setup_xml_charsets() 119 fx%context = NULL_CONTEXT 120 call reset_elstack(fx%element_stack) 121 fx%action = "" 122 fx%root_element_seen = .false. 123 call reset_buffer(fx%buffer) 124 call reset_buffer(fx%element_name) 125 call reset_buffer(fx%pcdata) 126 call reset_buffer(fx%root_element_name) 127 call reset_dict(fx%attributes) 128end subroutine reset_fsm 129 130!------------------------------------------------------------ 131subroutine evolve_fsm(fx,c,signal) 132! 133! Finite-state machine evolution rules for XML parsing. 134! 135type(fsm_t), intent(inout) :: fx ! Internal state 136character(len=1), intent(in) :: c 137integer, intent(out) :: signal 138 139! 140! Reset signal 141! 142signal = QUIET 143! 144 145if (.not. (c .in. valid_chars)) then 146! 147! Let it pass (in case the underlying encoding is UTF-8) 148! But this chars in a name will cause havoc 149! 150! signal = EXCEPTION 151! fx%state = ERROR 152! fx%action = trim("Not a valid character in simple encoding: "//c) 153! RETURN 154endif 155 156select case(fx%state) 157 158 case (INIT) 159 if (c == "<") then 160 fx%state = START_TAG_MARKER 161 if (fx%debug) fx%action = ("Starting tag") 162 else if (c == ">") then 163 fx%state = ERROR 164 fx%action = ("Ending tag without being in one!") 165 else 166 if (fx%debug) fx%action = ("Reading garbage chars") 167 endif 168 169 case (START_TAG_MARKER) 170 if (c == ">") then 171 fx%state = ERROR 172 fx%action = ("Tag empty!") 173 else if (c == "<") then 174 fx%state = ERROR 175 fx%action = ("Double opening of tag!!") 176 else if (c == "/") then 177 fx%state = CLOSINGTAG_MARKER 178 if (fx%debug) fx%action = ("Starting endtag: ") 179 fx%context = CLOSING_TAG 180 else if (c == "?") then 181 fx%state = START_XML_DECLARATION 182 if (fx%debug) fx%action = ("Starting XML declaration ") 183 fx%context = XML_DECLARATION_TAG 184 else if (c == "!") then 185 fx%state = BANG 186 if (fx%debug) fx%action = ("Saw ! -- comment or SGML declaration expected...") 187 else if (c .in. whitespace) then 188 fx%state = ERROR 189 fx%action = ("Cannot have whitespace after <") 190 else if (c .in. initial_name_chars) then 191 fx%context = OPENING_TAG 192 fx%state = IN_NAME 193 call add_to_buffer(c,fx%buffer) 194 if (fx%debug) fx%action = ("Starting to read name in tag") 195 else 196 fx%state = ERROR 197 fx%action = ("Illegal initial character for name") 198 endif 199 200 201 case (BANG) 202 if (c == "-") then 203 fx%state = BANG_HYPHEN 204 if (fx%debug) fx%action = ("Almost ready to start comment ") 205 else if (c .in. uppercase_chars) then 206 fx%state = IN_SGML_DECLARATION 207 fx%nlts = 0 208 fx%nbrackets = 0 209 if (fx%debug) fx%action = ("SGML declaration ") 210 fx%context = SGML_DECLARATION_TAG 211 call add_to_buffer(c,fx%buffer) 212 else if (c == "[") then 213 fx%state = CDATA_PREAMBLE 214 if (fx%debug) fx%action = ("Declaration with [ ") 215 fx%context = CDATA_SECTION_TAG 216 else 217 fx%state = ERROR 218 fx%action = ("Wrong character after ! ") 219 endif 220 221 case (CDATA_PREAMBLE) 222 ! We assume a CDATA[ is forthcoming, we do not check 223 if (c == "[") then 224 fx%state = IN_CDATA_SECTION 225 if (fx%debug) fx%action = ("About to start reading CDATA contents") 226 else if (c == "]") then 227 fx%state = ERROR 228 fx%action = ("Unexpected ] in CDATA preamble") 229 else 230 if (fx%debug) fx%action = ("Reading CDATA preamble") 231 endif 232 233 case (IN_CDATA_SECTION) 234 if (c == "]") then 235 fx%state = ONE_BRACKET 236 if (fx%debug) fx%action = ("Saw a ] in CDATA section") 237 else 238 call add_to_buffer(c,fx%buffer) 239 if (fx%debug) fx%action = ("Reading contents of CDATA section") 240 endif 241 242 case (ONE_BRACKET) 243 if (c == "]") then 244 fx%state = TWO_BRACKET 245 if (fx%debug) fx%action = ("Maybe finish a CDATA section") 246 else 247 fx%state = IN_CDATA_SECTION 248 call add_to_buffer("]",fx%buffer) 249 if (fx%debug) fx%action = ("Continue reading contents of CDATA section") 250 endif 251 252 case (TWO_BRACKET) 253 if (c == ">") then 254 fx%state = END_TAG_MARKER 255 signal = END_OF_TAG 256 if (fx%debug) fx%action = ("End of CDATA section") 257 fx%pcdata = fx%buffer ! Not quite the same behavior 258 ! as pcdata... (not filtered) 259 call reset_buffer(fx%buffer) 260 else 261 fx%state = IN_CDATA_SECTION 262 call add_to_buffer("]",fx%buffer) 263 if (fx%debug) fx%action = ("Continue reading contents of CDATA section") 264 endif 265 266 case (IN_SGML_DECLARATION) 267 if (c == "<") then 268 fx%nlts = fx%nlts + 1 269 call add_to_buffer("<",fx%buffer) 270 fx%action = "Read an intermediate < in SGML declaration" 271 else if (c == "[") then 272 fx%nbrackets = fx%nbrackets + 1 273 call add_to_buffer("[",fx%buffer) 274 fx%action = "Read a [ in SGML declaration" 275 else if (c == "]") then 276 fx%nbrackets = fx%nbrackets - 1 277 call add_to_buffer("]",fx%buffer) 278 fx%action = "Read a ] in SGML declaration" 279 else if (c == ">") then 280 if (fx%nlts == 0) then 281 if (fx%nbrackets == 0) then 282 fx%state = END_TAG_MARKER 283 signal = END_OF_TAG 284 if (fx%debug) fx%action = ("Ending SGML declaration tag") 285 fx%pcdata = fx%buffer ! Same behavior as pcdata 286 call reset_buffer(fx%buffer) 287 else 288 fx%state = ERROR 289 fx%action = ("Unmatched ] in SGML declaration") 290 endif 291 else 292 fx%nlts = fx%nlts -1 293 call add_to_buffer(">",fx%buffer) 294 fx%action = "Read an intermediate > in SGML declaration" 295 endif 296 else 297 if (fx%debug) fx%action = ("Keep reading SGML declaration") 298 call add_to_buffer(c,fx%buffer) 299 endif 300 301 case (BANG_HYPHEN) 302 if (c == "-") then 303 fx%state = IN_COMMENT 304 fx%context = COMMENT_TAG 305 if (fx%debug) fx%action = ("In comment ") 306 else 307 fx%state = ERROR 308 fx%action = ("Wrong character after <!- ") 309 endif 310 311 case (START_XML_DECLARATION) 312 if (c .in. initial_name_chars) then 313 fx%state = IN_NAME 314 call add_to_buffer(c,fx%buffer) 315 if (fx%debug) fx%action = ("Starting to read name in XML declaration") 316 else 317 fx%state = ERROR 318 fx%action = "Wrong character after ? in start of XML declaration" 319 endif 320 321 case (CLOSINGTAG_MARKER) 322 if (c == ">") then 323 fx%state = ERROR 324 fx%action = ("Closing tag empty!") 325 else if (c == "<") then 326 fx%state = ERROR 327 fx%action = ("Double opening of closing tag!!") 328 else if (c == "/") then 329 fx%state = ERROR 330 fx%action = ("Syntax error (<//)") 331 else if (c .in. whitespace) then 332 fx%state = ERROR 333 fx%action = ("Cannot have whitespace after </") 334 else if (c .in. initial_name_chars) then 335 fx%state = IN_NAME 336 if (fx%debug) fx%action = ("Starting to read name inside endtag") 337 call add_to_buffer(c,fx%buffer) 338 else 339 fx%state = ERROR 340 fx%action = ("Illegal initial character for name") 341 endif 342 343 case (IN_NAME) 344 if (c == "<") then 345 fx%state = ERROR 346 fx%action = ("Starting tag within tag") 347 else if (c == ">") then 348 fx%state = END_TAG_MARKER 349 signal = END_OF_TAG 350 if (fx%debug) fx%action = ("Ending tag") 351! call set_element_name(fx%buffer,fx%element) 352 fx%element_name = fx%buffer 353 call reset_buffer(fx%buffer) 354 call reset_dict(fx%attributes) 355 else if (c == "/") then 356 if (fx%context /= OPENING_TAG) then 357 fx%state = ERROR 358 fx%action = ("Single tag did not open as start tag") 359 else 360 fx%state = SINGLETAG_MARKER 361 fx%context = SINGLE_TAG 362 if (fx%debug) fx%action = ("Almost ending single tag") 363! call set_element_name(fx%buffer,fx%element) 364 fx%element_name = fx%buffer 365 call reset_buffer(fx%buffer) 366 call reset_dict(fx%attributes) 367 endif 368 else if (c .in. whitespace) then 369 fx%state = WHITESPACE_IN_TAG 370 if (fx%debug) fx%action = ("Ending name chars") 371! call set_element_name(fx%buffer,fx%element) 372 fx%element_name = fx%buffer 373 call reset_buffer(fx%buffer) 374 call reset_dict(fx%attributes) 375 else if (c .in. name_chars) then 376 if (fx%debug) fx%action = ("Reading name chars in tag") 377 call add_to_buffer(c,fx%buffer) 378 else 379 fx%state = ERROR 380 fx%action = ("Illegal character for name") 381 endif 382 383 case (IN_ATT_NAME) 384 if (c == "<") then 385 fx%state = ERROR 386 fx%action = ("Starting tag within tag") 387 else if (c == ">") then 388 fx%state = ERROR 389 fx%action = ("Ending tag in the middle of an attribute") 390 else if (c == "/") then 391 fx%state = ERROR 392 fx%action = ("Ending tag in the middle of an attribute") 393 else if (c .in. whitespace) then 394 fx%state = SPACE_BEFORE_EQUAL 395 if (fx%debug) fx%action = ("Whitespace after attr. name (specs?)") 396 call add_key_to_dict(fx%buffer,fx%attributes) 397 call reset_buffer(fx%buffer) 398 else if ( c == "=" ) then 399 fx%state = EQUAL 400 if (fx%debug) fx%action = ("End of attr. name") 401 call add_key_to_dict(fx%buffer,fx%attributes) 402 call reset_buffer(fx%buffer) 403 else if (c .in. name_chars) then 404 if (fx%debug) fx%action = ("Reading attribute name chars") 405 call add_to_buffer(c,fx%buffer) 406 else 407 fx%state = ERROR 408 fx%action = ("Illegal character for attribute name") 409 endif 410 411 case (EQUAL) 412 if ( (c == """") .or. (c == "'") ) then 413 fx%state = START_QUOTE 414 if (fx%debug) fx%action = ("Found beginning quote") 415 fx%quote_char = c 416 else if (c .in. whitespace) then 417 fx%state = SPACE_AFTER_EQUAL 418 if (fx%debug) fx%action = ("Whitespace after equal sign...") 419 else 420 fx%state = ERROR 421 fx%action = ("Must use quotes for attribute values") 422 endif 423 424 case (SPACE_BEFORE_EQUAL) 425 if ( c == "=" ) then 426 fx%state = EQUAL 427 if (fx%debug) fx%action = ("Equal sign") 428 else if (c .in. whitespace) then 429 if (fx%debug) fx%action = ("More whitespace before equal sign...") 430 else 431 fx%state = ERROR 432 fx%action = ("Must use equal sign for attribute values") 433 endif 434 435 case (SPACE_AFTER_EQUAL) 436 if ( c == "=" ) then 437 fx%state = ERROR 438 fx%action = ("Duplicate Equal sign") 439 else if (c .in. whitespace) then 440 if (fx%debug) fx%action = ("More whitespace after equal sign...") 441 else if ( (c == """") .or. (c == "'") ) then 442 fx%state = START_QUOTE 443 fx%quote_char = c 444 if (fx%debug) fx%action = ("Found beginning quote") 445 else 446 fx%state = ERROR 447 fx%action = ("Must use quotes for attribute values") 448 endif 449 450 case (START_QUOTE) 451 if (c == fx%quote_char) then 452 fx%state = END_QUOTE 453 if (fx%debug) fx%action = ("Emtpy attribute value...") 454 if (fx%entities_in_attributes) then 455 call entity_filter(fx%buffer,fx%tmpbuf) 456 fx%entities_in_attributes = .false. 457 call add_value_to_dict(fx%tmpbuf,fx%attributes) 458 else 459 call add_value_to_dict(fx%buffer,fx%attributes) 460 endif 461 call reset_buffer(fx%buffer) 462 else if (c == "<") then 463 fx%state = ERROR 464 fx%action = ("Attribute value cannot contain <") 465 else ! actually allowed chars in att values... Specs: No "<" 466 fx%state = IN_ATT_VALUE 467 if (fx%debug) fx%action = ("Starting to read attribute value") 468 if (c == "&") fx%entities_in_attributes = .true. 469 call add_to_buffer(c,fx%buffer) 470 endif 471 472 case (IN_ATT_VALUE) 473 if (c == fx%quote_char) then 474 fx%state = END_QUOTE 475 if (fx%debug) fx%action = ("End of attribute value") 476 if (fx%entities_in_attributes) then 477 call entity_filter(fx%buffer,fx%tmpbuf) 478 fx%entities_in_attributes = .false. 479 call add_value_to_dict(fx%tmpbuf,fx%attributes) 480 else 481 call add_value_to_dict(fx%buffer,fx%attributes) 482 endif 483 call reset_buffer(fx%buffer) 484 else if (c == "<") then 485 fx%state = ERROR 486 fx%action = ("Attribute value cannot contain <") 487 else if ( (c == char(10)) ) then 488 fx%state = ERROR 489! 490! Aparently other whitespace is allowed... 491! 492 fx%action = ("No newline allowed in attr. value (specs?)") 493 else ! all other chars allowed in attr value 494 if (fx%debug) fx%action = ("Reading attribute value chars") 495 call add_to_buffer(c,fx%buffer) 496 if (c == "&") fx%entities_in_attributes = .true. 497 endif 498 499 case (END_QUOTE) 500 if ((c == """") .or. (c == "'")) then 501 fx%state = ERROR 502 fx%action = ("Duplicate end quote") 503 else if (c .in. whitespace) then 504 fx%state = WHITESPACE_IN_TAG 505 if (fx%debug) fx%action = ("Space in between attributes or to end of tag") 506 else if (c == "<") then 507 fx%state = ERROR 508 fx%action = ("Starting tag within tag") 509 else if (c == ">") then 510 if (fx%context == XML_DECLARATION_TAG) then 511 fx%state = ERROR 512 fx%action = "End of XML declaration without ?" 513 else 514 fx%state = END_TAG_MARKER 515 signal = END_OF_TAG 516 if (fx%debug) fx%action = ("Ending tag after some attributes") 517 endif 518 else if (c == "/") then 519 if (fx%context /= OPENING_TAG) then 520 fx%state = ERROR 521 fx%action = ("Single tag did not open as start tag") 522 else 523 fx%state = SINGLETAG_MARKER 524 fx%context = SINGLE_TAG 525 if (fx%debug) fx%action = ("Almost ending single tag after some attributes") 526 endif 527 else if (c == "?") then 528 if (fx%context /= XML_DECLARATION_TAG) then 529 fx%state = ERROR 530 fx%action = "Wrong lone ? in tag" 531 else 532 fx%state = QUESTION_MARK 533 if (fx%debug) fx%action = ("About to end XML declaration") 534 endif 535 else 536 fx%state = ERROR 537 fx%action = ("Must have some whitespace after att. value") 538 endif 539 540 541 case (WHITESPACE_IN_TAG) 542 if ( c .in. whitespace) then 543 if (fx%debug) fx%action = ("Reading whitespace in tag") 544 else if (c == "<") then 545 fx%state = ERROR 546 fx%action = ("Starting tag within tag") 547 else if (c == ">") then 548 if (fx%context == XML_DECLARATION_TAG) then 549 fx%state = ERROR 550 fx%action = "End of XML declaration without ?" 551 else 552 fx%state = END_TAG_MARKER 553 signal = END_OF_TAG 554 if (fx%debug) fx%action = ("End whitespace in tag") 555 endif 556 else if (c == "/") then 557 if (fx%context /= OPENING_TAG) then 558 fx%state = ERROR 559 fx%action = ("Single tag did not open as start tag") 560 else 561 fx%state = SINGLETAG_MARKER 562 fx%context = SINGLE_TAG 563 if (fx%debug) fx%action = ("End whitespace in single tag") 564 endif 565 else if (c .in. initial_name_chars) then 566 fx%state = IN_ATT_NAME 567 if (fx%debug) fx%action = ("Starting Attribute name in tag") 568 call add_to_buffer(c,fx%buffer) 569 else if (c == "?") then 570 if (fx%context /= XML_DECLARATION_TAG) then 571 fx%state = ERROR 572 fx%action = "Wrong lone ? in tag" 573 else 574 fx%state = QUESTION_MARK 575 if (fx%debug) fx%action = ("About to end XML declaration after whitespace") 576 endif 577 else 578 fx%state = ERROR 579 fx%action = ("Illegal initial character for attribute") 580 endif 581 582 case (QUESTION_MARK) 583 if (c == ">") then 584 fx%state = END_TAG_MARKER 585 signal = END_OF_TAG 586 if (fx%debug) fx%action = ("End of XML declaration tag") 587 else 588 fx%state = ERROR 589 fx%action = "No > after ? in XML declaration tag" 590 endif 591 592 case (IN_COMMENT) 593 ! 594 ! End of comment is "-->", and ">" can appear inside comments 595 ! 596 if (c == "-") then 597 fx%state = ONE_HYPHEN 598 if (fx%debug) fx%action = ("Saw - in Comment") 599 else 600 if (fx%debug) fx%action = ("Reading comment") 601 call add_to_buffer(c,fx%buffer) 602 endif 603 604 case (ONE_HYPHEN) 605 if (c == "-") then 606 fx%state = TWO_HYPHEN 607 if (fx%debug) fx%action = ("About to end comment") 608 else 609 fx%state = IN_COMMENT 610 if (fx%debug) fx%action = ("Keep reading comment after -: ") 611 call add_to_buffer("-",fx%buffer) 612 call add_to_buffer(c,fx%buffer) 613 endif 614 615 case (TWO_HYPHEN) 616 if (c == ">") then 617 fx%state = END_TAG_MARKER 618 signal = END_OF_TAG 619 if (fx%debug) fx%action = ("End of Comment") 620 fx%pcdata = fx%buffer ! Same behavior as pcdata 621 call reset_buffer(fx%buffer) 622 else 623 fx%state = ERROR 624 fx%action = ("Cannot have -- in comment") 625 endif 626 627 case (SINGLETAG_MARKER) 628 629 if (c == ">") then 630 fx%state = END_TAG_MARKER 631 signal = END_OF_TAG 632 if (fx%debug) fx%action = ("Ending tag") 633 ! We have to call begin_element AND end_element 634 else 635 fx%state = ERROR 636 fx%action = ("Wrong ending of single tag") 637 endif 638 639 case (IN_PCDATA) 640 if (c == "<") then 641 fx%state = START_TAG_MARKER 642 signal = CHUNK_OF_PCDATA 643 if (fx%debug) fx%action = ("End of pcdata -- Starting tag") 644 if (fx%entities_in_pcdata) then 645 call entity_filter(fx%buffer,fx%pcdata) 646 fx%entities_in_pcdata = .false. 647 else 648 fx%pcdata = fx%buffer 649 endif 650 call reset_buffer(fx%buffer) 651 else if (c == ">") then 652 fx%state = ERROR 653 fx%action = ("Ending tag without starting it!") 654 else if (c == char(10)) then 655 fx%state = IN_PCDATA_AT_EOL 656 signal = CHUNK_OF_PCDATA 657 if (fx%debug) fx%action = ("Resetting PCDATA buffer at newline") 658 call add_to_buffer(c,fx%buffer) 659 if (fx%entities_in_pcdata) then 660 call entity_filter(fx%buffer,fx%pcdata) 661 fx%entities_in_pcdata = .false. 662 else 663 fx%pcdata = fx%buffer 664 endif 665 call reset_buffer(fx%buffer) 666 else 667 call add_to_buffer(c,fx%buffer) 668 if (c=="&") fx%entities_in_pcdata = .true. 669 if (fx%debug) fx%action = ("Reading pcdata") 670 ! 671 ! Check whether we are close to the end of the buffer. 672 ! If so, make a chunk and reset the buffer 673 if (c .in. whitespace) then 674 if (buffer_nearly_full(fx%buffer)) then 675 signal = CHUNK_OF_PCDATA 676 if (fx%debug) fx%action = ("Resetting almost full PCDATA buffer") 677 if (fx%entities_in_pcdata) then 678 call entity_filter(fx%buffer,fx%pcdata) 679 fx%entities_in_pcdata = .false. 680 else 681 fx%pcdata = fx%buffer 682 endif 683 call reset_buffer(fx%buffer) 684 endif 685 endif 686 endif 687 688 case (IN_PCDATA_AT_EOL) 689 ! 690 ! Avoid triggering an extra pcdata event 691 ! 692 if (c == "<") then 693 fx%state = START_TAG_MARKER 694 if (fx%debug) fx%action = ("No more pcdata after eol-- Starting tag") 695 else if (c == ">") then 696 fx%state = ERROR 697 fx%action = ("Ending tag without starting it!") 698 else if (c == char(10)) then 699 fx%state = IN_PCDATA_AT_EOL 700 signal = CHUNK_OF_PCDATA 701 if (fx%debug) fx%action = ("Resetting PCDATA buffer at repeated newline") 702 call add_to_buffer(c,fx%buffer) 703 if (fx%entities_in_pcdata) then 704 call entity_filter(fx%buffer,fx%pcdata) 705 fx%entities_in_pcdata = .false. 706 else 707 fx%pcdata = fx%buffer 708 endif 709 call reset_buffer(fx%buffer) 710 else 711 fx%state = IN_PCDATA 712 call add_to_buffer(c,fx%buffer) 713 if (c=="&") fx%entities_in_pcdata = .true. 714 if (fx%debug) fx%action = ("Resuming reading pcdata after EOL") 715 ! 716 ! Check whether we are close to the end of the buffer. 717 ! If so, make a chunk and reset the buffer 718 if (c .in. whitespace) then 719 if (buffer_nearly_full(fx%buffer)) then 720 signal = CHUNK_OF_PCDATA 721 if (fx%debug) fx%action = ("Resetting almost full PCDATA buffer") 722 if (fx%entities_in_pcdata) then 723 call entity_filter(fx%buffer,fx%pcdata) 724 fx%entities_in_pcdata = .false. 725 else 726 fx%pcdata = fx%buffer 727 endif 728 call reset_buffer(fx%buffer) 729 endif 730 endif 731 endif 732 733 734 735 case (END_TAG_MARKER) 736! 737 if (c == "<") then 738 fx%state = START_TAG_MARKER 739 if (fx%debug) fx%action = ("Starting tag") 740 else if (c == ">") then 741 fx%state = ERROR 742 fx%action = ("Double ending of tag!") 743! 744! We should make this whitespace in general (maybe not? 745! how about indentation in text chunks?) 746! See specs. 747! 748 else if (c == char(10)) then 749 ! Ignoring LF after end of tag is probably non standard... 750 751 if (fx%debug) & 752 fx%action = ("---------Discarding newline after end of tag") 753 754 !!! New code for full compliance 755 ! fx%state = IN_PCDATA_AT_EOL 756 ! call add_to_buffer(c,fx%buffer) 757 ! if (fx%debug) & 758 ! fx%action = ("Found LF after end of tag. Emitting PCDATA event") 759 ! signal = CHUNK_OF_PCDATA 760 ! if (fx%entities_in_pcdata) then 761 ! call entity_filter(fx%buffer,fx%pcdata) 762 ! fx%entities_in_pcdata = .false. 763 ! else 764 ! fx%pcdata = fx%buffer 765 ! endif 766 ! call reset_buffer(fx%buffer) 767 else 768 fx%state = IN_PCDATA 769 call add_to_buffer(c,fx%buffer) 770 if (c=="&") fx%entities_in_pcdata = .true. 771 if (fx%debug) fx%action = ("End of Tag. Starting to read PCDATA") 772 endif 773 774 case (ERROR) 775 776 stop "Cannot continue after parsing errors!" 777 778 end select 779 780if (fx%state == ERROR) signal = EXCEPTION 781 782end subroutine evolve_fsm 783 784end module m_fsm 785 786 787 788 789 790 791 792 793 794 795 796 797 798