1module m_xml_parser 2 3! 4! Basic module to parse XML in the SAX spirit. 5! 6 7use m_buffer 8use m_reader 9use m_fsm 10use m_dictionary 11use m_debug 12use m_xml_error 13use m_elstack ! For element nesting checks 14! 15private 16 17! 18! XML file handle 19! 20type, public :: xml_t 21private 22 type(file_buffer_t) :: fb 23 type(fsm_t) :: fx 24 character(len=200) :: path_mark 25end type xml_t 26 27! 28public :: xml_parse 29public :: open_xmlfile, close_xmlfile 30public :: endfile_xmlfile, rewind_xmlfile 31public :: eof_xmlfile, sync_xmlfile 32public :: xml_char_count 33public :: xml_path, xml_mark_path, xml_get_path_mark 34public :: xml_name, xml_attributes 35 36CONTAINS !============================================================= 37 38subroutine open_xmlfile(fname,fxml,iostat,record_size) 39 character(len=*), intent(in) :: fname 40 integer, intent(out) :: iostat 41 type(xml_t), intent(out) :: fxml 42 integer, intent(in), optional :: record_size 43 44 call open_file(fname,fxml%fb,iostat,record_size) 45 call init_fsm(fxml%fx) 46 fxml%path_mark = "" 47 48end subroutine open_xmlfile 49!------------------------------------------------------------------------- 50 51subroutine rewind_xmlfile(fxml) 52 type(xml_t), intent(inout) :: fxml 53 54 call rewind_file(fxml%fb) 55 call reset_fsm(fxml%fx) 56 fxml%path_mark = "" 57 58end subroutine rewind_xmlfile 59 60!----------------------------------------- 61subroutine endfile_xmlfile(fxml) 62 type(xml_t), intent(inout) :: fxml 63 64 call mark_eof_file(fxml%fb) 65 66end subroutine endfile_xmlfile 67 68!----------------------------------------- 69subroutine close_xmlfile(fxml) 70 type(xml_t), intent(inout) :: fxml 71 72 call close_file_buffer(fxml%fb) 73 call reset_fsm(fxml%fx) ! just in case 74 fxml%path_mark = "" ! "" 75 76end subroutine close_xmlfile 77 78!----------------------------------------- 79subroutine sync_xmlfile(fxml,iostat) 80 type(xml_t), intent(inout) :: fxml 81 integer, intent(out) :: iostat 82 83 call sync_file(fxml%fb,iostat) 84 ! Do not reset fx: that's the whole point of synching. 85 86end subroutine sync_xmlfile 87 88!---------------------------------------------------- 89function eof_xmlfile(fxml) result (res) 90 type(xml_t), intent(in) :: fxml 91 logical :: res 92 93 res = eof_file(fxml%fb) 94 95end function eof_xmlfile 96! 97!---------------------------------------------------- 98!---------------------------------------------------- 99function xml_char_count(fxml) result (nc) 100 type(xml_t), intent(in) :: fxml 101 integer :: nc 102 nc = nchars_processed(fxml%fb) 103end function xml_char_count 104! 105!---------------------------------------------------- 106! 107 108recursive subroutine xml_parse(fxml, begin_element_handler, & 109 end_element_handler, & 110 pcdata_chunk_handler, & 111 comment_handler, & 112 xml_declaration_handler, & 113 cdata_section_handler, & 114 sgml_declaration_handler, & 115 error_handler, & 116 signal_handler, & 117 verbose, & 118 empty_element_handler, & 119 start_document_handler, & 120 end_document_handler) 121 122type(xml_t), intent(inout), target :: fxml 123 124optional :: begin_element_handler 125optional :: end_element_handler 126optional :: pcdata_chunk_handler 127optional :: comment_handler 128optional :: xml_declaration_handler 129optional :: sgml_declaration_handler 130optional :: cdata_section_handler 131optional :: error_handler 132optional :: signal_handler 133logical, intent(in), optional :: verbose 134optional :: empty_element_handler 135optional :: start_document_handler 136optional :: end_document_handler 137 138interface 139 subroutine begin_element_handler(name,attributes) 140 use m_dictionary 141 character(len=*), intent(in) :: name 142 type(dictionary_t), intent(in) :: attributes 143 end subroutine begin_element_handler 144 145 subroutine end_element_handler(name) 146 character(len=*), intent(in) :: name 147 end subroutine end_element_handler 148 149 subroutine pcdata_chunk_handler(chunk) 150 character(len=*), intent(in) :: chunk 151 end subroutine pcdata_chunk_handler 152 153 subroutine comment_handler(comment) 154 character(len=*), intent(in) :: comment 155 end subroutine comment_handler 156 157 subroutine xml_declaration_handler(name,attributes) 158 use m_dictionary 159 character(len=*), intent(in) :: name 160 type(dictionary_t), intent(in) :: attributes 161 end subroutine xml_declaration_handler 162 163 subroutine sgml_declaration_handler(sgml_declaration) 164 character(len=*), intent(in) :: sgml_declaration 165 end subroutine sgml_declaration_handler 166 167 subroutine cdata_section_handler(cdata) 168 character(len=*), intent(in) :: cdata 169 end subroutine cdata_section_handler 170 171 subroutine error_handler(error_info) 172 use m_xml_error 173 type(xml_error_t), intent(in) :: error_info 174 end subroutine error_handler 175 176 subroutine signal_handler(code) 177 logical, intent(out) :: code 178 end subroutine signal_handler 179 180 subroutine empty_element_handler(name,attributes) 181 use m_dictionary 182 character(len=*), intent(in) :: name 183 type(dictionary_t), intent(in) :: attributes 184 end subroutine empty_element_handler 185 186 subroutine start_document_handler() 187 end subroutine start_document_handler 188 189 subroutine end_document_handler() 190 end subroutine end_document_handler 191 192end interface 193 194character(len=1) :: c 195integer :: iostat 196 197integer :: signal 198 199type(buffer_t) :: name, oldname, dummy 200 201logical :: have_begin_handler, have_end_handler, & 202 have_pcdata_handler, have_comment_handler, & 203 have_xml_declaration_handler, & 204 have_sgml_declaration_handler, & 205 have_cdata_section_handler, have_empty_handler, & 206 have_error_handler, have_signal_handler, & 207 have_start_document_handler, have_end_document_handler 208 209logical :: pause_signal 210 211type(xml_error_t) :: error_info 212type(file_buffer_t), pointer :: fb 213type(fsm_t), pointer :: fx 214 215have_begin_handler = present(begin_element_handler) 216have_end_handler = present(end_element_handler) 217have_pcdata_handler = present(pcdata_chunk_handler) 218have_comment_handler = present(comment_handler) 219have_xml_declaration_handler = present(xml_declaration_handler) 220have_sgml_declaration_handler = present(sgml_declaration_handler) 221have_cdata_section_handler = present(cdata_section_handler) 222have_error_handler = present(error_handler) 223have_signal_handler = present(signal_handler) 224have_empty_handler = present(empty_element_handler) 225have_start_document_handler = present(start_document_handler) 226have_end_document_handler = present(end_document_handler) 227 228fb => fxml%fb 229fx => fxml%fx 230if (present(verbose)) then 231 debug = verbose ! For m_converters 232 fx%debug = verbose ! job-specific flag 233endif 234 235if (fx%debug) print *, " Entering xml_parse..." 236if (have_start_document_handler) call start_document_handler() 237 238!--------------------------------------------------------------------- 239do 240 call get_character(fb,c,iostat) 241 242 if (iostat /= 0) then ! End of file... 243 if (.not. is_empty(fx%element_stack)) then 244 call build_error_info(error_info, & 245 "Early end of file.", & 246 line(fb),column(fb),fx%element_stack,SEVERE_ERROR_CODE) 247 if (have_error_handler) then 248 call error_handler(error_info) 249 else 250 call default_error_handler(error_info) 251 endif 252 endif 253 if (have_end_document_handler) call end_document_handler() 254 call endfile_xmlfile(fxml) ! Mark it as eof 255 EXIT 256 endif 257 258 call evolve_fsm(fx,c,signal) 259 260 if (fx%debug) print *, c, " ::: ", trim(fx%action) 261 262 if (signal == END_OF_TAG) then 263 ! 264 ! We decide whether we have ended an opening tag or a closing tag 265 ! 266 if (fx%context == OPENING_TAG) then 267 name = fx%element_name 268 269 if (fx%debug) print *, "We have found an opening tag" 270 if (fx%root_element_seen) then 271 if (name .equal. fx%root_element_name) then 272 call build_error_info(error_info, & 273 "Duplicate root element: " // str(name), & 274 line(fb),column(fb),fx%element_stack,SEVERE_ERROR_CODE) 275 if (have_error_handler) then 276 call error_handler(error_info) 277 else 278 call default_error_handler(error_info) 279 endif 280 endif 281 if (is_empty(fx%element_stack)) then 282 call build_error_info(error_info, & 283 "Opening tag beyond root context: " // str(name), & 284 line(fb),column(fb),fx%element_stack,SEVERE_ERROR_CODE) 285 if (have_error_handler) then 286 call error_handler(error_info) 287 else 288 call default_error_handler(error_info) 289 endif 290 endif 291 else 292 fx%root_element_name = name 293 fx%root_element_seen = .true. 294 endif 295 call push_elstack(name,fx%element_stack) 296 if (have_begin_handler) & 297 call begin_element_handler(str(name),fx%attributes) 298 299 else if (fx%context == CLOSING_TAG) then 300 name = fx%element_name 301 302 if (fx%debug) print *, "We have found a closing tag" 303 if (is_empty(fx%element_stack)) then 304 call build_error_info(error_info, & 305 "Nesting error: End tag: " // str(name) // & 306 " does not match -- too many end tags", & 307 line(fb),column(fb),fx%element_stack,SEVERE_ERROR_CODE) 308 if (have_error_handler) then 309 call error_handler(error_info) 310 else 311 call default_error_handler(error_info) 312 endif 313 else 314 call get_top_elstack(fx%element_stack,oldname) 315 if (oldname .equal. name) then 316 call pop_elstack(fx%element_stack,oldname) 317 if (have_end_handler) call end_element_handler(str(name)) 318!! call pop_elstack(fx%element_stack,oldname) 319 else 320 call build_error_info(error_info, & 321 "Nesting error: End tag: " // str(name) // & 322 ". Expecting end of : " // str(oldname), & 323 line(fb),column(fb),fx%element_stack,SEVERE_ERROR_CODE) 324 if (have_error_handler) then 325 call error_handler(error_info) 326 else 327 call default_error_handler(error_info) 328 endif 329 endif 330 endif 331 else if (fx%context == SINGLE_TAG) then 332 name = fx%element_name 333 334 if (fx%debug) print *, "We have found a single (empty) tag: ", & 335 str(name) 336 ! 337 ! Push name on to stack to reveal true xpath 338 ! 339 call push_elstack(name,fx%element_stack) 340 if (have_empty_handler) then 341 if (fx%debug) print *, "--> calling empty_element_handler." 342 call empty_element_handler(str(name),fx%attributes) 343 call pop_elstack(fx%element_stack,dummy) 344 else 345 if (have_begin_handler) then 346 if (fx%debug) print *, "--> calling begin_element_handler..." 347 call begin_element_handler(str(name),fx%attributes) 348 endif 349 call pop_elstack(fx%element_stack,dummy) 350 if (have_end_handler) then 351 if (fx%debug) print *, "--> ... and end_element_handler." 352 call end_element_handler(str(name)) 353 endif 354 endif 355!! call pop_elstack(fx%element_stack,dummy) 356 357 else if (fx%context == CDATA_SECTION_TAG) then 358 359 if (fx%debug) print *, "We found a CDATA section" 360 if (is_empty(fx%element_stack)) then 361 if (fx%debug) print *, & 362 "... Warning: CDATA section outside element context" 363 else 364 if (have_cdata_section_handler) then 365 call cdata_section_handler(str(fx%pcdata)) 366 else 367 if (have_pcdata_handler) & 368 call pcdata_chunk_handler(str(fx%pcdata)) 369 endif 370 endif 371 372 else if (fx%context == COMMENT_TAG) then 373 374 if (fx%debug) print *, "We found a comment tag" 375 if (have_comment_handler) & 376 call comment_handler(str(fx%pcdata)) 377 378 else if (fx%context == SGML_DECLARATION_TAG) then 379 380 if (fx%debug) print *, "We found an sgml declaration" 381 if (have_sgml_declaration_handler) & 382 call sgml_declaration_handler(str(fx%pcdata)) 383 384 else if (fx%context == XML_DECLARATION_TAG) then 385 386 if (fx%debug) print *, "We found an XML declaration" 387 name = fx%element_name 388 if (have_xml_declaration_handler) & 389 call xml_declaration_handler(str(name),fx%attributes) 390 391 else 392 393 ! do nothing 394 395 endif 396 397 else if (signal == CHUNK_OF_PCDATA) then 398 399 if (fx%debug) print *, "We found a chunk of PCDATA" 400 if (is_empty(fx%element_stack)) then 401 if (fx%debug) print *, "... Warning: PCDATA outside element context" 402 ! Just a warning 403 call build_error_info(error_info, & 404 "PCDATA outside of element context", & 405 line(fb),column(fb),fx%element_stack,WARNING_CODE) 406 if (have_error_handler) then 407 call error_handler(error_info) 408 else 409 call default_error_handler(error_info) 410 endif 411 else 412 if (have_pcdata_handler) & 413 call pcdata_chunk_handler(str(fx%pcdata)) 414 endif 415 416 else if (signal == EXCEPTION) then 417 call build_error_info(error_info, fx%action, & 418 line(fb),column(fb),fx%element_stack,SEVERE_ERROR_CODE) 419 if (have_error_handler) then 420 call error_handler(error_info) 421 else 422 call default_error_handler(error_info) 423 endif 424 else 425 ! QUIET, do nothing 426 endif 427 if (signal /= QUIET) then 428 if (have_signal_handler) then 429 call signal_handler(pause_signal) 430 if (pause_signal) exit 431 endif 432 endif 433 434enddo 435 436end subroutine xml_parse 437 438!----------------------------------------- 439subroutine xml_path(fxml,path) 440 type(xml_t), intent(in) :: fxml 441 character(len=*), intent(out) :: path 442 443 call get_elstack_signature(fxml%fx%element_stack,path) 444 445end subroutine xml_path 446 447!----------------------------------------- 448subroutine xml_mark_path(fxml,path) 449 ! 450 ! Marks the current path 451 ! 452 type(xml_t), intent(inout) :: fxml 453 character(len=*), intent(out) :: path 454 455 call get_elstack_signature(fxml%fx%element_stack,fxml%path_mark) 456 path = fxml%path_mark 457 458end subroutine xml_mark_path 459 460!----------------------------------------- 461subroutine xml_get_path_mark(fxml,path) 462 ! 463 ! Returns the currently markd path (or an empty string if there are no marks) 464 ! 465 type(xml_t), intent(in) :: fxml 466 character(len=*), intent(out) :: path 467 468 path = fxml%path_mark 469 470end subroutine xml_get_path_mark 471 472!----------------------------------------- 473subroutine xml_name(fxml,name) 474 type(xml_t), intent(in) :: fxml 475 character(len=*), intent(out) :: name 476 477 name = str(fxml%fx%element_name) 478 479end subroutine xml_name 480!----------------------------------------- 481subroutine xml_attributes(fxml,attributes) 482 type(xml_t), intent(in) :: fxml 483 type(dictionary_t), intent(out) :: attributes 484 485 attributes = fxml%fx%attributes 486 487end subroutine xml_attributes 488 489end module m_xml_parser 490 491 492 493 494