1module m_xpath_api 2! 3! XPATH-like API for XML Parsing 4! Copyright Alberto Garcia <wdpgaara@lg.ehu.es>, August 2003 5! 6use xmlf90_sax 7 8private 9! 10public :: get_node, mark_node, enable_debug, disable_debug 11 12private :: match, process_node, get_path 13private :: begin_element, end_element, pcdata_handler, empty_element 14private :: pause_parsing 15 16! 17integer, private, save :: global_status 18integer, public, parameter :: END_OF_FILE = -1 19integer, public, parameter :: END_OF_ANCESTOR_ELEMENT = -2 20integer, public, parameter :: PCDATA_OVERFLOW = 7 21 22logical, private, save :: debug_xpath = .false. 23logical, private, save :: debug_sax = .false. 24character(len=500), private, save :: path_required 25character(len=100), private, save :: target_path ! *** Hard limit 26 27 28logical, private, save :: in_target_element = .false. 29logical, private, save :: in_pcdata_level = .false. 30 31logical, private, save :: stop_parsing = .false. 32! 33! This global variable determines whether we stop after 34! getting the initial element tag, or after digesting the full node. 35! 36logical, private, save :: full_node = .true. 37 38logical, private, save :: relative_path = .false. 39logical, private, save :: looking_for_current_element 40 41logical, private, save :: attributes_requested 42type(dictionary_t), private, save, pointer :: attributes_recovered 43 44integer, parameter, private :: MAX_PCDATA_SIZE = 65536 45logical, private, save :: pcdata_requested 46character(len=MAX_PCDATA_SIZE), private, & 47 save :: pcdata_recovered !*** Hard 48integer, private, save :: len_pcdata 49integer, private, save :: max_len_pcdata 50 51type(xml_t), pointer, save, private :: xp 52 53CONTAINS !=========================================================== 54 55!---------------------------------------------------- 56! Debugging control 57! 58subroutine enable_debug(sax) 59logical, intent(in), optional :: sax 60 debug_xpath = .true. 61 debug_sax = .false. 62 if (present(sax)) then 63 debug_sax = sax 64 endif 65end subroutine enable_debug 66 67subroutine disable_debug() 68 debug_xpath = .false. 69end subroutine disable_debug 70 71!---------------------------------------------------- 72! Main routines 73!--------------------------------------------------------------------- 74subroutine mark_node(fxml,path,att_name,att_value,attributes,status) 75! 76! Performs a search of a given element (by path, and/or presence of 77! a given attribute and/or value of that attribute), returning optionally 78! the element's attribute dictionary, and leaving the file handle fxml 79! ready to process the rest of the element's contents (child elements 80! and/or pcdata). 81! 82! Side effects: it sets "ancestor_path" to the element's path 83! 84! If the argument "path" is present and evaluates to a relative path (a 85! string not beginning with "/"), the search is interrupted after the end 86! of the "ancestor_element" set by a previous call to "mark_node". 87! If not earlier, the search ends at the end of the file. 88! 89! The status argument, if present, will hold a return value, 90! which will be: 91! 92! 0 on success, 93! negative in case of end-of-file or end-of-ancestor-element, or 94! positive in case of a malfunction 95! 96type(xml_t), intent(inout), target :: fxml 97character(len=*), intent(in), optional :: path 98character(len=*), intent(in), optional :: att_name 99character(len=*), intent(in), optional :: att_value 100type(dictionary_t), intent(out), optional :: attributes 101integer, intent(out), optional :: status 102 103 104character(len=200) :: ancestor_path ! local variable 105 106 full_node = .false. 107 call process_node(fxml, & 108 path,att_name,att_value, & 109 attributes, & 110 status=status) 111 if (status == 0) then 112 call xml_mark_path(fxml,ancestor_path) 113 if (debug_xpath) print *, "Setting ancestor_path to: ", trim(ancestor_path) 114 endif 115 116end subroutine mark_node 117 118!-------------------------------------------------------------------- 119subroutine get_node(fxml,path,att_name,att_value,attributes,pcdata,status) 120! 121! Performs a search of a given element (by path, and/or presence of 122! a given attribute and/or value of that attribute), returning optionally 123! the element's attribute dictionary and any PCDATA characters contained 124! in the element's scope (but not child elements). It leaves the file handle 125! physically and logically positioned: 126! 127! after the end of the element's start tag if 'pcdata' is not present 128! after the end of the element's end tag if 'pcdata' is present 129! 130! If the argument "path" is present and evaluates to a relative path (a 131! string not beginning with "/"), the search is interrupted after the end 132! of the "ancestor_element" set by a previous call to "mark_node". 133! If not earlier, the search ends at the end of the file. 134! 135! The status argument, if present, will hold a return value, 136! which will be: 137! 138! 0 on success, 139! negative in case of end-of-file or end-of-ancestor-element, or 140! positive in case of a malfunction (such as the overflow of the 141! user's pcdata buffer). 142! 143type(xml_t), intent(inout), target :: fxml 144character(len=*), intent(in), optional :: path 145character(len=*), intent(in), optional :: att_name 146character(len=*), intent(in), optional :: att_value 147type(dictionary_t), intent(out), optional :: attributes 148character(len=*), intent(out), optional, target :: pcdata 149integer, intent(out), optional :: status 150 151 full_node = present(pcdata) 152 call process_node(fxml, & 153 path,att_name,att_value, & 154 attributes,pcdata, & 155 status=status) 156 157end subroutine get_node 158! 159!-------------------------------------------------------------------- 160! Workhorse routines follow 161!-------------------------------------------------------------------- 162subroutine process_node(fxml,path,att_name,att_value, & 163 attributes,pcdata,& 164 status) 165type(xml_t), intent(inout), target :: fxml 166character(len=*), intent(in), optional :: path 167character(len=*), intent(in), optional :: att_name 168character(len=*), intent(in), optional :: att_value 169type(dictionary_t), intent(out), optional :: attributes 170character(len=*), intent(out), optional, target :: pcdata 171integer, intent(out), optional :: status 172 173logical :: path_present, att_name_present, att_value_present 174logical :: attributes_present 175 176character(len=3) :: any_path = "//*" 177character(len=200) :: local_path, ancestor_path ! *** Hard limit 178character(len=500) :: value ! *** Hard limit 179integer :: local_status 180 181type(dictionary_t) :: local_attributes 182 183global_status = 0 ! reset 184 185path_present = present(path) 186attributes_present = present(attributes) 187att_name_present = present(att_name) 188att_value_present = present(att_value) 189 190relative_path = .false. 191 192if (path_present) then 193 if (debug_xpath) print *, "SEARCHING for: ", trim(path) 194 if (path(1:1) /= "/") then 195 ! 196 ! Relative path search 197 ! 198 call xml_path(fxml,local_path) 199 call xml_get_path_mark(fxml,ancestor_path) 200 if (ancestor_path == "") then 201 if (debug_xpath) print *, "Relative search with null ancestor..." 202 endif 203 relative_path = .true. 204 if (debug_xpath) print *, "Relative search. ANCESTOR ELEMENT: ", & 205 trim(ancestor_path) 206 ! 207 ! Convert to absolute path 208 local_path = trim(local_path) // "/" // trim(path) 209 if (debug_xpath) print *, "Converting ", trim(path), & 210 " to absolute path: ", trim(local_path) 211 else 212 local_path = path 213 endif 214else 215 local_path = any_path 216endif 217 218looking_for_current_element = (path == ".") 219! 220! Use local_attributes, since it is in principle possible that 221! the user does not need to get back the attribute list. 222! 223do ! Loop until we satisfy the constraints 224 225 if (debug_xpath) print *, "--> Calling get_path ..." 226 call get_path(fxml,local_path,local_attributes,pcdata,local_status) 227 if (debug_xpath) print *, "-->Status after get_path: ", local_status 228 if (local_status /= 0) EXIT 229 230 if (debug_xpath) print *, "FOUND path matching: ", trim(local_path) 231 232 if (att_name_present) then 233 if (debug_xpath) print *, "Checking ", trim(att_name), " among ", & 234 number_of_entries(local_attributes), " entries:" 235 if (debug_xpath) call print_dict(local_attributes) 236 237 if (has_key(local_attributes,att_name)) then 238 239 if (att_value_present) then 240 call get_value(local_attributes,att_name,value,local_status) 241 if (local_status /= 0) then 242 if (debug_xpath) print *, "Failed to get value of att: ", & 243 trim(att_name) 244 EXIT 245 endif 246 247 if (att_value == value) then 248 local_status = 0 249 if (debug_xpath) print *, "Got correct att name and value " 250 EXIT 251 else 252 if (debug_xpath) print *, "att value: ", trim(value), & 253 " does not match" 254 cycle ! We keep searching 255 endif 256 else ! Found att_name, and no value required 257 local_status = 0 258 if (debug_xpath) print *, "Got correct att name" 259 EXIT 260 endif 261 else ! Did not find that attribute name 262 if (debug_xpath) print *, "Att name not present" 263 cycle ! keep searching 264 endif 265 else ! Found path, and no att info required 266 local_status = 0 267 if (debug_xpath) print *, "Found correct path. No other reqs." 268 EXIT 269 endif 270 271enddo 272 273if (present(status)) then 274 status = local_status 275 if (debug_xpath) print *, "--Returning status: ", status 276endif 277 278if (attributes_present) then 279 attributes = local_attributes 280endif 281 282end subroutine process_node 283 284!-------------------------------------------------------------------- 285subroutine get_path(fxml,path,attributes,pcdata,status) 286type(xml_t), intent(inout), target :: fxml 287character(len=*), intent(in) :: path 288type(dictionary_t), intent(out), optional, target :: attributes 289character(len=*), intent(out), optional, target :: pcdata 290integer, intent(out), optional :: status 291 292logical :: status_present 293 294xp => fxml 295 296path_required = path 297status_present = present(status) 298pcdata_requested = (present(pcdata)) 299 300attributes_requested = (present(attributes)) 301if (attributes_requested) then 302 call reset_dict(attributes) 303 attributes_recovered => attributes 304endif 305 306if (pcdata_requested) then 307! 308! Make sure we do not overstep the bounds of the supplied argument 309! 310 max_len_pcdata = min(len(pcdata),MAX_PCDATA_SIZE) 311 len_pcdata = 0 312 pcdata_recovered(1:max_len_pcdata) = "" 313 if (debug_xpath) print *, "Max length of pcdata store: ", max_len_pcdata 314endif 315 316if (looking_for_current_element) then 317 if (debug_xpath) print *, "Returning info about current element" 318 319 ! We are now in the desired element, and we have the name and 320 ! attribute list saved in xp. 321 ! 322 if (attributes_requested) call xml_attributes(xp,attributes_recovered) 323 ! 324 if (pcdata_requested) then 325 ! 326 ! Set things up so that we can get the pcdata 327 ! 328 call xml_path(xp,target_path) 329 in_target_element = .true. 330 in_pcdata_level = .true. 331 else 332 if (status_present) status = 0 333 RETURN ! We are done 334 endif 335else 336 target_path = "" 337 in_target_element = .false. 338 in_pcdata_level = .false. 339endif 340 341stop_parsing = .false. 342 343call xml_parse(fxml, & 344 begin_element_handler = begin_element , & 345 end_element_handler = end_element, & 346 pcdata_chunk_handler = pcdata_handler, & 347 verbose = debug_sax, signal_handler=pause_parsing, & 348 empty_element_handler = empty_element) 349 350if (eof_xmlfile(fxml)) then 351 global_status = END_OF_FILE 352 if (debug_xpath) print *, "Found end of file" 353 if (pcdata_requested) pcdata = "" 354else if (global_status == END_OF_ANCESTOR_ELEMENT) then 355 if (debug_xpath) print *, "Found end of ancestor element" 356 if (pcdata_requested) pcdata = "" 357else 358 if (debug_xpath) print *, "Parser found candidate element" 359 if (pcdata_requested) then 360 pcdata = pcdata_recovered(1:len_pcdata) 361 if (debug_xpath) print *, "PCDATA recovered: ", pcdata_recovered(1:len_pcdata) 362 endif 363endif 364if (global_status > 0) then 365 if (debug_xpath) print *, "Something went slightly wrong. Status > 0" 366endif 367! 368if (present(status)) status = global_status 369 370 371end subroutine get_path 372 373!================================================================== 374subroutine begin_element(name,attributes) 375character(len=*), intent(in) :: name 376type(dictionary_t), intent(in) :: attributes 377 378character(len=1000) :: path ! *** Hard limit 379 380call xml_path(xp,path) 381if (debug_xpath) print *, " begin_element ::: PATH: " , trim(path) 382if (debug_xpath) print *, "path: ", trim(path), " req: ", trim(path_required) 383if (match(path,path_required)) then 384 if (debug_xpath) print *, " Match path: " , trim(path) 385 in_target_element = .true. 386 target_path = path 387 in_pcdata_level = .true. 388 if (debug_xpath) print *, "In element name: " , name 389 if (attributes_requested) attributes_recovered = attributes 390 ! stop parsing 391 if (debug_xpath) print *, "full_node: ", full_node 392 if (.not. full_node) then 393 if (debug_xpath) print *, "Stopping parsing after initial tag" 394 stop_parsing = .true. 395 endif 396else 397 ! 398 ! If we are at the pcdata level and we enter another element, 399 ! we must not read pcdata 400 ! 401 if (in_pcdata_level) in_pcdata_level = .false. 402endif 403 404end subroutine begin_element 405!------------------------------------------------------------ 406subroutine end_element(name) 407character(len=*), intent(in) :: name 408 409character(len=300) :: path ! *** Hard limit 410character(len=300) :: left_path ! *** Hard limit 411character(len=300) :: ancestor_path ! *** Hard limit 412! 413 414call xml_path(xp,path) ! path *after* leaving element 415left_path = trim(path) // "/" // trim(name) 416 417if (in_target_element) then 418 if (path == target_path) then 419 ! 420 ! We are back to pcdata level after visiting child elements 421 ! 422 in_pcdata_level = .true. 423 424 else if (left_path == target_path) then 425 426 ! We stop the parsing at the end of the element 427 ! 428 if (debug_xpath) print *, "Exiting target element: ", trim(target_path) 429 in_target_element = .false. 430 in_pcdata_level = .false. 431 if (debug_xpath) print *, "Stopping parsing after end of target element" 432 stop_parsing = .true. 433 endif 434 435else if (relative_path) then 436 ! 437 ! Check in case we go out of ancestor element 438 ! 439 call xml_get_path_mark(xp,ancestor_path) 440 if (match(left_path,ancestor_path)) then 441 ! 442 ! We are leaving the ancestor element 443 ! 444 if (debug_xpath) print *, "Relative search. End of element: ", name 445 if (debug_xpath) print *, "Leaving Path: ", trim(left_path) 446 if (debug_xpath) print *, "Ancestor Path: ", trim(ancestor_path) 447 if (debug_xpath) print *, "Stopping parsing after end of ancestor element" 448 449 stop_parsing = .true. 450 global_status = END_OF_ANCESTOR_ELEMENT 451 endif 452endif 453 454end subroutine end_element 455 456!------------------------------------------------------------ 457subroutine empty_element(name,attributes) 458character(len=*), intent(in) :: name 459type(dictionary_t), intent(in) :: attributes 460 461character(len=300) :: path ! *** Hard limit 462 463call xml_path(xp,path) 464if (debug_xpath) print *, " empty_element ::: PATH: " , trim(path) 465if (debug_xpath) print *, "path: ", trim(path), " req: ", trim(path_required) 466if (match(path,path_required)) then 467 if (debug_xpath) print *, " Match path: " , trim(path) 468 if (debug_xpath) print *, "In (empty) element name: " , name 469 if (attributes_requested) attributes_recovered = attributes 470 ! stop parsing 471 stop_parsing = .true. 472 if (debug_xpath) print *, "Stopping parsing after empty tag" 473 if (debug_xpath) print *, "full_node: ", full_node 474 if (full_node) then 475 if (debug_xpath) print *, "*Warning: full_node requested, empty tag found" 476 endif 477endif 478! 479! There is no logic for ancestor element handling, as by definition 480! an emtpy element cannot have children. 481! 482end subroutine empty_element 483 484!----------------------------------------------------------- 485subroutine pcdata_handler(chunk) 486character(len=*), intent(in) :: chunk 487 488integer :: len_chunk 489 490if (in_pcdata_level) then 491 ! 492 ! Build pcdata_recovered chunk by chunk, until it overflows 493 ! 494 if (pcdata_requested) then 495 if (debug_xpath) print *, "Found chunk of pcdata: ", chunk 496 len_chunk = len(chunk) 497 if ((len_pcdata + len_chunk) > max_len_pcdata) then 498 ! 499 if (debug_xpath) print *, "***Pcdata Overflow " 500 global_status = PCDATA_OVERFLOW 501 stop_parsing = .true. 502 return 503 endif 504 pcdata_recovered(len_pcdata+1:len_pcdata+len_chunk) = chunk 505 len_pcdata = len_pcdata + len_chunk 506 endif 507endif 508 509end subroutine pcdata_handler 510!-------------------------------------------------------------------- 511 512subroutine pause_parsing(res) 513logical, intent(out) :: res 514 515res = stop_parsing 516 517end subroutine pause_parsing 518!-------------------------------------------------------------------- 519 520recursive function match(p,ptarget) result(res_match) 521character(len=*), intent(in) :: p 522character(len=*), intent(in) :: ptarget 523logical :: res_match 524 525! 526! Checks whether a given XML path matches the target path ptarget 527! Only absolute paths are considered. 528! 529! Examples of target paths: 530! 531! /pseudo/vps/radfunc [1] 532! //radfunc/data 533! //data 534! //*/vps/data 535! //job//data 536! //* 537! 538integer :: len_target, len_path, pos_target, pos_path 539character(len=100) :: anchor_leaf ! *** Hard limit 540 541res_match = .false. 542 543if (trim(p) == trim(ptarget)) then 544 res_match = .true. 545 return 546 547else if (ptarget == "/") then 548 ! We process // in the middle below 549 550 res_match = .true. 551 return 552 553else ! We get the extreme elements 554 555 len_target = len_trim(ptarget) 556 len_path = len_trim(p) 557 pos_target = index(ptarget,"/",back=.true.) 558 pos_path = index(p,"/",back=.true.) 559 560 if (pos_target == len_target) then ! // in the middle... 561 ! Get leaf further up 562 search_anchor : do 563 pos_target = index(ptarget(1:len_target-1),"/",back=.true.) 564 if (pos_target == 1) then ! Target begins by /.// 565 res_match = .true. 566 return 567 endif 568 anchor_leaf = ptarget(pos_target:len_target-1) 569 if (anchor_leaf == "/.") then ! keep searching 570 len_target = pos_target 571 cycle search_anchor 572 else 573 exit search_anchor 574 endif 575 enddo search_anchor 576 577 ! Note that the anchor includes the leading / 578 ! Now we search for that anchor in the candidate path 579 ! 580 pos_path = index(p(1:len_path),trim(anchor_leaf),back=.true.) 581 if (pos_path /= 0) then 582 583 ! Found anchor. Continue further up. 584 ! 585 res_match = match(p(1:pos_path-1),ptarget(1:pos_target-1)) 586 endif 587 588 else if (ptarget(pos_target+1:len_target) == ".") then 589 590 ! A dot is a dummy. Continue further up in target path. 591 ! 592 res_match = match(p(1:len_path),ptarget(1:pos_target-1)) 593 594 else if (ptarget(pos_target+1:len_target) == "*") then 595 596 if (len_path == pos_path) then 597 RETURN ! empty path element 598 endif 599 600 ! A star matches any non-empty leaf. Continue further up. 601 ! 602 res_match = match(p(1:pos_path-1),ptarget(1:pos_target-1)) 603 604 else if (p(pos_path+1:len_path) == & 605 ptarget(pos_target+1:len_target)) then 606 607 ! Leafs are equal. Continue further up. 608 ! 609 res_match = match(p(1:pos_path-1),ptarget(1:pos_target-1)) 610 611 endif 612 613endif 614 615end function match 616 617end module m_xpath_api 618 619 620 621 622 623 624 625 626 627 628 629 630