1module m_xml_parser
2
3!
4! Basic module to parse XML in the SAX spirit.
5!
6
7use m_buffer
8use m_reader
9use m_fsm
10use m_dictionary
11use m_debug
12use m_xml_error
13use m_elstack          ! For element nesting checks
14!
15private
16
17!
18!  XML file handle
19!
20type, public :: xml_t
21private
22      type(file_buffer_t)  :: fb
23      type(fsm_t)          :: fx
24      character(len=200)   :: path_mark
25end type xml_t
26
27!
28public :: xml_parse
29public :: open_xmlfile, close_xmlfile
30public :: endfile_xmlfile, rewind_xmlfile
31public :: eof_xmlfile, sync_xmlfile
32public :: xml_char_count
33public :: xml_path, xml_mark_path, xml_get_path_mark
34public :: xml_name, xml_attributes
35
36CONTAINS  !=============================================================
37
38subroutine open_xmlfile(fname,fxml,iostat,record_size)
39  character(len=*), intent(in)      :: fname
40  integer, intent(out)              :: iostat
41  type(xml_t), intent(out)          :: fxml
42  integer, intent(in), optional     :: record_size
43
44  call open_file(fname,fxml%fb,iostat,record_size)
45  call init_fsm(fxml%fx)
46  fxml%path_mark = ""
47
48end subroutine open_xmlfile
49!-------------------------------------------------------------------------
50
51subroutine rewind_xmlfile(fxml)
52  type(xml_t), intent(inout) :: fxml
53
54  call rewind_file(fxml%fb)
55  call reset_fsm(fxml%fx)
56  fxml%path_mark = ""
57
58end subroutine rewind_xmlfile
59
60!-----------------------------------------
61subroutine endfile_xmlfile(fxml)
62  type(xml_t), intent(inout) :: fxml
63
64  call mark_eof_file(fxml%fb)
65
66end subroutine endfile_xmlfile
67
68!-----------------------------------------
69subroutine close_xmlfile(fxml)
70  type(xml_t), intent(inout) :: fxml
71
72  call close_file_buffer(fxml%fb)
73  call reset_fsm(fxml%fx)         ! just in case
74  fxml%path_mark = ""             ! ""
75
76end subroutine close_xmlfile
77
78!-----------------------------------------
79subroutine sync_xmlfile(fxml,iostat)
80  type(xml_t), intent(inout) :: fxml
81  integer, intent(out)       :: iostat
82
83  call sync_file(fxml%fb,iostat)
84  ! Do not reset fx: that's the whole point of synching.
85
86end subroutine sync_xmlfile
87
88!----------------------------------------------------
89function eof_xmlfile(fxml) result (res)
90  type(xml_t), intent(in)          :: fxml
91  logical                          :: res
92
93  res = eof_file(fxml%fb)
94
95end function eof_xmlfile
96!
97!----------------------------------------------------
98!----------------------------------------------------
99function xml_char_count(fxml) result (nc)
100  type(xml_t), intent(in)          :: fxml
101  integer                          :: nc
102  nc = nchars_processed(fxml%fb)
103end function xml_char_count
104!
105!----------------------------------------------------
106!
107
108recursive subroutine xml_parse(fxml, begin_element_handler,    &
109                           end_element_handler,      &
110                           pcdata_chunk_handler,     &
111                           comment_handler,          &
112                           xml_declaration_handler,  &
113                           cdata_section_handler,    &
114                           sgml_declaration_handler, &
115                           error_handler,            &
116                           signal_handler,           &
117                           verbose,                  &
118                           empty_element_handler,    &
119                           start_document_handler,   &
120                           end_document_handler)
121
122type(xml_t), intent(inout), target  :: fxml
123
124optional                            :: begin_element_handler
125optional                            :: end_element_handler
126optional                            :: pcdata_chunk_handler
127optional                            :: comment_handler
128optional                            :: xml_declaration_handler
129optional                            :: sgml_declaration_handler
130optional                            :: cdata_section_handler
131optional                            :: error_handler
132optional                            :: signal_handler
133logical, intent(in), optional       :: verbose
134optional                            :: empty_element_handler
135optional                            :: start_document_handler
136optional                            :: end_document_handler
137
138interface
139   subroutine begin_element_handler(name,attributes)
140   use m_dictionary
141   character(len=*), intent(in)     :: name
142   type(dictionary_t), intent(in)   :: attributes
143   end subroutine begin_element_handler
144
145   subroutine end_element_handler(name)
146   character(len=*), intent(in)     :: name
147   end subroutine end_element_handler
148
149   subroutine pcdata_chunk_handler(chunk)
150   character(len=*), intent(in) :: chunk
151   end subroutine pcdata_chunk_handler
152
153   subroutine comment_handler(comment)
154   character(len=*), intent(in) :: comment
155   end subroutine comment_handler
156
157   subroutine xml_declaration_handler(name,attributes)
158   use m_dictionary
159   character(len=*), intent(in)     :: name
160   type(dictionary_t), intent(in)   :: attributes
161   end subroutine xml_declaration_handler
162
163   subroutine sgml_declaration_handler(sgml_declaration)
164   character(len=*), intent(in) :: sgml_declaration
165   end subroutine sgml_declaration_handler
166
167   subroutine cdata_section_handler(cdata)
168   character(len=*), intent(in) :: cdata
169   end subroutine cdata_section_handler
170
171   subroutine error_handler(error_info)
172   use m_xml_error
173   type(xml_error_t), intent(in)            :: error_info
174   end subroutine error_handler
175
176   subroutine signal_handler(code)
177   logical, intent(out) :: code
178   end subroutine signal_handler
179
180   subroutine empty_element_handler(name,attributes)
181   use m_dictionary
182   character(len=*), intent(in)     :: name
183   type(dictionary_t), intent(in)   :: attributes
184   end subroutine empty_element_handler
185
186   subroutine start_document_handler()
187   end subroutine start_document_handler
188
189   subroutine end_document_handler()
190   end subroutine end_document_handler
191
192end interface
193
194character(len=1)     :: c
195integer              :: iostat
196
197integer              :: signal
198
199type(buffer_t)       :: name, oldname, dummy
200
201logical              :: have_begin_handler, have_end_handler, &
202                        have_pcdata_handler, have_comment_handler, &
203                        have_xml_declaration_handler, &
204                        have_sgml_declaration_handler, &
205                        have_cdata_section_handler, have_empty_handler, &
206                        have_error_handler, have_signal_handler, &
207                        have_start_document_handler, have_end_document_handler
208
209logical              :: pause_signal
210
211type(xml_error_t)            :: error_info
212type(file_buffer_t), pointer :: fb
213type(fsm_t), pointer         :: fx
214
215have_begin_handler = present(begin_element_handler)
216have_end_handler = present(end_element_handler)
217have_pcdata_handler = present(pcdata_chunk_handler)
218have_comment_handler = present(comment_handler)
219have_xml_declaration_handler = present(xml_declaration_handler)
220have_sgml_declaration_handler = present(sgml_declaration_handler)
221have_cdata_section_handler = present(cdata_section_handler)
222have_error_handler = present(error_handler)
223have_signal_handler = present(signal_handler)
224have_empty_handler = present(empty_element_handler)
225have_start_document_handler = present(start_document_handler)
226have_end_document_handler = present(end_document_handler)
227
228fb => fxml%fb
229fx => fxml%fx
230if (present(verbose)) then
231   debug = verbose                 ! For m_converters
232   fx%debug = verbose              ! job-specific flag
233endif
234
235if (fx%debug) print *, " Entering xml_parse..."
236if (have_start_document_handler) call start_document_handler()
237
238!---------------------------------------------------------------------
239do
240      call get_character(fb,c,iostat)
241
242      if (iostat /= 0) then          ! End of file...
243         if (.not. is_empty(fx%element_stack)) then
244            call build_error_info(error_info, &
245                 "Early end of file.", &
246                 line(fb),column(fb),fx%element_stack,SEVERE_ERROR_CODE)
247            if (have_error_handler) then
248               call error_handler(error_info)
249            else
250               call default_error_handler(error_info)
251            endif
252         endif
253         if (have_end_document_handler) call end_document_handler()
254         call endfile_xmlfile(fxml)  ! Mark it as eof
255         EXIT
256      endif
257
258      call evolve_fsm(fx,c,signal)
259
260      if (fx%debug) print *, c, " ::: ", trim(fx%action)
261
262      if (signal == END_OF_TAG) then
263         !
264         ! We decide whether we have ended an opening tag or a closing tag
265         !
266         if (fx%context == OPENING_TAG) then
267            name = fx%element_name
268
269            if (fx%debug) print *, "We have found an opening tag"
270            if (fx%root_element_seen) then
271               if (name .equal. fx%root_element_name) then
272                  call build_error_info(error_info, &
273                  "Duplicate root element: " // str(name), &
274                  line(fb),column(fb),fx%element_stack,SEVERE_ERROR_CODE)
275                  if (have_error_handler) then
276                     call error_handler(error_info)
277                  else
278                     call default_error_handler(error_info)
279                  endif
280               endif
281               if (is_empty(fx%element_stack)) then
282                  call build_error_info(error_info, &
283                  "Opening tag beyond root context: " // str(name), &
284                  line(fb),column(fb),fx%element_stack,SEVERE_ERROR_CODE)
285                  if (have_error_handler) then
286                     call error_handler(error_info)
287                  else
288                     call default_error_handler(error_info)
289                  endif
290               endif
291            else
292               fx%root_element_name = name
293               fx%root_element_seen = .true.
294            endif
295            call push_elstack(name,fx%element_stack)
296            if (have_begin_handler) &
297                call begin_element_handler(str(name),fx%attributes)
298
299         else if (fx%context == CLOSING_TAG) then
300            name = fx%element_name
301
302            if (fx%debug) print *, "We have found a closing tag"
303            if (is_empty(fx%element_stack)) then
304               call build_error_info(error_info, &
305                  "Nesting error: End tag: " // str(name) //  &
306                  " does not match -- too many end tags", &
307                  line(fb),column(fb),fx%element_stack,SEVERE_ERROR_CODE)
308               if (have_error_handler) then
309                  call error_handler(error_info)
310               else
311                  call default_error_handler(error_info)
312               endif
313            else
314               call get_top_elstack(fx%element_stack,oldname)
315               if (oldname .equal. name) then
316                  call pop_elstack(fx%element_stack,oldname)
317                  if (have_end_handler) call end_element_handler(str(name))
318!!                  call pop_elstack(fx%element_stack,oldname)
319               else
320                  call build_error_info(error_info, &
321                       "Nesting error: End tag: " // str(name) //  &
322                       ". Expecting end of : " // str(oldname), &
323                       line(fb),column(fb),fx%element_stack,SEVERE_ERROR_CODE)
324                  if (have_error_handler) then
325                     call error_handler(error_info)
326                  else
327                     call default_error_handler(error_info)
328                  endif
329               endif
330            endif
331         else if (fx%context == SINGLE_TAG) then
332            name = fx%element_name
333
334            if (fx%debug) print *, "We have found a single (empty) tag: ", &
335                                str(name)
336            !
337            ! Push name on to stack to reveal true xpath
338            !
339            call push_elstack(name,fx%element_stack)
340            if (have_empty_handler) then
341               if (fx%debug) print *, "--> calling empty_element_handler."
342               call empty_element_handler(str(name),fx%attributes)
343               call pop_elstack(fx%element_stack,dummy)
344            else
345               if (have_begin_handler) then
346                  if (fx%debug) print *, "--> calling begin_element_handler..."
347                  call begin_element_handler(str(name),fx%attributes)
348               endif
349               call pop_elstack(fx%element_stack,dummy)
350               if (have_end_handler) then
351                  if (fx%debug) print *, "--> ... and end_element_handler."
352                  call end_element_handler(str(name))
353               endif
354            endif
355!!            call pop_elstack(fx%element_stack,dummy)
356
357         else if (fx%context == CDATA_SECTION_TAG) then
358
359            if (fx%debug) print *, "We found a CDATA section"
360            if (is_empty(fx%element_stack)) then
361               if (fx%debug) print *, &
362                   "... Warning: CDATA section outside element context"
363            else
364               if (have_cdata_section_handler) then
365                  call cdata_section_handler(str(fx%pcdata))
366               else
367                  if (have_pcdata_handler) &
368                   call pcdata_chunk_handler(str(fx%pcdata))
369               endif
370            endif
371
372         else if (fx%context == COMMENT_TAG) then
373
374            if (fx%debug) print *, "We found a comment tag"
375            if (have_comment_handler)  &
376                 call comment_handler(str(fx%pcdata))
377
378         else if (fx%context == SGML_DECLARATION_TAG) then
379
380            if (fx%debug) print *, "We found an sgml declaration"
381            if (have_sgml_declaration_handler)  &
382                      call sgml_declaration_handler(str(fx%pcdata))
383
384         else if (fx%context == XML_DECLARATION_TAG) then
385
386            if (fx%debug) print *, "We found an XML declaration"
387            name = fx%element_name
388            if (have_xml_declaration_handler)  &
389                      call xml_declaration_handler(str(name),fx%attributes)
390
391         else
392
393            ! do nothing
394
395         endif
396
397      else if (signal == CHUNK_OF_PCDATA) then
398
399         if (fx%debug) print *, "We found a chunk of PCDATA"
400         if (is_empty(fx%element_stack)) then
401            if (fx%debug) print *, "... Warning: PCDATA outside element context"
402               ! Just a warning
403               call build_error_info(error_info, &
404                  "PCDATA outside of element context", &
405                  line(fb),column(fb),fx%element_stack,WARNING_CODE)
406               if (have_error_handler) then
407                  call error_handler(error_info)
408               else
409                  call default_error_handler(error_info)
410               endif
411         else
412            if (have_pcdata_handler) &
413                 call pcdata_chunk_handler(str(fx%pcdata))
414         endif
415
416      else if (signal == EXCEPTION) then
417         call build_error_info(error_info, fx%action, &
418                  line(fb),column(fb),fx%element_stack,SEVERE_ERROR_CODE)
419         if (have_error_handler) then
420            call error_handler(error_info)
421         else
422            call default_error_handler(error_info)
423         endif
424      else
425         ! QUIET, do nothing
426      endif
427      if (signal /= QUIET) then
428         if (have_signal_handler) then
429            call signal_handler(pause_signal)
430            if (pause_signal) exit
431         endif
432      endif
433
434enddo
435
436end subroutine xml_parse
437
438!-----------------------------------------
439subroutine xml_path(fxml,path)
440  type(xml_t), intent(in) :: fxml
441  character(len=*), intent(out)  :: path
442
443  call get_elstack_signature(fxml%fx%element_stack,path)
444
445end subroutine xml_path
446
447!-----------------------------------------
448subroutine xml_mark_path(fxml,path)
449  !
450  ! Marks the current path
451  !
452  type(xml_t), intent(inout) :: fxml
453  character(len=*), intent(out)  :: path
454
455  call get_elstack_signature(fxml%fx%element_stack,fxml%path_mark)
456  path = fxml%path_mark
457
458end subroutine xml_mark_path
459
460!-----------------------------------------
461subroutine xml_get_path_mark(fxml,path)
462  !
463  ! Returns the currently markd path (or an empty string if there are no marks)
464  !
465  type(xml_t), intent(in)        :: fxml
466  character(len=*), intent(out)  :: path
467
468  path = fxml%path_mark
469
470end subroutine xml_get_path_mark
471
472!-----------------------------------------
473subroutine xml_name(fxml,name)
474  type(xml_t), intent(in) :: fxml
475  character(len=*), intent(out)  :: name
476
477  name = str(fxml%fx%element_name)
478
479end subroutine xml_name
480!-----------------------------------------
481subroutine xml_attributes(fxml,attributes)
482  type(xml_t), intent(in) :: fxml
483  type(dictionary_t), intent(out)  :: attributes
484
485  attributes = fxml%fx%attributes
486
487end subroutine xml_attributes
488
489end module m_xml_parser
490
491
492
493
494