1module m_sax_fsm
2!
3use m_sax_buffer
4use m_sax_dictionary
5use m_sax_charset
6use m_sax_entities
7use m_sax_elstack
8
9private
10
11type, public :: fsm_t
12      !
13      ! Contains information about the "finite state machine"
14      ! Some of the components (marked *) could at this point be made into
15      ! saved module variables.
16      !
17      !
18      integer              :: state
19      integer              :: context
20      integer              :: nbrackets             !*
21      integer              :: nlts                  !*
22      character(len=1)     :: quote_char            !*
23      type(buffer_t)       :: buffer                !*
24      type(buffer_t)       :: tmpbuf                !*
25      type(buffer_t)       :: element_name
26      type(dictionary_t)   :: attributes
27      type(buffer_t)       :: pcdata
28      logical              :: entities_in_pcdata
29      logical              :: entities_in_attributes
30      type(elstack_t)      :: element_stack
31      logical              :: root_element_seen
32      type(buffer_t)       :: root_element_name
33      character(len=150)   :: action
34      logical              :: debug
35end type fsm_t
36
37public :: init_fsm, reset_fsm, evolve_fsm
38
39!
40! State parameters
41!
42integer, parameter, public   ::  ERROR = -1
43integer, parameter, public   ::  INIT = 1
44integer, parameter, private  ::  START_TAG_MARKER = 2
45integer, parameter, private  ::  END_TAG_MARKER = 3
46integer, parameter, private  ::  IN_NAME = 4
47integer, parameter, private  ::  WHITESPACE_IN_TAG = 5
48integer, parameter, private  ::  IN_PCDATA = 6
49integer, parameter, private  ::  SINGLETAG_MARKER = 7
50integer, parameter, private  ::  CLOSINGTAG_MARKER = 8
51integer, parameter, private  ::  IN_COMMENT = 9
52integer, parameter, private  ::  IN_ATT_NAME = 10
53integer, parameter, private  ::  IN_ATT_VALUE = 11
54integer, parameter, private  ::  EQUAL = 12
55integer, parameter, private  ::  SPACE_AFTER_EQUAL = 13
56integer, parameter, private  ::  SPACE_BEFORE_EQUAL = 14
57integer, parameter, private  ::  START_QUOTE = 15
58integer, parameter, private  ::  END_QUOTE = 16
59integer, parameter, private  ::  BANG = 17
60integer, parameter, private  ::  BANG_HYPHEN = 18
61integer, parameter, private  ::  ONE_HYPHEN = 19
62integer, parameter, private  ::  TWO_HYPHEN = 20
63integer, parameter, private  ::  QUESTION_MARK = 21
64integer, parameter, private  ::  START_XML_DECLARATION = 22
65integer, parameter, private  ::  IN_SGML_DECLARATION = 23
66integer, parameter, private  ::  IN_CDATA_SECTION = 24
67integer, parameter, private  ::  ONE_BRACKET = 25
68integer, parameter, private  ::  TWO_BRACKET = 26
69integer, parameter, private  ::  CDATA_PREAMBLE = 27
70integer, parameter, private  ::  IN_PCDATA_AT_EOL = 30
71!
72! Context parameters
73!
74integer, parameter, public   ::  OPENING_TAG  = 100
75integer, parameter, public   ::  CLOSING_TAG  = 110
76integer, parameter, public   ::  SINGLE_TAG   = 120
77integer, parameter, public   ::  COMMENT_TAG  = 130
78integer, parameter, public   ::  XML_DECLARATION_TAG  = 140
79integer, parameter, public   ::  SGML_DECLARATION_TAG  = 150
80integer, parameter, public   ::  CDATA_SECTION_TAG  = 160
81integer, parameter, public   ::  NULL_CONTEXT          = 200
82!
83! Signal parameters
84!
85integer, parameter, public   ::  QUIET             = 1000
86integer, parameter, public   ::  END_OF_TAG        = 1100
87integer, parameter, public   ::  CHUNK_OF_PCDATA   = 1200
88integer, parameter, public   ::  CHUNK_OF_CDATA_SECTION  = 1300
89integer, parameter, public   ::  EXCEPTION         = 1500
90
91CONTAINS
92
93!------------------------------------------------------------
94! Initialize once and for all the derived types (Fortran90 restriction)
95!
96subroutine init_fsm(fx)
97type(fsm_t), intent(inout)   :: fx
98
99 fx%state = INIT
100 call setup_xml_charsets()
101 fx%context = NULL_CONTEXT
102 call init_elstack(fx%element_stack)
103 fx%root_element_seen = .false.
104 fx%debug = .false.
105 fx%entities_in_pcdata = .false.
106 fx%entities_in_attributes = .false.
107 fx%action = ""
108 call init_buffer(fx%buffer)
109 call init_buffer(fx%element_name)
110 call init_buffer(fx%pcdata)
111 call init_buffer(fx%root_element_name)
112 call init_dict(fx%attributes)
113end subroutine init_fsm
114!------------------------------------------------------------
115subroutine reset_fsm(fx)
116type(fsm_t), intent(inout)   :: fx
117
118 fx%state = INIT
119 call setup_xml_charsets()
120 fx%context = NULL_CONTEXT
121 call reset_elstack(fx%element_stack)
122 fx%action = ""
123 fx%root_element_seen = .false.
124 call reset_buffer(fx%buffer)
125 call reset_buffer(fx%element_name)
126 call reset_buffer(fx%pcdata)
127 call reset_buffer(fx%root_element_name)
128 call reset_dict(fx%attributes)
129end subroutine reset_fsm
130
131!------------------------------------------------------------
132subroutine evolve_fsm(fx,c,signal)
133!
134! Finite-state machine evolution rules for XML parsing.
135!
136type(fsm_t), intent(inout)      :: fx    ! Internal state
137character(len=1), intent(in)    :: c
138integer, intent(out)            :: signal
139
140!
141! Reset signal
142!
143signal = QUIET
144!
145
146if (.not. (c .in. valid_chars)) then
147!
148!      Let it pass (in case the underlying encoding is UTF-8)
149!      But this chars in a name will cause havoc
150!
151!      signal = EXCEPTION
152!      fx%state = ERROR
153!      fx%action = trim("Not a valid character in simple encoding: "//c)
154!      RETURN
155endif
156
157select case(fx%state)
158
159 case (INIT)
160      if (c == "<") then
161         fx%state = START_TAG_MARKER
162         if (fx%debug) fx%action = ("Starting tag")
163      else if (c == ">") then
164         fx%state = ERROR
165         fx%action = ("Ending tag without being in one!")
166      else
167         if (fx%debug) fx%action = ("Reading garbage chars")
168      endif
169
170 case (START_TAG_MARKER)
171      if (c == ">") then
172         fx%state = ERROR
173         fx%action = ("Tag empty!")
174      else if (c == "<") then
175         fx%state = ERROR
176         fx%action = ("Double opening of tag!!")
177      else if (c == "/") then
178         fx%state = CLOSINGTAG_MARKER
179         if (fx%debug) fx%action = ("Starting endtag: ")
180         fx%context = CLOSING_TAG
181      else if (c == "?") then
182         fx%state = START_XML_DECLARATION
183         if (fx%debug) fx%action = ("Starting XML declaration ")
184         fx%context = XML_DECLARATION_TAG
185      else if (c == "!") then
186         fx%state = BANG
187         if (fx%debug) fx%action = ("Saw ! -- comment or SGML declaration expected...")
188      else if (c .in. whitespace) then
189         fx%state = ERROR
190         fx%action = ("Cannot have whitespace after <")
191      else if (c .in. initial_name_chars) then
192         fx%context = OPENING_TAG
193         fx%state = IN_NAME
194         call add_to_buffer(c,fx%buffer)
195         if (fx%debug) fx%action = ("Starting to read name in tag")
196      else
197         fx%state = ERROR
198         fx%action = ("Illegal initial character for name")
199      endif
200
201
202 case (BANG)
203      if (c == "-") then
204         fx%state = BANG_HYPHEN
205         if (fx%debug) fx%action = ("Almost ready to start comment ")
206      else if (c .in. uppercase_chars) then
207         fx%state = IN_SGML_DECLARATION
208         fx%nlts = 0
209         fx%nbrackets = 0
210         if (fx%debug) fx%action = ("SGML declaration ")
211         fx%context = SGML_DECLARATION_TAG
212         call add_to_buffer(c,fx%buffer)
213      else if (c == "[") then
214         fx%state = CDATA_PREAMBLE
215         if (fx%debug) fx%action = ("Declaration with [ ")
216         fx%context = CDATA_SECTION_TAG
217      else
218         fx%state = ERROR
219         fx%action = ("Wrong character after ! ")
220      endif
221
222 case (CDATA_PREAMBLE)
223      ! We assume a CDATA[ is forthcoming, we do not check
224      if (c == "[") then
225         fx%state = IN_CDATA_SECTION
226         if (fx%debug) fx%action = ("About to start reading CDATA contents")
227      else if (c == "]") then
228         fx%state = ERROR
229         fx%action = ("Unexpected ] in CDATA preamble")
230      else
231         if (fx%debug) fx%action = ("Reading CDATA preamble")
232      endif
233
234 case (IN_CDATA_SECTION)
235      if (c == "]") then
236         fx%state = ONE_BRACKET
237         if (fx%debug) fx%action = ("Saw a ] in CDATA section")
238      else
239         ! Check whether we are close to the end of the buffer.
240         ! If so, make a chunk and reset the buffer
241         ! Note that we do not split in whitespace (there might not be)
242         call add_to_buffer(c,fx%buffer)
243         if (buffer_nearly_full(fx%buffer)) then
244               signal = CHUNK_OF_CDATA_SECTION
245               if (fx%debug) fx%action = ("Resetting almost full CDATA buffer")
246               fx%pcdata = fx%buffer
247               call reset_buffer(fx%buffer)
248         else
249            if (fx%debug) fx%action = ("Reading contents of CDATA section")
250         endif
251      endif
252
253 case (ONE_BRACKET)
254      if (c == "]") then
255         fx%state = TWO_BRACKET
256         if (fx%debug) fx%action = ("Maybe finish a CDATA section")
257      else
258         fx%state = IN_CDATA_SECTION
259         call add_to_buffer("]",fx%buffer)
260         if (fx%debug) fx%action = ("Continue reading contents of CDATA section")
261      endif
262
263 case (TWO_BRACKET)
264      if (c == ">") then
265         fx%state = END_TAG_MARKER
266         signal = CHUNK_OF_CDATA_SECTION
267         if (fx%debug) fx%action = ("End of CDATA section")
268         fx%pcdata = fx%buffer    ! Not quite the same behavior
269                                  ! as pcdata... (not filtered)
270         call reset_buffer(fx%buffer)
271      else
272         fx%state = IN_CDATA_SECTION
273         call add_to_buffer("]",fx%buffer)
274         if (fx%debug) fx%action = ("Continue reading contents of CDATA section")
275      endif
276
277 case (IN_SGML_DECLARATION)
278      if (c == "<") then
279         fx%nlts = fx%nlts + 1
280         call add_to_buffer("<",fx%buffer)
281         fx%action = "Read an intermediate < in SGML declaration"
282      else if (c == "[") then
283         fx%nbrackets = fx%nbrackets + 1
284         call add_to_buffer("[",fx%buffer)
285         fx%action = "Read a [ in SGML declaration"
286      else if (c == "]") then
287         fx%nbrackets = fx%nbrackets - 1
288         call add_to_buffer("]",fx%buffer)
289         fx%action = "Read a ] in SGML declaration"
290      else if (c == ">") then
291         if (fx%nlts == 0) then
292            if (fx%nbrackets == 0) then
293               fx%state = END_TAG_MARKER
294               signal  = END_OF_TAG
295               if (fx%debug) fx%action = ("Ending SGML declaration tag")
296               fx%pcdata = fx%buffer       ! Same behavior as pcdata
297               call reset_buffer(fx%buffer)
298            else
299               fx%state = ERROR
300               fx%action = ("Unmatched ] in SGML declaration")
301            endif
302         else
303            fx%nlts = fx%nlts -1
304            call add_to_buffer(">",fx%buffer)
305            fx%action = "Read an intermediate > in SGML declaration"
306         endif
307      else
308         if (fx%debug) fx%action = ("Keep reading SGML declaration")
309         call add_to_buffer(c,fx%buffer)
310      endif
311
312 case (BANG_HYPHEN)
313      if (c == "-") then
314         fx%state = IN_COMMENT
315         fx%context = COMMENT_TAG
316         if (fx%debug) fx%action = ("In comment ")
317      else
318         fx%state = ERROR
319         fx%action = ("Wrong character after <!- ")
320      endif
321
322 case (START_XML_DECLARATION)
323      if (c .in. initial_name_chars) then
324         fx%state = IN_NAME
325         call add_to_buffer(c,fx%buffer)
326         if (fx%debug) fx%action = ("Starting to read name in XML declaration")
327      else
328         fx%state = ERROR
329         fx%action = "Wrong character after ? in start of XML declaration"
330      endif
331
332 case (CLOSINGTAG_MARKER)
333      if (c == ">") then
334         fx%state = ERROR
335         fx%action = ("Closing tag empty!")
336      else if (c == "<") then
337         fx%state = ERROR
338         fx%action = ("Double opening of closing tag!!")
339      else if (c == "/") then
340         fx%state = ERROR
341         fx%action = ("Syntax error (<//)")
342      else if (c .in. whitespace) then
343         fx%state = ERROR
344         fx%action = ("Cannot have whitespace after </")
345      else if (c .in. initial_name_chars) then
346         fx%state = IN_NAME
347         if (fx%debug) fx%action = ("Starting to read name inside endtag")
348         call add_to_buffer(c,fx%buffer)
349      else
350         fx%state = ERROR
351         fx%action = ("Illegal initial character for name")
352      endif
353
354 case (IN_NAME)
355      if (c == "<") then
356         fx%state = ERROR
357         fx%action = ("Starting tag within tag")
358      else if (c == ">") then
359         fx%state = END_TAG_MARKER
360         signal  = END_OF_TAG
361         if (fx%debug) fx%action = ("Ending tag")
362!         call set_element_name(fx%buffer,fx%element)
363         fx%element_name = fx%buffer
364         call reset_buffer(fx%buffer)
365         call reset_dict(fx%attributes)
366      else if (c == "/") then
367         if (fx%context /= OPENING_TAG) then
368            fx%state = ERROR
369            fx%action = ("Single tag did not open as start tag")
370         else
371            fx%state = SINGLETAG_MARKER
372            fx%context = SINGLE_TAG
373            if (fx%debug) fx%action = ("Almost ending single tag")
374!            call set_element_name(fx%buffer,fx%element)
375            fx%element_name = fx%buffer
376            call reset_buffer(fx%buffer)
377            call reset_dict(fx%attributes)
378         endif
379      else if (c .in. whitespace) then
380         fx%state = WHITESPACE_IN_TAG
381         if (fx%debug) fx%action = ("Ending name chars")
382!            call set_element_name(fx%buffer,fx%element)
383         fx%element_name = fx%buffer
384         call reset_buffer(fx%buffer)
385         call reset_dict(fx%attributes)
386      else if (c .in. name_chars) then
387         if (fx%debug) fx%action = ("Reading name chars in tag")
388         call add_to_buffer(c,fx%buffer)
389      else
390         fx%state = ERROR
391         fx%action = ("Illegal character for name")
392      endif
393
394 case (IN_ATT_NAME)
395      if (c == "<") then
396         fx%state = ERROR
397         fx%action = ("Starting tag within tag")
398      else if (c == ">") then
399         fx%state = ERROR
400         fx%action = ("Ending tag in the middle of an attribute")
401      else if (c == "/") then
402         fx%state = ERROR
403         fx%action = ("Ending tag in the middle of an attribute")
404      else if (c .in. whitespace) then
405         fx%state = SPACE_BEFORE_EQUAL
406         if (fx%debug) fx%action = ("Whitespace after attr. name (specs?)")
407         call add_key_to_dict(fx%buffer,fx%attributes)
408         call reset_buffer(fx%buffer)
409      else if ( c == "=" ) then
410         fx%state = EQUAL
411         if (fx%debug) fx%action = ("End of attr. name")
412         call add_key_to_dict(fx%buffer,fx%attributes)
413         call reset_buffer(fx%buffer)
414      else if (c .in. name_chars) then
415         if (fx%debug) fx%action = ("Reading attribute name chars")
416         call add_to_buffer(c,fx%buffer)
417      else
418         fx%state = ERROR
419         fx%action = ("Illegal character for attribute name")
420      endif
421
422 case (EQUAL)
423      if ( (c == """") .or. (c == "'") ) then
424         fx%state = START_QUOTE
425         if (fx%debug) fx%action = ("Found beginning quote")
426         fx%quote_char = c
427      else if (c .in. whitespace) then
428         fx%state = SPACE_AFTER_EQUAL
429         if (fx%debug) fx%action = ("Whitespace after equal sign...")
430      else
431         fx%state = ERROR
432         fx%action = ("Must use quotes for attribute values")
433      endif
434
435 case (SPACE_BEFORE_EQUAL)
436      if ( c == "=" ) then
437         fx%state = EQUAL
438         if (fx%debug) fx%action = ("Equal sign")
439      else if (c .in. whitespace) then
440         if (fx%debug) fx%action = ("More whitespace before equal sign...")
441      else
442         fx%state = ERROR
443         fx%action = ("Must use equal sign for attribute values")
444      endif
445
446 case (SPACE_AFTER_EQUAL)
447      if ( c == "=" ) then
448         fx%state = ERROR
449         fx%action = ("Duplicate Equal sign")
450      else if (c .in. whitespace) then
451         if (fx%debug) fx%action = ("More whitespace after equal sign...")
452      else  if ( (c == """") .or. (c == "'") ) then
453         fx%state = START_QUOTE
454         fx%quote_char = c
455         if (fx%debug) fx%action = ("Found beginning quote")
456      else
457         fx%state = ERROR
458         fx%action = ("Must use quotes for attribute values")
459      endif
460
461 case (START_QUOTE)
462      if (c == fx%quote_char) then
463         fx%state = END_QUOTE
464         if (fx%debug) fx%action = ("Emtpy attribute value...")
465         if (fx%entities_in_attributes) then
466            call entity_filter(fx%buffer,fx%tmpbuf)
467            fx%entities_in_attributes = .false.
468            call add_value_to_dict(fx%tmpbuf,fx%attributes)
469         else
470            call add_value_to_dict(fx%buffer,fx%attributes)
471         endif
472         call reset_buffer(fx%buffer)
473      else if (c == "<") then
474         fx%state = ERROR
475         fx%action = ("Attribute value cannot contain <")
476      else   ! actually allowed chars in att values... Specs: No "<"
477         fx%state = IN_ATT_VALUE
478         if (fx%debug) fx%action = ("Starting to read attribute value")
479         if (c == "&") fx%entities_in_attributes = .true.
480         call add_to_buffer(c,fx%buffer)
481      endif
482
483 case (IN_ATT_VALUE)
484      if (c == fx%quote_char) then
485         fx%state = END_QUOTE
486         if (fx%debug) fx%action = ("End of attribute value")
487         if (fx%entities_in_attributes) then
488            call entity_filter(fx%buffer,fx%tmpbuf)
489            fx%entities_in_attributes = .false.
490            call add_value_to_dict(fx%tmpbuf,fx%attributes)
491         else
492            call add_value_to_dict(fx%buffer,fx%attributes)
493         endif
494         call reset_buffer(fx%buffer)
495      else if (c == "<") then
496         fx%state = ERROR
497         fx%action = ("Attribute value cannot contain <")
498      else if ( (c == char(10)) ) then
499         fx%state = ERROR
500!
501!        Aparently other whitespace is allowed...
502!
503         fx%action = ("No newline allowed in attr. value (specs?)")
504      else        ! all other chars allowed in attr value
505         if (fx%debug) fx%action = ("Reading attribute value chars")
506         call add_to_buffer(c,fx%buffer)
507         if (c == "&") fx%entities_in_attributes = .true.
508      endif
509
510 case (END_QUOTE)
511      if ((c == """") .or. (c == "'")) then
512         fx%state = ERROR
513         fx%action = ("Duplicate end quote")
514      else if (c .in. whitespace) then
515         fx%state = WHITESPACE_IN_TAG
516         if (fx%debug) fx%action = ("Space in between attributes or to end of tag")
517      else if (c == "<") then
518         fx%state = ERROR
519         fx%action = ("Starting tag within tag")
520      else if (c == ">") then
521         if (fx%context == XML_DECLARATION_TAG) then
522            fx%state = ERROR
523            fx%action = "End of XML declaration without ?"
524         else
525            fx%state = END_TAG_MARKER
526            signal  = END_OF_TAG
527            if (fx%debug) fx%action = ("Ending tag after some attributes")
528         endif
529      else if (c == "/") then
530         if (fx%context /= OPENING_TAG) then
531            fx%state = ERROR
532            fx%action = ("Single tag did not open as start tag")
533         else
534            fx%state = SINGLETAG_MARKER
535            fx%context = SINGLE_TAG
536            if (fx%debug) fx%action = ("Almost ending single tag after some attributes")
537         endif
538      else if (c == "?") then
539         if (fx%context /= XML_DECLARATION_TAG) then
540            fx%state = ERROR
541            fx%action = "Wrong lone ? in tag"
542         else
543            fx%state = QUESTION_MARK
544            if (fx%debug) fx%action = ("About to end XML declaration")
545         endif
546      else
547         fx%state = ERROR
548         fx%action = ("Must have some whitespace after att. value")
549      endif
550
551
552 case (WHITESPACE_IN_TAG)
553      if ( c .in. whitespace) then
554         if (fx%debug) fx%action = ("Reading whitespace in tag")
555      else if (c == "<") then
556         fx%state = ERROR
557         fx%action = ("Starting tag within tag")
558      else if (c == ">") then
559         if (fx%context == XML_DECLARATION_TAG) then
560            fx%state = ERROR
561            fx%action = "End of XML declaration without ?"
562         else
563            fx%state = END_TAG_MARKER
564            signal  = END_OF_TAG
565            if (fx%debug) fx%action = ("End whitespace in tag")
566         endif
567      else if (c == "/") then
568         if (fx%context /= OPENING_TAG) then
569            fx%state = ERROR
570            fx%action = ("Single tag did not open as start tag")
571         else
572            fx%state = SINGLETAG_MARKER
573            fx%context = SINGLE_TAG
574            if (fx%debug) fx%action = ("End whitespace in single tag")
575         endif
576      else if (c .in. initial_name_chars) then
577         fx%state = IN_ATT_NAME
578         if (fx%debug) fx%action = ("Starting Attribute name in tag")
579         call add_to_buffer(c,fx%buffer)
580      else if (c == "?") then
581         if (fx%context /= XML_DECLARATION_TAG) then
582            fx%state = ERROR
583            fx%action = "Wrong lone ? in tag"
584         else
585            fx%state = QUESTION_MARK
586            if (fx%debug) fx%action = ("About to end XML declaration after whitespace")
587         endif
588      else
589         fx%state = ERROR
590         fx%action = ("Illegal initial character for attribute")
591      endif
592
593 case (QUESTION_MARK)
594      if (c == ">") then
595         fx%state = END_TAG_MARKER
596         signal  = END_OF_TAG
597         if (fx%debug) fx%action = ("End of XML declaration tag")
598      else
599         fx%state = ERROR
600         fx%action = "No > after ? in XML declaration tag"
601      endif
602
603 case (IN_COMMENT)
604      !
605      ! End of comment is  "-->", and  ">" can appear inside comments
606      !
607      if (c == "-") then
608         fx%state = ONE_HYPHEN
609         if (fx%debug) fx%action = ("Saw - in Comment")
610      else
611         if (fx%debug) fx%action = ("Reading comment")
612         call add_to_buffer(c,fx%buffer)
613      endif
614
615 case (ONE_HYPHEN)
616      if (c == "-") then
617         fx%state = TWO_HYPHEN
618         if (fx%debug) fx%action = ("About to end comment")
619      else
620         fx%state = IN_COMMENT
621         if (fx%debug) fx%action = ("Keep reading comment after -: ")
622         call add_to_buffer("-",fx%buffer)
623         call add_to_buffer(c,fx%buffer)
624      endif
625
626 case (TWO_HYPHEN)
627      if (c == ">") then
628         fx%state = END_TAG_MARKER
629         signal  = END_OF_TAG
630         if (fx%debug) fx%action = ("End of Comment")
631         fx%pcdata = fx%buffer                  ! Same behavior as pcdata
632         call reset_buffer(fx%buffer)
633      else
634         fx%state = ERROR
635         fx%action = ("Cannot have -- in comment")
636      endif
637
638 case (SINGLETAG_MARKER)
639
640      if (c == ">") then
641         fx%state = END_TAG_MARKER
642         signal  = END_OF_TAG
643         if (fx%debug) fx%action = ("Ending tag")
644         ! We have to call begin_element AND end_element
645      else
646         fx%state = ERROR
647         fx%action = ("Wrong ending of single tag")
648      endif
649
650 case (IN_PCDATA)
651      if (c == "<") then
652         fx%state = START_TAG_MARKER
653         signal = CHUNK_OF_PCDATA
654         if (fx%debug) fx%action = ("End of pcdata -- Starting tag")
655         if (fx%entities_in_pcdata) then
656            call entity_filter(fx%buffer,fx%pcdata)
657            fx%entities_in_pcdata = .false.
658         else
659            fx%pcdata = fx%buffer
660         endif
661         call reset_buffer(fx%buffer)
662      else if (c == ">") then
663         fx%state = ERROR
664         fx%action = ("Ending tag without starting it!")
665      else if  (c == char(10)) then
666         fx%state = IN_PCDATA_AT_EOL
667         signal = CHUNK_OF_PCDATA
668         if (fx%debug) fx%action = ("Resetting PCDATA buffer at newline")
669         call add_to_buffer(c,fx%buffer)
670         if (fx%entities_in_pcdata) then
671            call entity_filter(fx%buffer,fx%pcdata)
672            fx%entities_in_pcdata = .false.
673         else
674            fx%pcdata = fx%buffer
675         endif
676         call reset_buffer(fx%buffer)
677      else
678         call add_to_buffer(c,fx%buffer)
679         if (c=="&") fx%entities_in_pcdata = .true.
680         if (fx%debug) fx%action = ("Reading pcdata")
681         !
682         ! Check whether we are close to the end of the buffer.
683         ! If so, make a chunk and reset the buffer
684         if (c .in. whitespace) then
685            if (buffer_nearly_full(fx%buffer)) then
686               signal = CHUNK_OF_PCDATA
687               if (fx%debug) fx%action = ("Resetting almost full PCDATA buffer")
688               if (fx%entities_in_pcdata) then
689                  call entity_filter(fx%buffer,fx%pcdata)
690                  fx%entities_in_pcdata = .false.
691               else
692                  fx%pcdata = fx%buffer
693               endif
694               call reset_buffer(fx%buffer)
695            endif
696         endif
697      endif
698
699 case (IN_PCDATA_AT_EOL)
700      !
701      ! Avoid triggering an extra pcdata event
702      !
703      if (c == "<") then
704         fx%state = START_TAG_MARKER
705         if (fx%debug) fx%action = ("No more pcdata after eol-- Starting tag")
706      else if (c == ">") then
707         fx%state = ERROR
708         fx%action = ("Ending tag without starting it!")
709      else if  (c == char(10)) then
710         fx%state = IN_PCDATA_AT_EOL
711         signal = CHUNK_OF_PCDATA
712         if (fx%debug) fx%action = ("Resetting PCDATA buffer at repeated newline")
713         call add_to_buffer(c,fx%buffer)
714         if (fx%entities_in_pcdata) then
715            call entity_filter(fx%buffer,fx%pcdata)
716            fx%entities_in_pcdata = .false.
717         else
718            fx%pcdata = fx%buffer
719         endif
720         call reset_buffer(fx%buffer)
721      else
722         fx%state = IN_PCDATA
723         call add_to_buffer(c,fx%buffer)
724         if (c=="&")  fx%entities_in_pcdata = .true.
725         if (fx%debug) fx%action = ("Resuming reading pcdata after EOL")
726         !
727         ! Check whether we are close to the end of the buffer.
728         ! If so, make a chunk and reset the buffer
729         if (c .in. whitespace) then
730            if (buffer_nearly_full(fx%buffer)) then
731               signal = CHUNK_OF_PCDATA
732               if (fx%debug) fx%action = ("Resetting almost full PCDATA buffer")
733               if (fx%entities_in_pcdata) then
734                  call entity_filter(fx%buffer,fx%pcdata)
735                  fx%entities_in_pcdata = .false.
736               else
737                  fx%pcdata = fx%buffer
738               endif
739               call reset_buffer(fx%buffer)
740            endif
741         endif
742      endif
743
744
745
746 case (END_TAG_MARKER)
747!
748      if (c == "<") then
749         fx%state = START_TAG_MARKER
750         if (fx%debug) fx%action = ("Starting tag")
751      else if (c == ">") then
752         fx%state = ERROR
753         fx%action = ("Double ending of tag!")
754!
755!     We should make this whitespace in general (maybe not?
756!     how about indentation in text chunks?)
757!     See specs.
758!
759      else if (c == char(10)) then
760        ! Ignoring LF after end of tag is probably non standard...
761
762         if (fx%debug) &
763            fx%action = ("---------Discarding newline after end of tag")
764
765        !!!  New code for full compliance
766        ! fx%state = IN_PCDATA_AT_EOL
767        ! call add_to_buffer(c,fx%buffer)
768        ! if (fx%debug) &
769        !    fx%action = ("Found LF after end of tag. Emitting PCDATA event")
770        ! signal = CHUNK_OF_PCDATA
771        ! if (fx%entities_in_pcdata) then
772        !    call entity_filter(fx%buffer,fx%pcdata)
773        !    fx%entities_in_pcdata = .false.
774        ! else
775        !    fx%pcdata = fx%buffer
776        ! endif
777        ! call reset_buffer(fx%buffer)
778      else
779         fx%state = IN_PCDATA
780         call add_to_buffer(c,fx%buffer)
781         if (c=="&") fx%entities_in_pcdata = .true.
782         if (fx%debug) fx%action = ("End of Tag. Starting to read PCDATA")
783      endif
784
785 case (ERROR)
786
787      stop "Cannot continue after parsing errors!"
788
789 end select
790
791if (fx%state == ERROR) signal  = EXCEPTION
792
793end subroutine evolve_fsm
794
795end module m_sax_fsm
796
797
798
799
800
801
802
803
804
805
806
807
808
809