1module m_fsm
2!
3use m_buffer
4use m_dictionary
5use m_charset
6use m_entities
7use m_elstack
8
9private
10
11type, public :: fsm_t
12      !
13      ! Contains information about the "finite state machine"
14      ! Some of the components (marked *) could at this point be made into
15      ! saved module variables.
16      !
17      !
18      integer              :: state
19      integer              :: context
20      integer              :: nbrackets             !*
21      integer              :: nlts                  !*
22      character(len=1)     :: quote_char            !*
23      type(buffer_t)       :: buffer                !*
24      type(buffer_t)       :: tmpbuf                !*
25      type(buffer_t)       :: element_name
26      type(dictionary_t)   :: attributes
27      type(buffer_t)       :: pcdata
28      logical              :: entities_in_pcdata
29      logical              :: entities_in_attributes
30      type(elstack_t)      :: element_stack
31      logical              :: root_element_seen
32      type(buffer_t)       :: root_element_name
33      character(len=150)   :: action
34      logical              :: debug
35end type fsm_t
36
37public :: init_fsm, reset_fsm, evolve_fsm
38
39!
40! State parameters
41!
42integer, parameter, public   ::  ERROR = -1
43integer, parameter, public   ::  INIT = 1
44integer, parameter, private  ::  START_TAG_MARKER = 2
45integer, parameter, private  ::  END_TAG_MARKER = 3
46integer, parameter, private  ::  IN_NAME = 4
47integer, parameter, private  ::  WHITESPACE_IN_TAG = 5
48integer, parameter, private  ::  IN_PCDATA = 6
49integer, parameter, private  ::  SINGLETAG_MARKER = 7
50integer, parameter, private  ::  CLOSINGTAG_MARKER = 8
51integer, parameter, private  ::  IN_COMMENT = 9
52integer, parameter, private  ::  IN_ATT_NAME = 10
53integer, parameter, private  ::  IN_ATT_VALUE = 11
54integer, parameter, private  ::  EQUAL = 12
55integer, parameter, private  ::  SPACE_AFTER_EQUAL = 13
56integer, parameter, private  ::  SPACE_BEFORE_EQUAL = 14
57integer, parameter, private  ::  START_QUOTE = 15
58integer, parameter, private  ::  END_QUOTE = 16
59integer, parameter, private  ::  BANG = 17
60integer, parameter, private  ::  BANG_HYPHEN = 18
61integer, parameter, private  ::  ONE_HYPHEN = 19
62integer, parameter, private  ::  TWO_HYPHEN = 20
63integer, parameter, private  ::  QUESTION_MARK = 21
64integer, parameter, private  ::  START_XML_DECLARATION = 22
65integer, parameter, private  ::  IN_SGML_DECLARATION = 23
66integer, parameter, private  ::  IN_CDATA_SECTION = 24
67integer, parameter, private  ::  ONE_BRACKET = 25
68integer, parameter, private  ::  TWO_BRACKET = 26
69integer, parameter, private  ::  CDATA_PREAMBLE = 27
70integer, parameter, private  ::  IN_PCDATA_AT_EOL = 30
71!
72! Context parameters
73!
74integer, parameter, public   ::  OPENING_TAG  = 100
75integer, parameter, public   ::  CLOSING_TAG  = 110
76integer, parameter, public   ::  SINGLE_TAG   = 120
77integer, parameter, public   ::  COMMENT_TAG  = 130
78integer, parameter, public   ::  XML_DECLARATION_TAG  = 140
79integer, parameter, public   ::  SGML_DECLARATION_TAG  = 150
80integer, parameter, public   ::  CDATA_SECTION_TAG  = 160
81integer, parameter, public   ::  NULL_CONTEXT          = 200
82!
83! Signal parameters
84!
85integer, parameter, public   ::  QUIET             = 1000
86integer, parameter, public   ::  END_OF_TAG        = 1100
87integer, parameter, public   ::  CHUNK_OF_PCDATA   = 1200
88integer, parameter, public   ::  EXCEPTION         = 1500
89
90CONTAINS
91
92!------------------------------------------------------------
93! Initialize once and for all the derived types (Fortran90 restriction)
94!
95subroutine init_fsm(fx)
96type(fsm_t), intent(inout)   :: fx
97
98 fx%state = INIT
99 call setup_xml_charsets()
100 fx%context = NULL_CONTEXT
101 call init_elstack(fx%element_stack)
102 fx%root_element_seen = .false.
103 fx%debug = .false.
104 fx%entities_in_pcdata = .false.
105 fx%entities_in_attributes = .false.
106 fx%action = ""
107 call init_buffer(fx%buffer)
108 call init_buffer(fx%element_name)
109 call init_buffer(fx%pcdata)
110 call init_buffer(fx%root_element_name)
111 call init_dict(fx%attributes)
112end subroutine init_fsm
113!------------------------------------------------------------
114subroutine reset_fsm(fx)
115type(fsm_t), intent(inout)   :: fx
116
117 fx%state = INIT
118 call setup_xml_charsets()
119 fx%context = NULL_CONTEXT
120 call reset_elstack(fx%element_stack)
121 fx%action = ""
122 fx%root_element_seen = .false.
123 call reset_buffer(fx%buffer)
124 call reset_buffer(fx%element_name)
125 call reset_buffer(fx%pcdata)
126 call reset_buffer(fx%root_element_name)
127 call reset_dict(fx%attributes)
128end subroutine reset_fsm
129
130!------------------------------------------------------------
131subroutine evolve_fsm(fx,c,signal)
132!
133! Finite-state machine evolution rules for XML parsing.
134!
135type(fsm_t), intent(inout)      :: fx    ! Internal state
136character(len=1), intent(in)    :: c
137integer, intent(out)            :: signal
138
139!
140! Reset signal
141!
142signal = QUIET
143!
144
145if (.not. (c .in. valid_chars)) then
146!
147!      Let it pass (in case the underlying encoding is UTF-8)
148!      But this chars in a name will cause havoc
149!
150!      signal = EXCEPTION
151!      fx%state = ERROR
152!      fx%action = trim("Not a valid character in simple encoding: "//c)
153!      RETURN
154endif
155
156select case(fx%state)
157
158 case (INIT)
159      if (c == "<") then
160         fx%state = START_TAG_MARKER
161         if (fx%debug) fx%action = ("Starting tag")
162      else if (c == ">") then
163         fx%state = ERROR
164         fx%action = ("Ending tag without being in one!")
165      else
166         if (fx%debug) fx%action = ("Reading garbage chars")
167      endif
168
169 case (START_TAG_MARKER)
170      if (c == ">") then
171         fx%state = ERROR
172         fx%action = ("Tag empty!")
173      else if (c == "<") then
174         fx%state = ERROR
175         fx%action = ("Double opening of tag!!")
176      else if (c == "/") then
177         fx%state = CLOSINGTAG_MARKER
178         if (fx%debug) fx%action = ("Starting endtag: ")
179         fx%context = CLOSING_TAG
180      else if (c == "?") then
181         fx%state = START_XML_DECLARATION
182         if (fx%debug) fx%action = ("Starting XML declaration ")
183         fx%context = XML_DECLARATION_TAG
184      else if (c == "!") then
185         fx%state = BANG
186         if (fx%debug) fx%action = ("Saw ! -- comment or SGML declaration expected...")
187      else if (c .in. whitespace) then
188         fx%state = ERROR
189         fx%action = ("Cannot have whitespace after <")
190      else if (c .in. initial_name_chars) then
191         fx%context = OPENING_TAG
192         fx%state = IN_NAME
193         call add_to_buffer(c,fx%buffer)
194         if (fx%debug) fx%action = ("Starting to read name in tag")
195      else
196         fx%state = ERROR
197         fx%action = ("Illegal initial character for name")
198      endif
199
200
201 case (BANG)
202      if (c == "-") then
203         fx%state = BANG_HYPHEN
204         if (fx%debug) fx%action = ("Almost ready to start comment ")
205      else if (c .in. uppercase_chars) then
206         fx%state = IN_SGML_DECLARATION
207         fx%nlts = 0
208         fx%nbrackets = 0
209         if (fx%debug) fx%action = ("SGML declaration ")
210         fx%context = SGML_DECLARATION_TAG
211         call add_to_buffer(c,fx%buffer)
212      else if (c == "[") then
213         fx%state = CDATA_PREAMBLE
214         if (fx%debug) fx%action = ("Declaration with [ ")
215         fx%context = CDATA_SECTION_TAG
216      else
217         fx%state = ERROR
218         fx%action = ("Wrong character after ! ")
219      endif
220
221 case (CDATA_PREAMBLE)
222      ! We assume a CDATA[ is forthcoming, we do not check
223      if (c == "[") then
224         fx%state = IN_CDATA_SECTION
225         if (fx%debug) fx%action = ("About to start reading CDATA contents")
226      else if (c == "]") then
227         fx%state = ERROR
228         fx%action = ("Unexpected ] in CDATA preamble")
229      else
230         if (fx%debug) fx%action = ("Reading CDATA preamble")
231      endif
232
233 case (IN_CDATA_SECTION)
234      if (c == "]") then
235         fx%state = ONE_BRACKET
236         if (fx%debug) fx%action = ("Saw a ] in CDATA section")
237      else
238         call add_to_buffer(c,fx%buffer)
239         if (fx%debug) fx%action = ("Reading contents of CDATA section")
240      endif
241
242 case (ONE_BRACKET)
243      if (c == "]") then
244         fx%state = TWO_BRACKET
245         if (fx%debug) fx%action = ("Maybe finish a CDATA section")
246      else
247         fx%state = IN_CDATA_SECTION
248         call add_to_buffer("]",fx%buffer)
249         if (fx%debug) fx%action = ("Continue reading contents of CDATA section")
250      endif
251
252 case (TWO_BRACKET)
253      if (c == ">") then
254         fx%state = END_TAG_MARKER
255         signal = END_OF_TAG
256         if (fx%debug) fx%action = ("End of CDATA section")
257         fx%pcdata = fx%buffer    ! Not quite the same behavior
258                                  ! as pcdata... (not filtered)
259         call reset_buffer(fx%buffer)
260      else
261         fx%state = IN_CDATA_SECTION
262         call add_to_buffer("]",fx%buffer)
263         if (fx%debug) fx%action = ("Continue reading contents of CDATA section")
264      endif
265
266 case (IN_SGML_DECLARATION)
267      if (c == "<") then
268         fx%nlts = fx%nlts + 1
269         call add_to_buffer("<",fx%buffer)
270         fx%action = "Read an intermediate < in SGML declaration"
271      else if (c == "[") then
272         fx%nbrackets = fx%nbrackets + 1
273         call add_to_buffer("[",fx%buffer)
274         fx%action = "Read a [ in SGML declaration"
275      else if (c == "]") then
276         fx%nbrackets = fx%nbrackets - 1
277         call add_to_buffer("]",fx%buffer)
278         fx%action = "Read a ] in SGML declaration"
279      else if (c == ">") then
280         if (fx%nlts == 0) then
281            if (fx%nbrackets == 0) then
282               fx%state = END_TAG_MARKER
283               signal  = END_OF_TAG
284               if (fx%debug) fx%action = ("Ending SGML declaration tag")
285               fx%pcdata = fx%buffer       ! Same behavior as pcdata
286               call reset_buffer(fx%buffer)
287            else
288               fx%state = ERROR
289               fx%action = ("Unmatched ] in SGML declaration")
290            endif
291         else
292            fx%nlts = fx%nlts -1
293            call add_to_buffer(">",fx%buffer)
294            fx%action = "Read an intermediate > in SGML declaration"
295         endif
296      else
297         if (fx%debug) fx%action = ("Keep reading SGML declaration")
298         call add_to_buffer(c,fx%buffer)
299      endif
300
301 case (BANG_HYPHEN)
302      if (c == "-") then
303         fx%state = IN_COMMENT
304         fx%context = COMMENT_TAG
305         if (fx%debug) fx%action = ("In comment ")
306      else
307         fx%state = ERROR
308         fx%action = ("Wrong character after <!- ")
309      endif
310
311 case (START_XML_DECLARATION)
312      if (c .in. initial_name_chars) then
313         fx%state = IN_NAME
314         call add_to_buffer(c,fx%buffer)
315         if (fx%debug) fx%action = ("Starting to read name in XML declaration")
316      else
317         fx%state = ERROR
318         fx%action = "Wrong character after ? in start of XML declaration"
319      endif
320
321 case (CLOSINGTAG_MARKER)
322      if (c == ">") then
323         fx%state = ERROR
324         fx%action = ("Closing tag empty!")
325      else if (c == "<") then
326         fx%state = ERROR
327         fx%action = ("Double opening of closing tag!!")
328      else if (c == "/") then
329         fx%state = ERROR
330         fx%action = ("Syntax error (<//)")
331      else if (c .in. whitespace) then
332         fx%state = ERROR
333         fx%action = ("Cannot have whitespace after </")
334      else if (c .in. initial_name_chars) then
335         fx%state = IN_NAME
336         if (fx%debug) fx%action = ("Starting to read name inside endtag")
337         call add_to_buffer(c,fx%buffer)
338      else
339         fx%state = ERROR
340         fx%action = ("Illegal initial character for name")
341      endif
342
343 case (IN_NAME)
344      if (c == "<") then
345         fx%state = ERROR
346         fx%action = ("Starting tag within tag")
347      else if (c == ">") then
348         fx%state = END_TAG_MARKER
349         signal  = END_OF_TAG
350         if (fx%debug) fx%action = ("Ending tag")
351!         call set_element_name(fx%buffer,fx%element)
352         fx%element_name = fx%buffer
353         call reset_buffer(fx%buffer)
354         call reset_dict(fx%attributes)
355      else if (c == "/") then
356         if (fx%context /= OPENING_TAG) then
357            fx%state = ERROR
358            fx%action = ("Single tag did not open as start tag")
359         else
360            fx%state = SINGLETAG_MARKER
361            fx%context = SINGLE_TAG
362            if (fx%debug) fx%action = ("Almost ending single tag")
363!            call set_element_name(fx%buffer,fx%element)
364            fx%element_name = fx%buffer
365            call reset_buffer(fx%buffer)
366            call reset_dict(fx%attributes)
367         endif
368      else if (c .in. whitespace) then
369         fx%state = WHITESPACE_IN_TAG
370         if (fx%debug) fx%action = ("Ending name chars")
371!            call set_element_name(fx%buffer,fx%element)
372         fx%element_name = fx%buffer
373         call reset_buffer(fx%buffer)
374         call reset_dict(fx%attributes)
375      else if (c .in. name_chars) then
376         if (fx%debug) fx%action = ("Reading name chars in tag")
377         call add_to_buffer(c,fx%buffer)
378      else
379         fx%state = ERROR
380         fx%action = ("Illegal character for name")
381      endif
382
383 case (IN_ATT_NAME)
384      if (c == "<") then
385         fx%state = ERROR
386         fx%action = ("Starting tag within tag")
387      else if (c == ">") then
388         fx%state = ERROR
389         fx%action = ("Ending tag in the middle of an attribute")
390      else if (c == "/") then
391         fx%state = ERROR
392         fx%action = ("Ending tag in the middle of an attribute")
393      else if (c .in. whitespace) then
394         fx%state = SPACE_BEFORE_EQUAL
395         if (fx%debug) fx%action = ("Whitespace after attr. name (specs?)")
396         call add_key_to_dict(fx%buffer,fx%attributes)
397         call reset_buffer(fx%buffer)
398      else if ( c == "=" ) then
399         fx%state = EQUAL
400         if (fx%debug) fx%action = ("End of attr. name")
401         call add_key_to_dict(fx%buffer,fx%attributes)
402         call reset_buffer(fx%buffer)
403      else if (c .in. name_chars) then
404         if (fx%debug) fx%action = ("Reading attribute name chars")
405         call add_to_buffer(c,fx%buffer)
406      else
407         fx%state = ERROR
408         fx%action = ("Illegal character for attribute name")
409      endif
410
411 case (EQUAL)
412      if ( (c == """") .or. (c == "'") ) then
413         fx%state = START_QUOTE
414         if (fx%debug) fx%action = ("Found beginning quote")
415         fx%quote_char = c
416      else if (c .in. whitespace) then
417         fx%state = SPACE_AFTER_EQUAL
418         if (fx%debug) fx%action = ("Whitespace after equal sign...")
419      else
420         fx%state = ERROR
421         fx%action = ("Must use quotes for attribute values")
422      endif
423
424 case (SPACE_BEFORE_EQUAL)
425      if ( c == "=" ) then
426         fx%state = EQUAL
427         if (fx%debug) fx%action = ("Equal sign")
428      else if (c .in. whitespace) then
429         if (fx%debug) fx%action = ("More whitespace before equal sign...")
430      else
431         fx%state = ERROR
432         fx%action = ("Must use equal sign for attribute values")
433      endif
434
435 case (SPACE_AFTER_EQUAL)
436      if ( c == "=" ) then
437         fx%state = ERROR
438         fx%action = ("Duplicate Equal sign")
439      else if (c .in. whitespace) then
440         if (fx%debug) fx%action = ("More whitespace after equal sign...")
441      else  if ( (c == """") .or. (c == "'") ) then
442         fx%state = START_QUOTE
443         fx%quote_char = c
444         if (fx%debug) fx%action = ("Found beginning quote")
445      else
446         fx%state = ERROR
447         fx%action = ("Must use quotes for attribute values")
448      endif
449
450 case (START_QUOTE)
451      if (c == fx%quote_char) then
452         fx%state = END_QUOTE
453         if (fx%debug) fx%action = ("Emtpy attribute value...")
454         if (fx%entities_in_attributes) then
455            call entity_filter(fx%buffer,fx%tmpbuf)
456            fx%entities_in_attributes = .false.
457            call add_value_to_dict(fx%tmpbuf,fx%attributes)
458         else
459            call add_value_to_dict(fx%buffer,fx%attributes)
460         endif
461         call reset_buffer(fx%buffer)
462      else if (c == "<") then
463         fx%state = ERROR
464         fx%action = ("Attribute value cannot contain <")
465      else   ! actually allowed chars in att values... Specs: No "<"
466         fx%state = IN_ATT_VALUE
467         if (fx%debug) fx%action = ("Starting to read attribute value")
468         if (c == "&") fx%entities_in_attributes = .true.
469         call add_to_buffer(c,fx%buffer)
470      endif
471
472 case (IN_ATT_VALUE)
473      if (c == fx%quote_char) then
474         fx%state = END_QUOTE
475         if (fx%debug) fx%action = ("End of attribute value")
476         if (fx%entities_in_attributes) then
477            call entity_filter(fx%buffer,fx%tmpbuf)
478            fx%entities_in_attributes = .false.
479            call add_value_to_dict(fx%tmpbuf,fx%attributes)
480         else
481            call add_value_to_dict(fx%buffer,fx%attributes)
482         endif
483         call reset_buffer(fx%buffer)
484      else if (c == "<") then
485         fx%state = ERROR
486         fx%action = ("Attribute value cannot contain <")
487      else if ( (c == char(10)) ) then
488         fx%state = ERROR
489!
490!        Aparently other whitespace is allowed...
491!
492         fx%action = ("No newline allowed in attr. value (specs?)")
493      else        ! all other chars allowed in attr value
494         if (fx%debug) fx%action = ("Reading attribute value chars")
495         call add_to_buffer(c,fx%buffer)
496         if (c == "&") fx%entities_in_attributes = .true.
497      endif
498
499 case (END_QUOTE)
500      if ((c == """") .or. (c == "'")) then
501         fx%state = ERROR
502         fx%action = ("Duplicate end quote")
503      else if (c .in. whitespace) then
504         fx%state = WHITESPACE_IN_TAG
505         if (fx%debug) fx%action = ("Space in between attributes or to end of tag")
506      else if (c == "<") then
507         fx%state = ERROR
508         fx%action = ("Starting tag within tag")
509      else if (c == ">") then
510         if (fx%context == XML_DECLARATION_TAG) then
511            fx%state = ERROR
512            fx%action = "End of XML declaration without ?"
513         else
514            fx%state = END_TAG_MARKER
515            signal  = END_OF_TAG
516            if (fx%debug) fx%action = ("Ending tag after some attributes")
517         endif
518      else if (c == "/") then
519         if (fx%context /= OPENING_TAG) then
520            fx%state = ERROR
521            fx%action = ("Single tag did not open as start tag")
522         else
523            fx%state = SINGLETAG_MARKER
524            fx%context = SINGLE_TAG
525            if (fx%debug) fx%action = ("Almost ending single tag after some attributes")
526         endif
527      else if (c == "?") then
528         if (fx%context /= XML_DECLARATION_TAG) then
529            fx%state = ERROR
530            fx%action = "Wrong lone ? in tag"
531         else
532            fx%state = QUESTION_MARK
533            if (fx%debug) fx%action = ("About to end XML declaration")
534         endif
535      else
536         fx%state = ERROR
537         fx%action = ("Must have some whitespace after att. value")
538      endif
539
540
541 case (WHITESPACE_IN_TAG)
542      if ( c .in. whitespace) then
543         if (fx%debug) fx%action = ("Reading whitespace in tag")
544      else if (c == "<") then
545         fx%state = ERROR
546         fx%action = ("Starting tag within tag")
547      else if (c == ">") then
548         if (fx%context == XML_DECLARATION_TAG) then
549            fx%state = ERROR
550            fx%action = "End of XML declaration without ?"
551         else
552            fx%state = END_TAG_MARKER
553            signal  = END_OF_TAG
554            if (fx%debug) fx%action = ("End whitespace in tag")
555         endif
556      else if (c == "/") then
557         if (fx%context /= OPENING_TAG) then
558            fx%state = ERROR
559            fx%action = ("Single tag did not open as start tag")
560         else
561            fx%state = SINGLETAG_MARKER
562            fx%context = SINGLE_TAG
563            if (fx%debug) fx%action = ("End whitespace in single tag")
564         endif
565      else if (c .in. initial_name_chars) then
566         fx%state = IN_ATT_NAME
567         if (fx%debug) fx%action = ("Starting Attribute name in tag")
568         call add_to_buffer(c,fx%buffer)
569      else if (c == "?") then
570         if (fx%context /= XML_DECLARATION_TAG) then
571            fx%state = ERROR
572            fx%action = "Wrong lone ? in tag"
573         else
574            fx%state = QUESTION_MARK
575            if (fx%debug) fx%action = ("About to end XML declaration after whitespace")
576         endif
577      else
578         fx%state = ERROR
579         fx%action = ("Illegal initial character for attribute")
580      endif
581
582 case (QUESTION_MARK)
583      if (c == ">") then
584         fx%state = END_TAG_MARKER
585         signal  = END_OF_TAG
586         if (fx%debug) fx%action = ("End of XML declaration tag")
587      else
588         fx%state = ERROR
589         fx%action = "No > after ? in XML declaration tag"
590      endif
591
592 case (IN_COMMENT)
593      !
594      ! End of comment is  "-->", and  ">" can appear inside comments
595      !
596      if (c == "-") then
597         fx%state = ONE_HYPHEN
598         if (fx%debug) fx%action = ("Saw - in Comment")
599      else
600         if (fx%debug) fx%action = ("Reading comment")
601         call add_to_buffer(c,fx%buffer)
602      endif
603
604 case (ONE_HYPHEN)
605      if (c == "-") then
606         fx%state = TWO_HYPHEN
607         if (fx%debug) fx%action = ("About to end comment")
608      else
609         fx%state = IN_COMMENT
610         if (fx%debug) fx%action = ("Keep reading comment after -: ")
611         call add_to_buffer("-",fx%buffer)
612         call add_to_buffer(c,fx%buffer)
613      endif
614
615 case (TWO_HYPHEN)
616      if (c == ">") then
617         fx%state = END_TAG_MARKER
618         signal  = END_OF_TAG
619         if (fx%debug) fx%action = ("End of Comment")
620         fx%pcdata = fx%buffer                  ! Same behavior as pcdata
621         call reset_buffer(fx%buffer)
622      else
623         fx%state = ERROR
624         fx%action = ("Cannot have -- in comment")
625      endif
626
627 case (SINGLETAG_MARKER)
628
629      if (c == ">") then
630         fx%state = END_TAG_MARKER
631         signal  = END_OF_TAG
632         if (fx%debug) fx%action = ("Ending tag")
633         ! We have to call begin_element AND end_element
634      else
635         fx%state = ERROR
636         fx%action = ("Wrong ending of single tag")
637      endif
638
639 case (IN_PCDATA)
640      if (c == "<") then
641         fx%state = START_TAG_MARKER
642         signal = CHUNK_OF_PCDATA
643         if (fx%debug) fx%action = ("End of pcdata -- Starting tag")
644         if (fx%entities_in_pcdata) then
645            call entity_filter(fx%buffer,fx%pcdata)
646            fx%entities_in_pcdata = .false.
647         else
648            fx%pcdata = fx%buffer
649         endif
650         call reset_buffer(fx%buffer)
651      else if (c == ">") then
652         fx%state = ERROR
653         fx%action = ("Ending tag without starting it!")
654      else if  (c == char(10)) then
655         fx%state = IN_PCDATA_AT_EOL
656         signal = CHUNK_OF_PCDATA
657         if (fx%debug) fx%action = ("Resetting PCDATA buffer at newline")
658         call add_to_buffer(c,fx%buffer)
659         if (fx%entities_in_pcdata) then
660            call entity_filter(fx%buffer,fx%pcdata)
661            fx%entities_in_pcdata = .false.
662         else
663            fx%pcdata = fx%buffer
664         endif
665         call reset_buffer(fx%buffer)
666      else
667         call add_to_buffer(c,fx%buffer)
668         if (c=="&") fx%entities_in_pcdata = .true.
669         if (fx%debug) fx%action = ("Reading pcdata")
670         !
671         ! Check whether we are close to the end of the buffer.
672         ! If so, make a chunk and reset the buffer
673         if (c .in. whitespace) then
674            if (buffer_nearly_full(fx%buffer)) then
675               signal = CHUNK_OF_PCDATA
676               if (fx%debug) fx%action = ("Resetting almost full PCDATA buffer")
677               if (fx%entities_in_pcdata) then
678                  call entity_filter(fx%buffer,fx%pcdata)
679                  fx%entities_in_pcdata = .false.
680               else
681                  fx%pcdata = fx%buffer
682               endif
683               call reset_buffer(fx%buffer)
684            endif
685         endif
686      endif
687
688 case (IN_PCDATA_AT_EOL)
689      !
690      ! Avoid triggering an extra pcdata event
691      !
692      if (c == "<") then
693         fx%state = START_TAG_MARKER
694         if (fx%debug) fx%action = ("No more pcdata after eol-- Starting tag")
695      else if (c == ">") then
696         fx%state = ERROR
697         fx%action = ("Ending tag without starting it!")
698      else if  (c == char(10)) then
699         fx%state = IN_PCDATA_AT_EOL
700         signal = CHUNK_OF_PCDATA
701         if (fx%debug) fx%action = ("Resetting PCDATA buffer at repeated newline")
702         call add_to_buffer(c,fx%buffer)
703         if (fx%entities_in_pcdata) then
704            call entity_filter(fx%buffer,fx%pcdata)
705            fx%entities_in_pcdata = .false.
706         else
707            fx%pcdata = fx%buffer
708         endif
709         call reset_buffer(fx%buffer)
710      else
711         fx%state = IN_PCDATA
712         call add_to_buffer(c,fx%buffer)
713         if (c=="&")  fx%entities_in_pcdata = .true.
714         if (fx%debug) fx%action = ("Resuming reading pcdata after EOL")
715         !
716         ! Check whether we are close to the end of the buffer.
717         ! If so, make a chunk and reset the buffer
718         if (c .in. whitespace) then
719            if (buffer_nearly_full(fx%buffer)) then
720               signal = CHUNK_OF_PCDATA
721               if (fx%debug) fx%action = ("Resetting almost full PCDATA buffer")
722               if (fx%entities_in_pcdata) then
723                  call entity_filter(fx%buffer,fx%pcdata)
724                  fx%entities_in_pcdata = .false.
725               else
726                  fx%pcdata = fx%buffer
727               endif
728               call reset_buffer(fx%buffer)
729            endif
730         endif
731      endif
732
733
734
735 case (END_TAG_MARKER)
736!
737      if (c == "<") then
738         fx%state = START_TAG_MARKER
739         if (fx%debug) fx%action = ("Starting tag")
740      else if (c == ">") then
741         fx%state = ERROR
742         fx%action = ("Double ending of tag!")
743!
744!     We should make this whitespace in general (maybe not?
745!     how about indentation in text chunks?)
746!     See specs.
747!
748      else if (c == char(10)) then
749        ! Ignoring LF after end of tag is probably non standard...
750
751         if (fx%debug) &
752            fx%action = ("---------Discarding newline after end of tag")
753
754        !!!  New code for full compliance
755        ! fx%state = IN_PCDATA_AT_EOL
756        ! call add_to_buffer(c,fx%buffer)
757        ! if (fx%debug) &
758        !    fx%action = ("Found LF after end of tag. Emitting PCDATA event")
759        ! signal = CHUNK_OF_PCDATA
760        ! if (fx%entities_in_pcdata) then
761        !    call entity_filter(fx%buffer,fx%pcdata)
762        !    fx%entities_in_pcdata = .false.
763        ! else
764        !    fx%pcdata = fx%buffer
765        ! endif
766        ! call reset_buffer(fx%buffer)
767      else
768         fx%state = IN_PCDATA
769         call add_to_buffer(c,fx%buffer)
770         if (c=="&") fx%entities_in_pcdata = .true.
771         if (fx%debug) fx%action = ("End of Tag. Starting to read PCDATA")
772      endif
773
774 case (ERROR)
775
776      stop "Cannot continue after parsing errors!"
777
778 end select
779
780if (fx%state == ERROR) signal  = EXCEPTION
781
782end subroutine evolve_fsm
783
784end module m_fsm
785
786
787
788
789
790
791
792
793
794
795
796
797
798