# frozen_string_literal: false

require 'xmlscan/scanner'
require 'stringio'

module RSS

  # Parser backend that runs the feed source through XMLScan::XMLScanner
  # and reports the scanner's events to an XMLScanListener.
  class XMLScanParser < BaseParser

    # Returns the listener class paired with this parser backend.
    def self.listener
      XMLScanListener
    end

    private

    # Scans @rss with an XMLScan::XMLScanner driven by @listener.
    #
    # A String source is wrapped in a StringIO; any other source is handed
    # to the scanner unchanged.
    #
    # Raises NotWellFormedError when the scanner raises XMLScan::Error. The
    # reported line number is the first non-nil value among the error, the
    # scanner and the input.
    def _parse
      input = @rss.is_a?(String) ? StringIO.new(@rss) : @rss
      scanner = XMLScan::XMLScanner.new(@listener)
      scanner.parse(input)
    rescue XMLScan::Error => error
      line = error.lineno || scanner.lineno || input.lineno
      raise NotWellFormedError.new(line) { error.message }
    end

  end

  # Maps XMLScan::Visitor callbacks onto the listener methods used here
  # (xmldecl, instruction, text, tag_start, tag_end).
  class XMLScanListener < BaseListener

    include XMLScan::Visitor
    include ListenerMixin

    # Entity names this listener can expand, mapped to their replacement text.
    ENTITIES = {
      'lt' => '<',
      'gt' => '>',
      'amp' => '&',
      'quot' => '"',
      'apos' => '\''
    }

    # --- XML declaration ---------------------------------------------------
    # The three pieces are remembered as they arrive and reported together
    # once the declaration ends.

    def on_xmldecl_version(str)
      @version = str
    end

    def on_xmldecl_encoding(str)
      @encoding = str
    end

    def on_xmldecl_standalone(str)
      @standalone = str
    end

    # Reports the collected declaration; standalone is passed as a boolean
    # that is true only when the recorded value is exactly "yes".
    def on_xmldecl_end
      xmldecl(@version, @encoding, @standalone == "yes")
    end

    # --- Content -----------------------------------------------------------

    # Processing instructions, character data and CDATA sections map
    # directly onto existing listener methods.
    alias_method :on_pi, :instruction
    alias_method :on_chardata, :text
    alias_method :on_cdata, :text

    # End tag.
    def on_etag(name)
      tag_end(name)
    end

    # Entity reference in content: emit its replacement text.
    def on_entityref(ref)
      text(entity(ref))
    end

    # Character reference in content: emit the character for +code+.
    # NOTE(review): presumably +code+ is already an Integer code point for
    # both the decimal and the hex callback -- confirm against xmlscan.
    def on_charref(code)
      text([code].pack('U'))
    end

    alias_method :on_charref_hex, :on_charref

    # --- Start tags and attributes ------------------------------------------

    # Start tag opened: begin with an empty attribute set.
    def on_stag(name)
      @attrs = {}
    end

    # New attribute: register a mutable buffer that the value callbacks
    # below append to. The same String object is stored in @attrs.
    def on_attribute(name)
      @current_attr = ''
      @attrs[name] = @current_attr
    end

    def on_attr_value(str)
      @current_attr << str
    end

    def on_attr_entityref(ref)
      @current_attr << entity(ref)
    end

    def on_attr_charref(code)
      @current_attr << [code].pack('U')
    end

    alias_method :on_attr_charref_hex, :on_attr_charref

    # Start tag complete: report it with the attributes gathered so far.
    def on_stag_end(name)
      tag_start(name, @attrs)
    end

    # Empty-element tag: report a start immediately followed by an end.
    def on_stag_end_empty(name)
      tag_start(name, @attrs)
      tag_end(name)
    end

    private

    # Returns the replacement text for entity +ref+. Names missing from
    # ENTITIES are passed to wellformed_error.
    def entity(ref)
      ENTITIES.fetch(ref) { wellformed_error("undefined entity: #{ref}") }
    end
  end

end