# frozen_string_literal: true

# Generated HTML is transformed back to GFM by app/assets/javascripts/behaviors/markdown/nodes/reference.js
module Banzai
  module Filter
    module References
      # Abstract parent of the GitLab Flavored Markdown reference filters.
      #
      # Text that lives inside <pre>, <code>, <a> or <style> elements is never
      # scanned for references (and <blockquote> as well when the
      # :ignore_blockquotes context option is set).
      #
      # Context options:
      #   :project (required) - Current project, ignored if reference is cross-project.
      #   :only_path          - Generate path-only links.
      class ReferenceFilter < HTML::Pipeline::Filter
        include RequestStoreReferenceCache
        include OutputSafety

        class << self
          # Both values are meant to be assigned by the concrete subclass, e.g.
          #
          #   self.reference_type = :merge_request
          #   self.object_class   = MergeRequest
          attr_accessor :reference_type, :object_class

          # Builds a filter instance and runs it through
          # `call_and_update_nodes`, so the node list kept in `result` is
          # refreshed once the filter has finished.
          def call(doc, context = nil, result = nil)
            new(doc, context, result).call_and_update_nodes
          end
        end

        def initialize(doc, context = nil, result = nil)
          super

          @new_nodes = {}
          # Reuse the node list stored in `result`, if any; `nodes` computes it
          # lazily when this is nil.
          @nodes = self.result[:reference_filter_nodes]
        end

        # Runs `call` and afterwards merges every replaced node back into the
        # node list.
        def call_and_update_nodes
          with_update_nodes { call }
        end

        # Walks over all candidate nodes. Text nodes containing a reference are
        # rewritten with the output of `object_link_filter`; <a> elements are
        # rewritten only when their whole (unescaped) href is a reference.
        #
        # Returns the document.
        def call
          anchored_pattern = /\A#{object_reference_pattern}\z/

          nodes.each_with_index do |node, position|
            if text_node?(node)
              replace_text_when_pattern_matches(node, position, object_reference_pattern) do |content|
                object_link_filter(content, object_reference_pattern)
              end
            elsif element_node?(node)
              yield_valid_link(node) do |href, inner_html|
                next unless href =~ anchored_pattern

                replace_link_node_with_href(node, position, href) do
                  object_link_filter(href, object_reference_pattern, link_content: inner_html)
                end
              end
            end
          end

          doc
        end

        # Public: Find references in text (like `!123` for merge requests)
        #
        #   references_in(text) do |match, id, project_ref, matches|
        #     object = find_object(project_ref, id)
        #     "<a href=...>#{object.to_reference}</a>"
        #   end
        #
        # text - String text to search.
        #
        # Implementations are expected to yield the String match, the Integer
        # referenced object ID, an optional String of the external project
        # reference, and all of the matchdata, and to return a String in which
        # each match is replaced by the block's return value.
        #
        # This base implementation always raises NotImplementedError.
        def references_in(text, pattern = object_reference_pattern)
          raise NotImplementedError, "#{self.class} must implement method: #{__callee__}"
        end

        # Yields every node selected by `query`: text() nodes that have none of
        # the ancestors listed in `ignore_ancestor_query`, plus <a> elements
        # that neither carry the "gfm" class nor have an empty "href".
        #
        # Returns an Enumerator when called without a block.
        def each_node
          return to_enum(__method__) unless block_given?

          doc.xpath(query).each { |found| yield found }
        end

        # Returns the memoized Array of candidate nodes.
        def nodes
          @nodes ||= each_node.to_a
        end

        # The class configured on the filter class via `object_class=`.
        def object_class
          self.class.object_class
        end

        def project
          context[:project]
        end

        def group
          context[:group]
        end

        # Always false in this base class.
        def requires_unescaping?
          false
        end

        private

        # Builds the String of data attributes to attach to a reference link.
        #
        # attributes - Hash whose keys become the data attribute names and
        #              whose values become the attribute values. Entries with a
        #              nil value are dropped.
        #
        # :reference_type, :container ('body') and :placement ('top') are
        # filled in when the caller did not supply them, and :original is
        # removed when the :no_original_data context option is set. Attributes
        # appear in hash order: the caller's keys first, then the defaults.
        #
        # Example (`<type>` stands for the filter's reference_type):
        #
        #   data_attribute(project: 3, merge_request: 4)
        #   # => "data-project=\"3\" data-merge-request=\"4\" data-reference-type=\"<type>\"
        #   #     data-container=\"body\" data-placement=\"top\""
        #
        # Returns a String
        def data_attribute(attributes = {})
          data = attributes.reject { |_, value| value.nil? }

          data[:reference_type] ||= self.class.reference_type
          data[:container] ||= 'body'
          data[:placement] ||= 'top'
          data.delete(:original) if context[:no_original_data]

          pairs = data.map do |key, value|
            %Q(data-#{key.to_s.dasherize}="#{escape_once(value)}")
          end

          pairs.join(' ')
        end

        # Memoized XPath predicate that is true for a node having any of the
        # ignored elements as an ancestor.
        def ignore_ancestor_query
          @ignore_ancestor_query ||= begin
            ignored_tags = %w(pre code a style)
            ignored_tags.push('blockquote') if context[:ignore_blockquotes]

            ignored_tags.map { |tag| "ancestor::#{tag}" }.join(' or ')
          end
        end

        # Ensure that a :project key exists in context, unless the
        # :skip_project_check context option is set.
        #
        # Note that while the key might exist, its value could be nil!
        def validate
          needs :project unless skip_project_check?
        end

        def user
          context[:user]
        end

        def skip_project_check?
          context[:skip_project_check]
        end

        # CSS classes for a reference link of the given type; "has-tooltip" is
        # appended unless `tooltip` is false.
        def reference_class(type, tooltip: true)
          base_classes = "gfm gfm-#{type}"

          tooltip ? "#{base_classes} has-tooltip" : base_classes
        end

        # Yields the node's unescaped href and its inner HTML, provided the
        # href is valid UTF-8. Nothing is yielded otherwise.
        def yield_valid_link(node)
          link = unescape_link(node.attr('href').to_s)
          inner_html = node.inner_html

          yield link, inner_html if link.force_encoding('UTF-8').valid_encoding?
        end

        def unescape_link(href)
          # CGI.unescape is not an option: it turns `+` into a space, and the
          # `+` has to survive for expanded reference formats.
          Addressable::URI.unescape(href)
        end

        def unescape_html_entities(text)
          CGI.unescapeHTML(text.to_s)
        end

        def escape_html_entities(text)
          CGI.escapeHTML(text.to_s)
        end

        # When the node's text matches `pattern`, yields the node's HTML and
        # swaps the node for the block's result, unless the block returned the
        # HTML unchanged.
        def replace_text_when_pattern_matches(node, index, pattern)
          return unless node.text =~ pattern

          content = node.to_html
          html = yield content
          return if html == content

          replace_text_with_html(node, index, html)
        end

        # Swaps the node for the block's result unless that result equals the
        # node's text.
        def replace_link_node_with_text(node, index)
          html = yield
          return if html == node.text

          replace_text_with_html(node, index, html)
        end

        # Swaps the node for the block's result unless that result equals
        # `link`.
        def replace_link_node_with_href(node, index, link)
          html = yield
          return if html == link

          replace_text_with_html(node, index, html)
        end

        def text_node?(node)
          node.is_a?(Nokogiri::XML::Text)
        end

        def element_node?(node)
          node.is_a?(Nokogiri::XML::Element)
        end

        # Memoized reference pattern of the configured object class.
        def object_reference_pattern
          @object_reference_pattern ||= object_class.reference_pattern
        end

        # Memoized underscored name of the configured object class.
        def object_name
          @object_name ||= object_class.name.underscore
        end

        def object_sym
          @object_sym ||= object_name.to_sym
        end

        # To be provided by subclasses; this base implementation always raises
        # NotImplementedError.
        def object_link_filter(text, pattern, link_content: nil, link_reference: false)
          raise NotImplementedError, "#{self.class} must implement method: #{__callee__}"
        end

        # Memoized XPath expression selecting the nodes this filter looks at.
        def query
          @query ||= %Q{descendant-or-self::text()[not(#{ignore_ancestor_query})]
          | descendant-or-self::a[
            not(contains(concat(" ", @class, " "), " gfm ")) and not(@href = "")
          ]}
        end

        def replace_text_with_html(node, index, html)
          replace_and_update_new_nodes(node, index, html)
        end

        # Replaces `node` with `html` and records, under `index`, the nodes of
        # the inserted content that match `query`.
        def replace_and_update_new_nodes(node, index, html)
          # Remember the neighbours before replacing: node.replace(html) hands
          # back re-parented nodes rather than the nodes that now sit in the
          # document, so the inserted range has to be located via its
          # surroundings.
          previous_node = node.previous
          next_node = node.next
          parent_node = node.parent

          node.replace(html)
          collected = @new_nodes[index] = []

          # The first inserted node follows the old previous sibling; without
          # one, it is the first child of the parent.
          cursor = previous_node ? previous_node.next : parent_node&.children&.first

          # Walk the siblings up to (not including) the old next sibling.
          until cursor.nil? || cursor == next_node
            collected << cursor.xpath(query)
            cursor = cursor.next
          end

          collected.flatten!
        end

        def only_path?
          context[:only_path]
        end

        # Resets the replacement bookkeeping, runs the block, merges the
        # replacements into the node list and returns the block's value.
        def with_update_nodes
          @new_nodes = {}

          outcome = yield
          update_nodes!
          outcome
        end

        # Splices the recorded replacements into `nodes` and stores the list
        # in `result`. Indices are handled from highest to lowest so that a
        # splice never shifts a position that still has to be processed.
        def update_nodes!
          @new_nodes.keys.sort.reverse_each do |index|
            nodes[index, 1] = @new_nodes[index]
          end

          result[:reference_filter_nodes] = nodes
        end
      end
    end
  end
end