1# frozen_string_literal: true
2
3# Generated HTML is transformed back to GFM by app/assets/javascripts/behaviors/markdown/nodes/reference.js
4module Banzai
5  module Filter
6    module References
7      # Base class for GitLab Flavored Markdown reference filters.
8      #
9      # References within <pre>, <code>, <a>, and <style> elements are ignored.
10      #
11      # Context options:
12      #   :project (required) - Current project, ignored if reference is cross-project.
13      #   :only_path          - Generate path-only links.
14      class ReferenceFilter < HTML::Pipeline::Filter
15        include RequestStoreReferenceCache
16        include OutputSafety
17
18        class << self
19          # Implement in child class
20          # Example: self.reference_type = :merge_request
21          attr_accessor :reference_type
22
23          # Implement in child class
24          # Example: self.object_class = MergeRequest
25          attr_accessor :object_class
26
27          def call(doc, context = nil, result = nil)
28            new(doc, context, result).call_and_update_nodes
29          end
30        end
31
32        def initialize(doc, context = nil, result = nil)
33          super
34
35          @new_nodes = {}
36          @nodes = self.result[:reference_filter_nodes]
37        end
38
39        def call_and_update_nodes
40          with_update_nodes { call }
41        end
42
43        def call
44          ref_pattern_start = /\A#{object_reference_pattern}\z/
45
46          nodes.each_with_index do |node, index|
47            if text_node?(node)
48              replace_text_when_pattern_matches(node, index, object_reference_pattern) do |content|
49                object_link_filter(content, object_reference_pattern)
50              end
51            elsif element_node?(node)
52              yield_valid_link(node) do |link, inner_html|
53                if link =~ ref_pattern_start
54                  replace_link_node_with_href(node, index, link) do
55                    object_link_filter(link, object_reference_pattern, link_content: inner_html)
56                  end
57                end
58              end
59            end
60          end
61
62          doc
63        end
64
65        # Public: Find references in text (like `!123` for merge requests)
66        #
67        #   references_in(text) do |match, id, project_ref, matches|
68        #     object = find_object(project_ref, id)
69        #     "<a href=...>#{object.to_reference}</a>"
70        #   end
71        #
72        # text - String text to search.
73        #
74        # Yields the String match, the Integer referenced object ID, an optional String
75        # of the external project reference, and all of the matchdata.
76        #
77        # Returns a String replaced with the return of the block.
78        def references_in(text, pattern = object_reference_pattern)
79          raise NotImplementedError, "#{self.class} must implement method: #{__callee__}"
80        end
81
82        # Iterates over all <a> and text() nodes in a document.
83        #
84        # Nodes are skipped whenever their ancestor is one of the nodes returned
85        # by `ignore_ancestor_query`. Link tags are not processed if they have a
86        # "gfm" class or the "href" attribute is empty.
87        def each_node
88          return to_enum(__method__) unless block_given?
89
90          doc.xpath(query).each do |node|
91            yield node
92          end
93        end
94
95        # Returns an Array containing all HTML nodes.
96        def nodes
97          @nodes ||= each_node.to_a
98        end
99
100        def object_class
101          self.class.object_class
102        end
103
104        def project
105          context[:project]
106        end
107
108        def group
109          context[:group]
110        end
111
112        def requires_unescaping?
113          false
114        end
115
116        private
117
118        # Returns a data attribute String to attach to a reference link
119        #
120        # attributes - Hash, where the key becomes the data attribute name and the
121        #              value is the data attribute value
122        #
123        # Examples:
124        #
125        #   data_attribute(project: 1, issue: 2)
126        #   # => "data-reference-type=\"SomeReferenceFilter\" data-project=\"1\" data-issue=\"2\""
127        #
128        #   data_attribute(project: 3, merge_request: 4)
129        #   # => "data-reference-type=\"SomeReferenceFilter\" data-project=\"3\" data-merge-request=\"4\""
130        #
131        # Returns a String
132        def data_attribute(attributes = {})
133          attributes = attributes.reject { |_, v| v.nil? }
134
135          attributes[:reference_type] ||= self.class.reference_type
136          attributes[:container] ||= 'body'
137          attributes[:placement] ||= 'top'
138          attributes.delete(:original) if context[:no_original_data]
139          attributes.map do |key, value|
140            %Q(data-#{key.to_s.dasherize}="#{escape_once(value)}")
141          end.join(' ')
142        end
143
144        def ignore_ancestor_query
145          @ignore_ancestor_query ||= begin
146            parents = %w(pre code a style)
147            parents << 'blockquote' if context[:ignore_blockquotes]
148
149            parents.map { |n| "ancestor::#{n}" }.join(' or ')
150          end
151        end
152
153        # Ensure that a :project key exists in context
154        #
155        # Note that while the key might exist, its value could be nil!
156        def validate
157          needs :project unless skip_project_check?
158        end
159
160        def user
161          context[:user]
162        end
163
164        def skip_project_check?
165          context[:skip_project_check]
166        end
167
168        def reference_class(type, tooltip: true)
169          gfm_klass = "gfm gfm-#{type}"
170
171          return gfm_klass unless tooltip
172
173          "#{gfm_klass} has-tooltip"
174        end
175
176        # Yields the link's URL and inner HTML whenever the node is a valid <a> tag.
177        def yield_valid_link(node)
178          link = unescape_link(node.attr('href').to_s)
179          inner_html = node.inner_html
180
181          return unless link.force_encoding('UTF-8').valid_encoding?
182
183          yield link, inner_html
184        end
185
186        def unescape_link(href)
187          # We cannot use CGI.unescape here because it also converts `+` to spaces.
188          # We need to keep the `+` for expanded reference formats.
189          Addressable::URI.unescape(href)
190        end
191
192        def unescape_html_entities(text)
193          CGI.unescapeHTML(text.to_s)
194        end
195
196        def escape_html_entities(text)
197          CGI.escapeHTML(text.to_s)
198        end
199
200        def replace_text_when_pattern_matches(node, index, pattern)
201          return unless node.text =~ pattern
202
203          content = node.to_html
204          html = yield content
205
206          replace_text_with_html(node, index, html) unless html == content
207        end
208
209        def replace_link_node_with_text(node, index)
210          html = yield
211
212          replace_text_with_html(node, index, html) unless html == node.text
213        end
214
215        def replace_link_node_with_href(node, index, link)
216          html = yield
217
218          replace_text_with_html(node, index, html) unless html == link
219        end
220
221        def text_node?(node)
222          node.is_a?(Nokogiri::XML::Text)
223        end
224
225        def element_node?(node)
226          node.is_a?(Nokogiri::XML::Element)
227        end
228
229        def object_reference_pattern
230          @object_reference_pattern ||= object_class.reference_pattern
231        end
232
233        def object_name
234          @object_name ||= object_class.name.underscore
235        end
236
237        def object_sym
238          @object_sym ||= object_name.to_sym
239        end
240
241        def object_link_filter(text, pattern, link_content: nil, link_reference: false)
242          raise NotImplementedError, "#{self.class} must implement method: #{__callee__}"
243        end
244
245        def query
246          @query ||= %Q{descendant-or-self::text()[not(#{ignore_ancestor_query})]
247          | descendant-or-self::a[
248            not(contains(concat(" ", @class, " "), " gfm ")) and not(@href = "")
249          ]}
250        end
251
252        def replace_text_with_html(node, index, html)
253          replace_and_update_new_nodes(node, index, html)
254        end
255
256        def replace_and_update_new_nodes(node, index, html)
257          previous_node = node.previous
258          next_node = node.next
259          parent_node = node.parent
260          # Unfortunately node.replace(html) returns re-parented nodes, not the actual replaced nodes in the doc
261          # We need to find the actual nodes in the doc that were replaced
262          node.replace(html)
263          @new_nodes[index] = []
264
265          # We replaced node with new nodes, so we find first new node. If previous_node is nil, we take first parent child
266          new_node = previous_node ? previous_node.next : parent_node&.children&.first
267
268          # We iterate from first to last replaced node and store replaced nodes in @new_nodes
269          while new_node && new_node != next_node
270            @new_nodes[index] << new_node.xpath(query)
271            new_node = new_node.next
272          end
273
274          @new_nodes[index].flatten!
275        end
276
277        def only_path?
278          context[:only_path]
279        end
280
281        def with_update_nodes
282          @new_nodes = {}
283          yield.tap { update_nodes! }
284        end
285
286        # Once Filter completes replacing nodes, we update nodes with @new_nodes
287        def update_nodes!
288          @new_nodes.sort_by { |index, _new_nodes| -index }.each do |index, new_nodes|
289            nodes[index, 1] = new_nodes
290          end
291          result[:reference_filter_nodes] = nodes
292        end
293      end
294    end
295  end
296end
297