1# frozen_string_literal: true
2
3module Gitlab
4  module Ci
5    module Parsers
6      module Coverage
7        class Cobertura
8          InvalidXMLError = Class.new(Gitlab::Ci::Parsers::ParserError)
9          InvalidLineInformationError = Class.new(Gitlab::Ci::Parsers::ParserError)
10
11          GO_SOURCE_PATTERN = '/usr/local/go/src'
12          MAX_SOURCES = 100
13
14          def parse!(xml_data, coverage_report, project_path: nil, worktree_paths: nil)
15            root = Hash.from_xml(xml_data)
16
17            context = {
18              project_path: project_path,
19              paths: worktree_paths&.to_set,
20              sources: []
21            }
22
23            parse_all(root, coverage_report, context)
24          rescue Nokogiri::XML::SyntaxError
25            raise InvalidXMLError, "XML parsing failed"
26          end
27
28          private
29
30          def parse_all(root, coverage_report, context)
31            return unless root.present?
32
33            root.each do |key, value|
34              parse_node(key, value, coverage_report, context)
35            end
36          end
37
38          def parse_node(key, value, coverage_report, context)
39            if key == 'sources' && value && value['source'].present?
40              parse_sources(value['source'], context)
41            elsif key == 'package'
42              Array.wrap(value).each do |item|
43                parse_package(item, coverage_report, context)
44              end
45            elsif key == 'class'
46              # This means the cobertura XML does not have classes within package nodes.
47              # This is possible in some cases like in simple JS project structures
48              # running Jest.
49              Array.wrap(value).each do |item|
50                parse_class(item, coverage_report, context)
51              end
52            elsif value.is_a?(Hash)
53              parse_all(value, coverage_report, context)
54            elsif value.is_a?(Array)
55              value.each do |item|
56                parse_all(item, coverage_report, context)
57              end
58            end
59          end
60
61          def parse_sources(sources, context)
62            return unless context[:project_path] && context[:paths]
63
64            sources = Array.wrap(sources)
65
66            # TODO: Go cobertura has a different format with how their packages
67            # are included in the filename. So we can't rely on the sources.
68            # We'll deal with this later.
69            return if sources.include?(GO_SOURCE_PATTERN)
70
71            sources.each do |source|
72              source = build_source_path(source, context)
73              context[:sources] << source if source.present?
74            end
75          end
76
77          def build_source_path(source, context)
78            # | raw source                  | extracted  |
79            # |-----------------------------|------------|
80            # | /builds/foo/test/SampleLib/ | SampleLib/ |
81            # | /builds/foo/test/something  | something  |
82            # | /builds/foo/test/           | nil        |
83            # | /builds/foo/test            | nil        |
84            source.split("#{context[:project_path]}/", 2)[1]
85          end
86
87          def parse_package(package, coverage_report, context)
88            classes = package.dig('classes', 'class')
89            return unless classes.present?
90
91            matched_filenames = Array.wrap(classes).map do |item|
92              parse_class(item, coverage_report, context)
93            end
94
95            # Remove these filenames from the paths to avoid conflict
96            # with other packages that may contain the same class filenames
97            remove_matched_filenames(matched_filenames, context)
98          end
99
100          def remove_matched_filenames(filenames, context)
101            return unless context[:paths]
102
103            filenames.each { |f| context[:paths].delete(f) }
104          end
105
106          def parse_class(file, coverage_report, context)
107            return unless file["filename"].present? && file["lines"].present?
108
109            parsed_lines = parse_lines(file["lines"])
110            filename = determine_filename(file["filename"], context)
111
112            coverage_report.add_file(filename, Hash[parsed_lines]) if filename
113
114            filename
115          end
116
117          def parse_lines(lines)
118            line_array = Array.wrap(lines["line"])
119
120            line_array.map do |line|
121              # Using `Integer()` here to raise exception on invalid values
122              [Integer(line["number"]), Integer(line["hits"])]
123            end
124          rescue StandardError
125            raise InvalidLineInformationError, "Line information had invalid values"
126          end
127
128          def determine_filename(filename, context)
129            return filename unless context[:sources].any?
130
131            full_filename = nil
132
133            context[:sources].each_with_index do |source, index|
134              break if index >= MAX_SOURCES
135              break if full_filename = check_source(source, filename, context)
136            end
137
138            full_filename
139          end
140
141          def check_source(source, filename, context)
142            full_path = File.join(source, filename)
143
144            return full_path if context[:paths].include?(full_path)
145          end
146        end
147      end
148    end
149  end
150end
151