1# frozen_string_literal: true
2
module Gitlab
  # Shared catalogue of regular expressions and the human-readable validation
  # messages that accompany them.
  #
  # The module extends itself (and Packages), so every method below can be
  # called as `Gitlab::Regex.<name>`. Methods written as `@ivar ||= ...` build
  # their Regexp once and hand back the same object on later calls.
  module Regex
    # Regexes used to validate package names, versions and paths.
    module Packages
      # File names listed for Conan recipes and Conan packages respectively.
      CONAN_RECIPE_FILES = %w[conanfile.py conanmanifest.txt conan_sources.tgz conan_export.tgz].freeze
      CONAN_PACKAGE_FILES = %w[conaninfo.txt conanmanifest.txt conan_package.tgz].freeze

      # Matches strings that begin with `/api/v<digits>/`, an optional
      # `projects/<id>/` or `group(s)/<id>/-/` segment, then `packages/<letters>`.
      # Anchored with \A rather than ^ so the match must start at the very
      # beginning of the string, not at the beginning of any line inside it.
      API_PATH_REGEX = %r{\A/api/v\d+/(projects/[^/]+/|groups?/[^/]+/-/)?packages/[A-Za-z]+}.freeze

      # Conan package reference: one or more ASCII letters or digits.
      def conan_package_reference_regex
        @conan_package_reference_regex ||= %r{\A[A-Za-z0-9]+\z}.freeze
      end

      # Conan revision: only the literal string "0" is accepted.
      def conan_revision_regex
        @conan_revision_regex ||= %r{\A0\z}.freeze
      end

      # Conan recipe user/channel: either a lone "_" or a Conan name.
      # Memoized like its siblings so the interpolated pattern is compiled
      # once instead of on every call.
      def conan_recipe_user_channel_regex
        @conan_recipe_user_channel_regex ||= %r{\A(_|#{conan_name_regex})\z}.freeze
      end

      # Conan recipe component (e.g. the recipe name): exactly one Conan name.
      def conan_recipe_component_regex
        # https://docs.conan.io/en/latest/reference/conanfile/attributes.html#name
        @conan_recipe_component_regex ||= %r{\A#{conan_name_regex}\z}.freeze
      end

      # Composer version: optional "v", then up to four dot-separated parts
      # where the parts after the first may be digits or an x/X/* wildcard.
      # Possessive quantifiers (`\d++`) keep the engine from backtracking.
      def composer_package_version_regex
        # see https://github.com/composer/semver/blob/31f3ea725711245195f62e54ffa402d8ef2fdba9/src/VersionParser.php#L215
        @composer_package_version_regex ||= %r{\Av?((\d++)(\.(?:\d++|[xX*]))?(\.(?:\d++|[xX*]))?(\.(?:\d++|[xX*]))?)?\z}.freeze
      end

      # Matches strings that start with "dev-" or end with "-dev".
      # Uses \A / \z (whole-string anchors) instead of ^ / $ so that a
      # multi-line string cannot match on one of its inner lines.
      def composer_dev_version_regex
        @composer_dev_version_regex ||= %r{(\Adev-)|(-dev\z)}.freeze
      end

      # Generic package name: optional leading "@", slash-separated segments,
      # then an optional "@" followed by further slash-separated segments.
      def package_name_regex
        @package_name_regex ||=
          %r{
              \A\@?
              (?> # atomic group to prevent backtracking
                (([\w\-\.\+]*)\/)*([\w\-\.]+)
              )
              @?
              (?> # atomic group to prevent backtracking
                (([\w\-\.\+]*)\/)*([\w\-\.]*)
              )
              \z
            }x.freeze
      end

      # Maven file name: one or more ASCII letters, digits, ".", "_", "-", "+".
      def maven_file_name_regex
        @maven_file_name_regex ||= %r{\A[A-Za-z0-9\.\_\-\+]+\z}.freeze
      end

      # Maven path: optional leading "@", any number of `segment/` parts and a
      # final segment that may additionally contain "+".
      def maven_path_regex
        @maven_path_regex ||= %r{\A\@?(([\w\-\.]*)/)*([\w\-\.\+]*)\z}.freeze
      end

      # Maven app name: one or more word characters, "-" or ".".
      def maven_app_name_regex
        @maven_app_name_regex ||= /\A[\w\-\.]+\z/.freeze
      end

      # Maven version: word characters, "+", "." and "-"; the negative
      # lookahead rejects any string containing "..".
      def maven_version_regex
        @maven_version_regex ||= /\A(?!.*\.\.)[\w+.-]+\z/.freeze
      end

      # Maven app group uses the same pattern as the app name.
      def maven_app_group_regex
        maven_app_name_regex
      end

      # npm package name: optional `@<namespace>/` scope followed by the name.
      # The namespace pattern comes from Gitlab::PathRegex (defined elsewhere).
      def npm_package_name_regex
        @npm_package_name_regex ||= %r{\A(?:@(#{Gitlab::PathRegex::NAMESPACE_FORMAT_REGEX})/)?[-+\.\_a-zA-Z0-9]+\z}.freeze
      end

      # NuGet package name: one or more ASCII letters, digits, "-", "+", ".", "_".
      def nuget_package_name_regex
        @nuget_package_name_regex ||= %r{\A[-+\.\_a-zA-Z0-9]+\z}.freeze
      end

      # NuGet version: semver major.minor.patch, an optional fourth
      # dot-separated numeric part, then optional prerelease/build suffixes.
      def nuget_version_regex
        @nuget_version_regex ||= /
          \A#{_semver_major_minor_patch_regex}(\.\d*)?#{_semver_prerelease_build_regex}\z
        /x.freeze
      end

      # Terraform module package name: two segments of lowercase letters,
      # digits and "-", separated by a single "/".
      def terraform_module_package_name_regex
        @terraform_module_package_name_regex ||= %r{\A[-a-z0-9]+\/[-a-z0-9]+\z}.freeze
      end

      # PyPI version (case-insensitive), split into the commented segments below.
      def pypi_version_regex
        # See the official regex: https://github.com/pypa/packaging/blob/16.7/packaging/version.py#L159

        @pypi_version_regex ||= %r{
          \A(?:
            v?
            (?:([0-9]+)!)?                                                 (?# epoch)
            ([0-9]+(?:\.[0-9]+)*)                                          (?# release segment)
            ([-_\.]?((a|b|c|rc|alpha|beta|pre|preview))[-_\.]?([0-9]+)?)?  (?# pre-release)
            ((?:-([0-9]+))|(?:[-_\.]?(post|rev|r)[-_\.]?([0-9]+)?))?       (?# post release)
            ([-_\.]?(dev)[-_\.]?([0-9]+)?)?                                (?# dev release)
            (?:\+([a-z0-9]+(?:[-_\.][a-z0-9]+)*))?                         (?# local version)
            )\z}xi.freeze
      end

      # Debian package name: a lowercase letter or digit followed by at least
      # one more character from lowercase letters, digits, "-", "+", ".".
      def debian_package_name_regex
        # See official parser
        # https://git.dpkg.org/cgit/dpkg/dpkg.git/tree/lib/dpkg/parsehelp.c?id=9e0c88ec09475f4d1addde9cdba1ad7849720356#n122
        # @debian_package_name_regex ||= %r{\A[a-z0-9][-+\._a-z0-9]*\z}i.freeze
        # But we prefer a more strict version from Lintian
        # https://salsa.debian.org/lintian/lintian/-/blob/5080c0068ffc4a9ddee92022a91d0c2ff53e56d1/lib/Lintian/Util.pm#L116
        @debian_package_name_regex ||= %r{\A[a-z0-9][-+\.a-z0-9]+\z}.freeze
      end

      # Debian version (case-insensitive): optional `<epoch>:`, a version that
      # starts with a digit, and an optional `-<revision>` part.
      # The revision class is `0-9` (it previously read `0-0`, a typo that
      # limited the digit range to "0"). Because the greedy version group
      # already accepts "-", overall match results are unchanged.
      def debian_version_regex
        # See official parser: https://git.dpkg.org/cgit/dpkg/dpkg.git/tree/lib/dpkg/parsehelp.c?id=9e0c88ec09475f4d1addde9cdba1ad7849720356#n205
        @debian_version_regex ||= %r{
          \A(?:
            (?:([0-9]{1,9}):)?    (?# epoch)
            ([0-9][0-9a-z\.+~-]*)  (?# version)
            (?:(-[0-9a-z\.+~]+))?  (?# revision)
            )\z}xi.freeze
      end

      # Debian architecture: whole-string match of the pattern defined in
      # ::Packages::Debian (outside this file).
      def debian_architecture_regex
        # See official parser: https://git.dpkg.org/cgit/dpkg/dpkg.git/tree/lib/dpkg/arch.c?id=9e0c88ec09475f4d1addde9cdba1ad7849720356#n43
        # But we limit to lower case
        @debian_architecture_regex ||= %r{\A#{::Packages::Debian::ARCHITECTURE_REGEX}\z}.freeze
      end

      # Debian distribution: case-insensitive whole-string match of the
      # pattern defined in ::Packages::Debian (outside this file).
      def debian_distribution_regex
        @debian_distribution_regex ||= %r{\A#{::Packages::Debian::DISTRIBUTION_REGEX}\z}i.freeze
      end

      # Debian component: whole-string match of the pattern defined in
      # ::Packages::Debian (outside this file).
      def debian_component_regex
        @debian_component_regex ||= %r{\A#{::Packages::Debian::COMPONENT_REGEX}\z}.freeze
      end

      # Helm channel: 1 to 255 alphanumerics, each optionally followed by a
      # single ".", "-" or "_"; the lookbehind forbids ending on a separator.
      def helm_channel_regex
        @helm_channel_regex ||= %r{\A([a-zA-Z0-9](\.|-|_)?){1,255}(?<!\.|-|_)\z}.freeze
      end

      # Helm package name: same pattern as helm_channel_regex, wrapped in a
      # separate Regexp object.
      def helm_package_regex
        @helm_package_regex ||= %r{#{helm_channel_regex}}.freeze
      end

      # Helm version: whole-string semver with an optional leading "v".
      def helm_version_regex
        # identical to semver_regex, with optional preceding 'v'
        @helm_version_regex ||= Regexp.new("\\Av?#{::Gitlab::Regex.unbounded_semver_regex.source}\\z", ::Gitlab::Regex.unbounded_semver_regex.options).freeze
      end

      # Semver pattern without \A / \z anchors, for embedding or searching.
      def unbounded_semver_regex
        # See the official regex: https://semver.org/#is-there-a-suggested-regular-expression-regex-to-check-a-semver-string

        # The order of the alternatives in <prerelease> are intentionally
        # reordered to be greedy. Without this change, the unbounded regex would
        # only partially match "v0.0.0-20201230123456-abcdefabcdef".
        @unbounded_semver_regex ||= /
          #{_semver_major_minor_patch_regex}#{_semver_prerelease_build_regex}
        /x.freeze
      end

      # Whole-string semver: unbounded_semver_regex wrapped in \A ... \z.
      def semver_regex
        @semver_regex ||= Regexp.new("\\A#{::Gitlab::Regex.unbounded_semver_regex.source}\\z", ::Gitlab::Regex.unbounded_semver_regex.options).freeze
      end

      # These partial semver regexes are intended for use in composing other
      # regexes rather than being used alone.
      def _semver_major_minor_patch_regex
        @_semver_major_minor_patch_regex ||= /
          (?<major>0|[1-9]\d*)
          \.(?<minor>0|[1-9]\d*)
          \.(?<patch>0|[1-9]\d*)
        /x.freeze
      end

      # Optional `-<prerelease>` and `+<build>` suffixes (named captures).
      def _semver_prerelease_build_regex
        @_semver_prerelease_build_regex ||= /
          (?:-(?<prerelease>(?:\d*[a-zA-Z-][0-9a-zA-Z-]*|[1-9]\d*|0)(?:\.(?:\d*[a-zA-Z-][0-9a-zA-Z-]*|[1-9]\d*|0))*))?
          (?:\+(?<build>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?
        /x.freeze
      end

      # Whole-string semver that must be prefixed with "v".
      def prefixed_semver_regex
        # identical to semver_regex, except starting with 'v'
        @prefixed_semver_regex ||= Regexp.new("\\Av#{::Gitlab::Regex.unbounded_semver_regex.source}\\z", ::Gitlab::Regex.unbounded_semver_regex.options).freeze
      end

      # Unanchored, case-insensitive pattern with named groups `domain` and
      # `path`, delimited by word boundaries.
      def go_package_regex
        # A Go package name looks like a URL but is not; it:
        #   - Must not have a scheme, such as http:// or https://
        #   - Must not have a port number, such as :8080 or :8443

        @go_package_regex ||= %r{
          \b (?# word boundary)
          (?<domain>
            [0-9a-z](?:(?:-|[0-9a-z]){0,61}[0-9a-z])? (?# first domain)
            (?:\.[0-9a-z](?:(?:-|[0-9a-z]){0,61}[0-9a-z])?)* (?# inner domains)
            \.[a-z]{2,} (?# top-level domain)
          )
          (?<path>/(?:
            [-/$_.+!*'(),0-9a-z] (?# plain URL character)
            | %[0-9a-f]{2})* (?# URL encoded character)
          )? (?# path)
          \b (?# word boundary)
        }ix.freeze
      end

      # Generic package version: same pattern as maven_version_regex.
      def generic_package_version_regex
        maven_version_regex
      end

      # Generic package name: same pattern as maven_file_name_regex.
      def generic_package_name_regex
        maven_file_name_regex
      end

      # Generic package file name: same pattern as generic_package_name_regex.
      def generic_package_file_name_regex
        generic_package_name_regex
      end

      private

      # Unanchored Conan name fragment: 2 to 50 characters, the first being a
      # letter, digit or "_", the rest additionally allowing "+", "." and "-".
      # Callers add their own \A / \z anchors.
      def conan_name_regex
        @conan_name_regex ||= %r{[a-zA-Z0-9_][a-zA-Z0-9_\+\.-]{1,49}}.freeze
      end
    end

    extend self
    extend Packages

    # Project name: see project_name_regex_message for the accepted characters.
    def project_name_regex
      # The character range \p{Alnum} overlaps with \u{00A9}-\u{1f9ff}
      # hence the Ruby warning.
      # https://gitlab.com/gitlab-org/gitlab/merge_requests/23165#not-easy-fixable
      @project_name_regex ||= /\A[\p{Alnum}\u{00A9}-\u{1f9ff}_][\p{Alnum}\p{Pd}\u{002B}\u{00A9}-\u{1f9ff}_\. ]*\z/.freeze
    end

    # Validation message paired with project_name_regex.
    def project_name_regex_message
      "can contain only letters, digits, emojis, '_', '.', '+', dashes, or spaces. " \
      "It must start with a letter, digit, emoji, or '_'."
    end

    # Group name: group_name_regex_chars anchored to the whole string.
    def group_name_regex
      @group_name_regex ||= /\A#{group_name_regex_chars}\z/.freeze
    end

    # Unanchored group-name pattern, reusable inside larger expressions.
    def group_name_regex_chars
      @group_name_regex_chars ||= /[\p{Alnum}\u{00A9}-\u{1f9ff}_][\p{Alnum}\p{Pd}\u{00A9}-\u{1f9ff}_()\. ]*/.freeze
    end

    # Validation message paired with group_name_regex.
    def group_name_regex_message
      "can contain only letters, digits, emojis, '_', '.', dash, space, parenthesis. " \
      "It must start with letter, digit, emoji or '_'."
    end

    ##
    # Docker Distribution Registry repository / tag name rules
    #
    # See https://github.com/docker/distribution/blob/master/reference/regexp.go.
    #
    def container_repository_name_regex
      @container_repository_regex ||= %r{\A[a-z0-9]+(([._/]|__|-*)[a-z0-9])*\z}
    end

    ##
    # We do not use regexp anchors here because these are not allowed when
    # used as a routing constraint.
    #
    def container_registry_tag_regex
      @container_registry_tag_regex ||= /\w[\w.-]{0,127}/
    end

    # Character-class body (no surrounding brackets) for environment names,
    # including "/".
    def environment_name_regex_chars
      'a-zA-Z0-9_/\\$\\{\\}\\. \\-'
    end

    # Same character-class body as environment_name_regex_chars minus "/".
    def environment_name_regex_chars_without_slash
      'a-zA-Z0-9_\\$\\{\\}\\. -'
    end

    # Environment name: first and last characters come from the set without
    # "/", characters in between may also be "/".
    def environment_name_regex
      @environment_name_regex ||= /\A[#{environment_name_regex_chars_without_slash}]([#{environment_name_regex_chars}]*[#{environment_name_regex_chars_without_slash}])?\z/.freeze
    end

    # Validation message paired with environment_name_regex.
    def environment_name_regex_message
      "can contain only letters, digits, '-', '_', '/', '$', '{', '}', '.', and spaces, but it cannot start or end with '/'"
    end

    # Environment-name characters plus "*" for scope wildcards.
    def environment_scope_regex_chars
      "#{environment_name_regex_chars}\\*"
    end

    # Environment scope: one or more characters from environment_scope_regex_chars.
    def environment_scope_regex
      @environment_scope_regex ||= /\A[#{environment_scope_regex_chars}]+\z/.freeze
    end

    # Validation message paired with environment_scope_regex.
    def environment_scope_regex_message
      "can contain only letters, digits, '-', '_', '/', '$', '{', '}', '.', '*' and spaces"
    end

    # https://gitlab.com/gitlab-org/cluster-integration/gitlab-agent/-/blob/master/doc/identity_and_auth.md#agent-identity-and-name
    def cluster_agent_name_regex
      /\A[a-z0-9]([-a-z0-9]*[a-z0-9])?\z/
    end

    # Validation message paired with cluster_agent_name_regex.
    def cluster_agent_name_regex_message
      %q{can contain only lowercase letters, digits, and '-', but cannot start or end with '-'}
    end

    # Kubernetes namespace: lowercase letters, digits and "-", beginning and
    # ending with a letter or digit.
    def kubernetes_namespace_regex
      /\A[a-z0-9]([-a-z0-9]*[a-z0-9])?\z/
    end

    # Validation message paired with kubernetes_namespace_regex.
    # NOTE(review): the text says "Must start with a letter", but
    # kubernetes_namespace_regex also accepts a leading digit — confirm which
    # of the two is intended before changing this user-facing string.
    def kubernetes_namespace_regex_message
      "can contain only lowercase letters, digits, and '-'. " \
      "Must start with a letter, and cannot end with '-'"
    end

    # Pod name adheres to DNS Subdomain Names(RFC 1123) naming convention
    # https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#dns-subdomain-names
    def kubernetes_dns_subdomain_regex
      /\A[a-z0-9]([a-z0-9\-\.]*[a-z0-9])?\z/
    end

    # Environment slug: starts with a lowercase letter, then lowercase
    # letters, digits or "-", and does not end with "-".
    def environment_slug_regex
      @environment_slug_regex ||= /\A[a-z]([a-z0-9-]*[a-z0-9])?\z/.freeze
    end

    # Validation message paired with environment_slug_regex.
    def environment_slug_regex_message
      "can contain only lowercase letters, digits, and '-'. " \
      "Must start with a letter, and cannot end with '-'"
    end

    # The section start, e.g. section_start:12345678:NAME
    def logs_section_prefix_regex
      /section_((?:start)|(?:end)):(\d+):([a-zA-Z0-9_.-]+)/
    end

    # The optional section options, e.g. [collapsed=true]
    def logs_section_options_regex
      /(\[(?:\w+=\w+)(?:, ?(?:\w+=\w+))*\])?/
    end

    # The region end, always: \r\e\[0K
    def logs_section_suffix_regex
      /\r\033\[0K/
    end

    # Full build-trace section marker: prefix, optional options, then suffix.
    def build_trace_section_regex
      @build_trace_section_regexp ||= %r{
        #{logs_section_prefix_regex}
        #{logs_section_options_regex}
        #{logs_section_suffix_regex}
      }x.freeze
    end

    # Matches either a fenced code block (named group `code`) or an HTML block
    # (named group `html`); multiline so `.` spans newlines.
    def markdown_code_or_html_blocks
      @markdown_code_or_html_blocks ||= %r{
          (?<code>
            # Code blocks:
            # ```
            # Anything, including `>>>` blocks which are ignored by this filter
            # ```

            ^```
            .+?
            \n```\ *$
          )
        |
          (?<html>
            # HTML block:
            # <tag>
            # Anything, including `>>>` blocks which are ignored by this filter
            # </tag>

            ^<[^>]+?>\ *\n
            .+?
            \n<\/[^>]+?>\ *$
          )
      }mx
    end

    # Based on Jira's project key format
    # https://confluence.atlassian.com/adminjiraserver073/changing-the-project-key-format-861253229.html
    def jira_issue_key_regex
      @jira_issue_key_regex ||= /[A-Z][A-Z_0-9]+-\d+/
    end

    # Unanchored run of one or more digits.
    def jira_transition_id_regex
      @jira_transition_id_regex ||= /\d+/
    end

    # Any line break: "\r\n", "\r" or "\n".
    def breakline_regex
      @breakline_regex ||= /\r\n|\r|\n/
    end

    # https://docs.aws.amazon.com/general/latest/gr/acct-identifiers.html
    def aws_account_id_regex
      /\A\d{12}\z/
    end

    # Validation message paired with aws_account_id_regex.
    def aws_account_id_message
      'must be a 12-digit number'
    end

    # https://docs.aws.amazon.com/general/latest/gr/aws-arns-and-namespaces.html
    def aws_arn_regex
      /\Aarn:\S+\z/
    end

    # Validation message paired with aws_arn_regex.
    def aws_arn_regex_message
      'must be a valid Amazon Resource Name'
    end

    # Date in the shape NNNN-NN-NN (digits only; no range checks).
    def utc_date_regex
      @utc_date_regex ||= /\A[0-9]{4}-[0-9]{2}-[0-9]{2}\z/.freeze
    end

    # Case-insensitive: "[WIP]" or "WIP:" anywhere (plus trailing whitespace),
    # or a string that is exactly "WIP".
    def merge_request_wip
      /(?i)(\[WIP\]\s*|WIP:\s*|\AWIP\z)/
    end

    # Case-insensitive "[draft]", "(draft)" or "draft:" at the start of the string.
    def merge_request_draft
      /\A(?i)(\[draft\]|\(draft\)|draft:)/
    end

    # Digits (named `issue`) with an optional "+" (named `format`), followed
    # by a non-word character or the end of the string.
    def issue
      @issue ||= /(?<issue>\d+)(?<format>\+)?(?=\W|\z)/
    end

    # Digits (named `merge_request`) with an optional "+" (named `format`).
    def merge_request
      @merge_request ||= /(?<merge_request>\d+)(?<format>\+)?/
    end

    # Unanchored base64 pattern: groups of four base64 characters with an
    # optional "="-padded final group.
    def base64_regex
      @base64_regex ||= %r{(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?}.freeze
    end

    # Feature flag name: starts with a lowercase letter and ends with a
    # lowercase letter or digit; "-" and "_" are allowed in between.
    def feature_flag_regex
      /\A[a-z]([-_a-z0-9]*[a-z0-9])?\z/
    end

    # Validation message paired with feature_flag_regex.
    def feature_flag_regex_message
      "can contain only lowercase letters, digits, '_' and '-'. " \
      "Must start with a letter, and cannot end with '-' or '_'"
    end
  end
end
448
# NOTE(review): `prepend_mod` is not defined in this file; presumably it
# prepends an extension module (e.g. an edition-specific override) onto
# Gitlab::Regex — confirm against its definition.
Gitlab::Regex.prepend_mod
450