# frozen_string_literal: true

module Gitlab
  # Collection of memoized regular expressions (and their companion
  # validation messages) used to validate names, versions and paths.
  module Regex
    # Regular expressions specific to package registries.
    module Packages
      CONAN_RECIPE_FILES = %w[conanfile.py conanmanifest.txt conan_sources.tgz conan_export.tgz].freeze
      CONAN_PACKAGE_FILES = %w[conaninfo.txt conanmanifest.txt conan_package.tgz].freeze

      # Not anchored at the end: matches any path that starts with a
      # (project- or group-scoped) packages API prefix.
      API_PATH_REGEX = %r{^/api/v\d+/(projects/[^/]+/|groups?/[^/]+/-/)?packages/[A-Za-z]+}.freeze

      # One or more ASCII alphanumeric characters.
      def conan_package_reference_regex
        @conan_package_reference_regex ||= %r{\A[A-Za-z0-9]+\z}.freeze
      end

      # Matches only the literal revision "0".
      def conan_revision_regex
        @conan_revision_regex ||= %r{\A0\z}.freeze
      end

      # Either the "_" placeholder or a regular Conan name.
      def conan_recipe_user_channel_regex
        @conan_recipe_user_channel_regex ||= %r{\A(_|#{conan_name_regex})\z}.freeze
      end

      def conan_recipe_component_regex
        # https://docs.conan.io/en/latest/reference/conanfile/attributes.html#name
        @conan_recipe_component_regex ||= %r{\A#{conan_name_regex}\z}.freeze
      end

      def composer_package_version_regex
        # see https://github.com/composer/semver/blob/31f3ea725711245195f62e54ffa402d8ef2fdba9/src/VersionParser.php#L215
        @composer_package_version_regex ||= %r{\Av?((\d++)(\.(?:\d++|[xX*]))?(\.(?:\d++|[xX*]))?(\.(?:\d++|[xX*]))?)?\z}.freeze
      end

      # Matches a "dev-" prefix at the start of a line or a "-dev" suffix at
      # the end of a line.
      def composer_dev_version_regex
        @composer_dev_version_regex ||= %r{(^dev-)|(-dev$)}.freeze
      end

      # Optional leading "@", a slash-separated path, an optional "@" and a
      # second (possibly empty) slash-separated path.
      def package_name_regex
        @package_name_regex ||=
          %r{
              \A\@?
              (?> # atomic group to prevent backtracking
                (([\w\-\.\+]*)\/)*([\w\-\.]+)
              )
              @?
              (?> # atomic group to prevent backtracking
                (([\w\-\.\+]*)\/)*([\w\-\.]*)
              )
              \z
            }x.freeze
      end

      # ASCII alphanumerics plus ".", "_", "-" and "+"; at least one character.
      def maven_file_name_regex
        @maven_file_name_regex ||= %r{\A[A-Za-z0-9\.\_\-\+]+\z}.freeze
      end

      # Optional leading "@" followed by slash-separated segments.
      def maven_path_regex
        @maven_path_regex ||= %r{\A\@?(([\w\-\.]*)/)*([\w\-\.\+]*)\z}.freeze
      end

      # Word characters, "-" and "."; at least one character.
      def maven_app_name_regex
        @maven_app_name_regex ||= /\A[\w\-\.]+\z/.freeze
      end

      # Word characters, "+", "." and "-"; the negative lookahead rejects any
      # value containing two consecutive dots.
      def maven_version_regex
        @maven_version_regex ||= /\A(?!.*\.\.)[\w+.-]+\z/.freeze
      end

      # Group ids follow the same rules as application names.
      def maven_app_group_regex
        maven_app_name_regex
      end

      # Optional "@scope/" prefix (scope validated by the namespace format
      # regex) followed by the package name itself.
      def npm_package_name_regex
        @npm_package_name_regex ||= %r{\A(?:@(#{Gitlab::PathRegex::NAMESPACE_FORMAT_REGEX})/)?[-+\.\_a-zA-Z0-9]+\z}.freeze
      end

      # ASCII alphanumerics plus "-", "+", "." and "_"; at least one character.
      def nuget_package_name_regex
        @nuget_package_name_regex ||= %r{\A[-+\.\_a-zA-Z0-9]+\z}.freeze
      end

      # Semver with an optional fourth ".<digits>" component after the patch.
      def nuget_version_regex
        @nuget_version_regex ||= /
          \A#{_semver_major_minor_patch_regex}(\.\d*)?#{_semver_prerelease_build_regex}\z
        /x.freeze
      end

      # Two "/"-separated segments of lowercase letters, digits and "-".
      def terraform_module_package_name_regex
        @terraform_module_package_name_regex ||= %r{\A[-a-z0-9]+\/[-a-z0-9]+\z}.freeze
      end

      def pypi_version_regex
        # See the official regex: https://github.com/pypa/packaging/blob/16.7/packaging/version.py#L159

        @pypi_version_regex ||= %r{
          \A(?:
            v?
            (?:([0-9]+)!)?                                                 (?# epoch)
            ([0-9]+(?:\.[0-9]+)*)                                          (?# release segment)
            ([-_\.]?((a|b|c|rc|alpha|beta|pre|preview))[-_\.]?([0-9]+)?)?  (?# pre-release)
            ((?:-([0-9]+))|(?:[-_\.]?(post|rev|r)[-_\.]?([0-9]+)?))?       (?# post release)
            ([-_\.]?(dev)[-_\.]?([0-9]+)?)?                                (?# dev release)
            (?:\+([a-z0-9]+(?:[-_\.][a-z0-9]+)*))?                         (?# local version)
            )\z}xi.freeze
      end

      def debian_package_name_regex
        # See official parser
        # https://git.dpkg.org/cgit/dpkg/dpkg.git/tree/lib/dpkg/parsehelp.c?id=9e0c88ec09475f4d1addde9cdba1ad7849720356#n122
        # @debian_package_name_regex ||= %r{\A[a-z0-9][-+\._a-z0-9]*\z}i.freeze
        # But we prefer a more strict version from Lintian
        # https://salsa.debian.org/lintian/lintian/-/blob/5080c0068ffc4a9ddee92022a91d0c2ff53e56d1/lib/Lintian/Util.pm#L116
        @debian_package_name_regex ||= %r{\A[a-z0-9][-+\.a-z0-9]+\z}.freeze
      end

      def debian_version_regex
        # See official parser: https://git.dpkg.org/cgit/dpkg/dpkg.git/tree/lib/dpkg/parsehelp.c?id=9e0c88ec09475f4d1addde9cdba1ad7849720356#n205
        #
        # The revision is a "-" followed by alphanumerics, ".", "+" or "~".
        # Its character class must span all digits (0-9), not just "0".
        @debian_version_regex ||= %r{
          \A(?:
            (?:([0-9]{1,9}):)?            (?# epoch)
            ([0-9][0-9a-z\.+~-]*)         (?# version)
            (?:(-[0-9a-z\.+~]+))?         (?# revision)
            )\z}xi.freeze
      end

      def debian_architecture_regex
        # See official parser: https://git.dpkg.org/cgit/dpkg/dpkg.git/tree/lib/dpkg/arch.c?id=9e0c88ec09475f4d1addde9cdba1ad7849720356#n43
        # But we limit to lower case
        @debian_architecture_regex ||= %r{\A#{::Packages::Debian::ARCHITECTURE_REGEX}\z}.freeze
      end

      # Anchored, case-insensitive wrapper around the Debian distribution regex.
      def debian_distribution_regex
        @debian_distribution_regex ||= %r{\A#{::Packages::Debian::DISTRIBUTION_REGEX}\z}i.freeze
      end

      # Anchored wrapper around the Debian component regex.
      def debian_component_regex
        @debian_component_regex ||= %r{\A#{::Packages::Debian::COMPONENT_REGEX}\z}.freeze
      end

      # 1 to 255 alphanumerics, each optionally followed by a single ".", "-"
      # or "_"; the lookbehind forbids ending on one of those separators.
      def helm_channel_regex
        @helm_channel_regex ||= %r{\A([a-zA-Z0-9](\.|-|_)?){1,255}(?<!\.|-|_)\z}.freeze
      end

      # Package names follow the same rules as channel names.
      def helm_package_regex
        @helm_package_regex ||= %r{#{helm_channel_regex}}.freeze
      end

      def helm_version_regex
        # identical to semver_regex, with optional preceding 'v'
        @helm_version_regex ||= Regexp.new("\\Av?#{::Gitlab::Regex.unbounded_semver_regex.source}\\z", ::Gitlab::Regex.unbounded_semver_regex.options).freeze
      end

      def unbounded_semver_regex
        # See the official regex: https://semver.org/#is-there-a-suggested-regular-expression-regex-to-check-a-semver-string

        # The order of the alternatives in <prerelease> are intentionally
        # reordered to be greedy. Without this change, the unbounded regex would
        # only partially match "v0.0.0-20201230123456-abcdefabcdef".
        @unbounded_semver_regex ||= /
          #{_semver_major_minor_patch_regex}#{_semver_prerelease_build_regex}
        /x.freeze
      end

      # unbounded_semver_regex anchored to the whole string.
      def semver_regex
        @semver_regex ||= Regexp.new("\\A#{::Gitlab::Regex.unbounded_semver_regex.source}\\z", ::Gitlab::Regex.unbounded_semver_regex.options).freeze
      end

      # These partial semver regexes are intended for use in composing other
      # regexes rather than being used alone.
      def _semver_major_minor_patch_regex
        @_semver_major_minor_patch_regex ||= /
          (?<major>0|[1-9]\d*)
          \.(?<minor>0|[1-9]\d*)
          \.(?<patch>0|[1-9]\d*)
        /x.freeze
      end

      def _semver_prerelease_build_regex
        @_semver_prerelease_build_regex ||= /
          (?:-(?<prerelease>(?:\d*[a-zA-Z-][0-9a-zA-Z-]*|[1-9]\d*|0)(?:\.(?:\d*[a-zA-Z-][0-9a-zA-Z-]*|[1-9]\d*|0))*))?
          (?:\+(?<build>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?
        /x.freeze
      end

      def prefixed_semver_regex
        # identical to semver_regex, except starting with 'v'
        @prefixed_semver_regex ||= Regexp.new("\\Av#{::Gitlab::Regex.unbounded_semver_regex.source}\\z", ::Gitlab::Regex.unbounded_semver_regex.options).freeze
      end

      def go_package_regex
        # A Go package name looks like a URL but is not; it:
        #   - Must not have a scheme, such as http:// or https://
        #   - Must not have a port number, such as :8080 or :8443

        @go_package_regex ||= %r{
          \b (?# word boundary)
          (?<domain>
            [0-9a-z](?:(?:-|[0-9a-z]){0,61}[0-9a-z])? (?# first domain)
            (?:\.[0-9a-z](?:(?:-|[0-9a-z]){0,61}[0-9a-z])?)* (?# inner domains)
            \.[a-z]{2,} (?# top-level domain)
          )
          (?<path>/(?:
            [-/$_.+!*'(),0-9a-z] (?# plain URL character)
            | %[0-9a-f]{2})* (?# URL encoded character)
          )? (?# path)
          \b (?# word boundary)
        }ix.freeze
      end

      # Generic packages reuse the Maven version rules.
      def generic_package_version_regex
        maven_version_regex
      end

      # Generic packages reuse the Maven file name rules.
      def generic_package_name_regex
        maven_file_name_regex
      end

      # File names follow the same rules as generic package names.
      def generic_package_file_name_regex
        generic_package_name_regex
      end

      private

      # Unanchored building block: one leading alphanumeric or "_", followed
      # by 1 to 49 characters that may also include "+", "." and "-".
      def conan_name_regex
        @conan_name_regex ||= %r{[a-zA-Z0-9_][a-zA-Z0-9_\+\.-]{1,49}}.freeze
      end
    end

    extend self
    extend Packages

    def project_name_regex
      # The character range \p{Alnum} overlaps with \u{00A9}-\u{1f9ff}
      # hence the Ruby warning.
      # https://gitlab.com/gitlab-org/gitlab/merge_requests/23165#not-easy-fixable
      @project_name_regex ||= /\A[\p{Alnum}\u{00A9}-\u{1f9ff}_][\p{Alnum}\p{Pd}\u{002B}\u{00A9}-\u{1f9ff}_\. ]*\z/.freeze
    end

    def project_name_regex_message
      "can contain only letters, digits, emojis, '_', '.', '+', dashes, or spaces. " \
      "It must start with a letter, digit, emoji, or '_'."
    end

    # group_name_regex_chars anchored to the whole string.
    def group_name_regex
      @group_name_regex ||= /\A#{group_name_regex_chars}\z/.freeze
    end

    # Unanchored pattern of the characters allowed in a group name.
    def group_name_regex_chars
      @group_name_regex_chars ||= /[\p{Alnum}\u{00A9}-\u{1f9ff}_][\p{Alnum}\p{Pd}\u{00A9}-\u{1f9ff}_()\. ]*/.freeze
    end

    def group_name_regex_message
      "can contain only letters, digits, emojis, '_', '.', dash, space, parenthesis. " \
      "It must start with letter, digit, emoji or '_'."
    end

    ##
    # Docker Distribution Registry repository / tag name rules
    #
    # See https://github.com/docker/distribution/blob/master/reference/regexp.go.
    #
    def container_repository_name_regex
      @container_repository_regex ||= %r{\A[a-z0-9]+(([._/]|__|-*)[a-z0-9])*\z}
    end

    ##
    # We do not use regexp anchors here because these are not allowed when
    # used as a routing constraint.
    #
    def container_registry_tag_regex
      @container_registry_tag_regex ||= /\w[\w.-]{0,127}/
    end

    # Character-class body (to be placed inside [...]) for environment names.
    def environment_name_regex_chars
      'a-zA-Z0-9_/\\$\\{\\}\\. \\-'
    end

    # Same as environment_name_regex_chars, minus "/".
    def environment_name_regex_chars_without_slash
      'a-zA-Z0-9_\\$\\{\\}\\. -'
    end

    # The first and last characters are taken from the slash-free set, so a
    # name can neither start nor end with "/".
    def environment_name_regex
      @environment_name_regex ||= /\A[#{environment_name_regex_chars_without_slash}]([#{environment_name_regex_chars}]*[#{environment_name_regex_chars_without_slash}])?\z/.freeze
    end

    def environment_name_regex_message
      "can contain only letters, digits, '-', '_', '/', '$', '{', '}', '.', and spaces, but it cannot start or end with '/'"
    end

    # Environment name characters plus the "*" wildcard.
    def environment_scope_regex_chars
      "#{environment_name_regex_chars}\\*"
    end

    def environment_scope_regex
      @environment_scope_regex ||= /\A[#{environment_scope_regex_chars}]+\z/.freeze
    end

    def environment_scope_regex_message
      "can contain only letters, digits, '-', '_', '/', '$', '{', '}', '.', '*' and spaces"
    end

    # https://gitlab.com/gitlab-org/cluster-integration/gitlab-agent/-/blob/master/doc/identity_and_auth.md#agent-identity-and-name
    def cluster_agent_name_regex
      /\A[a-z0-9]([-a-z0-9]*[a-z0-9])?\z/
    end

    def cluster_agent_name_regex_message
      %q{can contain only lowercase letters, digits, and '-', but cannot start or end with '-'}
    end

    # Lowercase letters, digits and "-"; must start and end with a letter or
    # digit.
    def kubernetes_namespace_regex
      /\A[a-z0-9]([-a-z0-9]*[a-z0-9])?\z/
    end

    # NOTE(review): the message says "Must start with a letter", but
    # kubernetes_namespace_regex also accepts a leading digit. Confirm which
    # one is intended before changing this user-facing string.
    def kubernetes_namespace_regex_message
      "can contain only lowercase letters, digits, and '-'. " \
      "Must start with a letter, and cannot end with '-'"
    end

    # Pod name adheres to DNS Subdomain Names(RFC 1123) naming convention
    # https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#dns-subdomain-names
    def kubernetes_dns_subdomain_regex
      /\A[a-z0-9]([a-z0-9\-\.]*[a-z0-9])?\z/
    end

    def environment_slug_regex
      @environment_slug_regex ||= /\A[a-z]([a-z0-9-]*[a-z0-9])?\z/.freeze
    end

    def environment_slug_regex_message
      "can contain only lowercase letters, digits, and '-'. " \
      "Must start with a letter, and cannot end with '-'"
    end

    # The section start, e.g. section_start:12345678:NAME
    def logs_section_prefix_regex
      /section_((?:start)|(?:end)):(\d+):([a-zA-Z0-9_.-]+)/
    end

    # The optional section options, e.g. [collapsed=true]
    def logs_section_options_regex
      /(\[(?:\w+=\w+)(?:, ?(?:\w+=\w+))*\])?/
    end

    # The region end, always: \r\e\[0K
    def logs_section_suffix_regex
      /\r\033\[0K/
    end

    # Full section marker: prefix, optional options, then suffix.
    def build_trace_section_regex
      @build_trace_section_regexp ||= %r{
        #{logs_section_prefix_regex}
        #{logs_section_options_regex}
        #{logs_section_suffix_regex}
      }x.freeze
    end

    # Matches either a fenced code block (named group "code") or an HTML
    # block (named group "html").
    def markdown_code_or_html_blocks
      @markdown_code_or_html_blocks ||= %r{
          (?<code>
            # Code blocks:
            # ```
            # Anything, including `>>>` blocks which are ignored by this filter
            # ```

            ^```
            .+?
            \n```\ *$
          )
        |
          (?<html>
            # HTML block:
            # <tag>
            # Anything, including `>>>` blocks which are ignored by this filter
            # </tag>

            ^<[^>]+?>\ *\n
            .+?
            \n<\/[^>]+?>\ *$
          )
      }mx
    end

    # Based on Jira's project key format
    # https://confluence.atlassian.com/adminjiraserver073/changing-the-project-key-format-861253229.html
    def jira_issue_key_regex
      @jira_issue_key_regex ||= /[A-Z][A-Z_0-9]+-\d+/
    end

    # Unanchored run of one or more digits.
    def jira_transition_id_regex
      @jira_transition_id_regex ||= /\d+/
    end

    # Any of the CRLF, CR or LF line terminators.
    def breakline_regex
      @breakline_regex ||= /\r\n|\r|\n/
    end

    # https://docs.aws.amazon.com/general/latest/gr/acct-identifiers.html
    def aws_account_id_regex
      /\A\d{12}\z/
    end

    def aws_account_id_message
      'must be a 12-digit number'
    end

    # https://docs.aws.amazon.com/general/latest/gr/aws-arns-and-namespaces.html
    def aws_arn_regex
      /\Aarn:\S+\z/
    end

    def aws_arn_regex_message
      'must be a valid Amazon Resource Name'
    end

    # Shape check only (NNNN-NN-NN); does not validate calendar ranges.
    def utc_date_regex
      @utc_date_regex ||= /\A[0-9]{4}-[0-9]{2}-[0-9]{2}\z/.freeze
    end

    # Case-insensitive "[WIP]" or "WIP:" marker anywhere in the string, or a
    # string that is exactly "WIP".
    def merge_request_wip
      /(?i)(\[WIP\]\s*|WIP:\s*|\AWIP\z)/
    end

    # Case-insensitive "[draft]", "(draft)" or "draft:" prefix.
    def merge_request_draft
      /\A(?i)(\[draft\]|\(draft\)|draft:)/
    end

    # Issue number with an optional "+" format flag; the lookahead requires a
    # non-word character or the end of the string afterwards.
    def issue
      @issue ||= /(?<issue>\d+)(?<format>\+)?(?=\W|\z)/
    end

    # Merge request number with an optional "+" format flag.
    def merge_request
      @merge_request ||= /(?<merge_request>\d+)(?<format>\+)?/
    end

    # Unanchored Base64 pattern: groups of four characters with optional
    # "=" / "==" padding on the final group.
    def base64_regex
      @base64_regex ||= %r{(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?}.freeze
    end

    def feature_flag_regex
      /\A[a-z]([-_a-z0-9]*[a-z0-9])?\z/
    end

    def feature_flag_regex_message
      "can contain only lowercase letters, digits, '_' and '-'. " \
      "Must start with a letter, and cannot end with '-' or '_'"
    end
  end
end

Gitlab::Regex.prepend_mod