1cb14a3feSDimitry Andric //===--- MatchFilePath.cpp - Match file path with pattern -------*- C++ -*-===//
2cb14a3feSDimitry Andric //
3cb14a3feSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4cb14a3feSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5cb14a3feSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6cb14a3feSDimitry Andric //
7cb14a3feSDimitry Andric //===----------------------------------------------------------------------===//
8cb14a3feSDimitry Andric ///
9cb14a3feSDimitry Andric /// \file
10cb14a3feSDimitry Andric /// This file implements the functionality of matching a file path name to
11cb14a3feSDimitry Andric /// a pattern, similar to the POSIX fnmatch() function.
12cb14a3feSDimitry Andric ///
13cb14a3feSDimitry Andric //===----------------------------------------------------------------------===//
14cb14a3feSDimitry Andric 
15cb14a3feSDimitry Andric #include "MatchFilePath.h"
16cb14a3feSDimitry Andric 
17cb14a3feSDimitry Andric using namespace llvm;
18cb14a3feSDimitry Andric 
19cb14a3feSDimitry Andric namespace clang {
20cb14a3feSDimitry Andric namespace format {
21cb14a3feSDimitry Andric 
22*647cbc5dSDimitry Andric // Check whether `FilePath` matches `Pattern` based on POSIX 2.13.1, 2.13.2, and
23*647cbc5dSDimitry Andric // Rule 1 of 2.13.3.
matchFilePath(StringRef Pattern,StringRef FilePath)24cb14a3feSDimitry Andric bool matchFilePath(StringRef Pattern, StringRef FilePath) {
25cb14a3feSDimitry Andric   assert(!Pattern.empty());
26cb14a3feSDimitry Andric   assert(!FilePath.empty());
27cb14a3feSDimitry Andric 
28cb14a3feSDimitry Andric   // No match if `Pattern` ends with a non-meta character not equal to the last
29cb14a3feSDimitry Andric   // character of `FilePath`.
30cb14a3feSDimitry Andric   if (const auto C = Pattern.back(); !strchr("?*]", C) && C != FilePath.back())
31cb14a3feSDimitry Andric     return false;
32cb14a3feSDimitry Andric 
33cb14a3feSDimitry Andric   constexpr auto Separator = '/';
34cb14a3feSDimitry Andric   const auto EOP = Pattern.size();  // End of `Pattern`.
35cb14a3feSDimitry Andric   const auto End = FilePath.size(); // End of `FilePath`.
36cb14a3feSDimitry Andric   unsigned I = 0;                   // Index to `Pattern`.
37cb14a3feSDimitry Andric 
38cb14a3feSDimitry Andric   for (unsigned J = 0; J < End; ++J) {
39cb14a3feSDimitry Andric     if (I == EOP)
40cb14a3feSDimitry Andric       return false;
41cb14a3feSDimitry Andric 
42cb14a3feSDimitry Andric     switch (const auto F = FilePath[J]; Pattern[I]) {
43cb14a3feSDimitry Andric     case '\\':
44cb14a3feSDimitry Andric       if (++I == EOP || F != Pattern[I])
45cb14a3feSDimitry Andric         return false;
46cb14a3feSDimitry Andric       break;
47cb14a3feSDimitry Andric     case '?':
48cb14a3feSDimitry Andric       if (F == Separator)
49cb14a3feSDimitry Andric         return false;
50cb14a3feSDimitry Andric       break;
51cb14a3feSDimitry Andric     case '*': {
52cb14a3feSDimitry Andric       while (++I < EOP && Pattern[I] == '*') { // Skip consecutive stars.
53cb14a3feSDimitry Andric       }
54cb14a3feSDimitry Andric       const auto K = FilePath.find(Separator, J); // Index of next `Separator`.
55cb14a3feSDimitry Andric       const bool NoMoreSeparatorsInFilePath = K == StringRef::npos;
56cb14a3feSDimitry Andric       if (I == EOP) // `Pattern` ends with a star.
57cb14a3feSDimitry Andric         return NoMoreSeparatorsInFilePath;
58cb14a3feSDimitry Andric       // `Pattern` ends with a lone backslash.
59cb14a3feSDimitry Andric       if (Pattern[I] == '\\' && ++I == EOP)
60cb14a3feSDimitry Andric         return false;
61cb14a3feSDimitry Andric       // The star is followed by a (possibly escaped) `Separator`.
62cb14a3feSDimitry Andric       if (Pattern[I] == Separator) {
63cb14a3feSDimitry Andric         if (NoMoreSeparatorsInFilePath)
64cb14a3feSDimitry Andric           return false;
65cb14a3feSDimitry Andric         J = K; // Skip to next `Separator` in `FilePath`.
66cb14a3feSDimitry Andric         break;
67cb14a3feSDimitry Andric       }
68cb14a3feSDimitry Andric       // Recurse.
69cb14a3feSDimitry Andric       for (auto Pat = Pattern.substr(I); J < End && FilePath[J] != Separator;
70cb14a3feSDimitry Andric            ++J) {
71cb14a3feSDimitry Andric         if (matchFilePath(Pat, FilePath.substr(J)))
72cb14a3feSDimitry Andric           return true;
73cb14a3feSDimitry Andric       }
74cb14a3feSDimitry Andric       return false;
75cb14a3feSDimitry Andric     }
76cb14a3feSDimitry Andric     case '[':
77cb14a3feSDimitry Andric       // Skip e.g. `[!]`.
78cb14a3feSDimitry Andric       if (I + 3 < EOP || (I + 3 == EOP && Pattern[I + 1] != '!')) {
79cb14a3feSDimitry Andric         // Skip unpaired `[`, brackets containing slashes, and `[]`.
80cb14a3feSDimitry Andric         if (const auto K = Pattern.find_first_of("]/", I + 1);
81cb14a3feSDimitry Andric             K != StringRef::npos && Pattern[K] == ']' && K > I + 1) {
82cb14a3feSDimitry Andric           if (F == Separator)
83cb14a3feSDimitry Andric             return false;
84cb14a3feSDimitry Andric           ++I; // After the `[`.
85cb14a3feSDimitry Andric           bool Negated = false;
86cb14a3feSDimitry Andric           if (Pattern[I] == '!') {
87cb14a3feSDimitry Andric             Negated = true;
88cb14a3feSDimitry Andric             ++I; // After the `!`.
89cb14a3feSDimitry Andric           }
90cb14a3feSDimitry Andric           bool Match = false;
91cb14a3feSDimitry Andric           do {
92cb14a3feSDimitry Andric             if (I + 2 < K && Pattern[I + 1] == '-') {
93cb14a3feSDimitry Andric               Match = Pattern[I] <= F && F <= Pattern[I + 2];
94cb14a3feSDimitry Andric               I += 3; // After the range, e.g. `A-Z`.
95cb14a3feSDimitry Andric             } else {
96cb14a3feSDimitry Andric               Match = F == Pattern[I++];
97cb14a3feSDimitry Andric             }
98cb14a3feSDimitry Andric           } while (!Match && I < K);
99cb14a3feSDimitry Andric           if (Negated ? Match : !Match)
100cb14a3feSDimitry Andric             return false;
101cb14a3feSDimitry Andric           I = K + 1; // After the `]`.
102cb14a3feSDimitry Andric           continue;
103cb14a3feSDimitry Andric         }
104cb14a3feSDimitry Andric       }
105cb14a3feSDimitry Andric       [[fallthrough]]; // Match `[` literally.
106cb14a3feSDimitry Andric     default:
107cb14a3feSDimitry Andric       if (F != Pattern[I])
108cb14a3feSDimitry Andric         return false;
109cb14a3feSDimitry Andric     }
110cb14a3feSDimitry Andric 
111cb14a3feSDimitry Andric     ++I;
112cb14a3feSDimitry Andric   }
113cb14a3feSDimitry Andric 
114cb14a3feSDimitry Andric   // Match trailing stars with null strings.
115cb14a3feSDimitry Andric   while (I < EOP && Pattern[I] == '*')
116cb14a3feSDimitry Andric     ++I;
117cb14a3feSDimitry Andric 
118cb14a3feSDimitry Andric   return I == EOP;
119cb14a3feSDimitry Andric }
120cb14a3feSDimitry Andric 
121cb14a3feSDimitry Andric } // namespace format
122cb14a3feSDimitry Andric } // namespace clang
123