1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/strings/pattern.h"
6 
7 #include "base/third_party/icu/icu_utf.h"
8 
9 namespace base {
10 
11 namespace {
12 
IsWildcard(base_icu::UChar32 character)13 constexpr bool IsWildcard(base_icu::UChar32 character) {
14   return character == '*' || character == '?';
15 }
16 
17 // Searches for the next subpattern of |pattern| in |string|, up to the given
18 // |maximum_distance|. The subpattern extends from the start of |pattern| up to
19 // the first wildcard character (or the end of the string). If the value of
20 // |maximum_distance| is negative, the maximum distance is considered infinite.
21 template <typename CHAR, typename NEXT>
SearchForChars(const CHAR ** pattern,const CHAR * pattern_end,const CHAR ** string,const CHAR * string_end,int maximum_distance,NEXT next)22 constexpr bool SearchForChars(const CHAR** pattern,
23                               const CHAR* pattern_end,
24                               const CHAR** string,
25                               const CHAR* string_end,
26                               int maximum_distance,
27                               NEXT next) {
28   const CHAR* pattern_start = *pattern;
29   const CHAR* string_start = *string;
30   bool escape = false;
31   while (true) {
32     if (*pattern == pattern_end) {
33       // If this is the end of the pattern, only accept the end of the string;
34       // anything else falls through to the mismatch case.
35       if (*string == string_end)
36         return true;
37     } else {
38       // If we have found a wildcard, we're done.
39       if (!escape && IsWildcard(**pattern))
40         return true;
41 
42       // Check if the escape character is found. If so, skip it and move to the
43       // next character.
44       if (!escape && **pattern == '\\') {
45         escape = true;
46         next(pattern, pattern_end);
47         continue;
48       }
49 
50       escape = false;
51 
52       if (*string == string_end)
53         return false;
54 
55       // Check if the chars match, if so, increment the ptrs.
56       const CHAR* pattern_next = *pattern;
57       const CHAR* string_next = *string;
58       base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
59       if (pattern_char == next(&string_next, string_end) &&
60           pattern_char != CBU_SENTINEL) {
61         *pattern = pattern_next;
62         *string = string_next;
63         continue;
64       }
65     }
66 
67     // Mismatch. If we have reached the maximum distance, return false,
68     // otherwise restart at the beginning of the pattern with the next character
69     // in the string.
70     // TODO(bauerb): This is a naive implementation of substring search, which
71     // could be implemented with a more efficient algorithm, e.g.
72     // Knuth-Morris-Pratt (at the expense of requiring preprocessing).
73     if (maximum_distance == 0)
74       return false;
75 
76     // Because unlimited distance is represented as -1, this will never reach 0
77     // and therefore fail the match above.
78     maximum_distance--;
79     *pattern = pattern_start;
80     next(&string_start, string_end);
81     *string = string_start;
82   }
83 }
84 
85 // Consumes consecutive wildcard characters (? or *). Returns the maximum number
86 // of characters matched by the sequence of wildcards, or -1 if the wildcards
87 // match an arbitrary number of characters (which is the case if it contains at
88 // least one *).
89 template <typename CHAR, typename NEXT>
EatWildcards(const CHAR ** pattern,const CHAR * end,NEXT next)90 constexpr int EatWildcards(const CHAR** pattern, const CHAR* end, NEXT next) {
91   int num_question_marks = 0;
92   bool has_asterisk = false;
93   while (*pattern != end) {
94     if (**pattern == '?') {
95       num_question_marks++;
96     } else if (**pattern == '*') {
97       has_asterisk = true;
98     } else {
99       break;
100     }
101 
102     next(pattern, end);
103   }
104   return has_asterisk ? -1 : num_question_marks;
105 }
106 
107 template <typename CHAR, typename NEXT>
MatchPatternT(const CHAR * eval,const CHAR * eval_end,const CHAR * pattern,const CHAR * pattern_end,NEXT next)108 constexpr bool MatchPatternT(const CHAR* eval,
109                              const CHAR* eval_end,
110                              const CHAR* pattern,
111                              const CHAR* pattern_end,
112                              NEXT next) {
113   do {
114     int maximum_wildcard_length = EatWildcards(&pattern, pattern_end, next);
115     if (!SearchForChars(&pattern, pattern_end, &eval, eval_end,
116                         maximum_wildcard_length, next)) {
117       return false;
118     }
119   } while (pattern != pattern_end);
120   return true;
121 }
122 
123 struct NextCharUTF8 {
operator ()base::__anon9481b8950111::NextCharUTF8124   base_icu::UChar32 operator()(const char** p, const char* end) {
125     base_icu::UChar32 c;
126     int offset = 0;
127     CBU8_NEXT(*p, offset, end - *p, c);
128     *p += offset;
129     return c;
130   }
131 };
132 
133 struct NextCharUTF16 {
operator ()base::__anon9481b8950111::NextCharUTF16134   base_icu::UChar32 operator()(const char16** p, const char16* end) {
135     base_icu::UChar32 c;
136     int offset = 0;
137     CBU16_NEXT(*p, offset, end - *p, c);
138     *p += offset;
139     return c;
140   }
141 };
142 
143 }  // namespace
144 
MatchPattern(StringPiece eval,StringPiece pattern)145 bool MatchPattern(StringPiece eval, StringPiece pattern) {
146   return MatchPatternT(eval.data(), eval.data() + eval.size(), pattern.data(),
147                        pattern.data() + pattern.size(), NextCharUTF8());
148 }
149 
MatchPattern(StringPiece16 eval,StringPiece16 pattern)150 bool MatchPattern(StringPiece16 eval, StringPiece16 pattern) {
151   return MatchPatternT(eval.data(), eval.data() + eval.size(), pattern.data(),
152                        pattern.data() + pattern.size(), NextCharUTF16());
153 }
154 
155 }  // namespace base
156