1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include <iostream>
5 #include <stack>
6 
7 #include "filterrb.h"
8 #include "errmsg.h"
9 
10 
11 const char* PathFilter::kEInclusionNames[] = {
12     "INCLUDE",
13     "PARTIAL",
14     "EXCLUDE"
15 };
16 
17 
ResKeyPath()18 ResKeyPath::ResKeyPath() {}
19 
ResKeyPath(const std::string & path,UErrorCode & status)20 ResKeyPath::ResKeyPath(const std::string& path, UErrorCode& status) {
21     if (path.empty() || path[0] != '/') {
22         std::cerr << "genrb error: path must start with /: " << path << std::endl;
23         status = U_PARSE_ERROR;
24         return;
25     }
26     if (path.length() == 1) {
27         return;
28     }
29     size_t i;
30     size_t j = 0;
31     while (true) {
32         i = j + 1;
33         j = path.find('/', i);
34         std::string key = path.substr(i, j - i);
35         if (key.empty()) {
36             std::cerr << "genrb error: empty subpaths and trailing slashes are not allowed: " << path << std::endl;
37             status = U_PARSE_ERROR;
38             return;
39         }
40         push(key);
41         if (j == std::string::npos) {
42             break;
43         }
44     }
45 }
46 
push(const std::string & key)47 void ResKeyPath::push(const std::string& key) {
48     fPath.push_back(key);
49 }
50 
pop()51 void ResKeyPath::pop() {
52     fPath.pop_back();
53 }
54 
pieces() const55 const std::list<std::string>& ResKeyPath::pieces() const {
56     return fPath;
57 }
58 
operator <<(std::ostream & out,const ResKeyPath & value)59 std::ostream& operator<<(std::ostream& out, const ResKeyPath& value) {
60     if (value.pieces().empty()) {
61         out << "/";
62     } else for (auto& key : value.pieces()) {
63         out << "/" << key;
64     }
65     return out;
66 }
67 
68 
69 PathFilter::~PathFilter() = default;
70 
71 
addRule(const std::string & ruleLine,UErrorCode & status)72 void SimpleRuleBasedPathFilter::addRule(const std::string& ruleLine, UErrorCode& status) {
73     if (ruleLine.empty()) {
74         std::cerr << "genrb error: empty filter rules are not allowed" << std::endl;
75         status = U_PARSE_ERROR;
76         return;
77     }
78     bool inclusionRule = false;
79     if (ruleLine[0] == '+') {
80         inclusionRule = true;
81     } else if (ruleLine[0] != '-') {
82         std::cerr << "genrb error: rules must start with + or -: " << ruleLine << std::endl;
83         status = U_PARSE_ERROR;
84         return;
85     }
86     ResKeyPath path(ruleLine.substr(1), status);
87     addRule(path, inclusionRule, status);
88 }
89 
addRule(const ResKeyPath & path,bool inclusionRule,UErrorCode & status)90 void SimpleRuleBasedPathFilter::addRule(const ResKeyPath& path, bool inclusionRule, UErrorCode& status) {
91     if (U_FAILURE(status)) {
92         return;
93     }
94     fRoot.applyRule(path, path.pieces().begin(), inclusionRule, status);
95 }
96 
match(const ResKeyPath & path) const97 PathFilter::EInclusion SimpleRuleBasedPathFilter::match(const ResKeyPath& path) const {
98     const Tree* node = &fRoot;
99 
100     // defaultResult "bubbles up" the nearest "definite" inclusion/exclusion rule
101     EInclusion defaultResult = INCLUDE;
102     if (node->fIncluded != PARTIAL) {
103         // rules handled here: "+/" and "-/"
104         defaultResult = node->fIncluded;
105     }
106 
107     // isLeaf is whether the filter tree can provide no additional information
108     // even if additional subpaths are added to the given key
109     bool isLeaf = false;
110 
111     for (auto& key : path.pieces()) {
112         auto child = node->fChildren.find(key);
113         // Leaf case 1: input path descends outside the filter tree
114         if (child == node->fChildren.end()) {
115             if (node->fWildcard) {
116                 // A wildcard pattern is present; continue checking
117                 node = node->fWildcard.get();
118             } else {
119                 isLeaf = true;
120                 break;
121             }
122         } else {
123             node = &child->second;
124         }
125         if (node->fIncluded != PARTIAL) {
126             defaultResult = node->fIncluded;
127         }
128     }
129 
130     // Leaf case 2: input path exactly matches a filter leaf
131     if (node->isLeaf()) {
132         isLeaf = true;
133     }
134 
135     // Always return PARTIAL if we are not at a leaf
136     if (!isLeaf) {
137         return PARTIAL;
138     }
139 
140     // If leaf node is PARTIAL, return the default
141     if (node->fIncluded == PARTIAL) {
142         return defaultResult;
143     }
144 
145     return node->fIncluded;
146 }
147 
148 
Tree(const Tree & other)149 SimpleRuleBasedPathFilter::Tree::Tree(const Tree& other)
150         : fIncluded(other.fIncluded), fChildren(other.fChildren) {
151     // Note: can't use the default copy assignment because of the std::unique_ptr
152     if (other.fWildcard) {
153         fWildcard.reset(new Tree(*other.fWildcard));
154     }
155 }
156 
isLeaf() const157 bool SimpleRuleBasedPathFilter::Tree::isLeaf() const {
158     return fChildren.empty() && !fWildcard;
159 }
160 
applyRule(const ResKeyPath & path,std::list<std::string>::const_iterator it,bool inclusionRule,UErrorCode & status)161 void SimpleRuleBasedPathFilter::Tree::applyRule(
162         const ResKeyPath& path,
163         std::list<std::string>::const_iterator it,
164         bool inclusionRule,
165         UErrorCode& status) {
166 
167     // Base Case
168     if (it == path.pieces().end()) {
169         if (isVerbose() && (fIncluded != PARTIAL || !isLeaf())) {
170             std::cout << "genrb info: rule on path " << path
171                 << " overrides previous rules" << std::endl;
172         }
173         fIncluded = inclusionRule ? INCLUDE : EXCLUDE;
174         fChildren.clear();
175         fWildcard.reset();
176         return;
177     }
178 
179     // Recursive Step
180     auto& key = *it;
181     if (key == "*") {
182         // Case 1: Wildcard
183         if (!fWildcard) {
184             fWildcard.reset(new Tree());
185         }
186         // Apply the rule to fWildcard and also to all existing children.
187         it++;
188         fWildcard->applyRule(path, it, inclusionRule, status);
189         for (auto& child : fChildren) {
190             child.second.applyRule(path, it, inclusionRule, status);
191         }
192         it--;
193 
194     } else {
195         // Case 2: Normal Key
196         auto search = fChildren.find(key);
197         if (search == fChildren.end()) {
198             if (fWildcard) {
199                 // Deep-copy the existing wildcard tree into the new key
200                 search = fChildren.emplace(key, Tree(*fWildcard)).first;
201             } else {
202                 search = fChildren.emplace(key, Tree()).first;
203             }
204         }
205         it++;
206         search->second.applyRule(path, it, inclusionRule, status);
207         it--;
208     }
209 }
210 
print(std::ostream & out,int32_t indent) const211 void SimpleRuleBasedPathFilter::Tree::print(std::ostream& out, int32_t indent) const {
212     for (int32_t i=0; i<indent; i++) out << "\t";
213     out << "included: " << kEInclusionNames[fIncluded] << std::endl;
214     for (auto& child : fChildren) {
215         for (int32_t i=0; i<indent; i++) out << "\t";
216         out << child.first << ": {" << std::endl;
217         child.second.print(out, indent + 1);
218         for (int32_t i=0; i<indent; i++) out << "\t";
219         out << "}" << std::endl;
220     }
221     if (fWildcard) {
222         for (int32_t i=0; i<indent; i++) out << "\t";
223         out << "* {" << std::endl;
224         fWildcard->print(out, indent + 1);
225         for (int32_t i=0; i<indent; i++) out << "\t";
226         out << "}" << std::endl;
227     }
228 }
229 
print(std::ostream & out) const230 void SimpleRuleBasedPathFilter::print(std::ostream& out) const {
231     out << "SimpleRuleBasedPathFilter {" << std::endl;
232     fRoot.print(out, 1);
233     out << "}" << std::endl;
234 }
235 
operator <<(std::ostream & out,const SimpleRuleBasedPathFilter & value)236 std::ostream& operator<<(std::ostream& out, const SimpleRuleBasedPathFilter& value) {
237     value.print(out);
238     return out;
239 }
240