1 // Copyright 2010-2018, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 #include "net/jsonpath.h"
31
32 #include <algorithm>
33 #include <cctype>
34 #include <sstream>
35 #include <string>
36 #include <vector>
37
38 #include "base/logging.h"
39 #include "base/number_util.h"
40 #include "base/port.h"
41 #include "base/util.h"
42
43 namespace mozc {
44 namespace net {
45 namespace {
46
47 struct JsonPathNode {
48 enum Type {
49 UNDEFINED_INDEX,
50 OBJECT_INDEX,
51 ARRAY_INDEX,
52 SLICE_INDEX
53 };
54 Type type;
55 string object_index;
56 int array_index;
57 int slice_start;
58 int slice_end;
59 int slice_step;
60
61 static const int kSliceUndef = kint32max;
62
IsUndefmozc::net::__anon82d093630111::JsonPathNode63 static bool IsUndef(int n) {
64 return n == kSliceUndef;
65 }
66
DebugStringmozc::net::__anon82d093630111::JsonPathNode67 string DebugString() const {
68 std::ostringstream os;
69 os << "{" << type << ":" << array_index << ":" << object_index
70 << ":(" << slice_start << ":" << slice_end << ":" << slice_step << ")}";
71 return os.str();
72 }
73
JsonPathNodemozc::net::__anon82d093630111::JsonPathNode74 JsonPathNode() :
75 type(UNDEFINED_INDEX), array_index(0),
76 slice_start(0), slice_end(0), slice_step(0) {}
~JsonPathNodemozc::net::__anon82d093630111::JsonPathNode77 ~JsonPathNode() {}
78 };
79
GetDigit(const string & str,int * output)80 bool GetDigit(const string &str, int *output) {
81 DCHECK(output);
82 if (str.empty()) {
83 return false;
84 }
85
86 const char *begin = str.data();
87 const char *end = str.data() + str.size();
88 if (str[0] == '-') {
89 if (str.size() == 1) {
90 return false;
91 }
92 ++begin;
93 }
94
95 while (begin < end) {
96 if (!isdigit(*begin)) {
97 return false;
98 }
99 ++begin;
100 }
101
102 *output = NumberUtil::SimpleAtoi(str);
103
104 return true;
105 }
106
GetSliceDigit(const string & str,int * output)107 bool GetSliceDigit(const string &str, int *output) {
108 if (str.empty()) {
109 *output = JsonPathNode::kSliceUndef;
110 return true;
111 }
112 return GetDigit(str, output);
113 }
114
GetQuotedString(const string & str,const char c,string * output)115 bool GetQuotedString(const string &str, const char c,
116 string *output) {
117 if (str.size() >= 2 &&
118 str[0] == c && str[str.size() - 1] == c) {
119 *output = str.substr(1, str.size() - 2);
120 return true;
121 }
122 return false;
123 }
124
125 typedef std::vector<JsonPathNode> JsonPathNodes;
126
127 class JsonPathExp : public std::vector<std::vector<JsonPathNode> > {
128 public:
Parse(const string & jsonpath)129 bool Parse(const string &jsonpath) {
130 clear();
131 if (jsonpath.size() <= 1 || jsonpath[0] != '$') {
132 LOG(ERROR) << "Not starts with $";
133 return false;
134 }
135
136 if (Util::EndsWith(jsonpath, ".") ||
137 jsonpath.find("...") != string::npos) {
138 LOG(ERROR) << "Parse error: " << jsonpath;
139 return false;
140 }
141
142 if (jsonpath.find("(") != string::npos ||
143 jsonpath.find(")") != string::npos ||
144 jsonpath.find("@") != string::npos ||
145 jsonpath.find("?") != string::npos) {
146 LOG(ERROR) << "script expression/current node are not supported: "
147 << jsonpath;
148 return false;
149 }
150
151 const char *begin = jsonpath.data() + 1;
152 const char *end = jsonpath.data() + jsonpath.size();
153
154 string item;
155 for (; begin < end; ++begin) {
156 if (*begin == ']') {
157 return false;
158 }
159 if (*begin == '.' || *begin == '[') {
160 if (!item.empty()) {
161 if (!AddNodes(item, OUT_BRACKET)) {
162 return false;
163 }
164 item.clear();
165 }
166 if (*begin == '.' && begin + 1 < end && *(begin + 1) == '.') {
167 ++begin;
168 if (!AddNodes(".", OUT_BRACKET)) {
169 return false;
170 }
171 } else if (*begin == '[') {
172 ++begin;
173 string exp;
174 while (*begin != ']') {
175 if (begin == end) {
176 LOG(ERROR) << "Cannot find closing \"]\"";
177 return false;
178 }
179 exp += *begin;
180 ++begin;
181 }
182 if (!AddNodes(exp, IN_BRACKET)) {
183 return false;
184 }
185 }
186 } else {
187 item += *begin;
188 }
189 }
190
191 if (!item.empty()) {
192 if (!AddNodes(item, OUT_BRACKET)) {
193 return false;
194 }
195 }
196
197 return !empty();
198 }
199
DebugString() const200 string DebugString() const {
201 std::ostringstream os;
202 for (size_t i = 0; i < size(); ++i) {
203 os << "[";
204 for (size_t j = 0; j < (*this)[i].size(); ++j) {
205 os << (*this)[i][j].DebugString();
206 }
207 os << "]";
208 }
209 return os.str();
210 }
211
212 private:
213 enum NodesType {
214 IN_BRACKET,
215 OUT_BRACKET
216 };
217
AddNodes(const string & nodes_exp,NodesType nodes_type)218 bool AddNodes(const string &nodes_exp, NodesType nodes_type) {
219 if (nodes_exp.empty()) {
220 return false;
221 }
222
223 resize(size() + 1);
224 JsonPathNodes *nodes = &(back());
225 DCHECK(nodes);
226
227 if (nodes_type == OUT_BRACKET) {
228 JsonPathNode node;
229 node.type = JsonPathNode::OBJECT_INDEX;
230 node.object_index = nodes_exp;
231 nodes->push_back(node);
232 } else if (nodes_type == IN_BRACKET) {
233 std::vector<string> nodes_exps;
234 Util::SplitStringUsing(nodes_exp, ",", &nodes_exps);
235 for (size_t i = 0; i < nodes_exps.size(); ++i) {
236 JsonPathNode node;
237 node.type = JsonPathNode::UNDEFINED_INDEX;
238 string in_nodes_exp;
239 if (GetQuotedString(nodes_exps[i], '\'', &in_nodes_exp) ||
240 GetQuotedString(nodes_exps[i], '\"', &in_nodes_exp)) {
241 node.type = JsonPathNode::OBJECT_INDEX;
242 node.object_index = in_nodes_exp;
243 } else if (nodes_exps[i] == "*") {
244 node.type = JsonPathNode::OBJECT_INDEX;
245 node.object_index = "*";
246 } else {
247 std::vector<string> slice;
248 Util::SplitStringAllowEmpty(nodes_exps[i], ":", &slice);
249 if (slice.size() == 1) {
250 if (GetDigit(slice[0], &node.array_index)) {
251 node.type = JsonPathNode::ARRAY_INDEX;
252 } else {
253 // fallback to OBJECT_INDEX.
254 node.type = JsonPathNode::OBJECT_INDEX;
255 node.object_index = slice[0];
256 }
257 } else if (slice.size() == 2 &&
258 GetSliceDigit(slice[0], &node.slice_start) &&
259 GetSliceDigit(slice[1], &node.slice_end)) {
260 node.slice_step = JsonPathNode::kSliceUndef;
261 node.type = JsonPathNode::SLICE_INDEX;
262 } else if (slice.size() == 3 &&
263 GetSliceDigit(slice[0], &node.slice_start) &&
264 GetSliceDigit(slice[1], &node.slice_end) &&
265 GetSliceDigit(slice[2], &node.slice_step)) {
266 node.type = JsonPathNode::SLICE_INDEX;
267 }
268 }
269
270 if (node.type == JsonPathNode::UNDEFINED_INDEX) {
271 return false;
272 }
273
274 nodes->push_back(node);
275 }
276 } else {
277 LOG(FATAL) << "Unknown nodes type";
278 }
279
280 return true;
281 }
282 };
283
284 // Find all Json::Value(s) from root node |value|,
285 // which matches to |nodes|.
CollectValuesRecursively(const Json::Value & value,const JsonPathNodes & nodes,std::vector<const Json::Value * > * output)286 void CollectValuesRecursively(const Json::Value &value,
287 const JsonPathNodes &nodes,
288 std::vector<const Json::Value *> *output) {
289 DCHECK(output);
290 for (size_t node_index = 0; node_index < nodes.size(); ++node_index) {
291 if (nodes[node_index].type != JsonPathNode::OBJECT_INDEX) {
292 continue;
293 }
294 if (value.isObject()) {
295 const Json::Value::Members members = value.getMemberNames();
296 const string &object_index = nodes[node_index].object_index;
297 if (object_index != "*" && value.isMember(object_index)) {
298 output->push_back(&value[object_index]);
299 }
300 for (size_t i = 0; i < members.size(); ++i) {
301 const Json::Value &v = value[members[i]];
302 if (object_index == "*") {
303 output->push_back(&v);
304 }
305 CollectValuesRecursively(v, nodes, output);
306 }
307 } else if (value.isArray()) {
308 for (Json::ArrayIndex i = 0; i < value.size(); ++i) {
309 CollectValuesRecursively(value[i], nodes, output);
310 }
311 }
312 }
313 }
314
CollectNodesFromJson(const Json::Value & value,const JsonPathExp & jsonpathexp,size_t depth,std::vector<const Json::Value * > * output)315 void CollectNodesFromJson(const Json::Value &value,
316 const JsonPathExp &jsonpathexp,
317 size_t depth,
318 std::vector<const Json::Value *> *output) {
319 if (depth >= jsonpathexp.size()) {
320 output->push_back(&value);
321 return;
322 }
323
324 const JsonPathNodes &nodes = jsonpathexp[depth];
325
326 for (size_t node_index = 0; node_index < nodes.size(); ++node_index) {
327 const JsonPathNode &node = nodes[node_index];
328 if (node.type == JsonPathNode::OBJECT_INDEX) {
329 if (node.object_index == "*") {
330 if (value.isObject()) {
331 const Json::Value::Members members = value.getMemberNames();
332 for (size_t i = 0; i < members.size(); ++i) {
333 CollectNodesFromJson(value[members[i]],
334 jsonpathexp, depth + 1, output);
335 }
336 } else if (value.isArray()) {
337 for (Json::ArrayIndex i = 0; i < value.size(); ++i) {
338 CollectNodesFromJson(value[i], jsonpathexp, depth + 1, output);
339 }
340 } else {
341 CollectNodesFromJson(value, jsonpathexp, depth + 1, output);
342 }
343 } else if (node.object_index == "." && depth + 1 < jsonpathexp.size()) {
344 std::vector<const Json::Value *> matched_values;
345 CollectValuesRecursively(value, jsonpathexp[depth + 1],
346 &matched_values);
347 for (size_t i = 0; i < matched_values.size(); ++i) {
348 CollectNodesFromJson(*(matched_values[i]),
349 jsonpathexp, depth + 2, output);
350 }
351 } else if (value.isObject() && value.isMember(node.object_index)) {
352 CollectNodesFromJson(value[node.object_index],
353 jsonpathexp, depth + 1, output);
354 }
355 } else if (node.type == JsonPathNode::ARRAY_INDEX) {
356 const int i = node.array_index >= 0 ?
357 node.array_index : value.size() + node.array_index;
358 if (value.isArray() && value.isValidIndex(i)) {
359 CollectNodesFromJson(value[i], jsonpathexp, depth + 1, output);
360 }
361 } else if (node.type == JsonPathNode::SLICE_INDEX) {
362 if (value.isArray()) {
363 const int size = static_cast<int>(value.size());
364 int start = JsonPathNode::IsUndef(node.slice_start) ?
365 0 : node.slice_start;
366 int end = JsonPathNode::IsUndef(node.slice_end) ?
367 size : node.slice_end;
368 const int step = JsonPathNode::IsUndef(node.slice_step) ?
369 1 : node.slice_step;
370 start = (start < 0) ? std::max(0, start + size) : std::min(size, start);
371 end = (end < 0) ? std::max(0, end + size) : std::min(size, end);
372 if (step > 0 && end > start) {
373 for (int i = start; i < end; i += step) {
374 if (value.isValidIndex(i)) {
375 CollectNodesFromJson(value[i], jsonpathexp, depth + 1, output);
376 }
377 }
378 } else if (step < 0 && end < start) {
379 for (int i = start; i > end; i += step) {
380 if (value.isValidIndex(i)) {
381 CollectNodesFromJson(value[i], jsonpathexp, depth + 1, output);
382 }
383 }
384 }
385 }
386 } else {
387 LOG(FATAL) << "Unknown type";
388 }
389 }
390 }
391 } // namespace
392
393 // static
Parse(const Json::Value & root,const string & jsonpath,std::vector<const Json::Value * > * output)394 bool JsonPath::Parse(const Json::Value &root,
395 const string &jsonpath,
396 std::vector<const Json::Value *> *output) {
397 JsonPathExp jsonpathexp;
398 if (!jsonpathexp.Parse(jsonpath)) {
399 LOG(WARNING) << "Parsing JsonPath error: " << jsonpath;
400 return false;
401 }
402
403 VLOG(1) << jsonpathexp.DebugString();
404
405 DCHECK(output);
406 CollectNodesFromJson(root, jsonpathexp, 0, output);
407 return true;
408 }
409 } // namespace net
410 } // namespace mozc
411