1 // Copyright 2010-2018, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 #include "net/jsonpath.h"
31 
32 #include <algorithm>
33 #include <cctype>
34 #include <sstream>
35 #include <string>
36 #include <vector>
37 
38 #include "base/logging.h"
39 #include "base/number_util.h"
40 #include "base/port.h"
41 #include "base/util.h"
42 
43 namespace mozc {
44 namespace net {
45 namespace {
46 
47 struct JsonPathNode {
48   enum Type {
49     UNDEFINED_INDEX,
50     OBJECT_INDEX,
51     ARRAY_INDEX,
52     SLICE_INDEX
53   };
54   Type type;
55   string object_index;
56   int array_index;
57   int slice_start;
58   int slice_end;
59   int slice_step;
60 
61   static const int kSliceUndef = kint32max;
62 
IsUndefmozc::net::__anon82d093630111::JsonPathNode63   static bool IsUndef(int n) {
64     return n == kSliceUndef;
65   }
66 
DebugStringmozc::net::__anon82d093630111::JsonPathNode67   string DebugString() const {
68     std::ostringstream os;
69     os << "{" << type << ":" << array_index << ":" << object_index
70        << ":(" << slice_start << ":" << slice_end << ":" << slice_step << ")}";
71     return os.str();
72   }
73 
JsonPathNodemozc::net::__anon82d093630111::JsonPathNode74   JsonPathNode() :
75       type(UNDEFINED_INDEX), array_index(0),
76       slice_start(0), slice_end(0), slice_step(0) {}
~JsonPathNodemozc::net::__anon82d093630111::JsonPathNode77   ~JsonPathNode() {}
78 };
79 
GetDigit(const string & str,int * output)80 bool GetDigit(const string &str, int *output) {
81   DCHECK(output);
82   if (str.empty()) {
83     return false;
84   }
85 
86   const char *begin = str.data();
87   const char *end = str.data() + str.size();
88   if (str[0] == '-') {
89     if (str.size() == 1) {
90       return false;
91     }
92     ++begin;
93   }
94 
95   while (begin < end) {
96     if (!isdigit(*begin)) {
97       return false;
98     }
99     ++begin;
100   }
101 
102   *output = NumberUtil::SimpleAtoi(str);
103 
104   return true;
105 }
106 
GetSliceDigit(const string & str,int * output)107 bool GetSliceDigit(const string &str, int *output) {
108   if (str.empty()) {
109     *output = JsonPathNode::kSliceUndef;
110     return true;
111   }
112   return GetDigit(str, output);
113 }
114 
GetQuotedString(const string & str,const char c,string * output)115 bool GetQuotedString(const string &str, const char c,
116                      string *output) {
117   if (str.size() >= 2 &&
118       str[0] == c && str[str.size() - 1] == c) {
119     *output = str.substr(1, str.size() - 2);
120     return true;
121   }
122   return false;
123 }
124 
125 typedef std::vector<JsonPathNode> JsonPathNodes;
126 
127 class JsonPathExp : public std::vector<std::vector<JsonPathNode> > {
128  public:
Parse(const string & jsonpath)129   bool Parse(const string &jsonpath) {
130     clear();
131     if (jsonpath.size() <= 1 || jsonpath[0] != '$') {
132       LOG(ERROR) << "Not starts with $";
133       return false;
134     }
135 
136     if (Util::EndsWith(jsonpath, ".") ||
137         jsonpath.find("...") != string::npos) {
138       LOG(ERROR) << "Parse error: " << jsonpath;
139       return false;
140     }
141 
142     if (jsonpath.find("(") != string::npos ||
143         jsonpath.find(")") != string::npos ||
144         jsonpath.find("@") != string::npos ||
145         jsonpath.find("?") != string::npos) {
146       LOG(ERROR) << "script expression/current node are not supported: "
147                  << jsonpath;
148       return false;
149     }
150 
151     const char *begin = jsonpath.data() + 1;
152     const char *end = jsonpath.data() + jsonpath.size();
153 
154     string item;
155     for (; begin < end; ++begin) {
156       if (*begin == ']') {
157         return false;
158       }
159       if (*begin == '.' || *begin == '[') {
160         if (!item.empty()) {
161           if (!AddNodes(item, OUT_BRACKET)) {
162             return false;
163           }
164           item.clear();
165         }
166         if (*begin == '.' && begin + 1 < end && *(begin + 1) == '.') {
167           ++begin;
168           if (!AddNodes(".", OUT_BRACKET)) {
169             return false;
170           }
171         } else if (*begin == '[') {
172           ++begin;
173           string exp;
174           while (*begin != ']') {
175             if (begin == end) {
176               LOG(ERROR) << "Cannot find closing \"]\"";
177               return false;
178             }
179             exp += *begin;
180             ++begin;
181           }
182           if (!AddNodes(exp, IN_BRACKET)) {
183             return false;
184           }
185         }
186       } else {
187         item += *begin;
188       }
189     }
190 
191     if (!item.empty()) {
192       if (!AddNodes(item, OUT_BRACKET)) {
193         return false;
194       }
195     }
196 
197     return !empty();
198   }
199 
DebugString() const200   string DebugString() const {
201     std::ostringstream os;
202     for (size_t i = 0; i < size(); ++i) {
203       os << "[";
204       for (size_t j = 0; j < (*this)[i].size(); ++j) {
205         os << (*this)[i][j].DebugString();
206       }
207       os << "]";
208     }
209     return os.str();
210   }
211 
212  private:
213   enum NodesType {
214     IN_BRACKET,
215     OUT_BRACKET
216   };
217 
AddNodes(const string & nodes_exp,NodesType nodes_type)218   bool AddNodes(const string &nodes_exp, NodesType nodes_type) {
219     if (nodes_exp.empty()) {
220       return false;
221     }
222 
223     resize(size() + 1);
224     JsonPathNodes *nodes = &(back());
225     DCHECK(nodes);
226 
227     if (nodes_type == OUT_BRACKET) {
228       JsonPathNode node;
229       node.type = JsonPathNode::OBJECT_INDEX;
230       node.object_index = nodes_exp;
231       nodes->push_back(node);
232     } else if (nodes_type == IN_BRACKET) {
233       std::vector<string> nodes_exps;
234       Util::SplitStringUsing(nodes_exp, ",", &nodes_exps);
235       for (size_t i = 0; i < nodes_exps.size(); ++i) {
236         JsonPathNode node;
237         node.type = JsonPathNode::UNDEFINED_INDEX;
238         string in_nodes_exp;
239         if (GetQuotedString(nodes_exps[i], '\'', &in_nodes_exp) ||
240             GetQuotedString(nodes_exps[i], '\"', &in_nodes_exp)) {
241           node.type = JsonPathNode::OBJECT_INDEX;
242           node.object_index = in_nodes_exp;
243         } else if (nodes_exps[i] == "*") {
244           node.type = JsonPathNode::OBJECT_INDEX;
245           node.object_index = "*";
246         } else {
247           std::vector<string> slice;
248           Util::SplitStringAllowEmpty(nodes_exps[i], ":", &slice);
249           if (slice.size() == 1) {
250             if (GetDigit(slice[0], &node.array_index)) {
251               node.type = JsonPathNode::ARRAY_INDEX;
252             } else {
253               // fallback to OBJECT_INDEX.
254               node.type = JsonPathNode::OBJECT_INDEX;
255               node.object_index = slice[0];
256             }
257           } else if (slice.size() == 2 &&
258                      GetSliceDigit(slice[0], &node.slice_start) &&
259                      GetSliceDigit(slice[1], &node.slice_end)) {
260             node.slice_step = JsonPathNode::kSliceUndef;
261             node.type = JsonPathNode::SLICE_INDEX;
262           } else if (slice.size() == 3 &&
263                      GetSliceDigit(slice[0], &node.slice_start) &&
264                      GetSliceDigit(slice[1], &node.slice_end) &&
265                      GetSliceDigit(slice[2], &node.slice_step)) {
266             node.type = JsonPathNode::SLICE_INDEX;
267           }
268         }
269 
270         if (node.type == JsonPathNode::UNDEFINED_INDEX) {
271           return false;
272         }
273 
274         nodes->push_back(node);
275       }
276     } else {
277       LOG(FATAL) << "Unknown nodes type";
278     }
279 
280     return true;
281   }
282 };
283 
284 // Find all Json::Value(s) from root node |value|,
285 //  which matches to |nodes|.
CollectValuesRecursively(const Json::Value & value,const JsonPathNodes & nodes,std::vector<const Json::Value * > * output)286 void CollectValuesRecursively(const Json::Value &value,
287                               const JsonPathNodes &nodes,
288                               std::vector<const Json::Value *> *output) {
289   DCHECK(output);
290   for (size_t node_index = 0; node_index < nodes.size(); ++node_index) {
291     if (nodes[node_index].type != JsonPathNode::OBJECT_INDEX) {
292       continue;
293     }
294     if (value.isObject()) {
295       const Json::Value::Members members = value.getMemberNames();
296       const string &object_index = nodes[node_index].object_index;
297       if (object_index != "*" && value.isMember(object_index)) {
298         output->push_back(&value[object_index]);
299       }
300       for (size_t i = 0; i < members.size(); ++i) {
301         const Json::Value &v = value[members[i]];
302         if (object_index == "*") {
303           output->push_back(&v);
304         }
305         CollectValuesRecursively(v, nodes, output);
306       }
307     } else if (value.isArray()) {
308       for (Json::ArrayIndex i = 0; i < value.size(); ++i) {
309         CollectValuesRecursively(value[i], nodes, output);
310       }
311     }
312   }
313 }
314 
CollectNodesFromJson(const Json::Value & value,const JsonPathExp & jsonpathexp,size_t depth,std::vector<const Json::Value * > * output)315 void CollectNodesFromJson(const Json::Value &value,
316                           const JsonPathExp &jsonpathexp,
317                           size_t depth,
318                           std::vector<const Json::Value *> *output) {
319   if (depth >= jsonpathexp.size()) {
320     output->push_back(&value);
321     return;
322   }
323 
324   const JsonPathNodes &nodes = jsonpathexp[depth];
325 
326   for (size_t node_index = 0; node_index < nodes.size(); ++node_index) {
327     const JsonPathNode &node = nodes[node_index];
328     if (node.type == JsonPathNode::OBJECT_INDEX) {
329       if (node.object_index == "*") {
330         if (value.isObject()) {
331           const Json::Value::Members members = value.getMemberNames();
332           for (size_t i = 0; i < members.size(); ++i) {
333             CollectNodesFromJson(value[members[i]],
334                                  jsonpathexp, depth + 1, output);
335           }
336         } else if (value.isArray()) {
337           for (Json::ArrayIndex i = 0; i < value.size(); ++i) {
338             CollectNodesFromJson(value[i], jsonpathexp, depth + 1, output);
339           }
340         } else {
341           CollectNodesFromJson(value, jsonpathexp, depth + 1, output);
342         }
343       } else if (node.object_index == "." && depth + 1 < jsonpathexp.size()) {
344         std::vector<const Json::Value *> matched_values;
345         CollectValuesRecursively(value, jsonpathexp[depth + 1],
346                                  &matched_values);
347         for (size_t i = 0; i < matched_values.size(); ++i) {
348           CollectNodesFromJson(*(matched_values[i]),
349                                jsonpathexp, depth + 2, output);
350         }
351       } else if (value.isObject() && value.isMember(node.object_index)) {
352         CollectNodesFromJson(value[node.object_index],
353                              jsonpathexp, depth + 1, output);
354       }
355     } else if (node.type == JsonPathNode::ARRAY_INDEX) {
356       const int i = node.array_index >= 0 ?
357           node.array_index : value.size() + node.array_index;
358       if (value.isArray() && value.isValidIndex(i)) {
359         CollectNodesFromJson(value[i], jsonpathexp, depth + 1, output);
360       }
361     } else if (node.type == JsonPathNode::SLICE_INDEX) {
362       if (value.isArray()) {
363         const int size = static_cast<int>(value.size());
364         int start = JsonPathNode::IsUndef(node.slice_start) ?
365             0 : node.slice_start;
366         int end = JsonPathNode::IsUndef(node.slice_end) ?
367             size : node.slice_end;
368         const int step = JsonPathNode::IsUndef(node.slice_step) ?
369             1 : node.slice_step;
370         start = (start < 0) ? std::max(0, start + size) : std::min(size, start);
371         end = (end < 0) ? std::max(0, end + size) : std::min(size, end);
372         if (step > 0 && end > start) {
373           for (int i = start; i < end; i += step) {
374             if (value.isValidIndex(i)) {
375               CollectNodesFromJson(value[i], jsonpathexp, depth + 1, output);
376             }
377           }
378         } else if (step < 0 && end < start) {
379           for (int i = start; i > end; i += step) {
380             if (value.isValidIndex(i)) {
381               CollectNodesFromJson(value[i], jsonpathexp, depth + 1, output);
382             }
383           }
384         }
385       }
386     } else {
387       LOG(FATAL) << "Unknown type";
388     }
389   }
390 }
391 }  // namespace
392 
393 // static
Parse(const Json::Value & root,const string & jsonpath,std::vector<const Json::Value * > * output)394 bool JsonPath::Parse(const Json::Value &root,
395                      const string &jsonpath,
396                      std::vector<const Json::Value *> *output) {
397   JsonPathExp jsonpathexp;
398   if (!jsonpathexp.Parse(jsonpath)) {
399     LOG(WARNING) << "Parsing JsonPath error: " << jsonpath;
400     return false;
401   }
402 
403   VLOG(1) << jsonpathexp.DebugString();
404 
405   DCHECK(output);
406   CollectNodesFromJson(root, jsonpathexp, 0, output);
407   return true;
408 }
409 }   // namespace net
410 }   // namespace mozc
411