1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/policy/core/common/schema.h"
6 
7 #include <limits.h>
8 #include <stddef.h>
9 
10 #include <algorithm>
11 #include <climits>
12 #include <map>
13 #include <memory>
14 #include <utility>
15 
16 #include "base/check_op.h"
17 #include "base/compiler_specific.h"
18 #include "base/containers/flat_set.h"
19 #include "base/json/json_reader.h"
20 #include "base/macros.h"
21 #include "base/memory/ptr_util.h"
22 #include "base/notreached.h"
23 #include "base/stl_util.h"
24 #include "base/strings/stringprintf.h"
25 #include "components/policy/core/common/json_schema_constants.h"
26 #include "components/policy/core/common/schema_internal.h"
27 #if defined(OS_FREEBSD) || defined(OS_DRAGONFLY)
28 #include <re2/re2.h>
29 #else
30 #include "third_party/re2/src/re2/re2.h"
31 #endif
32 
33 namespace schema = json_schema_constants;
34 
35 namespace policy {
36 
37 using internal::PropertiesNode;
38 using internal::PropertyNode;
39 using internal::RestrictionNode;
40 using internal::SchemaData;
41 using internal::SchemaNode;
42 
43 namespace {
44 
// Bookkeeping shared while parsing a schema: the "id" attributes seen so far
// and the references that still have to be resolved against them.
struct ReferencesAndIDs {
  // A deferred reference: the "id" being referenced, plus the location that
  // receives that id's SchemaNode index once all the IDs are available.
  using PendingReference = std::pair<std::string, short*>;

  // SchemaNode index for each schema "id" attribute.
  std::map<std::string, short> id_map;

  // References to be assigned later, once every ID is known.
  std::vector<PendingReference> reference_list;
};
54 
// Number of entries each storage array will need. These are computed before
// parsing so the arrays can be reserved once and never grow while parsing;
// growing would invalidate pointers into their contents (i.e. string's c_str()
// and the addresses of indices for "$ref" attributes).
struct StorageSizes {
  StorageSizes() = default;

  // Every counter starts at zero.
  size_t strings = 0;
  size_t schema_nodes = 0;
  size_t property_nodes = 0;
  size_t properties_nodes = 0;
  size_t restriction_nodes = 0;
  size_t required_properties = 0;
  size_t int_enums = 0;
  size_t string_enums = 0;
};
78 
// Text that |Schema::MaskSensitiveValues| puts in place of sensitive values.
// Keep it consistent with the mask that |NetworkConfigurationPolicyHandler|
// uses for network credential fields.
constexpr char kSensitiveValueMask[] = "********";

// Index meaning "no node present"; plays the role a NULL pointer would.
constexpr short kInvalid = -1;
87 
// Maps a schema key to the corresponding base::Value::Type. This is the entry
// type of the sorted lookup tables searched by MapSchemaKeyToValueType().
struct SchemaKeyToValueType {
  // Key string that lookups are compared against.
  const char* key;
  // Value type associated with |key|.
  base::Value::Type type;
};
93 
// Allowed types and their base::Value::Type equivalent. The table is searched
// with std::lower_bound (see MapSchemaKeyToValueType()), which compares the
// key strings themselves, so entries must stay ordered alphabetically by the
// string value of |key|.
const SchemaKeyToValueType kSchemaTypesToValueTypes[] = {
    {schema::kArray, base::Value::Type::LIST},
    {schema::kBoolean, base::Value::Type::BOOLEAN},
    {schema::kInteger, base::Value::Type::INTEGER},
    {schema::kNumber, base::Value::Type::DOUBLE},
    {schema::kObject, base::Value::Type::DICTIONARY},
    {schema::kString, base::Value::Type::STRING},
};
// One-past-the-end pointer of |kSchemaTypesToValueTypes|.
const SchemaKeyToValueType* kSchemaTypesToValueTypesEnd =
    kSchemaTypesToValueTypes + base::size(kSchemaTypesToValueTypes);
106 
// Allowed attributes and types for type 'array'. Searched with a binary
// search over the key strings, so entries must stay ordered alphabetically by
// the string value of |key|.
const SchemaKeyToValueType kAttributesAndTypesForArray[] = {
    {schema::kDescription, base::Value::Type::STRING},
    {schema::kId, base::Value::Type::STRING},
    {schema::kItems, base::Value::Type::DICTIONARY},
    {schema::kSensitiveValue, base::Value::Type::BOOLEAN},
    {schema::kTitle, base::Value::Type::STRING},
    {schema::kType, base::Value::Type::STRING},
};
// One-past-the-end pointer of |kAttributesAndTypesForArray|.
const SchemaKeyToValueType* kAttributesAndTypesForArrayEnd =
    kAttributesAndTypesForArray + base::size(kAttributesAndTypesForArray);
119 
// Allowed attributes and types for type 'boolean'. Searched with a binary
// search over the key strings, so entries must stay ordered alphabetically by
// the string value of |key|.
const SchemaKeyToValueType kAttributesAndTypesForBoolean[] = {
    {schema::kDescription, base::Value::Type::STRING},
    {schema::kId, base::Value::Type::STRING},
    {schema::kSensitiveValue, base::Value::Type::BOOLEAN},
    {schema::kTitle, base::Value::Type::STRING},
    {schema::kType, base::Value::Type::STRING},
};
// One-past-the-end pointer of |kAttributesAndTypesForBoolean|.
const SchemaKeyToValueType* kAttributesAndTypesForBooleanEnd =
    kAttributesAndTypesForBoolean + base::size(kAttributesAndTypesForBoolean);
// Allowed attributes and types for type 'integer'. Searched with a binary
// search over the key strings, so entries must stay ordered alphabetically by
// the string value of |key|. 'maximum' and 'minimum' are listed as DOUBLE;
// CheckType() also accepts integer values where a double is expected.
const SchemaKeyToValueType kAttributesAndTypesForInteger[] = {
    {schema::kDescription, base::Value::Type::STRING},
    {schema::kEnum, base::Value::Type::LIST},
    {schema::kId, base::Value::Type::STRING},
    {schema::kMaximum, base::Value::Type::DOUBLE},
    {schema::kMinimum, base::Value::Type::DOUBLE},
    {schema::kSensitiveValue, base::Value::Type::BOOLEAN},
    {schema::kTitle, base::Value::Type::STRING},
    {schema::kType, base::Value::Type::STRING},
};
// One-past-the-end pointer of |kAttributesAndTypesForInteger|.
const SchemaKeyToValueType* kAttributesAndTypesForIntegerEnd =
    kAttributesAndTypesForInteger + base::size(kAttributesAndTypesForInteger);
146 
// Allowed attributes and types for type 'number'. Searched with a binary
// search over the key strings, so entries must stay ordered alphabetically by
// the string value of |key|.
const SchemaKeyToValueType kAttributesAndTypesForNumber[] = {
    {schema::kDescription, base::Value::Type::STRING},
    {schema::kId, base::Value::Type::STRING},
    {schema::kSensitiveValue, base::Value::Type::BOOLEAN},
    {schema::kTitle, base::Value::Type::STRING},
    {schema::kType, base::Value::Type::STRING},
};
// One-past-the-end pointer of |kAttributesAndTypesForNumber|.
const SchemaKeyToValueType* kAttributesAndTypesForNumberEnd =
    kAttributesAndTypesForNumber + base::size(kAttributesAndTypesForNumber);
// Allowed attributes and types for type 'object'. Searched with a binary
// search over the key strings, so entries must stay ordered alphabetically by
// the string value of |key|.
const SchemaKeyToValueType kAttributesAndTypesForObject[] = {
    {schema::kAdditionalProperties, base::Value::Type::DICTIONARY},
    {schema::kDescription, base::Value::Type::STRING},
    {schema::kId, base::Value::Type::STRING},
    {schema::kPatternProperties, base::Value::Type::DICTIONARY},
    {schema::kProperties, base::Value::Type::DICTIONARY},
    {schema::kRequired, base::Value::Type::LIST},
    {schema::kSensitiveValue, base::Value::Type::BOOLEAN},
    {schema::kTitle, base::Value::Type::STRING},
    {schema::kType, base::Value::Type::STRING},
};
// One-past-the-end pointer of |kAttributesAndTypesForObject|.
const SchemaKeyToValueType* kAttributesAndTypesForObjectEnd =
    kAttributesAndTypesForObject + base::size(kAttributesAndTypesForObject);
174 
// Allowed attributes and types for $ref. Searched with a binary search over
// the key strings, so entries must be ordered by the string value of |key|.
// NOTE(review): the search compares the key strings themselves, not the
// constant names. If schema::kRef is "$ref" (its definition is not in this
// file), '$' sorts before 'd' and kRef would have to precede kDescription for
// lookups of "$ref" and "description" to succeed — confirm against
// json_schema_constants.
const SchemaKeyToValueType kAttributesAndTypesForRef[] = {
    {schema::kDescription, base::Value::Type::STRING},
    {schema::kRef, base::Value::Type::STRING},
    {schema::kTitle, base::Value::Type::STRING},
};
// One-past-the-end pointer of |kAttributesAndTypesForRef|.
const SchemaKeyToValueType* kAttributesAndTypesForRefEnd =
    kAttributesAndTypesForRef + base::size(kAttributesAndTypesForRef);
184 
// Allowed attributes and types for type 'string'. Searched with a binary
// search over the key strings, so entries must stay ordered alphabetically by
// the string value of |key|.
const SchemaKeyToValueType kAttributesAndTypesForString[] = {
    {schema::kDescription, base::Value::Type::STRING},
    {schema::kEnum, base::Value::Type::LIST},
    {schema::kId, base::Value::Type::STRING},
    {schema::kPattern, base::Value::Type::STRING},
    {schema::kSensitiveValue, base::Value::Type::BOOLEAN},
    {schema::kTitle, base::Value::Type::STRING},
    {schema::kType, base::Value::Type::STRING},
};
// One-past-the-end pointer of |kAttributesAndTypesForString|.
const SchemaKeyToValueType* kAttributesAndTypesForStringEnd =
    kAttributesAndTypesForString + base::size(kAttributesAndTypesForString);
198 
199 // Helper for std::lower_bound.
CompareToString(const SchemaKeyToValueType & entry,const std::string & key)200 bool CompareToString(const SchemaKeyToValueType& entry,
201                      const std::string& key) {
202   return entry.key < key;
203 }
204 
205 // Returns true if a SchemaKeyToValueType with key==|schema_key| can be found in
206 // the array represented by |begin| and |end|. If so, |value_type| will be set
207 // to the SchemaKeyToValueType value type.
MapSchemaKeyToValueType(const std::string & schema_key,const SchemaKeyToValueType * begin,const SchemaKeyToValueType * end,base::Value::Type * value_type)208 bool MapSchemaKeyToValueType(const std::string& schema_key,
209                              const SchemaKeyToValueType* begin,
210                              const SchemaKeyToValueType* end,
211                              base::Value::Type* value_type) {
212   const SchemaKeyToValueType* entry =
213       std::lower_bound(begin, end, schema_key, CompareToString);
214   if (entry == end || entry->key != schema_key)
215     return false;
216   if (value_type)
217     *value_type = entry->type;
218   return true;
219 }
220 
221 // Shorthand method for |SchemaTypeToValueType()| with
222 // |kSchemaTypesToValueTypes|.
SchemaTypeToValueType(const std::string & schema_type,base::Value::Type * value_type)223 bool SchemaTypeToValueType(const std::string& schema_type,
224                            base::Value::Type* value_type) {
225   return MapSchemaKeyToValueType(schema_type, kSchemaTypesToValueTypes,
226                                  kSchemaTypesToValueTypesEnd, value_type);
227 }
228 
StrategyAllowUnknown(SchemaOnErrorStrategy strategy)229 bool StrategyAllowUnknown(SchemaOnErrorStrategy strategy) {
230   return strategy != SCHEMA_STRICT;
231 }
232 
// Records a schema-level error: empties |error_path| (when provided) and
// stores |msg| in |error|. |error| must not be null.
void SchemaErrorFound(std::string* error_path,
                      std::string* error,
                      const std::string& msg) {
  if (error_path)
    error_path->clear();
  error->assign(msg);
}
240 
AddListIndexPrefixToPath(int index,std::string * path)241 void AddListIndexPrefixToPath(int index, std::string* path) {
242   if (path) {
243     if (path->empty())
244       *path = base::StringPrintf("items[%d]", index);
245     else
246       *path = base::StringPrintf("items[%d].", index) + *path;
247   }
248 }
249 
// Prepends the dictionary key |key| to |path|, separated from any existing
// content by a '.'. Does nothing when |path| is null.
void AddDictKeyPrefixToPath(const std::string& key, std::string* path) {
  if (!path)
    return;

  if (path->empty()) {
    *path = key;
    return;
  }
  path->insert(0, key + ".");
}
258 
IgnoreUnknownAttributes(int options)259 bool IgnoreUnknownAttributes(int options) {
260   return (options & kSchemaOptionsIgnoreUnknownAttributes);
261 }
262 
263 // Check that the value's type and the expected type are equal. We also allow
264 // integers when expecting doubles.
CheckType(const base::Value * value,base::Value::Type expected_type)265 bool CheckType(const base::Value* value, base::Value::Type expected_type) {
266   return value->type() == expected_type ||
267          (value->is_int() && expected_type == base::Value::Type::DOUBLE);
268 }
269 
270 // Returns true if |type| is supported as schema's 'type' value.
IsValidType(const std::string & type)271 bool IsValidType(const std::string& type) {
272   return MapSchemaKeyToValueType(type, kSchemaTypesToValueTypes,
273                                  kSchemaTypesToValueTypesEnd, nullptr);
274 }
275 
276 // Validate that |dict| only contains attributes that are allowed for the
277 // corresponding value of 'type'. Also ensure that all of those attributes are
278 // of the expected type. |options| can be used to ignore unknown attributes.
ValidateAttributesAndTypes(const base::Value & dict,const std::string & type,int options,std::string * error)279 bool ValidateAttributesAndTypes(const base::Value& dict,
280                                 const std::string& type,
281                                 int options,
282                                 std::string* error) {
283   const SchemaKeyToValueType* begin = nullptr;
284   const SchemaKeyToValueType* end = nullptr;
285   if (type == schema::kArray) {
286     begin = kAttributesAndTypesForArray;
287     end = kAttributesAndTypesForArrayEnd;
288   } else if (type == schema::kBoolean) {
289     begin = kAttributesAndTypesForBoolean;
290     end = kAttributesAndTypesForBooleanEnd;
291   } else if (type == schema::kInteger) {
292     begin = kAttributesAndTypesForInteger;
293     end = kAttributesAndTypesForIntegerEnd;
294   } else if (type == schema::kNumber) {
295     begin = kAttributesAndTypesForNumber;
296     end = kAttributesAndTypesForNumberEnd;
297   } else if (type == schema::kObject) {
298     begin = kAttributesAndTypesForObject;
299     end = kAttributesAndTypesForObjectEnd;
300   } else if (type == schema::kRef) {
301     begin = kAttributesAndTypesForRef;
302     end = kAttributesAndTypesForRefEnd;
303   } else if (type == schema::kString) {
304     begin = kAttributesAndTypesForString;
305     end = kAttributesAndTypesForStringEnd;
306   } else {
307     NOTREACHED() << "Type should be a valid schema type or '$ref'.";
308   }
309 
310   base::Value::Type expected_type = base::Value::Type::NONE;
311   for (const auto& it : dict.DictItems()) {
312     if (MapSchemaKeyToValueType(it.first, begin, end, &expected_type)) {
313       if (!CheckType(&it.second, expected_type)) {
314         *error = base::StringPrintf("Invalid type for attribute '%s'",
315                                     it.first.c_str());
316         return false;
317       }
318     } else if (!IgnoreUnknownAttributes(options)) {
319       *error = base::StringPrintf("Unknown attribute '%s'", it.first.c_str());
320       return false;
321     }
322   }
323   return true;
324 }
325 
326 // Validates that |enum_list| is a list and its items are all of type |type|.
ValidateEnum(const base::Value * enum_list,const std::string & type,std::string * error)327 bool ValidateEnum(const base::Value* enum_list,
328                   const std::string& type,
329                   std::string* error) {
330   if (enum_list->type() != base::Value::Type::LIST ||
331       enum_list->GetList().empty()) {
332     *error = "Attribute 'enum' must be a non-empty list.";
333     return false;
334   }
335   base::Value::Type expected_item_type = base::Value::Type::NONE;
336   MapSchemaKeyToValueType(type, kSchemaTypesToValueTypes,
337                           kSchemaTypesToValueTypesEnd, &expected_item_type);
338   for (const base::Value& item : enum_list->GetList()) {
339     if (item.type() != expected_item_type) {
340       *error = base::StringPrintf(
341           "Attribute 'enum' for type '%s' contains items with invalid types",
342           type.c_str());
343       return false;
344     }
345   }
346   return true;
347 }
348 
349 // Forward declaration (used in ValidateProperties).
350 bool IsValidSchema(const base::Value& dict, int options, std::string* error);
351 
352 // Validates that the values in the |properties| dict are valid schemas.
ValidateProperties(const base::Value & properties,int options,std::string * error)353 bool ValidateProperties(const base::Value& properties,
354                         int options,
355                         std::string* error) {
356   for (const auto& dict_it : properties.DictItems()) {
357     if (dict_it.second.type() != base::Value::Type::DICTIONARY) {
358       *error = base::StringPrintf("Schema for property '%s' must be a dict.",
359                                   dict_it.first.c_str());
360       return false;
361     }
362     if (!IsValidSchema(dict_it.second, options, error))
363       return false;
364   }
365   return true;
366 }
367 
368 // Checks whether the passed dict is a valid schema. See
369 // |kAllowedAttributesAndTypes| for a list of supported types, supported
370 // attributes and their expected types. Values for 'minimum' and 'maximum' for
371 // type 'integer' can be of type int or double. Referenced IDs ($ref) are not
372 // checked for existence and IDs are not checked for duplicates. The 'pattern'
373 // attribute and keys for 'patternProperties' are not checked for valid regulax
374 // expression syntax. Invalid regular expressions will cause a value validation
375 // error.
IsValidSchema(const base::Value & dict,int options,std::string * error)376 bool IsValidSchema(const base::Value& dict, int options, std::string* error) {
377   DCHECK(dict.is_dict());
378   // Validate '$ref'.
379   const base::Value* ref_id = dict.FindKey(schema::kRef);
380   if (ref_id)
381     return ValidateAttributesAndTypes(dict, schema::kRef, options, error);
382 
383   // Validate 'type'.
384   const base::Value* type = dict.FindKey(schema::kType);
385   if (!type) {
386     *error = "Each schema must have a 'type' or '$ref'.";
387     return false;
388   }
389   if (type->type() != base::Value::Type::STRING) {
390     *error = "Attribute 'type' must be a string.";
391     return false;
392   }
393   const std::string type_string = type->GetString();
394   if (!IsValidType(type_string)) {
395     *error = base::StringPrintf("Unknown type '%s'.", type_string.c_str());
396     return false;
397   }
398 
399   // Validate attributes and expected types.
400   if (!ValidateAttributesAndTypes(dict, type_string, options, error))
401     return false;
402 
403   // Validate 'enum' attribute.
404   if (type_string == schema::kString || type_string == schema::kInteger) {
405     const base::Value* enum_list = dict.FindKey(schema::kEnum);
406     if (enum_list && !ValidateEnum(enum_list, type_string, error))
407       return false;
408   }
409 
410   if (type_string == schema::kInteger) {
411     // Validate 'minimum' > 'maximum'.
412     const base::Value* minimum_value = dict.FindKey(schema::kMinimum);
413     const base::Value* maximum_value = dict.FindKey(schema::kMaximum);
414     if (minimum_value && maximum_value) {
415       double minimum = minimum_value->is_int() ? minimum_value->GetInt()
416                                                : minimum_value->GetDouble();
417       double maximum = maximum_value->is_int() ? maximum_value->GetInt()
418                                                : maximum_value->GetDouble();
419       if (minimum > maximum) {
420         *error = base::StringPrintf("Invalid range specified [%f;%f].", minimum,
421                                     maximum);
422         return false;
423       }
424     }
425   } else if (type_string == schema::kArray) {
426     // Validate type 'array'.
427     const base::Value* items = dict.FindKey(schema::kItems);
428     if (!items) {
429       *error = "Schema of type 'array' must have a schema in 'items'.";
430       return false;
431     }
432     if (!IsValidSchema(*items, options, error))
433       return false;
434   } else if (type_string == schema::kObject) {
435     // Validate type 'object'.
436     const base::Value* properties = dict.FindKey(schema::kProperties);
437     if (properties && !ValidateProperties(*properties, options, error))
438       return false;
439 
440     const base::Value* pattern_properties =
441         dict.FindKey(schema::kPatternProperties);
442     if (pattern_properties &&
443         !ValidateProperties(*pattern_properties, options, error)) {
444       return false;
445     }
446 
447     const base::Value* additional_properties =
448         dict.FindKey(schema::kAdditionalProperties);
449     if (additional_properties) {
450       if (!IsValidSchema(*additional_properties, options, error))
451         return false;
452     }
453 
454     const base::Value* required = dict.FindKey(schema::kRequired);
455     if (required) {
456       for (const base::Value& item : required->GetList()) {
457         if (!item.is_string()) {
458           *error = "Attribute 'required' may only contain strings.";
459           return false;
460         }
461         const std::string property_name = item.GetString();
462         if (!properties || !properties->FindKey(property_name)) {
463           *error = base::StringPrintf(
464               "Attribute 'required' contains unknown property '%s'.",
465               property_name.c_str());
466           return false;
467         }
468       }
469     }
470   }
471 
472   return true;
473 }
474 
475 }  // namespace
476 
477 // Contains the internal data representation of a Schema. This can either wrap
478 // a SchemaData owned elsewhere (currently used to wrap the Chrome schema, which
479 // is generated at compile time), or it can own its own SchemaData.
480 class Schema::InternalStorage
481     : public base::RefCountedThreadSafe<InternalStorage> {
482  public:
483   static scoped_refptr<const InternalStorage> Wrap(const SchemaData* data);
484 
485   static scoped_refptr<const InternalStorage> ParseSchema(
486       const base::Value& schema,
487       std::string* error);
488 
data() const489   const SchemaData* data() const { return &schema_data_; }
490 
root_node() const491   const SchemaNode* root_node() const { return schema(0); }
492 
493   // Returns the validation_schema root node if one was generated, or nullptr.
validation_schema_root_node() const494   const SchemaNode* validation_schema_root_node() const {
495     return schema_data_.validation_schema_root_index >= 0
496                ? schema(schema_data_.validation_schema_root_index)
497                : nullptr;
498   }
499 
schema(int index) const500   const SchemaNode* schema(int index) const {
501     DCHECK_GE(index, 0);
502     return schema_data_.schema_nodes + index;
503   }
504 
properties(int index) const505   const PropertiesNode* properties(int index) const {
506     DCHECK_GE(index, 0);
507     return schema_data_.properties_nodes + index;
508   }
509 
property(int index) const510   const PropertyNode* property(int index) const {
511     DCHECK_GE(index, 0);
512     return schema_data_.property_nodes + index;
513   }
514 
restriction(int index) const515   const RestrictionNode* restriction(int index) const {
516     DCHECK_GE(index, 0);
517     return schema_data_.restriction_nodes + index;
518   }
519 
required_property(int index) const520   const char* const* required_property(int index) const {
521     DCHECK_GE(index, 0);
522     return schema_data_.required_properties + index;
523   }
524 
int_enums(int index) const525   const int* int_enums(int index) const {
526     DCHECK_GE(index, 0);
527     return schema_data_.int_enums + index;
528   }
529 
string_enums(int index) const530   const char* const* string_enums(int index) const {
531     DCHECK_GE(index, 0);
532     return schema_data_.string_enums + index;
533   }
534 
535   // Compiles regular expression |pattern|. The result is cached and will be
536   // returned directly next time.
537   re2::RE2* CompileRegex(const std::string& pattern) const;
538 
539  private:
540   friend class base::RefCountedThreadSafe<InternalStorage>;
541 
542   InternalStorage();
543   ~InternalStorage();
544 
545   // Determines the expected |sizes| of the storage for the representation
546   // of |schema|.
547   static void DetermineStorageSizes(const base::Value& schema,
548                                     StorageSizes* sizes);
549 
550   // Parses the JSON schema in |schema|.
551   //
552   // If |schema| has a "$ref" attribute then a pending reference is appended
553   // to the |reference_list|, and nothing else is done.
554   //
555   // Otherwise, |index| gets assigned the index of the corresponding SchemaNode
556   // in |schema_nodes_|. If the |schema| contains an "id" then that ID is mapped
557   // to the |index| in the |id_map|.
558   //
559   // If |schema| is invalid then |error| gets the error reason and false is
560   // returned. Otherwise returns true.
561   bool Parse(const base::Value& schema,
562              short* index,
563              ReferencesAndIDs* references_and_ids,
564              std::string* error);
565 
566   // Helper for Parse() that gets an already assigned |schema_node| instead of
567   // an |index| pointer.
568   bool ParseDictionary(const base::Value& schema,
569                        SchemaNode* schema_node,
570                        ReferencesAndIDs* references_and_ids,
571                        std::string* error);
572 
573   // Helper for Parse() that gets an already assigned |schema_node| instead of
574   // an |index| pointer.
575   bool ParseList(const base::Value& schema,
576                  SchemaNode* schema_node,
577                  ReferencesAndIDs* references_and_ids,
578                  std::string* error);
579 
580   bool ParseEnum(const base::Value& schema,
581                  base::Value::Type type,
582                  SchemaNode* schema_node,
583                  std::string* error);
584 
585   bool ParseRangedInt(const base::Value& schema,
586                       SchemaNode* schema_node,
587                       std::string* error);
588 
589   bool ParseStringPattern(const base::Value& schema,
590                           SchemaNode* schema_node,
591                           std::string* error);
592 
593   // Assigns the IDs in |id_map| to the pending references in the
594   // |reference_list|. If an ID is missing then |error| is set and false is
595   // returned; otherwise returns true.
596   static bool ResolveReferences(const ReferencesAndIDs& references_and_ids,
597                                 std::string* error);
598 
599   // Sets |has_sensitive_children| for all |SchemaNode|s in |schema_nodes_|.
600   void FindSensitiveChildren();
601 
602   // Returns true iff the node at |index| has sensitive child elements or
603   // contains a sensitive value itself.
604   bool FindSensitiveChildrenRecursive(int index,
605                                       std::set<int>* handled_schema_nodes);
606 
607   // Cache for CompileRegex(), will memorize return value of every call to
608   // CompileRegex() and return results directly next time.
609   mutable std::map<std::string, std::unique_ptr<re2::RE2>> regex_cache_;
610 
611   SchemaData schema_data_;
612   std::vector<std::string> strings_;
613   std::vector<SchemaNode> schema_nodes_;
614   std::vector<PropertyNode> property_nodes_;
615   std::vector<PropertiesNode> properties_nodes_;
616   std::vector<RestrictionNode> restriction_nodes_;
617   std::vector<const char*> required_properties_;
618   std::vector<int> int_enums_;
619   std::vector<const char*> string_enums_;
620 
621   DISALLOW_COPY_AND_ASSIGN(InternalStorage);
622 };
623 
InternalStorage()624 Schema::InternalStorage::InternalStorage() {}
625 
~InternalStorage()626 Schema::InternalStorage::~InternalStorage() {}
627 
628 // static
Wrap(const SchemaData * data)629 scoped_refptr<const Schema::InternalStorage> Schema::InternalStorage::Wrap(
630     const SchemaData* data) {
631   InternalStorage* storage = new InternalStorage();
632   storage->schema_data_ = *data;
633   return storage;
634 }
635 
636 // static
637 scoped_refptr<const Schema::InternalStorage>
ParseSchema(const base::Value & schema,std::string * error)638 Schema::InternalStorage::ParseSchema(const base::Value& schema,
639                                      std::string* error) {
640   // Determine the sizes of the storage arrays and reserve the capacity before
641   // starting to append nodes and strings. This is important to prevent the
642   // arrays from being reallocated, which would invalidate the c_str() pointers
643   // and the addresses of indices to fix.
644   StorageSizes sizes;
645   DetermineStorageSizes(schema, &sizes);
646 
647   scoped_refptr<InternalStorage> storage = new InternalStorage();
648   storage->strings_.reserve(sizes.strings);
649   storage->schema_nodes_.reserve(sizes.schema_nodes);
650   storage->property_nodes_.reserve(sizes.property_nodes);
651   storage->properties_nodes_.reserve(sizes.properties_nodes);
652   storage->restriction_nodes_.reserve(sizes.restriction_nodes);
653   storage->required_properties_.reserve(sizes.required_properties);
654   storage->int_enums_.reserve(sizes.int_enums);
655   storage->string_enums_.reserve(sizes.string_enums);
656 
657   short root_index = kInvalid;
658   ReferencesAndIDs references_and_ids;
659   if (!storage->Parse(schema, &root_index, &references_and_ids, error))
660     return nullptr;
661 
662   if (root_index == kInvalid) {
663     *error = "The main schema can't have a $ref";
664     return nullptr;
665   }
666 
667   // None of this should ever happen without having been already detected.
668   // But, if it does happen, then it will lead to corrupted memory; drop
669   // everything in that case.
670   if (root_index != 0 || sizes.strings != storage->strings_.size() ||
671       sizes.schema_nodes != storage->schema_nodes_.size() ||
672       sizes.property_nodes != storage->property_nodes_.size() ||
673       sizes.properties_nodes != storage->properties_nodes_.size() ||
674       sizes.restriction_nodes != storage->restriction_nodes_.size() ||
675       sizes.required_properties != storage->required_properties_.size() ||
676       sizes.int_enums != storage->int_enums_.size() ||
677       sizes.string_enums != storage->string_enums_.size()) {
678     *error =
679         "Failed to parse the schema due to a Chrome bug. Please file a "
680         "new issue at http://crbug.com";
681     return nullptr;
682   }
683 
684   if (!ResolveReferences(references_and_ids, error))
685     return nullptr;
686 
687   storage->FindSensitiveChildren();
688 
689   SchemaData* data = &storage->schema_data_;
690   data->schema_nodes = storage->schema_nodes_.data();
691   data->property_nodes = storage->property_nodes_.data();
692   data->properties_nodes = storage->properties_nodes_.data();
693   data->restriction_nodes = storage->restriction_nodes_.data();
694   data->required_properties = storage->required_properties_.data();
695   data->int_enums = storage->int_enums_.data();
696   data->string_enums = storage->string_enums_.data();
697   data->validation_schema_root_index = -1;
698 
699   return storage;
700 }
701 
CompileRegex(const std::string & pattern) const702 re2::RE2* Schema::InternalStorage::CompileRegex(
703     const std::string& pattern) const {
704   auto it = regex_cache_.find(pattern);
705   if (it == regex_cache_.end()) {
706     std::unique_ptr<re2::RE2> compiled(new re2::RE2(pattern));
707     re2::RE2* compiled_ptr = compiled.get();
708     regex_cache_.insert(std::make_pair(pattern, std::move(compiled)));
709     return compiled_ptr;
710   }
711   return it->second.get();
712 }
713 
714 // static
DetermineStorageSizes(const base::Value & schema,StorageSizes * sizes)715 void Schema::InternalStorage::DetermineStorageSizes(const base::Value& schema,
716                                                     StorageSizes* sizes) {
717   if (schema.FindStringKey(schema::kRef)) {
718     // Schemas with a "$ref" attribute don't take additional storage.
719     return;
720   }
721 
722   base::Value::Type type = base::Value::Type::NONE;
723   const std::string* type_string = schema.FindStringKey(schema::kType);
724   if (!type_string || !SchemaTypeToValueType(*type_string, &type)) {
725     // This schema is invalid.
726     return;
727   }
728 
729   sizes->schema_nodes++;
730 
731   if (type == base::Value::Type::LIST) {
732     const base::Value* items = schema.FindDictKey(schema::kItems);
733     if (items)
734       DetermineStorageSizes(*items, sizes);
735   } else if (type == base::Value::Type::DICTIONARY) {
736     sizes->properties_nodes++;
737 
738     const base::Value* additional_properties =
739         schema.FindDictKey(schema::kAdditionalProperties);
740     if (additional_properties)
741       DetermineStorageSizes(*additional_properties, sizes);
742 
743     const base::Value* properties = schema.FindDictKey(schema::kProperties);
744     if (properties) {
745       for (const auto& property : properties->DictItems()) {
746         DetermineStorageSizes(property.second, sizes);
747         sizes->strings++;
748         sizes->property_nodes++;
749       }
750     }
751 
752     const base::Value* pattern_properties =
753         schema.FindDictKey(schema::kPatternProperties);
754     if (pattern_properties) {
755       for (const auto& pattern_property : pattern_properties->DictItems()) {
756         DetermineStorageSizes(pattern_property.second, sizes);
757         sizes->strings++;
758         sizes->property_nodes++;
759       }
760     }
761 
762     const base::Value* required_properties = schema.FindKey(schema::kRequired);
763     if (required_properties) {
764       sizes->strings += required_properties->GetList().size();
765       sizes->required_properties += required_properties->GetList().size();
766     }
767   } else if (schema.FindKey(schema::kEnum)) {
768     const base::Value* possible_values = schema.FindListKey(schema::kEnum);
769     if (possible_values) {
770       size_t num_possible_values = possible_values->GetList().size();
771       if (type == base::Value::Type::INTEGER) {
772         sizes->int_enums += num_possible_values;
773       } else if (type == base::Value::Type::STRING) {
774         sizes->string_enums += num_possible_values;
775         sizes->strings += num_possible_values;
776       }
777       sizes->restriction_nodes++;
778     }
779   } else if (type == base::Value::Type::INTEGER) {
780     if (schema.FindKey(schema::kMinimum) || schema.FindKey(schema::kMaximum))
781       sizes->restriction_nodes++;
782   } else if (type == base::Value::Type::STRING) {
783     if (schema.FindKey(schema::kPattern)) {
784       sizes->strings++;
785       sizes->string_enums++;
786       sizes->restriction_nodes++;
787     }
788   }
789 }
790 
Parse(const base::Value & schema,short * index,ReferencesAndIDs * references_and_ids,std::string * error)791 bool Schema::InternalStorage::Parse(const base::Value& schema,
792                                     short* index,
793                                     ReferencesAndIDs* references_and_ids,
794                                     std::string* error) {
795   const std::string* ref = schema.FindStringKey(schema::kRef);
796   if (ref) {
797     if (schema.FindStringKey(schema::kId)) {
798       *error = "Schemas with a $ref can't have an id";
799       return false;
800     }
801     references_and_ids->reference_list.emplace_back(*ref, index);
802     return true;
803   }
804 
805   const std::string* type_string = schema.FindStringKey(schema::kType);
806   if (!type_string) {
807     *error = "The schema type must be declared.";
808     return false;
809   }
810 
811   base::Value::Type type = base::Value::Type::NONE;
812   if (!SchemaTypeToValueType(*type_string, &type)) {
813     *error = "Type not supported: " + *type_string;
814     return false;
815   }
816 
817   if (schema_nodes_.size() > std::numeric_limits<short>::max()) {
818     *error = "Can't have more than " +
819              std::to_string(std::numeric_limits<short>::max()) +
820              " schema nodes.";
821     return false;
822   }
823   *index = static_cast<short>(schema_nodes_.size());
824   schema_nodes_.push_back(SchemaNode());
825   SchemaNode* schema_node = &schema_nodes_.back();
826   schema_node->type = type;
827   schema_node->extra = kInvalid;
828   schema_node->is_sensitive_value = false;
829 
830   base::Optional<bool> is_sensitive_value =
831       schema.FindBoolKey(schema::kSensitiveValue);
832   if (is_sensitive_value)
833     schema_node->is_sensitive_value = *is_sensitive_value;
834 
835   if (type == base::Value::Type::DICTIONARY) {
836     if (!ParseDictionary(schema, schema_node, references_and_ids, error))
837       return false;
838   } else if (type == base::Value::Type::LIST) {
839     if (!ParseList(schema, schema_node, references_and_ids, error))
840       return false;
841   } else if (schema.FindKey(schema::kEnum)) {
842     if (!ParseEnum(schema, type, schema_node, error))
843       return false;
844   } else if (schema.FindKey(schema::kPattern)) {
845     if (!ParseStringPattern(schema, schema_node, error))
846       return false;
847   } else if (schema.FindKey(schema::kMinimum) ||
848              schema.FindKey(schema::kMaximum)) {
849     if (type != base::Value::Type::INTEGER) {
850       *error = "Only integers can have minimum and maximum";
851       return false;
852     }
853     if (!ParseRangedInt(schema, schema_node, error))
854       return false;
855   }
856   const std::string* id = schema.FindStringKey(schema::kId);
857   if (id) {
858     auto& id_map = references_and_ids->id_map;
859     if (base::Contains(id_map, *id)) {
860       *error = "Duplicated id: " + *id;
861       return false;
862     }
863     id_map[*id] = *index;
864   }
865 
866   return true;
867 }
868 
// Parses the dictionary-specific attributes of |schema| for |schema_node|.
//
// Appends a PropertiesNode and stores its index in |schema_node->extra|. The
// node describes three ranges:
//   [begin, end)                  entries of |property_nodes_| for "properties"
//   [end, pattern_end)            entries for "patternProperties"
//   [required_begin, required_end) entries of |required_properties_|
// plus |additional|, the schema index for "additionalProperties" (kInvalid if
// that attribute is absent).
//
// Returns false and sets |*error| if a sub-schema fails to parse or if a
// "patternProperties" key is not a valid regex.
bool Schema::InternalStorage::ParseDictionary(
    const base::Value& schema,
    SchemaNode* schema_node,
    ReferencesAndIDs* references_and_ids,
    std::string* error) {
  // The PropertiesNode is always addressed as properties_nodes_[extra] rather
  // than through a cached reference; presumably because the recursive Parse()
  // calls below can append to |properties_nodes_| as well.
  int extra = static_cast<int>(properties_nodes_.size());
  properties_nodes_.push_back(PropertiesNode());
  properties_nodes_[extra].additional = kInvalid;
  schema_node->extra = extra;

  const base::Value* additional_properties =
      schema.FindDictKey(schema::kAdditionalProperties);
  if (additional_properties) {
    if (!Parse(*additional_properties, &properties_nodes_[extra].additional,
               references_and_ids, error)) {
      return false;
    }
  }

  properties_nodes_[extra].begin = static_cast<int>(property_nodes_.size());

  const base::Value* properties = schema.FindDictKey(schema::kProperties);
  if (properties) {
    // This and below reserves nodes for all of the |properties|, and makes sure
    // they are contiguous. Recursive calls to Parse() will append after these
    // elements.
    property_nodes_.resize(property_nodes_.size() + properties->DictSize());
  }

  properties_nodes_[extra].end = static_cast<int>(property_nodes_.size());

  const base::Value* pattern_properties =
      schema.FindDictKey(schema::kPatternProperties);
  if (pattern_properties) {
    property_nodes_.resize(property_nodes_.size() +
                           pattern_properties->DictSize());
  }

  properties_nodes_[extra].pattern_end =
      static_cast<int>(property_nodes_.size());

  // Fill in the slots reserved for "properties": copy each key into
  // |strings_| and parse its sub-schema into the slot's |schema| index.
  if (properties != nullptr) {
    int base_index = properties_nodes_[extra].begin;
    int index = base_index;

    for (const auto& property : properties->DictItems()) {
      strings_.push_back(property.first);
      property_nodes_[index].key = strings_.back().c_str();
      if (!Parse(property.second, &property_nodes_[index].schema,
                 references_and_ids, error)) {
        return false;
      }
      ++index;
    }
    CHECK_EQ(static_cast<int>(properties->DictSize()), index - base_index);
  }

  // Same for "patternProperties", whose slots start where the regular
  // properties end. Each key must additionally compile as a regex.
  if (pattern_properties != nullptr) {
    int base_index = properties_nodes_[extra].end;
    int index = base_index;

    for (const auto& pattern_property : pattern_properties->DictItems()) {
      re2::RE2* compiled_regex = CompileRegex(pattern_property.first);
      if (!compiled_regex->ok()) {
        *error = "/" + pattern_property.first +
                 "/ is a invalid regex: " + compiled_regex->error();
        return false;
      }
      strings_.push_back(pattern_property.first);
      property_nodes_[index].key = strings_.back().c_str();
      if (!Parse(pattern_property.second, &property_nodes_[index].schema,
                 references_and_ids, error)) {
        return false;
      }
      ++index;
    }
    CHECK_EQ(static_cast<int>(pattern_properties->DictSize()),
             index - base_index);
  }

  // Copy the names listed under "required" into |strings_| and reference them
  // from |required_properties_|.
  properties_nodes_[extra].required_begin = required_properties_.size();
  const base::Value* required_properties = schema.FindKey(schema::kRequired);
  if (required_properties) {
    for (const base::Value& val : required_properties->GetList()) {
      strings_.push_back(val.GetString());
      required_properties_.push_back(strings_.back().c_str());
    }
  }
  properties_nodes_[extra].required_end = required_properties_.size();

  // No "properties" and no "patternProperties" entries at all: mark every
  // range (including the required range) as invalid.
  if (properties_nodes_[extra].begin == properties_nodes_[extra].pattern_end) {
    properties_nodes_[extra].begin = kInvalid;
    properties_nodes_[extra].end = kInvalid;
    properties_nodes_[extra].pattern_end = kInvalid;
    properties_nodes_[extra].required_begin = kInvalid;
    properties_nodes_[extra].required_end = kInvalid;
  }

  return true;
}
969 
ParseList(const base::Value & schema,SchemaNode * schema_node,ReferencesAndIDs * references_and_ids,std::string * error)970 bool Schema::InternalStorage::ParseList(const base::Value& schema,
971                                         SchemaNode* schema_node,
972                                         ReferencesAndIDs* references_and_ids,
973                                         std::string* error) {
974   const base::Value* items = schema.FindDictKey(schema::kItems);
975   if (!items) {
976     *error = "Arrays must declare a single schema for their items.";
977     return false;
978   }
979   return Parse(*items, &schema_node->extra, references_and_ids, error);
980 }
981 
ParseEnum(const base::Value & schema,base::Value::Type type,SchemaNode * schema_node,std::string * error)982 bool Schema::InternalStorage::ParseEnum(const base::Value& schema,
983                                         base::Value::Type type,
984                                         SchemaNode* schema_node,
985                                         std::string* error) {
986   const base::Value* possible_values = schema.FindListKey(schema::kEnum);
987   if (!possible_values) {
988     *error = "Enum attribute must be a list value";
989     return false;
990   }
991   if (possible_values->GetList().empty()) {
992     *error = "Enum attribute must be non-empty";
993     return false;
994   }
995   int offset_begin;
996   int offset_end;
997   if (type == base::Value::Type::INTEGER) {
998     offset_begin = static_cast<int>(int_enums_.size());
999     for (const auto& possible_value : possible_values->GetList()) {
1000       if (!possible_value.is_int()) {
1001         *error = "Invalid enumeration member type";
1002         return false;
1003       }
1004       int_enums_.push_back(possible_value.GetInt());
1005     }
1006     offset_end = static_cast<int>(int_enums_.size());
1007   } else if (type == base::Value::Type::STRING) {
1008     offset_begin = static_cast<int>(string_enums_.size());
1009     for (const auto& possible_value : possible_values->GetList()) {
1010       if (!possible_value.is_string()) {
1011         *error = "Invalid enumeration member type";
1012         return false;
1013       }
1014       strings_.push_back(possible_value.GetString());
1015       string_enums_.push_back(strings_.back().c_str());
1016     }
1017     offset_end = static_cast<int>(string_enums_.size());
1018   } else {
1019     *error = "Enumeration is only supported for integer and string.";
1020     return false;
1021   }
1022   schema_node->extra = static_cast<int>(restriction_nodes_.size());
1023   restriction_nodes_.push_back(RestrictionNode());
1024   restriction_nodes_.back().enumeration_restriction.offset_begin = offset_begin;
1025   restriction_nodes_.back().enumeration_restriction.offset_end = offset_end;
1026   return true;
1027 }
1028 
ParseRangedInt(const base::Value & schema,SchemaNode * schema_node,std::string * error)1029 bool Schema::InternalStorage::ParseRangedInt(const base::Value& schema,
1030                                              SchemaNode* schema_node,
1031                                              std::string* error) {
1032   int min_value = schema.FindIntKey(schema::kMinimum).value_or(INT_MIN);
1033   int max_value = schema.FindIntKey(schema::kMaximum).value_or(INT_MAX);
1034   if (min_value > max_value) {
1035     *error = "Invalid range restriction for int type.";
1036     return false;
1037   }
1038   schema_node->extra = static_cast<int>(restriction_nodes_.size());
1039   restriction_nodes_.push_back(RestrictionNode());
1040   restriction_nodes_.back().ranged_restriction.max_value = max_value;
1041   restriction_nodes_.back().ranged_restriction.min_value = min_value;
1042   return true;
1043 }
1044 
ParseStringPattern(const base::Value & schema,SchemaNode * schema_node,std::string * error)1045 bool Schema::InternalStorage::ParseStringPattern(const base::Value& schema,
1046                                                  SchemaNode* schema_node,
1047                                                  std::string* error) {
1048   const std::string* pattern = schema.FindStringKey(schema::kPattern);
1049   if (!pattern) {
1050     *error = "Schema pattern must be a string.";
1051     return false;
1052   }
1053   re2::RE2* compiled_regex = CompileRegex(*pattern);
1054   if (!compiled_regex->ok()) {
1055     *error = "/" + *pattern + "/ is invalid regex: " + compiled_regex->error();
1056     return false;
1057   }
1058   int index = static_cast<int>(string_enums_.size());
1059   strings_.push_back(*pattern);
1060   string_enums_.push_back(strings_.back().c_str());
1061   schema_node->extra = static_cast<int>(restriction_nodes_.size());
1062   restriction_nodes_.push_back(RestrictionNode());
1063   restriction_nodes_.back().string_pattern_restriction.pattern_index = index;
1064   restriction_nodes_.back().string_pattern_restriction.pattern_index_backup =
1065       index;
1066   return true;
1067 }
1068 
1069 // static
ResolveReferences(const ReferencesAndIDs & references_and_ids,std::string * error)1070 bool Schema::InternalStorage::ResolveReferences(
1071     const ReferencesAndIDs& references_and_ids,
1072     std::string* error) {
1073   const auto& reference_list = references_and_ids.reference_list;
1074   const auto& id_map = references_and_ids.id_map;
1075   for (auto& ref : reference_list) {
1076     auto id = id_map.find(ref.first);
1077     if (id == id_map.end()) {
1078       *error = "Invalid $ref: " + ref.first;
1079       return false;
1080     }
1081     *ref.second = id->second;
1082   }
1083   return true;
1084 }
1085 
FindSensitiveChildren()1086 void Schema::InternalStorage::FindSensitiveChildren() {
1087   if (schema_nodes_.empty())
1088     return;
1089 
1090   std::set<int> handled_schema_nodes;
1091   FindSensitiveChildrenRecursive(0, &handled_schema_nodes);
1092 }
1093 
FindSensitiveChildrenRecursive(int index,std::set<int> * handled_schema_nodes)1094 bool Schema::InternalStorage::FindSensitiveChildrenRecursive(
1095     int index,
1096     std::set<int>* handled_schema_nodes) {
1097   DCHECK(static_cast<unsigned long>(index) < schema_nodes_.size());
1098   SchemaNode& schema_node = schema_nodes_[index];
1099   if (handled_schema_nodes->find(index) != handled_schema_nodes->end())
1100     return schema_node.has_sensitive_children || schema_node.is_sensitive_value;
1101 
1102   handled_schema_nodes->insert(index);
1103   bool has_sensitive_children = false;
1104   if (schema_node.type == base::Value::Type::DICTIONARY) {
1105     const PropertiesNode& properties_node =
1106         properties_nodes_[schema_node.extra];
1107     // Iterate through properties and patternProperties.
1108     for (int i = properties_node.begin; i < properties_node.pattern_end; ++i) {
1109       int sub_index = property_nodes_[i].schema;
1110       has_sensitive_children |=
1111           FindSensitiveChildrenRecursive(sub_index, handled_schema_nodes);
1112     }
1113     if (properties_node.additional != kInvalid) {
1114       has_sensitive_children |= FindSensitiveChildrenRecursive(
1115           properties_node.additional, handled_schema_nodes);
1116     }
1117   } else if (schema_node.type == base::Value::Type::LIST) {
1118     int sub_index = schema_node.extra;
1119     has_sensitive_children |=
1120         FindSensitiveChildrenRecursive(sub_index, handled_schema_nodes);
1121   }
1122   schema_node.has_sensitive_children = has_sensitive_children;
1123 
1124   return schema_node.has_sensitive_children || schema_node.is_sensitive_value;
1125 }
1126 
Iterator(const scoped_refptr<const InternalStorage> & storage,const PropertiesNode * node)1127 Schema::Iterator::Iterator(const scoped_refptr<const InternalStorage>& storage,
1128                            const PropertiesNode* node) {
1129   if (node->begin == kInvalid || node->end == kInvalid) {
1130     it_ = nullptr;
1131     end_ = nullptr;
1132   } else {
1133     storage_ = storage;
1134     it_ = storage->property(node->begin);
1135     end_ = storage->property(node->end);
1136   }
1137 }
1138 
// Copy constructor: the new iterator shares |iterator|'s storage reference
// and points at the same position and end.
Schema::Iterator::Iterator(const Iterator& iterator)
    : storage_(iterator.storage_), it_(iterator.it_), end_(iterator.end_) {}
1141 
// Nothing to do explicitly; members clean up after themselves.
Schema::Iterator::~Iterator() {}
1143 
// Copy assignment: takes over |iterator|'s storage reference, position and
// end. Returns *this.
Schema::Iterator& Schema::Iterator::operator=(const Iterator& iterator) {
  storage_ = iterator.storage_;
  it_ = iterator.it_;
  end_ = iterator.end_;
  return *this;
}
1150 
// Returns true once the iterator has reached the end of its range. An
// iterator built from an invalid range has both pointers null and is
// therefore at the end from the start.
bool Schema::Iterator::IsAtEnd() const {
  return it_ == end_;
}
1154 
// Moves to the next property. Must not be called on an iterator whose
// position is null (DCHECKed); no check against the end is made here.
void Schema::Iterator::Advance() {
  DCHECK(it_);
  ++it_;
}
1159 
// Returns the key of the current property. Dereferences the current position
// without checking it, so the iterator must not be at its end.
const char* Schema::Iterator::key() const {
  return it_->key;
}
1163 
// Returns the Schema of the current property, backed by the same storage.
// Dereferences the current position and |storage_| without checking them, so
// the iterator must not be at its end.
Schema Schema::Iterator::schema() const {
  return Schema(storage_, storage_->schema(it_->schema));
}
1167 
// Default constructor: a Schema with no node and no storage.
Schema::Schema() : node_(nullptr) {}
1169 
// Builds a Schema that refers to |node| and holds a reference to |storage|.
Schema::Schema(const scoped_refptr<const InternalStorage>& storage,
               const SchemaNode* node)
    : storage_(storage), node_(node) {}
1173 
// Copy constructor: the copy shares |schema|'s storage reference and node.
Schema::Schema(const Schema& schema)
    : storage_(schema.storage_), node_(schema.node_) {}
1176 
// Nothing to do explicitly; members clean up after themselves.
Schema::~Schema() {}
1178 
// Copy assignment: takes over |schema|'s storage reference and node. Returns
// *this.
Schema& Schema::operator=(const Schema& schema) {
  storage_ = schema.storage_;
  node_ = schema.node_;
  return *this;
}
1184 
1185 // static
// Wraps |data| in an InternalStorage (via InternalStorage::Wrap) and returns
// a Schema for that storage's root node.
Schema Schema::Wrap(const SchemaData* data) {
  scoped_refptr<const InternalStorage> storage = InternalStorage::Wrap(data);
  return Schema(storage, storage->root_node());
}
1190 
// Checks |value| against this schema without modifying it.
//
// Returns true if |value| is acceptable under |strategy|. When a problem is
// found, a description is written to |*error| and |*error_path| is updated to
// locate it. Note that with a strategy that allows unknown properties, an
// unknown dictionary key is still reported through |error| even though
// validation continues and may return true.
bool Schema::Validate(const base::Value& value,
                      SchemaOnErrorStrategy strategy,
                      std::string* error_path,
                      std::string* error) const {
  if (!valid()) {
    SchemaErrorFound(error_path, error, "The schema is invalid.");
    return false;
  }

  if (value.type() != type()) {
    // Allow the integer to double promotion. Note that range restriction on
    // double is not supported now.
    if (value.is_int() && type() == base::Value::Type::DOUBLE) {
      return true;
    }

    SchemaErrorFound(error_path, error,
                     "The value type doesn't match the schema type.");
    return false;
  }

  if (value.is_dict()) {
    // Keys that matched at least one sub-schema and validated against all of
    // the matching ones; used for the required-properties check below.
    base::flat_set<std::string> present_properties;
    for (const auto& dict_item : value.DictItems()) {
      SchemaList schema_list = GetMatchingProperties(dict_item.first);
      if (schema_list.empty()) {
        // Unknown property was detected.
        SchemaErrorFound(error_path, error,
                         "Unknown property: " + dict_item.first);
        if (!StrategyAllowUnknown(strategy))
          return false;
      } else {
        // The value must satisfy every schema that matches its key.
        for (const auto& subschema : schema_list) {
          std::string new_error;
          const bool validation_result = subschema.Validate(
              dict_item.second, strategy, error_path, &new_error);
          // A nested error (fatal or not) is surfaced with this key prepended
          // to its path.
          if (!new_error.empty()) {
            AddDictKeyPrefixToPath(dict_item.first, error_path);
            *error = std::move(new_error);
          }
          if (!validation_result) {
            // Invalid property was detected.
            return false;
          }
        }
        present_properties.insert(dict_item.first);
      }
    }

    for (const auto& required_property : GetRequiredProperties()) {
      if (base::Contains(present_properties, required_property))
        continue;

      SchemaErrorFound(
          error_path, error,
          "Missing or invalid required property: " + required_property);
      return false;
    }
  } else if (value.is_list()) {
    // Every list item is validated against the single "items" schema.
    for (size_t index = 0; index < value.GetList().size(); ++index) {
      const base::Value& list_item = value.GetList()[index];
      std::string new_error;
      const bool validation_result =
          GetItems().Validate(list_item, strategy, error_path, &new_error);
      if (!new_error.empty()) {
        AddListIndexPrefixToPath(index, error_path);
        *error = std::move(new_error);
      }
      if (!validation_result)
        return false;  // Invalid list item was detected.
    }
  } else if (value.is_int()) {
    // |extra| refers to a restriction only when it is not kInvalid.
    if (node_->extra != kInvalid &&
        !ValidateIntegerRestriction(node_->extra, value.GetInt())) {
      SchemaErrorFound(error_path, error, "Invalid value for integer");
      return false;
    }
  } else if (value.is_string()) {
    if (node_->extra != kInvalid &&
        !ValidateStringRestriction(node_->extra, value.GetString().c_str())) {
      SchemaErrorFound(error_path, error, "Invalid value for string");
      return false;
    }
  }

  return true;
}
1278 
Normalize(base::Value * value,SchemaOnErrorStrategy strategy,std::string * error_path,std::string * error,bool * changed) const1279 bool Schema::Normalize(base::Value* value,
1280                        SchemaOnErrorStrategy strategy,
1281                        std::string* error_path,
1282                        std::string* error,
1283                        bool* changed) const {
1284   if (!valid()) {
1285     SchemaErrorFound(error_path, error, "The schema is invalid.");
1286     return false;
1287   }
1288 
1289   if (value->type() != type()) {
1290     // Allow the integer to double promotion. Note that range restriction on
1291     // double is not supported now.
1292     if (value->is_int() && type() == base::Value::Type::DOUBLE) {
1293       return true;
1294     }
1295 
1296     SchemaErrorFound(error_path, error,
1297                      "The value type doesn't match the schema type.");
1298     return false;
1299   }
1300 
1301   if (value->is_dict()) {
1302     base::flat_set<std::string> present_properties;
1303     std::vector<std::string> drop_list;  // Contains the keys to drop.
1304     for (const auto& dict_item : value->DictItems()) {
1305       SchemaList schema_list = GetMatchingProperties(dict_item.first);
1306       if (schema_list.empty()) {
1307         // Unknown property was detected.
1308         SchemaErrorFound(error_path, error,
1309                          "Unknown property: " + dict_item.first);
1310         if (!StrategyAllowUnknown(strategy))
1311           return false;
1312         drop_list.push_back(dict_item.first);
1313       } else {
1314         for (const auto& subschema : schema_list) {
1315           std::string new_error;
1316           const bool normalization_result = subschema.Normalize(
1317               &dict_item.second, strategy, error_path, &new_error, changed);
1318           if (!new_error.empty()) {
1319             AddDictKeyPrefixToPath(dict_item.first, error_path);
1320             *error = std::move(new_error);
1321           }
1322           if (!normalization_result) {
1323             // Invalid property was detected.
1324             return false;
1325           }
1326         }
1327         present_properties.insert(dict_item.first);
1328       }
1329     }
1330 
1331     for (const auto& required_property : GetRequiredProperties()) {
1332       if (base::Contains(present_properties, required_property))
1333         continue;
1334 
1335       SchemaErrorFound(
1336           error_path, error,
1337           "Missing or invalid required property: " + required_property);
1338       return false;
1339     }
1340 
1341     if (changed && !drop_list.empty())
1342       *changed = true;
1343     for (const auto& drop_key : drop_list)
1344       value->RemoveKey(drop_key);
1345     return true;
1346   } else if (value->is_list()) {
1347     base::Value::ListStorage list = value->TakeList();
1348     // Instead of removing invalid list items afterwards, we push valid items
1349     // forward in the list by overriding invalid items. The next free position
1350     // is indicated by |write_index|, which gets increased for every valid item.
1351     // At the end |list| is resized to |write_index|'s size.
1352     size_t write_index = 0;
1353     for (size_t index = 0; index < list.size(); ++index) {
1354       base::Value& list_item = list[index];
1355       std::string new_error;
1356       const bool normalization_result = GetItems().Normalize(
1357           &list_item, strategy, error_path, &new_error, changed);
1358       if (!new_error.empty()) {
1359         AddListIndexPrefixToPath(index, error_path);
1360         *error = new_error;
1361       }
1362       if (!normalization_result) {
1363         // Invalid list item was detected.
1364         return false;
1365       } else {
1366         if (write_index != index)
1367           list[write_index] = std::move(list_item);
1368         ++write_index;
1369       }
1370     }
1371     if (changed && write_index < list.size())
1372       *changed = true;
1373     list.resize(write_index);
1374     *value = base::Value(std::move(list));
1375     return true;
1376   }
1377 
1378   return Validate(*value, strategy, error_path, error);
1379 }
1380 
MaskSensitiveValues(base::Value * value) const1381 void Schema::MaskSensitiveValues(base::Value* value) const {
1382   if (!valid())
1383     return;
1384 
1385   MaskSensitiveValuesRecursive(value);
1386 }
1387 
1388 // static
Parse(const std::string & content,std::string * error)1389 Schema Schema::Parse(const std::string& content, std::string* error) {
1390   // Validate as a generic JSON schema, and ignore unknown attributes; they
1391   // may become used in a future version of the schema format.
1392   base::Optional<base::Value> dict = Schema::ParseToDictAndValidate(
1393       content, kSchemaOptionsIgnoreUnknownAttributes, error);
1394   if (!dict.has_value())
1395     return Schema();
1396 
1397   // Validate the main type.
1398   const std::string* type = dict->FindStringKey(schema::kType);
1399   if (!type || *type != schema::kObject) {
1400     *error =
1401         "The main schema must have a type attribute with \"object\" value.";
1402     return Schema();
1403   }
1404 
1405   // Checks for invalid attributes at the top-level.
1406   if (dict.value().FindKey(schema::kAdditionalProperties) ||
1407       dict.value().FindKey(schema::kPatternProperties)) {
1408     *error =
1409         "\"additionalProperties\" and \"patternProperties\" are not "
1410         "supported at the main schema.";
1411     return Schema();
1412   }
1413 
1414   scoped_refptr<const InternalStorage> storage =
1415       InternalStorage::ParseSchema(dict.value(), error);
1416   if (!storage)
1417     return Schema();
1418   return Schema(storage, storage->root_node());
1419 }
1420 
1421 // static
ParseToDictAndValidate(const std::string & schema,int validator_options,std::string * error)1422 base::Optional<base::Value> Schema::ParseToDictAndValidate(
1423     const std::string& schema,
1424     int validator_options,
1425     std::string* error) {
1426   base::JSONReader::ValueWithError value_with_error =
1427       base::JSONReader::ReadAndReturnValueWithError(
1428           schema, base::JSONParserOptions::JSON_ALLOW_TRAILING_COMMAS);
1429   *error = value_with_error.error_message;
1430 
1431   if (!value_with_error.value)
1432     return base::nullopt;
1433   base::Value json = std::move(value_with_error.value.value());
1434   if (!json.is_dict()) {
1435     *error = "Schema must be a JSON object";
1436     return base::nullopt;
1437   }
1438   if (!IsValidSchema(json, validator_options, error))
1439     return base::nullopt;
1440   return json;
1441 }
1442 
// Returns the value type this schema describes. CHECK-fails if the schema is
// not valid.
base::Value::Type Schema::type() const {
  CHECK(valid());
  return node_->type;
}
1447 
// Returns an iterator over this schema's property nodes. CHECK-fails unless
// the schema is valid and of dictionary type.
Schema::Iterator Schema::GetPropertiesIterator() const {
  CHECK(valid());
  CHECK_EQ(base::Value::Type::DICTIONARY, type());
  return Iterator(storage_, storage_->properties(node_->extra));
}
1453 
1454 namespace {
1455 
CompareKeys(const PropertyNode & node,const std::string & key)1456 bool CompareKeys(const PropertyNode& node, const std::string& key) {
1457   return node.key < key;
1458 }
1459 
1460 }  // namespace
1461 
GetKnownProperty(const std::string & key) const1462 Schema Schema::GetKnownProperty(const std::string& key) const {
1463   CHECK(valid());
1464   CHECK_EQ(base::Value::Type::DICTIONARY, type());
1465   const PropertiesNode* node = storage_->properties(node_->extra);
1466   if (node->begin == kInvalid || node->end == kInvalid)
1467     return Schema();
1468   const PropertyNode* begin = storage_->property(node->begin);
1469   const PropertyNode* end = storage_->property(node->end);
1470   const PropertyNode* it = std::lower_bound(begin, end, key, CompareKeys);
1471   if (it != end && it->key == key)
1472     return Schema(storage_, storage_->schema(it->schema));
1473   return Schema();
1474 }
1475 
GetAdditionalProperties() const1476 Schema Schema::GetAdditionalProperties() const {
1477   CHECK(valid());
1478   CHECK_EQ(base::Value::Type::DICTIONARY, type());
1479   const PropertiesNode* node = storage_->properties(node_->extra);
1480   if (node->additional == kInvalid)
1481     return Schema();
1482   return Schema(storage_, storage_->schema(node->additional));
1483 }
1484 
GetPatternProperties(const std::string & key) const1485 SchemaList Schema::GetPatternProperties(const std::string& key) const {
1486   CHECK(valid());
1487   CHECK_EQ(base::Value::Type::DICTIONARY, type());
1488   const PropertiesNode* node = storage_->properties(node_->extra);
1489   if (node->end == kInvalid || node->pattern_end == kInvalid)
1490     return {};
1491   const PropertyNode* begin = storage_->property(node->end);
1492   const PropertyNode* end = storage_->property(node->pattern_end);
1493   SchemaList matching_properties;
1494   for (const PropertyNode* it = begin; it != end; ++it) {
1495     if (re2::RE2::PartialMatch(key, *storage_->CompileRegex(it->key))) {
1496       matching_properties.push_back(
1497           Schema(storage_, storage_->schema(it->schema)));
1498     }
1499   }
1500   return matching_properties;
1501 }
1502 
GetRequiredProperties() const1503 std::vector<std::string> Schema::GetRequiredProperties() const {
1504   CHECK(valid());
1505   CHECK_EQ(base::Value::Type::DICTIONARY, type());
1506   const PropertiesNode* node = storage_->properties(node_->extra);
1507   if (node->required_begin == kInvalid || node->required_end == kInvalid)
1508     return {};
1509   const size_t begin = node->required_begin;
1510   const size_t end = node->required_end;
1511 
1512   return std::vector<std::string>(storage_->required_property(begin),
1513                                   storage_->required_property(end));
1514 }
1515 
GetProperty(const std::string & key) const1516 Schema Schema::GetProperty(const std::string& key) const {
1517   Schema schema = GetKnownProperty(key);
1518   if (schema.valid())
1519     return schema;
1520   return GetAdditionalProperties();
1521 }
1522 
GetMatchingProperties(const std::string & key) const1523 SchemaList Schema::GetMatchingProperties(const std::string& key) const {
1524   SchemaList schema_list;
1525 
1526   Schema known_property = GetKnownProperty(key);
1527   if (known_property.valid())
1528     schema_list.push_back(known_property);
1529 
1530   SchemaList pattern_properties = GetPatternProperties(key);
1531   schema_list.insert(schema_list.end(), pattern_properties.begin(),
1532                      pattern_properties.end());
1533 
1534   if (schema_list.empty()) {
1535     Schema additional_property = GetAdditionalProperties();
1536     if (additional_property.valid())
1537       schema_list.push_back(additional_property);
1538   }
1539 
1540   return schema_list;
1541 }
1542 
GetItems() const1543 Schema Schema::GetItems() const {
1544   CHECK(valid());
1545   CHECK_EQ(base::Value::Type::LIST, type());
1546   if (node_->extra == kInvalid)
1547     return Schema();
1548   return Schema(storage_, storage_->schema(node_->extra));
1549 }
1550 
ValidateIntegerRestriction(int index,int value) const1551 bool Schema::ValidateIntegerRestriction(int index, int value) const {
1552   const RestrictionNode* rnode = storage_->restriction(index);
1553   if (rnode->ranged_restriction.min_value <=
1554       rnode->ranged_restriction.max_value) {
1555     return rnode->ranged_restriction.min_value <= value &&
1556            rnode->ranged_restriction.max_value >= value;
1557   } else {
1558     for (int i = rnode->enumeration_restriction.offset_begin;
1559          i < rnode->enumeration_restriction.offset_end; ++i) {
1560       if (*storage_->int_enums(i) == value)
1561         return true;
1562     }
1563     return false;
1564   }
1565 }
1566 
ValidateStringRestriction(int index,const char * str) const1567 bool Schema::ValidateStringRestriction(int index, const char* str) const {
1568   const RestrictionNode* rnode = storage_->restriction(index);
1569   if (rnode->enumeration_restriction.offset_begin <
1570       rnode->enumeration_restriction.offset_end) {
1571     for (int i = rnode->enumeration_restriction.offset_begin;
1572          i < rnode->enumeration_restriction.offset_end; ++i) {
1573       if (strcmp(*storage_->string_enums(i), str) == 0)
1574         return true;
1575     }
1576     return false;
1577   } else {
1578     int index = rnode->string_pattern_restriction.pattern_index;
1579     DCHECK(index == rnode->string_pattern_restriction.pattern_index_backup);
1580     re2::RE2* regex = storage_->CompileRegex(*storage_->string_enums(index));
1581     return re2::RE2::PartialMatch(str, *regex);
1582   }
1583 }
1584 
MaskSensitiveValuesRecursive(base::Value * value) const1585 void Schema::MaskSensitiveValuesRecursive(base::Value* value) const {
1586   if (IsSensitiveValue()) {
1587     *value = base::Value(kSensitiveValueMask);
1588     return;
1589   }
1590   if (!HasSensitiveChildren())
1591     return;
1592   if (value->type() != type())
1593     return;
1594 
1595   if (value->is_dict()) {
1596     for (const auto& dict_item : value->DictItems()) {
1597       auto& sub_value = dict_item.second;
1598       SchemaList schema_list = GetMatchingProperties(dict_item.first);
1599       for (const auto& schema_item : schema_list)
1600         schema_item.MaskSensitiveValuesRecursive(&sub_value);
1601     }
1602   } else if (value->is_list()) {
1603     for (auto& list_elem : value->GetList())
1604       GetItems().MaskSensitiveValuesRecursive(&list_elem);
1605   }
1606 }
1607 
GetValidationSchema() const1608 Schema Schema::GetValidationSchema() const {
1609   CHECK(valid());
1610   const SchemaNode* validation_schema_root_node =
1611       storage_->validation_schema_root_node();
1612   if (!validation_schema_root_node)
1613     return Schema();
1614   return Schema(storage_, validation_schema_root_node);
1615 }
1616 
IsSensitiveValue() const1617 bool Schema::IsSensitiveValue() const {
1618   CHECK(valid());
1619 
1620   // This is safe because |node_| is guaranteed to have been returned from
1621   // |storage_| and |storage_->root_node()| always returns to the |SchemaNode|
1622   // with index 0.
1623   int index = node_ - storage_->root_node();
1624   const SchemaNode* schema_node = storage_->schema(index);
1625   if (!schema_node)
1626     return false;
1627   return schema_node->is_sensitive_value;
1628 }
1629 
HasSensitiveChildren() const1630 bool Schema::HasSensitiveChildren() const {
1631   CHECK(valid());
1632 
1633   // This is safe because |node_| is guaranteed to have been returned from
1634   // |storage_| and |storage_->root_node()| always returns to the |SchemaNode|
1635   // with index 0.
1636   int index = node_ - storage_->root_node();
1637   const SchemaNode* schema_node = storage_->schema(index);
1638   if (!schema_node)
1639     return false;
1640   return schema_node->has_sensitive_children;
1641 }
1642 
1643 }  // namespace policy
1644