1 /*
2 * JSON schema validator for JSON for modern C++
3 *
4 * Copyright (c) 2016-2019 Patrick Boettcher <p@yai.se>.
5 *
6 * SPDX-License-Identifier: MIT
7 *
8 */
9 #include <nlohmann/json-schema.hpp>
10
11 #include "json-patch.hpp"
12
13 #include <deque>
14 #include <memory>
15 #include <set>
16 #include <sstream>
17
18 using nlohmann::json;
19 using nlohmann::json_patch;
20 using nlohmann::json_uri;
21 using nlohmann::json_schema::root_schema;
22 using namespace nlohmann::json_schema;
23
24 #ifdef JSON_SCHEMA_BOOST_REGEX
25 # include <boost/regex.hpp>
26 # define REGEX_NAMESPACE boost
27 #elif defined(JSON_SCHEMA_NO_REGEX)
28 # define NO_STD_REGEX
29 #else
30 # include <regex>
31 # define REGEX_NAMESPACE std
32 #endif
33
34 namespace
35 {
36
// Shared null JSON value; returned by reference from defaultValue() when a
// schema has no default of its own.
static const json EmptyDefault = nullptr;
38
39 class schema
40 {
41 protected:
42 root_schema *root_;
43
44 public:
45 virtual ~schema() = default;
46
schema(root_schema * root)47 schema(root_schema *root)
48 : root_(root) {}
49
50 virtual void validate(const json::json_pointer &ptr, const json &instance, json_patch &patch, error_handler &e) const = 0;
51
defaultValue(const json::json_pointer &,const json &,error_handler &) const52 virtual const json &defaultValue(const json::json_pointer &, const json &, error_handler &) const
53 {
54 return EmptyDefault;
55 }
56
57 static std::shared_ptr<schema> make(json &schema,
58 root_schema *root,
59 const std::vector<std::string> &key,
60 std::vector<nlohmann::json_uri> uris);
61 };
62
63 class schema_ref : public schema
64 {
65 const std::string id_;
66 std::weak_ptr<schema> target_;
67
validate(const json::json_pointer & ptr,const json & instance,json_patch & patch,error_handler & e) const68 void validate(const json::json_pointer &ptr, const json &instance, json_patch &patch, error_handler &e) const final
69 {
70 auto target = target_.lock();
71
72 if (target)
73 target->validate(ptr, instance, patch, e);
74 else
75 e.error(ptr, instance, "unresolved or freed schema-reference " + id_);
76 }
77
defaultValue(const json::json_pointer & ptr,const json & instance,error_handler & e) const78 const json &defaultValue(const json::json_pointer &ptr, const json &instance, error_handler &e) const override
79 {
80 auto target = target_.lock();
81
82 if (target)
83 return target->defaultValue(ptr, instance, e);
84 else
85 e.error(ptr, instance, "unresolved or freed schema-reference " + id_);
86
87 return EmptyDefault;
88 }
89
90 public:
schema_ref(const std::string & id,root_schema * root)91 schema_ref(const std::string &id, root_schema *root)
92 : schema(root), id_(id) {}
93
id() const94 const std::string &id() const { return id_; }
set_target(const std::shared_ptr<schema> & target)95 void set_target(const std::shared_ptr<schema> &target) { target_ = target; }
96 };
97
98 } // namespace
99
100 namespace nlohmann
101 {
102 namespace json_schema
103 {
104
105 class root_schema
106 {
107 schema_loader loader_;
108 format_checker format_check_;
109 content_checker content_check_;
110
111 std::shared_ptr<schema> root_;
112
113 struct schema_file {
114 std::map<std::string, std::shared_ptr<schema>> schemas;
115 std::map<std::string, std::shared_ptr<schema_ref>> unresolved; // contains all unresolved references from any other file seen during parsing
116 json unknown_keywords;
117 };
118
119 // location as key
120 std::map<std::string, schema_file> files_;
121
get_or_create_file(const std::string & loc)122 schema_file &get_or_create_file(const std::string &loc)
123 {
124 auto file = files_.lower_bound(loc);
125 if (file != files_.end() && !(files_.key_comp()(loc, file->first)))
126 return file->second;
127 else
128 return files_.insert(file, {loc, {}})->second;
129 }
130
131 public:
root_schema(schema_loader && loader,format_checker && format,content_checker && content)132 root_schema(schema_loader &&loader,
133 format_checker &&format,
134 content_checker &&content)
135
136 : loader_(std::move(loader)),
137 format_check_(std::move(format)),
138 content_check_(std::move(content))
139 {
140 }
141
format_check()142 format_checker &format_check() { return format_check_; }
content_check()143 content_checker &content_check() { return content_check_; }
144
insert(const json_uri & uri,const std::shared_ptr<schema> & s)145 void insert(const json_uri &uri, const std::shared_ptr<schema> &s)
146 {
147 auto &file = get_or_create_file(uri.location());
148 auto sch = file.schemas.lower_bound(uri.fragment());
149 if (sch != file.schemas.end() && !(file.schemas.key_comp()(uri.fragment(), sch->first))) {
150 throw std::invalid_argument("schema with " + uri.to_string() + " already inserted");
151 return;
152 }
153
154 file.schemas.insert({uri.fragment(), s});
155
156 // was someone referencing this newly inserted schema?
157 auto unresolved = file.unresolved.find(uri.fragment());
158 if (unresolved != file.unresolved.end()) {
159 unresolved->second->set_target(s);
160 file.unresolved.erase(unresolved);
161 }
162 }
163
insert_unknown_keyword(const json_uri & uri,const std::string & key,json & value)164 void insert_unknown_keyword(const json_uri &uri, const std::string &key, json &value)
165 {
166 auto &file = get_or_create_file(uri.location());
167 auto new_uri = uri.append(key);
168 auto fragment = new_uri.pointer();
169
170 // is there a reference looking for this unknown-keyword, which is thus no longer a unknown keyword but a schema
171 auto unresolved = file.unresolved.find(fragment);
172 if (unresolved != file.unresolved.end())
173 schema::make(value, this, {}, {{new_uri}});
174 else { // no, nothing ref'd it, keep for later
175
176 // need to create an object for each reference-token in the
177 // JSON-Pointer When not existing, a stringified integer reference
178 // token (e.g. "123") in the middle of the pointer will be
179 // interpreted a an array-index and an array will be created.
180
181 // json_pointer's reference_tokens is private - get them
182 std::deque<std::string> ref_tokens;
183 auto uri_pointer = uri.pointer();
184 while (!uri_pointer.empty()) {
185 ref_tokens.push_front(uri_pointer.back());
186 uri_pointer.pop_back();
187 }
188
189 // for each token create an object, if not already existing
190 auto unk_kw = &file.unknown_keywords;
191 for (auto &rt : ref_tokens) {
192 auto existing_object = unk_kw->find(rt);
193 if (existing_object == unk_kw->end())
194 (*unk_kw)[rt] = json::object();
195 unk_kw = &(*unk_kw)[rt];
196 }
197 (*unk_kw)[key] = value;
198 }
199
200 // recursively add possible subschemas of unknown keywords
201 if (value.type() == json::value_t::object)
202 for (auto &subsch : value.items())
203 insert_unknown_keyword(new_uri, subsch.key(), subsch.value());
204 }
205
get_or_create_ref(const json_uri & uri)206 std::shared_ptr<schema> get_or_create_ref(const json_uri &uri)
207 {
208 auto &file = get_or_create_file(uri.location());
209
210 // existing schema
211 auto sch = file.schemas.find(uri.fragment());
212 if (sch != file.schemas.end())
213 return sch->second;
214
215 // referencing an unknown keyword, turn it into schema
216 //
217 // an unknown keyword can only be referenced by a json-pointer,
218 // not by a plain name fragment
219 if (uri.pointer() != "") {
220 try {
221 auto &subschema = file.unknown_keywords.at(uri.pointer()); // null is returned if not existing
222 auto s = schema::make(subschema, this, {}, {{uri}}); // A JSON Schema MUST be an object or a boolean.
223 if (s) { // nullptr if invalid schema, e.g. null
224 file.unknown_keywords.erase(uri.fragment());
225 return s;
226 }
227 } catch (nlohmann::detail::out_of_range &) { // at() did not find it
228 }
229 }
230
231 // get or create a schema_ref
232 auto r = file.unresolved.lower_bound(uri.fragment());
233 if (r != file.unresolved.end() && !(file.unresolved.key_comp()(uri.fragment(), r->first))) {
234 return r->second; // unresolved, already seen previously - use existing reference
235 } else {
236 return file.unresolved.insert(r,
237 {uri.fragment(), std::make_shared<schema_ref>(uri.to_string(), this)})
238 ->second; // unresolved, create reference
239 }
240 }
241
set_root_schema(json sch)242 void set_root_schema(json sch)
243 {
244 files_.clear();
245 root_ = schema::make(sch, this, {}, {{"#"}});
246
247 // load all files which have not yet been loaded
248 do {
249 bool new_schema_loaded = false;
250
251 // files_ is modified during parsing, iterators are invalidated
252 std::vector<std::string> locations;
253 for (auto &file : files_)
254 locations.push_back(file.first);
255
256 for (auto &loc : locations) {
257 if (files_[loc].schemas.size() == 0) { // nothing has been loaded for this file
258 if (loader_) {
259 json loaded_schema;
260
261 loader_(loc, loaded_schema);
262
263 schema::make(loaded_schema, this, {}, {{loc}});
264 new_schema_loaded = true;
265 } else {
266 throw std::invalid_argument("external schema reference '" + loc + "' needs loading, but no loader callback given");
267 }
268 }
269 }
270
271 if (!new_schema_loaded) // if no new schema loaded, no need to try again
272 break;
273 } while (1);
274
275 for (const auto &file : files_)
276 if (file.second.unresolved.size() != 0)
277 throw std::invalid_argument("after all files have been parsed, '" +
278 (file.first == "" ? "<root>" : file.first) +
279 "' has still undefined references.");
280 }
281
validate(const json::json_pointer & ptr,const json & instance,json_patch & patch,error_handler & e,const json_uri & initial) const282 void validate(const json::json_pointer &ptr,
283 const json &instance,
284 json_patch &patch,
285 error_handler &e,
286 const json_uri &initial) const
287 {
288 if (!root_) {
289 e.error(ptr, "", "no root schema has yet been set for validating an instance");
290 return;
291 }
292
293 auto file_entry = files_.find(initial.location());
294 if (file_entry == files_.end()) {
295 e.error(ptr, "", "no file found serving requested root-URI. " + initial.location());
296 return;
297 }
298
299 auto &file = file_entry->second;
300 auto sch = file.schemas.find(initial.fragment());
301 if (sch == file.schemas.end()) {
302 e.error(ptr, "", "no schema find for request initial URI: " + initial.to_string());
303 return;
304 }
305
306 sch->second->validate(ptr, instance, patch, e);
307 }
308 };
309
310 } // namespace json_schema
311 } // namespace nlohmann
312
313 namespace
314 {
315
316 class first_error_handler : public error_handler
317 {
318 public:
319 bool error_{false};
320 json::json_pointer ptr_;
321 json instance_;
322 std::string message_;
323
error(const json::json_pointer & ptr,const json & instance,const std::string & message)324 void error(const json::json_pointer &ptr, const json &instance, const std::string &message) override
325 {
326 if (*this)
327 return;
328 error_ = true;
329 ptr_ = ptr;
330 instance_ = instance;
331 message_ = message;
332 }
333
operator bool() const334 operator bool() const { return error_; }
335 };
336
337 class logical_not : public schema
338 {
339 std::shared_ptr<schema> subschema_;
340
validate(const json::json_pointer & ptr,const json & instance,json_patch & patch,error_handler & e) const341 void validate(const json::json_pointer &ptr, const json &instance, json_patch &patch, error_handler &e) const final
342 {
343 first_error_handler esub;
344 subschema_->validate(ptr, instance, patch, esub);
345
346 if (!esub)
347 e.error(ptr, instance, "the subschema has succeeded, but it is required to not validate");
348 }
349
defaultValue(const json::json_pointer & ptr,const json & instance,error_handler & e) const350 const json &defaultValue(const json::json_pointer &ptr, const json &instance, error_handler &e) const override
351 {
352 return subschema_->defaultValue(ptr, instance, e);
353 }
354
355 public:
logical_not(json & sch,root_schema * root,const std::vector<nlohmann::json_uri> & uris)356 logical_not(json &sch,
357 root_schema *root,
358 const std::vector<nlohmann::json_uri> &uris)
359 : schema(root)
360 {
361 subschema_ = schema::make(sch, root, {"not"}, uris);
362 }
363 };
364
// Selects which combining keyword a logical_combination<> instantiation implements.
enum logical_combination_types {
	allOf = 0, // every subschema has to validate
	anyOf = 1, // at least one subschema has to validate
	oneOf = 2  // exactly one subschema has to validate
};
370
371 template <enum logical_combination_types combine_logic>
372 class logical_combination : public schema
373 {
374 std::vector<std::shared_ptr<schema>> subschemata_;
375
validate(const json::json_pointer & ptr,const json & instance,json_patch & patch,error_handler & e) const376 void validate(const json::json_pointer &ptr, const json &instance, json_patch &patch, error_handler &e) const final
377 {
378 size_t count = 0;
379
380 for (auto &s : subschemata_) {
381 first_error_handler esub;
382 s->validate(ptr, instance, patch, esub);
383 if (!esub)
384 count++;
385
386 if (is_validate_complete(instance, ptr, e, esub, count))
387 return;
388 }
389
390 // could accumulate esub details for anyOf and oneOf, but not clear how to select which subschema failure to report
391 // or how to report multiple such failures
392 if (count == 0)
393 e.error(ptr, instance, "no subschema has succeeded, but one of them is required to validate");
394 }
395
396 // specialized for each of the logical_combination_types
397 static const std::string key;
398 static bool is_validate_complete(const json &, const json::json_pointer &, error_handler &, const first_error_handler &, size_t);
399
400 public:
logical_combination(json & sch,root_schema * root,const std::vector<nlohmann::json_uri> & uris)401 logical_combination(json &sch,
402 root_schema *root,
403 const std::vector<nlohmann::json_uri> &uris)
404 : schema(root)
405 {
406 size_t c = 0;
407 for (auto &subschema : sch)
408 subschemata_.push_back(schema::make(subschema, root, {key, std::to_string(c++)}, uris));
409
410 // value of allOf, anyOf, and oneOf "MUST be a non-empty array"
411 // TODO error/throw? when subschemata_.empty()
412 }
413 };
414
415 template <>
416 const std::string logical_combination<allOf>::key = "allOf";
417 template <>
418 const std::string logical_combination<anyOf>::key = "anyOf";
419 template <>
420 const std::string logical_combination<oneOf>::key = "oneOf";
421
422 template <>
is_validate_complete(const json &,const json::json_pointer &,error_handler & e,const first_error_handler & esub,size_t)423 bool logical_combination<allOf>::is_validate_complete(const json &, const json::json_pointer &, error_handler &e, const first_error_handler &esub, size_t)
424 {
425 if (esub)
426 e.error(esub.ptr_, esub.instance_, "at least one subschema has failed, but all of them are required to validate - " + esub.message_);
427 return esub;
428 }
429
430 template <>
is_validate_complete(const json &,const json::json_pointer &,error_handler &,const first_error_handler &,size_t count)431 bool logical_combination<anyOf>::is_validate_complete(const json &, const json::json_pointer &, error_handler &, const first_error_handler &, size_t count)
432 {
433 return count == 1;
434 }
435
436 template <>
is_validate_complete(const json & instance,const json::json_pointer & ptr,error_handler & e,const first_error_handler &,size_t count)437 bool logical_combination<oneOf>::is_validate_complete(const json &instance, const json::json_pointer &ptr, error_handler &e, const first_error_handler &, size_t count)
438 {
439 if (count > 1)
440 e.error(ptr, instance, "more than one subschema has succeeded, but exactly one of them is required to validate");
441 return count > 1;
442 }
443
444 class type_schema : public schema
445 {
446 json defaultValue_ = EmptyDefault;
447 std::vector<std::shared_ptr<schema>> type_;
448 std::pair<bool, json> enum_, const_;
449 std::vector<std::shared_ptr<schema>> logic_;
450
451 static std::shared_ptr<schema> make(json &schema,
452 json::value_t type,
453 root_schema *,
454 const std::vector<nlohmann::json_uri> &,
455 std::set<std::string> &);
456
457 std::shared_ptr<schema> if_, then_, else_;
458
defaultValue(const json::json_pointer &,const json &,error_handler &) const459 const json &defaultValue(const json::json_pointer &, const json &, error_handler &) const override
460 {
461 return defaultValue_;
462 }
463
validate(const json::json_pointer & ptr,const json & instance,json_patch & patch,error_handler & e) const464 void validate(const json::json_pointer &ptr, const json &instance, json_patch &patch, error_handler &e) const override final
465 {
466 // depending on the type of instance run the type specific validator - if present
467 auto type = type_[(uint8_t) instance.type()];
468
469 if (type)
470 type->validate(ptr, instance, patch, e);
471 else
472 e.error(ptr, instance, "unexpected instance type");
473
474 if (enum_.first) {
475 bool seen_in_enum = false;
476 for (auto &v : enum_.second)
477 if (instance == v) {
478 seen_in_enum = true;
479 break;
480 }
481
482 if (!seen_in_enum)
483 e.error(ptr, instance, "instance not found in required enum");
484 }
485
486 if (const_.first &&
487 const_.second != instance)
488 e.error(ptr, instance, "instance not const");
489
490 for (auto l : logic_)
491 l->validate(ptr, instance, patch, e);
492
493 if (if_) {
494 first_error_handler err;
495
496 if_->validate(ptr, instance, patch, err);
497 if (!err) {
498 if (then_)
499 then_->validate(ptr, instance, patch, e);
500 } else {
501 if (else_)
502 else_->validate(ptr, instance, patch, e);
503 }
504 }
505 }
506
507 public:
type_schema(json & sch,root_schema * root,const std::vector<nlohmann::json_uri> & uris)508 type_schema(json &sch,
509 root_schema *root,
510 const std::vector<nlohmann::json_uri> &uris)
511 : schema(root), type_((uint8_t) json::value_t::discarded + 1)
512 {
513 // association between JSON-schema-type and NLohmann-types
514 static const std::vector<std::pair<std::string, json::value_t>> schema_types = {
515 {"null", json::value_t::null},
516 {"object", json::value_t::object},
517 {"array", json::value_t::array},
518 {"string", json::value_t::string},
519 {"boolean", json::value_t::boolean},
520 {"integer", json::value_t::number_integer},
521 {"number", json::value_t::number_float},
522 };
523
524 std::set<std::string> known_keywords;
525
526 auto attr = sch.find("type");
527 if (attr == sch.end()) // no type field means all sub-types possible
528 for (auto &t : schema_types)
529 type_[(uint8_t) t.second] = type_schema::make(sch, t.second, root, uris, known_keywords);
530 else {
531 switch (attr.value().type()) { // "type": "type"
532
533 case json::value_t::string: {
534 auto schema_type = attr.value().get<std::string>();
535 for (auto &t : schema_types)
536 if (t.first == schema_type)
537 type_[(uint8_t) t.second] = type_schema::make(sch, t.second, root, uris, known_keywords);
538 } break;
539
540 case json::value_t::array: // "type": ["type1", "type2"]
541 for (auto &schema_type : attr.value())
542 for (auto &t : schema_types)
543 if (t.first == schema_type)
544 type_[(uint8_t) t.second] = type_schema::make(sch, t.second, root, uris, known_keywords);
545 break;
546
547 default:
548 break;
549 }
550
551 sch.erase(attr);
552 }
553
554 const auto defaultAttr = sch.find("default");
555 if (defaultAttr != sch.end()) {
556 defaultValue_ = defaultAttr.value();
557 }
558
559 for (auto &key : known_keywords)
560 sch.erase(key);
561
562 // with nlohmann::json float instance (but number in schema-definition) can be seen as unsigned or integer -
563 // reuse the number-validator for integer values as well, if they have not been specified explicitly
564 if (type_[(uint8_t) json::value_t::number_float] && !type_[(uint8_t) json::value_t::number_integer])
565 type_[(uint8_t) json::value_t::number_integer] = type_[(uint8_t) json::value_t::number_float];
566
567 // #54: JSON-schema does not differentiate between unsigned and signed integer - nlohmann::json does
568 // we stick with JSON-schema: use the integer-validator if instance-value is unsigned
569 type_[(uint8_t) json::value_t::number_unsigned] = type_[(uint8_t) json::value_t::number_integer];
570
571 // special for binary types
572 if (type_[(uint8_t) json::value_t::string]) {
573 type_[(uint8_t) json::value_t::binary] = type_[(uint8_t) json::value_t::string];
574 }
575
576 attr = sch.find("enum");
577 if (attr != sch.end()) {
578 enum_ = {true, attr.value()};
579 sch.erase(attr);
580 }
581
582 attr = sch.find("const");
583 if (attr != sch.end()) {
584 const_ = {true, attr.value()};
585 sch.erase(attr);
586 }
587
588 attr = sch.find("not");
589 if (attr != sch.end()) {
590 logic_.push_back(std::make_shared<logical_not>(attr.value(), root, uris));
591 sch.erase(attr);
592 }
593
594 attr = sch.find("allOf");
595 if (attr != sch.end()) {
596 logic_.push_back(std::make_shared<logical_combination<allOf>>(attr.value(), root, uris));
597 sch.erase(attr);
598 }
599
600 attr = sch.find("anyOf");
601 if (attr != sch.end()) {
602 logic_.push_back(std::make_shared<logical_combination<anyOf>>(attr.value(), root, uris));
603 sch.erase(attr);
604 }
605
606 attr = sch.find("oneOf");
607 if (attr != sch.end()) {
608 logic_.push_back(std::make_shared<logical_combination<oneOf>>(attr.value(), root, uris));
609 sch.erase(attr);
610 }
611
612 attr = sch.find("if");
613 if (attr != sch.end()) {
614 auto attr_then = sch.find("then");
615 auto attr_else = sch.find("else");
616
617 if (attr_then != sch.end() || attr_else != sch.end()) {
618 if_ = schema::make(attr.value(), root, {"if"}, uris);
619
620 if (attr_then != sch.end()) {
621 then_ = schema::make(attr_then.value(), root, {"then"}, uris);
622 sch.erase(attr_then);
623 }
624
625 if (attr_else != sch.end()) {
626 else_ = schema::make(attr_else.value(), root, {"else"}, uris);
627 sch.erase(attr_else);
628 }
629 }
630 sch.erase(attr);
631 }
632 }
633 };
634
635 class string : public schema
636 {
637 std::pair<bool, size_t> maxLength_{false, 0};
638 std::pair<bool, size_t> minLength_{false, 0};
639
640 #ifndef NO_STD_REGEX
641 std::pair<bool, REGEX_NAMESPACE::regex> pattern_{false, REGEX_NAMESPACE::regex()};
642 std::string patternString_;
643 #endif
644
645 std::pair<bool, std::string> format_;
646 std::tuple<bool, std::string, std::string> content_{false, "", ""};
647
utf8_length(const std::string & s) const648 std::size_t utf8_length(const std::string &s) const
649 {
650 size_t len = 0;
651 for (auto c : s)
652 if ((c & 0xc0) != 0x80)
653 len++;
654 return len;
655 }
656
validate(const json::json_pointer & ptr,const json & instance,json_patch &,error_handler & e) const657 void validate(const json::json_pointer &ptr, const json &instance, json_patch &, error_handler &e) const override
658 {
659 if (minLength_.first) {
660 if (utf8_length(instance) < minLength_.second) {
661 std::ostringstream s;
662 s << "instance is too short as per minLength:" << minLength_.second;
663 e.error(ptr, instance, s.str());
664 }
665 }
666
667 if (maxLength_.first) {
668 if (utf8_length(instance) > maxLength_.second) {
669 std::ostringstream s;
670 s << "instance is too long as per maxLength: " << maxLength_.second;
671 e.error(ptr, instance, s.str());
672 }
673 }
674
675 if (std::get<0>(content_)) {
676 if (root_->content_check() == nullptr)
677 e.error(ptr, instance, std::string("a content checker was not provided but a contentEncoding or contentMediaType for this string have been present: '") + std::get<1>(content_) + "' '" + std::get<2>(content_) + "'");
678 else {
679 try {
680 root_->content_check()(std::get<1>(content_), std::get<2>(content_), instance);
681 } catch (const std::exception &ex) {
682 e.error(ptr, instance, std::string("content-checking failed: ") + ex.what());
683 }
684 }
685 } else if (instance.type() == json::value_t::binary) {
686 e.error(ptr, instance, "expected string, but get binary data");
687 }
688
689 if (instance.type() != json::value_t::string) {
690 return; // next checks only for strings
691 }
692
693 #ifndef NO_STD_REGEX
694 if (pattern_.first &&
695 !REGEX_NAMESPACE::regex_search(instance.get<std::string>(), pattern_.second))
696 e.error(ptr, instance, "instance does not match regex pattern: " + patternString_);
697 #endif
698
699 if (format_.first) {
700 if (root_->format_check() == nullptr)
701 e.error(ptr, instance, std::string("a format checker was not provided but a format keyword for this string is present: ") + format_.second);
702 else {
703 try {
704 root_->format_check()(format_.second, instance);
705 } catch (const std::exception &ex) {
706 e.error(ptr, instance, std::string("format-checking failed: ") + ex.what());
707 }
708 }
709 }
710 }
711
712 public:
string(json & sch,root_schema * root)713 string(json &sch, root_schema *root)
714 : schema(root)
715 {
716 auto attr = sch.find("maxLength");
717 if (attr != sch.end()) {
718 maxLength_ = {true, attr.value()};
719 sch.erase(attr);
720 }
721
722 attr = sch.find("minLength");
723 if (attr != sch.end()) {
724 minLength_ = {true, attr.value()};
725 sch.erase(attr);
726 }
727
728 attr = sch.find("contentEncoding");
729 if (attr != sch.end()) {
730 std::get<0>(content_) = true;
731 std::get<1>(content_) = attr.value().get<std::string>();
732
733 // special case for nlohmann::json-binary-types
734 //
735 // https://github.com/pboettch/json-schema-validator/pull/114
736 //
737 // We cannot use explicitly in a schema: {"type": "binary"} or
738 // "type": ["binary", "number"] we have to be implicit. For a
739 // schema where "contentEncoding" is set to "binary", an instance
740 // of type json::value_t::binary is accepted. If a
741 // contentEncoding-callback has to be provided and is called
742 // accordingly. For encoding=binary, no other type validations are done
743
744 sch.erase(attr);
745 }
746
747 attr = sch.find("contentMediaType");
748 if (attr != sch.end()) {
749 std::get<0>(content_) = true;
750 std::get<2>(content_) = attr.value().get<std::string>();
751
752 sch.erase(attr);
753 }
754
755 if (std::get<0>(content_) == true && root_->content_check() == nullptr) {
756 throw std::invalid_argument{"schema contains contentEncoding/contentMediaType but content checker was not set"};
757 }
758
759 #ifndef NO_STD_REGEX
760 attr = sch.find("pattern");
761 if (attr != sch.end()) {
762 patternString_ = attr.value();
763 pattern_ = {true, REGEX_NAMESPACE::regex(attr.value().get<std::string>(),
764 REGEX_NAMESPACE::regex::ECMAScript)};
765 sch.erase(attr);
766 }
767 #endif
768
769 attr = sch.find("format");
770 if (attr != sch.end()) {
771 if (root_->format_check() == nullptr)
772 throw std::invalid_argument{"a format checker was not provided but a format keyword for this string is present: " + format_.second};
773
774 format_ = {true, attr.value()};
775 sch.erase(attr);
776 }
777 }
778 };
779
780 template <typename T>
781 class numeric : public schema
782 {
783 std::pair<bool, T> maximum_{false, 0};
784 std::pair<bool, T> minimum_{false, 0};
785
786 bool exclusiveMaximum_ = false;
787 bool exclusiveMinimum_ = false;
788
789 std::pair<bool, json::number_float_t> multipleOf_{false, 0};
790
791 // multipleOf - if the remainder of the division is 0 -> OK
violates_multiple_of(T x) const792 bool violates_multiple_of(T x) const
793 {
794 double res = std::remainder(x, multipleOf_.second);
795 double eps = std::nextafter(x, 0) - x;
796 return std::fabs(res) > std::fabs(eps);
797 }
798
validate(const json::json_pointer & ptr,const json & instance,json_patch &,error_handler & e) const799 void validate(const json::json_pointer &ptr, const json &instance, json_patch &, error_handler &e) const override
800 {
801 T value = instance; // conversion of json to value_type
802
803 if (multipleOf_.first && value != 0) // zero is multiple of everything
804 if (violates_multiple_of(value))
805 e.error(ptr, instance, "instance is not a multiple of " + std::to_string(multipleOf_.second));
806
807 if (maximum_.first)
808 if ((exclusiveMaximum_ && value >= maximum_.second) ||
809 value > maximum_.second)
810 e.error(ptr, instance, "instance exceeds maximum of " + std::to_string(maximum_.second));
811
812 if (minimum_.first)
813 if ((exclusiveMinimum_ && value <= minimum_.second) ||
814 value < minimum_.second)
815 e.error(ptr, instance, "instance is below minimum of " + std::to_string(minimum_.second));
816 }
817
818 public:
numeric(const json & sch,root_schema * root,std::set<std::string> & kw)819 numeric(const json &sch, root_schema *root, std::set<std::string> &kw)
820 : schema(root)
821 {
822 auto attr = sch.find("maximum");
823 if (attr != sch.end()) {
824 maximum_ = {true, attr.value()};
825 kw.insert("maximum");
826 }
827
828 attr = sch.find("minimum");
829 if (attr != sch.end()) {
830 minimum_ = {true, attr.value()};
831 kw.insert("minimum");
832 }
833
834 attr = sch.find("exclusiveMaximum");
835 if (attr != sch.end()) {
836 exclusiveMaximum_ = true;
837 maximum_ = {true, attr.value()};
838 kw.insert("exclusiveMaximum");
839 }
840
841 attr = sch.find("exclusiveMinimum");
842 if (attr != sch.end()) {
843 minimum_ = {true, attr.value()};
844 exclusiveMinimum_ = true;
845 kw.insert("exclusiveMinimum");
846 }
847
848 attr = sch.find("multipleOf");
849 if (attr != sch.end()) {
850 multipleOf_ = {true, attr.value()};
851 kw.insert("multipleOf");
852 }
853 }
854 };
855
856 class null : public schema
857 {
validate(const json::json_pointer & ptr,const json & instance,json_patch &,error_handler & e) const858 void validate(const json::json_pointer &ptr, const json &instance, json_patch &, error_handler &e) const override
859 {
860 if (!instance.is_null())
861 e.error(ptr, instance, "expected to be null");
862 }
863
864 public:
null(json &,root_schema * root)865 null(json &, root_schema *root)
866 : schema(root) {}
867 };
868
869 class boolean_type : public schema
870 {
validate(const json::json_pointer &,const json &,json_patch &,error_handler &) const871 void validate(const json::json_pointer &, const json &, json_patch &, error_handler &) const override {}
872
873 public:
boolean_type(json &,root_schema * root)874 boolean_type(json &, root_schema *root)
875 : schema(root) {}
876 };
877
878 class boolean : public schema
879 {
880 bool true_;
validate(const json::json_pointer & ptr,const json & instance,json_patch &,error_handler & e) const881 void validate(const json::json_pointer &ptr, const json &instance, json_patch &, error_handler &e) const override
882 {
883 if (!true_) { // false schema
884 // empty array
885 //switch (instance.type()) {
886 //case json::value_t::array:
887 // if (instance.size() != 0) // valid false-schema
888 // e.error(ptr, instance, "false-schema required empty array");
889 // return;
890 //}
891
892 e.error(ptr, instance, "instance invalid as per false-schema");
893 }
894 }
895
896 public:
boolean(json & sch,root_schema * root)897 boolean(json &sch, root_schema *root)
898 : schema(root), true_(sch) {}
899 };
900
901 class required : public schema
902 {
903 const std::vector<std::string> required_;
904
validate(const json::json_pointer & ptr,const json & instance,json_patch &,error_handler & e) const905 void validate(const json::json_pointer &ptr, const json &instance, json_patch &, error_handler &e) const override final
906 {
907 for (auto &r : required_)
908 if (instance.find(r) == instance.end())
909 e.error(ptr, instance, "required property '" + r + "' not found in object as a dependency");
910 }
911
912 public:
required(const std::vector<std::string> & r,root_schema * root)913 required(const std::vector<std::string> &r, root_schema *root)
914 : schema(root), required_(r) {}
915 };
916
917 class object : public schema
918 {
919 std::pair<bool, size_t> maxProperties_{false, 0};
920 std::pair<bool, size_t> minProperties_{false, 0};
921 std::vector<std::string> required_;
922
923 std::map<std::string, std::shared_ptr<schema>> properties_;
924 #ifndef NO_STD_REGEX
925 std::vector<std::pair<REGEX_NAMESPACE::regex, std::shared_ptr<schema>>> patternProperties_;
926 #endif
927 std::shared_ptr<schema> additionalProperties_;
928
929 std::map<std::string, std::shared_ptr<schema>> dependencies_;
930
931 std::shared_ptr<schema> propertyNames_;
932
validate(const json::json_pointer & ptr,const json & instance,json_patch & patch,error_handler & e) const933 void validate(const json::json_pointer &ptr, const json &instance, json_patch &patch, error_handler &e) const override
934 {
935 if (maxProperties_.first && instance.size() > maxProperties_.second)
936 e.error(ptr, instance, "too many properties");
937
938 if (minProperties_.first && instance.size() < minProperties_.second)
939 e.error(ptr, instance, "too few properties");
940
941 for (auto &r : required_)
942 if (instance.find(r) == instance.end())
943 e.error(ptr, instance, "required property '" + r + "' not found in object");
944
945 // for each property in instance
946 for (auto &p : instance.items()) {
947 if (propertyNames_)
948 propertyNames_->validate(ptr, p.key(), patch, e);
949
950 bool a_prop_or_pattern_matched = false;
951 auto schema_p = properties_.find(p.key());
952 // check if it is in "properties"
953 if (schema_p != properties_.end()) {
954 a_prop_or_pattern_matched = true;
955 schema_p->second->validate(ptr / p.key(), p.value(), patch, e);
956 }
957
958 #ifndef NO_STD_REGEX
959 // check all matching patternProperties
960 for (auto &schema_pp : patternProperties_)
961 if (REGEX_NAMESPACE::regex_search(p.key(), schema_pp.first)) {
962 a_prop_or_pattern_matched = true;
963 schema_pp.second->validate(ptr / p.key(), p.value(), patch, e);
964 }
965 #endif
966
967 // check additionalProperties as a last resort
968 if (!a_prop_or_pattern_matched && additionalProperties_) {
969 first_error_handler additional_prop_err;
970 additionalProperties_->validate(ptr / p.key(), p.value(), patch, additional_prop_err);
971 if (additional_prop_err)
972 e.error(ptr, instance, "validation failed for additional property '" + p.key() + "': " + additional_prop_err.message_);
973 }
974 }
975
976 // reverse search
977 for (auto const &prop : properties_) {
978 const auto finding = instance.find(prop.first);
979 if (instance.end() == finding) { // if the prop is not in the instance
980 const auto &defaultValue = prop.second->defaultValue(ptr, instance, e);
981 if (!defaultValue.is_null()) { // if default value is available
982 patch.add((ptr / prop.first), defaultValue);
983 }
984 }
985 }
986
987 for (auto &dep : dependencies_) {
988 auto prop = instance.find(dep.first);
989 if (prop != instance.end()) // if dependency-property is present in instance
990 dep.second->validate(ptr / dep.first, instance, patch, e); // validate
991 }
992 }
993
994 public:
object(json & sch,root_schema * root,const std::vector<nlohmann::json_uri> & uris)995 object(json &sch,
996 root_schema *root,
997 const std::vector<nlohmann::json_uri> &uris)
998 : schema(root)
999 {
1000 auto attr = sch.find("maxProperties");
1001 if (attr != sch.end()) {
1002 maxProperties_ = {true, attr.value()};
1003 sch.erase(attr);
1004 }
1005
1006 attr = sch.find("minProperties");
1007 if (attr != sch.end()) {
1008 minProperties_ = {true, attr.value()};
1009 sch.erase(attr);
1010 }
1011
1012 attr = sch.find("required");
1013 if (attr != sch.end()) {
1014 required_ = attr.value().get<std::vector<std::string>>();
1015 sch.erase(attr);
1016 }
1017
1018 attr = sch.find("properties");
1019 if (attr != sch.end()) {
1020 for (auto prop : attr.value().items())
1021 properties_.insert(
1022 std::make_pair(
1023 prop.key(),
1024 schema::make(prop.value(), root, {"properties", prop.key()}, uris)));
1025 sch.erase(attr);
1026 }
1027
1028 #ifndef NO_STD_REGEX
1029 attr = sch.find("patternProperties");
1030 if (attr != sch.end()) {
1031 for (auto prop : attr.value().items())
1032 patternProperties_.push_back(
1033 std::make_pair(
1034 REGEX_NAMESPACE::regex(prop.key(), REGEX_NAMESPACE::regex::ECMAScript),
1035 schema::make(prop.value(), root, {prop.key()}, uris)));
1036 sch.erase(attr);
1037 }
1038 #endif
1039
1040 attr = sch.find("additionalProperties");
1041 if (attr != sch.end()) {
1042 additionalProperties_ = schema::make(attr.value(), root, {"additionalProperties"}, uris);
1043 sch.erase(attr);
1044 }
1045
1046 attr = sch.find("dependencies");
1047 if (attr != sch.end()) {
1048 for (auto &dep : attr.value().items())
1049 switch (dep.value().type()) {
1050 case json::value_t::array:
1051 dependencies_.emplace(dep.key(),
1052 std::make_shared<required>(
1053 dep.value().get<std::vector<std::string>>(), root));
1054 break;
1055
1056 default:
1057 dependencies_.emplace(dep.key(),
1058 schema::make(dep.value(), root, {"dependencies", dep.key()}, uris));
1059 break;
1060 }
1061 sch.erase(attr);
1062 }
1063
1064 attr = sch.find("propertyNames");
1065 if (attr != sch.end()) {
1066 propertyNames_ = schema::make(attr.value(), root, {"propertyNames"}, uris);
1067 sch.erase(attr);
1068 }
1069 }
1070 };
1071
1072 class array : public schema
1073 {
1074 std::pair<bool, size_t> maxItems_{false, 0};
1075 std::pair<bool, size_t> minItems_{false, 0};
1076 bool uniqueItems_ = false;
1077
1078 std::shared_ptr<schema> items_schema_;
1079
1080 std::vector<std::shared_ptr<schema>> items_;
1081 std::shared_ptr<schema> additionalItems_;
1082
1083 std::shared_ptr<schema> contains_;
1084
validate(const json::json_pointer & ptr,const json & instance,json_patch & patch,error_handler & e) const1085 void validate(const json::json_pointer &ptr, const json &instance, json_patch &patch, error_handler &e) const override
1086 {
1087 if (maxItems_.first && instance.size() > maxItems_.second)
1088 e.error(ptr, instance, "array has too many items");
1089
1090 if (minItems_.first && instance.size() < minItems_.second)
1091 e.error(ptr, instance, "array has too few items");
1092
1093 if (uniqueItems_) {
1094 for (auto it = instance.cbegin(); it != instance.cend(); ++it) {
1095 auto v = std::find(it + 1, instance.end(), *it);
1096 if (v != instance.end())
1097 e.error(ptr, instance, "items have to be unique for this array");
1098 }
1099 }
1100
1101 size_t index = 0;
1102 if (items_schema_)
1103 for (auto &i : instance) {
1104 items_schema_->validate(ptr / index, i, patch, e);
1105 index++;
1106 }
1107 else {
1108 auto item = items_.cbegin();
1109 for (auto &i : instance) {
1110 std::shared_ptr<schema> item_validator;
1111 if (item == items_.cend())
1112 item_validator = additionalItems_;
1113 else {
1114 item_validator = *item;
1115 item++;
1116 }
1117
1118 if (!item_validator)
1119 break;
1120
1121 item_validator->validate(ptr / index, i, patch, e);
1122 }
1123 }
1124
1125 if (contains_) {
1126 bool contained = false;
1127 for (auto &item : instance) {
1128 first_error_handler local_e;
1129 contains_->validate(ptr, item, patch, local_e);
1130 if (!local_e) {
1131 contained = true;
1132 break;
1133 }
1134 }
1135 if (!contained)
1136 e.error(ptr, instance, "array does not contain required element as per 'contains'");
1137 }
1138 }
1139
1140 public:
array(json & sch,root_schema * root,const std::vector<nlohmann::json_uri> & uris)1141 array(json &sch, root_schema *root, const std::vector<nlohmann::json_uri> &uris)
1142 : schema(root)
1143 {
1144 auto attr = sch.find("maxItems");
1145 if (attr != sch.end()) {
1146 maxItems_ = {true, attr.value()};
1147 sch.erase(attr);
1148 }
1149
1150 attr = sch.find("minItems");
1151 if (attr != sch.end()) {
1152 minItems_ = {true, attr.value()};
1153 sch.erase(attr);
1154 }
1155
1156 attr = sch.find("uniqueItems");
1157 if (attr != sch.end()) {
1158 uniqueItems_ = attr.value();
1159 sch.erase(attr);
1160 }
1161
1162 attr = sch.find("items");
1163 if (attr != sch.end()) {
1164
1165 if (attr.value().type() == json::value_t::array) {
1166 size_t c = 0;
1167 for (auto &subsch : attr.value())
1168 items_.push_back(schema::make(subsch, root, {"items", std::to_string(c++)}, uris));
1169
1170 auto attr_add = sch.find("additionalItems");
1171 if (attr_add != sch.end()) {
1172 additionalItems_ = schema::make(attr_add.value(), root, {"additionalItems"}, uris);
1173 sch.erase(attr_add);
1174 }
1175
1176 } else if (attr.value().type() == json::value_t::object ||
1177 attr.value().type() == json::value_t::boolean)
1178 items_schema_ = schema::make(attr.value(), root, {"items"}, uris);
1179
1180 sch.erase(attr);
1181 }
1182
1183 attr = sch.find("contains");
1184 if (attr != sch.end()) {
1185 contains_ = schema::make(attr.value(), root, {"contains"}, uris);
1186 sch.erase(attr);
1187 }
1188 }
1189 };
1190
make(json & schema,json::value_t type,root_schema * root,const std::vector<nlohmann::json_uri> & uris,std::set<std::string> & kw)1191 std::shared_ptr<schema> type_schema::make(json &schema,
1192 json::value_t type,
1193 root_schema *root,
1194 const std::vector<nlohmann::json_uri> &uris,
1195 std::set<std::string> &kw)
1196 {
1197 switch (type) {
1198 case json::value_t::null:
1199 return std::make_shared<null>(schema, root);
1200
1201 case json::value_t::number_unsigned:
1202 case json::value_t::number_integer:
1203 return std::make_shared<numeric<json::number_integer_t>>(schema, root, kw);
1204 case json::value_t::number_float:
1205 return std::make_shared<numeric<json::number_float_t>>(schema, root, kw);
1206 case json::value_t::string:
1207 return std::make_shared<string>(schema, root);
1208 case json::value_t::boolean:
1209 return std::make_shared<boolean_type>(schema, root);
1210 case json::value_t::object:
1211 return std::make_shared<object>(schema, root, uris);
1212 case json::value_t::array:
1213 return std::make_shared<array>(schema, root, uris);
1214
1215 case json::value_t::discarded: // not a real type - silence please
1216 break;
1217
1218 case json::value_t::binary:
1219 break;
1220 }
1221 return nullptr;
1222 }
1223 } // namespace
1224
1225 namespace
1226 {
1227
make(json & schema,root_schema * root,const std::vector<std::string> & keys,std::vector<nlohmann::json_uri> uris)1228 std::shared_ptr<schema> schema::make(json &schema,
1229 root_schema *root,
1230 const std::vector<std::string> &keys,
1231 std::vector<nlohmann::json_uri> uris)
1232 {
1233 // remove URIs which contain plain name identifiers, as sub-schemas cannot be referenced
1234 for (auto uri = uris.begin(); uri != uris.end();)
1235 if (uri->identifier() != "")
1236 uri = uris.erase(uri);
1237 else
1238 uri++;
1239
1240 // append to all URIs the keys for this sub-schema
1241 for (auto &key : keys)
1242 for (auto &uri : uris)
1243 uri = uri.append(key);
1244
1245 std::shared_ptr<::schema> sch;
1246
1247 // boolean schema
1248 if (schema.type() == json::value_t::boolean)
1249 sch = std::make_shared<boolean>(schema, root);
1250 else if (schema.type() == json::value_t::object) {
1251
1252 auto attr = schema.find("$id"); // if $id is present, this schema can be referenced by this ID
1253 // as an additional URI
1254 if (attr != schema.end()) {
1255 if (std::find(uris.begin(),
1256 uris.end(),
1257 attr.value().get<std::string>()) == uris.end())
1258 uris.push_back(uris.back().derive(attr.value())); // so add it to the list if it is not there already
1259 schema.erase(attr);
1260 }
1261
1262 attr = schema.find("definitions");
1263 if (attr != schema.end()) {
1264 for (auto &def : attr.value().items())
1265 schema::make(def.value(), root, {"definitions", def.key()}, uris);
1266 schema.erase(attr);
1267 }
1268
1269 attr = schema.find("$ref");
1270 if (attr != schema.end()) { // this schema is a reference
1271 // the last one on the uri-stack is the last id seen before coming here,
1272 // so this is the origial URI for this reference, the $ref-value has thus be resolved from it
1273 auto id = uris.back().derive(attr.value());
1274 sch = root->get_or_create_ref(id);
1275 schema.erase(attr);
1276 } else {
1277 sch = std::make_shared<type_schema>(schema, root, uris);
1278 }
1279
1280 schema.erase("$schema");
1281 schema.erase("default");
1282 schema.erase("title");
1283 schema.erase("description");
1284 } else {
1285 throw std::invalid_argument("invalid JSON-type for a schema for " + uris[0].to_string() + ", expected: boolean or object");
1286 }
1287
1288 for (auto &uri : uris) { // for all URIs this schema is referenced by
1289 root->insert(uri, sch);
1290
1291 if (schema.type() == json::value_t::object)
1292 for (auto &u : schema.items())
1293 root->insert_unknown_keyword(uri, u.key(), u.value()); // insert unknown keywords for later reference
1294 }
1295 return sch;
1296 }
1297
1298 class throwing_error_handler : public error_handler
1299 {
error(const json::json_pointer & ptr,const json & instance,const std::string & message)1300 void error(const json::json_pointer &ptr, const json &instance, const std::string &message) override
1301 {
1302 throw std::invalid_argument(std::string("At ") + ptr.to_string() + " of " + instance.dump() + " - " + message + "\n");
1303 }
1304 };
1305
1306 } // namespace
1307
1308 namespace nlohmann
1309 {
1310 namespace json_schema
1311 {
1312
json_validator(schema_loader loader,format_checker format,content_checker content)1313 json_validator::json_validator(schema_loader loader,
1314 format_checker format,
1315 content_checker content)
1316 : root_(std::unique_ptr<root_schema>(new root_schema(std::move(loader),
1317 std::move(format),
1318 std::move(content))))
1319 {
1320 }
1321
json_validator(const json & schema,schema_loader loader,format_checker format,content_checker content)1322 json_validator::json_validator(const json &schema,
1323 schema_loader loader,
1324 format_checker format,
1325 content_checker content)
1326 : json_validator(std::move(loader),
1327 std::move(format),
1328 std::move(content))
1329 {
1330 set_root_schema(schema);
1331 }
1332
json_validator(json && schema,schema_loader loader,format_checker format,content_checker content)1333 json_validator::json_validator(json &&schema,
1334 schema_loader loader,
1335 format_checker format,
1336 content_checker content)
1337
1338 : json_validator(std::move(loader),
1339 std::move(format),
1340 std::move(content))
1341 {
1342 set_root_schema(std::move(schema));
1343 }
1344
1345 // move constructor, destructor and move assignment operator can be defaulted here
1346 // where root_schema is a complete type
1347 json_validator::json_validator(json_validator &&) = default;
1348 json_validator::~json_validator() = default;
1349 json_validator &json_validator::operator=(json_validator &&) = default;
1350
set_root_schema(const json & schema)1351 void json_validator::set_root_schema(const json &schema)
1352 {
1353 root_->set_root_schema(schema);
1354 }
1355
set_root_schema(json && schema)1356 void json_validator::set_root_schema(json &&schema)
1357 {
1358 root_->set_root_schema(std::move(schema));
1359 }
1360
// Validates `instance` with a throwing_error_handler, i.e. a reported
// validation error surfaces as std::invalid_argument. Returns whatever the
// handler-taking overload returns.
json json_validator::validate(const json &instance) const
{
	throwing_error_handler thrower;
	return validate(instance, thrower);
}
1366
// Runs the root schema against `instance`, starting at an empty JSON pointer
// and at `initial_uri`; errors go to `err`. The patch filled in during
// validation is returned.
json json_validator::validate(const json &instance, error_handler &err, const json_uri &initial_uri) const
{
	json_patch collected;
	json::json_pointer root_ptr; // empty pointer: the instance itself

	root_->validate(root_ptr, instance, collected, err, initial_uri);

	return collected;
}
1374
1375 } // namespace json_schema
1376 } // namespace nlohmann
1377