1// Copyright 2019 CUE Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package jsonschema
16
17// TODO:
18// - replace converter from YAML to CUE to CUE (schema) to CUE.
// - define OpenAPI definitions as CUE.
20
import (
	"fmt"
	"net/url"
	"regexp"
	"sort"
	"strings"

	"cuelang.org/go/cue"
	"cuelang.org/go/cue/ast"
	"cuelang.org/go/cue/ast/astutil"
	"cuelang.org/go/cue/errors"
	"cuelang.org/go/cue/token"
	"cuelang.org/go/internal"
)
34
// rootDefs is the top-level name of the map that holds definitions whose
// names are not valid identifiers.
//
// TODO: find something more principled, like allowing #."a-b" or `#a-b`.
const rootDefs = "#"
40
// A decoder converts JSON schema to CUE.
type decoder struct {
	cfg   *Config      // conversion options; PkgName, Root and Strict are read in this file
	errs  errors.Error // errors accumulated while decoding; extended by addErr
	numID int          // for creating unique numbers: increment on each use
}
47
48// addImport registers
49func (d *decoder) addImport(n cue.Value, pkg string) *ast.Ident {
50	spec := ast.NewImport(nil, pkg)
51	info, err := astutil.ParseImportSpec(spec)
52	if err != nil {
53		d.errf(cue.Value{}, "invalid import %q", pkg)
54	}
55	ident := ast.NewIdent(info.Ident)
56	ident.Node = spec
57	ast.SetPos(ident, n.Pos())
58
59	return ident
60}
61
62func (d *decoder) decode(v cue.Value) *ast.File {
63	f := &ast.File{}
64
65	if pkgName := d.cfg.PkgName; pkgName != "" {
66		pkg := &ast.Package{Name: ast.NewIdent(pkgName)}
67		f.Decls = append(f.Decls, pkg)
68	}
69
70	var a []ast.Decl
71
72	if d.cfg.Root == "" {
73		a = append(a, d.schema(nil, v)...)
74	} else {
75		ref := d.parseRef(token.NoPos, d.cfg.Root)
76		if ref == nil {
77			return f
78		}
79		i, err := v.Lookup(ref...).Fields()
80		if err != nil {
81			d.errs = errors.Append(d.errs, errors.Promote(err, ""))
82			return nil
83		}
84		for i.Next() {
85			ref := append(ref, i.Label())
86			lab := d.mapRef(i.Value().Pos(), "", ref)
87			if len(lab) == 0 {
88				return nil
89			}
90			decls := d.schema(lab, i.Value())
91			a = append(a, decls...)
92		}
93	}
94
95	f.Decls = append(f.Decls, a...)
96
97	_ = astutil.Sanitize(f)
98
99	return f
100}
101
102func (d *decoder) schema(ref []ast.Label, v cue.Value) (a []ast.Decl) {
103	root := state{decoder: d}
104
105	var name ast.Label
106	inner := len(ref) - 1
107
108	if inner >= 0 {
109		name = ref[inner]
110		root.isSchema = true
111	}
112
113	expr, state := root.schemaState(v, allTypes, nil, false)
114
115	tags := []string{}
116	if state.jsonschema != "" {
117		tags = append(tags, fmt.Sprintf("schema=%q", state.jsonschema))
118	}
119
120	if name == nil {
121		if len(tags) > 0 {
122			body := strings.Join(tags, ",")
123			a = append(a, &ast.Attribute{
124				Text: fmt.Sprintf("@jsonschema(%s)", body)})
125		}
126
127		if state.deprecated {
128			a = append(a, &ast.Attribute{Text: "@deprecated()"})
129		}
130	} else {
131		if len(tags) > 0 {
132			a = append(a, addTag(name, "jsonschema", strings.Join(tags, ",")))
133		}
134
135		if state.deprecated {
136			a = append(a, addTag(name, "deprecated", ""))
137		}
138	}
139
140	if name != nil {
141		f := &ast.Field{
142			Label: name,
143			Value: expr,
144		}
145
146		a = append(a, f)
147	} else if st, ok := expr.(*ast.StructLit); ok {
148		a = append(a, st.Elts...)
149	} else {
150		a = append(a, &ast.EmbedDecl{Expr: expr})
151	}
152
153	state.doc(a[0])
154
155	for i := inner - 1; i >= 0; i-- {
156		a = []ast.Decl{&ast.Field{
157			Label: ref[i],
158			Value: &ast.StructLit{Elts: a},
159		}}
160		expr = ast.NewStruct(ref[i], expr)
161	}
162
163	if root.hasSelfReference {
164		return []ast.Decl{
165			&ast.EmbedDecl{Expr: ast.NewIdent(topSchema)},
166			&ast.Field{
167				Label: ast.NewIdent(topSchema),
168				Value: &ast.StructLit{Elts: a},
169			},
170		}
171	}
172
173	return a
174}
175
176func (d *decoder) errf(n cue.Value, format string, args ...interface{}) ast.Expr {
177	d.warnf(n.Pos(), format, args...)
178	return &ast.BadExpr{From: n.Pos()}
179}
180
181func (d *decoder) warnf(p token.Pos, format string, args ...interface{}) {
182	d.addErr(errors.Newf(p, format, args...))
183}
184
185func (d *decoder) addErr(err errors.Error) {
186	d.errs = errors.Append(d.errs, err)
187}
188
189func (d *decoder) number(n cue.Value) ast.Expr {
190	return n.Syntax(cue.Final()).(ast.Expr)
191}
192
193func (d *decoder) uint(n cue.Value) ast.Expr {
194	_, err := n.Uint64()
195	if err != nil {
196		d.errf(n, "invalid uint")
197	}
198	return n.Syntax(cue.Final()).(ast.Expr)
199}
200
201func (d *decoder) bool(n cue.Value) ast.Expr {
202	return n.Syntax(cue.Final()).(ast.Expr)
203}
204
205func (d *decoder) boolValue(n cue.Value) bool {
206	x, err := n.Bool()
207	if err != nil {
208		d.errf(n, "invalid bool")
209	}
210	return x
211}
212
213func (d *decoder) string(n cue.Value) ast.Expr {
214	return n.Syntax(cue.Final()).(ast.Expr)
215}
216
217func (d *decoder) strValue(n cue.Value) (s string, ok bool) {
218	s, err := n.String()
219	if err != nil {
220		d.errf(n, "invalid string")
221		return "", false
222	}
223	return s, true
224}
225
226// const draftCutoff = 5
227
// coreType enumerates the core types for which constraints are tracked
// separately. Its values index state.types, coreToCUE and coreTypeName.
type coreType int

const (
	nullType coreType = iota
	boolType
	numType
	stringType
	arrayType
	objectType

	// numCoreTypes is the number of core types; it sizes state.types.
	numCoreTypes
)
240
// coreToCUE maps each coreType to the cue.Kind that represents it.
// Note that numType is mapped to cue.FloatKind.
var coreToCUE = []cue.Kind{
	nullType:   cue.NullKind,
	boolType:   cue.BoolKind,
	numType:    cue.FloatKind,
	stringType: cue.StringKind,
	arrayType:  cue.ListKind,
	objectType: cue.StructKind,
}
249
250func kindToAST(k cue.Kind) ast.Expr {
251	switch k {
252	case cue.NullKind:
253		// TODO: handle OpenAPI restrictions.
254		return ast.NewNull()
255	case cue.BoolKind:
256		return ast.NewIdent("bool")
257	case cue.FloatKind:
258		return ast.NewIdent("number")
259	case cue.StringKind:
260		return ast.NewIdent("string")
261	case cue.ListKind:
262		return ast.NewList(&ast.Ellipsis{})
263	case cue.StructKind:
264		return ast.NewStruct(&ast.Ellipsis{})
265	}
266	return nil
267}
268
// coreTypeName maps each coreType to the name used for it in error messages
// (see finalize).
var coreTypeName = []string{
	nullType:   "null",
	boolType:   "bool",
	numType:    "number",
	stringType: "string",
	arrayType:  "array",
	objectType: "object",
}
277
// constraintInfo collects what is known about one group of constraints:
// either those of a single core type (state.types) or the type-independent
// ones (state.all).
type constraintInfo struct {
	// typ is an identifier for the root type, if present.
	// This can be omitted if there are constraints.
	typ         ast.Expr
	// constraints holds the collected expressions; for state.types, finalize
	// combines them with &.
	constraints []ast.Expr
}
284
285func (c *constraintInfo) setTypeUsed(n cue.Value, t coreType) {
286	c.typ = kindToAST(coreToCUE[t])
287	setPos(c.typ, n)
288	ast.SetRelPos(c.typ, token.NoRelPos)
289}
290
291func (c *constraintInfo) add(n cue.Value, x ast.Expr) {
292	if !isAny(x) {
293		setPos(x, n)
294		ast.SetRelPos(x, token.NoRelPos)
295		c.constraints = append(c.constraints, x)
296	}
297}
298
299func (s *state) add(n cue.Value, t coreType, x ast.Expr) {
300	s.types[t].add(n, x)
301}
302
303func (s *state) setTypeUsed(n cue.Value, t coreType) {
304	s.types[t].setTypeUsed(n, t)
305}
306
// state holds what has been collected while converting a single JSON schema
// value. schemaState creates one per schema and finalize turns it into a CUE
// expression.
type state struct {
	*decoder

	isSchema bool // for omitting ellipsis in an ast.File

	// up is the state from which this state was created in schemaState.
	// parent is that same state, but it is only set when the schema is
	// decoded on behalf of a logical operator.
	up     *state
	parent *state

	// path holds the field names leading to the value being processed; it is
	// maintained by processMap.
	path []string

	// idRef is used to refer to this schema in case it defines an $id.
	idRef []label

	// pos is the schema value for which this state was created.
	pos cue.Value

	// The constraints of each entry in types are conjoined in finalize; the
	// per-type results are then combined as a disjunction.
	types    [numCoreTypes]constraintInfo
	all      constraintInfo // values and oneOf etc.
	nullable *ast.BasicLit  // nullable; if set, finalize adds it as an extra disjunct

	// allowedTypes starts out as the kinds passed to schemaState. The value
	// method adds the kind of each value it converts to usedTypes and
	// narrows allowedTypes to it.
	usedTypes    cue.Kind
	allowedTypes cue.Kind

	// Annotations of the schema. In this file, default_ is consumed by
	// finalize, title and description by comment, and deprecated and
	// jsonschema by decoder.schema.
	default_     ast.Expr
	examples     []ast.Expr
	title        string
	description  string
	deprecated   bool
	exclusiveMin bool // For OpenAPI and legacy support.
	exclusiveMax bool // For OpenAPI and legacy support.
	jsonschema   string
	id           *url.URL // base URI for $ref

	// definitions are appended to the resulting struct by finalize.
	definitions []ast.Decl

	// Used for inserting definitions, properties, etc.
	hasSelfReference bool
	obj              *ast.StructLit
	// Complete at finalize.
	fieldRefs map[label]refs

	// When closeStruct is false, finalize appends an ellipsis to the object
	// struct, leaving it open.
	closeStruct bool
	patterns    []ast.Expr

	list *ast.ListLit
}
353
// label identifies a field by name. It is the element type of state.idRef and
// the key type of state.fieldRefs.
type label struct {
	name  string
	isDef bool // presumably marks the label as a definition — TODO confirm against its users
}
358
// refs is the value type of state.fieldRefs.
//
// NOTE(review): none of these fields is used in this part of the file;
// presumably they tie a field to the identifiers that reference it — confirm
// against linkReferences.
type refs struct {
	field *ast.Field
	ident string
	refs  []*ast.Ident
}
364
365func (s *state) object(n cue.Value) *ast.StructLit {
366	if s.obj == nil {
367		s.obj = &ast.StructLit{}
368		s.add(n, objectType, s.obj)
369	}
370	return s.obj
371}
372
373func (s *state) hasConstraints() bool {
374	if len(s.all.constraints) > 0 {
375		return true
376	}
377	for _, t := range s.types {
378		if len(t.constraints) > 0 {
379			return true
380		}
381	}
382	return len(s.patterns) > 0 ||
383		s.title != "" ||
384		s.description != "" ||
385		s.obj != nil
386}
387
// allTypes is the set of all kinds a schema may allow. It is passed as the
// initial set of allowed types by decoder.schema and state.schema, and
// finalize emits "_" when all of these kinds remain unconstrained.
const allTypes = cue.NullKind | cue.BoolKind | cue.NumberKind | cue.IntKind |
	cue.StringKind | cue.ListKind | cue.StructKind
390
391// finalize constructs a CUE type from the collected constraints.
392func (s *state) finalize() (e ast.Expr) {
393	conjuncts := []ast.Expr{}
394	disjuncts := []ast.Expr{}
395
396	types := s.allowedTypes &^ s.usedTypes
397	if types == allTypes {
398		disjuncts = append(disjuncts, ast.NewIdent("_"))
399		types = 0
400	}
401
402	// Sort literal structs and list last for nicer formatting.
403	sort.SliceStable(s.types[arrayType].constraints, func(i, j int) bool {
404		_, ok := s.types[arrayType].constraints[i].(*ast.ListLit)
405		return !ok
406	})
407	sort.SliceStable(s.types[objectType].constraints, func(i, j int) bool {
408		_, ok := s.types[objectType].constraints[i].(*ast.StructLit)
409		return !ok
410	})
411
412	for i, t := range s.types {
413		k := coreToCUE[i]
414		isAllowed := s.allowedTypes&k != 0
415		if len(t.constraints) > 0 {
416			if t.typ == nil && !isAllowed {
417				for _, c := range t.constraints {
418					s.addErr(errors.Newf(c.Pos(),
419						"constraint not allowed because type %s is excluded",
420						coreTypeName[i],
421					))
422				}
423				continue
424			}
425			x := ast.NewBinExpr(token.AND, t.constraints...)
426			disjuncts = append(disjuncts, x)
427		} else if s.usedTypes&k != 0 {
428			continue
429		} else if t.typ != nil {
430			if !isAllowed {
431				s.addErr(errors.Newf(t.typ.Pos(),
432					"constraint not allowed because type %s is excluded",
433					coreTypeName[i],
434				))
435				continue
436			}
437			disjuncts = append(disjuncts, t.typ)
438		} else if types&k != 0 {
439			x := kindToAST(k)
440			if x != nil {
441				disjuncts = append(disjuncts, x)
442			}
443		}
444	}
445
446	conjuncts = append(conjuncts, s.all.constraints...)
447
448	obj := s.obj
449	if obj == nil {
450		obj, _ = s.types[objectType].typ.(*ast.StructLit)
451	}
452	if obj != nil {
453		// TODO: may need to explicitly close.
454		if !s.closeStruct {
455			obj.Elts = append(obj.Elts, &ast.Ellipsis{})
456		}
457	}
458
459	if len(disjuncts) > 0 {
460		conjuncts = append(conjuncts, ast.NewBinExpr(token.OR, disjuncts...))
461	}
462
463	if len(conjuncts) == 0 {
464		e = &ast.BottomLit{}
465	} else {
466		e = ast.NewBinExpr(token.AND, conjuncts...)
467	}
468
469	a := []ast.Expr{e}
470	if s.nullable != nil {
471		a = []ast.Expr{s.nullable, e}
472	}
473
474outer:
475	switch {
476	case s.default_ != nil:
477		// check conditions where default can be skipped.
478		switch x := s.default_.(type) {
479		case *ast.ListLit:
480			if s.usedTypes == cue.ListKind && len(x.Elts) == 0 {
481				break outer
482			}
483		}
484		a = append(a, &ast.UnaryExpr{Op: token.MUL, X: s.default_})
485	}
486
487	e = ast.NewBinExpr(token.OR, a...)
488
489	if len(s.definitions) > 0 {
490		if st, ok := e.(*ast.StructLit); ok {
491			st.Elts = append(st.Elts, s.definitions...)
492		} else {
493			st = ast.NewStruct()
494			st.Elts = append(st.Elts, &ast.EmbedDecl{Expr: e})
495			st.Elts = append(st.Elts, s.definitions...)
496			e = st
497		}
498	}
499
500	s.linkReferences()
501
502	return e
503}
504
505func isAny(s ast.Expr) bool {
506	i, ok := s.(*ast.Ident)
507	return ok && i.Name == "_"
508}
509
510func (s *state) comment() *ast.CommentGroup {
511	// Create documentation.
512	doc := strings.TrimSpace(s.title)
513	if s.description != "" {
514		if doc != "" {
515			doc += "\n\n"
516		}
517		doc += s.description
518		doc = strings.TrimSpace(doc)
519	}
520	// TODO: add examples as well?
521	if doc == "" {
522		return nil
523	}
524	return internal.NewComment(true, doc)
525}
526
527func (s *state) doc(n ast.Node) {
528	doc := s.comment()
529	if doc != nil {
530		ast.SetComments(n, []*ast.CommentGroup{doc})
531	}
532}
533
534func (s *state) schema(n cue.Value, idRef ...label) ast.Expr {
535	expr, _ := s.schemaState(n, allTypes, idRef, false)
536	// TODO: report unused doc.
537	return expr
538}
539
540// schemaState is a low-level API for schema. isLogical specifies whether the
541// caller is a logical operator like anyOf, allOf, oneOf, or not.
542func (s *state) schemaState(n cue.Value, types cue.Kind, idRef []label, isLogical bool) (ast.Expr, *state) {
543	state := &state{
544		up:           s,
545		isSchema:     s.isSchema,
546		decoder:      s.decoder,
547		allowedTypes: types,
548		path:         s.path,
549		idRef:        idRef,
550		pos:          n,
551	}
552	if isLogical {
553		state.parent = s
554	}
555
556	if n.Kind() != cue.StructKind {
557		return s.errf(n, "schema expects mapping node, found %s", n.Kind()), state
558	}
559
560	// do multiple passes over the constraints to ensure they are done in order.
561	for pass := 0; pass < 4; pass++ {
562		state.processMap(n, func(key string, value cue.Value) {
563			// Convert each constraint into a either a value or a functor.
564			c := constraintMap[key]
565			if c == nil {
566				if pass == 0 && s.cfg.Strict {
567					// TODO: value is not the correct position, albeit close. Fix this.
568					s.warnf(value.Pos(), "unsupported constraint %q", key)
569				}
570				return
571			}
572			if c.phase == pass {
573				c.fn(value, state)
574			}
575		})
576	}
577
578	return state.finalize(), state
579}
580
581func (s *state) value(n cue.Value) ast.Expr {
582	k := n.Kind()
583	s.usedTypes |= k
584	s.allowedTypes &= k
585	switch k {
586	case cue.ListKind:
587		a := []ast.Expr{}
588		for i, _ := n.List(); i.Next(); {
589			a = append(a, s.value(i.Value()))
590		}
591		return setPos(ast.NewList(a...), n)
592
593	case cue.StructKind:
594		a := []ast.Decl{}
595		s.processMap(n, func(key string, n cue.Value) {
596			a = append(a, &ast.Field{
597				Label: ast.NewString(key),
598				Value: s.value(n),
599			})
600		})
601		// TODO: only open when s.isSchema?
602		a = append(a, &ast.Ellipsis{})
603		return setPos(&ast.StructLit{Elts: a}, n)
604
605	default:
606		if !n.IsConcrete() {
607			s.errf(n, "invalid non-concrete value")
608		}
609		return n.Syntax(cue.Final()).(ast.Expr)
610	}
611}
612
613// processMap processes a yaml node, expanding merges.
614//
615// TODO: in some cases we can translate merges into CUE embeddings.
616// This may also prevent exponential blow-up (as may happen when
617// converting YAML to JSON).
618func (s *state) processMap(n cue.Value, f func(key string, n cue.Value)) {
619	saved := s.path
620	defer func() { s.path = saved }()
621
622	// TODO: intercept references to allow for optimized performance.
623	for i, _ := n.Fields(); i.Next(); {
624		key := i.Label()
625		s.path = append(saved, key)
626		f(key, i.Value())
627	}
628}
629
630func (s *state) listItems(name string, n cue.Value, allowEmpty bool) (a []cue.Value) {
631	if n.Kind() != cue.ListKind {
632		s.errf(n, `value of %q must be an array, found %v`, name, n.Kind())
633	}
634	for i, _ := n.List(); i.Next(); {
635		a = append(a, i.Value())
636	}
637	if !allowEmpty && len(a) == 0 {
638		s.errf(n, `array for %q must be non-empty`, name)
639	}
640	return a
641}
642
643// excludeFields returns a CUE expression that can be used to exclude the
644// fields of the given declaration in a label expression. For instance, for
645//
646//    { foo: 1, bar: int }
647//
648// it creates
649//
650//    "^(foo|bar)$"
651//
652// which can be used in a label expression to define types for all fields but
653// those existing:
654//
655//   [!~"^(foo|bar)$"]: string
656//
657func excludeFields(decls []ast.Decl) ast.Expr {
658	var a []string
659	for _, d := range decls {
660		f, ok := d.(*ast.Field)
661		if !ok {
662			continue
663		}
664		str, _, _ := ast.LabelName(f.Label)
665		if str != "" {
666			a = append(a, str)
667		}
668	}
669	re := fmt.Sprintf("^(%s)$", strings.Join(a, "|"))
670	return &ast.UnaryExpr{Op: token.NMAT, X: ast.NewString(re)}
671}
672
673func addTag(field ast.Label, tag, value string) *ast.Field {
674	return &ast.Field{
675		Label: field,
676		Value: ast.NewIdent("_"),
677		Attrs: []*ast.Attribute{
678			{Text: fmt.Sprintf("@%s(%s)", tag, value)},
679		},
680	}
681}
682
683func setPos(e ast.Expr, v cue.Value) ast.Expr {
684	ast.SetPos(e, v.Pos())
685	return e
686}
687