1// Copyright 2018 The CUE Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package cmd
16
17import (
18	"fmt"
19	"io/ioutil"
20	"os"
21	"path/filepath"
22	"strings"
23	"unicode"
24
25	"github.com/spf13/cobra"
26
27	"cuelang.org/go/cue/ast"
28	"cuelang.org/go/cue/ast/astutil"
29	"cuelang.org/go/cue/build"
30	"cuelang.org/go/cue/errors"
31	"cuelang.org/go/cue/format"
32	"cuelang.org/go/cue/literal"
33	"cuelang.org/go/cue/load"
34	"cuelang.org/go/cue/parser"
35	"cuelang.org/go/cue/token"
36	"cuelang.org/go/encoding/json"
37	"cuelang.org/go/encoding/protobuf"
38	"cuelang.org/go/internal"
39	"cuelang.org/go/internal/third_party/yaml"
40)
41
42func newImportCmd(c *Command) *cobra.Command {
43	cmd := &cobra.Command{
44		Use:   "import [mode] [inputs]",
45		Short: "convert other formats to CUE files",
46		Long: `import converts other formats, like JSON and YAML to CUE files
47
48Files can either be specified explicitly, or inferred from the
49specified packages. Within packages, import only looks for JSON
50and YAML files by default (see the "filetypes" help topic for
51more info). This behavior can be overridden by specifying one of
52the following modes:
53
54   Mode       Extensions
55   json       Look for JSON files (.json, .jsonl, .ldjson).
56   yaml       Look for YAML files (.yaml .yml).
57   text       Look for text files (.txt).
58   binary     Look for files with extensions specified by --ext
59              and interpret them as binary.
60   jsonschema Interpret JSON, YAML or CUE files as JSON Schema.
61   openapi    Interpret JSON, YAML or CUE files as OpenAPI.
62   auto       Look for JSON or YAML files and interpret them as
63              data, JSON Schema, or OpenAPI, depending on
64              existing fields.
65   data       Look for JSON or YAML files and interpret them
66              as data.
67   proto      Convert Protocol buffer definition files and
68              transitive dependencies.
69
70Using the --ext flag in combination with a mode causes matched files to be
71interpreted as the format indicated by the mode, overriding any other meaning
72attributed to that extension.
73
74auto mode
75
76In auto mode, data files are interpreted based on some marker
77fields. JSON Schema is identified by a top-level "$schema" field
78with a URL of the form "https?://json-schema.org/.*schema#?".
79OpenAPI is identified by the existence of a top-level field
80"openapi", which must have a major semantic version of 3, and
81the info.title and info.version fields.
82
83
84proto mode
85
86Proto mode converts .proto files containing Prototcol Buffer
87definitions to CUE. The -I defines the path for includes. The
88module root is added implicitly if it exists.
89
90The package name for a converted file is derived from the
91go_package option. It can be overridden with the -p flag.
92
93A module root must be specified if a .proto files includes other
94files within the module. Files include from outside the module
95are also imported and stored within the cue.mod directory. The
96import path is defined by either the go_package option or, in the
97absence of this option, the googleapis.com/<proto package>
98convention.
99
100The following command imports all .proto files in all
101subdirectories as well all dependencies.
102
103   cue import proto -I ../include ./...
104
105The module root is implicitly added as an import path.
106
107
108Binary mode
109
110Loads matched files as binary.
111
112
113JSON/YAML mode
114
115The -f option allows overwriting of existing files. This only
116applies to files generated for explicitly specified files or
117files contained in explicitly specified packages.
118
119Use the -R option in addition to overwrite files generated for
120transitive dependencies (files written to cue.mod/gen/...).
121
122The -n option is a regexp used to filter file names in the
123matched package directories.
124
125The -I flag is used to specify import paths for proto mode.
126The module root is implicitly added as an import if it exists.
127
128Examples:
129
130  # Convert individual files:
131  $ cue import foo.json bar.json  # create foo.cue and bar.cue
132
133  # Convert all json files in the indicated directories:
134  $ cue import json ./...
135
136The "flags" help topic describes how to assign values to a
137specific path within a CUE namespace. Some examples of that
138
139Examples:
140
141  $ cat <<EOF > foo.yaml
142  kind: Service
143  name: booster
144  EOF
145
146  # include the parsed file as an emit value:
147  $ cue import foo.yaml
148  $ cat foo.cue
149  {
150      kind: Service
151      name: booster
152  }
153
154  # include the parsed file at the root of the CUE file:
155  $ cue import -f foo.yaml
156  $ cat foo.cue
157  kind: Service
158  name: booster
159
160  # include the import config at the mystuff path
161  $ cue import -f -l '"mystuff"' foo.yaml
162  $ cat foo.cue
163  myStuff: {
164      kind: Service
165      name: booster
166  }
167
168  # append another object to the input file
169  $ cat <<EOF >> foo.yaml
170  ---
171  kind: Deployment
172  name: booster
173  replicas: 1
174  EOF
175
176  # base the path values on the input
177  $ cue import -f -l 'strings.ToLower(kind)' -l name foo.yaml
178  $ cat foo.cue
179  service: booster: {
180      kind: "Service"
181      name: "booster"
182  }
183
184  # base the path values on the input and file name
185  $ cue import -f --with-context -l 'path.Base(filename)' -l data.kind foo.yaml
186  $ cat foo.cue
187  "foo.yaml": Service: {
188      kind: "Service"
189      name: "booster"
190  }
191
192  "foo.yaml": Deployment: {
193      kind:     "Deployment"
194      name:     "booster
195      replicas: 1
196  }
197
198  # include all files as list elements
199  $ cue import -f -list -foo.yaml
200  $ cat foo.cue
201  [{
202      kind: "Service"
203      name: "booster"
204  }, {
205      kind:     "Deployment"
206      name:     "booster
207      replicas: 1
208  }]
209
210  # collate files with the same path into a list
211  $ cue import -f -list -l 'strings.ToLower(kind)' foo.yaml
212  $ cat foo.cue
213  service: [{
214      kind: "Service"
215      name: "booster"
216  }
217  deployment: [{
218      kind:     "Deployment"
219      name:     "booster
220      replicas: 1
221  }]
222
223
224Embedded data files
225
226The --recursive or -R flag enables the parsing of fields that are string
227representations of data formats themselves. A field that can be parsed is
228replaced with a call encoding the data from a structured form that is placed
229in a sibling field.
230
231It is also possible to recursively hoist data formats:
232
233Example:
234  $ cat <<EOF > example.json
235  "a": {
236      "data": '{ "foo": 1, "bar": 2 }',
237  }
238  EOF
239
240  $ cue import -R example.json
241  $ cat example.cue
242  import "encoding/json"
243
244  a: {
245      data: json.Encode(_data),
246      _data = {
247          foo: 1
248          bar: 2
249      }
250  }
251`,
252		RunE: mkRunE(c, runImport),
253	}
254
255	addOutFlags(cmd.Flags(), false)
256	addOrphanFlags(cmd.Flags())
257
258	cmd.Flags().Bool(string(flagFiles), false, "split multiple entries into different files")
259	cmd.Flags().Bool(string(flagDryrun), false, "only run simulation")
260	cmd.Flags().BoolP(string(flagRecursive), "R", false, "recursively parse string values")
261	cmd.Flags().StringArray(string(flagExt), nil, "match files with these extensions")
262
263	return cmd
264}
265
266// TODO: factor out rooting of orphaned files.
267
268func runImport(cmd *Command, args []string) (err error) {
269	c := &config{
270		fileFilter:     `\.(json|yaml|yml|jsonl|ldjson)$`,
271		interpretation: build.Auto,
272		loadCfg:        &load.Config{DataFiles: true},
273	}
274
275	var mode string
276	extensions := flagExt.StringArray(cmd)
277	if len(args) >= 1 && !strings.ContainsAny(args[0], `/\:.`) {
278		c.interpretation = ""
279		if len(extensions) > 0 {
280			c.overrideDefault = true
281		}
282
283		mode = args[0]
284		args = args[1:]
285		c.encoding = build.Encoding(mode)
286		switch mode {
287		case "proto":
288			c.fileFilter = `\.proto$`
289		case "json":
290			c.fileFilter = `\.(json|jsonl|ldjson)$`
291		case "yaml":
292			c.fileFilter = `\.(yaml|yml)$`
293		case "text":
294			c.fileFilter = `\.txt$`
295		case "binary":
296			if len(extensions) == 0 {
297				return errors.Newf(token.NoPos,
298					"use of --ext flag required in binary mode")
299			}
300		case "auto", "openapi", "jsonschema":
301			c.interpretation = build.Interpretation(mode)
302			c.encoding = "yaml"
303		case "data":
304			// default mode for encoding/ no interpretation.
305			c.encoding = ""
306		default:
307			return errors.Newf(token.NoPos, "unknown mode %q", mode)
308		}
309	}
310	if len(extensions) > 0 {
311		c.fileFilter = `\.(` + strings.Join(extensions, "|") + `)$`
312	}
313
314	b, err := parseArgs(cmd, args, c)
315	exitOnErr(cmd, err, true)
316
317	switch mode {
318	default:
319		err = genericMode(cmd, b)
320	case "proto":
321		err = protoMode(b)
322	}
323
324	exitOnErr(cmd, err, true)
325	return nil
326}
327
328func protoMode(b *buildPlan) error {
329	var prev *build.Instance
330	root := ""
331	module := ""
332	protoFiles := []*build.File{}
333
334	for _, b := range b.insts {
335		hasProto := false
336		for _, f := range b.OrphanedFiles {
337			if f.Encoding == "proto" {
338				protoFiles = append(protoFiles, f)
339				hasProto = true
340			}
341		}
342		if !hasProto {
343			continue
344		}
345
346		// check dirs, all must have same root.
347		switch {
348		case root != "":
349			if b.Root != "" && root != b.Root {
350				return errors.Newf(token.NoPos,
351					"instances must have same root in proto mode; "+
352						"found %q (%s) and %q (%s)",
353					prev.Root, prev.DisplayPath, b.Root, b.DisplayPath)
354			}
355		case b.Root != "":
356			root = b.Root
357			module = b.Module
358			prev = b
359		}
360	}
361
362	c := &protobuf.Config{
363		Root:     root,
364		Module:   module,
365		Paths:    b.encConfig.ProtoPath,
366		PkgName:  b.encConfig.PkgName,
367		EnumMode: flagProtoEnum.String(b.cmd),
368	}
369	if module != "" {
370		// We only allow imports from packages within the module if an actual
371		// module is allowed.
372		c.Paths = append([]string{root}, c.Paths...)
373	}
374	p := protobuf.NewExtractor(c)
375	for _, f := range protoFiles {
376		_ = p.AddFile(f.Filename, f.Source)
377	}
378
379	files, err := p.Files()
380	if err != nil {
381		return err
382	}
383
384	modDir := ""
385	if root != "" {
386		modDir = internal.GenPath(root)
387	}
388
389	for _, f := range files {
390		// Only write the cue.mod files if they don't exist or if -Rf is used.
391		abs := f.Filename
392		if !filepath.IsAbs(abs) {
393			abs = filepath.Join(root, abs)
394		}
395		force := flagForce.Bool(b.cmd)
396		if flagRecursive.Bool(b.cmd) && strings.HasPrefix(abs, modDir) {
397			force = false
398		}
399
400		cueFile, err := getFilename(b, f, root, force)
401		if cueFile == "" {
402			return err
403		}
404		err = writeFile(b, f, cueFile)
405		if err != nil {
406			return err
407		}
408	}
409	return nil
410}
411
412func genericMode(cmd *Command, b *buildPlan) error {
413	pkgFlag := flagPackage.String(cmd)
414	for _, pkg := range b.insts {
415		pkgName := pkgFlag
416		if pkgName == "" {
417			pkgName = pkg.PkgName
418		}
419		// TODO: allow if there is a unique package name.
420		if pkgName == "" && len(b.insts) > 1 {
421			err := fmt.Errorf("must specify package name with the -p flag")
422			exitOnErr(cmd, err, true)
423		}
424	}
425
426	for _, f := range b.imported {
427		err := handleFile(b, f)
428		if err != nil {
429			return err
430		}
431	}
432	return nil
433}
434
435func getFilename(b *buildPlan, f *ast.File, root string, force bool) (filename string, err error) {
436	cueFile := f.Filename
437	if out := flagOutFile.String(b.cmd); out != "" {
438		cueFile = out
439	}
440
441	if cueFile != "-" {
442		switch _, err := os.Stat(cueFile); {
443		case os.IsNotExist(err):
444		case err == nil:
445			if !force {
446				// TODO: mimic old behavior: write to stderr, but do not exit
447				// with error code. Consider what is best to do here.
448				stderr := b.cmd.Command.OutOrStderr()
449				if root != "" {
450					cueFile, _ = filepath.Rel(root, cueFile)
451				}
452				fmt.Fprintf(stderr, "Skipping file %q: already exists.\n",
453					filepath.ToSlash(cueFile))
454				if strings.HasPrefix(cueFile, "cue.mod") {
455					fmt.Fprintln(stderr, "Use -Rf to override.")
456				} else {
457					fmt.Fprintln(stderr, "Use -f to override.")
458				}
459				return "", nil
460			}
461		default:
462			return "", fmt.Errorf("error creating file: %v", err)
463		}
464	}
465	return cueFile, nil
466}
467
468func handleFile(b *buildPlan, f *ast.File) (err error) {
469	// TODO: fill out root.
470	cueFile, err := getFilename(b, f, "", flagForce.Bool(b.cmd))
471	if cueFile == "" {
472		return err
473	}
474
475	if flagRecursive.Bool(b.cmd) {
476		h := hoister{fields: map[string]bool{}}
477		h.hoist(f)
478	}
479
480	return writeFile(b, f, cueFile)
481}
482
483func writeFile(p *buildPlan, f *ast.File, cueFile string) error {
484	b, err := format.Node(f, format.Simplify())
485	if err != nil {
486		return fmt.Errorf("error formatting file: %v", err)
487	}
488
489	if cueFile == "-" {
490		_, err := p.cmd.OutOrStdout().Write(b)
491		return err
492	}
493	_ = os.MkdirAll(filepath.Dir(cueFile), 0755)
494	return ioutil.WriteFile(cueFile, b, 0644)
495}
496
497type hoister struct {
498	fields map[string]bool
499}
500
501func (h *hoister) hoist(f *ast.File) {
502	ast.Walk(f, nil, func(n ast.Node) {
503		name := ""
504		switch x := n.(type) {
505		case *ast.Field:
506			name, _, _ = ast.LabelName(x.Label)
507		case *ast.Alias:
508			name = x.Ident.Name
509		case *ast.LetClause:
510			name = x.Ident.Name
511		}
512		if name != "" {
513			h.fields[name] = true
514		}
515	})
516
517	_ = astutil.Apply(f, func(c astutil.Cursor) bool {
518		n := c.Node()
519		switch n.(type) {
520		case *ast.Comprehension:
521			return false
522		}
523		return true
524
525	}, func(c astutil.Cursor) bool {
526		switch f := c.Node().(type) {
527		case *ast.Field:
528			name, _, _ := ast.LabelName(f.Label)
529			if name == "" {
530				return false
531			}
532
533			lit, ok := f.Value.(*ast.BasicLit)
534			if !ok || lit.Kind != token.STRING {
535				return false
536			}
537
538			str, err := literal.Unquote(lit.Value)
539			if err != nil {
540				return false
541			}
542
543			expr, enc := tryParse(str)
544			if expr == nil {
545				return false
546			}
547
548			pkg := c.Import("encoding/" + enc)
549			if pkg == nil {
550				return false
551			}
552
553			// found a replacable string
554			dataField := h.uniqueName(name, "_", "cue_")
555
556			f.Value = ast.NewCall(
557				ast.NewSel(pkg, "Marshal"),
558				ast.NewIdent(dataField))
559
560			// TODO: use definitions instead
561			c.InsertAfter(astutil.ApplyRecursively(&ast.LetClause{
562				Ident: ast.NewIdent(dataField),
563				Expr:  expr,
564			}))
565		}
566		return true
567	})
568}
569
570func tryParse(str string) (s ast.Expr, pkg string) {
571	b := []byte(str)
572	if json.Valid(b) {
573		expr, err := parser.ParseExpr("", b)
574		if err != nil {
575			// TODO: report error
576			return nil, ""
577		}
578		switch expr.(type) {
579		case *ast.StructLit, *ast.ListLit:
580		default:
581			return nil, ""
582		}
583		return expr, "json"
584	}
585
586	if expr, err := yaml.Unmarshal("", b); err == nil {
587		switch expr.(type) {
588		case *ast.StructLit, *ast.ListLit:
589		default:
590			return nil, ""
591		}
592		return expr, "yaml"
593	}
594
595	return nil, ""
596}
597
598func (h *hoister) uniqueName(base, prefix, typ string) string {
599	base = strings.Map(func(r rune) rune {
600		if unicode.In(r, unicode.L, unicode.N) {
601			return r
602		}
603		return '_'
604	}, base)
605
606	name := prefix + typ + base
607	for {
608		if !h.fields[name] {
609			h.fields[name] = true
610			return name
611		}
612		name = prefix + typ + base
613		typ += "x"
614	}
615}
616