1/*
2Copyright 2014 The Kubernetes Authors.
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8    http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15*/
16
17package yaml
18
19import (
20	"bufio"
21	"bytes"
22	"encoding/json"
23	"fmt"
24	"io"
25	"strings"
26	"unicode"
27
28	jsonutil "k8s.io/apimachinery/pkg/util/json"
29
30	"sigs.k8s.io/yaml"
31)
32
33// Unmarshal unmarshals the given data
34// If v is a *map[string]interface{}, *[]interface{}, or *interface{} numbers
35// are converted to int64 or float64
36func Unmarshal(data []byte, v interface{}) error {
37	preserveIntFloat := func(d *json.Decoder) *json.Decoder {
38		d.UseNumber()
39		return d
40	}
41	switch v := v.(type) {
42	case *map[string]interface{}:
43		if err := yaml.Unmarshal(data, v, preserveIntFloat); err != nil {
44			return err
45		}
46		return jsonutil.ConvertMapNumbers(*v, 0)
47	case *[]interface{}:
48		if err := yaml.Unmarshal(data, v, preserveIntFloat); err != nil {
49			return err
50		}
51		return jsonutil.ConvertSliceNumbers(*v, 0)
52	case *interface{}:
53		if err := yaml.Unmarshal(data, v, preserveIntFloat); err != nil {
54			return err
55		}
56		return jsonutil.ConvertInterfaceNumbers(v, 0)
57	default:
58		return yaml.Unmarshal(data, v)
59	}
60}
61
62// ToJSON converts a single YAML document into a JSON document
63// or returns an error. If the document appears to be JSON the
64// YAML decoding path is not used (so that error messages are
65// JSON specific).
66func ToJSON(data []byte) ([]byte, error) {
67	if hasJSONPrefix(data) {
68		return data, nil
69	}
70	return yaml.YAMLToJSON(data)
71}
72
73// YAMLToJSONDecoder decodes YAML documents from an io.Reader by
74// separating individual documents. It first converts the YAML
75// body to JSON, then unmarshals the JSON.
76type YAMLToJSONDecoder struct {
77	reader Reader
78}
79
80// NewYAMLToJSONDecoder decodes YAML documents from the provided
81// stream in chunks by converting each document (as defined by
82// the YAML spec) into its own chunk, converting it to JSON via
83// yaml.YAMLToJSON, and then passing it to json.Decoder.
84func NewYAMLToJSONDecoder(r io.Reader) *YAMLToJSONDecoder {
85	reader := bufio.NewReader(r)
86	return &YAMLToJSONDecoder{
87		reader: NewYAMLReader(reader),
88	}
89}
90
91// Decode reads a YAML document as JSON from the stream or returns
92// an error. The decoding rules match json.Unmarshal, not
93// yaml.Unmarshal.
94func (d *YAMLToJSONDecoder) Decode(into interface{}) error {
95	bytes, err := d.reader.Read()
96	if err != nil && err != io.EOF {
97		return err
98	}
99
100	if len(bytes) != 0 {
101		err := yaml.Unmarshal(bytes, into)
102		if err != nil {
103			return YAMLSyntaxError{err}
104		}
105	}
106	return err
107}
108
// YAMLDecoder hands out a stream one chunk at a time. When a chunk does not
// fit into the buffer passed to Read, Read returns io.ErrShortBuffer and the
// rest of the chunk is served by the following calls.
type YAMLDecoder struct {
	// r is the underlying stream; Close closes it.
	r io.ReadCloser

	// scanner produces the chunks that Read hands out.
	scanner *bufio.Scanner

	// remaining is the part of the current chunk that Read has not handed
	// out yet.
	remaining []byte
}
116
117// NewDocumentDecoder decodes YAML documents from the provided
118// stream in chunks by converting each document (as defined by
119// the YAML spec) into its own chunk. io.ErrShortBuffer will be
120// returned if the entire buffer could not be read to assist
121// the caller in framing the chunk.
122func NewDocumentDecoder(r io.ReadCloser) io.ReadCloser {
123	scanner := bufio.NewScanner(r)
124	// the size of initial allocation for buffer 4k
125	buf := make([]byte, 4*1024)
126	// the maximum size used to buffer a token 5M
127	scanner.Buffer(buf, 5*1024*1024)
128	scanner.Split(splitYAMLDocument)
129	return &YAMLDecoder{
130		r:       r,
131		scanner: scanner,
132	}
133}
134
135// Read reads the previous slice into the buffer, or attempts to read
136// the next chunk.
137// TODO: switch to readline approach.
138func (d *YAMLDecoder) Read(data []byte) (n int, err error) {
139	left := len(d.remaining)
140	if left == 0 {
141		// return the next chunk from the stream
142		if !d.scanner.Scan() {
143			err := d.scanner.Err()
144			if err == nil {
145				err = io.EOF
146			}
147			return 0, err
148		}
149		out := d.scanner.Bytes()
150		d.remaining = out
151		left = len(out)
152	}
153
154	// fits within data
155	if left <= len(data) {
156		copy(data, d.remaining)
157		d.remaining = nil
158		return left, nil
159	}
160
161	// caller will need to reread
162	copy(data, d.remaining[:len(data)])
163	d.remaining = d.remaining[len(data):]
164	return len(data), io.ErrShortBuffer
165}
166
167func (d *YAMLDecoder) Close() error {
168	return d.r.Close()
169}
170
const (
	// yamlSeparator is the document separator as it appears inside a stream:
	// a line break followed by three dashes.
	yamlSeparator = "\n---"
	// separator is the document separator as it appears at the start of a line.
	separator = "---"
)

// splitYAMLDocument is a bufio.SplitFunc for splitting YAML streams into
// individual documents.
//
// A document ends at the first "\n---" in data. The bytes in front of it are
// returned as the token, and the rest of the separator line, up to and
// including its line break, is consumed. While the separator line is still
// incomplete and more input may follow, more data is requested. At EOF
// whatever is left is returned: the bytes in front of an unterminated
// separator line, or all of data when it contains no separator.
func splitYAMLDocument(data []byte, atEOF bool) (advance int, token []byte, err error) {
	if atEOF && len(data) == 0 {
		return 0, nil, nil
	}
	if start := bytes.Index(data, []byte(yamlSeparator)); start >= 0 {
		// We have a potential document terminator.
		afterSep := start + len(yamlSeparator)
		if eol := bytes.IndexByte(data[afterSep:], '\n'); eol >= 0 {
			// Consume the whole separator line, including its line break.
			return afterSep + eol + 1, data[:start], nil
		}
		if atEOF {
			// The separator line is the last line of the stream and has no
			// line break. Returning (0, nil, nil) here would make the scanner
			// stop and silently drop the document in front of the separator,
			// so emit that document instead.
			return len(data), data[:start], nil
		}
		// The separator line is not complete yet; request more data.
		return 0, nil, nil
	}
	// If we're at EOF, we have a final, non-terminated line. Return it.
	if atEOF {
		return len(data), data, nil
	}
	// Request more data.
	return 0, nil, nil
}
203
// decoder is the single-method interface YAMLOrJSONDecoder uses to talk to
// the concrete decoder it delegates to.
type decoder interface {
	// Decode decodes the next object into into, or returns an error.
	Decode(into interface{}) error
}
208
209// YAMLOrJSONDecoder attempts to decode a stream of JSON documents or
210// YAML documents by sniffing for a leading { character.
211type YAMLOrJSONDecoder struct {
212	r          io.Reader
213	bufferSize int
214
215	decoder decoder
216}
217
// JSONSyntaxError describes a JSON syntax error together with the offset at
// which it was reported.
type JSONSyntaxError struct {
	// Offset is the offset reported for the error.
	Offset int64

	// Err is the underlying error.
	Err error
}

// Error formats the error as "json: offset <Offset>: <message of Err>".
func (e JSONSyntaxError) Error() string {
	cause := e.Err.Error()
	return fmt.Sprintf("json: offset %d: %s", e.Offset, cause)
}
226
// YAMLSyntaxError wraps an error encountered while reading or decoding a
// YAML document.
type YAMLSyntaxError struct {
	// err is the wrapped error.
	err error
}

// Error returns the message of the wrapped error unchanged.
func (e YAMLSyntaxError) Error() string {
	wrapped := e.err
	return wrapped.Error()
}
234
235// NewYAMLOrJSONDecoder returns a decoder that will process YAML documents
236// or JSON documents from the given reader as a stream. bufferSize determines
237// how far into the stream the decoder will look to figure out whether this
238// is a JSON stream (has whitespace followed by an open brace).
239func NewYAMLOrJSONDecoder(r io.Reader, bufferSize int) *YAMLOrJSONDecoder {
240	return &YAMLOrJSONDecoder{
241		r:          r,
242		bufferSize: bufferSize,
243	}
244}
245
246// Decode unmarshals the next object from the underlying stream into the
247// provide object, or returns an error.
248func (d *YAMLOrJSONDecoder) Decode(into interface{}) error {
249	if d.decoder == nil {
250		buffer, _, isJSON := GuessJSONStream(d.r, d.bufferSize)
251		if isJSON {
252			d.decoder = json.NewDecoder(buffer)
253		} else {
254			d.decoder = NewYAMLToJSONDecoder(buffer)
255		}
256	}
257	err := d.decoder.Decode(into)
258	if syntax, ok := err.(*json.SyntaxError); ok {
259		return JSONSyntaxError{
260			Offset: syntax.Offset,
261			Err:    syntax,
262		}
263	}
264	return err
265}
266
// Reader is a source that hands out its content one chunk per Read call.
type Reader interface {
	// Read returns the next chunk, or an error.
	Read() ([]byte, error)
}
270
271type YAMLReader struct {
272	reader Reader
273}
274
275func NewYAMLReader(r *bufio.Reader) *YAMLReader {
276	return &YAMLReader{
277		reader: &LineReader{reader: r},
278	}
279}
280
281// Read returns a full YAML document.
282func (r *YAMLReader) Read() ([]byte, error) {
283	var buffer bytes.Buffer
284	for {
285		line, err := r.reader.Read()
286		if err != nil && err != io.EOF {
287			return nil, err
288		}
289
290		sep := len([]byte(separator))
291		if i := bytes.Index(line, []byte(separator)); i == 0 {
292			// We have a potential document terminator
293			i += sep
294			trimmed := strings.TrimSpace(string(line[i:]))
295			// We only allow comments and spaces following the yaml doc separator, otherwise we'll return an error
296			if len(trimmed) > 0 && string(trimmed[0]) != "#" {
297				return nil, YAMLSyntaxError{
298					err: fmt.Errorf("invalid Yaml document separator: %s", trimmed),
299				}
300			}
301			if buffer.Len() != 0 {
302				return buffer.Bytes(), nil
303			}
304			if err == io.EOF {
305				return nil, err
306			}
307		}
308		if err == io.EOF {
309			if buffer.Len() != 0 {
310				// If we're at EOF, we have a final, non-terminated line. Return it.
311				return buffer.Bytes(), nil
312			}
313			return nil, err
314		}
315		buffer.Write(line)
316	}
317}
318
// LineReader reads a buffered stream one line at a time.
type LineReader struct {
	// reader is the buffered stream the lines are read from.
	reader *bufio.Reader
}

// Read returns a single line from the underlying reader, always ending in
// '\n'. A line that ReadLine delivers in several pieces is joined back
// together. The returned error is the one reported by the underlying reader,
// if any; the returned bytes then hold whatever was read before the error,
// followed by '\n'.
func (r *LineReader) Read() ([]byte, error) {
	var out bytes.Buffer
	for {
		fragment, more, err := r.reader.ReadLine()
		out.Write(fragment)
		if err != nil || !more {
			out.WriteByte('\n')
			return out.Bytes(), err
		}
	}
}
340
341// GuessJSONStream scans the provided reader up to size, looking
342// for an open brace indicating this is JSON. It will return the
343// bufio.Reader it creates for the consumer.
344func GuessJSONStream(r io.Reader, size int) (io.Reader, []byte, bool) {
345	buffer := bufio.NewReaderSize(r, size)
346	b, _ := buffer.Peek(size)
347	return buffer, b, hasJSONPrefix(b)
348}
349
350// IsJSONBuffer scans the provided buffer, looking
351// for an open brace indicating this is JSON.
352func IsJSONBuffer(buf []byte) bool {
353	return hasJSONPrefix(buf)
354}
355
356var jsonPrefix = []byte("{")
357
358// hasJSONPrefix returns true if the provided buffer appears to start with
359// a JSON open brace.
360func hasJSONPrefix(buf []byte) bool {
361	return hasPrefix(buf, jsonPrefix)
362}
363
// hasPrefix reports whether buf starts with prefix once its leading
// whitespace (as defined by unicode.IsSpace) has been skipped.
func hasPrefix(buf []byte, prefix []byte) bool {
	firstNonSpace := bytes.IndexFunc(buf, func(r rune) bool {
		return !unicode.IsSpace(r)
	})
	if firstNonSpace < 0 {
		// buf is empty or all whitespace: nothing is left to match against.
		return bytes.HasPrefix(nil, prefix)
	}
	return bytes.HasPrefix(buf[firstNonSpace:], prefix)
}
370