1package yaml
2
3import (
4	"encoding"
5	"encoding/base64"
6	"fmt"
7	"io"
8	"math"
9	"reflect"
10	"strconv"
11	"time"
12)
13
// Node kinds. Every kind occupies its own bit.
const (
	documentNode = 1 << 0
	mappingNode  = 1 << 1
	sequenceNode = 1 << 2
	scalarNode   = 1 << 3
	aliasNode    = 1 << 4
)
21
// node is one element of the tree that the parser builds out of the
// event stream.
type node struct {
	kind   int // one of the node kind constants
	line   int
	column int
	tag    string
	// alias is the node an alias node resolves to; it is only
	// populated for alias nodes.
	alias    *node
	value    string
	implicit bool
	children []*node
	anchors  map[string]*node
}
33
34// ----------------------------------------------------------------------------
35// Parser, produces a node tree out of a libyaml event stream.
36
37type parser struct {
38	parser   yaml_parser_t
39	event    yaml_event_t
40	doc      *node
41	doneInit bool
42}
43
44func newParser(b []byte) *parser {
45	p := parser{}
46	if !yaml_parser_initialize(&p.parser) {
47		panic("failed to initialize YAML emitter")
48	}
49	if len(b) == 0 {
50		b = []byte{'\n'}
51	}
52	yaml_parser_set_input_string(&p.parser, b)
53	return &p
54}
55
56func newParserFromReader(r io.Reader) *parser {
57	p := parser{}
58	if !yaml_parser_initialize(&p.parser) {
59		panic("failed to initialize YAML emitter")
60	}
61	yaml_parser_set_input_reader(&p.parser, r)
62	return &p
63}
64
65func (p *parser) init() {
66	if p.doneInit {
67		return
68	}
69	p.expect(yaml_STREAM_START_EVENT)
70	p.doneInit = true
71}
72
73func (p *parser) destroy() {
74	if p.event.typ != yaml_NO_EVENT {
75		yaml_event_delete(&p.event)
76	}
77	yaml_parser_delete(&p.parser)
78}
79
80// expect consumes an event from the event stream and
81// checks that it's of the expected type.
82func (p *parser) expect(e yaml_event_type_t) {
83	if p.event.typ == yaml_NO_EVENT {
84		if !yaml_parser_parse(&p.parser, &p.event) {
85			p.fail()
86		}
87	}
88	if p.event.typ == yaml_STREAM_END_EVENT {
89		failf("attempted to go past the end of stream; corrupted value?")
90	}
91	if p.event.typ != e {
92		p.parser.problem = fmt.Sprintf("expected %s event but got %s", e, p.event.typ)
93		p.fail()
94	}
95	yaml_event_delete(&p.event)
96	p.event.typ = yaml_NO_EVENT
97}
98
99// peek peeks at the next event in the event stream,
100// puts the results into p.event and returns the event type.
101func (p *parser) peek() yaml_event_type_t {
102	if p.event.typ != yaml_NO_EVENT {
103		return p.event.typ
104	}
105	if !yaml_parser_parse(&p.parser, &p.event) {
106		p.fail()
107	}
108	return p.event.typ
109}
110
111func (p *parser) fail() {
112	var where string
113	var line int
114	if p.parser.problem_mark.line != 0 {
115		line = p.parser.problem_mark.line
116		// Scanner errors don't iterate line before returning error
117		if p.parser.error == yaml_SCANNER_ERROR {
118			line++
119		}
120	} else if p.parser.context_mark.line != 0 {
121		line = p.parser.context_mark.line
122	}
123	if line != 0 {
124		where = "line " + strconv.Itoa(line) + ": "
125	}
126	var msg string
127	if len(p.parser.problem) > 0 {
128		msg = p.parser.problem
129	} else {
130		msg = "unknown problem parsing YAML content"
131	}
132	failf("%s%s", where, msg)
133}
134
135func (p *parser) anchor(n *node, anchor []byte) {
136	if anchor != nil {
137		p.doc.anchors[string(anchor)] = n
138	}
139}
140
141func (p *parser) parse() *node {
142	p.init()
143	switch p.peek() {
144	case yaml_SCALAR_EVENT:
145		return p.scalar()
146	case yaml_ALIAS_EVENT:
147		return p.alias()
148	case yaml_MAPPING_START_EVENT:
149		return p.mapping()
150	case yaml_SEQUENCE_START_EVENT:
151		return p.sequence()
152	case yaml_DOCUMENT_START_EVENT:
153		return p.document()
154	case yaml_STREAM_END_EVENT:
155		// Happens when attempting to decode an empty buffer.
156		return nil
157	default:
158		panic("attempted to parse unknown event: " + p.event.typ.String())
159	}
160}
161
162func (p *parser) node(kind int) *node {
163	return &node{
164		kind:   kind,
165		line:   p.event.start_mark.line,
166		column: p.event.start_mark.column,
167	}
168}
169
170func (p *parser) document() *node {
171	n := p.node(documentNode)
172	n.anchors = make(map[string]*node)
173	p.doc = n
174	p.expect(yaml_DOCUMENT_START_EVENT)
175	n.children = append(n.children, p.parse())
176	p.expect(yaml_DOCUMENT_END_EVENT)
177	return n
178}
179
180func (p *parser) alias() *node {
181	n := p.node(aliasNode)
182	n.value = string(p.event.anchor)
183	n.alias = p.doc.anchors[n.value]
184	if n.alias == nil {
185		failf("unknown anchor '%s' referenced", n.value)
186	}
187	p.expect(yaml_ALIAS_EVENT)
188	return n
189}
190
191func (p *parser) scalar() *node {
192	n := p.node(scalarNode)
193	n.value = string(p.event.value)
194	n.tag = string(p.event.tag)
195	n.implicit = p.event.implicit
196	p.anchor(n, p.event.anchor)
197	p.expect(yaml_SCALAR_EVENT)
198	return n
199}
200
201func (p *parser) sequence() *node {
202	n := p.node(sequenceNode)
203	p.anchor(n, p.event.anchor)
204	p.expect(yaml_SEQUENCE_START_EVENT)
205	for p.peek() != yaml_SEQUENCE_END_EVENT {
206		n.children = append(n.children, p.parse())
207	}
208	p.expect(yaml_SEQUENCE_END_EVENT)
209	return n
210}
211
212func (p *parser) mapping() *node {
213	n := p.node(mappingNode)
214	p.anchor(n, p.event.anchor)
215	p.expect(yaml_MAPPING_START_EVENT)
216	for p.peek() != yaml_MAPPING_END_EVENT {
217		n.children = append(n.children, p.parse(), p.parse())
218	}
219	p.expect(yaml_MAPPING_END_EVENT)
220	return n
221}
222
223// ----------------------------------------------------------------------------
224// Decoder, unmarshals a node into a provided value.
225
226type decoder struct {
227	doc     *node
228	aliases map[*node]bool
229	mapType reflect.Type
230	terrors []string
231	strict  bool
232}
233
234var (
235	mapItemType    = reflect.TypeOf(MapItem{})
236	durationType   = reflect.TypeOf(time.Duration(0))
237	defaultMapType = reflect.TypeOf(map[interface{}]interface{}{})
238	ifaceType      = defaultMapType.Elem()
239	timeType       = reflect.TypeOf(time.Time{})
240	ptrTimeType    = reflect.TypeOf(&time.Time{})
241)
242
243func newDecoder(strict bool) *decoder {
244	d := &decoder{mapType: defaultMapType, strict: strict}
245	d.aliases = make(map[*node]bool)
246	return d
247}
248
249func (d *decoder) terror(n *node, tag string, out reflect.Value) {
250	if n.tag != "" {
251		tag = n.tag
252	}
253	value := n.value
254	if tag != yaml_SEQ_TAG && tag != yaml_MAP_TAG {
255		if len(value) > 10 {
256			value = " `" + value[:7] + "...`"
257		} else {
258			value = " `" + value + "`"
259		}
260	}
261	d.terrors = append(d.terrors, fmt.Sprintf("line %d: cannot unmarshal %s%s into %s", n.line+1, shortTag(tag), value, out.Type()))
262}
263
264func (d *decoder) callUnmarshaler(n *node, u Unmarshaler) (good bool) {
265	terrlen := len(d.terrors)
266	err := u.UnmarshalYAML(func(v interface{}) (err error) {
267		defer handleErr(&err)
268		d.unmarshal(n, reflect.ValueOf(v))
269		if len(d.terrors) > terrlen {
270			issues := d.terrors[terrlen:]
271			d.terrors = d.terrors[:terrlen]
272			return &TypeError{issues}
273		}
274		return nil
275	})
276	if e, ok := err.(*TypeError); ok {
277		d.terrors = append(d.terrors, e.Errors...)
278		return false
279	}
280	if err != nil {
281		fail(err)
282	}
283	return true
284}
285
286// d.prepare initializes and dereferences pointers and calls UnmarshalYAML
287// if a value is found to implement it.
288// It returns the initialized and dereferenced out value, whether
289// unmarshalling was already done by UnmarshalYAML, and if so whether
290// its types unmarshalled appropriately.
291//
292// If n holds a null value, prepare returns before doing anything.
293func (d *decoder) prepare(n *node, out reflect.Value) (newout reflect.Value, unmarshaled, good bool) {
294	if n.tag == yaml_NULL_TAG || n.kind == scalarNode && n.tag == "" && (n.value == "null" || n.value == "~" || n.value == "" && n.implicit) {
295		return out, false, false
296	}
297	again := true
298	for again {
299		again = false
300		if out.Kind() == reflect.Ptr {
301			if out.IsNil() {
302				out.Set(reflect.New(out.Type().Elem()))
303			}
304			out = out.Elem()
305			again = true
306		}
307		if out.CanAddr() {
308			if u, ok := out.Addr().Interface().(Unmarshaler); ok {
309				good = d.callUnmarshaler(n, u)
310				return out, true, good
311			}
312		}
313	}
314	return out, false, false
315}
316
317func (d *decoder) unmarshal(n *node, out reflect.Value) (good bool) {
318	switch n.kind {
319	case documentNode:
320		return d.document(n, out)
321	case aliasNode:
322		return d.alias(n, out)
323	}
324	out, unmarshaled, good := d.prepare(n, out)
325	if unmarshaled {
326		return good
327	}
328	switch n.kind {
329	case scalarNode:
330		good = d.scalar(n, out)
331	case mappingNode:
332		good = d.mapping(n, out)
333	case sequenceNode:
334		good = d.sequence(n, out)
335	default:
336		panic("internal error: unknown node kind: " + strconv.Itoa(n.kind))
337	}
338	return good
339}
340
341func (d *decoder) document(n *node, out reflect.Value) (good bool) {
342	if len(n.children) == 1 {
343		d.doc = n
344		d.unmarshal(n.children[0], out)
345		return true
346	}
347	return false
348}
349
350func (d *decoder) alias(n *node, out reflect.Value) (good bool) {
351	if d.aliases[n] {
352		// TODO this could actually be allowed in some circumstances.
353		failf("anchor '%s' value contains itself", n.value)
354	}
355	d.aliases[n] = true
356	good = d.unmarshal(n.alias, out)
357	delete(d.aliases, n)
358	return good
359}
360
// zeroValue is the zero reflect.Value. Passing it to SetMapIndex
// deletes the key.
var zeroValue reflect.Value

// resetMap deletes every entry of the map held in out.
func resetMap(out reflect.Value) {
	keys := out.MapKeys()
	for i := range keys {
		out.SetMapIndex(keys[i], zeroValue)
	}
}
368
369func (d *decoder) scalar(n *node, out reflect.Value) bool {
370	var tag string
371	var resolved interface{}
372	if n.tag == "" && !n.implicit {
373		tag = yaml_STR_TAG
374		resolved = n.value
375	} else {
376		tag, resolved = resolve(n.tag, n.value)
377		if tag == yaml_BINARY_TAG {
378			data, err := base64.StdEncoding.DecodeString(resolved.(string))
379			if err != nil {
380				failf("!!binary value contains invalid base64 data")
381			}
382			resolved = string(data)
383		}
384	}
385	if resolved == nil {
386		if out.Kind() == reflect.Map && !out.CanAddr() {
387			resetMap(out)
388		} else {
389			out.Set(reflect.Zero(out.Type()))
390		}
391		return true
392	}
393	if resolvedv := reflect.ValueOf(resolved); out.Type() == resolvedv.Type() {
394		// We've resolved to exactly the type we want, so use that.
395		out.Set(resolvedv)
396		return true
397	}
398	// Perhaps we can use the value as a TextUnmarshaler to
399	// set its value.
400	if out.CanAddr() {
401		u, ok := out.Addr().Interface().(encoding.TextUnmarshaler)
402		if ok {
403			var text []byte
404			if tag == yaml_BINARY_TAG {
405				text = []byte(resolved.(string))
406			} else {
407				// We let any value be unmarshaled into TextUnmarshaler.
408				// That might be more lax than we'd like, but the
409				// TextUnmarshaler itself should bowl out any dubious values.
410				text = []byte(n.value)
411			}
412			err := u.UnmarshalText(text)
413			if err != nil {
414				fail(err)
415			}
416			return true
417		}
418	}
419	switch out.Kind() {
420	case reflect.String:
421		if tag == yaml_BINARY_TAG {
422			out.SetString(resolved.(string))
423			return true
424		}
425		if resolved != nil {
426			out.SetString(n.value)
427			return true
428		}
429	case reflect.Interface:
430		if resolved == nil {
431			out.Set(reflect.Zero(out.Type()))
432		} else if tag == yaml_TIMESTAMP_TAG {
433			// It looks like a timestamp but for backward compatibility
434			// reasons we set it as a string, so that code that unmarshals
435			// timestamp-like values into interface{} will continue to
436			// see a string and not a time.Time.
437			// TODO(v3) Drop this.
438			out.Set(reflect.ValueOf(n.value))
439		} else {
440			out.Set(reflect.ValueOf(resolved))
441		}
442		return true
443	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
444		switch resolved := resolved.(type) {
445		case int:
446			if !out.OverflowInt(int64(resolved)) {
447				out.SetInt(int64(resolved))
448				return true
449			}
450		case int64:
451			if !out.OverflowInt(resolved) {
452				out.SetInt(resolved)
453				return true
454			}
455		case uint64:
456			if resolved <= math.MaxInt64 && !out.OverflowInt(int64(resolved)) {
457				out.SetInt(int64(resolved))
458				return true
459			}
460		case float64:
461			if resolved <= math.MaxInt64 && !out.OverflowInt(int64(resolved)) {
462				out.SetInt(int64(resolved))
463				return true
464			}
465		case string:
466			if out.Type() == durationType {
467				d, err := time.ParseDuration(resolved)
468				if err == nil {
469					out.SetInt(int64(d))
470					return true
471				}
472			}
473		}
474	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
475		switch resolved := resolved.(type) {
476		case int:
477			if resolved >= 0 && !out.OverflowUint(uint64(resolved)) {
478				out.SetUint(uint64(resolved))
479				return true
480			}
481		case int64:
482			if resolved >= 0 && !out.OverflowUint(uint64(resolved)) {
483				out.SetUint(uint64(resolved))
484				return true
485			}
486		case uint64:
487			if !out.OverflowUint(uint64(resolved)) {
488				out.SetUint(uint64(resolved))
489				return true
490			}
491		case float64:
492			if resolved <= math.MaxUint64 && !out.OverflowUint(uint64(resolved)) {
493				out.SetUint(uint64(resolved))
494				return true
495			}
496		}
497	case reflect.Bool:
498		switch resolved := resolved.(type) {
499		case bool:
500			out.SetBool(resolved)
501			return true
502		}
503	case reflect.Float32, reflect.Float64:
504		switch resolved := resolved.(type) {
505		case int:
506			out.SetFloat(float64(resolved))
507			return true
508		case int64:
509			out.SetFloat(float64(resolved))
510			return true
511		case uint64:
512			out.SetFloat(float64(resolved))
513			return true
514		case float64:
515			out.SetFloat(resolved)
516			return true
517		}
518	case reflect.Struct:
519		if resolvedv := reflect.ValueOf(resolved); out.Type() == resolvedv.Type() {
520			out.Set(resolvedv)
521			return true
522		}
523	case reflect.Ptr:
524		if out.Type().Elem() == reflect.TypeOf(resolved) {
525			// TODO DOes this make sense? When is out a Ptr except when decoding a nil value?
526			elem := reflect.New(out.Type().Elem())
527			elem.Elem().Set(reflect.ValueOf(resolved))
528			out.Set(elem)
529			return true
530		}
531	}
532	d.terror(n, tag, out)
533	return false
534}
535
// settableValueOf returns a settable, addressable reflect.Value
// holding a copy of i.
func settableValueOf(i interface{}) reflect.Value {
	src := reflect.ValueOf(i)
	holder := reflect.New(src.Type())
	holder.Elem().Set(src)
	return holder.Elem()
}
542
543func (d *decoder) sequence(n *node, out reflect.Value) (good bool) {
544	l := len(n.children)
545
546	var iface reflect.Value
547	switch out.Kind() {
548	case reflect.Slice:
549		out.Set(reflect.MakeSlice(out.Type(), l, l))
550	case reflect.Array:
551		if l != out.Len() {
552			failf("invalid array: want %d elements but got %d", out.Len(), l)
553		}
554	case reflect.Interface:
555		// No type hints. Will have to use a generic sequence.
556		iface = out
557		out = settableValueOf(make([]interface{}, l))
558	default:
559		d.terror(n, yaml_SEQ_TAG, out)
560		return false
561	}
562	et := out.Type().Elem()
563
564	j := 0
565	for i := 0; i < l; i++ {
566		e := reflect.New(et).Elem()
567		if ok := d.unmarshal(n.children[i], e); ok {
568			out.Index(j).Set(e)
569			j++
570		}
571	}
572	if out.Kind() != reflect.Array {
573		out.Set(out.Slice(0, j))
574	}
575	if iface.IsValid() {
576		iface.Set(out)
577	}
578	return true
579}
580
581func (d *decoder) mapping(n *node, out reflect.Value) (good bool) {
582	switch out.Kind() {
583	case reflect.Struct:
584		return d.mappingStruct(n, out)
585	case reflect.Slice:
586		return d.mappingSlice(n, out)
587	case reflect.Map:
588		// okay
589	case reflect.Interface:
590		if d.mapType.Kind() == reflect.Map {
591			iface := out
592			out = reflect.MakeMap(d.mapType)
593			iface.Set(out)
594		} else {
595			slicev := reflect.New(d.mapType).Elem()
596			if !d.mappingSlice(n, slicev) {
597				return false
598			}
599			out.Set(slicev)
600			return true
601		}
602	default:
603		d.terror(n, yaml_MAP_TAG, out)
604		return false
605	}
606	outt := out.Type()
607	kt := outt.Key()
608	et := outt.Elem()
609
610	mapType := d.mapType
611	if outt.Key() == ifaceType && outt.Elem() == ifaceType {
612		d.mapType = outt
613	}
614
615	if out.IsNil() {
616		out.Set(reflect.MakeMap(outt))
617	}
618	l := len(n.children)
619	for i := 0; i < l; i += 2 {
620		if isMerge(n.children[i]) {
621			d.merge(n.children[i+1], out)
622			continue
623		}
624		k := reflect.New(kt).Elem()
625		if d.unmarshal(n.children[i], k) {
626			kkind := k.Kind()
627			if kkind == reflect.Interface {
628				kkind = k.Elem().Kind()
629			}
630			if kkind == reflect.Map || kkind == reflect.Slice {
631				failf("invalid map key: %#v", k.Interface())
632			}
633			e := reflect.New(et).Elem()
634			if d.unmarshal(n.children[i+1], e) {
635				d.setMapIndex(n.children[i+1], out, k, e)
636			}
637		}
638	}
639	d.mapType = mapType
640	return true
641}
642
643func (d *decoder) setMapIndex(n *node, out, k, v reflect.Value) {
644	if d.strict && out.MapIndex(k) != zeroValue {
645		d.terrors = append(d.terrors, fmt.Sprintf("line %d: key %#v already set in map", n.line+1, k.Interface()))
646		return
647	}
648	out.SetMapIndex(k, v)
649}
650
651func (d *decoder) mappingSlice(n *node, out reflect.Value) (good bool) {
652	outt := out.Type()
653	if outt.Elem() != mapItemType {
654		d.terror(n, yaml_MAP_TAG, out)
655		return false
656	}
657
658	mapType := d.mapType
659	d.mapType = outt
660
661	var slice []MapItem
662	var l = len(n.children)
663	for i := 0; i < l; i += 2 {
664		if isMerge(n.children[i]) {
665			d.merge(n.children[i+1], out)
666			continue
667		}
668		item := MapItem{}
669		k := reflect.ValueOf(&item.Key).Elem()
670		if d.unmarshal(n.children[i], k) {
671			v := reflect.ValueOf(&item.Value).Elem()
672			if d.unmarshal(n.children[i+1], v) {
673				slice = append(slice, item)
674			}
675		}
676	}
677	out.Set(reflect.ValueOf(slice))
678	d.mapType = mapType
679	return true
680}
681
682func (d *decoder) mappingStruct(n *node, out reflect.Value) (good bool) {
683	sinfo, err := getStructInfo(out.Type())
684	if err != nil {
685		panic(err)
686	}
687	name := settableValueOf("")
688	l := len(n.children)
689
690	var inlineMap reflect.Value
691	var elemType reflect.Type
692	if sinfo.InlineMap != -1 {
693		inlineMap = out.Field(sinfo.InlineMap)
694		inlineMap.Set(reflect.New(inlineMap.Type()).Elem())
695		elemType = inlineMap.Type().Elem()
696	}
697
698	var doneFields []bool
699	if d.strict {
700		doneFields = make([]bool, len(sinfo.FieldsList))
701	}
702	for i := 0; i < l; i += 2 {
703		ni := n.children[i]
704		if isMerge(ni) {
705			d.merge(n.children[i+1], out)
706			continue
707		}
708		if !d.unmarshal(ni, name) {
709			continue
710		}
711		if info, ok := sinfo.FieldsMap[name.String()]; ok {
712			if d.strict {
713				if doneFields[info.Id] {
714					d.terrors = append(d.terrors, fmt.Sprintf("line %d: field %s already set in type %s", ni.line+1, name.String(), out.Type()))
715					continue
716				}
717				doneFields[info.Id] = true
718			}
719			var field reflect.Value
720			if info.Inline == nil {
721				field = out.Field(info.Num)
722			} else {
723				field = out.FieldByIndex(info.Inline)
724			}
725			d.unmarshal(n.children[i+1], field)
726		} else if sinfo.InlineMap != -1 {
727			if inlineMap.IsNil() {
728				inlineMap.Set(reflect.MakeMap(inlineMap.Type()))
729			}
730			value := reflect.New(elemType).Elem()
731			d.unmarshal(n.children[i+1], value)
732			d.setMapIndex(n.children[i+1], inlineMap, name, value)
733		} else if d.strict {
734			d.terrors = append(d.terrors, fmt.Sprintf("line %d: field %s not found in type %s", ni.line+1, name.String(), out.Type()))
735		}
736	}
737	return true
738}
739
740func failWantMap() {
741	failf("map merge requires map or sequence of maps as the value")
742}
743
744func (d *decoder) merge(n *node, out reflect.Value) {
745	switch n.kind {
746	case mappingNode:
747		d.unmarshal(n, out)
748	case aliasNode:
749		an, ok := d.doc.anchors[n.value]
750		if ok && an.kind != mappingNode {
751			failWantMap()
752		}
753		d.unmarshal(n, out)
754	case sequenceNode:
755		// Step backwards as earlier nodes take precedence.
756		for i := len(n.children) - 1; i >= 0; i-- {
757			ni := n.children[i]
758			if ni.kind == aliasNode {
759				an, ok := d.doc.anchors[ni.value]
760				if ok && an.kind != mappingNode {
761					failWantMap()
762				}
763			} else if ni.kind != mappingNode {
764				failWantMap()
765			}
766			d.unmarshal(ni, out)
767		}
768	default:
769		failWantMap()
770	}
771}
772
773func isMerge(n *node) bool {
774	return n.kind == scalarNode && n.value == "<<" && (n.implicit == true || n.tag == yaml_MERGE_TAG)
775}
776