1package stdlib
2
3import (
4	"bytes"
5	"fmt"
6	"math/big"
7	"strings"
8
9	"github.com/apparentlymart/go-textseg/v13/textseg"
10
11	"github.com/zclconf/go-cty/cty"
12	"github.com/zclconf/go-cty/cty/convert"
13	"github.com/zclconf/go-cty/cty/function"
14	"github.com/zclconf/go-cty/cty/json"
15)
16
17//go:generate ragel -Z format_fsm.rl
18//go:generate gofmt -w format_fsm.go
19
20var FormatFunc = function.New(&function.Spec{
21	Params: []function.Parameter{
22		{
23			Name: "format",
24			Type: cty.String,
25		},
26	},
27	VarParam: &function.Parameter{
28		Name:      "args",
29		Type:      cty.DynamicPseudoType,
30		AllowNull: true,
31	},
32	Type: function.StaticReturnType(cty.String),
33	Impl: func(args []cty.Value, retType cty.Type) (cty.Value, error) {
34		for _, arg := range args[1:] {
35			if !arg.IsWhollyKnown() {
36				// We require all nested values to be known because the only
37				// thing we can do for a collection/structural type is print
38				// it as JSON and that requires it to be wholly known.
39				return cty.UnknownVal(cty.String), nil
40			}
41		}
42		str, err := formatFSM(args[0].AsString(), args[1:])
43		return cty.StringVal(str), err
44	},
45})
46
47var FormatListFunc = function.New(&function.Spec{
48	Params: []function.Parameter{
49		{
50			Name: "format",
51			Type: cty.String,
52		},
53	},
54	VarParam: &function.Parameter{
55		Name:         "args",
56		Type:         cty.DynamicPseudoType,
57		AllowNull:    true,
58		AllowUnknown: true,
59	},
60	Type: function.StaticReturnType(cty.List(cty.String)),
61	Impl: func(args []cty.Value, retType cty.Type) (cty.Value, error) {
62		fmtVal := args[0]
63		args = args[1:]
64
65		if len(args) == 0 {
66			// With no arguments, this function is equivalent to Format, but
67			// returning a single-element list result.
68			result, err := Format(fmtVal, args...)
69			return cty.ListVal([]cty.Value{result}), err
70		}
71
72		fmtStr := fmtVal.AsString()
73
74		// Each of our arguments will be dealt with either as an iterator
75		// or as a single value. Iterators are used for sequence-type values
76		// (lists, sets, tuples) while everything else is treated as a
77		// single value. The sequences we iterate over are required to be
78		// all the same length.
79		iterLen := -1
80		lenChooser := -1
81		iterators := make([]cty.ElementIterator, len(args))
82		singleVals := make([]cty.Value, len(args))
83		unknowns := make([]bool, len(args))
84		for i, arg := range args {
85			argTy := arg.Type()
86			switch {
87			case (argTy.IsListType() || argTy.IsSetType() || argTy.IsTupleType()) && !arg.IsNull():
88				if !argTy.IsTupleType() && !(arg.IsKnown() && arg.Length().IsKnown()) {
89					// We can't iterate this one at all yet then, so we can't
90					// yet produce a result.
91					unknowns[i] = true
92					continue
93				}
94				thisLen := arg.LengthInt()
95				if iterLen == -1 {
96					iterLen = thisLen
97					lenChooser = i
98				} else {
99					if thisLen != iterLen {
100						return cty.NullVal(cty.List(cty.String)), function.NewArgErrorf(
101							i+1,
102							"argument %d has length %d, which is inconsistent with argument %d of length %d",
103							i+1, thisLen,
104							lenChooser+1, iterLen,
105						)
106					}
107				}
108				if !arg.IsKnown() {
109					// We allowed an unknown tuple value to fall through in
110					// our initial check above so that we'd be able to run
111					// the above error checks against it, but we still can't
112					// iterate it if the checks pass.
113					unknowns[i] = true
114					continue
115				}
116				iterators[i] = arg.ElementIterator()
117			default:
118				singleVals[i] = arg
119			}
120		}
121
122		for _, isUnk := range unknowns {
123			if isUnk {
124				return cty.UnknownVal(retType), nil
125			}
126		}
127
128		if iterLen == 0 {
129			// If our sequences are all empty then our result must be empty.
130			return cty.ListValEmpty(cty.String), nil
131		}
132
133		if iterLen == -1 {
134			// If we didn't encounter any iterables at all then we're going
135			// to just do one iteration with items from singleVals.
136			iterLen = 1
137		}
138
139		ret := make([]cty.Value, 0, iterLen)
140		fmtArgs := make([]cty.Value, len(iterators))
141	Results:
142		for iterIdx := 0; iterIdx < iterLen; iterIdx++ {
143
144			// Construct our arguments for a single format call
145			for i := range fmtArgs {
146				switch {
147				case iterators[i] != nil:
148					iterator := iterators[i]
149					iterator.Next()
150					_, val := iterator.Element()
151					fmtArgs[i] = val
152				default:
153					fmtArgs[i] = singleVals[i]
154				}
155
156				// If any of the arguments to this call would be unknown then
157				// this particular result is unknown, but we'll keep going
158				// to see if any other iterations can produce known values.
159				if !fmtArgs[i].IsWhollyKnown() {
160					// We require all nested values to be known because the only
161					// thing we can do for a collection/structural type is print
162					// it as JSON and that requires it to be wholly known.
163					ret = append(ret, cty.UnknownVal(cty.String))
164					continue Results
165				}
166			}
167
168			str, err := formatFSM(fmtStr, fmtArgs)
169			if err != nil {
170				return cty.NullVal(cty.List(cty.String)), fmt.Errorf(
171					"error on format iteration %d: %s", iterIdx, err,
172				)
173			}
174
175			ret = append(ret, cty.StringVal(str))
176		}
177
178		return cty.ListVal(ret), nil
179	},
180})
181
182// Format produces a string representation of zero or more values using a
183// format string similar to the "printf" function in C.
184//
185// It supports the following "verbs":
186//
187//     %%      Literal percent sign, consuming no value
188//     %v      A default formatting of the value based on type, as described below.
189//     %#v     JSON serialization of the value
190//     %t      Converts to boolean and then produces "true" or "false"
191//     %b      Converts to number, requires integer, produces binary representation
192//     %d      Converts to number, requires integer, produces decimal representation
193//     %o      Converts to number, requires integer, produces octal representation
194//     %x      Converts to number, requires integer, produces hexadecimal representation
195//             with lowercase letters
196//     %X      Like %x but with uppercase letters
197//     %e      Converts to number, produces scientific notation like -1.234456e+78
198//     %E      Like %e but with an uppercase "E" representing the exponent
199//     %f      Converts to number, produces decimal representation with fractional
200//             part but no exponent, like 123.456
201//     %g      %e for large exponents or %f otherwise
202//     %G      %E for large exponents or %f otherwise
203//     %s      Converts to string and produces the string's characters
//     %q      Converts to string and produces JSON-quoted string representation,
//             like %#v.
206//
207// The default format selections made by %v are:
208//
209//     string  %s
210//     number  %g
211//     bool    %t
212//     other   %#v
213//
214// Null values produce the literal keyword "null" for %v and %#v, and produce
215// an error otherwise.
216//
217// Width is specified by an optional decimal number immediately preceding the
218// verb letter. If absent, the width is whatever is necessary to represent the
219// value. Precision is specified after the (optional) width by a period
220// followed by a decimal number. If no period is present, a default precision
221// is used. A period with no following number is invalid.
// For example:
223//
224//     %f     default width, default precision
225//     %9f    width 9, default precision
226//     %.2f   default width, precision 2
227//     %9.2f  width 9, precision 2
228//
229// Width and precision are measured in unicode characters (grapheme clusters).
230//
231// For most values, width is the minimum number of characters to output,
232// padding the formatted form with spaces if necessary.
233//
234// For strings, precision limits the length of the input to be formatted (not
235// the size of the output), truncating if necessary.
236//
237// For numbers, width sets the minimum width of the field and precision sets
238// the number of places after the decimal, if appropriate, except that for
239// %g/%G precision sets the total number of significant digits.
240//
241// The following additional symbols can be used immediately after the percent
242// introducer as flags:
243//
244//           (a space) leave a space where the sign would be if number is positive
245//     +     Include a sign for a number even if it is positive (numeric only)
//     -     Pad with spaces on the right rather than the left (left-justify)
247//     0     Pad with zeros rather than spaces.
248//
249// Flag characters are ignored for verbs that do not support them.
250//
251// By default, % sequences consume successive arguments starting with the first.
252// Introducing a [n] sequence immediately before the verb letter, where n is a
253// decimal integer, explicitly chooses a particular value argument by its
254// one-based index. Subsequent calls without an explicit index will then
255// proceed with n+1, n+2, etc.
256//
257// An error is produced if the format string calls for an impossible conversion
258// or accesses more values than are given. An error is produced also for
259// an unsupported format verb.
260func Format(format cty.Value, vals ...cty.Value) (cty.Value, error) {
261	args := make([]cty.Value, 0, len(vals)+1)
262	args = append(args, format)
263	args = append(args, vals...)
264	return FormatFunc.Call(args)
265}
266
267// FormatList applies the same formatting behavior as Format, but accepts
268// a mixture of list and non-list values as arguments. Any list arguments
269// passed must have the same length, which dictates the length of the
270// resulting list.
271//
272// Any non-list arguments are used repeatedly for each iteration over the
273// list arguments. The list arguments are iterated in order by key, so
274// corresponding items are formatted together.
275func FormatList(format cty.Value, vals ...cty.Value) (cty.Value, error) {
276	args := make([]cty.Value, 0, len(vals)+1)
277	args = append(args, format)
278	args = append(args, vals...)
279	return FormatListFunc.Call(args)
280}
281
// formatVerb describes a single formatting sequence ("verb") found in a
// format string, as handed to formatAppend.
type formatVerb struct {
	// Raw is the source text of the verb. It is quoted in error messages
	// and, with any [n] segment stripped, passed to Go's fmt package for
	// numeric verbs.
	Raw string
	// Offset is the position of the verb reported in error messages.
	// NOTE(review): presumably a byte offset into the format string; confirm
	// against the scanner.
	Offset int

	// ArgNum is the one-based index of the argument this verb consumes.
	ArgNum int
	// Mode is the verb letter, such as 'v', 'd' or 's'.
	Mode rune

	// Zero requests padding with "0" instead of spaces.
	Zero bool
	// Sharp makes %v always use the JSON representation.
	Sharp bool
	// Plus is not read directly by the formatters in this file.
	// NOTE(review): presumably mirrors the "+" flag; confirm in the scanner.
	Plus bool
	// Minus requests that padding be added after the value rather than
	// before it.
	Minus bool
	// Space is not read directly by the formatters in this file.
	// NOTE(review): presumably mirrors the " " flag; confirm in the scanner.
	Space bool

	// HasPrec reports whether Prec is meaningful. formatAppend resets Prec
	// to -1 when it is false.
	HasPrec bool
	Prec    int

	// HasWidth reports whether Width is meaningful. formatAppend resets
	// Width to -1 when it is false.
	HasWidth bool
	Width    int
}
301
302// formatAppend is called by formatFSM (generated by format_fsm.rl) for each
303// formatting sequence that is encountered.
304func formatAppend(verb *formatVerb, buf *bytes.Buffer, args []cty.Value) error {
305	argIdx := verb.ArgNum - 1
306	if argIdx >= len(args) {
307		return fmt.Errorf(
308			"not enough arguments for %q at %d: need index %d but have %d total",
309			verb.Raw, verb.Offset,
310			verb.ArgNum, len(args),
311		)
312	}
313	arg := args[argIdx]
314
315	if verb.Mode != 'v' && arg.IsNull() {
316		return fmt.Errorf("unsupported value for %q at %d: null value cannot be formatted", verb.Raw, verb.Offset)
317	}
318
319	// Normalize to make some things easier for downstream formatters
320	if !verb.HasWidth {
321		verb.Width = -1
322	}
323	if !verb.HasPrec {
324		verb.Prec = -1
325	}
326
327	// For our first pass we'll ensure the verb is supported and then fan
328	// out to other functions based on what conversion is needed.
329	switch verb.Mode {
330
331	case 'v':
332		return formatAppendAsIs(verb, buf, arg)
333
334	case 't':
335		return formatAppendBool(verb, buf, arg)
336
337	case 'b', 'd', 'o', 'x', 'X', 'e', 'E', 'f', 'g', 'G':
338		return formatAppendNumber(verb, buf, arg)
339
340	case 's', 'q':
341		return formatAppendString(verb, buf, arg)
342
343	default:
344		return fmt.Errorf("unsupported format verb %q in %q at offset %d", verb.Mode, verb.Raw, verb.Offset)
345	}
346}
347
348func formatAppendAsIs(verb *formatVerb, buf *bytes.Buffer, arg cty.Value) error {
349
350	if !verb.Sharp && !arg.IsNull() {
351		// Unless the caller overrode it with the sharp flag, we'll try some
352		// specialized formats before we fall back on JSON.
353		switch arg.Type() {
354		case cty.String:
355			fmted := arg.AsString()
356			fmted = formatPadWidth(verb, fmted)
357			buf.WriteString(fmted)
358			return nil
359		case cty.Number:
360			bf := arg.AsBigFloat()
361			fmted := bf.Text('g', -1)
362			fmted = formatPadWidth(verb, fmted)
363			buf.WriteString(fmted)
364			return nil
365		}
366	}
367
368	jb, err := json.Marshal(arg, arg.Type())
369	if err != nil {
370		return fmt.Errorf("unsupported value for %q at %d: %s", verb.Raw, verb.Offset, err)
371	}
372	fmted := formatPadWidth(verb, string(jb))
373	buf.WriteString(fmted)
374
375	return nil
376}
377
378func formatAppendBool(verb *formatVerb, buf *bytes.Buffer, arg cty.Value) error {
379	var err error
380	arg, err = convert.Convert(arg, cty.Bool)
381	if err != nil {
382		return fmt.Errorf("unsupported value for %q at %d: %s", verb.Raw, verb.Offset, err)
383	}
384
385	if arg.True() {
386		buf.WriteString("true")
387	} else {
388		buf.WriteString("false")
389	}
390	return nil
391}
392
393func formatAppendNumber(verb *formatVerb, buf *bytes.Buffer, arg cty.Value) error {
394	var err error
395	arg, err = convert.Convert(arg, cty.Number)
396	if err != nil {
397		return fmt.Errorf("unsupported value for %q at %d: %s", verb.Raw, verb.Offset, err)
398	}
399
400	switch verb.Mode {
401	case 'b', 'd', 'o', 'x', 'X':
402		return formatAppendInteger(verb, buf, arg)
403	default:
404		bf := arg.AsBigFloat()
405
406		// For floats our format syntax is a subset of Go's, so it's
407		// safe for us to just lean on the existing Go implementation.
408		fmtstr := formatStripIndexSegment(verb.Raw)
409		fmted := fmt.Sprintf(fmtstr, bf)
410		buf.WriteString(fmted)
411		return nil
412	}
413}
414
415func formatAppendInteger(verb *formatVerb, buf *bytes.Buffer, arg cty.Value) error {
416	bf := arg.AsBigFloat()
417	bi, acc := bf.Int(nil)
418	if acc != big.Exact {
419		return fmt.Errorf("unsupported value for %q at %d: an integer is required", verb.Raw, verb.Offset)
420	}
421
422	// For integers our format syntax is a subset of Go's, so it's
423	// safe for us to just lean on the existing Go implementation.
424	fmtstr := formatStripIndexSegment(verb.Raw)
425	fmted := fmt.Sprintf(fmtstr, bi)
426	buf.WriteString(fmted)
427	return nil
428}
429
430func formatAppendString(verb *formatVerb, buf *bytes.Buffer, arg cty.Value) error {
431	var err error
432	arg, err = convert.Convert(arg, cty.String)
433	if err != nil {
434		return fmt.Errorf("unsupported value for %q at %d: %s", verb.Raw, verb.Offset, err)
435	}
436
437	// We _cannot_ directly use the Go fmt.Sprintf implementation for strings
438	// because it measures widths and precisions in runes rather than grapheme
439	// clusters.
440
441	str := arg.AsString()
442	if verb.Prec > 0 {
443		strB := []byte(str)
444		pos := 0
445		wanted := verb.Prec
446		for i := 0; i < wanted; i++ {
447			next := strB[pos:]
448			if len(next) == 0 {
449				// ran out of characters before we hit our max width
450				break
451			}
452			d, _, _ := textseg.ScanGraphemeClusters(strB[pos:], true)
453			pos += d
454		}
455		str = str[:pos]
456	}
457
458	switch verb.Mode {
459	case 's':
460		fmted := formatPadWidth(verb, str)
461		buf.WriteString(fmted)
462	case 'q':
463		jb, err := json.Marshal(cty.StringVal(str), cty.String)
464		if err != nil {
465			// Should never happen, since we know this is a known, non-null string
466			panic(fmt.Errorf("failed to marshal %#v as JSON: %s", arg, err))
467		}
468		fmted := formatPadWidth(verb, string(jb))
469		buf.WriteString(fmted)
470	default:
471		// Should never happen because formatAppend should've already validated
472		panic(fmt.Errorf("invalid string formatting mode %q", verb.Mode))
473	}
474	return nil
475}
476
477func formatPadWidth(verb *formatVerb, fmted string) string {
478	if verb.Width < 0 {
479		return fmted
480	}
481
482	// Safe to ignore errors because ScanGraphemeClusters cannot produce errors
483	givenLen, _ := textseg.TokenCount([]byte(fmted), textseg.ScanGraphemeClusters)
484	wantLen := verb.Width
485	if givenLen >= wantLen {
486		return fmted
487	}
488
489	padLen := wantLen - givenLen
490	padChar := " "
491	if verb.Zero {
492		padChar = "0"
493	}
494	pads := strings.Repeat(padChar, padLen)
495
496	if verb.Minus {
497		return fmted + pads
498	}
499	return pads + fmted
500}
501
// formatStripIndexSegment removes the explicit argument index segment
// ("[nnn]") from a verb string, if it has one. The result can then be given
// to Go's fmt package together with exactly one argument, which works
// because Go's verb syntax is a superset of ours.
//
// The verb is expected to have been accepted by the scanner in formatFSM
// already, so no validation is done here. A verb lacking either bracket is
// returned unchanged.
func formatStripIndexSegment(rawVerb string) string {
	openAt := strings.IndexByte(rawVerb, '[')
	closeAt := strings.IndexByte(rawVerb, ']')
	if openAt < 0 || closeAt < 0 {
		return rawVerb
	}

	// Keep everything before the opening bracket and everything after the
	// closing bracket.
	before := rawVerb[:openAt]
	after := rawVerb[closeAt+1:]
	return before + after
}
518