1package getproviders
2
3import (
4	"crypto/sha256"
5	"fmt"
6	"io"
7	"os"
8	"path/filepath"
9	"strings"
10
11	"golang.org/x/mod/sumdb/dirhash"
12)
13
// Hash is a specially-formatted string representing a checksum of a package
// or the contents of the package.
//
// A Hash string always starts with a scheme, which is a short series of
// alphanumeric characters followed by a colon, and then the remainder of the
// string has a different meaning depending on the scheme prefix.
//
// The currently-valid schemes are defined as the constants of type HashScheme
// in this package.
//
// Callers outside of this package must not create Hash values via direct
// conversion. Instead, use either the HashScheme.New method on one of the
// HashScheme constants (for a hash of a particular scheme) or the ParseHash
// function (if hashes of any scheme are acceptable).
type Hash string
29
30// NilHash is the zero value of Hash. It isn't a valid hash, so all of its
31// methods will panic.
32const NilHash = Hash("")
33
34// ParseHash parses the string representation of a Hash into a Hash value.
35//
36// A particular version of Terraform only supports a fixed set of hash schemes,
37// but this function intentionally allows unrecognized schemes so that we can
38// silently ignore other schemes that may be introduced in the future. For
39// that reason, the Scheme method of the returned Hash may return a value that
40// isn't in one of the HashScheme constants in this package.
41//
42// This function doesn't verify that the value portion of the given hash makes
43// sense for the given scheme. Invalid values are just considered to not match
44// any packages.
45//
46// If this function returns an error then the returned Hash is invalid and
47// must not be used.
48func ParseHash(s string) (Hash, error) {
49	colon := strings.Index(s, ":")
50	if colon < 1 { // 1 because a zero-length scheme is not allowed
51		return NilHash, fmt.Errorf("hash string must start with a scheme keyword followed by a colon")
52	}
53	return Hash(s), nil
54}
55
56// MustParseHash is a wrapper around ParseHash that panics if it returns an
57// error.
58func MustParseHash(s string) Hash {
59	hash, err := ParseHash(s)
60	if err != nil {
61		panic(err.Error())
62	}
63	return hash
64}
65
66// Scheme returns the scheme of the recieving hash. If the receiver is not
67// using valid syntax then this method will panic.
68func (h Hash) Scheme() HashScheme {
69	colon := strings.Index(string(h), ":")
70	if colon < 0 {
71		panic(fmt.Sprintf("invalid hash string %q", h))
72	}
73	return HashScheme(h[:colon+1])
74}
75
76// HasScheme returns true if the given scheme matches the receiver's scheme,
77// or false otherwise.
78//
79// If the receiver is not using valid syntax then this method will panic.
80func (h Hash) HasScheme(want HashScheme) bool {
81	return h.Scheme() == want
82}
83
84// Value returns the scheme-specific value from the recieving hash. The
85// meaning of this value depends on the scheme.
86//
87// If the receiver is not using valid syntax then this method will panic.
88func (h Hash) Value() string {
89	colon := strings.Index(string(h), ":")
90	if colon < 0 {
91		panic(fmt.Sprintf("invalid hash string %q", h))
92	}
93	return string(h[colon+1:])
94}
95
// String returns a string representation of the receiving hash, which is
// the full hash string including its scheme prefix.
//
// This is a plain type conversion, so unlike Scheme and Value it does not
// panic for a syntactically invalid hash such as NilHash.
func (h Hash) String() string {
	return string(h)
}
100
101// GoString returns a Go syntax representation of the receiving hash.
102//
103// This is here primarily to help with producing descriptive test failure
104// output; these results are not particularly useful at runtime.
105func (h Hash) GoString() string {
106	if h == NilHash {
107		return "getproviders.NilHash"
108	}
109	switch scheme := h.Scheme(); scheme {
110	case HashScheme1:
111		return fmt.Sprintf("getproviders.HashScheme1.New(%q)", h.Value())
112	case HashSchemeZip:
113		return fmt.Sprintf("getproviders.HashSchemeZip.New(%q)", h.Value())
114	default:
115		// This fallback is for when we encounter lock files or API responses
116		// with hash schemes that the current version of Terraform isn't
117		// familiar with. They were presumably introduced in a later version.
118		return fmt.Sprintf("getproviders.HashScheme(%q).New(%q)", scheme, h.Value())
119	}
120}
121
// HashScheme enumerates the scheme prefixes that values of type Hash may
// carry. Each scheme string includes its trailing colon.
type HashScheme string

const (
	// HashScheme1 identifies the first hash scheme, whose hashes start with
	// "h1:".
	//
	// Use PackageHashV1 (or the PackageMeta.HashV1 wrapper) to calculate
	// hashes with this scheme.
	HashScheme1 HashScheme = "h1:"

	// HashSchemeZip identifies the legacy hash scheme, whose hashes start
	// with "zh:". It applies to distribution archives (.zip files) rather
	// than to package contents, so it can only be verified against the
	// original distribution .zip file and never against an extracted
	// directory.
	//
	// Use PackageHashLegacyZipSHA to calculate hashes with this scheme.
	HashSchemeZip HashScheme = "zh:"
)
141
142// New creates a new Hash value with the receiver as its scheme and the given
143// raw string as its value.
144//
145// It's the caller's responsibility to make sure that the given value makes
146// sense for the selected scheme.
147func (hs HashScheme) New(value string) Hash {
148	return Hash(string(hs) + value)
149}
150
151// PackageHash computes a hash of the contents of the package at the given
152// location, using whichever hash algorithm is the current default.
153//
154// Currently, this method returns version 1 hashes as produced by the
155// function PackageHashV1, but this function may switch to other versions in
156// later releases. Call PackageHashV1 directly if you specifically need a V1
157// hash.
158//
159// PackageHash can be used only with the two local package location types
160// PackageLocalDir and PackageLocalArchive, because it needs to access the
161// contents of the indicated package in order to compute the hash. If given
162// a non-local location this function will always return an error.
163func PackageHash(loc PackageLocation) (Hash, error) {
164	return PackageHashV1(loc)
165}
166
167// PackageMatchesHash returns true if the package at the given location matches
168// the given hash, or false otherwise.
169//
170// If it cannot read from the given location, or if the given hash is in an
171// unsupported format, PackageMatchesHash returns an error.
172//
173// There is currently only one hash format, as implemented by HashV1. However,
174// if others are introduced in future PackageMatchesHash may accept multiple
175// formats, and may generate errors for any formats that become obsolete.
176//
177// PackageMatchesHash can be used only with the two local package location types
178// PackageLocalDir and PackageLocalArchive, because it needs to access the
179// contents of the indicated package in order to compute the hash. If given
180// a non-local location this function will always return an error.
181func PackageMatchesHash(loc PackageLocation, want Hash) (bool, error) {
182	switch want.Scheme() {
183	case HashScheme1:
184		got, err := PackageHashV1(loc)
185		if err != nil {
186			return false, err
187		}
188		return got == want, nil
189	case HashSchemeZip:
190		archiveLoc, ok := loc.(PackageLocalArchive)
191		if !ok {
192			return false, fmt.Errorf(`ziphash scheme ("zh:" prefix) is not supported for unpacked provider packages`)
193		}
194		got, err := PackageHashLegacyZipSHA(archiveLoc)
195		if err != nil {
196			return false, err
197		}
198		return got == want, nil
199	default:
200		return false, fmt.Errorf("unsupported hash format (this may require a newer version of Terraform)")
201	}
202}
203
204// PackageMatchesAnyHash returns true if the package at the given location
205// matches at least one of the given hashes, or false otherwise.
206//
207// If it cannot read from the given location, PackageMatchesAnyHash returns an
208// error. Unlike the singular PackageMatchesHash, PackageMatchesAnyHash
209// considers unsupported hash formats as successfully non-matching, rather
210// than returning an error.
211//
212// PackageMatchesAnyHash can be used only with the two local package location
213// types PackageLocalDir and PackageLocalArchive, because it needs to access the
214// contents of the indicated package in order to compute the hash. If given
215// a non-local location this function will always return an error.
216func PackageMatchesAnyHash(loc PackageLocation, allowed []Hash) (bool, error) {
217	// It's likely that we'll have multiple hashes of the same scheme in
218	// the "allowed" set, in which case we'll avoid repeatedly re-reading the
219	// given package by caching its result for each of the two
220	// currently-supported hash formats. These will be NilHash until we
221	// encounter the first hash of the corresponding scheme.
222	var v1Hash, zipHash Hash
223	for _, want := range allowed {
224		switch want.Scheme() {
225		case HashScheme1:
226			if v1Hash == NilHash {
227				got, err := PackageHashV1(loc)
228				if err != nil {
229					return false, err
230				}
231				v1Hash = got
232			}
233			if v1Hash == want {
234				return true, nil
235			}
236		case HashSchemeZip:
237			archiveLoc, ok := loc.(PackageLocalArchive)
238			if !ok {
239				// A zip hash can never match an unpacked directory
240				continue
241			}
242			if zipHash == NilHash {
243				got, err := PackageHashLegacyZipSHA(archiveLoc)
244				if err != nil {
245					return false, err
246				}
247				zipHash = got
248			}
249			if zipHash == want {
250				return true, nil
251			}
252		default:
253			// If it's not a supported format then it can't match.
254			continue
255		}
256	}
257	return false, nil
258}
259
260// PreferredHashes examines all of the given hash strings and returns the one
261// that the current version of Terraform considers to provide the strongest
262// verification.
263//
264// Returns an empty string if none of the given hashes are of a supported
265// format. If PreferredHash returns a non-empty string then it will be one
266// of the hash strings in "given", and that hash is the one that must pass
267// verification in order for a package to be considered valid.
268func PreferredHashes(given []Hash) []Hash {
269	// For now this is just filtering for the two hash formats we support,
270	// both of which are considered equally "preferred". If we introduce
271	// a new scheme like "h2:" in future then, depending on the characteristics
272	// of that new version, it might make sense to rework this function so
273	// that it only returns "h1:" hashes if the input has no "h2:" hashes,
274	// so that h2: is preferred when possible and h1: is only a fallback for
275	// interacting with older systems that haven't been updated with the new
276	// scheme yet.
277
278	var ret []Hash
279	for _, hash := range given {
280		switch hash.Scheme() {
281		case HashScheme1, HashSchemeZip:
282			ret = append(ret, hash)
283		}
284	}
285	return ret
286}
287
288// PackageHashLegacyZipSHA implements the old provider package hashing scheme
289// of taking a SHA256 hash of the containing .zip archive itself, rather than
290// of the contents of the archive.
291//
292// The result is a hash string with the "zh:" prefix, which is intended to
293// represent "zip hash". After the prefix is a lowercase-hex encoded SHA256
294// checksum, intended to exactly match the formatting used in the registry
295// API (apart from the prefix) so that checksums can be more conveniently
296// compared by humans.
297//
298// Because this hashing scheme uses the official provider .zip file as its
299// input, it accepts only PackageLocalArchive locations.
300func PackageHashLegacyZipSHA(loc PackageLocalArchive) (Hash, error) {
301	archivePath, err := filepath.EvalSymlinks(string(loc))
302	if err != nil {
303		return "", err
304	}
305
306	f, err := os.Open(archivePath)
307	if err != nil {
308		return "", err
309	}
310	defer f.Close()
311
312	h := sha256.New()
313	_, err = io.Copy(h, f)
314	if err != nil {
315		return "", err
316	}
317
318	gotHash := h.Sum(nil)
319	return HashSchemeZip.New(fmt.Sprintf("%x", gotHash)), nil
320}
321
322// HashLegacyZipSHAFromSHA is a convenience method to produce the schemed-string
323// hash format from an already-calculated hash of a provider .zip archive.
324//
325// This just adds the "zh:" prefix and encodes the string in hex, so that the
326// result is in the same format as PackageHashLegacyZipSHA.
327func HashLegacyZipSHAFromSHA(sum [sha256.Size]byte) Hash {
328	return HashSchemeZip.New(fmt.Sprintf("%x", sum[:]))
329}
330
331// PackageHashV1 computes a hash of the contents of the package at the given
332// location using hash algorithm 1. The resulting Hash is guaranteed to have
333// the scheme HashScheme1.
334//
335// The hash covers the paths to files in the directory and the contents of
336// those files. It does not cover other metadata about the files, such as
337// permissions.
338//
339// This function is named "PackageHashV1" in anticipation of other hashing
340// algorithms being added in a backward-compatible way in future. The result
341// from PackageHashV1 always begins with the prefix "h1:" so that callers can
342// distinguish the results of potentially multiple different hash algorithms in
343// future.
344//
345// PackageHashV1 can be used only with the two local package location types
346// PackageLocalDir and PackageLocalArchive, because it needs to access the
347// contents of the indicated package in order to compute the hash. If given
348// a non-local location this function will always return an error.
349func PackageHashV1(loc PackageLocation) (Hash, error) {
350	// Our HashV1 is really just the Go Modules hash version 1, which is
351	// sufficient for our needs and already well-used for identity of
352	// Go Modules distribution packages. It is also blocked from incompatible
353	// changes by being used in a wide array of go.sum files already.
354	//
355	// In particular, it also supports computing an equivalent hash from
356	// an unpacked zip file, which is not important for Terraform workflow
357	// today but is likely to become so in future if we adopt a top-level
358	// lockfile mechanism that is intended to be checked in to version control,
359	// rather than just a transient lock for a particular local cache directory.
360	// (In that case we'd need to check hashes of _packed_ packages, too.)
361	//
362	// Internally, dirhash.Hash1 produces a string containing a sequence of
363	// newline-separated path+filehash pairs for all of the files in the
364	// directory, and then finally produces a hash of that string to return.
365	// In both cases, the hash algorithm is SHA256.
366
367	switch loc := loc.(type) {
368
369	case PackageLocalDir:
370		// We'll first dereference a possible symlink at our PackageDir location,
371		// as would be created if this package were linked in from another cache.
372		packageDir, err := filepath.EvalSymlinks(string(loc))
373		if err != nil {
374			return "", err
375		}
376
377		// The dirhash.HashDir result is already in our expected h1:...
378		// format, so we can just convert directly to Hash.
379		s, err := dirhash.HashDir(packageDir, "", dirhash.Hash1)
380		return Hash(s), err
381
382	case PackageLocalArchive:
383		archivePath, err := filepath.EvalSymlinks(string(loc))
384		if err != nil {
385			return "", err
386		}
387
388		// The dirhash.HashDir result is already in our expected h1:...
389		// format, so we can just convert directly to Hash.
390		s, err := dirhash.HashZip(archivePath, dirhash.Hash1)
391		return Hash(s), err
392
393	default:
394		return "", fmt.Errorf("cannot hash package at %s", loc.String())
395	}
396}
397
398// Hash computes a hash of the contents of the package at the location
399// associated with the reciever, using whichever hash algorithm is the current
400// default.
401//
402// This method will change to use new hash versions as they are introduced
403// in future. If you need a specific hash version, call the method for that
404// version directly instead, such as HashV1.
405//
406// Hash can be used only with the two local package location types
407// PackageLocalDir and PackageLocalArchive, because it needs to access the
408// contents of the indicated package in order to compute the hash. If given
409// a non-local location this function will always return an error.
410func (m PackageMeta) Hash() (Hash, error) {
411	return PackageHash(m.Location)
412}
413
414// MatchesHash returns true if the package at the location associated with
415// the receiver matches the given hash, or false otherwise.
416//
417// If it cannot read from the given location, or if the given hash is in an
418// unsupported format, MatchesHash returns an error.
419//
420// MatchesHash can be used only with the two local package location types
421// PackageLocalDir and PackageLocalArchive, because it needs to access the
422// contents of the indicated package in order to compute the hash. If given
423// a non-local location this function will always return an error.
424func (m PackageMeta) MatchesHash(want Hash) (bool, error) {
425	return PackageMatchesHash(m.Location, want)
426}
427
428// MatchesAnyHash returns true if the package at the location associated with
429// the receiver matches at least one of the given hashes, or false otherwise.
430//
431// If it cannot read from the given location, MatchesHash returns an error.
432// Unlike the signular MatchesHash, MatchesAnyHash considers an unsupported
433// hash format to be a successful non-match.
434func (m PackageMeta) MatchesAnyHash(acceptable []Hash) (bool, error) {
435	return PackageMatchesAnyHash(m.Location, acceptable)
436}
437
438// HashV1 computes a hash of the contents of the package at the location
439// associated with the receiver using hash algorithm 1.
440//
441// The hash covers the paths to files in the directory and the contents of
442// those files. It does not cover other metadata about the files, such as
443// permissions.
444//
445// HashV1 can be used only with the two local package location types
446// PackageLocalDir and PackageLocalArchive, because it needs to access the
447// contents of the indicated package in order to compute the hash. If given
448// a non-local location this function will always return an error.
449func (m PackageMeta) HashV1() (Hash, error) {
450	return PackageHashV1(m.Location)
451}
452