package getproviders

import (
	"crypto/sha256"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strings"

	"golang.org/x/mod/sumdb/dirhash"
)

// Hash is a specially-formatted string representing a checksum of a package
// or the contents of the package.
//
// A Hash string always starts with a scheme, which is a short series of
// alphanumeric characters followed by a colon, and then the remainder of the
// string has a different meaning depending on the scheme prefix.
//
// The currently-valid schemes are defined as the constants of type HashScheme
// in this package.
//
// Callers outside of this package must not create Hash values via direct
// conversion. Instead, use either the HashScheme.New method on one of the
// HashScheme constants (for a hash of a particular scheme) or the ParseHash
// function (if hashes of any scheme are acceptable).
type Hash string

// NilHash is the zero value of Hash. It isn't a valid hash: it contains no
// scheme separator, so the methods that need to split the scheme from the
// value (Scheme, HasScheme and Value) will panic when called on it.
const NilHash = Hash("")

// ParseHash parses the string representation of a Hash into a Hash value.
//
// A particular version of Terraform only supports a fixed set of hash schemes,
// but this function intentionally allows unrecognized schemes so that we can
// silently ignore other schemes that may be introduced in the future. For
// that reason, the Scheme method of the returned Hash may return a value that
// isn't in one of the HashScheme constants in this package.
//
// This function doesn't verify that the value portion of the given hash makes
// sense for the given scheme. Invalid values are just considered to not match
// any packages.
//
// If this function returns an error then the returned Hash is invalid and
// must not be used.
48func ParseHash(s string) (Hash, error) { 49 colon := strings.Index(s, ":") 50 if colon < 1 { // 1 because a zero-length scheme is not allowed 51 return NilHash, fmt.Errorf("hash string must start with a scheme keyword followed by a colon") 52 } 53 return Hash(s), nil 54} 55 56// MustParseHash is a wrapper around ParseHash that panics if it returns an 57// error. 58func MustParseHash(s string) Hash { 59 hash, err := ParseHash(s) 60 if err != nil { 61 panic(err.Error()) 62 } 63 return hash 64} 65 66// Scheme returns the scheme of the recieving hash. If the receiver is not 67// using valid syntax then this method will panic. 68func (h Hash) Scheme() HashScheme { 69 colon := strings.Index(string(h), ":") 70 if colon < 0 { 71 panic(fmt.Sprintf("invalid hash string %q", h)) 72 } 73 return HashScheme(h[:colon+1]) 74} 75 76// HasScheme returns true if the given scheme matches the receiver's scheme, 77// or false otherwise. 78// 79// If the receiver is not using valid syntax then this method will panic. 80func (h Hash) HasScheme(want HashScheme) bool { 81 return h.Scheme() == want 82} 83 84// Value returns the scheme-specific value from the recieving hash. The 85// meaning of this value depends on the scheme. 86// 87// If the receiver is not using valid syntax then this method will panic. 88func (h Hash) Value() string { 89 colon := strings.Index(string(h), ":") 90 if colon < 0 { 91 panic(fmt.Sprintf("invalid hash string %q", h)) 92 } 93 return string(h[colon+1:]) 94} 95 96// String returns a string representation of the receiving hash. 97func (h Hash) String() string { 98 return string(h) 99} 100 101// GoString returns a Go syntax representation of the receiving hash. 102// 103// This is here primarily to help with producing descriptive test failure 104// output; these results are not particularly useful at runtime. 
105func (h Hash) GoString() string { 106 if h == NilHash { 107 return "getproviders.NilHash" 108 } 109 switch scheme := h.Scheme(); scheme { 110 case HashScheme1: 111 return fmt.Sprintf("getproviders.HashScheme1.New(%q)", h.Value()) 112 case HashSchemeZip: 113 return fmt.Sprintf("getproviders.HashSchemeZip.New(%q)", h.Value()) 114 default: 115 // This fallback is for when we encounter lock files or API responses 116 // with hash schemes that the current version of Terraform isn't 117 // familiar with. They were presumably introduced in a later version. 118 return fmt.Sprintf("getproviders.HashScheme(%q).New(%q)", scheme, h.Value()) 119 } 120} 121 122// HashScheme is an enumeration of schemes that are allowed for values of type 123// Hash. 124type HashScheme string 125 126const ( 127 // HashScheme1 is the scheme identifier for the first hash scheme. 128 // 129 // Use HashV1 (or one of its wrapper functions) to calculate hashes with 130 // this scheme. 131 HashScheme1 HashScheme = HashScheme("h1:") 132 133 // HashSchemeZip is the scheme identifier for the legacy hash scheme that 134 // applies to distribution archives (.zip files) rather than package 135 // contents, and can therefore only be verified against the original 136 // distribution .zip file, not an extracted directory. 137 // 138 // Use PackageHashLegacyZipSHA to calculate hashes with this scheme. 139 HashSchemeZip HashScheme = HashScheme("zh:") 140) 141 142// New creates a new Hash value with the receiver as its scheme and the given 143// raw string as its value. 144// 145// It's the caller's responsibility to make sure that the given value makes 146// sense for the selected scheme. 147func (hs HashScheme) New(value string) Hash { 148 return Hash(string(hs) + value) 149} 150 151// PackageHash computes a hash of the contents of the package at the given 152// location, using whichever hash algorithm is the current default. 
153// 154// Currently, this method returns version 1 hashes as produced by the 155// function PackageHashV1, but this function may switch to other versions in 156// later releases. Call PackageHashV1 directly if you specifically need a V1 157// hash. 158// 159// PackageHash can be used only with the two local package location types 160// PackageLocalDir and PackageLocalArchive, because it needs to access the 161// contents of the indicated package in order to compute the hash. If given 162// a non-local location this function will always return an error. 163func PackageHash(loc PackageLocation) (Hash, error) { 164 return PackageHashV1(loc) 165} 166 167// PackageMatchesHash returns true if the package at the given location matches 168// the given hash, or false otherwise. 169// 170// If it cannot read from the given location, or if the given hash is in an 171// unsupported format, PackageMatchesHash returns an error. 172// 173// There is currently only one hash format, as implemented by HashV1. However, 174// if others are introduced in future PackageMatchesHash may accept multiple 175// formats, and may generate errors for any formats that become obsolete. 176// 177// PackageMatchesHash can be used only with the two local package location types 178// PackageLocalDir and PackageLocalArchive, because it needs to access the 179// contents of the indicated package in order to compute the hash. If given 180// a non-local location this function will always return an error. 
181func PackageMatchesHash(loc PackageLocation, want Hash) (bool, error) { 182 switch want.Scheme() { 183 case HashScheme1: 184 got, err := PackageHashV1(loc) 185 if err != nil { 186 return false, err 187 } 188 return got == want, nil 189 case HashSchemeZip: 190 archiveLoc, ok := loc.(PackageLocalArchive) 191 if !ok { 192 return false, fmt.Errorf(`ziphash scheme ("zh:" prefix) is not supported for unpacked provider packages`) 193 } 194 got, err := PackageHashLegacyZipSHA(archiveLoc) 195 if err != nil { 196 return false, err 197 } 198 return got == want, nil 199 default: 200 return false, fmt.Errorf("unsupported hash format (this may require a newer version of Terraform)") 201 } 202} 203 204// PackageMatchesAnyHash returns true if the package at the given location 205// matches at least one of the given hashes, or false otherwise. 206// 207// If it cannot read from the given location, PackageMatchesAnyHash returns an 208// error. Unlike the singular PackageMatchesHash, PackageMatchesAnyHash 209// considers unsupported hash formats as successfully non-matching, rather 210// than returning an error. 211// 212// PackageMatchesAnyHash can be used only with the two local package location 213// types PackageLocalDir and PackageLocalArchive, because it needs to access the 214// contents of the indicated package in order to compute the hash. If given 215// a non-local location this function will always return an error. 216func PackageMatchesAnyHash(loc PackageLocation, allowed []Hash) (bool, error) { 217 // It's likely that we'll have multiple hashes of the same scheme in 218 // the "allowed" set, in which case we'll avoid repeatedly re-reading the 219 // given package by caching its result for each of the two 220 // currently-supported hash formats. These will be NilHash until we 221 // encounter the first hash of the corresponding scheme. 
222 var v1Hash, zipHash Hash 223 for _, want := range allowed { 224 switch want.Scheme() { 225 case HashScheme1: 226 if v1Hash == NilHash { 227 got, err := PackageHashV1(loc) 228 if err != nil { 229 return false, err 230 } 231 v1Hash = got 232 } 233 if v1Hash == want { 234 return true, nil 235 } 236 case HashSchemeZip: 237 archiveLoc, ok := loc.(PackageLocalArchive) 238 if !ok { 239 // A zip hash can never match an unpacked directory 240 continue 241 } 242 if zipHash == NilHash { 243 got, err := PackageHashLegacyZipSHA(archiveLoc) 244 if err != nil { 245 return false, err 246 } 247 zipHash = got 248 } 249 if zipHash == want { 250 return true, nil 251 } 252 default: 253 // If it's not a supported format then it can't match. 254 continue 255 } 256 } 257 return false, nil 258} 259 260// PreferredHashes examines all of the given hash strings and returns the one 261// that the current version of Terraform considers to provide the strongest 262// verification. 263// 264// Returns an empty string if none of the given hashes are of a supported 265// format. If PreferredHash returns a non-empty string then it will be one 266// of the hash strings in "given", and that hash is the one that must pass 267// verification in order for a package to be considered valid. 268func PreferredHashes(given []Hash) []Hash { 269 // For now this is just filtering for the two hash formats we support, 270 // both of which are considered equally "preferred". If we introduce 271 // a new scheme like "h2:" in future then, depending on the characteristics 272 // of that new version, it might make sense to rework this function so 273 // that it only returns "h1:" hashes if the input has no "h2:" hashes, 274 // so that h2: is preferred when possible and h1: is only a fallback for 275 // interacting with older systems that haven't been updated with the new 276 // scheme yet. 
277 278 var ret []Hash 279 for _, hash := range given { 280 switch hash.Scheme() { 281 case HashScheme1, HashSchemeZip: 282 ret = append(ret, hash) 283 } 284 } 285 return ret 286} 287 288// PackageHashLegacyZipSHA implements the old provider package hashing scheme 289// of taking a SHA256 hash of the containing .zip archive itself, rather than 290// of the contents of the archive. 291// 292// The result is a hash string with the "zh:" prefix, which is intended to 293// represent "zip hash". After the prefix is a lowercase-hex encoded SHA256 294// checksum, intended to exactly match the formatting used in the registry 295// API (apart from the prefix) so that checksums can be more conveniently 296// compared by humans. 297// 298// Because this hashing scheme uses the official provider .zip file as its 299// input, it accepts only PackageLocalArchive locations. 300func PackageHashLegacyZipSHA(loc PackageLocalArchive) (Hash, error) { 301 archivePath, err := filepath.EvalSymlinks(string(loc)) 302 if err != nil { 303 return "", err 304 } 305 306 f, err := os.Open(archivePath) 307 if err != nil { 308 return "", err 309 } 310 defer f.Close() 311 312 h := sha256.New() 313 _, err = io.Copy(h, f) 314 if err != nil { 315 return "", err 316 } 317 318 gotHash := h.Sum(nil) 319 return HashSchemeZip.New(fmt.Sprintf("%x", gotHash)), nil 320} 321 322// HashLegacyZipSHAFromSHA is a convenience method to produce the schemed-string 323// hash format from an already-calculated hash of a provider .zip archive. 324// 325// This just adds the "zh:" prefix and encodes the string in hex, so that the 326// result is in the same format as PackageHashLegacyZipSHA. 327func HashLegacyZipSHAFromSHA(sum [sha256.Size]byte) Hash { 328 return HashSchemeZip.New(fmt.Sprintf("%x", sum[:])) 329} 330 331// PackageHashV1 computes a hash of the contents of the package at the given 332// location using hash algorithm 1. The resulting Hash is guaranteed to have 333// the scheme HashScheme1. 
334// 335// The hash covers the paths to files in the directory and the contents of 336// those files. It does not cover other metadata about the files, such as 337// permissions. 338// 339// This function is named "PackageHashV1" in anticipation of other hashing 340// algorithms being added in a backward-compatible way in future. The result 341// from PackageHashV1 always begins with the prefix "h1:" so that callers can 342// distinguish the results of potentially multiple different hash algorithms in 343// future. 344// 345// PackageHashV1 can be used only with the two local package location types 346// PackageLocalDir and PackageLocalArchive, because it needs to access the 347// contents of the indicated package in order to compute the hash. If given 348// a non-local location this function will always return an error. 349func PackageHashV1(loc PackageLocation) (Hash, error) { 350 // Our HashV1 is really just the Go Modules hash version 1, which is 351 // sufficient for our needs and already well-used for identity of 352 // Go Modules distribution packages. It is also blocked from incompatible 353 // changes by being used in a wide array of go.sum files already. 354 // 355 // In particular, it also supports computing an equivalent hash from 356 // an unpacked zip file, which is not important for Terraform workflow 357 // today but is likely to become so in future if we adopt a top-level 358 // lockfile mechanism that is intended to be checked in to version control, 359 // rather than just a transient lock for a particular local cache directory. 360 // (In that case we'd need to check hashes of _packed_ packages, too.) 361 // 362 // Internally, dirhash.Hash1 produces a string containing a sequence of 363 // newline-separated path+filehash pairs for all of the files in the 364 // directory, and then finally produces a hash of that string to return. 365 // In both cases, the hash algorithm is SHA256. 
366 367 switch loc := loc.(type) { 368 369 case PackageLocalDir: 370 // We'll first dereference a possible symlink at our PackageDir location, 371 // as would be created if this package were linked in from another cache. 372 packageDir, err := filepath.EvalSymlinks(string(loc)) 373 if err != nil { 374 return "", err 375 } 376 377 // The dirhash.HashDir result is already in our expected h1:... 378 // format, so we can just convert directly to Hash. 379 s, err := dirhash.HashDir(packageDir, "", dirhash.Hash1) 380 return Hash(s), err 381 382 case PackageLocalArchive: 383 archivePath, err := filepath.EvalSymlinks(string(loc)) 384 if err != nil { 385 return "", err 386 } 387 388 // The dirhash.HashDir result is already in our expected h1:... 389 // format, so we can just convert directly to Hash. 390 s, err := dirhash.HashZip(archivePath, dirhash.Hash1) 391 return Hash(s), err 392 393 default: 394 return "", fmt.Errorf("cannot hash package at %s", loc.String()) 395 } 396} 397 398// Hash computes a hash of the contents of the package at the location 399// associated with the reciever, using whichever hash algorithm is the current 400// default. 401// 402// This method will change to use new hash versions as they are introduced 403// in future. If you need a specific hash version, call the method for that 404// version directly instead, such as HashV1. 405// 406// Hash can be used only with the two local package location types 407// PackageLocalDir and PackageLocalArchive, because it needs to access the 408// contents of the indicated package in order to compute the hash. If given 409// a non-local location this function will always return an error. 410func (m PackageMeta) Hash() (Hash, error) { 411 return PackageHash(m.Location) 412} 413 414// MatchesHash returns true if the package at the location associated with 415// the receiver matches the given hash, or false otherwise. 
416// 417// If it cannot read from the given location, or if the given hash is in an 418// unsupported format, MatchesHash returns an error. 419// 420// MatchesHash can be used only with the two local package location types 421// PackageLocalDir and PackageLocalArchive, because it needs to access the 422// contents of the indicated package in order to compute the hash. If given 423// a non-local location this function will always return an error. 424func (m PackageMeta) MatchesHash(want Hash) (bool, error) { 425 return PackageMatchesHash(m.Location, want) 426} 427 428// MatchesAnyHash returns true if the package at the location associated with 429// the receiver matches at least one of the given hashes, or false otherwise. 430// 431// If it cannot read from the given location, MatchesHash returns an error. 432// Unlike the signular MatchesHash, MatchesAnyHash considers an unsupported 433// hash format to be a successful non-match. 434func (m PackageMeta) MatchesAnyHash(acceptable []Hash) (bool, error) { 435 return PackageMatchesAnyHash(m.Location, acceptable) 436} 437 438// HashV1 computes a hash of the contents of the package at the location 439// associated with the receiver using hash algorithm 1. 440// 441// The hash covers the paths to files in the directory and the contents of 442// those files. It does not cover other metadata about the files, such as 443// permissions. 444// 445// HashV1 can be used only with the two local package location types 446// PackageLocalDir and PackageLocalArchive, because it needs to access the 447// contents of the indicated package in order to compute the hash. If given 448// a non-local location this function will always return an error. 449func (m PackageMeta) HashV1() (Hash, error) { 450 return PackageHashV1(m.Location) 451} 452