1// +build !providerless
2
3/*
4Copyright 2014 The Kubernetes Authors.
5
6Licensed under the Apache License, Version 2.0 (the "License");
7you may not use this file except in compliance with the License.
8You may obtain a copy of the License at
9
10    http://www.apache.org/licenses/LICENSE-2.0
11
12Unless required by applicable law or agreed to in writing, software
13distributed under the License is distributed on an "AS IS" BASIS,
14WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15See the License for the specific language governing permissions and
16limitations under the License.
17*/
18
19package gce
20
21import (
22	"context"
23	"fmt"
24	"io"
25	"net/http"
26	"runtime"
27	"strconv"
28	"strings"
29	"sync"
30	"time"
31
32	gcfg "gopkg.in/gcfg.v1"
33
34	"cloud.google.com/go/compute/metadata"
35	"golang.org/x/oauth2"
36	"golang.org/x/oauth2/google"
37	computealpha "google.golang.org/api/compute/v0.alpha"
38	computebeta "google.golang.org/api/compute/v0.beta"
39	compute "google.golang.org/api/compute/v1"
40	container "google.golang.org/api/container/v1"
41	"google.golang.org/api/option"
42
43	"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud"
44
45	"k8s.io/api/core/v1"
46	"k8s.io/apimachinery/pkg/util/sets"
47	"k8s.io/apimachinery/pkg/util/wait"
48	"k8s.io/client-go/informers"
49	clientset "k8s.io/client-go/kubernetes"
50	"k8s.io/client-go/kubernetes/scheme"
51	v1core "k8s.io/client-go/kubernetes/typed/core/v1"
52	"k8s.io/client-go/pkg/version"
53	"k8s.io/client-go/tools/cache"
54	"k8s.io/client-go/tools/record"
55	"k8s.io/client-go/util/flowcontrol"
56	cloudprovider "k8s.io/cloud-provider"
57	"k8s.io/klog/v2"
58)
59
const (
	// ProviderName is the name under which this provider registers itself
	// with the cloudprovider package.
	ProviderName = "gce"

	k8sNodeRouteTag = "k8s-node-route"

	// gceAffinityTypeNone is the GCE value for "no session affinity".
	gceAffinityTypeNone = "NONE"
	// gceAffinityTypeClientIP is the GCE value for client-IP based session affinity.
	gceAffinityTypeClientIP = "CLIENT_IP"

	operationPollInterval           = 1 * time.Second
	maxTargetPoolCreateInstances    = 200
	maxInstancesPerTargetPoolUpdate = 1000

	// HTTP load balancer health check parameters.

	// gceHcCheckIntervalSeconds: external health checks run every 8 seconds.
	gceHcCheckIntervalSeconds int64 = 8
	gceHcTimeoutSeconds       int64 = 1
	// gceHcHealthyThreshold: start sending requests as soon as a pod is found
	// on the node.
	gceHcHealthyThreshold int64 = 1
	// gceHcUnhealthyThreshold: 3 * 8 = 24 seconds pass before the LB steers
	// traffic away.
	gceHcUnhealthyThreshold int64 = 3

	// Default GA and beta compute API endpoints.
	gceComputeAPIEndpoint     = "https://www.googleapis.com/compute/v1/"
	gceComputeAPIEndpointBeta = "https://www.googleapis.com/compute/beta/"
)
87
88var _ cloudprovider.Interface = (*Cloud)(nil)
89var _ cloudprovider.Instances = (*Cloud)(nil)
90var _ cloudprovider.LoadBalancer = (*Cloud)(nil)
91var _ cloudprovider.Routes = (*Cloud)(nil)
92var _ cloudprovider.Zones = (*Cloud)(nil)
93var _ cloudprovider.PVLabeler = (*Cloud)(nil)
94var _ cloudprovider.Clusters = (*Cloud)(nil)
95
96// Cloud is an implementation of Interface, LoadBalancer and Instances for Google Compute Engine.
97type Cloud struct {
98	// ClusterID contains functionality for getting (and initializing) the ingress-uid. Call Cloud.Initialize()
99	// for the cloudprovider to start watching the configmap.
100	ClusterID ClusterID
101
102	// initializer is used for lazy initialization of subnetworkURL
103	// and isLegacyNetwork fields if they are not passed via the config.
104	// The reason is to avoid GCE API calls to initialize them if they
105	// will never be used. This is especially important when
106	// it is run from Kubelets, as there can be thousands  of them.
107	subnetworkURLAndIsLegacyNetworkInitializer sync.Once
108
109	service          *compute.Service
110	serviceBeta      *computebeta.Service
111	serviceAlpha     *computealpha.Service
112	containerService *container.Service
113	tpuService       *tpuService
114	client           clientset.Interface
115	clientBuilder    cloudprovider.ControllerClientBuilder
116	eventBroadcaster record.EventBroadcaster
117	eventRecorder    record.EventRecorder
118	projectID        string
119	region           string
120	regional         bool
121	localZone        string // The zone in which we are running
122	// managedZones will be set to the 1 zone if running a single zone cluster
123	// it will be set to ALL zones in region for any multi-zone cluster
124	// Use GetAllCurrentZones to get only zones that contain nodes
125	managedZones []string
126	networkURL   string
127	// unsafeIsLegacyNetwork should be used only via IsLegacyNetwork() accessor,
128	// to ensure it was properly initialized.
129	unsafeIsLegacyNetwork bool
130	// unsafeSubnetworkURL should be used only via SubnetworkURL() accessor,
131	// to ensure it was properly initialized.
132	unsafeSubnetworkURL string
133	// DEPRECATED: Do not rely on this value as it may be incorrect.
134	secondaryRangeName       string
135	networkProjectID         string
136	onXPN                    bool
137	nodeTags                 []string    // List of tags to use on firewall rules for load balancers
138	lastComputedNodeTags     []string    // List of node tags calculated in GetHostTags()
139	lastKnownNodeNames       sets.String // List of hostnames used to calculate lastComputedHostTags in GetHostTags(names)
140	computeNodeTagLock       sync.Mutex  // Lock for computing and setting node tags
141	nodeInstancePrefix       string      // If non-"", an advisory prefix for all nodes in the cluster
142	useMetadataServer        bool
143	operationPollRateLimiter flowcontrol.RateLimiter
144	manager                  diskServiceManager
145	// Lock for access to nodeZones
146	nodeZonesLock sync.Mutex
147	// nodeZones is a mapping from Zone to a sets.String of Node's names in the Zone
148	// it is updated by the nodeInformer
149	nodeZones          map[string]sets.String
150	nodeInformerSynced cache.InformerSynced
151	// sharedResourceLock is used to serialize GCE operations that may mutate shared state to
152	// prevent inconsistencies. For example, load balancers manipulation methods will take the
153	// lock to prevent shared resources from being prematurely deleted while the operation is
154	// in progress.
155	sharedResourceLock sync.Mutex
156	// AlphaFeatureGate gates gce alpha features in Cloud instance.
157	// Related wrapper functions that interacts with gce alpha api should examine whether
158	// the corresponding api is enabled.
159	// If not enabled, it should return error.
160	AlphaFeatureGate *AlphaFeatureGate
161
162	// New code generated interface to the GCE compute library.
163	c cloud.Cloud
164
165	// Keep a reference of this around so we can inject a new cloud.RateLimiter implementation.
166	s *cloud.Service
167
168	metricsCollector loadbalancerMetricsCollector
169}
170
// ConfigGlobal is the in-memory form of the [global] section of gce.conf.
// Each field is bound to its config key through the gcfg struct tag.
// TODO: replace gcfg with json
type ConfigGlobal struct {
	TokenURL  string `gcfg:"token-url"`
	TokenBody string `gcfg:"token-body" datapolicy:"token"`

	// ProjectID may be either the numeric identifier or the string-based
	// unique identifier that starts with [a-z]. The same holds for
	// NetworkProjectID.
	ProjectID string `gcfg:"project-id"`
	// NetworkProjectID is the project that owns the network being used.
	NetworkProjectID string `gcfg:"network-project-id"`
	NetworkName      string `gcfg:"network-name"`
	SubnetworkName   string `gcfg:"subnetwork-name"`

	// SecondaryRangeName names the secondary range used to allocate IP
	// aliases. The secondary range must be present on the subnetwork the
	// cluster is attached to.
	// DEPRECATED: Do not rely on this value as it may be incorrect.
	SecondaryRangeName string   `gcfg:"secondary-range-name"`
	NodeTags           []string `gcfg:"node-tags"`
	NodeInstancePrefix string   `gcfg:"node-instance-prefix"`
	Regional           bool     `gcfg:"regional"`
	Multizone          bool     `gcfg:"multizone"`

	// APIEndpoint is the GCE compute API endpoint to use. When blank, the
	// default endpoint is used.
	APIEndpoint string `gcfg:"api-endpoint"`
	// ContainerAPIEndpoint is the GCE container API endpoint to use. When
	// blank, the default endpoint is used.
	ContainerAPIEndpoint string `gcfg:"container-api-endpoint"`
	// LocalZone is the GCE zone the gce cloud client instance is located in
	// (i.e. where the controller will be running). When blank, the local zone
	// is discovered via the metadata server.
	LocalZone string `gcfg:"local-zone"`
	// AlphaFeatures lists the alpha features to enable; the default is none.
	// For example: MyFeatureFlag
	AlphaFeatures []string `gcfg:"alpha-features"`
}
206
207// ConfigFile is the struct used to parse the /etc/gce.conf configuration file.
208// NOTE: Cloud config files should follow the same Kubernetes deprecation policy as
209// flags or CLIs. Config fields should not change behavior in incompatible ways and
210// should be deprecated for at least 2 release prior to removing.
211// See https://kubernetes.io/docs/reference/using-api/deprecation-policy/#deprecating-a-flag-or-cli
212// for more details.
213type ConfigFile struct {
214	Global ConfigGlobal `gcfg:"global"`
215}
216
217// CloudConfig includes all the necessary configuration for creating Cloud
218type CloudConfig struct {
219	APIEndpoint          string
220	ContainerAPIEndpoint string
221	ProjectID            string
222	NetworkProjectID     string
223	Region               string
224	Regional             bool
225	Zone                 string
226	ManagedZones         []string
227	NetworkName          string
228	NetworkURL           string
229	SubnetworkName       string
230	SubnetworkURL        string
231	// DEPRECATED: Do not rely on this value as it may be incorrect.
232	SecondaryRangeName string
233	NodeTags           []string
234	NodeInstancePrefix string
235	TokenSource        oauth2.TokenSource
236	UseMetadataServer  bool
237	AlphaFeatureGate   *AlphaFeatureGate
238}
239
240func init() {
241	cloudprovider.RegisterCloudProvider(
242		ProviderName,
243		func(config io.Reader) (cloudprovider.Interface, error) {
244			return newGCECloud(config)
245		})
246}
247
248// Services is the set of all versions of the compute service.
249type Services struct {
250	// GA, Alpha, Beta versions of the compute API.
251	GA    *compute.Service
252	Alpha *computealpha.Service
253	Beta  *computebeta.Service
254}
255
256// ComputeServices returns access to the internal compute services.
257func (g *Cloud) ComputeServices() *Services {
258	return &Services{g.service, g.serviceAlpha, g.serviceBeta}
259}
260
261// Compute returns the generated stubs for the compute API.
262func (g *Cloud) Compute() cloud.Cloud {
263	return g.c
264}
265
266// ContainerService returns the container service.
267func (g *Cloud) ContainerService() *container.Service {
268	return g.containerService
269}
270
271// newGCECloud creates a new instance of Cloud.
272func newGCECloud(config io.Reader) (gceCloud *Cloud, err error) {
273	var cloudConfig *CloudConfig
274	var configFile *ConfigFile
275
276	if config != nil {
277		configFile, err = readConfig(config)
278		if err != nil {
279			return nil, err
280		}
281		klog.Infof("Using GCE provider config %+v", configFile)
282	}
283
284	cloudConfig, err = generateCloudConfig(configFile)
285	if err != nil {
286		return nil, err
287	}
288	return CreateGCECloud(cloudConfig)
289}
290
291func readConfig(reader io.Reader) (*ConfigFile, error) {
292	cfg := &ConfigFile{}
293	if err := gcfg.FatalOnly(gcfg.ReadInto(cfg, reader)); err != nil {
294		klog.Errorf("Couldn't read config: %v", err)
295		return nil, err
296	}
297	return cfg, nil
298}
299
300func generateCloudConfig(configFile *ConfigFile) (cloudConfig *CloudConfig, err error) {
301	cloudConfig = &CloudConfig{}
302	// By default, fetch token from GCE metadata server
303	cloudConfig.TokenSource = google.ComputeTokenSource("")
304	cloudConfig.UseMetadataServer = true
305	cloudConfig.AlphaFeatureGate = NewAlphaFeatureGate([]string{})
306	if configFile != nil {
307		if configFile.Global.APIEndpoint != "" {
308			cloudConfig.APIEndpoint = configFile.Global.APIEndpoint
309		}
310
311		if configFile.Global.ContainerAPIEndpoint != "" {
312			cloudConfig.ContainerAPIEndpoint = configFile.Global.ContainerAPIEndpoint
313		}
314
315		if configFile.Global.TokenURL != "" {
316			// if tokenURL is nil, set tokenSource to nil. This will force the OAuth client to fall
317			// back to use DefaultTokenSource. This allows running gceCloud remotely.
318			if configFile.Global.TokenURL == "nil" {
319				cloudConfig.TokenSource = nil
320			} else {
321				cloudConfig.TokenSource = NewAltTokenSource(configFile.Global.TokenURL, configFile.Global.TokenBody)
322			}
323		}
324
325		cloudConfig.NodeTags = configFile.Global.NodeTags
326		cloudConfig.NodeInstancePrefix = configFile.Global.NodeInstancePrefix
327		cloudConfig.AlphaFeatureGate = NewAlphaFeatureGate(configFile.Global.AlphaFeatures)
328	}
329
330	// retrieve projectID and zone
331	if configFile == nil || configFile.Global.ProjectID == "" || configFile.Global.LocalZone == "" {
332		cloudConfig.ProjectID, cloudConfig.Zone, err = getProjectAndZone()
333		if err != nil {
334			return nil, err
335		}
336	}
337
338	if configFile != nil {
339		if configFile.Global.ProjectID != "" {
340			cloudConfig.ProjectID = configFile.Global.ProjectID
341		}
342		if configFile.Global.LocalZone != "" {
343			cloudConfig.Zone = configFile.Global.LocalZone
344		}
345		if configFile.Global.NetworkProjectID != "" {
346			cloudConfig.NetworkProjectID = configFile.Global.NetworkProjectID
347		}
348	}
349
350	// retrieve region
351	cloudConfig.Region, err = GetGCERegion(cloudConfig.Zone)
352	if err != nil {
353		return nil, err
354	}
355
356	// Determine if its a regional cluster
357	if configFile != nil && configFile.Global.Regional {
358		cloudConfig.Regional = true
359	}
360
361	// generate managedZones
362	cloudConfig.ManagedZones = []string{cloudConfig.Zone}
363	if configFile != nil && (configFile.Global.Multizone || configFile.Global.Regional) {
364		cloudConfig.ManagedZones = nil // Use all zones in region
365	}
366
367	// Determine if network parameter is URL or Name
368	if configFile != nil && configFile.Global.NetworkName != "" {
369		if strings.Contains(configFile.Global.NetworkName, "/") {
370			cloudConfig.NetworkURL = configFile.Global.NetworkName
371		} else {
372			cloudConfig.NetworkName = configFile.Global.NetworkName
373		}
374	} else {
375		cloudConfig.NetworkName, err = getNetworkNameViaMetadata()
376		if err != nil {
377			return nil, err
378		}
379	}
380
381	// Determine if subnetwork parameter is URL or Name
382	// If cluster is on a GCP network of mode=custom, then `SubnetName` must be specified in config file.
383	if configFile != nil && configFile.Global.SubnetworkName != "" {
384		if strings.Contains(configFile.Global.SubnetworkName, "/") {
385			cloudConfig.SubnetworkURL = configFile.Global.SubnetworkName
386		} else {
387			cloudConfig.SubnetworkName = configFile.Global.SubnetworkName
388		}
389	}
390
391	if configFile != nil {
392		cloudConfig.SecondaryRangeName = configFile.Global.SecondaryRangeName
393	}
394
395	return cloudConfig, err
396}
397
398// CreateGCECloud creates a Cloud object using the specified parameters.
399// If no networkUrl is specified, loads networkName via rest call.
400// If no tokenSource is specified, uses oauth2.DefaultTokenSource.
401// If managedZones is nil / empty all zones in the region will be managed.
402func CreateGCECloud(config *CloudConfig) (*Cloud, error) {
403	// Remove any pre-release version and build metadata from the semver,
404	// leaving only the MAJOR.MINOR.PATCH portion. See http://semver.org/.
405	version := strings.TrimLeft(strings.Split(strings.Split(version.Get().GitVersion, "-")[0], "+")[0], "v")
406
407	// Create a user-agent header append string to supply to the Google API
408	// clients, to identify Kubernetes as the origin of the GCP API calls.
409	userAgent := fmt.Sprintf("Kubernetes/%s (%s %s)", version, runtime.GOOS, runtime.GOARCH)
410
411	// Use ProjectID for NetworkProjectID, if it wasn't explicitly set.
412	if config.NetworkProjectID == "" {
413		config.NetworkProjectID = config.ProjectID
414	}
415
416	service, err := compute.NewService(context.Background(), option.WithTokenSource(config.TokenSource))
417	if err != nil {
418		return nil, err
419	}
420	service.UserAgent = userAgent
421
422	serviceBeta, err := computebeta.NewService(context.Background(), option.WithTokenSource(config.TokenSource))
423	if err != nil {
424		return nil, err
425	}
426	serviceBeta.UserAgent = userAgent
427
428	serviceAlpha, err := computealpha.NewService(context.Background(), option.WithTokenSource(config.TokenSource))
429	if err != nil {
430		return nil, err
431	}
432	serviceAlpha.UserAgent = userAgent
433
434	// Expect override api endpoint to always be v1 api and follows the same pattern as prod.
435	// Generate alpha and beta api endpoints based on override v1 api endpoint.
436	// For example,
437	// staging API endpoint: https://www.googleapis.com/compute/staging_v1/
438	if config.APIEndpoint != "" {
439		service.BasePath = fmt.Sprintf("%sprojects/", config.APIEndpoint)
440		serviceBeta.BasePath = fmt.Sprintf("%sprojects/", strings.Replace(config.APIEndpoint, "v1", "beta", -1))
441		serviceAlpha.BasePath = fmt.Sprintf("%sprojects/", strings.Replace(config.APIEndpoint, "v1", "alpha", -1))
442	}
443
444	containerService, err := container.NewService(context.Background(), option.WithTokenSource(config.TokenSource))
445	if err != nil {
446		return nil, err
447	}
448	containerService.UserAgent = userAgent
449	if config.ContainerAPIEndpoint != "" {
450		containerService.BasePath = config.ContainerAPIEndpoint
451	}
452
453	client, err := newOauthClient(config.TokenSource)
454	if err != nil {
455		return nil, err
456	}
457	tpuService, err := newTPUService(client)
458	if err != nil {
459		return nil, err
460	}
461
462	// ProjectID and.NetworkProjectID may be project number or name.
463	projID, netProjID := tryConvertToProjectNames(config.ProjectID, config.NetworkProjectID, service)
464	onXPN := projID != netProjID
465
466	var networkURL string
467	var subnetURL string
468	var isLegacyNetwork bool
469
470	if config.NetworkURL != "" {
471		networkURL = config.NetworkURL
472	} else if config.NetworkName != "" {
473		networkURL = gceNetworkURL(config.APIEndpoint, netProjID, config.NetworkName)
474	} else {
475		// Other consumers may use the cloudprovider without utilizing the wrapped GCE API functions
476		// or functions requiring network/subnetwork URLs (e.g. Kubelet).
477		klog.Warningf("No network name or URL specified.")
478	}
479
480	if config.SubnetworkURL != "" {
481		subnetURL = config.SubnetworkURL
482	} else if config.SubnetworkName != "" {
483		subnetURL = gceSubnetworkURL(config.APIEndpoint, netProjID, config.Region, config.SubnetworkName)
484	}
485	// If neither SubnetworkURL nor SubnetworkName are provided, defer to
486	// lazy initialization. Determining subnetURL and isLegacyNetwork requires
487	// GCE API call. Given that it's not used in many cases and the fact that
488	// the provider is initialized also for Kubelets (and there can be thousands
489	// of them) we defer to lazy initialization here.
490
491	if len(config.ManagedZones) == 0 {
492		config.ManagedZones, err = getZonesForRegion(service, config.ProjectID, config.Region)
493		if err != nil {
494			return nil, err
495		}
496	}
497	if len(config.ManagedZones) > 1 {
498		klog.Infof("managing multiple zones: %v", config.ManagedZones)
499	}
500
501	operationPollRateLimiter := flowcontrol.NewTokenBucketRateLimiter(5, 5) // 5 qps, 5 burst.
502
503	gce := &Cloud{
504		service:                  service,
505		serviceAlpha:             serviceAlpha,
506		serviceBeta:              serviceBeta,
507		containerService:         containerService,
508		tpuService:               tpuService,
509		projectID:                projID,
510		networkProjectID:         netProjID,
511		onXPN:                    onXPN,
512		region:                   config.Region,
513		regional:                 config.Regional,
514		localZone:                config.Zone,
515		managedZones:             config.ManagedZones,
516		networkURL:               networkURL,
517		unsafeIsLegacyNetwork:    isLegacyNetwork,
518		unsafeSubnetworkURL:      subnetURL,
519		secondaryRangeName:       config.SecondaryRangeName,
520		nodeTags:                 config.NodeTags,
521		nodeInstancePrefix:       config.NodeInstancePrefix,
522		useMetadataServer:        config.UseMetadataServer,
523		operationPollRateLimiter: operationPollRateLimiter,
524		AlphaFeatureGate:         config.AlphaFeatureGate,
525		nodeZones:                map[string]sets.String{},
526		metricsCollector:         newLoadBalancerMetrics(),
527	}
528
529	gce.manager = &gceServiceManager{gce}
530	gce.s = &cloud.Service{
531		GA:            service,
532		Alpha:         serviceAlpha,
533		Beta:          serviceBeta,
534		ProjectRouter: &gceProjectRouter{gce},
535		RateLimiter:   &gceRateLimiter{gce},
536	}
537	gce.c = cloud.NewGCE(gce.s)
538
539	return gce, nil
540}
541
542// initializeNetworkConfig() is supposed to be called under sync.Once()
543// for accessors to subnetworkURL and isLegacyNetwork fields.
544func (g *Cloud) initializeSubnetworkURLAndIsLegacyNetwork() {
545	if g.unsafeSubnetworkURL != "" {
546		// This has already been initialized via the config.
547		return
548	}
549
550	var subnetURL string
551	var isLegacyNetwork bool
552
553	// Determine the type of network and attempt to discover the correct subnet for AUTO mode.
554	// Gracefully fail because kubelet calls CreateGCECloud without any config, and minions
555	// lack the proper credentials for API calls.
556	if networkName := lastComponent(g.NetworkURL()); networkName != "" {
557		if n, err := getNetwork(g.service, g.NetworkProjectID(), networkName); err != nil {
558			klog.Warningf("Could not retrieve network %q; err: %v", networkName, err)
559		} else {
560			switch typeOfNetwork(n) {
561			case netTypeLegacy:
562				klog.Infof("Network %q is type legacy - no subnetwork", networkName)
563				isLegacyNetwork = true
564			case netTypeCustom:
565				klog.Warningf("Network %q is type custom - cannot auto select a subnetwork", networkName)
566			case netTypeAuto:
567				subnetURL, err = determineSubnetURL(g.service, g.NetworkProjectID(), networkName, g.Region())
568				if err != nil {
569					klog.Warningf("Could not determine subnetwork for network %q and region %v; err: %v", networkName, g.Region(), err)
570				} else {
571					klog.Infof("Auto selecting subnetwork %q", subnetURL)
572				}
573			}
574		}
575	}
576
577	g.unsafeSubnetworkURL = subnetURL
578	g.unsafeIsLegacyNetwork = isLegacyNetwork
579}
580
581// SetRateLimiter adds a custom cloud.RateLimiter implementation.
582// WARNING: Calling this could have unexpected behavior if you have in-flight
583// requests. It is best to use this immediately after creating a Cloud.
584func (g *Cloud) SetRateLimiter(rl cloud.RateLimiter) {
585	if rl != nil {
586		g.s.RateLimiter = rl
587	}
588}
589
590// determineSubnetURL queries for all subnetworks in a region for a given network and returns
591// the URL of the subnetwork which exists in the auto-subnet range.
592func determineSubnetURL(service *compute.Service, networkProjectID, networkName, region string) (string, error) {
593	subnets, err := listSubnetworksOfNetwork(service, networkProjectID, networkName, region)
594	if err != nil {
595		return "", err
596	}
597
598	autoSubnets, err := subnetsInCIDR(subnets, autoSubnetIPRange)
599	if err != nil {
600		return "", err
601	}
602
603	if len(autoSubnets) == 0 {
604		return "", fmt.Errorf("no subnet exists in auto CIDR")
605	}
606
607	if len(autoSubnets) > 1 {
608		return "", fmt.Errorf("multiple subnetworks in the same region exist in auto CIDR")
609	}
610
611	return autoSubnets[0].SelfLink, nil
612}
613
614func tryConvertToProjectNames(configProject, configNetworkProject string, service *compute.Service) (projID, netProjID string) {
615	projID = configProject
616	if isProjectNumber(projID) {
617		projName, err := getProjectID(service, projID)
618		if err != nil {
619			klog.Warningf("Failed to retrieve project %v while trying to retrieve its name. err %v", projID, err)
620		} else {
621			projID = projName
622		}
623	}
624
625	netProjID = projID
626	if configNetworkProject != configProject {
627		netProjID = configNetworkProject
628	}
629	if isProjectNumber(netProjID) {
630		netProjName, err := getProjectID(service, netProjID)
631		if err != nil {
632			klog.Warningf("Failed to retrieve network project %v while trying to retrieve its name. err %v", netProjID, err)
633		} else {
634			netProjID = netProjName
635		}
636	}
637
638	return projID, netProjID
639}
640
641// Initialize takes in a clientBuilder and spawns a goroutine for watching the clusterid configmap.
642// This must be called before utilizing the funcs of gce.ClusterID
643func (g *Cloud) Initialize(clientBuilder cloudprovider.ControllerClientBuilder, stop <-chan struct{}) {
644	g.clientBuilder = clientBuilder
645	g.client = clientBuilder.ClientOrDie("cloud-provider")
646
647	g.eventBroadcaster = record.NewBroadcaster()
648	g.eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: g.client.CoreV1().Events("")})
649	g.eventRecorder = g.eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "g-cloudprovider"})
650
651	go g.watchClusterID(stop)
652	go g.metricsCollector.Run(stop)
653}
654
655// LoadBalancer returns an implementation of LoadBalancer for Google Compute Engine.
656func (g *Cloud) LoadBalancer() (cloudprovider.LoadBalancer, bool) {
657	return g, true
658}
659
660// Instances returns an implementation of Instances for Google Compute Engine.
661func (g *Cloud) Instances() (cloudprovider.Instances, bool) {
662	return g, true
663}
664
665// InstancesV2 returns an implementation of InstancesV2 for Google Compute Engine.
666// Implement ONLY for external cloud provider
667func (g *Cloud) InstancesV2() (cloudprovider.InstancesV2, bool) {
668	return g, true
669}
670
671// Zones returns an implementation of Zones for Google Compute Engine.
672func (g *Cloud) Zones() (cloudprovider.Zones, bool) {
673	return g, true
674}
675
676// Clusters returns an implementation of Clusters for Google Compute Engine.
677func (g *Cloud) Clusters() (cloudprovider.Clusters, bool) {
678	return g, true
679}
680
681// Routes returns an implementation of Routes for Google Compute Engine.
682func (g *Cloud) Routes() (cloudprovider.Routes, bool) {
683	return g, true
684}
685
686// ProviderName returns the cloud provider ID.
687func (g *Cloud) ProviderName() string {
688	return ProviderName
689}
690
691// ProjectID returns the ProjectID corresponding to the project this cloud is in.
692func (g *Cloud) ProjectID() string {
693	return g.projectID
694}
695
696// NetworkProjectID returns the ProjectID corresponding to the project this cluster's network is in.
697func (g *Cloud) NetworkProjectID() string {
698	return g.networkProjectID
699}
700
701// Region returns the region
702func (g *Cloud) Region() string {
703	return g.region
704}
705
706// OnXPN returns true if the cluster is running on a cross project network (XPN)
707func (g *Cloud) OnXPN() bool {
708	return g.onXPN
709}
710
711// NetworkURL returns the network url
712func (g *Cloud) NetworkURL() string {
713	return g.networkURL
714}
715
716// SubnetworkURL returns the subnetwork url
717func (g *Cloud) SubnetworkURL() string {
718	g.subnetworkURLAndIsLegacyNetworkInitializer.Do(g.initializeSubnetworkURLAndIsLegacyNetwork)
719	return g.unsafeSubnetworkURL
720}
721
722// IsLegacyNetwork returns true if the cluster is still running a legacy network configuration.
723func (g *Cloud) IsLegacyNetwork() bool {
724	g.subnetworkURLAndIsLegacyNetworkInitializer.Do(g.initializeSubnetworkURLAndIsLegacyNetwork)
725	return g.unsafeIsLegacyNetwork
726}
727
728// SetInformers sets up the zone handlers we need watching for node changes.
729func (g *Cloud) SetInformers(informerFactory informers.SharedInformerFactory) {
730	klog.Infof("Setting up informers for Cloud")
731	nodeInformer := informerFactory.Core().V1().Nodes().Informer()
732	nodeInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
733		AddFunc: func(obj interface{}) {
734			node := obj.(*v1.Node)
735			g.updateNodeZones(nil, node)
736		},
737		UpdateFunc: func(prev, obj interface{}) {
738			prevNode := prev.(*v1.Node)
739			newNode := obj.(*v1.Node)
740			if newNode.Labels[v1.LabelFailureDomainBetaZone] ==
741				prevNode.Labels[v1.LabelFailureDomainBetaZone] {
742				return
743			}
744			g.updateNodeZones(prevNode, newNode)
745		},
746		DeleteFunc: func(obj interface{}) {
747			node, isNode := obj.(*v1.Node)
748			// We can get DeletedFinalStateUnknown instead of *v1.Node here
749			// and we need to handle that correctly.
750			if !isNode {
751				deletedState, ok := obj.(cache.DeletedFinalStateUnknown)
752				if !ok {
753					klog.Errorf("Received unexpected object: %v", obj)
754					return
755				}
756				node, ok = deletedState.Obj.(*v1.Node)
757				if !ok {
758					klog.Errorf("DeletedFinalStateUnknown contained non-Node object: %v", deletedState.Obj)
759					return
760				}
761			}
762			g.updateNodeZones(node, nil)
763		},
764	})
765	g.nodeInformerSynced = nodeInformer.HasSynced
766}
767
768func (g *Cloud) updateNodeZones(prevNode, newNode *v1.Node) {
769	g.nodeZonesLock.Lock()
770	defer g.nodeZonesLock.Unlock()
771	if prevNode != nil {
772		prevZone, ok := prevNode.ObjectMeta.Labels[v1.LabelFailureDomainBetaZone]
773		if ok {
774			g.nodeZones[prevZone].Delete(prevNode.ObjectMeta.Name)
775			if g.nodeZones[prevZone].Len() == 0 {
776				g.nodeZones[prevZone] = nil
777			}
778		}
779	}
780	if newNode != nil {
781		newZone, ok := newNode.ObjectMeta.Labels[v1.LabelFailureDomainBetaZone]
782		if ok {
783			if g.nodeZones[newZone] == nil {
784				g.nodeZones[newZone] = sets.NewString()
785			}
786			g.nodeZones[newZone].Insert(newNode.ObjectMeta.Name)
787		}
788	}
789}
790
791// HasClusterID returns true if the cluster has a clusterID
792func (g *Cloud) HasClusterID() bool {
793	return true
794}
795
// isProjectNumber reports whether idOrNumber parses in full as a base-10
// unsigned 64-bit integer. Project IDs cannot have a digit for the first
// character, so a value made up of digits only must be a project number.
func isProjectNumber(idOrNumber string) bool {
	if _, parseErr := strconv.ParseUint(idOrNumber, 10, 64); parseErr != nil {
		return false
	}
	return true
}
802
803func gceNetworkURL(apiEndpoint, project, network string) string {
804	if apiEndpoint == "" {
805		apiEndpoint = gceComputeAPIEndpoint
806	}
807	return apiEndpoint + strings.Join([]string{"projects", project, "global", "networks", network}, "/")
808}
809
810func gceSubnetworkURL(apiEndpoint, project, region, subnetwork string) string {
811	if apiEndpoint == "" {
812		apiEndpoint = gceComputeAPIEndpoint
813	}
814	return apiEndpoint + strings.Join([]string{"projects", project, "regions", region, "subnetworks", subnetwork}, "/")
815}
816
// getRegionInURL returns the path segment that follows the first "regions"
// segment of urlStr, or "" when there is no "regions" segment with a
// successor. Both full resource URLs and shorter ones are handled:
//
//	https://www.googleapis.com/compute/v1/projects/myproject/regions/us-central1/subnetworks/a
//	projects/myproject/regions/us-central1/subnetworks/a
//
// Each of these yields "us-central1".
func getRegionInURL(urlStr string) string {
	segments := strings.Split(urlStr, "/")
	// Stop one short of the end so segments[idx+1] always exists.
	for idx := 0; idx+1 < len(segments); idx++ {
		if segments[idx] == "regions" {
			return segments[idx+1]
		}
	}
	return ""
}
830
831func getNetworkNameViaMetadata() (string, error) {
832	result, err := metadata.Get("instance/network-interfaces/0/network")
833	if err != nil {
834		return "", err
835	}
836	parts := strings.Split(result, "/")
837	if len(parts) != 4 {
838		return "", fmt.Errorf("unexpected response: %s", result)
839	}
840	return parts[3], nil
841}
842
843// getNetwork returns a GCP network
844func getNetwork(svc *compute.Service, networkProjectID, networkID string) (*compute.Network, error) {
845	return svc.Networks.Get(networkProjectID, networkID).Do()
846}
847
848// listSubnetworksOfNetwork returns a list of subnetworks for a particular region of a network.
849func listSubnetworksOfNetwork(svc *compute.Service, networkProjectID, networkID, region string) ([]*compute.Subnetwork, error) {
850	var subnets []*compute.Subnetwork
851	err := svc.Subnetworks.List(networkProjectID, region).Filter(fmt.Sprintf("network eq .*/%v$", networkID)).Pages(context.Background(), func(res *compute.SubnetworkList) error {
852		subnets = append(subnets, res.Items...)
853		return nil
854	})
855	return subnets, err
856}
857
858// getProjectID returns the project's string ID given a project number or string
859func getProjectID(svc *compute.Service, projectNumberOrID string) (string, error) {
860	proj, err := svc.Projects.Get(projectNumberOrID).Do()
861	if err != nil {
862		return "", err
863	}
864
865	return proj.Name, nil
866}
867
868func getZonesForRegion(svc *compute.Service, projectID, region string) ([]string, error) {
869	// TODO: use PageToken to list all not just the first 500
870	listCall := svc.Zones.List(projectID)
871
872	// Filtering by region doesn't seem to work
873	// (tested in https://cloud.google.com/compute/docs/reference/latest/zones/list)
874	// listCall = listCall.Filter("region eq " + region)
875
876	var zones []string
877	var accumulator = func(response *compute.ZoneList) error {
878		for _, zone := range response.Items {
879			regionName := lastComponent(zone.Region)
880			if regionName == region {
881				zones = append(zones, zone.Name)
882			}
883		}
884		return nil
885	}
886	err := listCall.Pages(context.TODO(), accumulator)
887	if err != nil {
888		return nil, fmt.Errorf("unexpected response listing zones: %v", err)
889	}
890	return zones, nil
891}
892
893func findSubnetForRegion(subnetURLs []string, region string) string {
894	for _, url := range subnetURLs {
895		if thisRegion := getRegionInURL(url); thisRegion == region {
896			return url
897		}
898	}
899	return ""
900}
901
902func newOauthClient(tokenSource oauth2.TokenSource) (*http.Client, error) {
903	if tokenSource == nil {
904		var err error
905		tokenSource, err = google.DefaultTokenSource(
906			context.Background(),
907			compute.CloudPlatformScope,
908			compute.ComputeScope)
909		klog.Infof("Using DefaultTokenSource %#v", tokenSource)
910		if err != nil {
911			return nil, err
912		}
913	} else {
914		klog.Infof("Using existing Token Source %#v", tokenSource)
915	}
916
917	backoff := wait.Backoff{
918		// These values will add up to about a minute. See #56293 for background.
919		Duration: time.Second,
920		Factor:   1.4,
921		Steps:    10,
922	}
923	if err := wait.ExponentialBackoff(backoff, func() (bool, error) {
924		if _, err := tokenSource.Token(); err != nil {
925			klog.Errorf("error fetching initial token: %v", err)
926			return false, nil
927		}
928		return true, nil
929	}); err != nil {
930		return nil, err
931	}
932
933	return oauth2.NewClient(context.Background(), tokenSource), nil
934}
935
936func (manager *gceServiceManager) getProjectsAPIEndpoint() string {
937	projectsAPIEndpoint := gceComputeAPIEndpoint + "projects/"
938	if manager.gce.service != nil {
939		projectsAPIEndpoint = manager.gce.service.BasePath
940	}
941
942	return projectsAPIEndpoint
943}
944