1// Copyright 2015 The Prometheus Authors 2// Licensed under the Apache License, Version 2.0 (the "License"); 3// you may not use this file except in compliance with the License. 4// You may obtain a copy of the License at 5// 6// http://www.apache.org/licenses/LICENSE-2.0 7// 8// Unless required by applicable law or agreed to in writing, software 9// distributed under the License is distributed on an "AS IS" BASIS, 10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11// See the License for the specific language governing permissions and 12// limitations under the License. 13 14package azure 15 16import ( 17 "fmt" 18 "net" 19 "strings" 20 "time" 21 22 "github.com/Azure/azure-sdk-for-go/arm/compute" 23 "github.com/Azure/azure-sdk-for-go/arm/network" 24 "github.com/Azure/go-autorest/autorest/azure" 25 26 "github.com/prometheus/client_golang/prometheus" 27 "github.com/prometheus/common/log" 28 "github.com/prometheus/common/model" 29 "golang.org/x/net/context" 30 31 "github.com/prometheus/prometheus/config" 32 "github.com/prometheus/prometheus/util/strutil" 33) 34 35const ( 36 azureLabel = model.MetaLabelPrefix + "azure_" 37 azureLabelMachineID = azureLabel + "machine_id" 38 azureLabelMachineResourceGroup = azureLabel + "machine_resource_group" 39 azureLabelMachineName = azureLabel + "machine_name" 40 azureLabelMachineLocation = azureLabel + "machine_location" 41 azureLabelMachinePrivateIP = azureLabel + "machine_private_ip" 42 azureLabelMachineTag = azureLabel + "machine_tag_" 43) 44 45var ( 46 azureSDRefreshFailuresCount = prometheus.NewCounter( 47 prometheus.CounterOpts{ 48 Name: "prometheus_sd_azure_refresh_failures_total", 49 Help: "Number of Azure-SD refresh failures.", 50 }) 51 azureSDRefreshDuration = prometheus.NewSummary( 52 prometheus.SummaryOpts{ 53 Name: "prometheus_sd_azure_refresh_duration_seconds", 54 Help: "The duration of a Azure-SD refresh in seconds.", 55 }) 56) 57 58func init() { 59 prometheus.MustRegister(azureSDRefreshDuration) 60 prometheus.MustRegister(azureSDRefreshFailuresCount) 61} 62 63// Discovery periodically performs Azure-SD requests. It implements 64// the TargetProvider interface. 65type Discovery struct { 66 cfg *config.AzureSDConfig 67 interval time.Duration 68 port int 69 logger log.Logger 70} 71 72// NewDiscovery returns a new AzureDiscovery which periodically refreshes its targets. 73func NewDiscovery(cfg *config.AzureSDConfig, logger log.Logger) *Discovery { 74 return &Discovery{ 75 cfg: cfg, 76 interval: time.Duration(cfg.RefreshInterval), 77 port: cfg.Port, 78 logger: logger, 79 } 80} 81 82// Run implements the TargetProvider interface. 83func (d *Discovery) Run(ctx context.Context, ch chan<- []*config.TargetGroup) { 84 ticker := time.NewTicker(d.interval) 85 defer ticker.Stop() 86 87 for { 88 select { 89 case <-ctx.Done(): 90 return 91 default: 92 } 93 94 tg, err := d.refresh() 95 if err != nil { 96 d.logger.Errorf("unable to refresh during Azure discovery: %s", err) 97 } else { 98 select { 99 case <-ctx.Done(): 100 case ch <- []*config.TargetGroup{tg}: 101 } 102 } 103 104 select { 105 case <-ticker.C: 106 case <-ctx.Done(): 107 return 108 } 109 } 110} 111 112// azureClient represents multiple Azure Resource Manager providers. 113type azureClient struct { 114 nic network.InterfacesClient 115 vm compute.VirtualMachinesClient 116} 117 118// createAzureClient is a helper function for creating an Azure compute client to ARM. 119func createAzureClient(cfg config.AzureSDConfig) (azureClient, error) { 120 var c azureClient 121 oauthConfig, err := azure.PublicCloud.OAuthConfigForTenant(cfg.TenantID) 122 if err != nil { 123 return azureClient{}, err 124 } 125 spt, err := azure.NewServicePrincipalToken(*oauthConfig, cfg.ClientID, string(cfg.ClientSecret), azure.PublicCloud.ResourceManagerEndpoint) 126 if err != nil { 127 return azureClient{}, err 128 } 129 130 c.vm = compute.NewVirtualMachinesClient(cfg.SubscriptionID) 131 c.vm.Authorizer = spt 132 133 c.nic = network.NewInterfacesClient(cfg.SubscriptionID) 134 c.nic.Authorizer = spt 135 136 return c, nil 137} 138 139// azureResource represents a resource identifier in Azure. 140type azureResource struct { 141 Name string 142 ResourceGroup string 143} 144 145// Create a new azureResource object from an ID string. 146func newAzureResourceFromID(id string, logger log.Logger) (azureResource, error) { 147 // Resource IDs have the following format. 148 // /subscriptions/SUBSCRIPTION_ID/resourceGroups/RESOURCE_GROUP/providers/PROVIDER/TYPE/NAME 149 s := strings.Split(id, "/") 150 if len(s) != 9 { 151 err := fmt.Errorf("invalid ID '%s'. Refusing to create azureResource", id) 152 logger.Error(err) 153 return azureResource{}, err 154 } 155 return azureResource{ 156 Name: strings.ToLower(s[8]), 157 ResourceGroup: strings.ToLower(s[4]), 158 }, nil 159} 160 161func (d *Discovery) refresh() (tg *config.TargetGroup, err error) { 162 t0 := time.Now() 163 defer func() { 164 azureSDRefreshDuration.Observe(time.Since(t0).Seconds()) 165 if err != nil { 166 azureSDRefreshFailuresCount.Inc() 167 } 168 }() 169 tg = &config.TargetGroup{} 170 client, err := createAzureClient(*d.cfg) 171 if err != nil { 172 return tg, fmt.Errorf("could not create Azure client: %s", err) 173 } 174 175 var machines []compute.VirtualMachine 176 result, err := client.vm.ListAll() 177 if err != nil { 178 return tg, fmt.Errorf("could not list virtual machines: %s", err) 179 } 180 machines = append(machines, *result.Value...) 181 182 // If we still have results, keep going until we have no more. 183 for result.NextLink != nil { 184 result, err = client.vm.ListAllNextResults(result) 185 if err != nil { 186 return tg, fmt.Errorf("could not list virtual machines: %s", err) 187 } 188 machines = append(machines, *result.Value...) 189 } 190 d.logger.Debugf("Found %d virtual machines during Azure discovery.", len(machines)) 191 192 // We have the slice of machines. Now turn them into targets. 193 // Doing them in go routines because the network interface calls are slow. 194 type target struct { 195 labelSet model.LabelSet 196 err error 197 } 198 199 ch := make(chan target, len(machines)) 200 for i, vm := range machines { 201 go func(i int, vm compute.VirtualMachine) { 202 r, err := newAzureResourceFromID(*vm.ID, d.logger) 203 if err != nil { 204 ch <- target{labelSet: nil, err: err} 205 return 206 } 207 208 labels := model.LabelSet{ 209 azureLabelMachineID: model.LabelValue(*vm.ID), 210 azureLabelMachineName: model.LabelValue(*vm.Name), 211 azureLabelMachineLocation: model.LabelValue(*vm.Location), 212 azureLabelMachineResourceGroup: model.LabelValue(r.ResourceGroup), 213 } 214 215 if vm.Tags != nil { 216 for k, v := range *vm.Tags { 217 name := strutil.SanitizeLabelName(k) 218 labels[azureLabelMachineTag+model.LabelName(name)] = model.LabelValue(*v) 219 } 220 } 221 222 // Get the IP address information via separate call to the network provider. 223 for _, nic := range *vm.Properties.NetworkProfile.NetworkInterfaces { 224 r, err := newAzureResourceFromID(*nic.ID, d.logger) 225 if err != nil { 226 ch <- target{labelSet: nil, err: err} 227 return 228 } 229 networkInterface, err := client.nic.Get(r.ResourceGroup, r.Name, "") 230 if err != nil { 231 d.logger.Errorf("Unable to get network interface %s: %s", r.Name, err) 232 ch <- target{labelSet: nil, err: err} 233 // Get out of this routine because we cannot continue without a network interface. 234 return 235 } 236 237 // Unfortunately Azure does not return information on whether a VM is deallocated. 238 // This information is available via another API call however the Go SDK does not 239 // yet support this. On deallocated machines, this value happens to be nil so it 240 // is a cheap and easy way to determine if a machine is allocated or not. 241 if networkInterface.Properties.Primary == nil { 242 d.logger.Debugf("Virtual machine %s is deallocated. Skipping during Azure SD.", *vm.Name) 243 ch <- target{} 244 return 245 } 246 247 if *networkInterface.Properties.Primary { 248 for _, ip := range *networkInterface.Properties.IPConfigurations { 249 if ip.Properties.PrivateIPAddress != nil { 250 labels[azureLabelMachinePrivateIP] = model.LabelValue(*ip.Properties.PrivateIPAddress) 251 address := net.JoinHostPort(*ip.Properties.PrivateIPAddress, fmt.Sprintf("%d", d.port)) 252 labels[model.AddressLabel] = model.LabelValue(address) 253 ch <- target{labelSet: labels, err: nil} 254 return 255 } 256 // If we made it here, we don't have a private IP which should be impossible. 257 // Return an empty target and error to ensure an all or nothing situation. 258 err = fmt.Errorf("unable to find a private IP for VM %s", *vm.Name) 259 ch <- target{labelSet: nil, err: err} 260 return 261 } 262 } 263 } 264 }(i, vm) 265 } 266 267 for range machines { 268 tgt := <-ch 269 if tgt.err != nil { 270 return nil, fmt.Errorf("unable to complete Azure service discovery: %s", err) 271 } 272 if tgt.labelSet != nil { 273 tg.Targets = append(tg.Targets, tgt.labelSet) 274 } 275 } 276 277 d.logger.Debugf("Azure discovery completed.") 278 return tg, nil 279} 280