# Windows Azure Linux Agent
#
# Copyright 2016 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
Handle packages and modules to enable RDMA for IB networking
"""

import os
import re
import time

import azurelinuxagent.common.conf as conf
import azurelinuxagent.common.logger as logger
import azurelinuxagent.common.utils.fileutil as fileutil
import azurelinuxagent.common.utils.shellutil as shellutil
from azurelinuxagent.common.utils.textutil import parse_doc, find, getattrib

# Candidate locations of the DAPL configuration file, tried in order.
dapl_config_paths = [
    '/etc/dat.conf',
    '/etc/rdma/dat.conf',
    '/usr/local/etc/dat.conf'
]

# KVP pool 0 is read as a sequence of fixed-size records: a 512-byte key
# field followed by a 2048-byte value field, each NUL-terminated/padded.
_KVP_POOL_FILE = '/var/lib/hyperv/.kvp_pool_0'
_KVP_KEY_SIZE = 512
_KVP_VALUE_SIZE = 2048


def _iter_kvp_records(pool_path, key_size=_KVP_KEY_SIZE, value_size=_KVP_VALUE_SIZE):
    """
    Yield ``(key, value)`` byte strings for every record in a KVP pool file.

    Each field is cut at its first NUL byte.  Iteration stops at end of
    file, or at a trailing record whose key or value field is empty.
    The fields are left undecoded so that a record the caller does not care
    about can never trigger a decoding error.

    :param pool_path: path of the pool file to read (opened in binary mode)
    :param key_size: number of bytes in the key field of one record
    :param value_size: number of bytes in the value field of one record
    """
    with open(pool_path, "rb") as pool_file:
        while True:
            raw_key = pool_file.read(key_size)
            raw_value = pool_file.read(value_size)
            if not (raw_key and raw_value):
                return
            yield raw_key.partition(b"\x00")[0], raw_value.partition(b"\x00")[0]


def setup_rdma_device(nd_version, shared_conf):
    """
    Read the RDMA IPv4 and MAC address from the SharedConfig XML and
    provision the RDMA device with them.

    Any missing piece of information is logged as an error and the function
    returns without provisioning.

    :param nd_version: Network Direct driver version; passed through to
        RDMADeviceHandler (a false value selects SRIOV provisioning there)
    :param shared_conf: object exposing the SharedConfig document as
        ``xml_text``
    """
    logger.verbose("Parsing SharedConfig XML contents for RDMA details")
    xml_doc = parse_doc(shared_conf.xml_text)
    if xml_doc is None:
        logger.error("Could not parse SharedConfig XML document")
        return
    instance_elem = find(xml_doc, "Instance")
    if not instance_elem:
        logger.error("Could not find <Instance> in SharedConfig document")
        return

    rdma_ipv4_addr = getattrib(instance_elem, "rdmaIPv4Address")
    if not rdma_ipv4_addr:
        logger.error(
            "Could not find rdmaIPv4Address attribute on Instance element of SharedConfig.xml document")
        return

    rdma_mac_addr = getattrib(instance_elem, "rdmaMacAddress")
    if not rdma_mac_addr:
        logger.error(
            "Could not find rdmaMacAddress attribute on Instance element of SharedConfig.xml document")
        return

    # add colons to the MAC address (e.g. 00155D33FF1D ->
    # 00:15:5D:33:FF:1D)
    rdma_mac_addr = ':'.join([rdma_mac_addr[i:i + 2]
                              for i in range(0, len(rdma_mac_addr), 2)])
    logger.info("Found RDMA details. IPv4={0} MAC={1}".format(
        rdma_ipv4_addr, rdma_mac_addr))

    # Set up the RDMA device with collected information
    RDMADeviceHandler(rdma_ipv4_addr, rdma_mac_addr, nd_version).start()
    logger.info("RDMA: device is set up")


class RDMAHandler(object):
    """
    Base handler for the RDMA kernel driver: version discovery, driver
    installation hook, module loading and reboot.
    """

    # Name passed to modprobe / searched for in lsmod output.
    driver_module_name = 'hv_network_direct'
    # Cached value of the NdDriverVersion KVP entry (None until read).
    nd_version = None

    def get_rdma_version(self):
        """Retrieve the firmware version information from the system.
        This depends on information provided by the Linux kernel.

        :return: the cached or freshly read NdDriverVersion value, or None
            when the KVP pool file is missing or has no such key (an error
            is logged in both cases)
        """
        if self.nd_version:
            return self.nd_version

        if not os.path.isfile(_KVP_POOL_FILE):
            error_msg = 'RDMA: Source file "%s" does not exist. '
            error_msg += 'Kernel does not provide the necessary '
            error_msg += 'information or the kvp daemon is not running.'
            logger.error(error_msg % _KVP_POOL_FILE)
            return None

        for key, value in _iter_kvp_records(_KVP_POOL_FILE):
            if key == b"NdDriverVersion":
                # The first matching record wins, even if its value is empty.
                self.nd_version = value.decode()
                return self.nd_version

        error_msg = 'RDMA: NdDriverVersion not found in "%s"'
        logger.error(error_msg % _KVP_POOL_FILE)
        return None

    @staticmethod
    def is_kvp_daemon_running():
        """Look for kvp daemon names in ps -ef output and return True/False

        :raises Exception: when ``ps -ef`` exits with a non-zero status
        """
        # for centos, the hypervkvpd and the hv_kvp_daemon both are ok.
        # for suse, it uses hv_kvp_daemon
        kvp_daemon_names = ['hypervkvpd', 'hv_kvp_daemon']

        exitcode, ps_out = shellutil.run_get_output("ps -ef")
        if exitcode != 0:
            raise Exception('RDMA: ps -ef failed: %s' % ps_out)
        for n in kvp_daemon_names:
            if n in ps_out:
                logger.info('RDMA: kvp daemon (%s) is running' % n)
                return True
            logger.verbose('RDMA: kvp daemon (%s) is not running' % n)
        return False

    def load_driver_module(self):
        """Load the kernel driver, this depends on the proper driver
        to be installed with the install_driver() method

        :return: True when modprobe succeeded, False otherwise
        """
        logger.info("RDMA: probing module '%s'" % self.driver_module_name)
        result = shellutil.run('modprobe --first-time %s' % self.driver_module_name)
        if result != 0:
            error_msg = 'Could not load "%s" kernel module. '
            error_msg += 'Run "modprobe --first-time %s" as root for more details'
            logger.error(
                error_msg % (self.driver_module_name, self.driver_module_name)
            )
            return False
        logger.info('RDMA: Loaded the kernel driver successfully.')
        return True

    def install_driver_if_needed(self):
        """Call install_driver() when an ND version is known and the RDMA
        driver check is enabled in the configuration; otherwise only log
        why the installation is skipped."""
        if self.nd_version:
            if conf.enable_check_rdma_driver():
                self.install_driver()
            else:
                logger.info('RDMA: check RDMA driver is disabled, skip installing driver')
        else:
            logger.info('RDMA: skip installing driver when ndversion not present\n')

    def install_driver(self):
        """Install the driver. This is distribution specific and must
        be overwritten in the child implementation."""
        logger.error('RDMAHandler.install_driver not implemented')

    def is_driver_loaded(self):
        """Check if the network module is loaded in kernel space

        :return: True when ``lsmod`` lists a module whose name starts with
            driver_module_name, False otherwise
        """
        cmd = 'lsmod | grep ^%s' % self.driver_module_name
        # Only the output matters here; the exit status is not inspected.
        _, loaded_modules = shellutil.run_get_output(cmd)
        logger.info('RDMA: Checking if the module loaded.')
        if loaded_modules:
            logger.info('RDMA: module loaded.')
            return True
        logger.info('RDMA: module not loaded.')
        return False

    def reboot_system(self):
        """Reboot the system. This is required as the kernel module for
        the rdma driver cannot be unloaded with rmmod"""
        logger.info('RDMA: Rebooting system.')
        ret = shellutil.run('shutdown -r now')
        if ret != 0:
            logger.error('RDMA: Failed to reboot the system')


class RDMADeviceHandler(object):
    """
    Responsible for writing RDMA IP and MAC address to the /dev/hvnd_rdma
    interface.
    """

    # Device node that receives the Network Direct RDMA configuration.
    rdma_dev = '/dev/hvnd_rdma'
    # Directory polled for any entry when provisioning SRIOV RDMA.
    sriov_dir = '/sys/class/infiniband'
    # Polling budget / interval (seconds) while waiting for the RDMA device.
    device_check_timeout_sec = 120
    device_check_interval_sec = 1
    # Polling budget / interval (seconds) while waiting for the ib0 interface.
    ipoib_check_timeout_sec = 60
    ipoib_check_interval_sec = 1

    ipv4_addr = None
    mac_addr = None
    nd_version = None

    def __init__(self, ipv4_addr, mac_addr, nd_version):
        """
        :param ipv4_addr: RDMA IPv4 address to configure
        :param mac_addr: MAC address of the RDMA interface
        :param nd_version: Network Direct version; a false value selects
            SRIOV provisioning in process()
        """
        self.ipv4_addr = ipv4_addr
        self.mac_addr = mac_addr
        self.nd_version = nd_version

    def start(self):
        """Run process() bracketed by start/completion log messages."""
        logger.info("RDMA: starting device processing.")
        self.process()
        logger.info("RDMA: completed device processing.")

    def process(self):
        """Provision SRIOV RDMA when no ND version is set, Network Direct
        RDMA otherwise.  Any exception is logged and swallowed."""
        try:
            if not self.nd_version:
                logger.info("RDMA: provisioning SRIOV RDMA device.")
                self.provision_sriov_rdma()
            else:
                logger.info("RDMA: provisioning Network Direct RDMA device.")
                self.provision_network_direct_rdma()
        except Exception as e:
            logger.error("RDMA: device processing failed: {0}".format(e))

    def provision_network_direct_rdma(self):
        """
        Update dat.conf, load the hv_network_direct module, write the
        IPv4/MAC configuration to the RDMA device (skipped for driver
        versions 4.1 and later) and configure the network interface.

        When the RDMA driver check is disabled in the configuration, only
        dat.conf and the network interface are updated.
        """
        RDMADeviceHandler.update_dat_conf(dapl_config_paths, self.ipv4_addr)

        if not conf.enable_check_rdma_driver():
            logger.info("RDMA: skip checking RDMA driver version")
            RDMADeviceHandler.update_network_interface(self.mac_addr, self.ipv4_addr)
            return

        skip_rdma_device = False
        module_name = "hv_network_direct"
        # "modprobe -R" resolves the name to the actual module name, if any.
        retcode, out = shellutil.run_get_output("modprobe -R %s" % module_name, chk_err=False)
        if retcode == 0:
            module_name = out.strip()
        else:
            logger.info("RDMA: failed to resolve module name. Use original name")
        retcode, out = shellutil.run_get_output("modprobe %s" % module_name)
        if retcode != 0:
            logger.error("RDMA: failed to load module %s" % module_name)
            return
        retcode, out = shellutil.run_get_output("modinfo %s" % module_name)
        if retcode == 0:
            version = re.search(r"version:\s+(\d+)\.(\d+)\.(\d+)\D", out, re.IGNORECASE)
            if version:
                major = int(version.group(1))
                minor = int(version.group(2))
                if major > 4 or (major == 4 and minor > 0):
                    logger.info("Skip setting /dev/hvnd_rdma on 4.1 or later")
                    skip_rdma_device = True
            else:
                logger.info("RDMA: hv_network_direct driver version not present, assuming 4.0.x or older.")
        else:
            logger.warn("RDMA: failed to get module info on hv_network_direct.")

        if not skip_rdma_device:
            RDMADeviceHandler.wait_rdma_device(
                self.rdma_dev, self.device_check_timeout_sec, self.device_check_interval_sec)
            RDMADeviceHandler.write_rdma_config_to_device(
                self.rdma_dev, self.ipv4_addr, self.mac_addr)

        RDMADeviceHandler.update_network_interface(self.mac_addr, self.ipv4_addr)

    def provision_sriov_rdma(self):
        """
        Provision IP over IB addresses: all addresses listed in the
        IPoIB_Data KVP entry when it exists, otherwise the single
        ``ipv4_addr`` on ib0.  Logs and does nothing when neither is
        available.
        """
        (key, value) = self.read_ipoib_data()
        if key:
            # provision multiple IP over IB addresses
            logger.info("RDMA: provisioning multiple IP over IB addresses")
            self.provision_sriov_multiple_ib(value)
        elif self.ipv4_addr:
            logger.info("RDMA: provisioning single IP over IB address")
            # provision a single IP over IB address
            RDMADeviceHandler.wait_any_rdma_device(
                self.sriov_dir, self.device_check_timeout_sec, self.device_check_interval_sec)
            RDMADeviceHandler.update_iboip_interface(
                self.ipv4_addr, self.ipoib_check_timeout_sec, self.ipoib_check_interval_sec)
        else:
            logger.info("RDMA: missing IP address")

    def read_ipoib_data(self):
        """
        Read the IPoIB_Data entry from KVP pool 0 to figure out the IP over
        IB addresses.

        :return: ``("IPoIB_Data", value)`` with ``value`` always a text
            string (possibly empty), or ``(None, None)`` when the pool file
            or the entry is missing
        """
        if not os.path.isfile(_KVP_POOL_FILE):
            logger.error("RDMA: can't read KVP pool 0")
            return (None, None)

        for key, value in _iter_kvp_records(_KVP_POOL_FILE):
            if key == b"IPoIB_Data":
                # Always decode, so callers get text even for an empty value
                # (str.split() on bytes would fail on Python 3).
                return ("IPoIB_Data", value.decode())

        return (None, None)

    def provision_sriov_multiple_ib(self, value):
        """
        Parse an IPoIB_Data value and assign every listed address to its
        IB interface.

        The value is ``|``-separated: the first field is ``NUMPAIRS:<n>``
        and each following field is ``<MAC>:<IPv4>``.  Malformed input is
        logged and aborts provisioning.  Assignment is retried up to 60
        times, one second apart, until all addresses are set.

        :param value: text of the IPoIB_Data KVP entry
        """
        mac_ip_array = []

        values = value.split("|")
        num_ips = len(values) - 1
        # values[0] tells how many IPs. Format - NUMPAIRS:<number>
        match = re.match(r"NUMPAIRS:(\d+)", values[0])
        if not match:
            logger.error("RDMA: failed to find number of IP addresses in {0}".format(values[0]))
            return
        num = int(match.group(1))
        if num != num_ips:
            logger.error("RDMA: multiple IPs reported num={0} actual number of IPs={1}".format(num, num_ips))
            return

        for entry in values[1:]:
            # each MAC/IP entry is of format <MAC>:<IP>
            match = re.match(r"([^:]+):(\d+\.\d+\.\d+\.\d+)", entry)
            if not match:
                logger.error("RDMA: failed to find MAC/IP address in {0}".format(entry))
                return
            mac_ip_array.append((match.group(1), match.group(2)))

        # try to assign all MAC/IP addresses to IB interfaces
        # retry for up to 60 times, with 1 seconds delay between each
        retry = 60
        while retry > 0:
            count = self.update_iboip_interfaces(mac_ip_array)
            if count == len(mac_ip_array):
                return

            time.sleep(1)
            retry -= 1

        logger.error("RDMA: failed to set all IP over IB addresses")

    def update_iboip_interfaces(self, mac_ip_array):
        """
        Assign addresses to all IP over IB interfaces specified in
        mac_ip_array.

        :param mac_ip_array: list of ``(mac, ipv4)`` tuples, where ``mac``
            is written without colons
        :return: the number of IP addresses successfully assigned (an
            address that is already set counts as assigned)
        """
        net_dir = "/sys/class/net"
        nics = os.listdir(net_dir)
        count = 0

        for nic in nics:
            # look for IBoIP interface of format ibXXX
            if not re.match(r"ib\d+", nic):
                continue

            with open(os.path.join(net_dir, nic, "address")) as address_file:
                mac_addr = address_file.read()

            if not mac_addr:
                logger.error("RDMA: can't read address for device {0}".format(nic))
                continue

            mac_addr = mac_addr.upper()

            # Of the last eight octets of the hardware address, keep the
            # first three and the last three (the two in the middle are
            # dropped).
            match = re.match(r".+(\w\w):(\w\w):(\w\w):\w\w:\w\w:(\w\w):(\w\w):(\w\w)\n", mac_addr)
            if not match:
                logger.error("RDMA: failed to parse address for device {0} address {1}".format(nic, mac_addr))
                continue

            # format an MAC address without :
            mac_addr = "".join(match.groups())

            for mac_ip in mac_ip_array:
                if mac_ip[0] == mac_addr:
                    ret = 0
                    try:
                        # bring up the interface and set its IP address
                        ip_command = ["ip", "link", "set", nic, "up"]
                        shellutil.run_command(ip_command)

                        ip_command = ["ip", "addr", "add", "{0}/16".format(mac_ip[1]), "dev", nic]
                        shellutil.run_command(ip_command)
                    except shellutil.CommandError as error:
                        ret = error.returncode

                    if ret == 0:
                        logger.info("RDMA: set address {0} to device {1}".format(mac_ip[1], nic))

                    if ret and ret != 2:
                        # return value 2 means the address is already set
                        logger.error("RDMA: failed to set IP address {0} on device {1}".format(mac_ip[1], nic))
                    else:
                        count += 1

                    break

        return count

    @staticmethod
    def update_iboip_interface(ipv4_addr, timeout_sec, check_interval_sec):
        """
        Wait for the ib0 interface to appear in ``ifconfig -a`` output and
        assign ``ipv4_addr`` with a /16 netmask to it.

        :param ipv4_addr: address to configure on ib0
        :param timeout_sec: total time budget for the wait
        :param check_interval_sec: delay between two checks
        :raises Exception: when interfaces cannot be listed, ib0 does not
            show up in time, or the address cannot be set
        """
        logger.info("Wait for ib0 become available")
        total_retries = timeout_sec / check_interval_sec
        n = 0
        found_ib0 = None
        while not found_ib0 and n < total_retries:
            ret, output = shellutil.run_get_output("ifconfig -a")
            if ret != 0:
                raise Exception("Failed to list network interfaces")
            found_ib0 = re.search("ib0", output, re.IGNORECASE)
            if found_ib0:
                break
            time.sleep(check_interval_sec)
            n += 1

        if not found_ib0:
            raise Exception("ib0 is not available")

        netmask = 16
        logger.info("RDMA: configuring IPv4 addr and netmask on ipoib interface")
        addr = '{0}/{1}'.format(ipv4_addr, netmask)
        if shellutil.run("ifconfig ib0 {0}".format(addr)) != 0:
            raise Exception("Could not set addr to {0} on ib0".format(addr))
        logger.info("RDMA: ipoib address and netmask configured on interface")

    @staticmethod
    def update_dat_conf(paths, ipv4_addr):
        """
        Looks at paths for dat.conf file and updates the ip address for the
        infiniband interface.

        Only the first existing file in ``paths`` is rewritten.

        :raises Exception: when none of the paths is an existing file
        """
        logger.info("Updating DAPL configuration file")
        for f in paths:
            logger.info("RDMA: trying {0}".format(f))
            if not os.path.isfile(f):
                logger.info(
                    "RDMA: DAPL config not found at {0}".format(f))
                continue
            logger.info("RDMA: DAPL config is at: {0}".format(f))
            cfg = fileutil.read_file(f)
            new_cfg = RDMADeviceHandler.replace_dat_conf_contents(
                cfg, ipv4_addr)
            fileutil.write_file(f, new_cfg)
            logger.info("RDMA: DAPL configuration is updated")
            return

        raise Exception("RDMA: DAPL configuration file not found at predefined paths")

    @staticmethod
    def replace_dat_conf_contents(cfg, ipv4_addr):
        """
        Return ``cfg`` with the address in every ``ofa-v2-ib0`` entry
        replaced by ``ipv4_addr``.

        :param cfg: text of a dat.conf file
        :param ipv4_addr: address to put inside the quoted ``"<addr> 0"``
            field of the entry
        """
        old = r'ofa-v2-ib0 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 "\S+ 0"'
        new = "ofa-v2-ib0 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 \"{0} 0\"".format(
            ipv4_addr)
        return re.sub(old, new, cfg)

    @staticmethod
    def write_rdma_config_to_device(path, ipv4_addr, mac_addr):
        """Write the configuration string built by generate_rdma_config()
        to the device at ``path``."""
        data = RDMADeviceHandler.generate_rdma_config(ipv4_addr, mac_addr)
        logger.info(
            "RDMA: Updating device with configuration: {0}".format(data))
        with open(path, "w") as f:
            logger.info("RDMA: Device opened for writing")
            f.write(data)
        logger.info("RDMA: Updated device with IPv4/MAC addr successfully")

    @staticmethod
    def generate_rdma_config(ipv4_addr, mac_addr):
        """Return the ``rdmaMacAddress="..." rdmaIPv4Address="..."`` string
        written to the RDMA device."""
        return 'rdmaMacAddress="{0}" rdmaIPv4Address="{1}"'.format(mac_addr, ipv4_addr)

    @staticmethod
    def wait_rdma_device(path, timeout_sec, check_interval_sec):
        """
        Poll until ``path`` exists.

        :param path: device path to wait for
        :param timeout_sec: total time budget for the wait
        :param check_interval_sec: delay between two checks
        :raises Exception: when the path does not show up in time
        """
        logger.info("RDMA: waiting for device={0} timeout={1}s".format(path, timeout_sec))
        total_retries = timeout_sec / check_interval_sec
        n = 0
        while n < total_retries:
            if os.path.exists(path):
                logger.info("RDMA: device ready")
                return
            logger.verbose(
                "RDMA: device not ready, sleep {0}s".format(check_interval_sec))
            time.sleep(check_interval_sec)
            n += 1
        logger.error("RDMA device wait timed out")
        raise Exception("The device did not show up in {0} seconds ({1} retries)".format(
            timeout_sec, total_retries))

    @staticmethod
    def wait_any_rdma_device(directory, timeout_sec, check_interval_sec):
        """
        Poll until ``directory`` contains at least one entry.

        A directory that cannot be listed (for instance because it does not
        exist yet) is treated as "no device yet" and polling continues.

        :param directory: directory to watch
        :param timeout_sec: total time budget for the wait
        :param check_interval_sec: delay between two checks
        :raises Exception: when no entry shows up in time
        """
        logger.info(
            "RDMA: waiting for any Infiniband device at directory={0} timeout={1}s".format(
                directory, timeout_sec))
        total_retries = timeout_sec / check_interval_sec
        n = 0
        while n < total_retries:
            try:
                devices = os.listdir(directory)
            except OSError:
                # Keep waiting instead of aborting on the first check.
                devices = []
            if devices:
                logger.info("RDMA: device found in {0}".format(directory))
                return
            logger.verbose(
                "RDMA: device not ready, sleep {0}s".format(check_interval_sec))
            time.sleep(check_interval_sec)
            n += 1
        logger.error("RDMA device wait timed out")
        raise Exception("The device did not show up in {0} seconds ({1} retries)".format(
            timeout_sec, total_retries))

    @staticmethod
    def update_network_interface(mac_addr, ipv4_addr):
        """
        Bring up the interface that has ``mac_addr`` and assign
        ``ipv4_addr`` with a /16 netmask to it.

        :raises Exception: when the interface cannot be found, brought up
            or configured
        """
        netmask = 16

        logger.info("RDMA: will update the network interface with IPv4/MAC")

        if_name = RDMADeviceHandler.get_interface_by_mac(mac_addr)
        logger.info("RDMA: network interface found: {0}".format(if_name))
        logger.info("RDMA: bringing network interface up")
        if shellutil.run("ifconfig {0} up".format(if_name)) != 0:
            raise Exception("Could not bring up RDMA interface: {0}".format(if_name))

        logger.info("RDMA: configuring IPv4 addr and netmask on interface")
        addr = '{0}/{1}'.format(ipv4_addr, netmask)
        if shellutil.run("ifconfig {0} {1}".format(if_name, addr)) != 0:
            raise Exception("Could not set addr to {1} on {0}".format(if_name, addr))
        logger.info("RDMA: network address and netmask configured on interface")

    @staticmethod
    def get_interface_by_mac(mac):
        """
        Return the name of the ``eth<digit>`` interface whose hardware
        address in ``ifconfig -a`` output is ``mac`` (case-insensitive).

        :param mac: colon-separated MAC address
        :raises Exception: when interfaces cannot be listed or no interface
            with that address is found
        """
        ret, output = shellutil.run_get_output("ifconfig -a")
        if ret != 0:
            raise Exception("Failed to list network interfaces")
        output = output.replace('\n', '')
        # The MAC is escaped so it is always matched literally.
        match = re.search(r"(eth\d).*(HWaddr|ether) {0}".format(re.escape(mac)),
                          output, re.IGNORECASE)
        if match is None:
            raise Exception("Failed to get ifname with mac: {0}".format(mac))
        output = match.group(0)
        # The match may span several interfaces; the last eth name before
        # the address is the one that owns it.
        eths = re.findall(r"eth\d", output)
        if not eths:
            raise Exception("ifname with mac: {0} not found".format(mac))
        return eths[-1]