1# Windows Azure Linux Agent
3# Copyright 2016 Microsoft Corporation
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
9#     http://www.apache.org/licenses/LICENSE-2.0
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
19Handle packages and modules to enable RDMA for IB networking
22import os
23import re
24import time
26import azurelinuxagent.common.conf as conf
27import azurelinuxagent.common.logger as logger
28import azurelinuxagent.common.utils.fileutil as fileutil
29import azurelinuxagent.common.utils.shellutil as shellutil
30from azurelinuxagent.common.utils.textutil import parse_doc, find, getattrib
32dapl_config_paths = [
33    '/etc/dat.conf',
34    '/etc/rdma/dat.conf',
35    '/usr/local/etc/dat.conf'
def setup_rdma_device(nd_version, shared_conf):
    """Provision the RDMA device from the details found in SharedConfig.

    The RDMA IPv4 and MAC addresses are read from the ``rdmaIPv4Address`` and
    ``rdmaMacAddress`` attributes of the <Instance> element of
    ``shared_conf.xml_text``. When the document cannot be parsed, or the
    element or one of the attributes is missing, an error is logged and the
    function returns without doing anything else. Otherwise an
    RDMADeviceHandler is created with the collected values (and nd_version)
    and started.
    """
    logger.verbose("Parsing SharedConfig XML contents for RDMA details")
    document = parse_doc(shared_conf.xml_text)
    if document is None:
        logger.error("Could not parse SharedConfig XML document")
        return

    instance = find(document, "Instance")
    if not instance:
        logger.error("Could not find <Instance> in SharedConfig document")
        return

    ipv4 = getattrib(instance, "rdmaIPv4Address")
    if not ipv4:
        logger.error(
            "Could not find rdmaIPv4Address attribute on Instance element of SharedConfig.xml document")
        return

    raw_mac = getattrib(instance, "rdmaMacAddress")
    if not raw_mac:
        logger.error(
            "Could not find rdmaMacAddress attribute on Instance element of SharedConfig.xml document")
        return

    # Insert a colon after every two characters of the MAC address
    # (e.g. 00155D33FF1D -> 00:15:5D:33:FF:1D).
    octets = []
    for offset in range(0, len(raw_mac), 2):
        octets.append(raw_mac[offset:offset + 2])
    mac = ':'.join(octets)

    details = "Found RDMA details. IPv4={0} MAC={1}".format(ipv4, mac)
    logger.info(details)

    # Set up the RDMA device with the collected information.
    handler = RDMADeviceHandler(ipv4, mac, nd_version)
    handler.start()
    logger.info("RDMA: device is set up")
class RDMAHandler(object):
    """Common steps for managing the RDMA kernel driver.

    install_driver() is distribution specific and is meant to be overridden
    by child implementations; the remaining methods operate on
    driver_module_name and on the KVP pool file.
    """

    driver_module_name = 'hv_network_direct'
    nd_version = None

    # KVP pool file scanned for the driver version, and the fixed sizes (in
    # bytes) of the key and value fields of every record stored in it.
    kvp_pool_path = '/var/lib/hyperv/.kvp_pool_0'
    kvp_key_size = 512
    kvp_value_size = 2048

    def get_rdma_version(self):
        """Retrieve the firmware version information from the system.

        This depends on information provided by the Linux kernel: the KVP
        pool file is scanned record by record for the "NdDriverVersion" key.
        The value found is stored in self.nd_version; once that attribute
        holds a non-empty value it is returned without reading the file again.

        Returns the version as text, or None (after logging an error) when
        the pool file does not exist or does not contain the key.
        """
        if self.nd_version:
            return self.nd_version

        driver_info_source = self.kvp_pool_path
        if not os.path.isfile(driver_info_source):
            error_msg = 'RDMA: Source file "%s" does not exist. '
            error_msg += 'Kernel does not provide the necessary '
            error_msg += 'information or the kvp daemon is not running.'
            logger.error(error_msg % driver_info_source)
            return None

        with open(driver_info_source, "rb") as pool_file:
            while True:
                key = pool_file.read(self.kvp_key_size)
                value = pool_file.read(self.kvp_value_size)
                if not (key and value):
                    # End of file: no further record to inspect.
                    break
                # Keys and values are NUL padded; only the part before the
                # first NUL byte is significant. Comparing the raw key avoids
                # decoding unrelated records.
                if key.partition(b"\x00")[0] == b"NdDriverVersion":
                    # Always decode so that an empty value is returned as
                    # text rather than as a bytes object.
                    self.nd_version = value.partition(b"\x00")[0].decode()
                    return self.nd_version

        error_msg = 'RDMA: NdDriverVersion not found in "%s"'
        logger.error(error_msg % driver_info_source)
        return None

    @staticmethod
    def is_kvp_daemon_running():
        """Look for kvp daemon names in ps -ef output and return True/False.

        Raises Exception when "ps -ef" exits with a non-zero status.
        """
        # for centos, the hypervkvpd and the hv_kvp_daemon both are ok.
        # for suse, it uses hv_kvp_daemon
        kvp_daemon_names = ['hypervkvpd', 'hv_kvp_daemon']

        exitcode, ps_out = shellutil.run_get_output("ps -ef")
        if exitcode != 0:
            raise Exception('RDMA: ps -ef failed: %s' % ps_out)
        for n in kvp_daemon_names:
            if n in ps_out:
                logger.info('RDMA: kvp daemon (%s) is running' % n)
                return True
            logger.verbose('RDMA: kvp daemon (%s) is not running' % n)
        return False

    def load_driver_module(self):
        """Load the kernel driver, this depends on the proper driver
           to be installed with the install_driver() method.

           Returns True when modprobe succeeds, False (after logging an
           error) otherwise."""
        logger.info("RDMA: probing module '%s'" % self.driver_module_name)
        result = shellutil.run('modprobe --first-time %s' % self.driver_module_name)
        if result != 0:
            error_msg = 'Could not load "%s" kernel module. '
            error_msg += 'Run "modprobe --first-time %s" as root for more details'
            logger.error(
                error_msg % (self.driver_module_name, self.driver_module_name)
            )
            return False
        logger.info('RDMA: Loaded the kernel driver successfully.')
        return True

    def install_driver_if_needed(self):
        """Call install_driver() when a driver version is known and the
           RDMA driver check is enabled in the configuration; otherwise only
           log why the installation is skipped."""
        if self.nd_version:
            if conf.enable_check_rdma_driver():
                self.install_driver()
            else:
                logger.info('RDMA: check RDMA driver is disabled, skip installing driver')
        else:
            logger.info('RDMA: skip installing driver when ndversion not present\n')

    def install_driver(self):
        """Install the driver. This is distribution specific and must
           be overwritten in the child implementation."""
        logger.error('RDMAHandler.install_driver not implemented')

    def is_driver_loaded(self):
        """Check if the network module is loaded in kernel space.

           Returns True when "lsmod" lists a module whose name starts with
           driver_module_name, False otherwise."""
        cmd = 'lsmod | grep ^%s' % self.driver_module_name
        # Only the command output matters here; the exit status is ignored.
        _, loaded_modules = shellutil.run_get_output(cmd)
        logger.info('RDMA: Checking if the module loaded.')
        if loaded_modules:
            logger.info('RDMA: module loaded.')
            return True
        logger.info('RDMA: module not loaded.')
        return False

    def reboot_system(self):
        """Reboot the system. This is required as the kernel module for
           the rdma driver cannot be unloaded with rmmod"""
        logger.info('RDMA: Rebooting system.')
        ret = shellutil.run('shutdown -r now')
        if ret != 0:
            logger.error('RDMA: Failed to reboot the system')
# Locations checked, in this order, for the DAPL configuration file
# (dat.conf). The same list is also assigned near the top of this module.
dapl_config_paths = [
    '/etc/dat.conf',
    '/etc/rdma/dat.conf',
    '/usr/local/etc/dat.conf',
]
class RDMADeviceHandler(object):
    """
    Responsible for writing RDMA IP and MAC address to the /dev/hvnd_rdma
    interface.

    When no Network Direct version is given, the SRIOV path is used instead
    and the addresses are assigned to the IP over IB interface(s).
    """

    rdma_dev = '/dev/hvnd_rdma'
    sriov_dir = '/sys/class/infiniband'
    device_check_timeout_sec = 120
    device_check_interval_sec = 1
    ipoib_check_timeout_sec = 60
    ipoib_check_interval_sec = 1

    # KVP pool file scanned for the IP over IB data, and the fixed sizes (in
    # bytes) of the key and value fields of every record stored in it.
    kvp_pool_path = '/var/lib/hyperv/.kvp_pool_0'
    kvp_key_size = 512
    kvp_value_size = 2048

    ipv4_addr = None
    mac_addr = None
    nd_version = None

    def __init__(self, ipv4_addr, mac_addr, nd_version):
        self.ipv4_addr = ipv4_addr
        self.mac_addr = mac_addr
        self.nd_version = nd_version

    def start(self):
        """Run the device processing, logging when it starts and completes."""
        logger.info("RDMA: starting device processing.")
        self.process()
        logger.info("RDMA: completed device processing.")

    def process(self):
        """Provision the SRIOV device when nd_version is empty, the Network
        Direct device otherwise.

        Any exception raised while provisioning is logged and not propagated.
        """
        try:
            if not self.nd_version:
                logger.info("RDMA: provisioning SRIOV RDMA device.")
                self.provision_sriov_rdma()
            else:
                logger.info("RDMA: provisioning Network Direct RDMA device.")
                self.provision_network_direct_rdma()
        except Exception as e:
            logger.error("RDMA: device processing failed: {0}".format(e))

    def provision_network_direct_rdma(self):
        """Update dat.conf, load the hv_network_direct module and configure
        the network interface.

        The address details are written to rdma_dev unless modinfo reports a
        driver version later than 4.0. When the RDMA driver check is disabled
        in the configuration only dat.conf and the network interface are
        updated. If the module cannot be loaded an error is logged and the
        remaining steps are skipped.
        """
        RDMADeviceHandler.update_dat_conf(dapl_config_paths, self.ipv4_addr)

        if not conf.enable_check_rdma_driver():
            logger.info("RDMA: skip checking RDMA driver version")
            RDMADeviceHandler.update_network_interface(self.mac_addr, self.ipv4_addr)
            return

        skip_rdma_device = False
        module_name = "hv_network_direct"
        # "modprobe -R" prints the resolved module name; fall back to the
        # original name when the resolution fails.
        retcode, out = shellutil.run_get_output("modprobe -R %s" % module_name, chk_err=False)
        if retcode == 0:
            module_name = out.strip()
        else:
            logger.info("RDMA: failed to resolve module name. Use original name")
        retcode, out = shellutil.run_get_output("modprobe %s" % module_name)
        if retcode != 0:
            logger.error("RDMA: failed to load module %s" % module_name)
            return
        retcode, out = shellutil.run_get_output("modinfo %s" % module_name)
        if retcode == 0:
            version = re.search(r"version:\s+(\d+)\.(\d+)\.(\d+)\D", out, re.IGNORECASE)
            if version:
                major = int(version.group(1))
                minor = int(version.group(2))
                if (major, minor) > (4, 0):
                    logger.info("Skip setting /dev/hvnd_rdma on 4.1 or later")
                    skip_rdma_device = True
            else:
                logger.info("RDMA: hv_network_direct driver version not present, assuming 4.0.x or older.")
        else:
            logger.warn("RDMA: failed to get module info on hv_network_direct.")

        if not skip_rdma_device:
            RDMADeviceHandler.wait_rdma_device(
                self.rdma_dev, self.device_check_timeout_sec, self.device_check_interval_sec)
            RDMADeviceHandler.write_rdma_config_to_device(
                self.rdma_dev, self.ipv4_addr, self.mac_addr)

        RDMADeviceHandler.update_network_interface(self.mac_addr, self.ipv4_addr)

    def provision_sriov_rdma(self):
        """Assign the IP over IB address(es).

        When the KVP pool contains IPoIB data, all MAC/IP pairs listed there
        are provisioned. Otherwise, if ipv4_addr is set, that single address
        is assigned to ib0 once an Infiniband device is present.
        """
        (key, value) = self.read_ipoib_data()
        if key:
            # provision multiple IP over IB addresses
            logger.info("RDMA: provisioning multiple IP over IB addresses")
            self.provision_sriov_multiple_ib(value)
        elif self.ipv4_addr:
            logger.info("RDMA: provisioning single IP over IB address")
            # provision a single IP over IB address
            RDMADeviceHandler.wait_any_rdma_device(
                self.sriov_dir, self.device_check_timeout_sec, self.device_check_interval_sec)
            RDMADeviceHandler.update_iboip_interface(
                self.ipv4_addr, self.ipoib_check_timeout_sec, self.ipoib_check_interval_sec)
        else:
            logger.info("RDMA: missing IP address")

    def read_ipoib_data(self):
        """Read the "IPoIB_Data" record from the KVP pool file.

        Returns a ("IPoIB_Data", value) tuple of text values for the first
        matching record, or (None, None) when the pool file does not exist
        (an error is logged in that case) or holds no such record.
        """
        # read from KVP pool 0 to figure out the IP over IB addresses
        driver_info_source = self.kvp_pool_path

        if not os.path.isfile(driver_info_source):
            logger.error("RDMA: can't read KVP pool 0")
            return (None, None)

        with open(driver_info_source, "rb") as pool_file:
            while True:
                key = pool_file.read(self.kvp_key_size)
                value = pool_file.read(self.kvp_value_size)
                if not (key and value):
                    # End of file: no further record to inspect.
                    break
                # Keys and values are NUL padded; only the part before the
                # first NUL byte is significant.
                raw_key = key.partition(b"\x00")[0]
                if raw_key == b"IPoIB_Data":
                    # Always decode so that an empty value is returned as
                    # text; callers split it with a text separator.
                    return (raw_key.decode(), value.partition(b"\x00")[0].decode())

        return (None, None)

    def provision_sriov_multiple_ib(self, value):
        """Assign every MAC/IP pair described by value to its IB interface.

        value has the format "NUMPAIRS:<number>|<MAC>:<IP>|<MAC>:<IP>...".
        An error is logged and nothing is assigned when the announced number
        does not match the number of entries or when an entry cannot be
        parsed. The assignment is retried until all pairs are set or the
        retries are exhausted.
        """
        mac_ip_array = []

        values = value.split("|")
        num_ips = len(values) - 1
        # values[0] tells how many IPs. Format - NUMPAIRS:<number>
        match = re.match(r"NUMPAIRS:(\d+)", values[0])
        if not match:
            logger.error("RDMA: failed to find number of IP addresses in {0}".format(values[0]))
            return
        num = int(match.group(1))
        if num != num_ips:
            logger.error("RDMA: multiple IPs reported num={0} actual number of IPs={1}".format(num, num_ips))
            return

        for entry in values[1:]:
            # each MAC/IP entry is of format <MAC>:<IP>
            match = re.match(r"([^:]+):(\d+\.\d+\.\d+\.\d+)", entry)
            if not match:
                logger.error("RDMA: failed to find MAC/IP address in {0}".format(entry))
                return
            mac_ip_array.append((match.group(1), match.group(2)))

        # try to assign all MAC/IP addresses to IB interfaces
        # retry for up to 60 times, with 1 seconds delay between each
        retry = 60
        while retry > 0:
            count = self.update_iboip_interfaces(mac_ip_array)
            if count == len(mac_ip_array):
                return

            time.sleep(1)
            retry -= 1

        logger.error("RDMA: failed to set all IP over IB addresses")

    def update_iboip_interfaces(self, mac_ip_array):
        """Assign addresses to all IP over IB interfaces specified in
        mac_ip_array, a list of (MAC without colons, IPv4 address) tuples.

        Returns the number of IP addresses successfully assigned; an address
        that is already set counts as assigned.
        """
        net_dir = "/sys/class/net"
        nics = os.listdir(net_dir)
        count = 0

        for nic in nics:
            # look for IBoIP interface of format ibXXX
            if not re.match(r"ib\d+", nic):
                continue

            with open(os.path.join(net_dir, nic, "address")) as address_file:
                hw_addr = address_file.read()

            if not hw_addr:
                logger.error("RDMA: can't read address for device {0}".format(nic))
                continue

            hw_addr = hw_addr.upper()

            # Of the last eight octets of the address, keep the first three
            # and the last three.
            match = re.match(r".+(\w\w):(\w\w):(\w\w):\w\w:\w\w:(\w\w):(\w\w):(\w\w)\n", hw_addr)
            if not match:
                logger.error("RDMA: failed to parse address for device {0} address {1}".format(nic, hw_addr))
                continue

            # format an MAC address without :
            mac_addr = "".join(match.groups())

            for mac_ip in mac_ip_array:
                if mac_ip[0] != mac_addr:
                    continue

                ret = 0
                try:
                    # bring up the interface and set its IP address
                    ip_command = ["ip", "link", "set", nic, "up"]
                    shellutil.run_command(ip_command)

                    ip_command = ["ip", "addr", "add", "{0}/16".format(mac_ip[1]), "dev", nic]
                    shellutil.run_command(ip_command)
                except shellutil.CommandError as error:
                    ret = error.returncode

                if ret == 0:
                    logger.info("RDMA: set address {0} to device {1}".format(mac_ip[1], nic))
                    count += 1
                elif ret == 2:
                    # return value 2 means the address is already set
                    count += 1
                else:
                    logger.error("RDMA: failed to set IP address {0} on device {1}".format(mac_ip[1], nic))

                break

        return count

    @staticmethod
    def update_iboip_interface(ipv4_addr, timeout_sec, check_interval_sec):
        """Wait for ib0 to show up in "ifconfig -a" and assign ipv4_addr/16
        to it.

        Raises Exception when the interfaces cannot be listed, when ib0 does
        not appear within the allowed retries, or when the address cannot be
        set.
        """
        logger.info("Wait for ib0 become available")
        total_retries = timeout_sec / check_interval_sec
        n = 0
        found_ib0 = None
        while n < total_retries:
            ret, output = shellutil.run_get_output("ifconfig -a")
            if ret != 0:
                raise Exception("Failed to list network interfaces")
            found_ib0 = re.search("ib0", output, re.IGNORECASE)
            if found_ib0:
                break
            time.sleep(check_interval_sec)
            n += 1

        if not found_ib0:
            raise Exception("ib0 is not available")

        netmask = 16
        logger.info("RDMA: configuring IPv4 addr and netmask on ipoib interface")
        addr = '{0}/{1}'.format(ipv4_addr, netmask)
        if shellutil.run("ifconfig ib0 {0}".format(addr)) != 0:
            raise Exception("Could not set addr to {0} on ib0".format(addr))
        logger.info("RDMA: ipoib address and netmask configured on interface")

    @staticmethod
    def update_dat_conf(paths, ipv4_addr):
        """
        Looks at paths for dat.conf file and updates the ip address for the
        infiniband interface.

        Only the first existing file is updated. Raises Exception when none
        of the paths is an existing file.
        """
        logger.info("Updating DAPL configuration file")
        for f in paths:
            logger.info("RDMA: trying {0}".format(f))
            if not os.path.isfile(f):
                logger.info(
                    "RDMA: DAPL config not found at {0}".format(f))
                continue
            logger.info("RDMA: DAPL config is at: {0}".format(f))
            cfg = fileutil.read_file(f)
            new_cfg = RDMADeviceHandler.replace_dat_conf_contents(
                cfg, ipv4_addr)
            fileutil.write_file(f, new_cfg)
            logger.info("RDMA: DAPL configuration is updated")
            return

        raise Exception("RDMA: DAPL configuration file not found at predefined paths")

    @staticmethod
    def replace_dat_conf_contents(cfg, ipv4_addr):
        """Return cfg with the quoted "<device> 0" argument of every
        ofa-v2-ib0 entry replaced by "<ipv4_addr> 0".

        Text that does not match the entry is returned unchanged.
        """
        old = r'ofa-v2-ib0 u2\.0 nonthreadsafe default libdaplofa\.so\.2 dapl\.2\.0 "\S+ 0"'
        new = 'ofa-v2-ib0 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 "{0} 0"'.format(
            ipv4_addr)
        # A callable replacement is inserted literally, so nothing in the
        # address is interpreted as a group reference or escape.
        return re.sub(old, lambda _match: new, cfg)

    @staticmethod
    def write_rdma_config_to_device(path, ipv4_addr, mac_addr):
        """Write the generated RDMA configuration string to the file at path."""
        data = RDMADeviceHandler.generate_rdma_config(ipv4_addr, mac_addr)
        logger.info(
            "RDMA: Updating device with configuration: {0}".format(data))
        with open(path, "w") as f:
            logger.info("RDMA: Device opened for writing")
            f.write(data)
        logger.info("RDMA: Updated device with IPv4/MAC addr successfully")

    @staticmethod
    def generate_rdma_config(ipv4_addr, mac_addr):
        """Return the configuration string carrying the MAC and IPv4 address."""
        return 'rdmaMacAddress="{0}" rdmaIPv4Address="{1}"'.format(mac_addr, ipv4_addr)

    @staticmethod
    def wait_rdma_device(path, timeout_sec, check_interval_sec):
        """Poll every check_interval_sec seconds until path exists.

        Raises Exception when the path did not show up within the allowed
        number of retries (timeout_sec / check_interval_sec).
        """
        logger.info("RDMA: waiting for device={0} timeout={1}s".format(path, timeout_sec))
        total_retries = timeout_sec / check_interval_sec
        n = 0
        while n < total_retries:
            if os.path.exists(path):
                logger.info("RDMA: device ready")
                return
            logger.verbose(
                "RDMA: device not ready, sleep {0}s".format(check_interval_sec))
            time.sleep(check_interval_sec)
            n += 1
        logger.error("RDMA device wait timed out")
        raise Exception("The device did not show up in {0} seconds ({1} retries)".format(
            timeout_sec, total_retries))

    @staticmethod
    def wait_any_rdma_device(directory, timeout_sec, check_interval_sec):
        """Poll every check_interval_sec seconds until directory has at least
        one entry.

        A directory that cannot be listed (for example because it does not
        exist yet) is treated as having no entries. Raises Exception when no
        entry showed up within the allowed number of retries
        (timeout_sec / check_interval_sec).
        """
        logger.info(
            "RDMA: waiting for any Infiniband device at directory={0} timeout={1}s".format(
                directory, timeout_sec))
        total_retries = timeout_sec / check_interval_sec
        n = 0
        while n < total_retries:
            try:
                entries = os.listdir(directory)
            except OSError:
                # Keep waiting instead of aborting while the directory is
                # not available.
                entries = []
            if entries:
                logger.info("RDMA: device found in {0}".format(directory))
                return
            logger.verbose(
                "RDMA: device not ready, sleep {0}s".format(check_interval_sec))
            time.sleep(check_interval_sec)
            n += 1
        logger.error("RDMA device wait timed out")
        raise Exception("The device did not show up in {0} seconds ({1} retries)".format(
            timeout_sec, total_retries))

    @staticmethod
    def update_network_interface(mac_addr, ipv4_addr):
        """Bring up the interface that has mac_addr and assign ipv4_addr/16
        to it.

        Raises Exception when the interface cannot be found, brought up or
        configured.
        """
        netmask = 16

        logger.info("RDMA: will update the network interface with IPv4/MAC")

        if_name = RDMADeviceHandler.get_interface_by_mac(mac_addr)
        logger.info("RDMA: network interface found: {0}", if_name)
        logger.info("RDMA: bringing network interface up")
        if shellutil.run("ifconfig {0} up".format(if_name)) != 0:
            raise Exception("Could not bring up RDMA interface: {0}".format(if_name))

        logger.info("RDMA: configuring IPv4 addr and netmask on interface")
        addr = '{0}/{1}'.format(ipv4_addr, netmask)
        if shellutil.run("ifconfig {0} {1}".format(if_name, addr)) != 0:
            raise Exception("Could not set addr to {1} on {0}".format(if_name, addr))
        logger.info("RDMA: network address and netmask configured on interface")

    @staticmethod
    def get_interface_by_mac(mac):
        """Return the name of the eth<N> interface whose hardware address in
        the "ifconfig -a" output is mac (compared case-insensitively).

        Raises Exception when the interfaces cannot be listed or when no
        interface with that address is found.
        """
        ret, output = shellutil.run_get_output("ifconfig -a")
        if ret != 0:
            raise Exception("Failed to list network interfaces")
        output = output.replace('\n', '')
        match = re.search(r"(eth\d+).*(HWaddr|ether) {0}".format(re.escape(mac)),
                          output, re.IGNORECASE)
        if match is None:
            raise Exception("Failed to get ifname with mac: {0}".format(mac))
        # The match spans from the first interface name up to the address;
        # the interface owning the address is the last name inside that span.
        eths = re.findall(r"eth\d+", match.group(0))
        if not eths:
            raise Exception("ifname with mac: {0} not found".format(mac))
        return eths[-1]