1# All Rights Reserved.
2#
3#    Licensed under the Apache License, Version 2.0 (the "License"); you may
4#    not use this file except in compliance with the License. You may obtain
5#    a copy of the License at
6#
7#         http://www.apache.org/licenses/LICENSE-2.0
8#
9#    Unless required by applicable law or agreed to in writing, software
10#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12#    License for the specific language governing permissions and limitations
13#    under the License.
14
15
16from collections import defaultdict
17import copy
18import glob
19import os
20import re
21import time
22from typing import List, Tuple  # noqa: H301
23
24from oslo_concurrency import lockutils
25from oslo_concurrency import processutils as putils
26from oslo_log import log as logging
27from oslo_utils import excutils
28from oslo_utils import strutils
29
30from os_brick import exception
31from os_brick import executor
32from os_brick.i18n import _
33from os_brick import initiator
34from os_brick.initiator.connectors import base
35from os_brick.initiator.connectors import base_iscsi
36from os_brick.initiator import utils as initiator_utils
37from os_brick import utils
38
# Decorator factory for named locks; it is applied below as
# ``@synchronized('<name>', external=True)`` and every lock name it creates
# carries the 'os-brick-' prefix.
synchronized = lockutils.synchronized_with_prefix('os-brick-')

# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)
42
43
44class ISCSIConnector(base.BaseLinuxConnector, base_iscsi.BaseISCSIConnector):
45    """Connector class to attach/detach iSCSI volumes."""
46
47    supported_transports = ['be2iscsi', 'bnx2i', 'cxgb3i', 'default',
48                            'cxgb4i', 'qla4xxx', 'ocs', 'iser', 'tcp']
49    VALID_SESSIONS_PREFIX = ('tcp:', 'iser:')
50
51    def __init__(
52            self, root_helper: str, driver=None,
53            execute=None, use_multipath: bool = False,
54            device_scan_attempts: int = initiator.DEVICE_SCAN_ATTEMPTS_DEFAULT,
55            transport='default', *args, **kwargs):
56        super(ISCSIConnector, self).__init__(
57            root_helper, driver=driver,
58            execute=execute,
59            device_scan_attempts=device_scan_attempts,
60            transport=transport, *args, **kwargs)  # type: ignore
61        self.use_multipath: bool = use_multipath
62        self.transport: str = self._validate_iface_transport(transport)
63
64    @staticmethod
65    def get_connector_properties(root_helper: str, *args, **kwargs) -> dict:
66        """The iSCSI connector properties."""
67        props = {}
68        iscsi = ISCSIConnector(root_helper=root_helper,
69                               execute=kwargs.get('execute'))
70        initiator = iscsi.get_initiator()
71        if initiator:
72            props['initiator'] = initiator
73
74        return props
75
76    def get_search_path(self) -> str:
77        """Where do we look for iSCSI based volumes."""
78        return '/dev/disk/by-path'
79
80    def get_volume_paths(self, connection_properties: dict) -> list:
81        """Get the list of existing paths for a volume.
82
83        This method's job is to simply report what might/should
84        already exist for a volume.  We aren't trying to attach/discover
85        a new volume, but find any existing paths for a volume we
86        think is already attached.
87
88        :param connection_properties: The dictionary that describes all
89                                      of the target volume attributes.
90        :type connection_properties: dict
91        """
92        volume_paths: list = []
93
94        # if there are no sessions, then target_portal won't exist
95        if (('target_portal' not in connection_properties) and
96           ('target_portals' not in connection_properties)):
97            return volume_paths
98
99        # Don't try and connect to the portals in the list as
100        # this can create empty iSCSI sessions to hosts if they
101        # didn't exist previously.
102        # We are simply trying to find any existing volumes with
103        # already connected sessions.
104        host_devices = self._get_potential_volume_paths(connection_properties)
105        for path in host_devices:
106            if os.path.exists(path):
107                volume_paths.append(path)
108
109        return volume_paths
110
111    def _get_iscsi_sessions_full(self) -> List[tuple]:
112        """Get iSCSI session information as a list of tuples.
113
114        Uses iscsiadm -m session and from a command output like
115            tcp: [1] 192.168.121.250:3260,1 iqn.2010-10.org.openstack:
116            volume- (non-flash)
117
118        This method will drop the node type and return a list like this:
119            [('tcp:', '1', '192.168.121.250:3260', '1',
120              'iqn.2010-10.org.openstack:volume-')]
121        """
122        out, err = self._run_iscsi_session()
123        if err:
124            LOG.warning("iscsiadm stderr output when getting sessions: %s",
125                        err)
126
127        # Parse and clean the output from iscsiadm, which is in the form of:
128        # transport_name: [session_id] ip_address:port,tpgt iqn node_type
129        lines: List[tuple] = []
130        for line in out.splitlines():
131            if line:
132                info = line.split()
133                sid = info[1][1:-1]
134                portal, tpgt = info[2].split(',')
135                lines.append((info[0], sid, portal, tpgt, info[3]))
136        return lines
137
138    def _get_iscsi_nodes(self) -> List[tuple]:
139        """Get iSCSI node information (portal, iqn) as a list of tuples.
140
141        Uses iscsiadm -m node and from a command output like
142            192.168.121.250:3260,1 iqn.2010-10.org.openstack:volume
143
144        This method will drop the tpgt and return a list like this:
145            [('192.168.121.250:3260', 'iqn.2010-10.org.openstack:volume')]
146        """
147        out, err = self._execute('iscsiadm', '-m', 'node', run_as_root=True,
148                                 root_helper=self._root_helper,
149                                 check_exit_code=False)
150        if err:
151            LOG.warning("Couldn't find iSCSI nodes because iscsiadm err: %s",
152                        err)
153            return []
154
155        # Parse and clean the output from iscsiadm which is in the form of:
156        # ip_address:port,tpgt iqn
157        lines: List[tuple] = []
158        for line in out.splitlines():
159            if line:
160                info = line.split()
161                try:
162                    lines.append((info[0].split(',')[0], info[1]))
163                except IndexError:
164                    pass
165        return lines
166
167    def _get_iscsi_sessions(self) -> list:
168        """Return portals for all existing sessions."""
169        # entry: [tcp, [1], 192.168.121.250:3260,1 ...]
170        return [entry[2] for entry in self._get_iscsi_sessions_full()]
171
172    def _get_ips_iqns_luns(self,
173                           connection_properties: dict,
174                           discover: bool = True,
175                           is_disconnect_call: bool = False):
176        """Build a list of ips, iqns, and luns.
177
178        Used when doing singlepath and multipath, and we have 4 cases:
179
180        - All information is in the connection properties
181        - We have to do an iSCSI discovery to get the information
182        - We don't want to do another discovery and we query the discoverydb
183        - Discovery failed because it was actually a single pathed attachment
184
185        :param connection_properties: The dictionary that describes all
186                                      of the target volume attributes.
187        :type connection_properties: dict
188        :param discover: Whether doing an iSCSI discovery is acceptable.
189        :type discover: bool
190        :param is_disconnect_call: Whether this is a call coming from a user
191                                   disconnect_volume call or a call from some
192                                   other operation's cleanup.
193        :type is_disconnect_call: bool
194        :returns: list of tuples of (ip, iqn, lun)
195        """
196        # There are cases where we don't know if the local attach was done
197        # using multipathing or single pathing, so assume multipathing.
198        try:
199            if ('target_portals' in connection_properties and
200                    'target_iqns' in connection_properties):
201                # Use targets specified by connection_properties
202                ips_iqns_luns = self._get_all_targets(connection_properties)
203            else:
204                method = (self._discover_iscsi_portals if discover
205                          else self._get_discoverydb_portals)
206                ips_iqns_luns = method(connection_properties)
207        except exception.TargetPortalNotFound:
208            # Discovery failed, on disconnect this will happen if we
209            # are detaching a single pathed connection, so we use the
210            # connection properties to return the tuple.
211            if is_disconnect_call:
212                return self._get_all_targets(connection_properties)
213            raise
214        except Exception:
215            LOG.exception('Exception encountered during portal discovery')
216            if 'target_portals' in connection_properties:
217                raise exception.TargetPortalsNotFound(
218                    target_portals=connection_properties['target_portals'])
219            if 'target_portal' in connection_properties:
220                raise exception.TargetPortalNotFound(
221                    target_portal=connection_properties['target_portal'])
222            raise
223
224        if not connection_properties.get('target_iqns'):
225            # There are two types of iSCSI multipath devices. One which
226            # shares the same iqn between multiple portals, and the other
227            # which use different iqns on different portals.
228            # Try to identify the type by checking the iscsiadm output
229            # if the iqn is used by multiple portals. If it is, it's
230            # the former, so use the supplied iqn. Otherwise, it's the
231            # latter, so try the ip,iqn combinations to find the targets
232            # which constitutes the multipath device.
233            main_iqn = connection_properties['target_iqn']
234            all_portals = {(ip, lun) for ip, iqn, lun in ips_iqns_luns}
235            match_portals = {(ip, lun) for ip, iqn, lun in ips_iqns_luns
236                             if iqn == main_iqn}
237            if len(all_portals) == len(match_portals):
238                ips_iqns_luns = [(p[0], main_iqn, p[1])
239                                 for p in all_portals]
240
241        return ips_iqns_luns
242
243    def _get_potential_volume_paths(self,
244                                    connection_properties: dict) -> List[str]:
245        """Build a list of potential volume paths that exist.
246
247        Given a list of target_portals in the connection_properties,
248        a list of paths might exist on the system during discovery.
249        This method's job is to build that list of potential paths
250        for a volume that might show up.
251
252        This is only used during get_volume_paths time, we are looking to
253        find a list of existing volume paths for the connection_properties.
254        In this case, we don't want to connect to the portal.  If we
255        blindly try and connect to a portal, it could create a new iSCSI
256        session that didn't exist previously, and then leave it stale.
257
258        :param connection_properties: The dictionary that describes all
259                                      of the target volume attributes.
260        :type connection_properties: dict
261        :returns: list
262        """
263        if self.use_multipath:
264            LOG.info("Multipath discovery for iSCSI enabled")
265            # Multipath installed, discovering other targets if available
266            host_devices = self._get_device_path(connection_properties)
267        else:
268            LOG.info("Multipath discovery for iSCSI not enabled.")
269            iscsi_sessions = self._get_iscsi_sessions()
270
271            host_devices = set()
272            for props in self._iterate_all_targets(connection_properties):
273                # If we aren't trying to connect to the portal, we
274                # want to find ALL possible paths from all of the
275                # alternate portals
276                if props['target_portal'] in iscsi_sessions:
277                    paths = self._get_device_path(props)
278                    host_devices.update(paths)
279            host_devices = list(host_devices)
280
281        return host_devices
282
283    def set_execute(self, execute):
284        super(ISCSIConnector, self).set_execute(execute)
285        self._linuxscsi.set_execute(execute)
286
287    def _validate_iface_transport(self, transport_iface: str) -> str:
288        """Check that given iscsi_iface uses only supported transports
289
290        Accepted transport names for provided iface param are
291        be2iscsi, bnx2i, cxgb3i, cxgb4i, default, qla4xxx, ocs, iser or tcp.
292        Note the difference between transport and iface;
293        unlike default(iscsi_tcp)/iser, this is not one and the same for
294        offloaded transports, where the default format is
295        transport_name.hwaddress
296
297        :param transport_iface: The iscsi transport type.
298        :type transport_iface: str
299        :returns: str
300        """
301        # Note that default(iscsi_tcp) and iser do not require a separate
302        # iface file, just the transport is enough and do not need to be
303        # validated. This is not the case for the other entries in
304        # supported_transports array.
305        if transport_iface in ['default', 'iser']:
306            return transport_iface
307        # Will return (6) if iscsi_iface file was not found, or (2) if iscsid
308        # could not be contacted
309        out = self._run_iscsiadm_bare(['-m',
310                                       'iface',
311                                       '-I',
312                                       transport_iface],
313                                      check_exit_code=[0, 2, 6])[0] or ""
314        LOG.debug("iscsiadm %(iface)s configuration: stdout=%(out)s.",
315                  {'iface': transport_iface, 'out': out})
316        for data in [line.split() for line in out.splitlines()]:
317            if data[0] == 'iface.transport_name':
318                if data[2] in self.supported_transports:
319                    return transport_iface
320
321        LOG.warning("No useable transport found for iscsi iface %s. "
322                    "Falling back to default transport.",
323                    transport_iface)
324        return 'default'
325
326    def _get_transport(self) -> str:
327        return self.transport
328
329    def _get_discoverydb_portals(self,
330                                 connection_properties: dict) -> List[tuple]:
331        """Retrieve iscsi portals information from the discoverydb.
332
333        Example of discoverydb command output:
334
335        SENDTARGETS:
336        DiscoveryAddress: 192.168.1.33,3260
337        DiscoveryAddress: 192.168.1.2,3260
338        Target: iqn.2004-04.com.qnap:ts-831x:iscsi.cinder-20170531114245.9eff88
339            Portal: 192.168.1.3:3260,1
340                Iface Name: default
341            Portal: 192.168.1.2:3260,1
342                Iface Name: default
343        Target: iqn.2004-04.com.qnap:ts-831x:iscsi.cinder-20170531114447.9eff88
344            Portal: 192.168.1.3:3260,1
345                Iface Name: default
346            Portal: 192.168.1.2:3260,1
347                Iface Name: default
348        DiscoveryAddress: 192.168.1.38,3260
349        iSNS:
350        No targets found.
351        STATIC:
352        No targets found.
353        FIRMWARE:
354        No targets found.
355
356        :param connection_properties: The dictionary that describes all
357                                      of the target volume attributes.
358        :type connection_properties: dict
359        :returns: list of tuples of (ip, iqn, lun)
360        """
361        ip, port = connection_properties['target_portal'].rsplit(':', 1)
362        # NOTE(geguileo): I don't know if IPv6 will be reported with []
363        # or not, so we'll make them optional.
364        ip = ip.replace('[', r'\[?').replace(']', r'\]?')
365        out = self._run_iscsiadm_bare(['-m', 'discoverydb',
366                                       '-o', 'show',
367                                       '-P', 1])[0] or ""
368        regex = ''.join(('^SENDTARGETS:\n.*?^DiscoveryAddress: ',
369                         ip, ',', port,
370                         '.*?\n(.*?)^(?:DiscoveryAddress|iSNS):.*'))
371        LOG.debug('Regex to get portals from discoverydb: %s', regex)
372
373        info = re.search(regex, out, re.DOTALL | re.MULTILINE)
374
375        ips = []
376        iqns = []
377
378        if info:
379            iscsi_transport = ('iser' if self._get_transport() == 'iser'
380                               else 'default')
381            iface = 'Iface Name: ' + iscsi_transport
382            current_iqn = ''
383            current_ip = ''
384            for line in info.group(1).splitlines():
385                line = line.strip()
386                if line.startswith('Target:'):
387                    current_iqn = line[8:]
388                elif line.startswith('Portal:'):
389                    current_ip = line[8:].split(',')[0]
390                elif line.startswith(iface):
391                    if current_iqn and current_ip:
392                        iqns.append(current_iqn)
393                        ips.append(current_ip)
394                    current_ip = ''
395
396        if not iqns:
397            raise exception.TargetPortalsNotFound(
398                _('Unable to find target portals information on discoverydb.'))
399
400        luns = self._get_luns(connection_properties, iqns)
401        return list(zip(ips, iqns, luns))
402
403    def _discover_iscsi_portals(self, connection_properties: dict) -> list:
404        out = None
405        iscsi_transport = ('iser' if self._get_transport() == 'iser'
406                           else 'default')
407        if connection_properties.get('discovery_auth_method'):
408            try:
409                self._run_iscsiadm_update_discoverydb(connection_properties,
410                                                      iscsi_transport)
411            except putils.ProcessExecutionError as exception:
412                # iscsiadm returns 6 for "db record not found"
413                if exception.exit_code == 6:
414                    # Create a new record for this target and update the db
415                    self._run_iscsiadm_bare(
416                        ['-m', 'discoverydb',
417                         '-t', 'sendtargets',
418                         '-p', connection_properties['target_portal'],
419                         '-I', iscsi_transport,
420                         '--op', 'new'],
421                        check_exit_code=[0, 255])
422                    self._run_iscsiadm_update_discoverydb(
423                        connection_properties
424                    )
425                else:
426                    LOG.error("Unable to find target portal: "
427                              "%(target_portal)s.",
428                              {'target_portal': connection_properties[
429                                  'target_portal']})
430                    raise
431            old_node_startups = self._get_node_startup_values(
432                connection_properties)
433            out = self._run_iscsiadm_bare(
434                ['-m', 'discoverydb',
435                 '-t', 'sendtargets',
436                 '-I', iscsi_transport,
437                 '-p', connection_properties['target_portal'],
438                 '--discover'],
439                check_exit_code=[0, 255])[0] or ""
440            self._recover_node_startup_values(connection_properties,
441                                              old_node_startups)
442        else:
443            old_node_startups = self._get_node_startup_values(
444                connection_properties)
445            out = self._run_iscsiadm_bare(
446                ['-m', 'discovery',
447                 '-t', 'sendtargets',
448                 '-I', iscsi_transport,
449                 '-p', connection_properties['target_portal']],
450                check_exit_code=[0, 255])[0] or ""
451            self._recover_node_startup_values(connection_properties,
452                                              old_node_startups)
453
454        ips, iqns = self._get_target_portals_from_iscsiadm_output(out)
455        luns = self._get_luns(connection_properties, iqns)
456        return list(zip(ips, iqns, luns))
457
458    def _run_iscsiadm_update_discoverydb(self, connection_properties,
459                                         iscsi_transport='default'):
460        return self._execute(
461            'iscsiadm',
462            '-m', 'discoverydb',
463            '-t', 'sendtargets',
464            '-I', iscsi_transport,
465            '-p', connection_properties['target_portal'],
466            '--op', 'update',
467            '-n', "discovery.sendtargets.auth.authmethod",
468            '-v', connection_properties['discovery_auth_method'],
469            '-n', "discovery.sendtargets.auth.username",
470            '-v', connection_properties['discovery_auth_username'],
471            '-n', "discovery.sendtargets.auth.password",
472            '-v', connection_properties['discovery_auth_password'],
473            run_as_root=True,
474            root_helper=self._root_helper)
475
476    @utils.trace
477    @synchronized('extend_volume', external=True)
478    def extend_volume(self, connection_properties: dict):
479        """Update the local kernel's size information.
480
481        Try and update the local kernel's size information
482        for an iSCSI volume.
483        """
484        LOG.info("Extend volume for %s",
485                 strutils.mask_dict_password(connection_properties))
486
487        volume_paths = self.get_volume_paths(connection_properties)
488        LOG.info("Found paths for volume %s", volume_paths)
489        if volume_paths:
490            return self._linuxscsi.extend_volume(
491                volume_paths, use_multipath=self.use_multipath)
492        else:
493            LOG.warning("Couldn't find any volume paths on the host to "
494                        "extend volume for %(props)s",
495                        {'props': strutils.mask_dict_password(
496                            connection_properties)})
497            raise exception.VolumePathsNotFound()
498
499    @utils.trace
500    @synchronized('connect_volume', external=True)
501    def connect_volume(self, connection_properties: dict):
502        """Attach the volume to instance_name.
503
504        :param connection_properties: The valid dictionary that describes all
505                                      of the target volume attributes.
506        :type connection_properties: dict
507        :returns: dict
508
509        connection_properties for iSCSI must include:
510        target_portal(s) - ip and optional port
511        target_iqn(s) - iSCSI Qualified Name
512        target_lun(s) - LUN id of the volume
513        Note that plural keys may be used when use_multipath=True
514        """
515        try:
516            if self.use_multipath:
517                return self._connect_multipath_volume(connection_properties)
518            return self._connect_single_volume(connection_properties)
519        except Exception:
520            # NOTE(geguileo): By doing the cleanup here we ensure we only do
521            # the logins once for multipath if they succeed, but retry if they
522            # don't, which helps on bad network cases.
523            with excutils.save_and_reraise_exception():
524                self._cleanup_connection(connection_properties, force=True)
525
526    @utils.retry((exception.VolumeDeviceNotFound))
527    def _get_device_link(self, wwn, device, mpath):
528        # These are the default symlinks that should always be there
529        if mpath:
530            symlink = '/dev/disk/by-id/dm-uuid-mpath-' + mpath
531        else:
532            symlink = '/dev/disk/by-id/scsi-' + wwn
533
534        # If default symlinks are not there just search for anything that links
535        # to our device.  In my experience this will return the last added link
536        # first, so if we are going to succeed this should be fast.
537        if not os.path.realpath(symlink) == device:
538            links_path = '/dev/disk/by-id/'
539            for symlink in os.listdir(links_path):
540                symlink = links_path + symlink
541                if os.path.realpath(symlink) == device:
542                    break
543            else:
544                # Raising this will trigger the next retry
545                raise exception.VolumeDeviceNotFound(device='/dev/disk/by-id')
546        return symlink
547
548    def _get_connect_result(self, con_props, wwn, devices_names, mpath=None):
549        device = '/dev/' + (mpath or devices_names[0])
550
551        # NOTE(geguileo): This is only necessary because of the current
552        # encryption flow that requires that connect_volume returns a symlink
553        # because first we do the volume attach, then the libvirt config is
554        # generated using the path returned by the atach, and then we do the
555        # encryption attach, which is forced to preserve the path that was used
556        # in the libvirt config.  If we fix that flow in OS-brick, Nova, and
557        # Cinder we can remove this and just return the real path.
558        if con_props.get('encrypted'):
559            device = self._get_device_link(wwn, device, mpath)
560
561        result = {'type': 'block', 'scsi_wwn': wwn, 'path': device}
562        if mpath:
563            result['multipath_id'] = wwn
564        return result
565
566    @utils.retry((exception.VolumeDeviceNotFound))
567    def _connect_single_volume(self, connection_properties):
568        """Connect to a volume using a single path."""
569        data = {'stop_connecting': False, 'num_logins': 0, 'failed_logins': 0,
570                'stopped_threads': 0, 'found_devices': [],
571                'just_added_devices': []}
572
573        for props in self._iterate_all_targets(connection_properties):
574            self._connect_vol(self.device_scan_attempts, props, data)
575            found_devs = data['found_devices']
576            if found_devs:
577                for __ in range(10):
578                    wwn = self._linuxscsi.get_sysfs_wwn(found_devs)
579                    if wwn:
580                        break
581                    time.sleep(1)
582                else:
583                    LOG.debug('Could not find the WWN for %s.',
584                              found_devs[0])  # type: ignore
585                return self._get_connect_result(connection_properties,
586                                                wwn, found_devs)
587
588            # If we failed we must cleanup the connection, as we could be
589            # leaving the node entry if it's not being used by another device.
590            ips_iqns_luns = ((props['target_portal'], props['target_iqn'],
591                              props['target_lun']), )
592            self._cleanup_connection(props, ips_iqns_luns, force=True,
593                                     ignore_errors=True)
594            # Reset connection result values for next try
595            data.update(num_logins=0, failed_logins=0, found_devices=[])
596
597        raise exception.VolumeDeviceNotFound(device='')
598
    def _connect_vol(self, rescans, props, data):
        """Make a connection to a volume, send scans and wait for the device.

        This method is specifically designed to support multithreading and
        share the results via a shared dictionary with fixed keys, which is
        thread safe.

        Since the heaviest operations are run via subprocesses we don't worry
        too much about the GIL or how the eventlets will handle the context
        switching.

        The method will only try to log in once, since iscsid's initiator
        already tries 8 times by default to do the login, or whatever value we
        have as node.session.initial_login_retry_max in our system.

        Shared dictionary has the following keys:
        - stop_connecting: When the caller wants us to stop the rescans
        - num_logins: Count of how many threads have successfully logged in
        - failed_logins: Count of how many threads have failed to log in
        - stopped_threads: How many threads have finished.  This may be
                           different than num_logins + failed_logins, since
                           some threads may still be waiting for a device.
        - found_devices: List of devices the connections have found
        - just_added_devices: Devices that have been found and still have not
                              been processed by the main thread that manages
                              all the connecting threads.

        Nothing is returned and no exception from the login or from the
        scans is propagated: they are logged and the outcome is reported
        only through the shared dictionary.

        :param rescans: Number of rescans to perform before giving up.
        :param props: Properties of the connection.  ``target_portal`` and
                      ``target_lun`` are read here.
        :param data: Shared data.
        """
        device = hctl = None
        portal = props['target_portal']
        try:
            session, manual_scan = self._connect_to_iscsi_portal(props)
        except Exception:
            # A failed login is logged and handled below as "no session".
            LOG.exception('Exception connecting to %s', portal)
            session = None

        if session:
            do_scans = rescans > 0 or manual_scan
            # Scan is sent on connect by iscsid, but we must do it manually on
            # manual scan mode.  This scan cannot count towards total rescans.
            if manual_scan:
                # Start at -1 so the first manual scan leaves the count at 0,
                # and use no delay so it is sent right away.
                num_rescans = -1
                seconds_next_scan = 0
            else:
                num_rescans = 0
                seconds_next_scan = 4

            data['num_logins'] += 1
            LOG.debug('Connected to %s', portal)
            # Each iteration checks for the device and then sleeps 1 second;
            # seconds_next_scan counts those seconds down to the next rescan.
            while do_scans:
                try:
                    # The HCTL is looked up again on every iteration until
                    # it is available.
                    if not hctl:
                        hctl = self._linuxscsi.get_hctl(session,
                                                        props['target_lun'])
                    if hctl:
                        if seconds_next_scan <= 0:
                            num_rescans += 1
                            self._linuxscsi.scan_iscsi(*hctl)
                            # 4 seconds on 1st rescan, 9s on 2nd, 16s on 3rd
                            seconds_next_scan = (num_rescans + 2) ** 2

                        device = self._linuxscsi.device_name_by_hctl(session,
                                                                     hctl)
                        if device:
                            break

                except Exception:
                    # Errors while scanning are logged and the loop goes on.
                    LOG.exception('Exception scanning %s', portal)
                    pass
                # Keep going while the rescan budget is not exhausted, no
                # device has shown up and the caller has not asked to stop.
                do_scans = (num_rescans <= rescans and
                            not (device or data['stop_connecting']))
                if do_scans:
                    time.sleep(1)
                    seconds_next_scan -= 1

            if device:
                LOG.debug('Connected to %s using %s', device,
                          strutils.mask_password(props))
            else:
                LOG.warning('LUN %(lun)s on iSCSI portal %(portal)s not found '
                            'on sysfs after logging in.',
                            {'lun': props['target_lun'], 'portal': portal})
        else:
            LOG.warning('Failed to connect to iSCSI portal %s.', portal)
            data['failed_logins'] += 1

        # Publish the result through the shared dictionary.
        if device:
            data['found_devices'].append(device)
            data['just_added_devices'].append(device)
        # Counted on every exit path, whether or not a device was found.
        data['stopped_threads'] += 1
692
    @utils.retry((exception.VolumeDeviceNotFound))
    def _connect_multipath_volume(self, connection_properties):
        """Connect to a multipathed volume launching parallel login requests.

        We will be doing parallel login requests, which will considerably speed
        up the process when we have flaky connections.

        We'll always try to return a multipath device even if there's only one
        path discovered, that way we can return once we have logged in on all
        the portals, because the paths will come up later.

        To make this possible we tell multipathd that the wwid is a multipath
        as soon as we have one device, and then hint multipathd to reconsider
        that volume for a multipath asking to add the path, because even if
        it's already known by multipathd it would have been discarded if it
        was the first time this volume was seen here.

        :param connection_properties: Connection properties of the volume.  A
                                      copy is made for every (portal, iqn,
                                      lun) returned by _get_ips_iqns_luns and
                                      handed to its own login thread.
        :returns: The value built by _get_connect_result from the wwn, the
                  devices found and the multipath dm (which may be None).
        :raises VolumeDeviceNotFound: If none of the threads found a device.
        """
        wwn = mpath = None
        wwn_added = False
        # Deadline of the final grace period; 0.0 means it has not started
        last_try_on = 0.0
        found: list = []
        just_added_devices: list = []
        # Dict used to communicate with threads as detailed in _connect_vol.
        # 'found_devices' and 'just_added_devices' reference the two lists
        # above, so devices appended by the threads are visible here.
        data = {'stop_connecting': False, 'num_logins': 0, 'failed_logins': 0,
                'stopped_threads': 0, 'found_devices': found,
                'just_added_devices': just_added_devices}

        ips_iqns_luns = self._get_ips_iqns_luns(connection_properties)
        # Launch individual threads for each session with its own properties
        retries = self.device_scan_attempts
        threads = []
        for ip, iqn, lun in ips_iqns_luns:
            props = connection_properties.copy()
            props.update(target_portal=ip, target_iqn=iqn, target_lun=lun)

            # NOTE(yenai): The method _connect_vol is used to parallelize
            # logins, we shouldn't give these arguments; and it will make a
            # mess in the debug message in _connect_vol. So, kick them out:
            for key in ('target_portals', 'target_iqns', 'target_luns'):
                props.pop(key, None)

            threads.append(executor.Thread(target=self._connect_vol,
                                           args=(retries, props, data)))
        for thread in threads:
            thread.start()

        # Continue until:
        # - All connection attempts have finished and none has logged in
        # - Multipath has been found and connection attempts have either
        #   finished or have already logged in
        # - We have finished in all threads, logged in, found some device, and
        #   10 seconds have passed, which should be enough with up to 10%
        #   network packet drops.
        while not ((len(ips_iqns_luns) == data['stopped_threads'] and
                    not found) or
                   (mpath and len(ips_iqns_luns) == data['num_logins'] +
                    data['failed_logins'])):
            # We have devices but we don't know the wwn yet
            if not wwn and found:
                wwn = self._linuxscsi.get_sysfs_wwn(found, mpath)
            if not mpath and found:
                mpath = self._linuxscsi.find_sysfs_multipath_dm(found)
                # We have the wwn but not a multipath
                if wwn and not(mpath or wwn_added):
                    # Tell multipathd that this wwn is a multipath and hint
                    # multipathd to recheck all the devices we have just
                    # connected.  We only do this once, since for any new
                    # device multipathd will already know it is a multipath.
                    # This is only useful if we have multipathd configured with
                    # find_multipaths set to yes, and has no effect if it's set
                    # to no.
                    wwn_added = self._linuxscsi.multipath_add_wwid(wwn)
                    # Consume the pending devices one by one, stopping as soon
                    # as the multipath dm shows up.
                    while not mpath and just_added_devices:
                        device_path = '/dev/' + just_added_devices.pop(0)
                        self._linuxscsi.multipath_add_path(device_path)
                        mpath = self._linuxscsi.find_sysfs_multipath_dm(found)
            # Give some extra time after all threads have finished.
            if (not last_try_on and found and
                    len(ips_iqns_luns) == data['stopped_threads']):
                LOG.debug('All connection threads finished, giving 10 seconds '
                          'for dm to appear.')
                last_try_on = time.time() + 10
            elif last_try_on and last_try_on < time.time():
                # Grace period is over, stop waiting for the dm
                break
            time.sleep(1)
        # Ask threads that are still rescanning to stop, then wait for them
        data['stop_connecting'] = True
        for thread in threads:
            thread.join()

        # If we haven't found any devices let the caller do the cleanup
        if not found:
            raise exception.VolumeDeviceNotFound(device='')

        # NOTE(geguileo): If we cannot find the dm it's because all paths are
        # really bad, so we might as well raise a not found exception, but
        # in our best effort we'll return a device even if it's probably
        # useless.
        if not mpath:
            LOG.warning('No dm was created, connection to volume is probably '
                        'bad and will perform poorly.')
        elif not wwn:
            wwn = self._linuxscsi.get_sysfs_wwn(found, mpath)
        return self._get_connect_result(connection_properties, wwn, found,
                                        mpath)
797
798    def _get_connection_devices(self, connection_properties,
799                                ips_iqns_luns=None, is_disconnect_call=False):
800        """Get map of devices by sessions from our connection.
801
802        For each of the TCP sessions that correspond to our connection
803        properties we generate a map of (ip, iqn) to (belong, other) where
804        belong is a set of devices in that session that populated our system
805        when we did a connection using connection properties, and other are
806        any other devices that share that same session but are the result of
807        connecting with different connection properties.
808
809        We also include all nodes from our connection that don't have a
810        session.
811
812        If ips_iqns_luns parameter is provided connection_properties won't be
813        used to get them.
814
815        When doing multipath we may not have all the information on the
816        connection properties (sendtargets was used on connect) so we may have
817        to retrieve the info from the discoverydb.  Call _get_ips_iqns_luns to
818        do the right things.
819
820        This method currently assumes that it's only called by the
821        _cleanup_conection method.
822        """
823        if not ips_iqns_luns:
824            # This is a cleanup, don't do discovery
825            ips_iqns_luns = self._get_ips_iqns_luns(
826                connection_properties, discover=False,
827                is_disconnect_call=is_disconnect_call)
828        LOG.debug('Getting connected devices for (ips,iqns,luns)=%s',
829                  ips_iqns_luns)
830        nodes = self._get_iscsi_nodes()
831        sessions = self._get_iscsi_sessions_full()
832        # Use (portal, iqn) to map the session value
833        sessions_map = {(s[2], s[4]): s[1] for s in sessions
834                        if s[0] in self.VALID_SESSIONS_PREFIX}
835        # device_map will keep a tuple with devices from the connection and
836        # others that don't belong to this connection" (belong, others)
837        device_map: defaultdict = defaultdict(lambda: (set(), set()))
838
839        for ip, iqn, lun in ips_iqns_luns:
840            session = sessions_map.get((ip, iqn))
841            # Our nodes that don't have a session will be returned as empty
842            if not session:
843                if (ip, iqn) in nodes:
844                    device_map[(ip, iqn)] = (set(), set())
845                continue
846
847            # Get all devices for the session
848            paths = glob.glob('/sys/class/scsi_host/host*/device/session' +
849                              session + '/target*/*:*:*:*/block/*')
850            belong, others = device_map[(ip, iqn)]
851            for path in paths:
852                __, hctl, __, device = path.rsplit('/', 3)
853                lun_path = int(hctl.rsplit(':', 1)[-1])
854                # For partitions turn them into the whole device: sde1 -> sde
855                device = device.strip('0123456789')
856                if lun_path == lun:
857                    belong.add(device)
858                else:
859                    others.add(device)
860
861        LOG.debug('Resulting device map %s', device_map)
862        return device_map
863
864    @utils.trace
865    @synchronized('connect_volume', external=True)
866    def disconnect_volume(self, connection_properties, device_info,
867                          force=False, ignore_errors=False):
868        """Detach the volume from instance_name.
869
870        :param connection_properties: The dictionary that describes all
871                                      of the target volume attributes.
872        :type connection_properties: dict that must include:
873                                     target_portal(s) - IP and optional port
874                                     target_iqn(s) - iSCSI Qualified Name
875                                     target_lun(s) - LUN id of the volume
876        :param device_info: historical difference, but same as connection_props
877        :type device_info: dict
878        :param force: Whether to forcefully disconnect even if flush fails.
879        :type force: bool
880        :param ignore_errors: When force is True, this will decide whether to
881                              ignore errors or raise an exception once finished
882                              the operation.  Default is False.
883        :type ignore_errors: bool
884        """
885        return self._cleanup_connection(connection_properties, force=force,
886                                        ignore_errors=ignore_errors,
887                                        device_info=device_info,
888                                        is_disconnect_call=True)
889
890    def _cleanup_connection(self, connection_properties, ips_iqns_luns=None,
891                            force=False, ignore_errors=False,
892                            device_info=None, is_disconnect_call=False):
893        """Cleans up connection flushing and removing devices and multipath.
894
895        :param connection_properties: The dictionary that describes all
896                                      of the target volume attributes.
897        :type connection_properties: dict that must include:
898                                     target_portal(s) - IP and optional port
899                                     target_iqn(s) - iSCSI Qualified Name
900                                     target_lun(s) - LUN id of the volume
901        :param ips_iqns_luns: Use this list of tuples instead of information
902                              from the connection_properties.
903        :param force: Whether to forcefully disconnect even if flush fails.
904        :type force: bool
905        :param ignore_errors: When force is True, this will decide whether to
906                              ignore errors or raise an exception once finished
907                              the operation.  Default is False.
908        :param device_info: Attached device information.
909        :param is_disconnect_call: Whether this is a call coming from a user
910                                   disconnect_volume call or a call from some
911                                   other operation's cleanup.
912        :type is_disconnect_call: bool
913        :type ignore_errors: bool
914        """
915        exc = exception.ExceptionChainer()
916        try:
917            devices_map = self._get_connection_devices(connection_properties,
918                                                       ips_iqns_luns,
919                                                       is_disconnect_call)
920        except exception.TargetPortalNotFound as exc:
921            # When discovery sendtargets failed on connect there is no
922            # information in the discoverydb, so there's nothing to clean.
923            LOG.debug('Skipping cleanup %s', exc)
924            return
925
926        # Remove devices and multipath from this connection
927        remove_devices = set()
928        for remove, __ in devices_map.values():
929            remove_devices.update(remove)
930
931        path_used = self._linuxscsi.get_dev_path(connection_properties,
932                                                 device_info)
933        was_multipath = (path_used.startswith('/dev/dm-') or
934                         'mpath' in path_used)
935        multipath_name = self._linuxscsi.remove_connection(
936            remove_devices, force,
937            exc, path_used, was_multipath)  # type: ignore
938
939        # Disconnect sessions and remove nodes that are left without devices
940        disconnect = [conn for conn, (__, keep) in devices_map.items()
941                      if not keep]
942
943        # The "type:" comment works around mypy issue #6647
944        self._disconnect_connection(connection_properties, disconnect, force,
945                                    exc)  # type:ignore
946
947        # If flushing the multipath failed before, try now after we have
948        # removed the devices and we may have even logged off (only reaches
949        # here with multipath_name if force=True).
950        if multipath_name:
951            LOG.debug('Flushing again multipath %s now that we removed the '
952                      'devices.', multipath_name)
953            self._linuxscsi.flush_multipath_device(multipath_name)
954
955        if exc:  # type: ignore
956            LOG.warning('There were errors removing %s, leftovers may remain '
957                        'in the system', remove_devices)
958            if not ignore_errors:
959                raise exc  # type: ignore
960
961    def _munge_portal(self, target):
962        """Remove brackets from portal.
963
964        In case IPv6 address was used the udev path should not contain any
965        brackets. Udev code specifically forbids that.
966        """
967        portal, iqn, lun = target
968        return (portal.replace('[', '').replace(']', ''), iqn,
969                self._linuxscsi.process_lun_id(lun))
970
971    def _get_device_path(self, connection_properties):
972        if self._get_transport() == "default":
973            return ["/dev/disk/by-path/ip-%s-iscsi-%s-lun-%s" %
974                    self._munge_portal(x) for x in
975                    self._get_all_targets(connection_properties)]
976        else:
977            # we are looking for paths in the format :
978            # /dev/disk/by-path/
979            # pci-XXXX:XX:XX.X-ip-PORTAL:PORT-iscsi-IQN-lun-LUN_ID
980            device_list = []
981            for x in self._get_all_targets(connection_properties):
982                look_for_device = glob.glob(
983                    '/dev/disk/by-path/*ip-%s-iscsi-%s-lun-%s' %
984                    self._munge_portal(x))
985                if look_for_device:
986                    device_list.extend(look_for_device)
987            return device_list
988
989    def get_initiator(self):
990        """Secure helper to read file as root."""
991        file_path = '/usr/local/etc/iscsi/initiatorname.iscsi'
992        try:
993            lines, _err = self._execute('cat', file_path, run_as_root=True,
994                                        root_helper=self._root_helper)
995
996            for line in lines.split('\n'):
997                if line.startswith('InitiatorName='):
998                    return line[line.index('=') + 1:].strip()
999        except putils.ProcessExecutionError:
1000            LOG.warning("Could not find the iSCSI Initiator File %s",
1001                        file_path)
1002            return None
1003
1004    def _run_iscsiadm(self, connection_properties, iscsi_command, **kwargs):
1005        check_exit_code = kwargs.pop('check_exit_code', 0)
1006        attempts = kwargs.pop('attempts', 1)
1007        delay_on_retry = kwargs.pop('delay_on_retry', True)
1008        (out, err) = self._execute('iscsiadm', '-m', 'node', '-T',
1009                                   connection_properties['target_iqn'],
1010                                   '-p',
1011                                   connection_properties['target_portal'],
1012                                   *iscsi_command, run_as_root=True,
1013                                   root_helper=self._root_helper,
1014                                   check_exit_code=check_exit_code,
1015                                   attempts=attempts,
1016                                   delay_on_retry=delay_on_retry)
1017        msg = ("iscsiadm %(iscsi_command)s: stdout=%(out)s stderr=%(err)s" %
1018               {'iscsi_command': iscsi_command, 'out': out, 'err': err})
1019        # don't let passwords be shown in log output
1020        LOG.debug(strutils.mask_password(msg))
1021
1022        return (out, err)
1023
1024    def _iscsiadm_update(self, connection_properties, property_key,
1025                         property_value, **kwargs):
1026        iscsi_command = ('--op', 'update', '-n', property_key,
1027                         '-v', property_value)
1028        return self._run_iscsiadm(connection_properties, iscsi_command,
1029                                  **kwargs)
1030
1031    def _get_target_portals_from_iscsiadm_output(self, output):
1032        # return both portals and iqns as 2 lists
1033        #
1034        # as we are parsing a command line utility, allow for the
1035        # possibility that additional debug data is spewed in the
1036        # stream, and only grab actual ip / iqn lines.
1037        ips = []
1038        iqns = []
1039        for data in [line.split() for line in output.splitlines()]:
1040            if len(data) == 2 and data[1].startswith('iqn.'):
1041                ips.append(data[0].split(',')[0])
1042                iqns.append(data[1])
1043        return ips, iqns
1044
1045    def _connect_to_iscsi_portal(self, connection_properties):
1046        """Safely connect to iSCSI portal-target and return the session id."""
1047        portal = connection_properties['target_portal'].split(",")[0]
1048        target_iqn = connection_properties['target_iqn']
1049
1050        lock_name = f'connect_to_iscsi_portal-{portal}-{target_iqn}'
1051        method = synchronized(
1052            lock_name, external=True)(self._connect_to_iscsi_portal_unsafe)
1053        return method(connection_properties)
1054
    @utils.retry((exception.BrickException))
    def _connect_to_iscsi_portal_unsafe(self, connection_properties):
        """Connect to an iSCSI portal-target and return the session id.

        Steps, in order: create the node record if querying it wrote to
        stderr, try to set the node to manual scan mode, set the
        authentication values when ``auth_method`` is present, and finally
        loop looking for our session and logging in until the session shows
        up or the login command fails.

        :param connection_properties: dict with ``target_portal`` and
                                      ``target_iqn``; ``auth_method``,
                                      ``auth_username`` and ``auth_password``
                                      are used when ``auth_method`` is set.
        :returns: Tuple (session id, manual_scan) once the session is found,
                  or (None, None) if the login command fails.
        :raises BrickException: If creating the node record reports an error
                                on stderr.
        """
        # Only the text before the first comma of the portal is used
        portal = connection_properties['target_portal'].split(",")[0]
        target_iqn = connection_properties['target_iqn']

        # NOTE(vish): If we are on the same host as nova volume, the
        #             discovery makes the target so we don't need to
        #             run --op new. Therefore, we check to see if the
        #             target exists, and if we get 255 (Not Found), then
        #             we run --op new. This will also happen if another
        #             volume is using the same target.
        # iscsiadm returns 21 for "No records found" after version 2.0-871
        LOG.info("Trying to connect to iSCSI portal %s", portal)
        out, err = self._run_iscsiadm(connection_properties, (),
                                      check_exit_code=(0, 21, 255))
        if err:
            out_new, err_new = self._run_iscsiadm(connection_properties,
                                                  ('--interface',
                                                   self._get_transport(),
                                                   '--op', 'new'),
                                                  check_exit_code=(0, 6))
            if err_new:
                # retry if iscsiadm returns 6 for "database failure"
                LOG.debug("Retrying to connect to iSCSI portal %s", portal)
                msg = (_("Encountered database failure for %s.") % (portal))
                raise exception.BrickException(msg=msg)

        # Try to set the scan mode to manual
        res = self._iscsiadm_update(connection_properties,
                                    'node.session.scan', 'manual',
                                    check_exit_code=False)
        # Manual scan is considered enabled when the update printed nothing
        # on stderr (res is the (stdout, stderr) tuple).
        manual_scan = not res[1]
        # Update global indicator of manual scan support used for
        # shared_targets locking so we support upgrading open iscsi to a
        # version supporting the manual scan feature without restarting Nova
        # or Cinder.
        initiator_utils.ISCSI_SUPPORTS_MANUAL_SCAN = manual_scan

        if connection_properties.get('auth_method'):
            self._iscsiadm_update(connection_properties,
                                  "node.session.auth.authmethod",
                                  connection_properties['auth_method'])
            self._iscsiadm_update(connection_properties,
                                  "node.session.auth.username",
                                  connection_properties['auth_username'])
            self._iscsiadm_update(connection_properties,
                                  "node.session.auth.password",
                                  connection_properties['auth_password'])

        # We exit once we are logged in or once we fail login
        while True:
            # Duplicate logins crash iscsiadm after load, so we scan active
            # sessions to see if the node is logged in.
            sessions = self._get_iscsi_sessions_full()
            for s in sessions:
                # Found our session, return session_id.  The portal is
                # compared case insensitively, the iqn must match exactly.
                if (s[0] in self.VALID_SESSIONS_PREFIX and
                        portal.lower() == s[2].lower() and s[4] == target_iqn):
                    return s[1], manual_scan

            try:
                # exit_code=15 means the session already exists, so it should
                # be regarded as successful login.
                self._run_iscsiadm(connection_properties, ("--login",),
                                   check_exit_code=(0, 15, 255))
            except putils.ProcessExecutionError as err:
                LOG.warning('Failed to login iSCSI target %(iqn)s on portal '
                            '%(portal)s (exit code %(err)s).',
                            {'iqn': target_iqn, 'portal': portal,
                             'err': err.exit_code})
                return None, None
            # Login command was accepted: set the node startup to automatic
            # and loop again to look for the session.
            self._iscsiadm_update(connection_properties,
                                  "node.startup",
                                  "automatic")
1130
1131    def _disconnect_from_iscsi_portal(self, connection_properties):
1132        self._iscsiadm_update(connection_properties, "node.startup", "manual",
1133                              check_exit_code=[0, 21, 255])
1134        self._run_iscsiadm(connection_properties, ("--logout",),
1135                           check_exit_code=[0, 21, 255])
1136        self._run_iscsiadm(connection_properties, ('--op', 'delete'),
1137                           check_exit_code=[0, 21, 255],
1138                           attempts=5,
1139                           delay_on_retry=True)
1140
1141    def _disconnect_connection(self, connection_properties, connections, force,
1142                               exc):
1143        LOG.debug('Disconnecting from: %s', connections)
1144        props = connection_properties.copy()
1145        for ip, iqn in connections:
1146            props['target_portal'] = ip
1147            props['target_iqn'] = iqn
1148            with exc.context(force, 'Disconnect from %s %s failed', ip, iqn):
1149                self._disconnect_from_iscsi_portal(props)
1150
1151    def _run_iscsi_session(self):
1152        (out, err) = self._run_iscsiadm_bare(('-m', 'session'),
1153                                             check_exit_code=[0, 21, 255])
1154        LOG.debug("iscsi session list stdout=%(out)s stderr=%(err)s",
1155                  {'out': out, 'err': err})
1156        return (out, err)
1157
1158    def _run_iscsiadm_bare(self, iscsi_command, **kwargs) -> Tuple[str, str]:
1159        check_exit_code = kwargs.pop('check_exit_code', 0)
1160        (out, err) = self._execute('iscsiadm',
1161                                   *iscsi_command,
1162                                   run_as_root=True,
1163                                   root_helper=self._root_helper,
1164                                   check_exit_code=check_exit_code)
1165        LOG.debug("iscsiadm %(iscsi_command)s: stdout=%(out)s stderr=%(err)s",
1166                  {'iscsi_command': iscsi_command, 'out': out, 'err': err})
1167        return (out, err)
1168
1169    def _run_multipath(self, multipath_command, **kwargs):
1170        check_exit_code = kwargs.pop('check_exit_code', 0)
1171        (out, err) = self._execute('multipath',
1172                                   *multipath_command,
1173                                   run_as_root=True,
1174                                   root_helper=self._root_helper,
1175                                   check_exit_code=check_exit_code)
1176        LOG.debug("multipath %(multipath_command)s: "
1177                  "stdout=%(out)s stderr=%(err)s",
1178                  {'multipath_command': multipath_command,
1179                   'out': out, 'err': err})
1180        return (out, err)
1181
1182    def _get_node_startup_values(self, connection_properties):
1183        # Exit code 21 (ISCSI_ERR_NO_OBJS_FOUND) occurs when no nodes
1184        # exist - must consider this an empty (successful) result.
1185        out, __ = self._run_iscsiadm_bare(
1186            ['-m', 'node', '--op', 'show', '-p',
1187             connection_properties['target_portal']],
1188            check_exit_code=(0, 21)) or ""
1189        node_values_str = out.strip()
1190        node_values = node_values_str.split("\n")
1191        iqn = None
1192        startup = None
1193        startup_values = {}
1194
1195        for node_value in node_values:
1196            node_keys = node_value.split()
1197            try:
1198                if node_keys[0] == "node.name":
1199                    iqn = node_keys[2]
1200                elif node_keys[0] == "node.startup":
1201                    startup = node_keys[2]
1202
1203                if iqn and startup:
1204                    startup_values[iqn] = startup
1205                    iqn = None
1206                    startup = None
1207            except IndexError:
1208                pass
1209
1210        return startup_values
1211
1212    def _recover_node_startup_values(self, connection_properties,
1213                                     old_node_startups):
1214        node_startups = self._get_node_startup_values(connection_properties)
1215        for iqn, node_startup in node_startups.items():
1216            old_node_startup = old_node_startups.get(iqn, None)
1217            if old_node_startup and node_startup != old_node_startup:
1218                # _iscsiadm_update() only uses "target_portal" and "target_iqn"
1219                # of connection_properties.
1220                # And the recovering target belongs to the same target_portal
1221                # as discovering target.
1222                # So target_iqn is updated, and other values aren't updated.
1223                recover_connection = copy.deepcopy(connection_properties)
1224                recover_connection['target_iqn'] = iqn
1225                self._iscsiadm_update(recover_connection,
1226                                      "node.startup",
1227                                      old_node_startup)
1228