# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.


from collections import defaultdict
import copy
import glob
import os
import re
import time
from typing import List, Tuple  # noqa: H301

from oslo_concurrency import lockutils
from oslo_concurrency import processutils as putils
from oslo_log import log as logging
from oslo_utils import excutils
from oslo_utils import strutils

from os_brick import exception
from os_brick import executor
from os_brick.i18n import _
from os_brick import initiator
from os_brick.initiator.connectors import base
from os_brick.initiator.connectors import base_iscsi
from os_brick.initiator import utils as initiator_utils
from os_brick import utils

synchronized = lockutils.synchronized_with_prefix('os-brick-')

LOG = logging.getLogger(__name__)


class ISCSIConnector(base.BaseLinuxConnector, base_iscsi.BaseISCSIConnector):
    """Connector class to attach/detach iSCSI volumes."""

    supported_transports = ['be2iscsi', 'bnx2i', 'cxgb3i', 'default',
                            'cxgb4i', 'qla4xxx', 'ocs', 'iser', 'tcp']
    VALID_SESSIONS_PREFIX = ('tcp:', 'iser:')

    def __init__(
            self, root_helper: str, driver=None,
            execute=None, use_multipath: bool = False,
            device_scan_attempts: int = initiator.DEVICE_SCAN_ATTEMPTS_DEFAULT,
            transport='default', *args, **kwargs):
        """Initialize the connector and validate the requested transport.

        :param root_helper: Forwarded to the base class constructor.
        :param driver: Forwarded to the base class constructor.
        :param execute: Forwarded to the base class constructor.
        :param use_multipath: Whether volumes are attached using multipath.
        :param device_scan_attempts: Number of rescans done while waiting
                                     for a device (see _connect_vol).
        :param transport: iSCSI transport/iface name; it is validated with
                          _validate_iface_transport, which falls back to
                          'default' when it is not usable.
        """
        super(ISCSIConnector, self).__init__(
            root_helper, driver=driver,
            execute=execute,
            device_scan_attempts=device_scan_attempts,
            transport=transport, *args, **kwargs)  # type: ignore
        self.use_multipath: bool = use_multipath
        self.transport: str = self._validate_iface_transport(transport)

    @staticmethod
    def get_connector_properties(root_helper: str, *args, **kwargs) -> dict:
        """The iSCSI connector properties.

        :returns: dict with an 'initiator' key when get_initiator() returns
                  a truthy value, empty dict otherwise.
        """
        props = {}
        iscsi = ISCSIConnector(root_helper=root_helper,
                               execute=kwargs.get('execute'))
        # Named so it does not shadow the imported os_brick.initiator module
        initiator_name = iscsi.get_initiator()
        if initiator_name:
            props['initiator'] = initiator_name

        return props

    def get_search_path(self) -> str:
        """Where do we look for iSCSI based volumes."""
        return '/dev/disk/by-path'

    def get_volume_paths(self, connection_properties: dict) -> list:
        """Get the list of existing paths for a volume.

        This method's job is to simply report what might/should
        already exist for a volume.  We aren't trying to attach/discover
        a new volume, but find any existing paths for a volume we
        think is already attached.

        :param connection_properties: The dictionary that describes all
                                      of the target volume attributes.
        :type connection_properties: dict
        :returns: list of the potential paths that exist on the filesystem
        """
        volume_paths: list = []

        # if there are no sessions, then target_portal won't exist
        if (('target_portal' not in connection_properties) and
                ('target_portals' not in connection_properties)):
            return volume_paths

        # Don't try and connect to the portals in the list as
        # this can create empty iSCSI sessions to hosts if they
        # didn't exist previously.
        # We are simply trying to find any existing volumes with
        # already connected sessions.
        host_devices = self._get_potential_volume_paths(connection_properties)
        for path in host_devices:
            if os.path.exists(path):
                volume_paths.append(path)

        return volume_paths

    def _get_iscsi_sessions_full(self) -> List[tuple]:
        """Get iSCSI session information as a list of tuples.

        Uses iscsiadm -m session and from a command output like
            tcp: [1] 192.168.121.250:3260,1 iqn.2010-10.org.openstack:
            volume- (non-flash)

        This method will drop the node type and return a list like this:
            [('tcp:', '1', '192.168.121.250:3260', '1',
              'iqn.2010-10.org.openstack:volume-')]
        """
        out, err = self._run_iscsi_session()
        if err:
            LOG.warning("iscsiadm stderr output when getting sessions: %s",
                        err)

        # Parse and clean the output from iscsiadm, which is in the form of:
        # transport_name: [session_id] ip_address:port,tpgt iqn node_type
        lines: List[tuple] = []
        for line in out.splitlines():
            if line:
                info = line.split()
                # Session id comes wrapped in brackets: "[1]" -> "1"
                sid = info[1][1:-1]
                portal, tpgt = info[2].split(',')
                lines.append((info[0], sid, portal, tpgt, info[3]))
        return lines

    def _get_iscsi_nodes(self) -> List[tuple]:
        """Get iSCSI node information (portal, iqn) as a list of tuples.

        Uses iscsiadm -m node and from a command output like
            192.168.121.250:3260,1 iqn.2010-10.org.openstack:volume

        This method will drop the tpgt and return a list like this:
            [('192.168.121.250:3260', 'iqn.2010-10.org.openstack:volume')]

        An empty list is returned when iscsiadm writes anything to stderr.
        """
        out, err = self._execute('iscsiadm', '-m', 'node', run_as_root=True,
                                 root_helper=self._root_helper,
                                 check_exit_code=False)
        if err:
            LOG.warning("Couldn't find iSCSI nodes because iscsiadm err: %s",
                        err)
            return []

        # Parse and clean the output from iscsiadm which is in the form of:
        # ip_address:port,tpgt iqn
        lines: List[tuple] = []
        for line in out.splitlines():
            if line:
                info = line.split()
                try:
                    lines.append((info[0].split(',')[0], info[1]))
                except IndexError:
                    # Lines without an iqn field are deliberately ignored
                    pass
        return lines

    def _get_iscsi_sessions(self) -> list:
        """Return portals for all existing sessions."""
        # entry: [tcp, [1], 192.168.121.250:3260,1 ...]
        return [entry[2] for entry in self._get_iscsi_sessions_full()]

    def _get_ips_iqns_luns(self,
                           connection_properties: dict,
                           discover: bool = True,
                           is_disconnect_call: bool = False):
        """Build a list of ips, iqns, and luns.

        Used when doing singlepath and multipath, and we have 4 cases:

        - All information is in the connection properties
        - We have to do an iSCSI discovery to get the information
        - We don't want to do another discovery and we query the discoverydb
        - Discovery failed because it was actually a single pathed attachment

        :param connection_properties: The dictionary that describes all
                                      of the target volume attributes.
        :type connection_properties: dict
        :param discover: Whether doing an iSCSI discovery is acceptable.
        :type discover: bool
        :param is_disconnect_call: Whether this is a call coming from a user
                                   disconnect_volume call or a call from some
                                   other operation's cleanup.
        :type is_disconnect_call: bool
        :returns: list of tuples of (ip, iqn, lun)
        """
        # There are cases where we don't know if the local attach was done
        # using multipathing or single pathing, so assume multipathing.
        try:
            if ('target_portals' in connection_properties and
                    'target_iqns' in connection_properties):
                # Use targets specified by connection_properties
                ips_iqns_luns = self._get_all_targets(connection_properties)
            else:
                method = (self._discover_iscsi_portals if discover
                          else self._get_discoverydb_portals)
                ips_iqns_luns = method(connection_properties)
        except exception.TargetPortalNotFound:
            # Discovery failed, on disconnect this will happen if we
            # are detaching a single pathed connection, so we use the
            # connection properties to return the tuple.
            if is_disconnect_call:
                return self._get_all_targets(connection_properties)
            raise
        except Exception:
            LOG.exception('Exception encountered during portal discovery')
            if 'target_portals' in connection_properties:
                raise exception.TargetPortalsNotFound(
                    target_portals=connection_properties['target_portals'])
            if 'target_portal' in connection_properties:
                raise exception.TargetPortalNotFound(
                    target_portal=connection_properties['target_portal'])
            raise

        if not connection_properties.get('target_iqns'):
            # There are two types of iSCSI multipath devices. One which
            # shares the same iqn between multiple portals, and the other
            # which use different iqns on different portals.
            # Try to identify the type by checking the iscsiadm output
            # if the iqn is used by multiple portals. If it is, it's
            # the former, so use the supplied iqn. Otherwise, it's the
            # latter, so try the ip,iqn combinations to find the targets
            # which constitutes the multipath device.
            main_iqn = connection_properties['target_iqn']
            all_portals = {(ip, lun) for ip, iqn, lun in ips_iqns_luns}
            match_portals = {(ip, lun) for ip, iqn, lun in ips_iqns_luns
                             if iqn == main_iqn}
            if len(all_portals) == len(match_portals):
                ips_iqns_luns = [(p[0], main_iqn, p[1])
                                 for p in all_portals]

        return ips_iqns_luns

    def _get_potential_volume_paths(self,
                                    connection_properties: dict) -> List[str]:
        """Build a list of potential volume paths that exist.

        Given a list of target_portals in the connection_properties,
        a list of paths might exist on the system during discovery.
        This method's job is to build that list of potential paths
        for a volume that might show up.

        This is only used during get_volume_paths time, we are looking to
        find a list of existing volume paths for the connection_properties.
        In this case, we don't want to connect to the portal.  If we
        blindly try and connect to a portal, it could create a new iSCSI
        session that didn't exist previously, and then leave it stale.

        :param connection_properties: The dictionary that describes all
                                      of the target volume attributes.
        :type connection_properties: dict
        :returns: list
        """
        if self.use_multipath:
            LOG.info("Multipath discovery for iSCSI enabled")
            # Multipath installed, discovering other targets if available
            host_devices = self._get_device_path(connection_properties)
        else:
            LOG.info("Multipath discovery for iSCSI not enabled.")
            iscsi_sessions = self._get_iscsi_sessions()

            host_devices = set()
            for props in self._iterate_all_targets(connection_properties):
                # If we aren't trying to connect to the portal, we
                # want to find ALL possible paths from all of the
                # alternate portals
                if props['target_portal'] in iscsi_sessions:
                    paths = self._get_device_path(props)
                    host_devices.update(paths)
            host_devices = list(host_devices)

        return host_devices

    def set_execute(self, execute):
        """Set the execute callable here and on the linuxscsi helper."""
        super(ISCSIConnector, self).set_execute(execute)
        self._linuxscsi.set_execute(execute)

    def _validate_iface_transport(self, transport_iface: str) -> str:
        """Check that given iscsi_iface uses only supported transports

        Accepted transport names for provided iface param are
        be2iscsi, bnx2i, cxgb3i, cxgb4i, default, qla4xxx, ocs, iser or tcp.
        Note the difference between transport and iface;
        unlike default(iscsi_tcp)/iser, this is not one and the same for
        offloaded transports, where the default format is
        transport_name.hwaddress

        :param transport_iface: The iscsi transport type.
        :type transport_iface: str
        :returns: transport_iface when it is usable, 'default' otherwise
        """
        # Note that default(iscsi_tcp) and iser do not require a separate
        # iface file, just the transport is enough and do not need to be
        # validated. This is not the case for the other entries in
        # supported_transports array.
        if transport_iface in ['default', 'iser']:
            return transport_iface
        # Will return (6) if iscsi_iface file was not found, or (2) if iscsid
        # could not be contacted
        out = self._run_iscsiadm_bare(['-m',
                                       'iface',
                                       '-I',
                                       transport_iface],
                                      check_exit_code=[0, 2, 6])[0] or ""
        LOG.debug("iscsiadm %(iface)s configuration: stdout=%(out)s.",
                  {'iface': transport_iface, 'out': out})
        for data in (line.split() for line in out.splitlines()):
            # Records look like "key = value" (3 tokens).  Blank lines and
            # keys without a value are skipped instead of raising IndexError.
            if len(data) > 2 and data[0] == 'iface.transport_name':
                if data[2] in self.supported_transports:
                    return transport_iface

        LOG.warning("No useable transport found for iscsi iface %s. "
                    "Falling back to default transport.",
                    transport_iface)
        return 'default'

    def _get_transport(self) -> str:
        """Return the transport stored by __init__ after validation."""
        return self.transport

    def _get_discoverydb_portals(self,
                                 connection_properties: dict) -> List[tuple]:
        """Retrieve iscsi portals information from the discoverydb.

        Example of discoverydb command output:

        SENDTARGETS:
        DiscoveryAddress: 192.168.1.33,3260
        DiscoveryAddress: 192.168.1.2,3260
        Target: iqn.2004-04.com.qnap:ts-831x:iscsi.cinder-20170531114245.9eff88
            Portal: 192.168.1.3:3260,1
                Iface Name: default
            Portal: 192.168.1.2:3260,1
                Iface Name: default
        Target: iqn.2004-04.com.qnap:ts-831x:iscsi.cinder-20170531114447.9eff88
            Portal: 192.168.1.3:3260,1
                Iface Name: default
            Portal: 192.168.1.2:3260,1
                Iface Name: default
        DiscoveryAddress: 192.168.1.38,3260
        iSNS:
        No targets found.
        STATIC:
        No targets found.
        FIRMWARE:
        No targets found.

        :param connection_properties: The dictionary that describes all
                                      of the target volume attributes.
        :type connection_properties: dict
        :returns: list of tuples of (ip, iqn, lun)
        :raises TargetPortalsNotFound: when no matching target is found.
        """
        ip, port = connection_properties['target_portal'].rsplit(':', 1)
        # NOTE(geguileo): I don't know if IPv6 will be reported with []
        # or not, so we'll make them optional.
        ip = ip.replace('[', r'\[?').replace(']', r'\]?')
        out = self._run_iscsiadm_bare(['-m', 'discoverydb',
                                       '-o', 'show',
                                       '-P', 1])[0] or ""
        # Capture everything listed under our DiscoveryAddress up to the
        # next DiscoveryAddress entry or the start of the iSNS section.
        regex = ''.join(('^SENDTARGETS:\n.*?^DiscoveryAddress: ',
                         ip, ',', port,
                         '.*?\n(.*?)^(?:DiscoveryAddress|iSNS):.*'))
        LOG.debug('Regex to get portals from discoverydb: %s', regex)

        info = re.search(regex, out, re.DOTALL | re.MULTILINE)

        ips = []
        iqns = []

        if info:
            iscsi_transport = ('iser' if self._get_transport() == 'iser'
                               else 'default')
            iface = 'Iface Name: ' + iscsi_transport
            current_iqn = ''
            current_ip = ''
            for line in info.group(1).splitlines():
                line = line.strip()
                if line.startswith('Target:'):
                    current_iqn = line[8:]
                elif line.startswith('Portal:'):
                    current_ip = line[8:].split(',')[0]
                elif line.startswith(iface):
                    # Only portals bound to our iface are reported, and
                    # each portal is reported once.
                    if current_iqn and current_ip:
                        iqns.append(current_iqn)
                        ips.append(current_ip)
                    current_ip = ''

        if not iqns:
            raise exception.TargetPortalsNotFound(
                _('Unable to find target portals information on discoverydb.'))

        luns = self._get_luns(connection_properties, iqns)
        return list(zip(ips, iqns, luns))

    def _discover_iscsi_portals(self, connection_properties: dict) -> list:
        """Run a sendtargets discovery against the target portal.

        When 'discovery_auth_method' is set in the connection properties the
        discoverydb record is first updated with the auth data (creating the
        record if iscsiadm reports it as missing) and the discovery is run
        through the discoverydb; otherwise a plain discovery is done.

        Node startup values are read before the discovery and recovered
        afterwards.

        :param connection_properties: The dictionary that describes all
                                      of the target volume attributes.
        :type connection_properties: dict
        :returns: list of tuples of (ip, iqn, lun)
        """
        iscsi_transport = ('iser' if self._get_transport() == 'iser'
                           else 'default')
        if connection_properties.get('discovery_auth_method'):
            try:
                self._run_iscsiadm_update_discoverydb(connection_properties,
                                                      iscsi_transport)
            # NOTE: don't bind this to "exception", it would shadow the
            # os_brick.exception module within this method.
            except putils.ProcessExecutionError as exc:
                # iscsiadm returns 6 for "db record not found"
                if exc.exit_code == 6:
                    # Create a new record for this target and update the db
                    self._run_iscsiadm_bare(
                        ['-m', 'discoverydb',
                         '-t', 'sendtargets',
                         '-p', connection_properties['target_portal'],
                         '-I', iscsi_transport,
                         '--op', 'new'],
                        check_exit_code=[0, 255])
                    # Use the same iface the record was just created with
                    self._run_iscsiadm_update_discoverydb(
                        connection_properties, iscsi_transport)
                else:
                    LOG.error("Unable to find target portal: "
                              "%(target_portal)s.",
                              {'target_portal': connection_properties[
                                  'target_portal']})
                    raise
            old_node_startups = self._get_node_startup_values(
                connection_properties)
            out = self._run_iscsiadm_bare(
                ['-m', 'discoverydb',
                 '-t', 'sendtargets',
                 '-I', iscsi_transport,
                 '-p', connection_properties['target_portal'],
                 '--discover'],
                check_exit_code=[0, 255])[0] or ""
            self._recover_node_startup_values(connection_properties,
                                              old_node_startups)
        else:
            old_node_startups = self._get_node_startup_values(
                connection_properties)
            out = self._run_iscsiadm_bare(
                ['-m', 'discovery',
                 '-t', 'sendtargets',
                 '-I', iscsi_transport,
                 '-p', connection_properties['target_portal']],
                check_exit_code=[0, 255])[0] or ""
            self._recover_node_startup_values(connection_properties,
                                              old_node_startups)

        ips, iqns = self._get_target_portals_from_iscsiadm_output(out)
        luns = self._get_luns(connection_properties, iqns)
        return list(zip(ips, iqns, luns))

    def _run_iscsiadm_update_discoverydb(self, connection_properties,
                                         iscsi_transport='default'):
        """Update the sendtargets discoverydb record with the auth data.

        :param connection_properties: Must include target_portal,
                                      discovery_auth_method,
                                      discovery_auth_username and
                                      discovery_auth_password.
        :param iscsi_transport: Value passed to iscsiadm's -I option.
        :returns: Whatever the execute call returns.
        """
        return self._execute(
            'iscsiadm',
            '-m', 'discoverydb',
            '-t', 'sendtargets',
            '-I', iscsi_transport,
            '-p', connection_properties['target_portal'],
            '--op', 'update',
            '-n', "discovery.sendtargets.auth.authmethod",
            '-v', connection_properties['discovery_auth_method'],
            '-n', "discovery.sendtargets.auth.username",
            '-v', connection_properties['discovery_auth_username'],
            '-n', "discovery.sendtargets.auth.password",
            '-v', connection_properties['discovery_auth_password'],
            run_as_root=True,
            root_helper=self._root_helper)

    @utils.trace
    @synchronized('extend_volume', external=True)
    def extend_volume(self, connection_properties: dict):
        """Update the local kernel's size information.

        Try and update the local kernel's size information
        for an iSCSI volume.

        :raises VolumePathsNotFound: when no path exists for the volume.
        """
        LOG.info("Extend volume for %s",
                 strutils.mask_dict_password(connection_properties))

        volume_paths = self.get_volume_paths(connection_properties)
        LOG.info("Found paths for volume %s", volume_paths)
        if volume_paths:
            return self._linuxscsi.extend_volume(
                volume_paths, use_multipath=self.use_multipath)
        else:
            LOG.warning("Couldn't find any volume paths on the host to "
                        "extend volume for %(props)s",
                        {'props': strutils.mask_dict_password(
                            connection_properties)})
            raise exception.VolumePathsNotFound()

    @utils.trace
    @synchronized('connect_volume', external=True)
    def connect_volume(self, connection_properties: dict):
        """Attach the volume to instance_name.

        :param connection_properties: The valid dictionary that describes all
                                      of the target volume attributes.
        :type connection_properties: dict
        :returns: dict

        connection_properties for iSCSI must include:
        target_portal(s) - ip and optional port
        target_iqn(s) - iSCSI Qualified Name
        target_lun(s) - LUN id of the volume
        Note that plural keys may be used when use_multipath=True
        """
        try:
            if self.use_multipath:
                return self._connect_multipath_volume(connection_properties)
            return self._connect_single_volume(connection_properties)
        except Exception:
            # NOTE(geguileo): By doing the cleanup here we ensure we only do
            # the logins once for multipath if they succeed, but retry if they
            # don't, which helps on bad network cases.
            with excutils.save_and_reraise_exception():
                self._cleanup_connection(connection_properties, force=True)

    @utils.retry((exception.VolumeDeviceNotFound))
    def _get_device_link(self, wwn, device, mpath):
        """Return a symlink under /dev/disk/by-id that resolves to device.

        :param wwn: WWN of the volume, used for the default scsi symlink.
        :param device: Real device path the symlink must resolve to.
        :param mpath: Multipath dm name, if any; when set the default
                      dm-uuid-mpath symlink is tried instead of the scsi one.
        :raises VolumeDeviceNotFound: when no link resolves to device.
        """
        # These are the default symlinks that should always be there
        if mpath:
            symlink = '/dev/disk/by-id/dm-uuid-mpath-' + mpath
        else:
            symlink = '/dev/disk/by-id/scsi-' + wwn

        # If default symlinks are not there just search for anything that links
        # to our device.  In my experience this will return the last added link
        # first, so if we are going to succeed this should be fast.
        if os.path.realpath(symlink) != device:
            links_path = '/dev/disk/by-id/'
            for symlink in os.listdir(links_path):
                symlink = links_path + symlink
                if os.path.realpath(symlink) == device:
                    break
            else:
                # Raising this will trigger the next retry
                raise exception.VolumeDeviceNotFound(device='/dev/disk/by-id')
        return symlink

    def _get_connect_result(self, con_props, wwn, devices_names, mpath=None):
        """Build the dictionary returned by the connect methods.

        :param con_props: Connection properties; only 'encrypted' is read.
        :param wwn: WWN of the volume.
        :param devices_names: Names of the found devices; the first one is
                              used when there is no multipath.
        :param mpath: Name of the multipath dm, if any.
        :returns: dict with 'type', 'scsi_wwn' and 'path' keys, plus
                  'multipath_id' when mpath is set.
        """
        device = '/dev/' + (mpath or devices_names[0])

        # NOTE(geguileo): This is only necessary because of the current
        # encryption flow that requires that connect_volume returns a symlink
        # because first we do the volume attach, then the libvirt config is
        # generated using the path returned by the attach, and then we do the
        # encryption attach, which is forced to preserve the path that was used
        # in the libvirt config.  If we fix that flow in OS-brick, Nova, and
        # Cinder we can remove this and just return the real path.
        if con_props.get('encrypted'):
            device = self._get_device_link(wwn, device, mpath)

        result = {'type': 'block', 'scsi_wwn': wwn, 'path': device}
        if mpath:
            result['multipath_id'] = wwn
        return result

    @utils.retry((exception.VolumeDeviceNotFound))
    def _connect_single_volume(self, connection_properties):
        """Connect to a volume using a single path.

        Targets are tried one at a time and the first one that yields a
        device is returned.

        :raises VolumeDeviceNotFound: when no target yields a device.
        """
        data = {'stop_connecting': False, 'num_logins': 0, 'failed_logins': 0,
                'stopped_threads': 0, 'found_devices': [],
                'just_added_devices': []}

        for props in self._iterate_all_targets(connection_properties):
            self._connect_vol(self.device_scan_attempts, props, data)
            found_devs = data['found_devices']
            if found_devs:
                # Give sysfs up to 10 seconds to report the WWN
                for __ in range(10):
                    wwn = self._linuxscsi.get_sysfs_wwn(found_devs)
                    if wwn:
                        break
                    time.sleep(1)
                else:
                    LOG.debug('Could not find the WWN for %s.',
                              found_devs[0])  # type: ignore
                return self._get_connect_result(connection_properties,
                                                wwn, found_devs)

            # If we failed we must cleanup the connection, as we could be
            # leaving the node entry if it's not being used by another device.
            ips_iqns_luns = ((props['target_portal'], props['target_iqn'],
                              props['target_lun']), )
            self._cleanup_connection(props, ips_iqns_luns, force=True,
                                     ignore_errors=True)
            # Reset connection result values for next try
            data.update(num_logins=0, failed_logins=0, found_devices=[])

        raise exception.VolumeDeviceNotFound(device='')

    def _connect_vol(self, rescans, props, data):
        """Make a connection to a volume, send scans and wait for the device.

        This method is specifically designed to support multithreading and
        share the results via a shared dictionary with fixed keys, which is
        thread safe.

        Since the heaviest operations are run via subprocesses we don't worry
        too much about the GIL or how the eventlets will handle the context
        switching.

        The method will only try to log in once, since iscsid's initiator
        already tries 8 times by default to do the login, or whatever value we
        have as node.session.initial_login_retry_max in our system.

        Shared dictionary has the following keys:
        - stop_connecting: When the caller wants us to stop the rescans
        - num_logins: Count of how many threads have successfully logged in
        - failed_logins: Count of how many threads have failed to log in
        - stopped_threads: How many threads have finished.  This may be
                           different than num_logins + failed_logins, since
                           some threads may still be waiting for a device.
        - found_devices: List of devices the connections have found
        - just_added_devices: Devices that have been found and still have not
                              been processed by the main thread that manages
                              all the connecting threads.

        :param rescans: Number of rescans to perform before giving up.
        :param props: Properties of the connection.
        :param data: Shared data.
        """
        device = hctl = None
        portal = props['target_portal']
        try:
            session, manual_scan = self._connect_to_iscsi_portal(props)
        except Exception:
            LOG.exception('Exception connecting to %s', portal)
            session = None

        if session:
            do_scans = rescans > 0 or manual_scan
            # Scan is sent on connect by iscsid, but we must do it manually on
            # manual scan mode.  This scan cannot count towards total rescans.
            if manual_scan:
                num_rescans = -1
                seconds_next_scan = 0
            else:
                num_rescans = 0
                seconds_next_scan = 4

            data['num_logins'] += 1
            LOG.debug('Connected to %s', portal)
            while do_scans:
                try:
                    if not hctl:
                        hctl = self._linuxscsi.get_hctl(session,
                                                        props['target_lun'])
                    if hctl:
                        if seconds_next_scan <= 0:
                            num_rescans += 1
                            self._linuxscsi.scan_iscsi(*hctl)
                            # 4 seconds on 1st rescan, 9s on 2nd, 16s on 3rd
                            seconds_next_scan = (num_rescans + 2) ** 2

                        device = self._linuxscsi.device_name_by_hctl(session,
                                                                     hctl)
                        if device:
                            break

                except Exception:
                    # Best effort: log it and keep trying until we run out
                    # of rescans or we are told to stop.
                    LOG.exception('Exception scanning %s', portal)
                do_scans = (num_rescans <= rescans and
                            not (device or data['stop_connecting']))
                if do_scans:
                    time.sleep(1)
                seconds_next_scan -= 1

            if device:
                LOG.debug('Connected to %s using %s', device,
                          strutils.mask_password(props))
            else:
                LOG.warning('LUN %(lun)s on iSCSI portal %(portal)s not found '
                            'on sysfs after logging in.',
                            {'lun': props['target_lun'], 'portal': portal})
        else:
            LOG.warning('Failed to connect to iSCSI portal %s.', portal)
            data['failed_logins'] += 1

        if device:
            data['found_devices'].append(device)
            data['just_added_devices'].append(device)
        data['stopped_threads'] += 1

    @utils.retry((exception.VolumeDeviceNotFound))
    def _connect_multipath_volume(self, connection_properties):
        """Connect to a multipathed volume launching parallel login requests.

        We will be doing parallel login requests, which will considerably speed
        up the process when we have flaky connections.

        We'll always try to return a multipath device even if there's only one
        path discovered, that way we can return once we have logged in in all
        the portals, because the paths will come up later.

        To make this possible we tell multipathd that the wwid is a multipath
        as soon as we have one device, and then hint multipathd to reconsider
        that volume for a multipath asking to add the path, because even if
        it's already known by multipathd it would have been discarded if it
        was the first time this volume was seen here.

        :raises VolumeDeviceNotFound: when no device has been found.
        """
        wwn = mpath = None
        wwn_added = False
        last_try_on = 0.0
        found: list = []
        just_added_devices: list = []
        # Dict used to communicate with threads as detailed in _connect_vol
        data = {'stop_connecting': False, 'num_logins': 0, 'failed_logins': 0,
                'stopped_threads': 0, 'found_devices': found,
                'just_added_devices': just_added_devices}

        ips_iqns_luns = self._get_ips_iqns_luns(connection_properties)
        # Launch individual threads for each session with the own properties
        retries = self.device_scan_attempts
        threads = []
        for ip, iqn, lun in ips_iqns_luns:
            props = connection_properties.copy()
            props.update(target_portal=ip, target_iqn=iqn, target_lun=lun)

            # NOTE(yenai): The method _connect_vol is used for parallelize
            # logins, we shouldn't give these arguments; and it will make a
            # mess in the debug message in _connect_vol. So, kick them out:
            for key in ('target_portals', 'target_iqns', 'target_luns'):
                props.pop(key, None)

            threads.append(executor.Thread(target=self._connect_vol,
                                           args=(retries, props, data)))
        for thread in threads:
            thread.start()

        # Continue until:
        # - All connection attempts have finished and none has logged in
        # - Multipath has been found and connection attempts have either
        #   finished or have already logged in
        # - We have finished in all threads, logged in, found some device, and
        #   10 seconds have passed, which should be enough with up to 10%
        #   network package drops.
        while not ((len(ips_iqns_luns) == data['stopped_threads'] and
                    not found) or
                   (mpath and len(ips_iqns_luns) == data['num_logins'] +
                    data['failed_logins'])):
            # We have devices but we don't know the wwn yet
            if not wwn and found:
                wwn = self._linuxscsi.get_sysfs_wwn(found, mpath)
            if not mpath and found:
                mpath = self._linuxscsi.find_sysfs_multipath_dm(found)
            # We have the wwn but not a multipath
            if wwn and not (mpath or wwn_added):
                # Tell multipathd that this wwn is a multipath and hint
                # multipathd to recheck all the devices we have just
                # connected.  We only do this once, since for any new
                # device multipathd will already know it is a multipath.
                # This is only useful if we have multipathd configured with
                # find_multipaths set to yes, and has no effect if it's set
                # to no.
                wwn_added = self._linuxscsi.multipath_add_wwid(wwn)
                while not mpath and just_added_devices:
                    device_path = '/dev/' + just_added_devices.pop(0)
                    self._linuxscsi.multipath_add_path(device_path)
                    mpath = self._linuxscsi.find_sysfs_multipath_dm(found)
            # Give some extra time after all threads have finished.
            if (not last_try_on and found and
                    len(ips_iqns_luns) == data['stopped_threads']):
                LOG.debug('All connection threads finished, giving 10 seconds '
                          'for dm to appear.')
                last_try_on = time.time() + 10
            elif last_try_on and last_try_on < time.time():
                break
            time.sleep(1)
        data['stop_connecting'] = True
        for thread in threads:
            thread.join()

        # If we haven't found any devices let the caller do the cleanup
        if not found:
            raise exception.VolumeDeviceNotFound(device='')

        # NOTE(geguileo): If we cannot find the dm it's because all paths are
        # really bad, so we might as well raise a not found exception, but
        # in our best effort we'll return a device even if it's probably
        # useless.
        if not mpath:
            LOG.warning('No dm was created, connection to volume is probably '
                        'bad and will perform poorly.')
        elif not wwn:
            wwn = self._linuxscsi.get_sysfs_wwn(found, mpath)
        return self._get_connect_result(connection_properties, wwn, found,
                                        mpath)

    def _get_connection_devices(self, connection_properties,
                                ips_iqns_luns=None, is_disconnect_call=False):
        """Get map of devices by sessions from our connection.

        For each of the TCP sessions that correspond to our connection
        properties we generate a map of (ip, iqn) to (belong, other) where
        belong is a set of devices in that session that populated our system
        when we did a connection using connection properties, and other are
        any other devices that share that same session but are the result of
        connecting with different connection properties.

        We also include all nodes from our connection that don't have a
        session.

        If ips_iqns_luns parameter is provided connection_properties won't be
        used to get them.

        When doing multipath we may not have all the information on the
        connection properties (sendtargets was used on connect) so we may have
        to retrieve the info from the discoverydb.  Call _get_ips_iqns_luns to
        do the right things.

        This method currently assumes that it's only called by the
        _cleanup_connection method.
        """
        if not ips_iqns_luns:
            # This is a cleanup, don't do discovery
            ips_iqns_luns = self._get_ips_iqns_luns(
                connection_properties, discover=False,
                is_disconnect_call=is_disconnect_call)
        LOG.debug('Getting connected devices for (ips,iqns,luns)=%s',
                  ips_iqns_luns)
        nodes = self._get_iscsi_nodes()
        sessions = self._get_iscsi_sessions_full()
        # Use (portal, iqn) to map the session value
        sessions_map = {(s[2], s[4]): s[1] for s in sessions
                        if s[0] in self.VALID_SESSIONS_PREFIX}
        # device_map will keep a tuple with devices from the connection and
        # others that don't belong to this connection" (belong, others)
        device_map: defaultdict = defaultdict(lambda: (set(), set()))

        for ip, iqn, lun in ips_iqns_luns:
            session = sessions_map.get((ip, iqn))
            # Our nodes that don't have a session will be returned as empty
            if not session:
                if (ip, iqn) in nodes:
                    device_map[(ip, iqn)] = (set(), set())
                continue

            # Get all devices for the session
            paths = glob.glob('/sys/class/scsi_host/host*/device/session' +
                              session + '/target*/*:*:*:*/block/*')
            belong, others = device_map[(ip, iqn)]
            for path in paths:
                __, hctl, __, device = path.rsplit('/', 3)
                # The LUN is the last field of the H:C:T:L address
                lun_path = int(hctl.rsplit(':', 1)[-1])
                # For partitions turn them into the whole device: sde1 -> sde
                device = device.strip('0123456789')
                if lun_path == lun:
                    belong.add(device)
                else:
                    others.add(device)

        LOG.debug('Resulting device map %s', device_map)
        return device_map

    @utils.trace
    @synchronized('connect_volume', external=True)
    def disconnect_volume(self, connection_properties, device_info,
                          force=False, ignore_errors=False):
        """Detach the volume from instance_name.

        :param connection_properties: The dictionary that describes all
                                      of the target volume attributes.
        :type connection_properties: dict that must include:
                                     target_portal(s) - IP and optional port
                                     target_iqn(s) - iSCSI Qualified Name
                                     target_lun(s) - LUN id of the volume
        :param device_info: historical difference, but same as connection_props
        :type device_info: dict
        :param force: Whether to forcefully disconnect even if flush fails.
        :type force: bool
        :param ignore_errors: When force is True, this will decide whether to
                              ignore errors or raise an exception once finished
                              the operation.  Default is False.
        :type ignore_errors: bool
        """
        return self._cleanup_connection(connection_properties, force=force,
                                        ignore_errors=ignore_errors,
                                        device_info=device_info,
                                        is_disconnect_call=True)

    def _cleanup_connection(self, connection_properties, ips_iqns_luns=None,
                            force=False, ignore_errors=False,
                            device_info=None, is_disconnect_call=False):
        """Cleans up connection flushing and removing devices and multipath.

        :param connection_properties: The dictionary that describes all
                                      of the target volume attributes.
        :type connection_properties: dict that must include:
                                     target_portal(s) - IP and optional port
                                     target_iqn(s) - iSCSI Qualified Name
                                     target_lun(s) - LUN id of the volume
        :param ips_iqns_luns: Use this list of tuples instead of information
                              from the connection_properties.
        :param force: Whether to forcefully disconnect even if flush fails.
        :type force: bool
        :param ignore_errors: When force is True, this will decide whether to
                              ignore errors or raise an exception once finished
                              the operation.  Default is False.
        :type ignore_errors: bool
        :param device_info: Attached device information.
        :param is_disconnect_call: Whether this is a call coming from a user
                                   disconnect_volume call or a call from some
                                   other operation's cleanup.
        :type is_disconnect_call: bool
        """
        exc = exception.ExceptionChainer()
        try:
            devices_map = self._get_connection_devices(connection_properties,
                                                       ips_iqns_luns,
                                                       is_disconnect_call)
        # NOTE: use a different name so the chainer in "exc" is not rebound
        except exception.TargetPortalNotFound as target_exc:
            # When discovery sendtargets failed on connect there is no
            # information in the discoverydb, so there's nothing to clean.
            LOG.debug('Skipping cleanup %s', target_exc)
            return

        # Remove devices and multipath from this connection
        remove_devices = set()
        for remove, __ in devices_map.values():
            remove_devices.update(remove)

        path_used = self._linuxscsi.get_dev_path(connection_properties,
                                                 device_info)
        was_multipath = (path_used.startswith('/dev/dm-') or
                         'mpath' in path_used)
        multipath_name = self._linuxscsi.remove_connection(
            remove_devices, force,
            exc, path_used, was_multipath)  # type: ignore

        # Disconnect sessions and remove nodes that are left without devices
        disconnect = [conn for conn, (__, keep) in devices_map.items()
                      if not keep]

        # The "type:" comment works around mypy issue #6647
        self._disconnect_connection(connection_properties, disconnect, force,
                                    exc)  # type:ignore

        # If flushing the multipath failed before, try now after we have
        # removed the devices and we may have even logged off (only reaches
        # here with multipath_name if force=True).
        if multipath_name:
            LOG.debug('Flushing again multipath %s now that we removed the '
                      'devices.', multipath_name)
            self._linuxscsi.flush_multipath_device(multipath_name)

        if exc:  # type: ignore
            LOG.warning('There were errors removing %s, leftovers may remain '
                        'in the system', remove_devices)
            if not ignore_errors:
                raise exc  # type: ignore
Udev code specifically forbids that. 966 """ 967 portal, iqn, lun = target 968 return (portal.replace('[', '').replace(']', ''), iqn, 969 self._linuxscsi.process_lun_id(lun)) 970 971 def _get_device_path(self, connection_properties): 972 if self._get_transport() == "default": 973 return ["/dev/disk/by-path/ip-%s-iscsi-%s-lun-%s" % 974 self._munge_portal(x) for x in 975 self._get_all_targets(connection_properties)] 976 else: 977 # we are looking for paths in the format : 978 # /dev/disk/by-path/ 979 # pci-XXXX:XX:XX.X-ip-PORTAL:PORT-iscsi-IQN-lun-LUN_ID 980 device_list = [] 981 for x in self._get_all_targets(connection_properties): 982 look_for_device = glob.glob( 983 '/dev/disk/by-path/*ip-%s-iscsi-%s-lun-%s' % 984 self._munge_portal(x)) 985 if look_for_device: 986 device_list.extend(look_for_device) 987 return device_list 988 989 def get_initiator(self): 990 """Secure helper to read file as root.""" 991 file_path = '/usr/local/etc/iscsi/initiatorname.iscsi' 992 try: 993 lines, _err = self._execute('cat', file_path, run_as_root=True, 994 root_helper=self._root_helper) 995 996 for line in lines.split('\n'): 997 if line.startswith('InitiatorName='): 998 return line[line.index('=') + 1:].strip() 999 except putils.ProcessExecutionError: 1000 LOG.warning("Could not find the iSCSI Initiator File %s", 1001 file_path) 1002 return None 1003 1004 def _run_iscsiadm(self, connection_properties, iscsi_command, **kwargs): 1005 check_exit_code = kwargs.pop('check_exit_code', 0) 1006 attempts = kwargs.pop('attempts', 1) 1007 delay_on_retry = kwargs.pop('delay_on_retry', True) 1008 (out, err) = self._execute('iscsiadm', '-m', 'node', '-T', 1009 connection_properties['target_iqn'], 1010 '-p', 1011 connection_properties['target_portal'], 1012 *iscsi_command, run_as_root=True, 1013 root_helper=self._root_helper, 1014 check_exit_code=check_exit_code, 1015 attempts=attempts, 1016 delay_on_retry=delay_on_retry) 1017 msg = ("iscsiadm %(iscsi_command)s: stdout=%(out)s stderr=%(err)s" % 
1018 {'iscsi_command': iscsi_command, 'out': out, 'err': err}) 1019 # don't let passwords be shown in log output 1020 LOG.debug(strutils.mask_password(msg)) 1021 1022 return (out, err) 1023 1024 def _iscsiadm_update(self, connection_properties, property_key, 1025 property_value, **kwargs): 1026 iscsi_command = ('--op', 'update', '-n', property_key, 1027 '-v', property_value) 1028 return self._run_iscsiadm(connection_properties, iscsi_command, 1029 **kwargs) 1030 1031 def _get_target_portals_from_iscsiadm_output(self, output): 1032 # return both portals and iqns as 2 lists 1033 # 1034 # as we are parsing a command line utility, allow for the 1035 # possibility that additional debug data is spewed in the 1036 # stream, and only grab actual ip / iqn lines. 1037 ips = [] 1038 iqns = [] 1039 for data in [line.split() for line in output.splitlines()]: 1040 if len(data) == 2 and data[1].startswith('iqn.'): 1041 ips.append(data[0].split(',')[0]) 1042 iqns.append(data[1]) 1043 return ips, iqns 1044 1045 def _connect_to_iscsi_portal(self, connection_properties): 1046 """Safely connect to iSCSI portal-target and return the session id.""" 1047 portal = connection_properties['target_portal'].split(",")[0] 1048 target_iqn = connection_properties['target_iqn'] 1049 1050 lock_name = f'connect_to_iscsi_portal-{portal}-{target_iqn}' 1051 method = synchronized( 1052 lock_name, external=True)(self._connect_to_iscsi_portal_unsafe) 1053 return method(connection_properties) 1054 1055 @utils.retry((exception.BrickException)) 1056 def _connect_to_iscsi_portal_unsafe(self, connection_properties): 1057 """Connect to an iSCSI portal-target an return the session id.""" 1058 portal = connection_properties['target_portal'].split(",")[0] 1059 target_iqn = connection_properties['target_iqn'] 1060 1061 # NOTE(vish): If we are on the same host as nova volume, the 1062 # discovery makes the target so we don't need to 1063 # run --op new. 
Therefore, we check to see if the 1064 # target exists, and if we get 255 (Not Found), then 1065 # we run --op new. This will also happen if another 1066 # volume is using the same target. 1067 # iscsiadm returns 21 for "No records found" after version 2.0-871 1068 LOG.info("Trying to connect to iSCSI portal %s", portal) 1069 out, err = self._run_iscsiadm(connection_properties, (), 1070 check_exit_code=(0, 21, 255)) 1071 if err: 1072 out_new, err_new = self._run_iscsiadm(connection_properties, 1073 ('--interface', 1074 self._get_transport(), 1075 '--op', 'new'), 1076 check_exit_code=(0, 6)) 1077 if err_new: 1078 # retry if iscsiadm returns 6 for "database failure" 1079 LOG.debug("Retrying to connect to iSCSI portal %s", portal) 1080 msg = (_("Encountered database failure for %s.") % (portal)) 1081 raise exception.BrickException(msg=msg) 1082 1083 # Try to set the scan mode to manual 1084 res = self._iscsiadm_update(connection_properties, 1085 'node.session.scan', 'manual', 1086 check_exit_code=False) 1087 manual_scan = not res[1] 1088 # Update global indicator of manual scan support used for 1089 # shared_targets locking so we support upgrading open iscsi to a 1090 # version supporting the manual scan feature without restarting Nova 1091 # or Cinder. 1092 initiator_utils.ISCSI_SUPPORTS_MANUAL_SCAN = manual_scan 1093 1094 if connection_properties.get('auth_method'): 1095 self._iscsiadm_update(connection_properties, 1096 "node.session.auth.authmethod", 1097 connection_properties['auth_method']) 1098 self._iscsiadm_update(connection_properties, 1099 "node.session.auth.username", 1100 connection_properties['auth_username']) 1101 self._iscsiadm_update(connection_properties, 1102 "node.session.auth.password", 1103 connection_properties['auth_password']) 1104 1105 # We exit once we are logged in or once we fail login 1106 while True: 1107 # Duplicate logins crash iscsiadm after load, so we scan active 1108 # sessions to see if the node is logged in. 
1109 sessions = self._get_iscsi_sessions_full() 1110 for s in sessions: 1111 # Found our session, return session_id 1112 if (s[0] in self.VALID_SESSIONS_PREFIX and 1113 portal.lower() == s[2].lower() and s[4] == target_iqn): 1114 return s[1], manual_scan 1115 1116 try: 1117 # exit_code=15 means the session already exists, so it should 1118 # be regarded as successful login. 1119 self._run_iscsiadm(connection_properties, ("--login",), 1120 check_exit_code=(0, 15, 255)) 1121 except putils.ProcessExecutionError as err: 1122 LOG.warning('Failed to login iSCSI target %(iqn)s on portal ' 1123 '%(portal)s (exit code %(err)s).', 1124 {'iqn': target_iqn, 'portal': portal, 1125 'err': err.exit_code}) 1126 return None, None 1127 self._iscsiadm_update(connection_properties, 1128 "node.startup", 1129 "automatic") 1130 1131 def _disconnect_from_iscsi_portal(self, connection_properties): 1132 self._iscsiadm_update(connection_properties, "node.startup", "manual", 1133 check_exit_code=[0, 21, 255]) 1134 self._run_iscsiadm(connection_properties, ("--logout",), 1135 check_exit_code=[0, 21, 255]) 1136 self._run_iscsiadm(connection_properties, ('--op', 'delete'), 1137 check_exit_code=[0, 21, 255], 1138 attempts=5, 1139 delay_on_retry=True) 1140 1141 def _disconnect_connection(self, connection_properties, connections, force, 1142 exc): 1143 LOG.debug('Disconnecting from: %s', connections) 1144 props = connection_properties.copy() 1145 for ip, iqn in connections: 1146 props['target_portal'] = ip 1147 props['target_iqn'] = iqn 1148 with exc.context(force, 'Disconnect from %s %s failed', ip, iqn): 1149 self._disconnect_from_iscsi_portal(props) 1150 1151 def _run_iscsi_session(self): 1152 (out, err) = self._run_iscsiadm_bare(('-m', 'session'), 1153 check_exit_code=[0, 21, 255]) 1154 LOG.debug("iscsi session list stdout=%(out)s stderr=%(err)s", 1155 {'out': out, 'err': err}) 1156 return (out, err) 1157 1158 def _run_iscsiadm_bare(self, iscsi_command, **kwargs) -> Tuple[str, str]: 1159 
check_exit_code = kwargs.pop('check_exit_code', 0) 1160 (out, err) = self._execute('iscsiadm', 1161 *iscsi_command, 1162 run_as_root=True, 1163 root_helper=self._root_helper, 1164 check_exit_code=check_exit_code) 1165 LOG.debug("iscsiadm %(iscsi_command)s: stdout=%(out)s stderr=%(err)s", 1166 {'iscsi_command': iscsi_command, 'out': out, 'err': err}) 1167 return (out, err) 1168 1169 def _run_multipath(self, multipath_command, **kwargs): 1170 check_exit_code = kwargs.pop('check_exit_code', 0) 1171 (out, err) = self._execute('multipath', 1172 *multipath_command, 1173 run_as_root=True, 1174 root_helper=self._root_helper, 1175 check_exit_code=check_exit_code) 1176 LOG.debug("multipath %(multipath_command)s: " 1177 "stdout=%(out)s stderr=%(err)s", 1178 {'multipath_command': multipath_command, 1179 'out': out, 'err': err}) 1180 return (out, err) 1181 1182 def _get_node_startup_values(self, connection_properties): 1183 # Exit code 21 (ISCSI_ERR_NO_OBJS_FOUND) occurs when no nodes 1184 # exist - must consider this an empty (successful) result. 
1185 out, __ = self._run_iscsiadm_bare( 1186 ['-m', 'node', '--op', 'show', '-p', 1187 connection_properties['target_portal']], 1188 check_exit_code=(0, 21)) or "" 1189 node_values_str = out.strip() 1190 node_values = node_values_str.split("\n") 1191 iqn = None 1192 startup = None 1193 startup_values = {} 1194 1195 for node_value in node_values: 1196 node_keys = node_value.split() 1197 try: 1198 if node_keys[0] == "node.name": 1199 iqn = node_keys[2] 1200 elif node_keys[0] == "node.startup": 1201 startup = node_keys[2] 1202 1203 if iqn and startup: 1204 startup_values[iqn] = startup 1205 iqn = None 1206 startup = None 1207 except IndexError: 1208 pass 1209 1210 return startup_values 1211 1212 def _recover_node_startup_values(self, connection_properties, 1213 old_node_startups): 1214 node_startups = self._get_node_startup_values(connection_properties) 1215 for iqn, node_startup in node_startups.items(): 1216 old_node_startup = old_node_startups.get(iqn, None) 1217 if old_node_startup and node_startup != old_node_startup: 1218 # _iscsiadm_update() only uses "target_portal" and "target_iqn" 1219 # of connection_properties. 1220 # And the recovering target belongs to the same target_portal 1221 # as discovering target. 1222 # So target_iqn is updated, and other values aren't updated. 1223 recover_connection = copy.deepcopy(connection_properties) 1224 recover_connection['target_iqn'] = iqn 1225 self._iscsiadm_update(recover_connection, 1226 "node.startup", 1227 old_node_startup) 1228