# Copyright (C) 2012 Canonical Ltd.
# Copyright (C) 2012 Hewlett-Packard Development Company, L.P.
# Copyright (C) 2012 Yahoo! Inc.
#
# Author: Scott Moser <scott.moser@canonical.com>
# Author: Juerg Haefliger <juerg.haefliger@hp.com>
# Author: Joshua Harlow <harlowja@yahoo-inc.com>
#
# This file is part of cloud-init. See LICENSE file for license information.

import abc
import copy
import json
import os
from collections import namedtuple
from typing import Dict, List  # noqa: F401

from cloudinit import dmi
from cloudinit import importer
from cloudinit import log as logging
from cloudinit import net
from cloudinit import type_utils
from cloudinit import user_data as ud
from cloudinit import util
from cloudinit.atomic_helper import write_json
from cloudinit.distros import Distro
from cloudinit.event import EventScope, EventType
from cloudinit.filters import launch_index
from cloudinit.persistence import CloudInitPickleMixin
from cloudinit.reporting import events

DSMODE_DISABLED = "disabled"
DSMODE_LOCAL = "local"
DSMODE_NETWORK = "net"
DSMODE_PASS = "pass"

VALID_DSMODES = [DSMODE_DISABLED, DSMODE_LOCAL, DSMODE_NETWORK]

DEP_FILESYSTEM = "FILESYSTEM"
DEP_NETWORK = "NETWORK"
DS_PREFIX = 'DataSource'

EXPERIMENTAL_TEXT = (
    "EXPERIMENTAL: The structure and format of content scoped under the 'ds'"
    " key may change in subsequent releases of cloud-init.")


# File in which publicly available instance meta-data is written;
# security-sensitive key values are redacted from this world-readable file
INSTANCE_JSON_FILE = 'instance-data.json'
# security-sensitive key values are present in this root-readable file
INSTANCE_JSON_SENSITIVE_FILE = 'instance-data-sensitive.json'
REDACT_SENSITIVE_VALUE = 'redacted for non-root user'

# Key which can provide a cloud's official product name to cloud-init
METADATA_CLOUD_NAME_KEY = 'cloud-name'

UNSET = "_unset"
METADATA_UNKNOWN = 'unknown'

LOG = logging.getLogger(__name__)

# CLOUD_ID_REGION_PREFIX_MAP format is:
#  <region-match-prefix>: (<new-cloud-id>, <test_allowed_cloud_callable>)
CLOUD_ID_REGION_PREFIX_MAP = {
    'cn-': ('aws-china', lambda c: c == 'aws'),    # only change aws regions
    'us-gov-': ('aws-gov', lambda c: c == 'aws'),  # only change aws regions
    'china': ('azure-china', lambda c: c == 'azure'),  # only change azure
}

# NetworkConfigSource represents the canonical list of network config sources
# that cloud-init knows about.  (Python 2.7 lacks PEP 435, so use a singleton
# namedtuple as an enum; see https://stackoverflow.com/a/6971002)
_NETCFG_SOURCE_NAMES = ('cmdline', 'ds', 'system_cfg', 'fallback', 'initramfs')
NetworkConfigSource = namedtuple('NetworkConfigSource',
                                 _NETCFG_SOURCE_NAMES)(*_NETCFG_SOURCE_NAMES)
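
# A quick illustration of the namedtuple-as-enum trick above (hypothetical
# usage; the values mirror _NETCFG_SOURCE_NAMES):
#
#   NetworkConfigSource.cmdline == 'cmdline'  # attribute access, enum-style
#   'ds' in NetworkConfigSource               # membership test over values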


class DatasourceUnpickleUserDataError(Exception):
    """Raised when userdata is unable to be unpickled due to python upgrades"""


class DataSourceNotFoundException(Exception):
    pass


class InvalidMetaDataException(Exception):
    """Raised when metadata is broken, unavailable or disabled."""


def process_instance_metadata(metadata, key_path='', sensitive_keys=()):
    """Process all instance metadata, cleaning it up for persisting as json.

    Strip the ci-b64 prefix from values and catalog any base64-encoded
    key paths in a 'base64_encoded_keys' list.

    @return Dict copy of processed metadata.
    """
    md_copy = copy.deepcopy(metadata)
    base64_encoded_keys = []
    sens_keys = []
    for key, val in metadata.items():
        if key_path:
            sub_key_path = key_path + '/' + key
        else:
            sub_key_path = key
        if key in sensitive_keys or sub_key_path in sensitive_keys:
            sens_keys.append(sub_key_path)
        if isinstance(val, str) and val.startswith('ci-b64:'):
            base64_encoded_keys.append(sub_key_path)
            md_copy[key] = val.replace('ci-b64:', '')
        if isinstance(val, dict):
            return_val = process_instance_metadata(
                val, sub_key_path, sensitive_keys)
            base64_encoded_keys.extend(return_val.pop('base64_encoded_keys'))
            sens_keys.extend(return_val.pop('sensitive_keys'))
            md_copy[key] = return_val
    md_copy['base64_encoded_keys'] = sorted(base64_encoded_keys)
    md_copy['sensitive_keys'] = sorted(sens_keys)
    return md_copy
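
# A minimal sketch of this helper's behavior (hypothetical input values; the
# 'ci-b64:' prefix and the 'nested/secret' key path follow the code above):
#
#   md = {'k1': 'ci-b64:Zm9v', 'nested': {'secret': 'hide-me'}}
#   out = process_instance_metadata(md, sensitive_keys=('nested/secret',))
#   # out['k1'] == 'Zm9v'
#   # out['base64_encoded_keys'] == ['k1']
#   # out['sensitive_keys'] == ['nested/secret']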


def redact_sensitive_keys(metadata, redact_value=REDACT_SENSITIVE_VALUE):
    """Redact any sensitive keys from the provided metadata dictionary.

    Replace any key values listed in 'sensitive_keys' with redact_value.
    """
    if not metadata.get('sensitive_keys', []):
        return metadata
    md_copy = copy.deepcopy(metadata)
    for key_path in metadata.get('sensitive_keys'):
        path_parts = key_path.split('/')
        obj = md_copy
        for path in path_parts:
            if isinstance(obj[path], dict) and path != path_parts[-1]:
                obj = obj[path]
        obj[path] = redact_value
    return md_copy
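
# Building on the sketch above (hypothetical values): the 'sensitive_keys'
# paths recorded by process_instance_metadata drive the redaction.
#
#   redacted = redact_sensitive_keys(out)
#   # redacted['nested']['secret'] == REDACT_SENSITIVE_VALUE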


URLParams = namedtuple(
    'URLParams', ['max_wait_seconds', 'timeout_seconds',
                  'num_retries', 'sec_between_retries'])


class DataSource(CloudInitPickleMixin, metaclass=abc.ABCMeta):

    dsmode = DSMODE_NETWORK
    default_locale = 'en_US.UTF-8'

    # Datasource name needs to be set by subclasses to determine which
    # cloud-config datasource key is loaded
    dsname = '_undef'

    # Cached cloud_name as determined by _get_cloud_name
    _cloud_name = None

    # Cached cloud platform api type: e.g. ec2, openstack, kvm, lxd, azure etc.
    _platform_type = None

    # More details about the cloud platform:
    #  - metadata (http://169.254.169.254/)
    #  - seed-dir (<dirname>)
    _subplatform = None

    # Track the discovered fallback nic for use in configuration generation.
    _fallback_interface = None

    # The network configuration sources that should be considered for this data
    # source.  (The first source in this list that provides network
    # configuration will be used without considering any that follow.)  This
    # should always be a subset of the members of NetworkConfigSource with no
    # duplicate entries.
    network_config_sources = (NetworkConfigSource.cmdline,
                              NetworkConfigSource.initramfs,
                              NetworkConfigSource.system_cfg,
                              NetworkConfigSource.ds)

    # read_url_params
    url_max_wait = -1            # max_wait < 0 means do not wait
    url_timeout = 10             # timeout for each metadata url read attempt
    url_retries = 5              # number of times to retry url upon 404
    url_sec_between_retries = 1  # amount of seconds to wait between retries

    # The datasource defines a set of supported EventTypes during which
    # the datasource can react to changes in metadata and regenerate
    # network configuration on metadata changes. These are defined in
    # `supported_update_events`.
    # The datasource also defines a set of default EventTypes that the
    # datasource can react to. These are the event types that will be used
    # if not overridden by the user.
    # A datasource that needs to write network config on each system boot
    # would call default_update_events[EventScope.NETWORK].add(EventType.BOOT).

    # Default: generate network config on new instance id (first boot).
    supported_update_events = {EventScope.NETWORK: {
        EventType.BOOT_NEW_INSTANCE,
        EventType.BOOT,
        EventType.BOOT_LEGACY,
        EventType.HOTPLUG,
    }}
    default_update_events = {EventScope.NETWORK: {
        EventType.BOOT_NEW_INSTANCE,
    }}

    # N-tuple listing default values for any metadata-related class
    # attributes cached on an instance by a process_data run. These attribute
    # values are reset via clear_cached_attrs during any update_metadata call.
    cached_attr_defaults = (
        ('ec2_metadata', UNSET), ('network_json', UNSET),
        ('metadata', {}), ('userdata', None), ('userdata_raw', None),
        ('vendordata', None), ('vendordata_raw', None),
        ('vendordata2', None), ('vendordata2_raw', None))

    _dirty_cache = False

    # N-tuple of keypaths or keynames redacted from instance-data.json for
    # non-root users
    sensitive_metadata_keys = ('merged_cfg', 'security-credentials',)

    _ci_pkl_version = 1

    def __init__(self, sys_cfg, distro: Distro, paths, ud_proc=None):
        self.sys_cfg = sys_cfg
        self.distro = distro
        self.paths = paths
        self.userdata = None
        self.metadata = {}
        self.userdata_raw = None
        self.vendordata = None
        self.vendordata2 = None
        self.vendordata_raw = None
        self.vendordata2_raw = None

        self.ds_cfg = util.get_cfg_by_path(
            self.sys_cfg, ("datasource", self.dsname), {})
        if not self.ds_cfg:
            self.ds_cfg = {}

        if not ud_proc:
            self.ud_proc = ud.UserDataProcessor(self.paths)
        else:
            self.ud_proc = ud_proc

    def _unpickle(self, ci_pkl_version: int) -> None:
        """Perform deserialization fixes for the datasource."""
        if not hasattr(self, 'vendordata2'):
            self.vendordata2 = None
        if not hasattr(self, 'vendordata2_raw'):
            self.vendordata2_raw = None
        if hasattr(self, 'userdata') and self.userdata is not None:
            # If userdata stores MIME data, on < python3.6 it will be
            # missing the 'policy' attribute that exists on >= python3.6.
            # Calling str() on the userdata will attempt to access this
            # policy attribute. This will raise an exception, causing
            # the pickle load to fail, so cloud-init will discard the cache.
            try:
                str(self.userdata)
            except AttributeError as e:
                LOG.debug(
                    "Unable to unpickle datasource: %s."
                    " Ignoring current cache.", e
                )
                raise DatasourceUnpickleUserDataError() from e

    def __str__(self):
        return type_utils.obj_name(self)

    def _get_standardized_metadata(self, instance_data):
        """Return a dictionary of standardized metadata keys."""
        local_hostname = self.get_hostname()
        instance_id = self.get_instance_id()
        availability_zone = self.availability_zone
        # In the event of an upgrade from an existing cloud-init, the pickled
        # datasource will not contain these new class attributes, so we need
        # to recrawl metadata to discover that content.
        sysinfo = instance_data["sys_info"]
        return {
            'v1': {
                '_beta_keys': ['subplatform'],
                'availability-zone': availability_zone,
                'availability_zone': availability_zone,
                'cloud-name': self.cloud_name,
                'cloud_name': self.cloud_name,
                'distro': sysinfo["dist"][0],
                'distro_version': sysinfo["dist"][1],
                'distro_release': sysinfo["dist"][2],
                'platform': self.platform_type,
                'public_ssh_keys': self.get_public_ssh_keys(),
                'python_version': sysinfo["python"],
                'instance-id': instance_id,
                'instance_id': instance_id,
                'kernel_release': sysinfo["uname"][2],
                'local-hostname': local_hostname,
                'local_hostname': local_hostname,
                'machine': sysinfo["uname"][4],
                'region': self.region,
                'subplatform': self.subplatform,
                'system_platform': sysinfo["platform"],
                'variant': sysinfo["variant"]}}

    def clear_cached_attrs(self, attr_defaults=()):
        """Reset any cached metadata attributes to datasource defaults.

        @param attr_defaults: Optional tuple of (attr, value) pairs to
           set instead of cached_attr_defaults.
        """
        if not self._dirty_cache:
            return
        if attr_defaults:
            attr_values = attr_defaults
        else:
            attr_values = self.cached_attr_defaults

        for attribute, value in attr_values:
            if hasattr(self, attribute):
                setattr(self, attribute, value)
        if not attr_defaults:
            self._dirty_cache = False

    def get_data(self):
        """Datasources implement _get_data to set up metadata and userdata_raw.

        Minimally, the datasource should return a boolean True on success.
        """
        self._dirty_cache = True
        return_value = self._get_data()
        if not return_value:
            return return_value
        self.persist_instance_data()
        return return_value

    def persist_instance_data(self):
        """Process and write INSTANCE_JSON_FILE with all instance metadata.

        Replace any hyphens with underscores in key names for use in template
        processing.

        @return True on successful write, False otherwise.
        """
        if hasattr(self, '_crawled_metadata'):
            # Any datasource with _crawled_metadata will best represent
            # the most recent, 'raw' metadata.
            crawled_metadata = copy.deepcopy(
                getattr(self, '_crawled_metadata'))
            crawled_metadata.pop('user-data', None)
            crawled_metadata.pop('vendor-data', None)
            instance_data = {'ds': crawled_metadata}
        else:
            instance_data = {'ds': {'meta_data': self.metadata}}
            if hasattr(self, 'network_json'):
                network_json = getattr(self, 'network_json')
                if network_json != UNSET:
                    instance_data['ds']['network_json'] = network_json
            if hasattr(self, 'ec2_metadata'):
                ec2_metadata = getattr(self, 'ec2_metadata')
                if ec2_metadata != UNSET:
                    instance_data['ds']['ec2_metadata'] = ec2_metadata
        instance_data['ds']['_doc'] = EXPERIMENTAL_TEXT
        # Add merged cloud.cfg and sys info for jinja templates and cli query
        instance_data['merged_cfg'] = copy.deepcopy(self.sys_cfg)
        instance_data['merged_cfg']['_doc'] = (
            'Merged cloud-init system config from /etc/cloud/cloud.cfg and'
            ' /etc/cloud/cloud.cfg.d/')
        instance_data['sys_info'] = util.system_info()
        instance_data.update(
            self._get_standardized_metadata(instance_data))
        try:
            # Process content, base64-encoding any unserializable values.
            content = util.json_dumps(instance_data)
            # Strip base64: prefix and set base64_encoded_keys list.
            processed_data = process_instance_metadata(
                json.loads(content),
                sensitive_keys=self.sensitive_metadata_keys)
        except TypeError as e:
            LOG.warning('Error persisting instance-data.json: %s', str(e))
            return False
        except UnicodeDecodeError as e:
            LOG.warning('Error persisting instance-data.json: %s', str(e))
            return False
        json_sensitive_file = os.path.join(self.paths.run_dir,
                                           INSTANCE_JSON_SENSITIVE_FILE)
        write_json(json_sensitive_file, processed_data, mode=0o600)
        json_file = os.path.join(self.paths.run_dir, INSTANCE_JSON_FILE)
        # World readable
        write_json(json_file, redact_sensitive_keys(processed_data))
        return True
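
    # End result, for illustration (paths depend on self.paths.run_dir,
    # typically /run/cloud-init):
    #   <run_dir>/instance-data-sensitive.json  root-readable (0o600)
    #   <run_dir>/instance-data.json            world-readable, redacted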

    def _get_data(self):
        """Walk metadata sources, process crawled data and save attributes."""
        raise NotImplementedError(
            'Subclasses of DataSource must implement _get_data which'
            ' sets self.metadata, vendordata_raw and userdata_raw.')

    def get_url_params(self):
        """Return the Datasource's preferred url_read parameters.

        Subclasses may override url_max_wait, url_timeout, url_retries and
        url_sec_between_retries.

        @return: A URLParams object with max_wait_seconds, timeout_seconds,
            num_retries and sec_between_retries.
        """
        max_wait = self.url_max_wait
        try:
            max_wait = int(self.ds_cfg.get("max_wait", self.url_max_wait))
        except ValueError:
            util.logexc(
                LOG, "Config max_wait '%s' is not an int, using default '%s'",
                self.ds_cfg.get("max_wait"), max_wait)

        timeout = self.url_timeout
        try:
            timeout = max(
                0, int(self.ds_cfg.get("timeout", self.url_timeout)))
        except ValueError:
            timeout = self.url_timeout
            util.logexc(
                LOG, "Config timeout '%s' is not an int, using default '%s'",
                self.ds_cfg.get('timeout'), timeout)

        retries = self.url_retries
        try:
            retries = int(self.ds_cfg.get("retries", self.url_retries))
        except Exception:
            util.logexc(
                LOG, "Config retries '%s' is not an int, using default '%s'",
                self.ds_cfg.get('retries'), retries)

        sec_between_retries = self.url_sec_between_retries
        try:
            sec_between_retries = int(self.ds_cfg.get(
                "sec_between_retries",
                self.url_sec_between_retries))
        except Exception:
            util.logexc(
                LOG, "Config sec_between_retries '%s' is not an int,"
                     " using default '%s'",
                self.ds_cfg.get("sec_between_retries"), sec_between_retries)

        return URLParams(max_wait, timeout, retries, sec_between_retries)
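
    # For illustration only: these knobs arrive via self.ds_cfg from the
    # datasource's section of system config. A hypothetical cloud.cfg.d
    # snippet such as
    #
    #   datasource:
    #     Ec2:
    #       max_wait: 120
    #       timeout: 50
    #
    # would override the class defaults above for a datasource whose dsname
    # is 'Ec2'.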

    def get_userdata(self, apply_filter=False):
        if self.userdata is None:
            self.userdata = self.ud_proc.process(self.get_userdata_raw())
        if apply_filter:
            return self._filter_xdata(self.userdata)
        return self.userdata

    def get_vendordata(self):
        if self.vendordata is None:
            self.vendordata = self.ud_proc.process(self.get_vendordata_raw())
        return self.vendordata

    def get_vendordata2(self):
        if self.vendordata2 is None:
            self.vendordata2 = self.ud_proc.process(self.get_vendordata2_raw())
        return self.vendordata2

    @property
    def fallback_interface(self):
        """Determine the network interface used during local network config."""
        if self._fallback_interface is None:
            self._fallback_interface = net.find_fallback_nic()
            if self._fallback_interface is None:
                LOG.warning(
                    "Did not find a fallback interface on %s.",
                    self.cloud_name)
        return self._fallback_interface

    @property
    def platform_type(self):
        if not hasattr(self, '_platform_type'):
            # Handle upgrade path where pickled datasource has no
            # _platform_type.
            self._platform_type = self.dsname.lower()
        if not self._platform_type:
            self._platform_type = self.dsname.lower()
        return self._platform_type

    @property
    def subplatform(self):
        """Return a string representing subplatform details for the datasource.

        This should be guidance for where the metadata is sourced.
        Examples of this on different clouds:
            ec2:       metadata (http://169.254.169.254)
            openstack: configdrive (/dev/path)
            openstack: metadata (http://169.254.169.254)
            nocloud:   seed-dir (/seed/dir/path)
            lxd:       nocloud (/seed/dir/path)
        """
        if not hasattr(self, '_subplatform'):
            # Handle upgrade path where pickled datasource has no
            # _subplatform.
            self._subplatform = self._get_subplatform()
        if not self._subplatform:
            self._subplatform = self._get_subplatform()
        return self._subplatform

    def _get_subplatform(self):
        """Subclasses should implement to return a "slug (detail)" string."""
        if hasattr(self, 'metadata_address'):
            return 'metadata (%s)' % getattr(self, 'metadata_address')
        return METADATA_UNKNOWN

    @property
    def cloud_name(self):
        """Return lowercase cloud name as determined by the datasource.

        Datasource can determine or define its own cloud product name in
        metadata.
        """
        if self._cloud_name:
            return self._cloud_name
        if self.metadata and self.metadata.get(METADATA_CLOUD_NAME_KEY):
            cloud_name = self.metadata.get(METADATA_CLOUD_NAME_KEY)
            if isinstance(cloud_name, str):
                self._cloud_name = cloud_name.lower()
            else:
                self._cloud_name = self._get_cloud_name().lower()
                LOG.debug(
                    'Ignoring metadata provided key %s: non-string type %s',
                    METADATA_CLOUD_NAME_KEY, type(cloud_name))
        else:
            self._cloud_name = self._get_cloud_name().lower()
        return self._cloud_name

    def _get_cloud_name(self):
        """Return the datasource name as it frequently matches cloud name.

        Should be overridden in subclasses which can run on multiple
        cloud names, such as DataSourceEc2.
        """
        return self.dsname

    @property
    def launch_index(self):
        if not self.metadata:
            return None
        if 'launch-index' in self.metadata:
            return self.metadata['launch-index']
        return None

    def _filter_xdata(self, processed_ud):
        filters = [
            launch_index.Filter(util.safe_int(self.launch_index)),
        ]
        new_ud = processed_ud
        for f in filters:
            new_ud = f.apply(new_ud)
        return new_ud

    @property
    def is_disconnected(self):
        return False

    def get_userdata_raw(self):
        return self.userdata_raw

    def get_vendordata_raw(self):
        return self.vendordata_raw

    def get_vendordata2_raw(self):
        return self.vendordata2_raw

    # The data source's config_obj is a cloud-config formatted
    # object that came to it from ways other than cloud-config
    # because cloud-config content would be handled elsewhere.
    def get_config_obj(self):
        return {}

    def get_public_ssh_keys(self):
        return normalize_pubkey_data(self.metadata.get('public-keys'))

    def publish_host_keys(self, hostkeys):
        """Publish the public SSH host keys (found in /etc/ssh/*.pub).

        @param hostkeys: List of host key tuples (key_type, key_value),
            where key_type is the first field in the public key file
            (e.g. 'ssh-rsa') and key_value is the key itself
            (e.g. 'AAAAB3NzaC1y...').
        """

    def _remap_device(self, short_name):
        # LP: #611137
        # the metadata service may believe that devices are named 'sda'
        # when the kernel named them 'vda' or 'xvda'
        # we want to return the correct value for what will actually
        # exist in this instance
        mappings = {"sd": ("vd", "xvd", "vtb")}
        for (nfrom, tlist) in mappings.items():
            if not short_name.startswith(nfrom):
                continue
            for nto in tlist:
                cand = "/dev/%s%s" % (nto, short_name[len(nfrom):])
                if os.path.exists(cand):
                    return cand
        return None
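
    # A brief sketch (hypothetical device names): on a VM whose kernel
    # exposes virtio disks, metadata naming 'sda1' would remap like so,
    # provided the candidate node exists:
    #
    #   self._remap_device('sda1')  # -> '/dev/vda1' if that path exists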

    def device_name_to_device(self, _name):
        # Translate a 'name' to a device. The primary use at this point is
        # on EC2, where the metadata service maps e.g.
        #   ephemeral0: sdb
        # and we return 'sdb' for input 'ephemeral0'.
        return None

    def get_locale(self):
        """Default locale is en_US.UTF-8, but allow distros to override"""
        locale = self.default_locale
        try:
            locale = self.distro.get_locale()
        except NotImplementedError:
            pass
        return locale

    @property
    def availability_zone(self):
        top_level_az = self.metadata.get(
            'availability-zone', self.metadata.get('availability_zone'))
        if top_level_az:
            return top_level_az
        return self.metadata.get('placement', {}).get('availability-zone')

    @property
    def region(self):
        return self.metadata.get('region')

    def get_instance_id(self):
        if not self.metadata or 'instance-id' not in self.metadata:
            # Return a magic not-really-an-instance-id string
            return "iid-datasource"
        return str(self.metadata['instance-id'])

    def get_hostname(self, fqdn=False, resolve_ip=False, metadata_only=False):
        """Get hostname or fqdn from the datasource. Look it up if desired.

        @param fqdn: Boolean, set True to return hostname with domain.
        @param resolve_ip: Boolean, set True to attempt to resolve an ipv4
            address provided in local-hostname meta-data.
        @param metadata_only: Boolean, set True to avoid looking up hostname
            if meta-data doesn't have local-hostname present.

        @return: hostname or qualified hostname. Optionally return None when
            metadata_only is True and local-hostname data is not available.
        """
        defdomain = "localdomain"
        defhost = "localhost"
        domain = defdomain

        if not self.metadata or not self.metadata.get('local-hostname'):
            if metadata_only:
                return None
            # This is somewhat questionable: the datasource was asked for a
            # hostname and didn't have one. Raising an error might be more
            # appropriate, but instead we fall back to looking up the
            # existing system hostname.
            toks = []
            hostname = util.get_hostname()
            hosts_fqdn = util.get_fqdn_from_hosts(hostname)
            if hosts_fqdn and hosts_fqdn.find(".") > 0:
                toks = str(hosts_fqdn).split(".")
            elif hostname and hostname.find(".") > 0:
                toks = str(hostname).split(".")
            elif hostname:
                toks = [hostname, defdomain]
            else:
                toks = [defhost, defdomain]
        else:
            # If there is an ipv4 address in 'local-hostname', then
            # make up a hostname (LP: #475354) in format ip-xx.xx.xx.xx
            lhost = self.metadata['local-hostname']
            if net.is_ipv4_address(lhost):
                toks = []
                if resolve_ip:
                    toks = util.gethostbyaddr(lhost)

                if toks:
                    toks = str(toks).split('.')
                else:
                    toks = ["ip-%s" % lhost.replace(".", "-")]
            else:
                toks = lhost.split(".")

        if len(toks) > 1:
            hostname = toks[0]
            domain = '.'.join(toks[1:])
        else:
            hostname = toks[0]

        if fqdn and domain != defdomain:
            return "%s.%s" % (hostname, domain)
        else:
            return hostname
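
    # A hedged sketch of the branches above (hypothetical metadata): with
    # local-hostname 'db1.example.com', get_hostname() returns 'db1' and
    # get_hostname(fqdn=True) returns 'db1.example.com'; with an IPv4 value
    # such as '10.0.0.4' and resolve_ip=False, it returns 'ip-10-0-0-4'.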

    def get_package_mirror_info(self):
        return self.distro.get_package_mirror_info(data_source=self)

    def get_supported_events(self, source_event_types: List[EventType]):
        supported_events = {}  # type: Dict[EventScope, set]
        for event in source_event_types:
            for update_scope, update_events in self.supported_update_events.items():  # noqa: E501
                if event in update_events:
                    if not supported_events.get(update_scope):
                        supported_events[update_scope] = set()
                    supported_events[update_scope].add(event)
        return supported_events

    def update_metadata_if_supported(
        self, source_event_types: List[EventType]
    ) -> bool:
        """Refresh cached metadata if the datasource supports this event.

        The datasource has a list of supported_update_events which
        trigger refreshing all cached metadata as well as refreshing the
        network configuration.

        @param source_event_types: List of EventTypes which may trigger a
            metadata update.

        @return True if the datasource did successfully update cached metadata
            due to source_event_type.
        """
        supported_events = self.get_supported_events(source_event_types)
        for scope, matched_events in supported_events.items():
            LOG.debug(
                "Update datasource metadata and %s config due to events: %s",
                scope.value,
                ', '.join([event.value for event in matched_events]))
            # Each datasource has a cached config property which needs
            # clearing. Once cleared, that config property will be
            # regenerated from current metadata.
            self.clear_cached_attrs((('_%s_config' % scope, UNSET),))
        if supported_events:
            self.clear_cached_attrs()
            result = self.get_data()
            if result:
                return True
        LOG.debug("Datasource %s not updated for events: %s", self,
                  ', '.join([event.value for event in source_event_types]))
        return False

    def check_instance_id(self, sys_cfg):
        # Quickly check (local only) whether self.instance_id is still valid.
        return False

    @staticmethod
    def _determine_dsmode(candidates, default=None, valid=None):
        # Return the first candidate that is non-None; warn if not valid.
        if default is None:
            default = DSMODE_NETWORK

        if valid is None:
            valid = VALID_DSMODES

        for candidate in candidates:
            if candidate is None:
                continue
            if candidate in valid:
                return candidate
            else:
                LOG.warning("invalid dsmode '%s', using default=%s",
                            candidate, default)
                return default

        return default

    @property
    def network_config(self):
        return None

    @property
    def first_instance_boot(self):
        return

    def setup(self, is_new_instance):
        """setup(is_new_instance)

        This is called before user-data and vendor-data have been processed.

        Unless the datasource has set mode to 'local', networking per
        'fallback' or per 'network_config' will have been written and
        brought up on the OS at this point.
        """
        return

    def activate(self, cfg, is_new_instance):
        """activate(cfg, is_new_instance)

        This is called before the init_modules will be called but after
        the user-data and vendor-data have been fully processed.

        The cfg is a fully up-to-date config; it contains a merged view of
           system config, datasource config, user config, vendor config.
        It should be used rather than the sys_cfg passed to __init__.

        is_new_instance is a boolean indicating if this is a new instance.
        """
        return


def normalize_pubkey_data(pubkey_data):
    keys = []

    if not pubkey_data:
        return keys

    if isinstance(pubkey_data, str):
        return pubkey_data.splitlines()

    if isinstance(pubkey_data, (list, set)):
        return list(pubkey_data)

    if isinstance(pubkey_data, (dict)):
        for (_keyname, klist) in pubkey_data.items():
            # lp:506332 uec metadata service responds with
            # data that makes boto populate a string for 'klist' rather
            # than a list.
            if isinstance(klist, str):
                klist = [klist]
            if isinstance(klist, (list, set)):
                for pkey in klist:
                    # There is an empty string at
                    # the end of the keylist, trim it
                    if pkey:
                        keys.append(pkey)

    return keys
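
# Sketch of the accepted input shapes (hypothetical key material):
#
#   normalize_pubkey_data('ssh-rsa AAA...\nssh-ed25519 BBB...')
#   # -> ['ssh-rsa AAA...', 'ssh-ed25519 BBB...']
#   normalize_pubkey_data({'mykey': ['ssh-rsa AAA...', '']})
#   # -> ['ssh-rsa AAA...']  (the empty trailing entry is trimmed)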


def find_source(sys_cfg, distro, paths, ds_deps, cfg_list, pkg_list, reporter):
    ds_list = list_sources(cfg_list, ds_deps, pkg_list)
    ds_names = [type_utils.obj_name(f) for f in ds_list]
    mode = "network" if DEP_NETWORK in ds_deps else "local"
    LOG.debug("Searching for %s data source in: %s", mode, ds_names)

    for name, cls in zip(ds_names, ds_list):
        myrep = events.ReportEventStack(
            name="search-%s" % name.replace("DataSource", ""),
            description="searching for %s data from %s" % (mode, name),
            message="no %s data found from %s" % (mode, name),
            parent=reporter)
        try:
            with myrep:
                LOG.debug("Seeing if we can get any data from %s", cls)
                s = cls(sys_cfg, distro, paths)
                if s.update_metadata_if_supported(
                    [EventType.BOOT_NEW_INSTANCE]
                ):
                    myrep.message = "found %s data from %s" % (mode, name)
                    return (s, type_utils.obj_name(cls))
        except Exception:
            util.logexc(LOG, "Getting data from %s failed", cls)

    msg = ("Did not find any data source,"
           " searched classes: (%s)") % (", ".join(ds_names))
    raise DataSourceNotFoundException(msg)


# Return a list of classes that have the same depends as 'depends'.
# Iterate through cfg_list, loading "DataSource*" modules
# and calling their "get_datasource_list".
# Return an ordered list of classes that match (if any).
def list_sources(cfg_list, depends, pkg_list):
    src_list = []
    LOG.debug(("Looking for data source in: %s,"
               " via packages %s that matches dependencies %s"),
              cfg_list, pkg_list, depends)
    for ds_name in cfg_list:
        if not ds_name.startswith(DS_PREFIX):
            ds_name = '%s%s' % (DS_PREFIX, ds_name)
        m_locs, _looked_locs = importer.find_module(ds_name,
                                                    pkg_list,
                                                    ['get_datasource_list'])
        for m_loc in m_locs:
            mod = importer.import_module(m_loc)
            lister = getattr(mod, "get_datasource_list")
            matches = lister(depends)
            if matches:
                src_list.extend(matches)
                break
    return src_list


def instance_id_matches_system_uuid(instance_id, field='system-uuid'):
    # Quickly check (local only) whether instance_id is still valid, by
    # comparing it against a value read from DMI (e.g. the system UUID).
    if not instance_id:
        return False

    dmi_value = dmi.read_dmi_data(field)
    if not dmi_value:
        return False
    return instance_id.lower() == dmi_value.lower()


def canonical_cloud_id(cloud_name, region, platform):
    """Lookup the canonical cloud-id for a given cloud_name and region."""
    if not cloud_name:
        cloud_name = METADATA_UNKNOWN
    if not region:
        region = METADATA_UNKNOWN
    if region == METADATA_UNKNOWN:
        if cloud_name != METADATA_UNKNOWN:
            return cloud_name
        return platform
    for prefix, cloud_id_test in CLOUD_ID_REGION_PREFIX_MAP.items():
        (cloud_id, valid_cloud) = cloud_id_test
        if region.startswith(prefix) and valid_cloud(cloud_name):
            return cloud_id
    if cloud_name != METADATA_UNKNOWN:
        return cloud_name
    return platform
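
# Worked examples against CLOUD_ID_REGION_PREFIX_MAP (the region and cloud
# values are illustrative):
#
#   canonical_cloud_id('aws', 'us-gov-west-1', 'ec2')    # -> 'aws-gov'
#   canonical_cloud_id('aws', 'cn-north-1', 'ec2')       # -> 'aws-china'
#   canonical_cloud_id('unknown', 'unknown', 'nocloud')  # -> 'nocloud'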


def convert_vendordata(data, recurse=True):
    """data: a loaded object (strings, arrays, dicts).
    return something suitable for cloudinit vendordata_raw.

    if data is:
       None: return None
       string: return string
       list: return data
             the list is then processed in UserDataProcessor
       dict: return convert_vendordata(data.get('cloud-init'))
    """
    if not data:
        return None
    if isinstance(data, str):
        return data
    if isinstance(data, list):
        return copy.deepcopy(data)
    if isinstance(data, dict):
        if recurse is True:
            return convert_vendordata(data.get('cloud-init'),
                                      recurse=False)
        raise ValueError("vendordata['cloud-init'] cannot be dict")
    raise ValueError("Unknown data type for vendordata: %s" % type(data))
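
# Sketch of the conversion rules (hypothetical payloads):
#
#   convert_vendordata('#cloud-config\n...')          # -> the string, as-is
#   convert_vendordata({'cloud-init': '#!/bin/sh\n'}) # -> '#!/bin/sh\n'
#   convert_vendordata({'cloud-init': {'a': 1}})      # raises ValueError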


class BrokenMetadata(IOError):
    pass


# 'depends' is a list of dependencies (e.g. DEP_FILESYSTEM, DEP_NETWORK).
# ds_list is a list of 2 item lists:
# ds_list = [
#   ( class, ( depends-that-this-class-needs ) )
# ]
# It returns a list of 'class' that matched these deps exactly.
# It is mainly a helper function for DataSourceCollections.
def list_from_depends(depends, ds_list):
    ret_list = []
    depset = set(depends)
    for (cls, deps) in ds_list:
        if depset == set(deps):
            ret_list.append(cls)
    return ret_list
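
# For instance (hypothetical classes), only exact dependency matches are
# returned:
#
#   ds_list = [(DataSourceA, (DEP_FILESYSTEM,)),
#              (DataSourceB, (DEP_FILESYSTEM, DEP_NETWORK))]
#   list_from_depends([DEP_FILESYSTEM], ds_list)  # -> [DataSourceA]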


# vi: ts=4 expandtab