File: //lib/python3/dist-packages/cloudinit/sources/DataSourceMAAS.py
# Copyright (C) 2012 Canonical Ltd.
# Copyright (C) 2012 Yahoo! Inc.
#
# Author: Scott Moser <scott.moser@canonical.com>
# Author: Joshua Harlow <harlowja@yahoo-inc.com>
#
# This file is part of cloud-init. See LICENSE file for license information.
import hashlib
import logging
import os
import time
from cloudinit import sources, url_helper, util
from cloudinit.net.cmdline import KlibcNetworkConfigSource
from cloudinit.sources import NetworkConfigSource
# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)
# Metadata API version. It is inserted between the metadata url and the
# file path when building request urls, and is the default ``version``
# of read_maas_seed_url.
MD_VERSION = "2012-03-01"
# The files that make up a MAAS seed. Each entry is a 4-tuple:
#   remote path: path fetched relative to the (optionally versioned) seed
#   location in dictionary: where check_seed_contents stores the value;
#       "a/b" means key "b" of the nested dictionary "a"
#   binary data?: if True the raw contents are kept, otherwise the
#       contents are decoded to text
#   optional?: if False, a missing file makes the seed malformed
DS_FIELDS = [
    # remote path, location in dictionary, binary data?, optional?
    ("meta-data/instance-id", "meta-data/instance-id", False, False),
    ("meta-data/local-hostname", "meta-data/local-hostname", False, False),
    ("meta-data/public-keys", "meta-data/public-keys", False, True),
    ("meta-data/vendor-data", "vendor-data", True, True),
    ("user-data", "user-data", True, True),
]
class DataSourceMAAS(sources.DataSource):
    """
    Datasource that obtains instance information from MAAS.

    Data is taken from the local seed directory (``<seed_dir>/maas``)
    when that directory is populated. Otherwise it is fetched from the
    ``metadata_url`` found in the datasource config, using the oauth
    credentials found in the same config.

    The files looked for in either location are listed in DS_FIELDS.
    """

    dsname = "MAAS"
    id_hash = None
    _oauth_helper = None

    # Setup read_url parameters per get_url_params.
    url_max_wait = 120
    url_timeout = 50

    def __init__(self, sys_cfg, distro, paths):
        super().__init__(sys_cfg, distro, paths)
        self.base_url = None
        self.seed_dir = os.path.join(paths.seed_dir, "maas")
        self.id_hash = get_id_from_ds_cfg(self.ds_cfg)

    @property
    def oauth_helper(self):
        """The oauth helper for this datasource, created on first use."""
        helper = self._oauth_helper
        if not helper:
            helper = self._oauth_helper = get_oauth_helper(self.ds_cfg)
        return helper

    def __str__(self):
        return "%s [%s]" % (super().__str__(), self.base_url)

    def _get_data(self):
        """Load data from the seed directory, else from metadata_url.

        Returns True when data was loaded and False when nothing usable
        was found. A malformed seed directory is logged and the
        MAASSeedDirMalformed exception is re-raised.
        """
        ds_cfg = self.ds_cfg

        # A populated local seed directory is preferred over the network.
        try:
            seed_contents = read_maas_seed_dir(self.seed_dir)
            self._set_data(self.seed_dir, seed_contents)
            return True
        except MAASSeedDirNone:
            pass
        except MAASSeedDirMalformed as err:
            LOG.warning("%s was malformed: %s", self.seed_dir, err)
            raise

        # Without a metadata_url there is nothing configured to talk to.
        metadata_url = ds_cfg.get("metadata_url", None)
        if not metadata_url:
            return False

        try:
            # Waiting for the service here has the side effect of getting
            # the oauth time-fix in place. Nowhere else would retry by
            # default, so even if the timestamp could be fixed it would
            # not be.
            if self.wait_for_metadata_service(metadata_url):
                fetched = read_maas_seed_url(
                    metadata_url,
                    read_file_or_url=self.oauth_helper.readurl,
                    paths=self.paths,
                    retries=1,
                )
                self._set_data(metadata_url, fetched)
                return True
            return False
        except Exception:
            util.logexc(
                LOG, "Failed fetching metadata from url %s", metadata_url
            )
            return False

    def _set_data(self, url, data):
        """Record url as base_url and store the seed contents.

        data is a (userdata, metadata, vendordata) tuple.
        """
        self.base_url = url
        userdata, metadata, vendordata = data
        self.userdata_raw = userdata
        self.metadata = metadata
        if not vendordata:
            return
        try:
            self.vendordata_raw = sources.convert_vendordata(vendordata)
        except ValueError as err:
            LOG.warning("Invalid content in vendor-data: %s", err)
            self.vendordata_raw = None

    def _get_subplatform(self):
        """Return the subplatform metadata source details."""
        location = self.base_url
        return "seed-dir (%s)" % location

    def wait_for_metadata_service(self, url):
        """Wait for the instance-id file below url to become readable.

        Returns True once the metadata service answered. Returns False
        when the configured maximum wait is 0 or when the service did
        not answer in time.
        """
        params = self.get_url_params()
        if params.max_wait_seconds == 0:
            return False

        began = time.monotonic()
        candidates = [
            "%s/%s/meta-data/instance-id" % (url.rstrip("/"), MD_VERSION)
        ]
        found_url, _resp = self.oauth_helper.wait_for_url(
            urls=candidates,
            max_wait=params.max_wait_seconds,
            timeout=params.timeout_seconds,
        )
        if not found_url:
            LOG.critical(
                "Giving up on md from %s after %i seconds",
                candidates,
                int(time.monotonic() - began),
            )
            return False

        LOG.debug("Using metadata source: '%s'", found_url)
        return True

    def check_instance_id(self, sys_cfg):
        """locally check if the current system is the same instance.

        MAAS doesn't provide a real instance-id, and if it did, it is
        still only available over the network.  We need to check based
        only on local resources.  So compute a hash based on Oauth tokens
        in sys_cfg and compare it with the hash stored on this object."""
        stored_hash = self.id_hash
        if stored_hash is None:
            return False
        current_cfg = util.get_cfg_by_path(
            sys_cfg, ("datasource", self.dsname), {}
        )
        return stored_hash == get_id_from_ds_cfg(current_cfg)
class DataSourceMAASLocal(DataSourceMAAS):
    """MAAS datasource variant that only acts on an initramfs network."""

    network_config_sources = (
        NetworkConfigSource.CMD_LINE,
        NetworkConfigSource.SYSTEM_CFG,
        NetworkConfigSource.DS,
        NetworkConfigSource.INITRAMFS,
    )

    def _get_data(self):
        """Fetch data only if initramfs network config is applicable."""
        if KlibcNetworkConfigSource().is_applicable():
            LOG.debug("Found initramfs applicable config")
            return super()._get_data()

        # We booted from disk. Initramfs didn't bring up a network, so
        # there is nothing to do here. Wait until network timeframe to
        # run _get_data().
        LOG.debug("No initramfs applicable config")
        return False
def get_oauth_helper(cfg):
    """Build an OauthUrlHelper from the oauth settings found in cfg.

    Settings that are absent from cfg are passed to the helper as None.

    @raises ValueError from OauthUrlHelper if some required fields have
    true-ish values but others do not."""
    oauth_kwargs = {
        name: cfg.get(name)
        for name in (
            "consumer_key",
            "consumer_secret",
            "token_key",
            "token_secret",
        )
    }
    return url_helper.OauthUrlHelper(**oauth_kwargs)
def get_id_from_ds_cfg(ds_cfg):
    """Given a config, generate a unique identifier for this node.

    The identifier is a sha256 hex digest, prefixed with "v1:", of the
    consumer_key, token_key and token_secret values of ds_cfg joined by
    NUL characters. A key that is missing, or present with the value
    None, contributes an empty string.
    """
    fields = ("consumer_key", "token_key", "token_secret")
    values = []
    for field in fields:
        value = ds_cfg.get(field)
        # A key explicitly set to None would otherwise make str.join
        # raise TypeError; treat it exactly like a missing key.
        values.append("" if value is None else value)
    idstr = "\0".join(values)
    # store the encoding version as part of the hash in the event
    # that it ever changed we can compute older versions.
    return "v1:" + hashlib.sha256(idstr.encode("utf-8")).hexdigest()
def read_maas_seed_dir(seed_d):
    """Read MAAS seed data from the local directory seed_d.

    seed_d may be given as a plain path or as a file:// url.

    Returns the result of read_maas_seed_url for that directory.

    Raises MAASSeedDirNone if seed_d is not a directory or is an empty
    directory.
    """
    scheme = "file://"
    if seed_d.startswith(scheme):
        seed_d = seed_d[len(scheme):]
    if not os.path.isdir(seed_d) or not os.listdir(seed_d):
        # Interpolate the path so the message names the directory.
        raise MAASSeedDirNone("%s: not a directory" % seed_d)
    # seed_dir looks in seed_dir, not seed_dir/VERSION
    return read_maas_seed_url("file://%s" % seed_d, version=None)
def read_maas_seed_url(
    seed_url,
    read_file_or_url=None,
    timeout=None,
    version=MD_VERSION,
    paths=None,
    retries=None,
):
    """
    Read the maas datasource at seed_url.

    read_file_or_url is a callable providing an interface like
    url_helper.read_file_or_url, which is used when none is given.

    The files read are those listed in DS_FIELDS, for example:
      * <seed_url>/<version>/meta-data/instance-id
      * <seed_url>/<version>/meta-data/local-hostname
      * <seed_url>/<version>/user-data
    If version is None, then <version>/ will not be used.

    A 404 for an optional file is ignored, a 404 for a required file
    raises MAASSeedDirMalformed, and any other UrlError is re-raised.

    Returns the result of check_seed_contents for the files read.
    """
    fetch = read_file_or_url
    if fetch is None:
        fetch = url_helper.read_file_or_url

    if seed_url.endswith("/"):
        seed_url = seed_url[:-1]

    # Everything is fetched from below this prefix.
    if version is None:
        prefix = seed_url
    else:
        prefix = "%s/%s" % (seed_url, version)

    found = {}
    for path, _dictname, binary, optional in DS_FIELDS:
        url = "%s/%s" % (prefix, path)
        try:
            ssl_details = util.fetch_ssl_details(paths)
            resp = fetch(
                url, retries=retries, timeout=timeout, ssl_details=ssl_details
            )
            if not resp.ok():
                LOG.warning(
                    "Fetching from %s resulted in an invalid http code %s",
                    url,
                    resp.code,
                )
                continue
            if binary:
                found[path] = resp.contents
            else:
                found[path] = util.decode_binary(resp.contents)
        except url_helper.UrlError as err:
            if err.code != 404:
                raise err
            if not optional:
                raise MAASSeedDirMalformed(
                    "Missing required %s: %s" % (path, err)
                ) from err
    return check_seed_contents(found, seed_url)
def check_seed_contents(content, seed):
    """Check that content is usable as the result of reading a seed.

    content maps the remote paths of DS_FIELDS to the data read for
    them; seed names the seed location and is only used in messages.

    Returns a (userdata, metadata, vendordata) tuple, where vendordata
    is the vendor-data parsed as yaml, or None if there was none.

    Raises MAASSeedDirNone if no file at all was found, and
    MAASSeedDirMalformed if a required file is missing or the
    vendor-data can not be loaded as yaml.
    """
    collected = {}
    absent = []
    for remote, dest, _is_binary, is_optional in DS_FIELDS:
        if remote in content:
            group, slash, leaf = dest.partition("/")
            if slash:
                # "group/leaf" destinations are stored one level down.
                collected.setdefault(group, {})[leaf] = content[remote]
            else:
                collected[dest] = content[remote]
        elif not is_optional:
            absent.append(remote)

    if not collected:
        raise MAASSeedDirNone("%s: no data files found" % seed)

    if absent:
        raise MAASSeedDirMalformed("%s: missing files %s" % (seed, absent))

    vendordata = None
    raw_vendordata = collected.get("vendor-data")
    if raw_vendordata:
        # A unique default lets a load failure be told apart from any
        # value the yaml itself could produce.
        failed = object()
        vendordata = util.load_yaml(
            raw_vendordata, default=failed, allowed=object
        )
        if vendordata is failed:
            raise MAASSeedDirMalformed("vendor-data was not loadable as yaml.")

    return collected.get("user-data"), collected.get("meta-data"), vendordata
class MAASSeedDirNone(Exception):
    """No seed was found: the directory is absent or empty, or it
    contained none of the expected data files."""
class MAASSeedDirMalformed(Exception):
    """A seed was found but is unusable: a required file is missing or
    its vendor-data could not be loaded as yaml."""
# Used to match classes to dependencies
# Each entry pairs a datasource class with the tuple of dependencies it is
# registered for; get_datasource_list hands this list to
# sources.list_from_depends. DataSourceMAASLocal is registered for the
# filesystem dependency only, DataSourceMAAS for filesystem and network.
datasources = [
    (DataSourceMAASLocal, (sources.DEP_FILESYSTEM,)),
    (DataSourceMAAS, (sources.DEP_FILESYSTEM, sources.DEP_NETWORK)),
]
# Return a list of data sources that match this set of dependencies
def get_datasource_list(depends):
    """Return the entries of ``datasources`` selected for depends.

    The selection itself is done by sources.list_from_depends.
    """
    matching = sources.list_from_depends(depends, datasources)
    return matching
if __name__ == "__main__":

    def main():
        """
        Command line helper to interact with a MAAS datasource.

        Usage: [options] {crawl,get,check-seed} [url]

        url is a seed directory or an http or https url. Oauth
        credentials are given with --ckey, --tkey, --tsec and --csec.
        Credentials that were not given, and a url that was not given,
        are taken from the datasource config file named with --config.
        Without --config the known files below /etc/cloud/cloud.cfg.d/
        are checked and the last readable one is used.
        """
        import argparse
        import pprint
        import sys

        parser = argparse.ArgumentParser(description="Interact with MAAS DS")
        parser.add_argument(
            "--config",
            metavar="file",
            help="specify DS config file",
            default=None,
        )
        parser.add_argument(
            "--ckey",
            metavar="key",
            help="the consumer key to auth with",
            default=None,
        )
        parser.add_argument(
            "--tkey",
            metavar="key",
            help="the token key to auth with",
            default=None,
        )
        parser.add_argument(
            "--csec",
            metavar="secret",
            help="the consumer secret (likely '')",
            default="",
        )
        parser.add_argument(
            "--tsec",
            metavar="secret",
            help="the token secret to auth with",
            default=None,
        )
        parser.add_argument(
            "--apiver",
            metavar="version",
            help='the apiver to use ("" can be used)',
            default=MD_VERSION,
        )

        subcmds = parser.add_subparsers(title="subcommands", dest="subcmd")
        # help_text rather than help: do not shadow the builtin.
        for name, help_text in (
            ("crawl", "crawl the datasource"),
            ("get", "do a single GET of provided url"),
            ("check-seed", "read and verify seed at url"),
        ):
            p = subcmds.add_parser(name, help=help_text)
            p.add_argument(
                "url", help="the datasource url", nargs="?", default=None
            )

        args = parser.parse_args()
        if args.subcmd is None:
            # "url" is only defined by the subcommand parsers. Without a
            # subcommand every use of args.url below would fail with an
            # AttributeError, so exit with a usage message instead.
            parser.error("a subcommand is required: crawl, get or check-seed")

        creds = {
            "consumer_key": args.ckey,
            "token_key": args.tkey,
            "token_secret": args.tsec,
            "consumer_secret": args.csec,
        }

        if args.config is None:
            # NOTE(review): there is no break, so if both files are
            # readable the later one (90_dpkg_maas) is used. Confirm that
            # this is the intended precedence.
            for fname in ("91_kernel_cmdline_url", "90_dpkg_maas"):
                fpath = "/etc/cloud/cloud.cfg.d/" + fname + ".cfg"
                if os.path.exists(fpath) and os.access(fpath, os.R_OK):
                    sys.stderr.write("Used config in %s.\n" % fpath)
                    args.config = fpath

        if args.config:
            cfg = util.read_conf(args.config)
            if "datasource" in cfg:
                cfg = cfg["datasource"]["MAAS"]
            # Values from the command line win; the config only fills in
            # credentials that are still None.
            for key in creds:
                if key in cfg and creds[key] is None:
                    creds[key] = cfg[key]
            if args.url is None and "metadata_url" in cfg:
                args.url = cfg["metadata_url"]

        if args.url is None:
            sys.stderr.write("Must provide a url or a config with url.\n")
            sys.exit(1)

        oauth_helper = get_oauth_helper(creds)

        def geturl(url):
            # the retry is to ensure that oauth timestamp gets fixed
            return oauth_helper.readurl(url, retries=1).contents

        def printurl(url):
            print("== %s ==\n%s\n" % (url, geturl(url).decode()))

        def crawl(url):
            # A url ending in "/" is a listing with one entry per line;
            # anything else is printed as a single document.
            if url.endswith("/"):
                for line in geturl(url).decode().splitlines():
                    if line.endswith("/"):
                        crawl("%s%s" % (url, line))
                    elif line == "meta-data":
                        # meta-data is a dir, it *should* end in a /
                        crawl("%s%s" % (url, "meta-data/"))
                    else:
                        printurl("%s%s" % (url, line))
            else:
                printurl(url)

        if args.subcmd == "check-seed":
            sys.stderr.write("Checking seed at %s\n" % args.url)
            readurl = oauth_helper.readurl
            if args.url[0] == "/" or args.url.startswith("file://"):
                (userdata, metadata, vd) = read_maas_seed_dir(args.url)
            else:
                (userdata, metadata, vd) = read_maas_seed_url(
                    args.url,
                    version=args.apiver,
                    read_file_or_url=readurl,
                    retries=2,
                )
            print("=== user-data ===")
            print("N/A" if userdata is None else userdata.decode())
            print("=== meta-data ===")
            pprint.pprint(metadata)
            print("=== vendor-data ===")
            pprint.pprint("N/A" if vd is None else vd)

        elif args.subcmd == "get":
            printurl(args.url)

        elif args.subcmd == "crawl":
            if not args.url.endswith("/"):
                args.url = "%s/" % args.url
            crawl(args.url)

    main()