1# repo.py
2# DNF Repository objects.
3#
4# Copyright (C) 2013-2016 Red Hat, Inc.
5#
6# This copyrighted material is made available to anyone wishing to use,
7# modify, copy, or redistribute it subject to the terms and conditions of
8# the GNU General Public License v.2, or (at your option) any later version.
9# This program is distributed in the hope that it will be useful, but WITHOUT
10# ANY WARRANTY expressed or implied, including the implied warranties of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
12# Public License for more details.  You should have received a copy of the
13# GNU General Public License along with this program; if not, write to the
14# Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15# 02110-1301, USA.  Any Red Hat trademarks that are incorporated in the
16# source code or documentation are not subject to the GNU General Public
17# License and may only be used or replicated with the express permission of
18# Red Hat, Inc.
19#
20
21from __future__ import absolute_import
22from __future__ import unicode_literals
23
24from dnf.i18n import ucd, _
25
26import dnf.callback
27import dnf.conf
28import dnf.conf.substitutions
29import dnf.const
30import dnf.crypto
31import dnf.exceptions
32import dnf.logging
33import dnf.pycomp
34import dnf.util
35import dnf.yum.misc
36import libdnf.error
37import libdnf.repo
38import functools
39import hashlib
40import hawkey
41import logging
42import operator
43import os
44import re
45import shutil
46import string
47import sys
48import time
49import traceback
50
# Name of the sub-directory (below a repo cachedir) holding cached packages.
_PACKAGES_RELATIVE_DIR = "packages"
# Name of the cached mirrorlist file; it counts as repo metadata below.
_MIRRORLIST_FILENAME = "mirrorlist"
# Chars allowed in a repo ID
_REPOID_CHARS = string.ascii_letters + string.digits + '-_.:'
# Regex pattern that matches a repo cachedir ("<repoid>-<16 hex digits>") and
# captures the repo ID
_CACHEDIR_RE = r'(?P<repoid>[%s]+)\-[%s]{16}' % (re.escape(_REPOID_CHARS),
                                                 string.hexdigits)

# Regex patterns matching any filename that is repo-specific cache data of a
# particular type.  The filename is expected to not contain the base cachedir
# path components.
CACHE_FILES = {
    # The dot of every compression suffix is escaped (including ".zck") so
    # that only a literal extension is accepted after "xml" / "yaml".
    'metadata': r'^%s\/.*((xml|yaml)(\.gz|\.xz|\.bz2|\.zck)?|asc|cachecookie|%s)$' %
                (_CACHEDIR_RE, _MIRRORLIST_FILENAME),
    'packages': r'^%s\/%s\/.+rpm$' % (_CACHEDIR_RE, _PACKAGES_RELATIVE_DIR),
    'dbcache': r'^.+(solv|solvx)$',
}

logger = logging.getLogger("dnf")
70
71
def repo_id_invalid(repo_id):
    # :api
    """Return the index of an invalid character in *repo_id*, if there is one.

    The check itself is done by ``libdnf.repo.Repo.verifyId``; a negative
    result from it is reported to the caller as ``None``.
    """
    position = libdnf.repo.Repo.verifyId(repo_id)
    if position < 0:
        return None
    return position
77
78
def _pkg2payload(pkg, progress, *factories):
    """Build a payload for *pkg* using the first factory that accepts it.

    Each factory is called as ``factory(pkg, progress)`` in the given order;
    a factory declines by returning ``None``.  The first non-``None`` result
    is returned and the remaining factories are not called.

    Raises ValueError when every factory declines (or none was given).
    """
    for make_payload in factories:
        candidate = make_payload(pkg, progress)
        if candidate is None:
            continue
        return candidate
    raise ValueError(_('no matching payload factory for %s') % pkg)
85
86
def _download_payloads(payloads, drpm):
    """Download all *payloads* and return the collected ``_DownloadErrors``.

    :param payloads: iterable of payload objects providing
        ``_librepo_target()``
    :param drpm: object providing ``err`` (a dict-like with ``clear()`` and
        ``copy()``) and ``wait()``; its errors become the recoverable ones
    :return: a ``_DownloadErrors`` instance describing the run
    """
    def _download_sort_key(payload):
        # False sorts before True: payloads having a ``delta`` attribute are
        # placed first.
        return not hasattr(payload, 'delta')

    drpm.err.clear()
    targets = [pload._librepo_target()
               for pload in sorted(payloads, key=_download_sort_key)]
    errs = _DownloadErrors()
    try:
        # NOTE(review): the second argument is presumably libdnf's fail-fast
        # switch -- confirm against the libdnf API.
        libdnf.repo.PackageTarget.downloadPackages(
            libdnf.repo.VectorPPackageTarget(targets), True)
    except RuntimeError as e:
        errs._fatal = str(e)
    drpm.wait()

    # process downloading errors
    errs._recoverable = drpm.err.copy()
    for tgt in targets:
        err = tgt.getErr()
        if err is None or err.startswith('Not finished'):
            continue
        pkg = tgt.getCallbacks().package_pload.pkg
        if err == _('Already downloaded'):
            errs._skipped.add(pkg)
            continue
        pkg.repo._repo.expire()
        # Write to the backing dict directly.  The ``_irrecoverable`` property
        # may hand out a temporary dict (nothing recorded yet, or only a fatal
        # error set); an item assigned through it would be silently dropped.
        errs._val_irrecoverable[pkg] = [err]

    return errs
118
119
def _update_saving(saving, payloads, errs):
    """Add the sizes of *payloads* to the ``(real, full)`` pair *saving*.

    ``real`` grows by the ``download_size`` of every payload.  ``full`` grows
    by ``_full_size`` only for payloads whose package is not in *errs*.

    Returns the updated ``(real, full)`` tuple; *saving* is not modified.
    """
    real, full = saving
    for pload in payloads:
        failed = pload.pkg in errs
        real += pload.download_size
        if not failed:
            full += pload._full_size
    return real, full
130
131
class _DownloadErrors(object):
    """Container for the errors and skips of one package download run.

    Attributes set directly by users of the class:

    * ``_fatal`` -- message of an error that aborted the whole download, or
      ``None``
    * ``_skipped`` -- set of packages that did not need to be downloaded
    """

    def __init__(self):
        self._val_irrecoverable = {}
        self._val_recoverable = {}
        self._fatal = None
        self._skipped = set()

    @property
    def _irrecoverable(self):
        """Mapping of irrecoverable errors.

        Recorded per-package errors take precedence.  When there are none but
        a fatal error is set, a temporary ``{'': [fatal]}`` mapping is
        returned.  In every other case the live backing dict is returned, so
        ``errs._irrecoverable[pkg] = [...]`` is not lost (it used to be stored
        in a throw-away ``{}``).
        """
        if self._val_irrecoverable or not self._fatal:
            return self._val_irrecoverable
        # Only the fatal error is known; items assigned to this temporary
        # dict are not kept -- record them in ``_val_irrecoverable`` instead.
        return {'': [self._fatal]}

    @property
    def _recoverable(self):
        """Mapping of recoverable errors (replaceable as a whole)."""
        return self._val_recoverable

    @_recoverable.setter
    def _recoverable(self, new_dct):
        self._val_recoverable = new_dct

    def _bandwidth_used(self, pload):
        """Return the bytes transferred for *pload*: 0 if it was skipped."""
        if pload.pkg in self._skipped:
            return 0
        return pload.download_size
159
160
class _DetailedLibrepoError(Exception):
    """A librepo error together with the URL it was raised for.

    The first two items of ``librepo_err.args`` are exposed as
    ``librepo_code`` and ``librepo_msg``.  The exception itself is created
    without arguments, so its own ``args`` stays empty.
    """

    def __init__(self, librepo_err, source_url):
        super(_DetailedLibrepoError, self).__init__()
        details = librepo_err.args
        self.librepo_code = details[0]
        self.librepo_msg = details[1]
        self.source_url = source_url
167
168
class _NullKeyImport(dnf.callback.KeyImport):
    """Key import handler that confirms every key without asking."""

    def _confirm(self, id, userid, fingerprint, url, timestamp):
        # None of the key details is inspected: the answer is always "yes".
        return True
172
173
class Metadata(object):
    """Thin view of a repo object exposing metadata state.

    :param repo: object providing a ``fresh()`` method (``Repo.load`` passes
        its wrapped libdnf repo)
    """

    def __init__(self, repo):
        self._repo = repo

    @property
    def fresh(self):
        # :api
        """Whatever the wrapped repo's ``fresh()`` reports."""
        repo = self._repo
        return repo.fresh()
182
183
class PackageTargetCallbacks(libdnf.repo.PackageTargetCB):
    """Forward libdnf package-target callbacks to a package payload.

    Every callback passes ``None`` as the payload callback's ``cbdata``
    argument and returns 0.
    """

    def __init__(self, package_pload):
        super(PackageTargetCallbacks, self).__init__()
        # The payload whose _end_cb/_progress_cb/_mirrorfail_cb get invoked.
        self.package_pload = package_pload

    def end(self, status, msg):
        payload = self.package_pload
        payload._end_cb(None, status, msg)
        return 0

    def progress(self, totalToDownload, downloaded):
        payload = self.package_pload
        payload._progress_cb(None, totalToDownload, downloaded)
        return 0

    def mirrorFailure(self, msg, url):
        payload = self.package_pload
        payload._mirrorfail_cb(None, msg, url)
        return 0
200
201
class PackagePayload(dnf.callback.Payload):
    """Base payload for the download of one package.

    Subclasses are expected to provide ``download_size`` and
    ``_target_params()``; both are used here but not defined.
    """

    def __init__(self, pkg, progress):
        super(PackagePayload, self).__init__(progress)
        self.callbacks = PackageTargetCallbacks(self)
        self.pkg = pkg

    def _end_cb(self, cbdata, lr_status, msg):
        """End callback to librepo operation."""
        if msg is None:
            outcome = dnf.callback.STATUS_OK
        elif msg.startswith('Not finished'):
            # Nothing is reported to the progress object in this case.
            return
        elif lr_status == libdnf.repo.PackageTargetCB.TransferStatus_ALREADYEXISTS:
            outcome = dnf.callback.STATUS_ALREADY_EXISTS
        else:
            outcome = dnf.callback.STATUS_FAILED

        self.progress.end(self, outcome, msg)

    def _mirrorfail_cb(self, cbdata, err, url):
        """Report a mirror failure to the progress object."""
        self.progress.end(self, dnf.callback.STATUS_MIRROR, err)

    def _progress_cb(self, cbdata, total, done):
        """Pass the downloaded amount on to the progress object.

        An exception raised by the progress object is logged with its
        traceback and not propagated.
        """
        try:
            self.progress.progress(self, done)
        except Exception:
            logger.critical(traceback.format_exc())

    @property
    def _full_size(self):
        return self.download_size

    def _librepo_target(self):
        """Create the ``libdnf.repo.PackageTarget`` for this payload.

        Makes sure the package's destination directory exists first.
        """
        pkg = self.pkg
        dest_dir = pkg.pkgdir
        dnf.util.ensure_dir(dest_dir)

        # Defaults first; whatever _target_params() returns wins on a clash.
        params = {
            'dest': dest_dir,
            'resume': True,
        }
        params.update(self._target_params())

        # NOTE(review): the meaning of the two literal zeros is defined by the
        # libdnf PackageTarget constructor -- confirm against its signature.
        return libdnf.repo.PackageTarget(
            pkg.repo._repo,
            params['relative_url'],
            params['dest'],
            params['checksum_type'],
            params['checksum'],
            params['expectedsize'],
            params['base_url'],
            params['resume'],
            0, 0, self.callbacks)
256
257
class RPMPayload(PackagePayload):
    """Payload for downloading the RPM file of a repository package."""

    def __str__(self):
        location = self.pkg.location
        return os.path.basename(location)

    def _target_params(self):
        """Return the download target parameters taken from the package.

        An unknown checksum type is only warned about; its code is still
        passed on.
        """
        pkg = self.pkg
        ctype, csum = pkg.returnIdSum()
        target_cls = libdnf.repo.PackageTarget
        ctype_code = target_cls.checksumType(ctype)
        if ctype_code == target_cls.ChecksumType_UNKNOWN:
            logger.warning(_("unsupported checksum type: %s"), ctype)

        params = {
            'relative_url': pkg.location,
            'checksum_type': ctype_code,
            'checksum': csum,
            'expectedsize': pkg.downloadsize,
            'base_url': pkg.baseurl,
        }
        return params

    @property
    def download_size(self):
        """Total size of the download."""
        pkg = self.pkg
        return pkg.downloadsize
282
283
class RemoteRPMPayload(PackagePayload):
    """Payload for an RPM addressed by a remote location instead of a repo.

    The file is stored in a "commandline-<digest>/packages" directory below
    ``conf.cachedir``; the digest is derived from the release version and the
    ``basearch`` substitution.
    """

    def __init__(self, remote_location, conf, progress):
        # There is no package object for this payload; a placeholder string
        # is handed to the parent instead.
        super(RemoteRPMPayload, self).__init__("unused_object", progress)
        self.remote_location = remote_location
        # Stays 0 until a progress callback reports the total.
        self.remote_size = 0
        self.conf = conf
        seed = (conf.releasever or "") + conf.substitutions.get('basearch')
        cache_key = hashlib.sha256(seed.encode('utf8')).hexdigest()[:16]
        self.pkgdir = os.path.join(
            conf.cachedir, "commandline-" + cache_key, "packages")
        dnf.util.ensure_dir(self.pkgdir)
        file_name = self.__str__().lstrip("/")
        self.local_path = os.path.join(self.pkgdir, file_name)

    def __str__(self):
        return os.path.basename(self.remote_location)

    def _progress_cb(self, cbdata, total, done):
        """Remember the total size, then report progress.

        An exception raised by the progress object is logged with its
        traceback and not propagated.
        """
        self.remote_size = total
        try:
            self.progress.progress(self, done)
        except Exception:
            logger.critical(traceback.format_exc())

    def _librepo_target(self):
        """Create the ``libdnf.repo.PackageTarget`` for the remote file.

        The location is split into directory (base URL) and file name
        (relative URL).
        """
        location = self.remote_location
        base_url = os.path.dirname(location)
        relative_url = os.path.basename(location)
        # NOTE(review): positional arguments mirror PackagePayload's call
        # (dest, checksum type, checksum, expected size, base url, resume,
        # ...) -- confirm against the libdnf PackageTarget signature.
        return libdnf.repo.PackageTarget(
            self.conf._config, relative_url,
            self.pkgdir, 0, None, 0, base_url,
            True, 0, 0, self.callbacks)

    @property
    def download_size(self):
        """Total size of the download."""
        return self.remote_size
320
321
class MDPayload(dnf.callback.Payload):
    """Payload describing the download of repository metadata."""

    def __init__(self, progress):
        super(MDPayload, self).__init__(progress)
        self._text = ""
        self._download_size = 0
        self.fastest_mirror_running = False
        # Messages of mirror failures seen so far.
        self.mirror_failures = set()

    @property
    def progress(self):
        return self._progress

    @progress.setter
    def progress(self, progress):
        # ``None`` is replaced by a progress object from dnf.callback.
        self._progress = (dnf.callback.NullDownloadProgress()
                          if progress is None else progress)

    @property
    def download_size(self):
        return self._download_size

    def __str__(self):
        return self._text if dnf.pycomp.PY3 else self._text.encode('utf-8')

    def __unicode__(self):
        return self._text

    def _progress_cb(self, cbdata, total, done):
        """Remember the total size and report the downloaded amount."""
        self._download_size = total
        self.progress.progress(self, done)

    def _fastestmirror_cb(self, cbdata, stage, data):
        """Turn fastest-mirror stage notifications into progress messages.

        Only the DETECTION stage, and a STATUS stage following it, produce a
        message; everything else is ignored.
        """
        stages = libdnf.repo.RepoCB
        if stage == stages.FastestMirrorStage_DETECTION:
            # pinging mirrors, this might take a while
            text = _('determining the fastest mirror (%s hosts).. ') % data
            self.fastest_mirror_running = True
        elif stage == stages.FastestMirrorStage_STATUS and self.fastest_mirror_running:
            # done.. report but ignore any errors
            if data:
                text = 'error: %s\n' % data
            else:
                text = 'done.\n'
        else:
            return
        self.progress.message(text)

    def _mirror_failure_cb(self, cbdata, msg, url, metadata):
        """Record the failure message and log it at debug level."""
        self.mirror_failures.add(msg)
        logger.debug('error: %s (%s).' % (msg, url))

    def start(self, text):
        """Set the payload's text and start the progress object."""
        self._text = text
        self.progress.start(1, 0)

    def end(self):
        """Reset the remembered size and end the progress object."""
        self._download_size = 0
        self.progress.end(self, None, None)
382
383
# Sync strategies; each is an alias of the corresponding libdnf constant.
# Use the local cache even if it's expired.  Download if there's no cache.
SYNC_LAZY = libdnf.repo.Repo.SyncStrategy_LAZY
# Use the local cache even if it's expired; never download.
SYNC_ONLY_CACHE = libdnf.repo.Repo.SyncStrategy_ONLY_CACHE
# Try the cache; if it is expired, download new metadata.
SYNC_TRY_CACHE = libdnf.repo.Repo.SyncStrategy_TRY_CACHE
390
391
class RepoCallbacks(libdnf.repo.RepoCB):
    """Forward libdnf repo callbacks to a ``Repo`` and its metadata payload.

    The payload is read from ``repo._md_pload`` once, at construction time.
    """

    def __init__(self, repo):
        super(RepoCallbacks, self).__init__()
        self._repo = repo
        self._md_pload = repo._md_pload

    def start(self, what):
        payload = self._md_pload
        payload.start(what)

    def end(self):
        payload = self._md_pload
        payload.end()

    def progress(self, totalToDownload, downloaded):
        payload = self._md_pload
        payload._progress_cb(None, totalToDownload, downloaded)
        return 0

    def fastestMirror(self, stage, ptr):
        payload = self._md_pload
        payload._fastestmirror_cb(None, stage, ptr)

    def handleMirrorFailure(self, msg, url, metadata):
        payload = self._md_pload
        payload._mirror_failure_cb(None, msg, url, metadata)
        return 0

    def repokeyImport(self, id, userid, fingerprint, url, timestamp):
        # The decision is made by the repo's current key import handler.
        key_import = self._repo._key_import
        return key_import._confirm(id, userid, fingerprint, url, timestamp)
417
418
class Repo(dnf.conf.RepoConf):
    # :api
    """Repository configuration combined with its libdnf repo object.

    Most methods delegate to the ``libdnf.repo.Repo`` instance kept in
    ``self._repo``.
    """

    DEFAULT_SYNC = SYNC_TRY_CACHE

    def __init__(self, name=None, parent_conf=None):
        # :api
        super(Repo, self).__init__(section=name, parent=parent_conf)

        self._config.this.disown()  # _repo will be the owner of _config
        self._repo = libdnf.repo.Repo(name or "", self._config)

        self._md_pload = MDPayload(dnf.callback.NullDownloadProgress())
        self._callbacks = RepoCallbacks(self)
        self._callbacks.this.disown()  # _repo will be the owner of callbacks
        self._repo.setCallbacks(self._callbacks)

        self._pkgdir = None
        self._key_import = _NullKeyImport()
        self.metadata = None  # :api
        self._repo.setSyncStrategy(self.DEFAULT_SYNC)
        if parent_conf:
            self._repo.setSubstitutions(parent_conf.substitutions)
        self._substitutions = dnf.conf.substitutions.Substitutions()
        if parent_conf is not None:
            self._check_config_file_age = parent_conf.check_config_file_age
        else:
            self._check_config_file_age = True

    @property
    def id(self):
        # :api
        return self._repo.getId()

    @property
    def repofile(self):
        # :api
        return self._repo.getRepoFilePath()

    @repofile.setter
    def repofile(self, value):
        self._repo.setRepoFilePath(value)

    @property
    def pkgdir(self):
        # :api
        # A local repo uses its local base URL; others use the cache.
        repo = self._repo
        return repo.getLocalBaseurl() if repo.isLocal() else self.cache_pkgdir()

    @pkgdir.setter
    def pkgdir(self, val):
        # :api
        self._pkgdir = val

    def cache_pkgdir(self):
        """Return the explicitly set package directory, else the default
        "packages" directory below the repo's cachedir."""
        override = self._pkgdir
        if override is None:
            return os.path.join(self._repo.getCachedir(), _PACKAGES_RELATIVE_DIR)
        return override

    @property
    def _pubring_dir(self):
        cachedir = self._repo.getCachedir()
        return os.path.join(cachedir, 'pubring')

    @property
    def load_metadata_other(self):
        return self._repo.getLoadMetadataOther()

    @load_metadata_other.setter
    def load_metadata_other(self, val):
        self._repo.setLoadMetadataOther(val)

    def __lt__(self, other):
        # Repos are ordered by their IDs.
        return self.id < other.id

    def __repr__(self):
        return "<%s %s>" % (self.__class__.__name__, self.id)

    def __setattr__(self, name, value):
        # Plain pass-through to the parent implementation.
        super(Repo, self).__setattr__(name, value)

    def disable(self):
        # :api
        self._repo.disable()

    def enable(self):
        # :api
        self._repo.enable()

    def add_metadata_type_to_download(self, metadata_type):
        # :api
        """Request one more repository metadata type to be downloaded.

        The given metadata_type is appended to the default metadata set when
        the repository is downloaded.

        Parameters
        ----------
        metadata_type: string

        Example: add_metadata_type_to_download("productid")
        """
        self._repo.addMetadataTypeToDownload(metadata_type)

    def remove_metadata_type_from_download(self, metadata_type):
        # :api
        """Stop requesting this additional repository metadata type.

        The given metadata_type is no longer downloaded by default when this
        repository is downloaded.

        Parameters
        ----------
        metadata_type: string

        Example: remove_metadata_type_from_download("productid")
        """
        self._repo.removeMetadataTypeFromDownload(metadata_type)

    def get_metadata_path(self, metadata_type):
        # :api
        """Return the path of the downloaded metadata file of the given type.

        Parameters
        ----------
        metadata_type: string
        """
        return self._repo.getMetadataPath(metadata_type)

    def get_metadata_content(self, metadata_type):
        # :api
        """Return the content of the downloaded metadata file of the given type.

        Content of a compressed metadata file is returned uncompressed.

        Parameters
        ----------
        metadata_type: string
        """
        return self._repo.getMetadataContent(metadata_type)

    def load(self):
        # :api
        """Load the metadata for this repo.

        Depending on the configuration and the age and consistency of the
        data available in the disk cache, the metadata are either loaded from
        the cache or downloaded from the mirror, baseurl or metalink.

        By default this method does not try to refresh already loaded data
        when called repeatedly.

        Returns True if this call to load() caused a fresh metadata download.

        Raises dnf.exceptions.RepoError when loading fails; mirror failures
        collected in the meantime are logged as a warning first.
        """
        ret = False
        try:
            ret = self._repo.load()
        except (libdnf.error.Error, RuntimeError) as e:
            failures = self._md_pload.mirror_failures
            if failures:
                header = "Errors during downloading metadata for repository '%s':" % self.id
                details = "".join("\n  - %s" % failure for failure in failures)
                logger.warning(header + details)
            raise dnf.exceptions.RepoError(str(e))
        finally:
            # Collected failures belong to this attempt only.
            self._md_pload.mirror_failures = set()
        self.metadata = Metadata(self._repo)
        return ret

    def _metadata_expire_in(self):
        """Get the number of seconds after which the cached metadata will expire.

        Returns a tuple: a boolean telling whether there is cached metadata at
        all, and the number of seconds it will expire in.  A negative number
        means the metadata has expired already, None that it never expires.

        """
        if not self.metadata:
            # NOTE(review): the result of loadCache() is not used and
            # self.metadata is not assigned here, so the check below looks
            # like it can only pass when metadata was set earlier -- confirm
            # this is intended.
            self._repo.loadCache(False)
        if not self.metadata:
            return False, 0
        if self.metadata_expire == -1:
            return True, None
        expires_in = self._repo.getExpiresIn()
        if self._repo.isExpired():
            # Expired metadata never reports a positive remaining time.
            expires_in = min(0, expires_in)
        return True, expires_in

    def _set_key_import(self, key_import):
        self._key_import = key_import

    def set_progress_bar(self, progress):
        # :api
        self._md_pload.progress = progress

    def get_http_headers(self):
        # :api
        """Return the user defined http headers.

        Returns
        -------
        headers : tuple of strings
        """
        return self._repo.getHttpHeaders()

    def set_http_headers(self, headers):
        # :api
        """Set http headers.

        Sets new http headers and rewrites existing ones.

        Parameters
        ----------
        headers : tuple or list of strings
            Example: set_http_headers(["User-Agent: Agent007", "MyFieldName: MyFieldValue"])
        """
        self._repo.setHttpHeaders(headers)

    def remote_location(self, location, schemes=('http', 'ftp', 'file', 'https')):
        """
        :param location: relative location inside the repo
        :param schemes: list of allowed protocols. Default is ('http', 'ftp', 'file', 'https')
        :return: absolute url (string) or None

        The repo's mirrors are searched if there are any, otherwise its
        baseurl entries.  The first URL whose scheme is allowed is used; with
        empty *schemes* the very first URL is used.
        """
        if not location:
            return None

        url_list = self._repo.getMirrors() or self.baseurl
        if not url_list:
            return None

        for url in url_list:
            if not schemes or dnf.pycomp.urlparse.urlparse(url)[0] in schemes:
                return os.path.join(url, location.lstrip('/'))
        return None
660