1# match_counter.py
2# Implements class MatchCounter.
3#
4# Copyright (C) 2012-2016 Red Hat, Inc.
5#
6# This copyrighted material is made available to anyone wishing to use,
7# modify, copy, or redistribute it subject to the terms and conditions of
8# the GNU General Public License v.2, or (at your option) any later version.
9# This program is distributed in the hope that it will be useful, but WITHOUT
10# ANY WARRANTY expressed or implied, including the implied warranties of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
12# Public License for more details.  You should have received a copy of the
13# GNU General Public License along with this program; if not, write to the
14# Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15# 02110-1301, USA.  Any Red Hat trademarks that are incorporated in the
16# source code or documentation are not subject to the GNU General Public
17# License and may only be used or replicated with the express permission of
18# Red Hat, Inc.
19#
20
21from __future__ import absolute_import
22from __future__ import print_function
23from __future__ import unicode_literals
24from functools import reduce
25
# Score contributed by one match in each searchable package attribute.
# A larger score sorts a package earlier in MatchCounter.sorted().
WEIGHTS = {
    'name': 7,
    'summary': 4,
    'description': 2,
    'url': 1,
}
32
33
34def _canonize_string_set(sset, length):
35    """ Ordered sset with empty strings prepended. """
36    current = len(sset)
37    l = [''] * (length - current) + sorted(sset)
38    return l
39
40
class MatchCounter(dict):
    """Record, per package, which attributes matched which search needles.

    The mapping is: ``package -> [(key, needle), ... ]`` where ``key`` is the
    name of the package attribute that matched and ``needle`` is the value it
    was matched against.

    """

    @staticmethod
    def _eval_weights(pkg, matches):
        """Return the summed weight of all `matches` recorded for `pkg`.

        Every match contributes ``WEIGHTS[key]``. A ``name`` match whose
        needle equals the package name exactly contributes twice that.
        """
        score = 0
        for match in matches:
            key, needle = match[0], match[1]
            # the matched attribute is read for every match, not only "name"
            haystack = getattr(pkg, key)
            exact_name_hit = key == "name" and haystack == needle
            score += (2 * WEIGHTS[key]) if exact_name_hit else WEIGHTS[key]
        return score

    def _key_func(self):
        """Get the key function used for sorting matches.

        Packages are ordered by the sum of their weighted hits and, where
        that sum is equal, alphabetically by name.

        Returned function is:
        pkg -> (-weights_sum, pkg.name)

        """
        # The weight is negated so that an ascending sort puts the packages
        # with the highest weight first.
        return lambda pkg: (-self._eval_weights(pkg, self[pkg]), pkg.name)

    def _max_needles(self):
        """Return the max count of distinct needles over all packages.

        Returns 0 when no package has been recorded.
        """
        if not self:
            return 0
        return max(len(self.matched_needles(pkg)) for pkg in self)

    def add(self, pkg, key, needle):
        """Record that attribute `key` of `pkg` matched `needle`."""
        hits = self.setdefault(pkg, [])
        hits.append((key, needle))

    def dump(self):
        """Print one ``package<TAB>matches`` line per recorded package."""
        for pkg, hits in self.items():
            print('%s\t%s' % (pkg, hits))

    def matched_haystacks(self, pkg):
        """Return the set of attribute values of `pkg` that were matched."""
        return {getattr(pkg, match[0]) for match in self[pkg]}

    def matched_keys(self, pkg):
        """Return the matched attribute names of `pkg` without duplicates.

        The names keep the order in which they were first recorded.
        """
        ordered = []
        for match in self[pkg]:
            key = match[0]
            if key not in ordered:
                ordered.append(key)
        return ordered

    def matched_needles(self, pkg):
        """Return the set of needles that matched `pkg`."""
        return {match[1] for match in self[pkg]}

    def sorted(self, reverse=False, limit_to=None):
        """Return packages ordered by descending weight, then by name.

        `limit_to` restricts the result to the given packages; when it is
        empty or None all recorded packages are returned. `reverse` is
        accepted but not used by this method: the order is always highest
        weight first.
        """
        candidates = limit_to or self.keys()
        return sorted(candidates, key=self._key_func())

    def total(self):
        """Return the number of matches recorded over all packages."""
        return sum(len(hits) for hits in self.values())
118