1
2import re
3from itertools import chain
4
5import numpy as np
6from scipy.sparse import issparse
7
8from AnyQt.QtCore import Qt, QItemSelection, QItemSelectionModel
9from AnyQt.QtWidgets import QTreeWidget, QTreeWidgetItem, qApp
10
11from Orange.data import Table
12from Orange.widgets import widget, gui, settings
13from Orange.widgets.widget import Input, Output
14from Orange.widgets.utils.widgetpreview import WidgetPreview
15
16from orangecontrib.associate.fpgrowth import frequent_itemsets, OneHot
17
18
19class OWItemsets(widget.OWWidget):
20    name = 'Frequent Itemsets'
21    description = 'Explore sets of items that frequently appear together.'
22    icon = 'icons/FrequentItemsets.svg'
23    priority = 10
24
25    class Inputs:
26        data = Input("Data", Table)
27
28    class Outputs:
29        matching_data = Output("Matching Data", Table)
30
31    class Error(widget.OWWidget.Error):
32        need_discrete_data = widget.Msg("Need some discrete data to work with.")
33        no_disc_features = widget.Msg("Discrete features required but data has none.")
34
35    class Warning(widget.OWWidget.Warning):
36        cont_attrs = widget.Msg("Data has continuous attributes which will be skipped.")
37        err_reg_expression = widget.Msg("Error in regular expression: {}")
38
39    minSupport = settings.Setting(30)
40    maxItemsets = settings.Setting(10000)
41    filterSearch = settings.Setting(True)
42    autoFind = settings.Setting(False)
43    autoSend = settings.Setting(True)
44    filterKeywords = settings.Setting('')
45    filterMinItems = settings.Setting(1)
46    filterMaxItems = settings.Setting(10000)
47
48    UserAdviceMessages = [
49        widget.Message('Itemset are listed in item-sorted order, i.e. '
50                       'an itemset containing A and B is only listed once, as '
51                       'A > B (and not also B > A).',
52                       'itemsets-order', widget.Message.Warning),
53        widget.Message('To select all the itemsets that are descendants of '
54                       '(include) some item X (i.e. the whole subtree), you '
55                       'can fold the subtree at that item and then select it.',
56                       'itemsets-order', widget.Message.Information)
57    ]
58
59    support_options = \
60        [.0001, .0005, .001, .005, .01, .05, .1, .5] \
61        + list(chain(range(1, 10), range(10, 101, 5)))
62
63    def __init__(self):
64        self.data = None
65        self.output = None
66        self._is_running = False
67        self.isRegexMatch = lambda x: True
68        self.tree = QTreeWidget(self.mainArea,
69                                columnCount=2,
70                                allColumnsShowFocus=True,
71                                alternatingRowColors=True,
72                                selectionMode=QTreeWidget.ExtendedSelection,
73                                uniformRowHeights=True)
74        self.tree.setHeaderLabels(["Itemsets", "Support", "%"])
75        self.tree.header().setStretchLastSection(True)
76        self.tree.itemSelectionChanged.connect(self.selectionChanged)
77        self.mainArea.layout().addWidget(self.tree)
78
79        box = gui.widgetBox(self.controlArea, "Info")
80        self.nItemsets = self.nSelectedExamples = self.nSelectedItemsets = ''
81        gui.label(box, self, "Number of itemsets: %(nItemsets)s")
82        gui.label(box, self, "Selected itemsets: %(nSelectedItemsets)s")
83        gui.label(box, self, "Selected examples: %(nSelectedExamples)s")
84        hbox = gui.widgetBox(box, orientation='horizontal')
85        gui.button(hbox, self, "Expand all", callback=self.tree.expandAll)
86        gui.button(hbox, self, "Collapse all", callback=self.tree.collapseAll)
87
88        box = gui.widgetBox(self.controlArea, 'Find itemsets')
89        gui.valueSlider(
90            box, self, 'minSupport',
91            values=self.support_options,
92            label='Minimal support:', labelFormat="%g%%",
93            callback=lambda: self.find_itemsets())
94        gui.hSlider(box, self, 'maxItemsets', minValue=10000, maxValue=100000, step=10000,
95                    label='Max. number of itemsets:', labelFormat="%d",
96                    callback=lambda: self.find_itemsets())
97        self.button = gui.auto_commit(
98            box, self, 'autoFind', 'Find Itemsets', commit=self.find_itemsets,
99            callback=lambda: self.autoFind and self.find_itemsets())
100
101        box = gui.widgetBox(self.controlArea, 'Filter itemsets')
102        gui.lineEdit(box, self, 'filterKeywords', 'Contains:',
103                     callback=self.filter_change, orientation='horizontal',
104                     tooltip='A comma or space-separated list of regular '
105                             'expressions.')
106        hbox = gui.widgetBox(box, orientation='horizontal')
107        gui.spin(hbox, self, 'filterMinItems', 1, 998, label='Min. items:',
108                 callback=self.filter_change)
109        gui.spin(hbox, self, 'filterMaxItems', 2, 999, label='Max. items:',
110                 callback=self.filter_change)
111        gui.checkBox(box, self, 'filterSearch',
112                     label='Apply these filters in search',
113                     tooltip='If checked, the itemsets are filtered according '
114                             'to these filter conditions already in the search '
115                             'phase. \nIf unchecked, the only filters applied '
116                             'during search are the ones above, '
117                             'and the itemsets are \nfiltered afterwards only for '
118                             'display, i.e. only the matching itemsets are shown.')
119
120        gui.rubber(hbox)
121
122        gui.rubber(self.controlArea)
123        gui.auto_commit(self.controlArea, self, 'autoSend', 'Send selection')
124
125        self.filter_change()
126
127    ITEM_DATA_ROLE = Qt.UserRole + 1
128
129    def selectionChanged(self):
130        X = self.X
131        mapping = self.onehot_mapping
132        instances = set()
133        where = np.where
134
135        def whole_subtree(node):
136            yield node
137            for i in range(node.childCount()):
138                yield from whole_subtree(node.child(i))
139
140        def itemset(node):
141            while node:
142                yield node.data(0, self.ITEM_DATA_ROLE)
143                node = node.parent()
144
145        def selection_ranges(node):
146            n_children = node.childCount()
147            if n_children:
148                yield (self.tree.indexFromItem(node.child(0)),
149                       self.tree.indexFromItem(node.child(n_children - 1)))
150            for i in range(n_children):
151                yield from selection_ranges(node.child(i))
152
153        nSelectedItemsets = 0
154        item_selection = QItemSelection()
155        for node in self.tree.selectedItems():
156            nodes = (node,) if node.isExpanded() else whole_subtree(node)
157            if not node.isExpanded():
158                for srange in selection_ranges(node):
159                    item_selection.select(*srange)
160            for node in nodes:
161                nSelectedItemsets += 1
162                cols, vals = zip(*(mapping[i] for i in itemset(node)))
163                if issparse(X):
164                    rows = (len(cols) == np.bincount((X[:, cols] != 0).indices,
165                                                     minlength=X.shape[0])).nonzero()[0]
166                else:
167                    rows = where((X[:, cols] == vals).all(axis=1))[0]
168                instances.update(rows)
169        self.tree.itemSelectionChanged.disconnect(self.selectionChanged)
170        self.tree.selectionModel().select(item_selection,
171                                          QItemSelectionModel.Select | QItemSelectionModel.Rows)
172        self.tree.itemSelectionChanged.connect(self.selectionChanged)
173
174        self.nSelectedExamples = len(instances)
175        self.nSelectedItemsets = nSelectedItemsets
176        self.output = self.data[sorted(instances)] or None
177        self.commit()
178
179    def commit(self):
180        self.Outputs.matching_data.send(self.output)
181
182    def filter_change(self):
183        self.Warning.err_reg_expression.clear()
184        try:
185            isRegexMatch = self.isRegexMatch = re.compile(
186                '|'.join(i.strip()
187                         for i in re.split('(,|\s)+', self.filterKeywords.strip())
188                         if i.strip()), re.IGNORECASE).search
189        except Exception as e:
190            self.Warning.err_reg_expression(e.args[0])
191            isRegexMatch = self.isRegexMatch = lambda x: True
192
193        def hide(node, depth, has_kw):
194            if not has_kw:
195                has_kw = isRegexMatch(node.text(0))
196            hidden = (sum(hide(node.child(i), depth + 1, has_kw)
197                          for i in range(node.childCount())) == node.childCount()
198                      if node.childCount() else
199                      (not has_kw or
200                       not self.filterMinItems <= depth <= self.filterMaxItems))
201            node.setHidden(hidden)
202            return hidden
203
204        hide(self.tree.invisibleRootItem(), 0, False)
205
206    class TreeWidgetItem(QTreeWidgetItem):
207        def data(self, column, role):
208            """Construct lazy tooltips"""
209            if role != Qt.ToolTipRole:
210                return super().data(column, role)
211            tooltip = []
212            while self:
213                tooltip.append(self.text(0))
214                self = self.parent()
215            return '\n'.join(reversed(tooltip))
216
217    def find_itemsets(self):
218        if self.data is None or not len(self.data):
219            return
220        if self._is_running:
221            self._is_running = False
222            return
223        self._is_running = True
224
225        self.button.button.setText('Cancel')
226
227        data = self.data
228        self.tree.clear()
229        self.tree.setUpdatesEnabled(False)
230        self.tree.blockSignals(True)
231
232        class ItemDict(dict):
233            def __init__(self, item):
234                self.item = item
235
236        top = ItemDict(self.tree.invisibleRootItem())
237        X, mapping = OneHot.encode(data)
238        self.Error.need_discrete_data.clear()
239        if X is None:
240            self.Error.need_discrete_data()
241
242        self.onehot_mapping = mapping
243        ITEM_FMT = '{}' if issparse(data.X) else '{}={}'
244        names = {item: ITEM_FMT.format(var.name, val)
245                 for item, var, val in OneHot.decode(mapping.keys(), data, mapping)}
246        nItemsets = 0
247
248        filterSearch = self.filterSearch
249        filterMinItems, filterMaxItems = self.filterMinItems, self.filterMaxItems
250        isRegexMatch = self.isRegexMatch
251
252        # Find itemsets and populate the TreeView
253        with self.progressBar(self.maxItemsets + 1) as progress:
254            for itemset, support in frequent_itemsets(X, self.minSupport / 100):
255
256                if filterSearch and not filterMinItems <= len(itemset) <= filterMaxItems:
257                    continue
258
259                parent = top
260                first_new_item = None
261                itemset_matches_filter = False
262
263                for item in sorted(itemset):
264                    name = names[item]
265
266                    if filterSearch and not itemset_matches_filter:
267                        itemset_matches_filter = isRegexMatch(name)
268
269                    child = parent.get(name)
270                    if child is None:
271                        try:
272                            wi = self.TreeWidgetItem(
273                                parent.item,
274                                [name, str(support), '{:.4g}'.format(100 * support / len(data))])
275                        except RuntimeError:
276                            # FIXME: When autoFind was in effect and the support
277                            # slider was moved, this line excepted with:
278                            #     RuntimeError: wrapped C/C++ object of type
279                            #                   TreeWidgetItem has been deleted
280                            return
281                        wi.setData(0, self.ITEM_DATA_ROLE, item)
282                        child = parent[name] = ItemDict(wi)
283
284                        if first_new_item is None:
285                            first_new_item = (parent, name)
286                    parent = child
287
288                if filterSearch and not itemset_matches_filter:
289                    parent, name = first_new_item
290                    parent.item.removeChild(parent[name].item)
291                    del parent[name].item
292                    del parent[name]
293                else:
294                    nItemsets += 1
295                    progress.advance()
296
297                if not self._is_running or nItemsets >= self.maxItemsets:
298                    break
299
300                qApp.processEvents()
301
302        if not filterSearch:
303            self.filter_change()
304        self.nItemsets = nItemsets
305        self.nSelectedItemsets = 0
306        self.nSelectedExamples = 0
307        self.tree.expandAll()
308        for i in range(self.tree.columnCount()):
309            self.tree.resizeColumnToContents(i)
310        self.tree.setUpdatesEnabled(True)
311        self.tree.blockSignals(False)
312        self._is_running = False
313        self.button.button.setText('Find Itemsets')
314
315    @Inputs.data
316    def set_data(self, data):
317        self.data = data
318        is_error = False
319        if data is not None:
320            self.Warning.cont_attrs.clear()
321            self.Error.no_disc_features.clear()
322            self.button.setDisabled(False)
323            self.X = data.X
324            if issparse(data.X):
325                self.X = data.X.tocsc()
326            else:
327                if not data.domain.has_discrete_attributes():
328                    self.Error.no_disc_features()
329                    is_error = True
330                    self.button.setDisabled(True)
331                elif data.domain.has_continuous_attributes():
332                    self.Warning.cont_attrs()
333        else:
334            self.output = None
335            self.commit()
336        if self.autoFind and not is_error:
337            self.find_itemsets()
338
339    @classmethod
340    def migrate_settings(cls, settings, _):
341        x = settings.get("minSupport", 1)
342        settings["minSupport"] = min(cls.support_options, key=lambda t: abs(t - x))
343
344
345if __name__ == "__main__":  # pragma: no cover
346    WidgetPreview(OWItemsets).run(Table("zoo"))
347