1 2import re 3from itertools import chain 4 5import numpy as np 6from scipy.sparse import issparse 7 8from AnyQt.QtCore import Qt, QItemSelection, QItemSelectionModel 9from AnyQt.QtWidgets import QTreeWidget, QTreeWidgetItem, qApp 10 11from Orange.data import Table 12from Orange.widgets import widget, gui, settings 13from Orange.widgets.widget import Input, Output 14from Orange.widgets.utils.widgetpreview import WidgetPreview 15 16from orangecontrib.associate.fpgrowth import frequent_itemsets, OneHot 17 18 19class OWItemsets(widget.OWWidget): 20 name = 'Frequent Itemsets' 21 description = 'Explore sets of items that frequently appear together.' 22 icon = 'icons/FrequentItemsets.svg' 23 priority = 10 24 25 class Inputs: 26 data = Input("Data", Table) 27 28 class Outputs: 29 matching_data = Output("Matching Data", Table) 30 31 class Error(widget.OWWidget.Error): 32 need_discrete_data = widget.Msg("Need some discrete data to work with.") 33 no_disc_features = widget.Msg("Discrete features required but data has none.") 34 35 class Warning(widget.OWWidget.Warning): 36 cont_attrs = widget.Msg("Data has continuous attributes which will be skipped.") 37 err_reg_expression = widget.Msg("Error in regular expression: {}") 38 39 minSupport = settings.Setting(30) 40 maxItemsets = settings.Setting(10000) 41 filterSearch = settings.Setting(True) 42 autoFind = settings.Setting(False) 43 autoSend = settings.Setting(True) 44 filterKeywords = settings.Setting('') 45 filterMinItems = settings.Setting(1) 46 filterMaxItems = settings.Setting(10000) 47 48 UserAdviceMessages = [ 49 widget.Message('Itemset are listed in item-sorted order, i.e. ' 50 'an itemset containing A and B is only listed once, as ' 51 'A > B (and not also B > A).', 52 'itemsets-order', widget.Message.Warning), 53 widget.Message('To select all the itemsets that are descendants of ' 54 '(include) some item X (i.e. the whole subtree), you ' 55 'can fold the subtree at that item and then select it.', 56 'itemsets-order', widget.Message.Information) 57 ] 58 59 support_options = \ 60 [.0001, .0005, .001, .005, .01, .05, .1, .5] \ 61 + list(chain(range(1, 10), range(10, 101, 5))) 62 63 def __init__(self): 64 self.data = None 65 self.output = None 66 self._is_running = False 67 self.isRegexMatch = lambda x: True 68 self.tree = QTreeWidget(self.mainArea, 69 columnCount=2, 70 allColumnsShowFocus=True, 71 alternatingRowColors=True, 72 selectionMode=QTreeWidget.ExtendedSelection, 73 uniformRowHeights=True) 74 self.tree.setHeaderLabels(["Itemsets", "Support", "%"]) 75 self.tree.header().setStretchLastSection(True) 76 self.tree.itemSelectionChanged.connect(self.selectionChanged) 77 self.mainArea.layout().addWidget(self.tree) 78 79 box = gui.widgetBox(self.controlArea, "Info") 80 self.nItemsets = self.nSelectedExamples = self.nSelectedItemsets = '' 81 gui.label(box, self, "Number of itemsets: %(nItemsets)s") 82 gui.label(box, self, "Selected itemsets: %(nSelectedItemsets)s") 83 gui.label(box, self, "Selected examples: %(nSelectedExamples)s") 84 hbox = gui.widgetBox(box, orientation='horizontal') 85 gui.button(hbox, self, "Expand all", callback=self.tree.expandAll) 86 gui.button(hbox, self, "Collapse all", callback=self.tree.collapseAll) 87 88 box = gui.widgetBox(self.controlArea, 'Find itemsets') 89 gui.valueSlider( 90 box, self, 'minSupport', 91 values=self.support_options, 92 label='Minimal support:', labelFormat="%g%%", 93 callback=lambda: self.find_itemsets()) 94 gui.hSlider(box, self, 'maxItemsets', minValue=10000, maxValue=100000, step=10000, 95 label='Max. number of itemsets:', labelFormat="%d", 96 callback=lambda: self.find_itemsets()) 97 self.button = gui.auto_commit( 98 box, self, 'autoFind', 'Find Itemsets', commit=self.find_itemsets, 99 callback=lambda: self.autoFind and self.find_itemsets()) 100 101 box = gui.widgetBox(self.controlArea, 'Filter itemsets') 102 gui.lineEdit(box, self, 'filterKeywords', 'Contains:', 103 callback=self.filter_change, orientation='horizontal', 104 tooltip='A comma or space-separated list of regular ' 105 'expressions.') 106 hbox = gui.widgetBox(box, orientation='horizontal') 107 gui.spin(hbox, self, 'filterMinItems', 1, 998, label='Min. items:', 108 callback=self.filter_change) 109 gui.spin(hbox, self, 'filterMaxItems', 2, 999, label='Max. items:', 110 callback=self.filter_change) 111 gui.checkBox(box, self, 'filterSearch', 112 label='Apply these filters in search', 113 tooltip='If checked, the itemsets are filtered according ' 114 'to these filter conditions already in the search ' 115 'phase. \nIf unchecked, the only filters applied ' 116 'during search are the ones above, ' 117 'and the itemsets are \nfiltered afterwards only for ' 118 'display, i.e. only the matching itemsets are shown.') 119 120 gui.rubber(hbox) 121 122 gui.rubber(self.controlArea) 123 gui.auto_commit(self.controlArea, self, 'autoSend', 'Send selection') 124 125 self.filter_change() 126 127 ITEM_DATA_ROLE = Qt.UserRole + 1 128 129 def selectionChanged(self): 130 X = self.X 131 mapping = self.onehot_mapping 132 instances = set() 133 where = np.where 134 135 def whole_subtree(node): 136 yield node 137 for i in range(node.childCount()): 138 yield from whole_subtree(node.child(i)) 139 140 def itemset(node): 141 while node: 142 yield node.data(0, self.ITEM_DATA_ROLE) 143 node = node.parent() 144 145 def selection_ranges(node): 146 n_children = node.childCount() 147 if n_children: 148 yield (self.tree.indexFromItem(node.child(0)), 149 self.tree.indexFromItem(node.child(n_children - 1))) 150 for i in range(n_children): 151 yield from selection_ranges(node.child(i)) 152 153 nSelectedItemsets = 0 154 item_selection = QItemSelection() 155 for node in self.tree.selectedItems(): 156 nodes = (node,) if node.isExpanded() else whole_subtree(node) 157 if not node.isExpanded(): 158 for srange in selection_ranges(node): 159 item_selection.select(*srange) 160 for node in nodes: 161 nSelectedItemsets += 1 162 cols, vals = zip(*(mapping[i] for i in itemset(node))) 163 if issparse(X): 164 rows = (len(cols) == np.bincount((X[:, cols] != 0).indices, 165 minlength=X.shape[0])).nonzero()[0] 166 else: 167 rows = where((X[:, cols] == vals).all(axis=1))[0] 168 instances.update(rows) 169 self.tree.itemSelectionChanged.disconnect(self.selectionChanged) 170 self.tree.selectionModel().select(item_selection, 171 QItemSelectionModel.Select | QItemSelectionModel.Rows) 172 self.tree.itemSelectionChanged.connect(self.selectionChanged) 173 174 self.nSelectedExamples = len(instances) 175 self.nSelectedItemsets = nSelectedItemsets 176 self.output = self.data[sorted(instances)] or None 177 self.commit() 178 179 def commit(self): 180 self.Outputs.matching_data.send(self.output) 181 182 def filter_change(self): 183 self.Warning.err_reg_expression.clear() 184 try: 185 isRegexMatch = self.isRegexMatch = re.compile( 186 '|'.join(i.strip() 187 for i in re.split('(,|\s)+', self.filterKeywords.strip()) 188 if i.strip()), re.IGNORECASE).search 189 except Exception as e: 190 self.Warning.err_reg_expression(e.args[0]) 191 isRegexMatch = self.isRegexMatch = lambda x: True 192 193 def hide(node, depth, has_kw): 194 if not has_kw: 195 has_kw = isRegexMatch(node.text(0)) 196 hidden = (sum(hide(node.child(i), depth + 1, has_kw) 197 for i in range(node.childCount())) == node.childCount() 198 if node.childCount() else 199 (not has_kw or 200 not self.filterMinItems <= depth <= self.filterMaxItems)) 201 node.setHidden(hidden) 202 return hidden 203 204 hide(self.tree.invisibleRootItem(), 0, False) 205 206 class TreeWidgetItem(QTreeWidgetItem): 207 def data(self, column, role): 208 """Construct lazy tooltips""" 209 if role != Qt.ToolTipRole: 210 return super().data(column, role) 211 tooltip = [] 212 while self: 213 tooltip.append(self.text(0)) 214 self = self.parent() 215 return '\n'.join(reversed(tooltip)) 216 217 def find_itemsets(self): 218 if self.data is None or not len(self.data): 219 return 220 if self._is_running: 221 self._is_running = False 222 return 223 self._is_running = True 224 225 self.button.button.setText('Cancel') 226 227 data = self.data 228 self.tree.clear() 229 self.tree.setUpdatesEnabled(False) 230 self.tree.blockSignals(True) 231 232 class ItemDict(dict): 233 def __init__(self, item): 234 self.item = item 235 236 top = ItemDict(self.tree.invisibleRootItem()) 237 X, mapping = OneHot.encode(data) 238 self.Error.need_discrete_data.clear() 239 if X is None: 240 self.Error.need_discrete_data() 241 242 self.onehot_mapping = mapping 243 ITEM_FMT = '{}' if issparse(data.X) else '{}={}' 244 names = {item: ITEM_FMT.format(var.name, val) 245 for item, var, val in OneHot.decode(mapping.keys(), data, mapping)} 246 nItemsets = 0 247 248 filterSearch = self.filterSearch 249 filterMinItems, filterMaxItems = self.filterMinItems, self.filterMaxItems 250 isRegexMatch = self.isRegexMatch 251 252 # Find itemsets and populate the TreeView 253 with self.progressBar(self.maxItemsets + 1) as progress: 254 for itemset, support in frequent_itemsets(X, self.minSupport / 100): 255 256 if filterSearch and not filterMinItems <= len(itemset) <= filterMaxItems: 257 continue 258 259 parent = top 260 first_new_item = None 261 itemset_matches_filter = False 262 263 for item in sorted(itemset): 264 name = names[item] 265 266 if filterSearch and not itemset_matches_filter: 267 itemset_matches_filter = isRegexMatch(name) 268 269 child = parent.get(name) 270 if child is None: 271 try: 272 wi = self.TreeWidgetItem( 273 parent.item, 274 [name, str(support), '{:.4g}'.format(100 * support / len(data))]) 275 except RuntimeError: 276 # FIXME: When autoFind was in effect and the support 277 # slider was moved, this line excepted with: 278 # RuntimeError: wrapped C/C++ object of type 279 # TreeWidgetItem has been deleted 280 return 281 wi.setData(0, self.ITEM_DATA_ROLE, item) 282 child = parent[name] = ItemDict(wi) 283 284 if first_new_item is None: 285 first_new_item = (parent, name) 286 parent = child 287 288 if filterSearch and not itemset_matches_filter: 289 parent, name = first_new_item 290 parent.item.removeChild(parent[name].item) 291 del parent[name].item 292 del parent[name] 293 else: 294 nItemsets += 1 295 progress.advance() 296 297 if not self._is_running or nItemsets >= self.maxItemsets: 298 break 299 300 qApp.processEvents() 301 302 if not filterSearch: 303 self.filter_change() 304 self.nItemsets = nItemsets 305 self.nSelectedItemsets = 0 306 self.nSelectedExamples = 0 307 self.tree.expandAll() 308 for i in range(self.tree.columnCount()): 309 self.tree.resizeColumnToContents(i) 310 self.tree.setUpdatesEnabled(True) 311 self.tree.blockSignals(False) 312 self._is_running = False 313 self.button.button.setText('Find Itemsets') 314 315 @Inputs.data 316 def set_data(self, data): 317 self.data = data 318 is_error = False 319 if data is not None: 320 self.Warning.cont_attrs.clear() 321 self.Error.no_disc_features.clear() 322 self.button.setDisabled(False) 323 self.X = data.X 324 if issparse(data.X): 325 self.X = data.X.tocsc() 326 else: 327 if not data.domain.has_discrete_attributes(): 328 self.Error.no_disc_features() 329 is_error = True 330 self.button.setDisabled(True) 331 elif data.domain.has_continuous_attributes(): 332 self.Warning.cont_attrs() 333 else: 334 self.output = None 335 self.commit() 336 if self.autoFind and not is_error: 337 self.find_itemsets() 338 339 @classmethod 340 def migrate_settings(cls, settings, _): 341 x = settings.get("minSupport", 1) 342 settings["minSupport"] = min(cls.support_options, key=lambda t: abs(t - x)) 343 344 345if __name__ == "__main__": # pragma: no cover 346 WidgetPreview(OWItemsets).run(Table("zoo")) 347