"""Orange widget exposing the Simhash vectorizer and its two parameters."""
from AnyQt.QtWidgets import QApplication, QFormLayout

from Orange.widgets import gui
from Orange.widgets import settings
from orangecontrib.text.corpus import Corpus
from orangecontrib.text.vectorization import SimhashVectorizer
from orangecontrib.text.widgets.utils import owbasevectorizer


class OWSimhash(owbasevectorizer.OWBaseVectorizer):
    """Vectorizer widget configured with a hash size and a shingle length.

    The two stored settings (``f`` and ``shingle_len``) are edited through
    spin boxes and forwarded as keyword arguments to ``Method`` whenever
    ``update_method`` is called.
    """

    # Widget metadata.
    name = 'Similarity Hashing'
    description = 'Computes documents hashes.'
    icon = 'icons/Simhash.svg'
    priority = 310
    keywords = ["SimHash"]

    # Class instantiated by ``update_method``.
    Method = SimhashVectorizer

    # Persisted settings; labelled 'Simhash size:' and 'Shingle length:'
    # in the configuration form.
    f = settings.Setting(64)
    shingle_len = settings.Setting(10)

    def create_configuration_layout(self):
        """Return a form layout with one spin box per vectorizer setting.

        Each spin box is bound to the setting of the same name on this
        widget and calls ``self.on_change`` when editing finishes.
        """
        form = QFormLayout()

        # (row label, bound setting name, upper bound); lower bound is 1.
        rows = (
            ('Simhash size:', 'f', SimhashVectorizer.max_f),
            ('Shingle length:', 'shingle_len', 100),
        )
        for label, setting_name, upper in rows:
            box = gui.spin(self, self, setting_name, minv=1, maxv=upper)
            box.editingFinished.connect(self.on_change)
            form.addRow(label, box)

        return form

    def update_method(self):
        """Rebuild ``self.method`` from the current setting values."""
        params = {'shingle_len': self.shingle_len, 'f': self.f}
        self.method = self.Method(**params)


if __name__ == '__main__':
    # Manual demo: show the widget and feed it a sample corpus.
    application = QApplication([])
    simhash_widget = OWSimhash()
    simhash_widget.show()
    sample_corpus = Corpus.from_file('book-excerpts')
    simhash_widget.set_data(sample_corpus)
    application.exec()