1from AnyQt.QtWidgets import QApplication, QFormLayout
2
3from Orange.widgets import gui
4from Orange.widgets import settings
5from orangecontrib.text.corpus import Corpus
6from orangecontrib.text.vectorization import SimhashVectorizer
7from orangecontrib.text.widgets.utils import owbasevectorizer
8
9
class OWSimhash(owbasevectorizer.OWBaseVectorizer):
    """Widget that configures a ``SimhashVectorizer``.

    Two integer settings are stored on the widget: ``f`` (labelled
    'Simhash size:' in the form) and ``shingle_len`` (labelled
    'Shingle length:').  Both are passed as keyword arguments to
    ``Method`` whenever ``update_method`` runs.
    """

    # Widget metadata.
    name = 'Similarity Hashing'
    description = 'Computes documents hashes.'
    icon = 'icons/Simhash.svg'
    priority = 310
    keywords = ["SimHash"]

    # Class instantiated by ``update_method``.
    Method = SimhashVectorizer

    # Persisted settings with their default values.
    f = settings.Setting(64)
    shingle_len = settings.Setting(10)

    def create_configuration_layout(self):
        """Return a ``QFormLayout`` holding one spin box per setting.

        Each spin box is bound to the widget attribute of the same name,
        has a minimum of 1, and has its ``editingFinished`` signal
        connected to ``self.on_change``.
        """
        form = QFormLayout()

        # (row label, bound attribute, upper bound) in display order.
        rows = (
            ('Simhash size:', 'f', SimhashVectorizer.max_f),
            ('Shingle length:', 'shingle_len', 100),
        )
        for label, attribute, upper in rows:
            box = gui.spin(self, self, attribute, minv=1, maxv=upper)
            box.editingFinished.connect(self.on_change)
            form.addRow(label, box)

        return form

    def update_method(self):
        """Rebuild ``self.method`` from the current setting values."""
        options = {'shingle_len': self.shingle_len, 'f': self.f}
        self.method = self.Method(**options)
38
39
if __name__ == '__main__':
    # Manual preview: show the widget, feed it the 'book-excerpts' corpus,
    # then enter the Qt event loop.
    application = QApplication([])
    preview = OWSimhash()
    preview.show()
    preview.set_data(Corpus.from_file('book-excerpts'))
    application.exec()
47