1import os
2import re
3from datetime import date
4
5from AnyQt.QtCore import QDate, Qt
6from AnyQt.QtWidgets import (QApplication, QComboBox, QDateEdit, QTextEdit,
7                             QFrame, QDialog, QCalendarWidget, QVBoxLayout,
8                             QFormLayout)
9
10from Orange.widgets import gui
11from Orange.widgets.credentials import CredentialManager
12from Orange.widgets.settings import Setting
13from Orange.widgets.widget import OWWidget, Msg
14from orangecontrib.text.corpus import Corpus
15from orangecontrib.text.pubmed import (
16    Pubmed, PUBMED_TEXT_FIELDS
17)
18
19
def _i(name, icon_path='icons'):
    """Return the path of icon ``name`` relative to this module's directory.

    :param name: file name of the icon.
    :param icon_path: sub-directory (next to this file) holding the icons.
    :return: absolute path ``<module dir>/<icon_path>/<name>``.
    """
    module_file = os.path.abspath(__file__)
    module_dir, _ = os.path.split(module_file)
    return os.path.join(module_dir, icon_path, name)
23
24
# Deliberately loose pattern: "<something>@<something>.<something>" where no
# part contains an '@'. It only guards against obvious typos.
EMAIL_REGEX = re.compile(r"[^@]+@[^@]+\.[^@]+")


def validate_email(email):
    """Check whether ``email`` looks like an email address.

    The whole string has to match ``EMAIL_REGEX``; a valid-looking prefix
    followed by extra text (e.g. a second ``@`` part) is rejected.

    :param email: the text to check.
    :return: the match object (truthy) when the address is acceptable,
        ``None`` otherwise, including when ``email`` is not a string.
    """
    if not isinstance(email, str):
        return None
    # fullmatch anchors both ends; match() would accept trailing garbage.
    return EMAIL_REGEX.fullmatch(email)
30
31
class Output:
    """Names of the output channels used by the widget in this module."""
    # Channel on which the retrieved corpus is sent.
    CORPUS = 'Corpus'
34
35
class OWPubmed(OWWidget):
    """Widget for querying PubMed and sending the fetched records as a Corpus.

    The control area offers a regular search (author, publication date range
    and keywords) and an advanced search (free-form query). A search is run
    first; the matching records are then retrieved in a second step and sent
    to the ``Corpus`` output. An email address has to be set through the
    email dialog before a search can be started.
    """

    class EmailCredentialsDialog(OWWidget):
        """Dialog asking for the email address used when creating the API."""

        name = "Pubmed Email"
        want_main_area = False
        resizing_enabled = False
        # Storage for the entered email; read and written through ``.key``.
        email_manager = CredentialManager('Email')
        # Attribute bound to the line edit created in ``__init__``.
        email_input = ''

        class Error(OWWidget.Error):
            invalid_credentials = Msg('This email is invalid.')

        def __init__(self, parent):
            """Build the dialog and pre-fill it with the stored email.

            :param parent: object providing ``sync_email(email)``; it is
                notified when a valid email is accepted.
            """
            super().__init__()
            # NOTE(review): this attribute shadows Qt's ``parent()`` method
            # on the instance; kept because it is part of the current
            # interface.
            self.parent = parent
            self.api = None

            form = QFormLayout()
            form.setContentsMargins(5, 5, 5, 5)
            self.email_edit = gui.lineEdit(
                self, self, 'email_input', controlWidth=400)
            form.addRow('Email:', self.email_edit)
            self.controlArea.layout().addLayout(form)
            self.submit_button = gui.button(
                self.controlArea, self, "OK", self.accept)

            self.load_credentials()

        def setVisible(self, visible):
            """Change visibility and move the focus to the email field."""
            super().setVisible(visible)
            self.email_edit.setFocus()

        def load_credentials(self):
            """Fill the line edit with the stored email."""
            # Fall back to an empty string in case nothing has been stored
            # and the credential manager yields None.
            self.email_edit.setText(self.email_manager.key or '')

        def save_credentials(self):
            """Store the currently entered email."""
            self.email_manager.key = self.email_input

        def check_credentials(self):
            """Validate the entered email and store it when it is valid.

            :return: ``True`` if the email passed validation (and was
                saved), ``False`` otherwise.
            """
            if not validate_email(self.email_input):
                return False
            self.save_credentials()
            return True

        def accept(self, silent=False):
            """Accept the dialog if the entered email is valid.

            On success the email is passed to ``parent.sync_email`` and the
            dialog is accepted. On failure the dialog stays open.

            :param silent: when true, the ``invalid_credentials`` message is
                neither cleared nor raised.
            """
            if not silent:
                self.Error.invalid_credentials.clear()
            if self.check_credentials():
                self.parent.sync_email(self.email_input)
                super().accept()
            elif not silent:
                # Previously the message was shown even for silent calls.
                self.Error.invalid_credentials()

    name = 'Pubmed'
    description = 'Fetch data from Pubmed.'
    icon = 'icons/Pubmed.svg'
    priority = 140

    outputs = [(Output.CORPUS, Corpus)]
    want_main_area = False
    resizing_enabled = False

    # The same date layout expressed for Qt and for ``strftime``.
    QT_DATE_FORMAT = 'yyyy-MM-dd'
    PY_DATE_FORMAT = '%Y-%m-%d'
    # Earliest date selectable in the date editors.
    MIN_DATE = date(1800, 1, 1)

    # Settings.
    author = Setting('')
    pub_date_from = Setting('')
    pub_date_to = Setting('')
    recent_keywords = Setting([])
    last_advanced_query = Setting('')
    num_records = Setting(1000)

    # Text includes checkboxes.
    includes_authors = Setting(True)
    includes_title = Setting(True)
    includes_mesh = Setting(True)
    includes_abstract = Setting(True)
    includes_url = Setting(True)

    # Set through ``sync_email`` once the email dialog has been accepted.
    email = None

    class Warning(OWWidget.Warning):
        no_query = Msg('Please specify the keywords for this query.')

    class Error(OWWidget.Error):
        api_error = Msg('API error: {}.')
        email_error = Msg(
            'Email not set. Please set it with the email button.')

    def __init__(self):
        """Create the widget state and build all controls."""
        super().__init__()

        self.output_corpus = None
        self.pubmed_api = None
        self.progress = None
        self.record_count = 0
        self.download_running = False

        # Email dialog and the button that opens it.
        self.email_dlg = self.EmailCredentialsDialog(self)
        gui.button(self.controlArea, self, 'Email',
                   callback=self.email_dlg.exec_,
                   focusPolicy=Qt.NoFocus)
        gui.separator(self.controlArea)

        # To hold all the controls. Makes access easier.
        self.pubmed_controls = []

        # RECORD SEARCH
        self.search_tabs = gui.tabWidget(self.controlArea)
        # --- Regular search ---
        regular_search_box = gui.widgetBox(self.controlArea, addSpace=True)

        # Author
        self.author_input = gui.lineEdit(regular_search_box, self, 'author',
                                         'Author:', orientation=Qt.Horizontal)
        self.pubmed_controls.append(self.author_input)

        # Publication date range.
        h_box = gui.hBox(regular_search_box)
        year_box = gui.widgetBox(h_box, orientation=Qt.Horizontal)
        min_date = QDate.fromString(
            self.MIN_DATE.strftime(self.PY_DATE_FORMAT),
            self.QT_DATE_FORMAT
        )

        # Unset date settings default to the widest range: MIN_DATE..today.
        if not self.pub_date_from:
            self.pub_date_from = self.MIN_DATE.strftime(self.PY_DATE_FORMAT)
        if not self.pub_date_to:
            self.pub_date_to = date.today().strftime(self.PY_DATE_FORMAT)

        self.date_from = self._create_date_edit('pub_date_from', min_date)
        self.date_to = self._create_date_edit('pub_date_to', min_date)
        self.pubmed_controls.extend([self.date_from, self.date_to])

        gui.label(year_box, self, 'From:')
        year_box.layout().addWidget(self.date_from)
        gui.label(year_box, self, 'to:')
        year_box.layout().addWidget(self.date_to)

        # Keywords.
        h_box = gui.hBox(regular_search_box)
        label = gui.label(h_box, self, 'Query:')
        label.setMaximumSize(label.sizeHint())
        self.keyword_combo = QComboBox(h_box)
        self.keyword_combo.setMinimumWidth(150)
        self.keyword_combo.setEditable(True)
        h_box.layout().addWidget(self.keyword_combo)
        self.keyword_combo.activated[int].connect(self.select_keywords)
        self.pubmed_controls.append(self.keyword_combo)

        # The regular tab's size hint also caps the advanced query editor.
        tab_height = regular_search_box.sizeHint()
        regular_search_box.setMaximumSize(tab_height)

        # --- Advanced search ---
        advanced_search_box = gui.widgetBox(self.controlArea, addSpace=True)
        # Advanced search query.
        h_box = gui.hBox(advanced_search_box)
        self.advanced_query_input = QTextEdit(h_box)
        h_box.layout().addWidget(self.advanced_query_input)
        self.advanced_query_input.setMaximumSize(tab_height)
        self.pubmed_controls.append(self.advanced_query_input)

        gui.createTabPage(self.search_tabs, 'Regular search',
                          regular_search_box)
        gui.createTabPage(self.search_tabs, 'Advanced search',
                          advanced_search_box)

        # Search info label.
        self.search_info_label = gui.label(
            self.controlArea, self,
            'Number of records found: /')

        # Search for records button.
        self.run_search_button = gui.button(
            self.controlArea,
            self,
            'Find records',
            callback=self.run_search,
            tooltip='Performs a search for articles that fit the '
                    'specified parameters.')
        self.pubmed_controls.append(self.run_search_button)

        h_line = QFrame()
        h_line.setFrameShape(QFrame.HLine)
        h_line.setFrameShadow(QFrame.Sunken)
        self.controlArea.layout().addWidget(h_line)

        # RECORD RETRIEVAL
        # Text includes box.
        text_includes_box = gui.widgetBox(
            self.controlArea, 'Text includes', addSpace=True)
        self.authors_checkbox = gui.checkBox(
            text_includes_box, self, 'includes_authors', 'Authors')
        self.title_checkbox = gui.checkBox(
            text_includes_box, self, 'includes_title', 'Article title')
        self.mesh_checkbox = gui.checkBox(
            text_includes_box, self, 'includes_mesh', 'Mesh headings')
        self.abstract_checkbox = gui.checkBox(
            text_includes_box, self, 'includes_abstract', 'Abstract')
        self.url_checkbox = gui.checkBox(
            text_includes_box, self, 'includes_url', 'URL')
        self.pubmed_controls.extend([
            self.authors_checkbox,
            self.title_checkbox,
            self.mesh_checkbox,
            self.abstract_checkbox,
            self.url_checkbox,
        ])

        # Num. records.
        h_box = gui.hBox(self.controlArea)
        label = gui.label(h_box, self, 'Retrieve')
        label.setMaximumSize(label.sizeHint())
        self.num_records_input = gui.spin(h_box, self, 'num_records',
                                          minv=1, maxv=10000)
        self.max_records_label = gui.label(h_box, self, 'records from /.')
        self.max_records_label.setMaximumSize(
            self.max_records_label.sizeHint())
        self.pubmed_controls.append(self.num_records_input)

        # Retrieve records button.
        self.retrieve_records_button = gui.button(
            self.controlArea,
            self,
            'Retrieve records',
            callback=self.retrieve_records,
            tooltip='Retrieves the specified documents.')
        self.pubmed_controls.append(self.retrieve_records_button)

        # Num. retrieved records info label.
        self.retrieval_info_label = gui.label(
            self.controlArea,
            self,
            'Number of records retrieved: /')

        # Load the most recent queries.
        self.set_keyword_list()

    def _create_date_edit(self, setting_name, minimum_date):
        """Create a date editor that keeps the named setting up to date.

        :param setting_name: attribute holding the date as a string in
            ``QT_DATE_FORMAT``; it provides the initial value and is
            rewritten whenever the editor's date changes.
        :param minimum_date: ``QDate`` used as the editor's minimum date.
        :return: the new ``QDateEdit``.
        """
        edit = QDateEdit(
            QDate.fromString(getattr(self, setting_name),
                             self.QT_DATE_FORMAT),
            displayFormat=self.QT_DATE_FORMAT,
            minimumDate=minimum_date,
            calendarPopup=True
        )
        edit.dateChanged.connect(
            lambda qdate: setattr(self, setting_name,
                                  qdate.toString(self.QT_DATE_FORMAT)))
        return edit

    def sync_email(self, email):
        """Remember ``email`` and clear the "email not set" error."""
        # NOTE(review): an already created ``pubmed_api`` keeps the email it
        # was constructed with; confirm whether it should be recreated here.
        self.Error.email_error.clear()
        self.email = email

    def run_search(self):
        """Run the search described by the currently selected tab.

        Shows ``email_error`` when no email is set, ``no_query`` when the
        advanced query is empty and ``api_error`` when the API reports an
        error. On success the search information labels are updated.
        """
        self.Error.clear()
        self.Warning.clear()

        # check if email exists
        if self.email is None:
            self.Error.email_error()
            return

        self.run_search_button.setEnabled(False)
        self.retrieve_records_button.setEnabled(False)

        # Create the PubMed object on first use.
        if self.pubmed_api is None:
            self.pubmed_api = Pubmed(
                email=self.email,
                progress_callback=self.api_progress_callback,
                error_callback=self.api_error_callback,
            )

        if self.search_tabs.currentIndex() == 0:
            # Regular search: whitespace separated terms and authors.
            keywords = self.keyword_combo.currentText()
            terms = keywords.split()
            authors = self.author_input.text().split()

            error = self.pubmed_api._search_for_records(
                terms, authors, self.pub_date_from, self.pub_date_to
            )
            if error is not None:
                self.Error.api_error(str(error))
                # Re-enable the buttons; otherwise the widget stays locked.
                self.enable_controls()
                return

            if keywords not in self.recent_keywords:
                self.recent_keywords.insert(0, keywords)
        else:
            query = self.advanced_query_input.toPlainText()
            if not query:
                self.Warning.no_query()
                self.enable_controls()
                return
            error = self.pubmed_api._search_for_records(advanced_query=query)

            if error is not None:
                self.Error.api_error(str(error))
                self.enable_controls()
                return

            self.last_advanced_query = query

        self.enable_controls()
        self.update_search_info()

    def enable_controls(self):
        """Enable the search button and, if records exist, retrieval."""
        self.run_search_button.setEnabled(True)
        has_records = (self.pubmed_api is not None
                       and self.pubmed_api.search_record_count != 0)
        self.retrieve_records_button.setEnabled(has_records)

    def retrieve_records(self):
        """Start retrieving records, or stop a retrieval that is running.

        The same button toggles between the two: while a download is
        running, invoking this method asks the API to stop. Otherwise the
        records are retrieved and the resulting corpus is sent to the
        output.
        """
        self.Warning.clear()
        self.Error.clear()

        if self.pubmed_api is None:
            return

        if self.download_running:
            self.download_running = False
            self.retrieve_records_button.setText('Retrieve records')
            self.pubmed_api.stop_retrieving()
            return

        self.download_running = True
        self.output_corpus = None  # Clear the old records.

        # Change the button label.
        self.retrieve_records_button.setText('Stop retrieving')

        # Text fields.
        include_flags = [
            self.includes_authors,
            self.includes_title,
            self.includes_mesh,
            self.includes_abstract,
            self.includes_url,
            True,  # Publication date field; included always.
        ]
        # Assumes PUBMED_TEXT_FIELDS lists the fields in the same order as
        # the flags above -- TODO confirm against orangecontrib.text.pubmed.
        required_text_fields = [
            field
            for included, field in zip(include_flags, PUBMED_TEXT_FIELDS)
            if included
        ]

        batch_size = min(Pubmed.MAX_BATCH_SIZE, self.num_records) + 1
        try:
            with self.progressBar(self.num_records/batch_size) as progress:
                self.progress = progress
                self.output_corpus = self.pubmed_api._retrieve_records(
                    self.num_records,
                    required_text_fields
                )
        finally:
            # Restore the idle state even if the retrieval raised, so the
            # next click starts a download instead of "stopping" one.
            self.progress = None
            self.retrieve_records_button.setText('Retrieve records')
            self.download_running = False

        self.send(Output.CORPUS, self.output_corpus)
        self.update_retrieval_info()
        self.run_search_button.setEnabled(True)

    def api_progress_callback(self, start_at=None):
        """Advance the progress bar, or set its counter to ``start_at``.

        Does nothing when no progress bar is active.
        """
        if self.progress is None:
            return
        if start_at is not None:
            self.progress.count = start_at
        else:
            self.progress.advance()

    def api_error_callback(self, error):
        """Show ``error`` as an API error and finish any active progress."""
        self.Error.api_error(str(error))
        if self.progress is not None:
            self.progress.finish()

    def update_search_info(self):
        """Refresh the labels and spin box limit after a search."""
        max_records_count = min(
            self.pubmed_api.MAX_RECORDS,
            self.pubmed_api.search_record_count
        )
        self.search_info_label.setText(
            'Number of retrievable records for '
            'this search query: {} '.format(max_records_count)
        )
        self.max_records_label.setText(
            'records from {}.'.format(max_records_count)
        )
        self.max_records_label.setMaximumSize(
            self.max_records_label.sizeHint())

        self.num_records_input.setMaximum(max_records_count)
        self.retrieve_records_button.setFocus()

    def update_retrieval_info(self):
        """Show how many records the last retrieval produced."""
        document_count = 0
        if self.output_corpus is not None:
            document_count = len(self.output_corpus)

        self.retrieval_info_label.setText(
            'Number of records retrieved: {} '.format(document_count)
        )
        self.retrieval_info_label.setMaximumSize(
            self.retrieval_info_label.sizeHint()
        )

    def select_keywords(self, n):
        """Move the ``n``-th recent query to the front of the list.

        :param n: index of the activated combo box item.
        """
        if n < len(self.recent_keywords):
            self.recent_keywords.insert(0, self.recent_keywords.pop(n))

        if self.recent_keywords:
            self.set_keyword_list()

    def set_keyword_list(self):
        """Refill the keyword combo box from ``recent_keywords``.

        When there are no recent queries, a few sample queries are added.
        """
        self.keyword_combo.clear()
        if not self.recent_keywords:
            # Sample queries.
            self.recent_keywords.extend(
                ['orchid', 'hypertension', 'blood pressure', 'radiology'])
        self.keyword_combo.addItems(self.recent_keywords)

    def open_calendar(self, widget):
        """Let the user pick a date and write it into ``widget``.

        :param widget: object with ``setText``; updated only when the
            calendar dialog is accepted.
        """
        cal_dlg = CalendarDialog(self, 'Date picker')
        if cal_dlg.exec_():
            widget.setText(cal_dlg.picked_date)

    def send_report(self):
        """Add the query parameters and the record counts to the report."""
        if not self.pubmed_api:
            return
        max_records_count = min(
            self.pubmed_api.MAX_RECORDS,
            self.pubmed_api.search_record_count
        )
        retrieved_count = len(self.output_corpus) if self.output_corpus else 0
        records_item = (
            'Number of records retrieved',
            '{}/{}'.format(retrieved_count, max_records_count)
        )

        if self.search_tabs.currentIndex() == 0:
            terms = self.keyword_combo.currentText()
            authors = self.author_input.text()
            self.report_items((
                ('Query', terms if terms else None),
                ('Authors', authors if authors else None),
                ('Date', 'from {} to {}'.format(self.pub_date_from,
                                                self.pub_date_to)),
                records_item
            ))
        else:
            query = self.advanced_query_input.toPlainText()
            self.report_items((
                ('Query', query if query else None),
                records_item
            ))
501
502
class CalendarDialog(QDialog):
    """Dialog with a calendar widget and an OK button for picking a date.

    The chosen date is available in ``picked_date`` as a string formatted
    as ``yyyy/MM/dd``.
    """

    # Class-level defaults for the instance attributes.
    picked_date = None
    source = None
    parent = None

    def __init__(self, parent, windowTitle='Date picker'):
        """Build the dialog.

        :param parent: parent widget, also stored in ``self.parent``.
        :param windowTitle: title of the dialog window.
        """
        super().__init__(parent, windowTitle=windowTitle)
        self.parent = parent

        outer_layout = QVBoxLayout()
        self.setLayout(outer_layout)
        self.mainArea = gui.widgetBox(self)
        self.layout().addWidget(self.mainArea)

        calendar = QCalendarWidget(self)
        self.cal = calendar
        calendar.setGridVisible(True)
        calendar.move(20, 20)
        calendar.clicked[QDate].connect(self.set_date)
        self.mainArea.layout().addWidget(calendar)

        # Start from the date the calendar has selected initially.
        initially_selected = calendar.selectedDate()
        self.picked_date = initially_selected.toString('yyyy/MM/dd')

        def confirm():
            QDialog.accept(self)

        gui.button(self.mainArea, self, 'OK', confirm)

    def set_date(self, date):
        """Remember the clicked ``date`` (a ``QDate``) as a string."""
        formatted = date.toString('yyyy/MM/dd')
        self.picked_date = formatted
531
532
if __name__ == '__main__':
    # Stand-alone run: show the widget in its own Qt application.
    application = QApplication([])
    pubmed_widget = OWPubmed()
    pubmed_widget.show()
    application.exec()
538