"""Orange widget for searching PubMed and retrieving records as a Corpus."""
import os
import re
from datetime import date

from AnyQt.QtCore import QDate, Qt
from AnyQt.QtWidgets import (QApplication, QComboBox, QDateEdit, QTextEdit,
                             QFrame, QDialog, QCalendarWidget, QVBoxLayout,
                             QFormLayout)

from Orange.widgets import gui
from Orange.widgets.credentials import CredentialManager
from Orange.widgets.settings import Setting
from Orange.widgets.widget import OWWidget, Msg
from orangecontrib.text.corpus import Corpus
from orangecontrib.text.pubmed import (
    Pubmed, PUBMED_TEXT_FIELDS
)


def _i(name, icon_path='icons'):
    """Return the path of ``name`` inside ``icon_path`` next to this module."""
    widget_path = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(widget_path, icon_path, name)


# Deliberately loose shape check: "<something>@<something>.<something>",
# with no '@' allowed inside any of the three parts.
EMAIL_REGEX = re.compile(r"[^@]+@[^@]+\.[^@]+")


def validate_email(email):
    """Return a match object if ``email`` looks like an address, else None.

    The whole string has to fit the pattern; a valid-looking prefix followed
    by extra '@' characters (e.g. ``a@b.c@d``) is rejected.
    """
    return EMAIL_REGEX.fullmatch(email)


class Output:
    """Names of the widget's output channels."""
    CORPUS = 'Corpus'


class OWPubmed(OWWidget):
    """Widget that searches PubMed and outputs the retrieved records."""

    class EmailCredentialsDialog(OWWidget):
        """Small dialog that asks for, validates and stores the user email."""
        name = "Pubmed Email"
        want_main_area = False
        resizing_enabled = False
        email_manager = CredentialManager('Email')
        email_input = ''

        class Error(OWWidget.Error):
            invalid_credentials = Msg('This email is invalid.')

        def __init__(self, parent):
            """Build the form and pre-fill it with the stored email.

            ``parent`` is the owning widget; its ``sync_email`` is called
            when a valid email is accepted.
            """
            super().__init__()
            self.parent = parent
            self.api = None

            form = QFormLayout()
            form.setContentsMargins(5, 5, 5, 5)
            self.email_edit = gui.lineEdit(
                self, self, 'email_input', controlWidth=400)
            form.addRow('Email:', self.email_edit)
            self.controlArea.layout().addLayout(form)
            self.submit_button = gui.button(
                self.controlArea, self, "OK", self.accept)

            self.load_credentials()

        def setVisible(self, visible):
            """Change visibility and give keyboard focus to the email field."""
            super().setVisible(visible)
            self.email_edit.setFocus()

        def load_credentials(self):
            """Fill the email field from the credential manager."""
            self.email_edit.setText(self.email_manager.key)

        def save_credentials(self):
            """Store the currently entered email in the credential manager."""
            self.email_manager.key = self.email_input

        def check_credentials(self):
            """Validate the entered email; save it and return True if valid."""
            if validate_email(self.email_input):
                self.save_credentials()
                return True
            return False

        def accept(self, silent=False):
            """Accept the dialog if the email is valid, else show an error.

            With ``silent`` set, a previously shown error is not cleared
            before validation.
            """
            if not silent:
                self.Error.invalid_credentials.clear()
            if self.check_credentials():
                self.parent.sync_email(self.email_input)
                super().accept()
            else:
                self.Error.invalid_credentials()

    name = 'Pubmed'
    description = 'Fetch data from Pubmed.'
    icon = 'icons/Pubmed.svg'
    priority = 140

    outputs = [(Output.CORPUS, Corpus)]
    want_main_area = False
    resizing_enabled = False

    QT_DATE_FORMAT = 'yyyy-MM-dd'
    PY_DATE_FORMAT = '%Y-%m-%d'
    MIN_DATE = date(1800, 1, 1)

    # Settings.
    author = Setting('')
    pub_date_from = Setting('')
    pub_date_to = Setting('')
    recent_keywords = Setting([])
    last_advanced_query = Setting('')
    num_records = Setting(1000)

    # Text includes checkboxes.
    includes_authors = Setting(True)
    includes_title = Setting(True)
    includes_mesh = Setting(True)
    includes_abstract = Setting(True)
    includes_url = Setting(True)

    # Set through sync_email() once the email dialog accepts an address.
    email = None

    class Warning(OWWidget.Warning):
        no_query = Msg('Please specify the keywords for this query.')

    class Error(OWWidget.Error):
        api_error = Msg('API error: {}.')
        email_error = Msg('Email not set. Please set it with the email button.')

    def __init__(self):
        """Create the email dialog and lay out all search/retrieval controls."""
        super().__init__()

        self.output_corpus = None
        self.pubmed_api = None
        self.progress = None
        self.record_count = 0
        self.download_running = False

        # Email credentials.
        self.email_dlg = self.EmailCredentialsDialog(self)
        gui.button(self.controlArea, self, 'Email',
                   callback=self.email_dlg.exec_,
                   focusPolicy=Qt.NoFocus)
        gui.separator(self.controlArea)

        # To hold all the controls. Makes access easier.
        self.pubmed_controls = []

        # RECORD SEARCH
        self.search_tabs = gui.tabWidget(self.controlArea)
        # --- Regular search ---
        regular_search_box = gui.widgetBox(self.controlArea, addSpace=True)

        # Author
        self.author_input = gui.lineEdit(regular_search_box, self, 'author',
                                         'Author:', orientation=Qt.Horizontal)
        self.pubmed_controls.append(self.author_input)

        h_box = gui.hBox(regular_search_box)
        year_box = gui.widgetBox(h_box, orientation=Qt.Horizontal)
        min_date = QDate.fromString(
            self.MIN_DATE.strftime(self.PY_DATE_FORMAT),
            self.QT_DATE_FORMAT
        )

        # Unset date settings default to the widest range: MIN_DATE..today.
        if not self.pub_date_from:
            self.pub_date_from = self.MIN_DATE.strftime(self.PY_DATE_FORMAT)
        if not self.pub_date_to:
            self.pub_date_to = date.today().strftime(self.PY_DATE_FORMAT)

        self.date_from = QDateEdit(
            QDate.fromString(self.pub_date_from, self.QT_DATE_FORMAT),
            displayFormat=self.QT_DATE_FORMAT,
            minimumDate=min_date,
            calendarPopup=True
        )
        self.date_to = QDateEdit(
            QDate.fromString(self.pub_date_to, self.QT_DATE_FORMAT),
            displayFormat=self.QT_DATE_FORMAT,
            minimumDate=min_date,
            calendarPopup=True
        )

        # Mirror the date editors into the string settings. The parameter is
        # named ``qdate`` so it does not shadow the imported ``date`` class.
        self.date_from.dateChanged.connect(
            lambda qdate: setattr(self, 'pub_date_from',
                                  qdate.toString(self.QT_DATE_FORMAT)))
        self.date_to.dateChanged.connect(
            lambda qdate: setattr(self, 'pub_date_to',
                                  qdate.toString(self.QT_DATE_FORMAT)))
        self.pubmed_controls.append(self.date_from)
        self.pubmed_controls.append(self.date_to)

        gui.label(year_box, self, 'From:')
        year_box.layout().addWidget(self.date_from)
        gui.label(year_box, self, 'to:')
        year_box.layout().addWidget(self.date_to)

        # Keywords.
        h_box = gui.hBox(regular_search_box)
        label = gui.label(h_box, self, 'Query:')
        label.setMaximumSize(label.sizeHint())
        self.keyword_combo = QComboBox(h_box)
        self.keyword_combo.setMinimumWidth(150)
        self.keyword_combo.setEditable(True)
        h_box.layout().addWidget(self.keyword_combo)
        self.keyword_combo.activated[int].connect(self.select_keywords)
        self.pubmed_controls.append(self.keyword_combo)

        tab_height = regular_search_box.sizeHint()
        regular_search_box.setMaximumSize(tab_height)

        # --- Advanced search ---
        advanced_search_box = gui.widgetBox(self.controlArea, addSpace=True)
        # Advanced search query.
        h_box = gui.hBox(advanced_search_box)
        self.advanced_query_input = QTextEdit(h_box)
        h_box.layout().addWidget(self.advanced_query_input)
        self.advanced_query_input.setMaximumSize(tab_height)
        self.pubmed_controls.append(self.advanced_query_input)

        gui.createTabPage(self.search_tabs, 'Regular search',
                          regular_search_box)
        gui.createTabPage(self.search_tabs, 'Advanced search',
                          advanced_search_box)

        # Search info label.
        self.search_info_label = gui.label(
            self.controlArea, self,
            'Number of records found: /')

        # Search for records button.
        self.run_search_button = gui.button(
            self.controlArea,
            self,
            'Find records',
            callback=self.run_search,
            tooltip='Performs a search for articles that fit the '
                    'specified parameters.')
        self.pubmed_controls.append(self.run_search_button)

        h_line = QFrame()
        h_line.setFrameShape(QFrame.HLine)
        h_line.setFrameShadow(QFrame.Sunken)
        self.controlArea.layout().addWidget(h_line)

        # RECORD RETRIEVAL
        # Text includes box.
        text_includes_box = gui.widgetBox(
            self.controlArea, 'Text includes', addSpace=True)
        self.authors_checkbox = gui.checkBox(
            text_includes_box, self, 'includes_authors', 'Authors')
        self.title_checkbox = gui.checkBox(
            text_includes_box, self, 'includes_title', 'Article title')
        self.mesh_checkbox = gui.checkBox(
            text_includes_box, self, 'includes_mesh', 'Mesh headings')
        self.abstract_checkbox = gui.checkBox(
            text_includes_box, self, 'includes_abstract', 'Abstract')
        self.url_checkbox = gui.checkBox(
            text_includes_box, self, 'includes_url', 'URL')
        self.pubmed_controls.append(self.authors_checkbox)
        self.pubmed_controls.append(self.title_checkbox)
        self.pubmed_controls.append(self.mesh_checkbox)
        self.pubmed_controls.append(self.abstract_checkbox)
        self.pubmed_controls.append(self.url_checkbox)

        # Num. records.
        h_box = gui.hBox(self.controlArea)
        label = gui.label(h_box, self, 'Retrieve')
        label.setMaximumSize(label.sizeHint())
        self.num_records_input = gui.spin(h_box, self, 'num_records',
                                          minv=1, maxv=10000)
        self.max_records_label = gui.label(h_box, self, 'records from /.')
        self.max_records_label.setMaximumSize(self.max_records_label
                                              .sizeHint())
        self.pubmed_controls.append(self.num_records_input)

        # Download articles.
        self.retrieve_records_button = gui.button(
            self.controlArea,
            self,
            'Retrieve records',
            callback=self.retrieve_records,
            tooltip='Retrieves the specified documents.')
        self.pubmed_controls.append(self.retrieve_records_button)

        # Num. retrieved records info label.
        self.retrieval_info_label = gui.label(
            self.controlArea,
            self,
            'Number of records retrieved: /')

        # Load the most recent queries.
        self.set_keyword_list()

    def sync_email(self, email):
        """Remember ``email`` and clear the "email not set" error."""
        self.Error.email_error.clear()
        self.email = email

    def run_search(self):
        """Run a search using the active tab's parameters.

        Shows an error and does nothing when no email has been set. Both
        action buttons are disabled while the search runs and re-enabled on
        every exit path, including API errors.
        """
        self.Error.clear()
        self.Warning.clear()

        # check if email exists
        if self.email is None:
            self.Error.email_error()
            return

        self.run_search_button.setEnabled(False)
        self.retrieve_records_button.setEnabled(False)

        # Create the PubMed API object on first use.
        if self.pubmed_api is None:
            self.pubmed_api = Pubmed(
                email=self.email,
                progress_callback=self.api_progress_callback,
                error_callback=self.api_error_callback,
            )

        if self.search_tabs.currentIndex() == 0:
            # Regular search: whitespace-separated terms and authors.
            keywords = self.keyword_combo.currentText()
            terms = keywords.split()
            authors = self.author_input.text().split()

            error = self.pubmed_api._search_for_records(
                terms, authors, self.pub_date_from, self.pub_date_to
            )
            if error is not None:
                self.Error.api_error(str(error))
                # Without this the buttons stay disabled after a failure.
                self.enable_controls()
                return

            if keywords not in self.recent_keywords:
                self.recent_keywords.insert(0, keywords)
        else:
            query = self.advanced_query_input.toPlainText()
            if not query:
                self.Warning.no_query()
                self.run_search_button.setEnabled(True)
                self.retrieve_records_button.setEnabled(True)
                return
            error = self.pubmed_api._search_for_records(advanced_query=query)

            if error is not None:
                self.Error.api_error(str(error))
                # Without this the buttons stay disabled after a failure.
                self.enable_controls()
                return

            self.last_advanced_query = query

        self.enable_controls()
        self.update_search_info()

    def enable_controls(self):
        """Re-enable the search button; enable retrieval if records exist."""
        self.run_search_button.setEnabled(True)
        enabled = self.pubmed_api is not None and \
            self.pubmed_api.search_record_count != 0
        self.retrieve_records_button.setEnabled(enabled)

    def retrieve_records(self):
        """Retrieve records for the last search and send them to the output.

        If a download is already marked as running, this call asks the API
        to stop it instead of starting a new one.
        """
        self.Warning.clear()
        self.Error.clear()

        if self.pubmed_api is None:
            return

        if self.download_running:
            self.download_running = False
            self.retrieve_records_button.setText('Retrieve records')
            self.pubmed_api.stop_retrieving()
            return

        self.download_running = True
        self.output_corpus = None  # Clear the old records.

        # Change the button label.
        self.retrieve_records_button.setText('Stop retrieving')

        # Checkbox states, in the order they are paired with
        # PUBMED_TEXT_FIELDS below.
        text_includes_params = [
            self.includes_authors,
            self.includes_title,
            self.includes_mesh,
            self.includes_abstract,
            self.includes_url,
            True,  # Publication date field; included always.
        ]
        required_text_fields = [
            field
            for include, field
            in zip(text_includes_params, PUBMED_TEXT_FIELDS)
            if include
        ]

        batch_size = min(Pubmed.MAX_BATCH_SIZE, self.num_records) + 1
        try:
            with self.progressBar(self.num_records/batch_size) as progress:
                self.progress = progress
                self.output_corpus = self.pubmed_api._retrieve_records(
                    self.num_records,
                    required_text_fields
                )
        finally:
            # Restore the idle state even if retrieval raised, and drop the
            # handle of the now-finished progress bar.
            self.progress = None
            self.retrieve_records_button.setText('Retrieve records')
            self.download_running = False

        self.send(Output.CORPUS, self.output_corpus)
        self.update_retrieval_info()
        self.run_search_button.setEnabled(True)

    def api_progress_callback(self, start_at=None):
        """Advance the progress bar, or set its count to ``start_at``."""
        if self.progress is None:
            # No retrieval in progress; nothing to update.
            return
        if start_at is not None:
            self.progress.count = start_at
        else:
            self.progress.advance()

    def api_error_callback(self, error):
        """Show ``error`` and finish the active progress bar, if any."""
        self.Error.api_error(str(error))
        if self.progress is not None:
            self.progress.finish()

    def update_search_info(self):
        """Refresh labels and the spin-box maximum after a search."""
        max_records_count = min(
            self.pubmed_api.MAX_RECORDS,
            self.pubmed_api.search_record_count
        )
        self.search_info_label.setText(
            'Number of retrievable records for '
            'this search query: {} '.format(max_records_count)
        )
        self.max_records_label.setText(
            'records from {}.'.format(max_records_count)
        )
        self.max_records_label.setMaximumSize(self.max_records_label
                                              .sizeHint())

        self.num_records_input.setMaximum(max_records_count)
        self.retrieve_records_button.setFocus()

    def update_retrieval_info(self):
        """Show how many records the output corpus holds (0 if none)."""
        document_count = 0
        if self.output_corpus is not None:
            document_count = len(self.output_corpus)

        self.retrieval_info_label.setText(
            'Number of records retrieved: {} '.format(document_count)
        )
        self.retrieval_info_label.setMaximumSize(
            self.retrieval_info_label.sizeHint()
        )

    def select_keywords(self, n):
        """Move the ``n``-th recent query to the front and refresh the combo."""
        if n < len(self.recent_keywords):
            self.recent_keywords.insert(0, self.recent_keywords.pop(n))

        if self.recent_keywords:
            self.set_keyword_list()

    def set_keyword_list(self):
        """Repopulate the combo box from ``recent_keywords``.

        An empty history is first seeded with a few sample queries.
        """
        self.keyword_combo.clear()
        if not self.recent_keywords:
            # Sample queries.
            self.recent_keywords.append('orchid')
            self.recent_keywords.append('hypertension')
            self.recent_keywords.append('blood pressure')
            self.recent_keywords.append('radiology')
        for keywords in self.recent_keywords:
            self.keyword_combo.addItem(keywords)

    def open_calendar(self, widget):
        """Open a date picker and write the picked date into ``widget``."""
        cal_dlg = CalendarDialog(self, 'Date picker')
        if cal_dlg.exec_():
            widget.setText(cal_dlg.picked_date)

    def send_report(self):
        """Report the query of the active tab and the retrieval counts."""
        if not self.pubmed_api:
            return
        max_records_count = min(
            self.pubmed_api.MAX_RECORDS,
            self.pubmed_api.search_record_count
        )
        retrieved = '{}/{}'.format(
            len(self.output_corpus) if self.output_corpus else 0,
            max_records_count)
        if self.search_tabs.currentIndex() == 0:
            terms = self.keyword_combo.currentText()
            authors = self.author_input.text()
            self.report_items((
                ('Query', terms if terms else None),
                ('Authors', authors if authors else None),
                ('Date', 'from {} to {}'.format(self.pub_date_from,
                                                self.pub_date_to)),
                ('Number of records retrieved', retrieved)
            ))
        else:
            query = self.advanced_query_input.toPlainText()
            self.report_items((
                ('Query', query if query else None),
                ('Number of records retrieved', retrieved)
            ))


class CalendarDialog(QDialog):
    """Modal date picker; the chosen date is kept in ``picked_date``."""

    picked_date = None
    source = None
    parent = None

    def __init__(self, parent, windowTitle='Date picker'):
        """Build the calendar and an OK button inside a vertical layout."""
        super().__init__(parent, windowTitle=windowTitle)

        self.parent = parent

        self.setLayout(QVBoxLayout())
        self.mainArea = gui.widgetBox(self)
        self.layout().addWidget(self.mainArea)

        self.cal = QCalendarWidget(self)
        self.cal.setGridVisible(True)
        self.cal.move(20, 20)
        self.cal.clicked[QDate].connect(self.set_date)
        self.mainArea.layout().addWidget(self.cal)

        # Set the default date.
        self.picked_date = self.cal.selectedDate().toString('yyyy/MM/dd')

        gui.button(self.mainArea, self, 'OK', lambda: QDialog.accept(self))

    def set_date(self, date):
        """Store the clicked date as a 'yyyy/MM/dd' string."""
        self.picked_date = date.toString('yyyy/MM/dd')


if __name__ == '__main__':
    app = QApplication([])
    widget = OWPubmed()
    widget.show()
    app.exec()