# Copyright (c) 2014-2021 Cedric Bellegarde <cedric.bellegarde@adishatz.org>
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.


from gettext import gettext as _
import json
from locale import getdefaultlocale

from lollypop.define import App
from lollypop.utils import escape
from lollypop.logger import Logger


class WikipediaHelper:
    """
    Helper for wikipedia search

    Queries the Wikipedia web API, first on the wiki matching the user
    locale and then on the English wiki.
    """

    __API_SEARCH = "https://%s.wikipedia.org/w/api.php?action=query" +\
        "&list=search&srsearch=%s&format=json"
    __API_INFO = "https://%s.wikipedia.org/w/api.php?action=query" +\
        "&pageids=%s&format=json" +\
        "&prop=extracts&exlimit=max&explaintext&redirects=1"

    def __init__(self):
        """
        Init wikipedia
        """
        # getdefaultlocale() returns (None, None) when no locale is set
        # (e.g. C/POSIX): fall back to English instead of raising
        language = getdefaultlocale()[0]
        self.__locale = language[0:2] if language else "en"

    def get_content_for_term(self, term):
        """
        Get content for term
        @param term as str
        @return bytes/None: UTF-8 encoded page extract, None if no page
                            matches or on any error
        """
        try:
            (locale, page_id) = self.__search_term(term)
            if page_id is None:
                return None
            return self.__load_extract(locale, page_id)
        except Exception as e:
            Logger.error("Wikipedia::get_content_for_term(): %s", e)
        return None

    def get_content_for_page_id(self, page_id, locale):
        """
        Get page content
        @param page_id as str
        @param locale as str
        @return bytes/None: UTF-8 encoded page extract, None on any error
        """
        try:
            return self.__load_extract(locale, page_id)
        except Exception as e:
            Logger.error("Wikipedia::get_content_for_page_id(): %s", e)
        return None

    def get_search_list(self, term):
        """
        Get search list for term
        @param term as str
        @return [(str, str, int)]: list of (locale, title, page id)
        """
        pages = []
        for locale in self.__get_locales():
            # One try per locale: a failure on the local wiki must not
            # prevent the English wiki from being queried
            try:
                # NOTE(review): term is inserted in the URI as is,
                # confirm load_uri_content_sync() escapes it
                uri = self.__API_SEARCH % (locale, term)
                (status, data) = App().task_helper.load_uri_content_sync(uri)
                # Only decode a successful response
                if not status:
                    continue
                decode = json.loads(data.decode("utf-8"))
                pages += [(locale, item["title"], item["pageid"])
                          for item in decode["query"]["search"]]
            except Exception as e:
                Logger.error("Wikipedia::get_search_list(): %s", e)
        return pages

#######################
# PRIVATE             #
#######################
    def __get_locales(self):
        """
        Get locales to query, user locale first
        @return [str]
        """
        # Prevent querying the English wiki twice for English users
        if self.__locale == "en":
            return ["en"]
        return [self.__locale, "en"]

    def __load_extract(self, locale, page_id):
        """
        Load plain text extract for page
        @param locale as str
        @param page_id as str/int
        @return bytes/None: None if the request failed
        @raise Exception if the response can not be parsed
        """
        uri = self.__API_INFO % (locale, page_id)
        (status, data) = App().task_helper.load_uri_content_sync(uri)
        if not status:
            return None
        decode = json.loads(data.decode("utf-8"))
        # Pages are keyed by their id as a string in the response
        extract = decode["query"]["pages"][str(page_id)]["extract"]
        return extract.encode("utf-8")

    def __search_term(self, term):
        """
        Search term on Wikipedia
        @param term as str
        @return (locale as str, pageid as int/None):
                ("", None) if nothing relevant was found
        """
        escaped_term = escape(term.lower())
        # A result whose snippet mentions one of these words is accepted
        # even when its title does not match the term
        words = [_("band"), _("singer"), "band", "singer"]
        for locale in self.__get_locales():
            # One try per locale: a failure on the local wiki must not
            # prevent the English wiki from being queried
            try:
                uri = self.__API_SEARCH % (locale, term)
                (status, data) = App().task_helper.load_uri_content_sync(uri)
                if not status:
                    continue
                decode = json.loads(data.decode("utf-8"))
                for item in decode["query"]["search"]:
                    if escape(item["title"].lower()) == escaped_term:
                        return (locale, item["pageid"])
                    snippet = item["snippet"].lower()
                    if any(word in snippet for word in words):
                        return (locale, item["pageid"])
            except Exception as e:
                Logger.error("Wikipedia::__search_term(): %s", e)
        return ("", None)