1# Copyright (c) 2014-2021 Cedric Bellegarde <cedric.bellegarde@adishatz.org>
2# This program is free software: you can redistribute it and/or modify
3# it under the terms of the GNU General Public License as published by
4# the Free Software Foundation, either version 3 of the License, or
5# (at your option) any later version.
6# This program is distributed in the hope that it will be useful,
7# but WITHOUT ANY WARRANTY; without even the implied warranty of
8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9# GNU General Public License for more details.
10# You should have received a copy of the GNU General Public License
11# along with this program. If not, see <http://www.gnu.org/licenses/>.
12
13
from gettext import gettext as _
import json
from locale import getdefaultlocale
from urllib.parse import quote

from lollypop.define import App
from lollypop.utils import escape
from lollypop.logger import Logger
21
22
class WikipediaHelper:
    """
        Helper for wikipedia search

        Searches are run on the wikipedia edition matching system locale
        first, then on the english edition
    """

    __API_SEARCH = "https://%s.wikipedia.org/w/api.php?action=query" +\
        "&list=search&srsearch=%s&format=json"
    __API_INFO = "https://%s.wikipedia.org/w/api.php?action=query" +\
        "&pageids=%s&format=json" +\
        "&prop=extracts&exlimit=max&explaintext&redirects=1"
    # Edition used when system locale is unknown and as last resort
    __FALLBACK_LOCALE = "en"

    def __init__(self):
        """
            Init wikipedia
        """
        # getdefaultlocale() returns (None, None) for the C/POSIX locale
        # and raises ValueError for locales it can not parse
        try:
            code = getdefaultlocale()[0]
        except ValueError:
            code = None
        self.__locale = code[0:2] if code else self.__FALLBACK_LOCALE

    def get_content_for_term(self, term):
        """
            Get content for term
            @param term as str
            @return bytes/None
        """
        # __search_term() handles its own errors and returns a None
        # page id if nothing matched
        (locale, page_id) = self.__search_term(term)
        if page_id is None:
            return None
        return self.get_content_for_page_id(page_id, locale)

    def get_content_for_page_id(self, page_id, locale):
        """
            Get page content
            @param page_id as str
            @param locale as str
            @return bytes/None
        """
        try:
            uri = self.__API_INFO % (locale, page_id)
            (status, data) = App().task_helper.load_uri_content_sync(uri)
            if status:
                decode = json.loads(data.decode("utf-8"))
                # Pages are keyed by page id as a string in the response
                extract = decode["query"]["pages"][str(page_id)]["extract"]
                return extract.encode("utf-8")
        except Exception as e:
            Logger.error("Wikipedia::get_content_for_page_id(): %s", e)
        return None

    def get_search_list(self, term):
        """
            Get search list for term
            @param term as str
            @return [(locale, title, pageid)]: one tuple per search result,
                    pageid being the value found in the API response
        """
        pages = []
        for locale in self.__get_locales():
            # One failing edition should not prevent searching the others
            try:
                for item in self.__search(locale, term):
                    pages.append((locale, item["title"], item["pageid"]))
            except Exception as e:
                Logger.error("Wikipedia::get_search_list(): %s", e)
        return pages

#######################
# PRIVATE             #
#######################
    def __get_locales(self):
        """
            Get wikipedia editions to search, preferred one first
            @return [str]
        """
        locales = [self.__locale]
        # Do not search the same edition twice for english users
        if self.__FALLBACK_LOCALE not in locales:
            locales.append(self.__FALLBACK_LOCALE)
        return locales

    def __search(self, locale, term):
        """
            Search term on a wikipedia edition
            @param locale as str
            @param term as str
            @return [dict]: search results, empty list if request failed
            @raise Exception if term or response is malformed
        """
        # Term is free text: percent-encode it so characters like
        # "&", "#" or "+" do not alter the query string
        uri = self.__API_SEARCH % (locale, quote(term, safe=""))
        (status, data) = App().task_helper.load_uri_content_sync(uri)
        if not status:
            return []
        decode = json.loads(data.decode("utf-8"))
        return decode["query"]["search"]

    def __search_term(self, term):
        """
            Search term on Wikipedia
            A result matches if its title equals term (after escape()) or
            if its snippet mentions a band or a singer
            @param term as str
            @return (locale as str, pageid): ("", None) if nothing found
        """
        # Snippets are lowered before comparison, so keywords have to be
        # lowered too (translations may be capitalized)
        words = [word.lower() for word in (_("band"), _("singer"),
                                           "band", "singer")]
        for locale in self.__get_locales():
            # One failing edition should not prevent searching the others
            try:
                wanted = escape(term.lower())
                for item in self.__search(locale, term):
                    if escape(item["title"].lower()) == wanted:
                        return (locale, item["pageid"])
                    snippet = item["snippet"].lower()
                    if any(word in snippet for word in words):
                        return (locale, item["pageid"])
            except Exception as e:
                Logger.error("Wikipedia::__search_term(): %s", e)
        return ("", None)
124