1# -*- coding: utf-8 -*-
2from __future__ import absolute_import, division, print_function, unicode_literals
3
4store_version = 7  # Needed for dynamic plugin loading
5
6__license__ = 'GPL 3'
7__copyright__ = '2011, John Schember <john@nachtimwald.com>'
8__docformat__ = 'restructuredtext en'
9
10from contextlib import closing
11try:
12    from urllib.parse import quote_plus
13except ImportError:
14    from urllib import quote_plus
15
16from lxml import html
17from qt.core import QUrl
18
19from calibre import browser, url_slash_cleaner
20from calibre.gui2 import open_url
21from calibre.gui2.store import StorePlugin
22from calibre.gui2.store.basic_config import BasicStoreConfig
23from calibre.gui2.store.search_result import SearchResult
24from calibre.gui2.store.web_store_dialog import WebStoreDialog
25
26
def parse_html(raw):
    """Parse ``raw`` HTML into a document tree.

    ``html5_parser.parse`` is used when that package can be imported.
    Otherwise (old versions of calibre) the markup is handed to
    ``html5lib.parse`` with the lxml tree builder and without namespaced
    HTML elements.

    :param raw: The HTML markup to parse.
    :return: Whatever the selected parser's ``parse`` function returns.
    """
    try:
        from html5_parser import parse as preferred_parse
    except ImportError:
        preferred_parse = None

    if preferred_parse is not None:
        return preferred_parse(raw)

    # Old versions of calibre
    import html5lib
    legacy_options = {'treebuilder': 'lxml', 'namespaceHTMLElements': False}
    return html5lib.parse(raw, **legacy_options)
36
37
def search_google(query, max_results=10, timeout=60, write_html_to=None):
    """Search Google Books and yield a ``SearchResult`` for each usable hit.

    :param query: Search terms; URL-encoded with ``quote_plus`` before being
        appended to the search URL.
    :param max_results: Maximum number of results to yield.
    :param timeout: Timeout value passed to the browser's ``open`` call.
    :param write_html_to: Optional file path. When not ``None`` the parsed
        page is serialized as UTF-8 and written to that path.
    :return: A generator of ``SearchResult`` objects with ``title``,
        ``author``, ``detail_item`` (the result's link target) and ``drm``
        (always ``SearchResult.DRM_UNKNOWN``) filled in.
    """
    url = 'https://www.google.com/search?tbm=bks&q=' + quote_plus(query)

    br = browser()

    # Read the complete response first so the connection is closed before any
    # result is handed to the consumer; a generator may be left suspended (or
    # never exhausted) for an arbitrary amount of time.
    with closing(br.open(url, timeout=timeout)) as f:
        raw = f.read()

    doc = parse_html(raw)
    if write_html_to is not None:
        praw = html.tostring(doc, encoding='utf-8')
        # Context manager guarantees the dump file is flushed and closed.
        with open(write_html_to, 'wb') as dump:
            dump.write(praw)

    # Trailing link texts that are page controls rather than author names.
    non_author_links = ('preview', 'read', 'more editions')

    counter = max_results
    for data in doc.xpath('//div[@id="rso"]/div'):
        if counter <= 0:
            break

        headings = data.xpath('descendant::h3')
        if not headings:
            continue
        # The element wrapping the heading carries the result's link.
        link = headings[0].getparent()
        href = link.get('href')
        if not href:
            continue

        title = ''.join(data.xpath('.//h3//text()')).strip()
        authors = data.xpath('descendant::a[@class="fl" and @href]//text()')
        while authors and authors[-1].strip().lower() in non_author_links:
            authors = authors[:-1]
        if not authors:
            # Entries without any author link are skipped and do not count
            # against max_results.
            continue
        author = ' & '.join(authors)

        counter -= 1

        s = SearchResult()
        s.title = title.strip()
        s.author = author.strip()
        s.detail_item = href.strip()
        s.drm = SearchResult.DRM_UNKNOWN

        yield s
79
80
class GoogleBooksStore(BasicStoreConfig, StorePlugin):
    """Store plugin that searches Google Books and opens its pages."""

    def open(self, parent=None, detail_item=None, external=False):
        """Open a Google Books page for the user.

        :param parent: Parent widget for the embedded store dialog.
        :param detail_item: URL of a specific book page; when empty the
            Google Books front page is opened instead.
        :param external: Request opening via ``open_url`` rather than the
            embedded dialog.
        """
        url = 'https://books.google.com/books'
        # NOTE(review): the leading ``True`` short-circuits this condition, so
        # the WebStoreDialog branch below is currently unreachable and every
        # call goes through open_url. Presumably a deliberate switch-off of
        # the embedded dialog -- confirm before removing either side.
        if True or external or self.config.get('open_external', False):
            open_url(QUrl(url_slash_cleaner(detail_item or url)))
        else:
            d = WebStoreDialog(self.gui, url, parent, detail_item)
            d.setWindowTitle(self.name)
            d.set_tags(self.config.get('tags', ''))
            d.exec()

    def search(self, query, max_results=10, timeout=60):
        """Yield the ``SearchResult`` objects produced by ``search_google``.

        :param query: Search terms.
        :param max_results: Maximum number of results to yield.
        :param timeout: Timeout value forwarded to ``search_google``.
        """
        yield from search_google(query, max_results=max_results, timeout=timeout)

    def get_details(self, search_result, timeout):
        """Fill in cover URL, price and formats from the book's detail page.

        The page at ``search_result.detail_item`` is fetched and
        ``search_result`` is updated in place.

        :param search_result: Result object to complete.
        :param timeout: Timeout value passed to the browser's ``open`` call.
        :return: Always ``True``.
        """
        br = browser()
        with closing(br.open(search_result.detail_item, timeout=timeout)) as nf:
            doc = parse_html(nf.read())

        search_result.cover_url = ''.join(doc.xpath('//div[@class="sidebarcover"]//img/@src'))

        # Try to get the set price.
        price = ''.join(doc.xpath('//div[@id="gb-get-book-container"]//a/text()'))
        lowered = price.lower()
        if 'read' in lowered:
            price = 'Unknown'
        elif 'free' in lowered or not price.strip():
            price = '$0.00'
        elif ' - ' in price:
            # Keep only the text after the first " - " separator. The test
            # must look for the same separator that partition() splits on:
            # checking for a bare "-" would blank out link text that contains
            # a hyphen but no spaced separator. The result is indexed rather
            # than unpacked into throwaway names because ``_`` is the
            # translation function in this module.
            price = price.partition(' - ')[2]
        search_result.price = price.strip()

        search_result.formats = ', '.join(doc.xpath('//div[contains(@class, "download-panel-div")]//a/text()')).upper()
        if not search_result.formats:
            search_result.formats = _('Unknown')

        return True
119
120
if __name__ == '__main__':
    # Command-line entry point: join all arguments into one query, print every
    # result, and dump the fetched results page to /t/google.html.
    import sys
    terms = ' '.join(sys.argv[1:])
    hits = search_google(terms, write_html_to='/t/google.html')
    for hit in hits:
        print(hit)
125