1# -*- coding: utf-8 -*- 2from __future__ import absolute_import, division, print_function, unicode_literals 3 4store_version = 7 # Needed for dynamic plugin loading 5 6__license__ = 'GPL 3' 7__copyright__ = '2011, John Schember <john@nachtimwald.com>' 8__docformat__ = 'restructuredtext en' 9 10from contextlib import closing 11try: 12 from urllib.parse import quote_plus 13except ImportError: 14 from urllib import quote_plus 15 16from lxml import html 17from qt.core import QUrl 18 19from calibre import browser, url_slash_cleaner 20from calibre.gui2 import open_url 21from calibre.gui2.store import StorePlugin 22from calibre.gui2.store.basic_config import BasicStoreConfig 23from calibre.gui2.store.search_result import SearchResult 24from calibre.gui2.store.web_store_dialog import WebStoreDialog 25 26 27def parse_html(raw): 28 try: 29 from html5_parser import parse 30 except ImportError: 31 # Old versions of calibre 32 import html5lib 33 return html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False) 34 else: 35 return parse(raw) 36 37 38def search_google(query, max_results=10, timeout=60, write_html_to=None): 39 url = 'https://www.google.com/search?tbm=bks&q=' + quote_plus(query) 40 41 br = browser() 42 43 counter = max_results 44 with closing(br.open(url, timeout=timeout)) as f: 45 raw = f.read() 46 doc = parse_html(raw) 47 if write_html_to is not None: 48 praw = html.tostring(doc, encoding='utf-8') 49 open(write_html_to, 'wb').write(praw) 50 for data in doc.xpath('//div[@id="rso"]/div'): 51 if counter <= 0: 52 break 53 h3 = data.xpath('descendant::h3') 54 if not h3: 55 continue 56 h3 = h3[0] 57 a = h3.getparent() 58 id = a.get('href') 59 if not id: 60 continue 61 62 title = ''.join(data.xpath('.//h3//text()')).strip() 63 authors = data.xpath('descendant::a[@class="fl" and @href]//text()') 64 while authors and authors[-1].strip().lower() in ('preview', 'read', 'more editions'): 65 authors = authors[:-1] 66 if not authors: 67 continue 68 author = ' & '.join(authors) 69 70 counter -= 1 71 72 s = SearchResult() 73 s.title = title.strip() 74 s.author = author.strip() 75 s.detail_item = id.strip() 76 s.drm = SearchResult.DRM_UNKNOWN 77 78 yield s 79 80 81class GoogleBooksStore(BasicStoreConfig, StorePlugin): 82 83 def open(self, parent=None, detail_item=None, external=False): 84 url = 'https://books.google.com/books' 85 if True or external or self.config.get('open_external', False): 86 open_url(QUrl(url_slash_cleaner(detail_item if detail_item else url))) 87 else: 88 d = WebStoreDialog(self.gui, url, parent, detail_item) 89 d.setWindowTitle(self.name) 90 d.set_tags(self.config.get('tags', '')) 91 d.exec() 92 93 def search(self, query, max_results=10, timeout=60): 94 for result in search_google(query, max_results=max_results, timeout=timeout): 95 yield result 96 97 def get_details(self, search_result, timeout): 98 br = browser() 99 with closing(br.open(search_result.detail_item, timeout=timeout)) as nf: 100 doc = parse_html(nf.read()) 101 102 search_result.cover_url = ''.join(doc.xpath('//div[@class="sidebarcover"]//img/@src')) 103 104 # Try to get the set price. 105 price = ''.join(doc.xpath('//div[@id="gb-get-book-container"]//a/text()')) 106 if 'read' in price.lower(): 107 price = 'Unknown' 108 elif 'free' in price.lower() or not price.strip(): 109 price = '$0.00' 110 elif '-' in price: 111 a, b, price = price.partition(' - ') 112 search_result.price = price.strip() 113 114 search_result.formats = ', '.join(doc.xpath('//div[contains(@class, "download-panel-div")]//a/text()')).upper() 115 if not search_result.formats: 116 search_result.formats = _('Unknown') 117 118 return True 119 120 121if __name__ == '__main__': 122 import sys 123 for result in search_google(' '.join(sys.argv[1:]), write_html_to='/t/google.html'): 124 print(result) 125