# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
'''
LePoint.fr
'''
from calibre.web.feeds.recipes import BasicNewsRecipe


class lepoint(BasicNewsRecipe):
    """Calibre news recipe that builds an e-book from the LePoint.fr RSS feeds."""

    # --- Publication metadata -------------------------------------------
    title = 'Le Point'
    __author__ = 'calibre'
    description = 'Actualités'
    encoding = 'utf-8'
    publisher = 'LePoint.fr'
    category = 'news, France, world'
    language = 'fr'

    # --- Download / conversion options ----------------------------------
    use_embedded_content = False
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 15
    no_stylesheets = True
    remove_empty_feeds = True
    # NOTE(review): `filterDuplicates` does not look like an attribute that
    # BasicNewsRecipe reads (calibre documents `ignore_duplicate_articles`
    # for de-duplication) -- confirm before relying on it. Kept as-is so the
    # recipe's behaviour is unchanged.
    filterDuplicates = True

    # Styling applied to the downloaded articles, keyed on the site's own
    # CSS class names.
    extra_css = '''
        h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
        .chapo {font-size:xx-small; font-family:Arial,Helvetica,sans-serif;}
        .info_article {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
        .media_article {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
        .article {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
    '''

    # --- Page clean-up rules --------------------------------------------
    # Elements dropped from each article page.
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class': ['entete_chroniqueur']}),
        dict(name='div', attrs={'class': ['col_article']}),
        dict(name='div', attrs={'class': ['signature_article']}),
        dict(name='div', attrs={'class': ['util_font util_article']}),
        dict(name='div', attrs={'class': ['util_article bottom']})
    ]

    # Only the main article container is kept.
    keep_only_tags = [dict(name='div', attrs={'class': ['page_article']})]

    # Everything after the bottom utility bar is discarded.
    remove_tags_after = dict(
        name='div', attrs={'class': ['util_article bottom']})

    # --- Feeds: (section title, RSS URL) --------------------------------
    feeds = [
        (u'À la une', 'http://www.lepoint.fr/rss.xml'),
        ('International', 'http://www.lepoint.fr/monde/rss.xml'),
        ('Tech/Web', 'http://www.lepoint.fr/high-tech-internet/rss.xml'),
        ('Sciences', 'http://www.lepoint.fr/science/rss.xml'),
        ('Economie', 'http://www.lepoint.fr/economie/rss.xml'),
        # Section title spelling fixed (was 'Socièté').
        (u'Société', 'http://www.lepoint.fr/societe/rss.xml'),
        ('Politique', 'http://www.lepoint.fr/politique/rss.xml'),
        (u'Médias', 'http://www.lepoint.fr/medias/rss.xml'),
        ('Culture', 'http://www.lepoint.fr/culture/rss.xml'),
        (u'Santé', 'http://www.lepoint.fr/sante/rss.xml'),
        ('Sport', 'http://www.lepoint.fr/sport/rss.xml')
    ]

    def preprocess_html(self, soup):
        """Remove every inline ``style`` attribute from the parsed page.

        :param soup: parsed article document exposing ``findAll``.
        :return: the same ``soup`` object, modified in place.
        """
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def get_masthead_url(self):
        """Return the masthead logo URL, or ``None`` if it cannot be opened.

        The logo is requested once with the recipe's browser purely to check
        that it is reachable; the response itself is not used.
        """
        masthead = 'http://www.lepoint.fr/images/commun/logo.png'
        br = BasicNewsRecipe.get_browser(self)
        try:
            br.open(masthead)
        except Exception:
            # Best effort: any fetch failure falls back to no masthead.
            # `Exception` (not a bare except) so KeyboardInterrupt and
            # SystemExit still propagate.
            self.log("\nCover unavailable")
            masthead = None
        return masthead