#!/usr/local/bin/python3.8
# vim:fileencoding=utf-8
#
from __future__ import unicode_literals, division, absolute_import, print_function

__license__ = 'GPL v3'
__copyright__ = '2020, Pat Stapleton <pat.stapleton at gmail.com>'
'''
Recipe for Independent Australia
'''

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds import Feed


class IndependentAustralia(BasicNewsRecipe):
    """Recipe for Independent Australia.

    Downloads the single site-wide RSS feed and regroups its articles into
    per-section feeds (Politics, Environment, ...) according to the section
    segment found in each article URL.
    """

    title = 'Independent Australia'
    language = 'en_AU'
    __author__ = 'Pat Stapleton'
    description = (
        'Independent Australia is a progressive journal focusing on politics, democracy, the environment, Australian history and Australian identity.'
        ' It contains news and opinion from Australia and around the world.')
    oldest_article = 7  # days
    max_articles_per_feed = 100
    publication_type = 'newspaper'

    feeds = [
        (
            'Independent Australia',
            'https://feeds.feedburner.com/IndependentAustralia'
        ),
    ]

    masthead_url = 'https://independentaustralia.net/t/2018/logo-2018-lg-h90.png'
    cover_url = 'https://independentaustralia.net/t/apple-touch-icon.png'
    # cover_margins = (0,20,'#000000')
    scale_news_images_to_device = True

    # auto_cleanup = True  # enable this as a backup option if recipe stops working

    # use_embedded_content = False  # if set to true will assume that all the
    # article content is within the feed (i.e. won't try to fetch more data)

    no_stylesheets = True
    remove_javascript = True

    # The article content is contained in <div class="art-display">.
    keep_only_tags = [
        dict(name='div', attrs={'class': "art-display"})
    ]

    # ************************************
    # Clear out all the unwanted html tags:
    # ************************************
    remove_tags = [{
        'name': ['meta', 'link', 'noscript', 'script', 'footer']
    }, {
        'attrs': {
            'class': ['tagFooter', 'noshow', 'panelSubscription', 'mt-2']
        }
    }]

    # ************************************
    # Tidy up the output to look neat for reading
    # ************************************
    remove_attributes = ['width', 'height', 'style']
    extra_css = '.byline{font-size:smaller;margin-bottom:10px;}.inline-caption{display:block;font-size:smaller;text-decoration: none;}'
    compress_news_images = True

    # (section title, URL fragment) pairs. Order matters twice: the first
    # fragment found in an article URL decides its section, and the generated
    # section feeds are appended to the feed list in this order.
    _SECTIONS = (
        ('Politics', 'independentaustralia.net/politics/'),
        ('Environment', 'independentaustralia.net/environment/'),
        ('Business', 'independentaustralia.net/business/'),
        ('Life', 'independentaustralia.net/life/'),
        ('Australia', 'independentaustralia.net/australia/'),
    )

    @classmethod
    def _section_for(cls, url):
        """Return the section title whose URL fragment occurs in *url*.

        The comparison is case-insensitive. Returns None when no fragment
        matches or when *url* is empty/None.
        """
        # An article without a URL cannot be categorised; treating it as an
        # empty string makes it fall through to "no section".
        lowered = (url or '').lower()
        for section_title, fragment in cls._SECTIONS:
            if fragment in lowered:
                return section_title
        return None

    @staticmethod
    def _make_section_feed(section_title, articles):
        """Build a Feed named *section_title* holding a copy of *articles*."""
        section_feed = Feed()
        section_feed.title = section_title
        section_feed.image_url = None
        # Value carried over unchanged from the original recipe.
        section_feed.oldest_article = 30
        section_feed.id_counter = len(articles)
        section_feed.articles = list(articles)
        return section_feed

    # ************************************
    # Break up feed into categories (based on BrianG's code snippet):
    # ************************************
    def parse_feeds(self):
        """Parse the configured feeds and split the articles into sections.

        Runs the stock ``BasicNewsRecipe.parse_feeds`` first, then moves every
        article whose URL contains a known section fragment out of its
        original feed and into a new per-section feed. Articles that match no
        section stay in the feed they came from.

        Returns the list of feeds: the original feeds (minus the first one if
        it ended up empty) followed by one feed per non-empty section.
        """
        # Do the "official" parse_feeds first
        feeds = BasicNewsRecipe.parse_feeds(self)

        by_section = {section_title: [] for section_title, _ in self._SECTIONS}

        # Partition every feed in a single pass: matched articles go to their
        # section bucket, the rest are kept.
        for curfeed in feeds:
            unmatched = []
            for article in curfeed.articles:
                section_title = self._section_for(article.url)
                if section_title is None:
                    unmatched.append(article)
                else:
                    by_section[section_title].append(article)
            # Slice-assign so the feed's own list object is updated in place.
            curfeed.articles[:] = unmatched

        # Append one new Feed per section that actually received articles.
        for section_title, _ in self._SECTIONS:
            section_articles = by_section[section_title]
            if section_articles:
                feeds.append(
                    self._make_section_feed(section_title, section_articles))

        # cleanup empty first feed item
        if len(feeds) > 1 and len(feeds[0]) == 0:
            del feeds[0]
        return feeds