<?php
/**
 * Bridge for justETF (https://www.justetf.com).
 *
 * Supports two contexts:
 *  - "News":    one feed item per news teaser, optionally replaced by the
 *               full article when the "full" checkbox is enabled.
 *  - "Profile": a single feed item describing the ETF identified by an ISIN.
 *
 * All failure paths go through returnServerError(), which (as the
 * `... or returnServerError(...)` idiom used throughout this class relies on)
 * is expected to abort processing rather than return.
 */
class JustETFBridge extends BridgeAbstract {
    const NAME = 'justETF Bridge';
    const URI = 'https://www.justetf.com';
    const DESCRIPTION = 'Currently only supports the news feed';
    const MAINTAINER = 'logmanoriginal';
    const PARAMETERS = array(
        'News' => array(
            'full' => array(
                'name' => 'Full Article',
                'type' => 'checkbox',
                'title' => 'Enable to load full articles'
            )
        ),
        'Profile' => array(
            'isin' => array(
                'name' => 'ISIN',
                'type' => 'text',
                'required' => true,
                'pattern' => '[a-zA-Z]{2}[a-zA-Z0-9]{10}',
                'title' => 'ISIN, consisting of 2-letter country code, 9-character identifier, check character'
            ),
            'strategy' => array(
                'name' => 'Include Strategy',
                'type' => 'checkbox',
                'defaultValue' => 'checked'
            ),
            'description' => array(
                'name' => 'Include Description',
                'type' => 'checkbox',
                'defaultValue' => 'checked'
            )
        ),
        'global' => array(
            'lang' => array(
                'name' => 'Language',
                'type' => 'list',
                'values' => array(
                    'Englisch' => 'en',
                    'Deutsch' => 'de',
                    'Italiano' => 'it'
                ),
                'defaultValue' => 'Englisch'
            )
        )
    );

    /**
     * Loads the page for the queried context and dispatches to the matching
     * collector, which appends the resulting item(s) to $this->items.
     */
    public function collectData() {
        $html = getSimpleHTMLDOM($this->getURI())
            or returnServerError('Failed loading contents from ' . $this->getURI());

        defaultLinkTo($html, static::URI);

        switch($this->queriedContext) {
            case 'News':
                $this->collectNews($html);
                break;
            case 'Profile':
                $this->collectProfile($html);
                break;
        }
    }

    /**
     * Builds the URI for the current context: base URI, optional language
     * segment, then the context specific path (news list or ETF profile).
     *
     * @return string
     */
    public function getURI() {
        $uri = static::URI;

        if($this->getInput('lang')) {
            $uri .= '/' . $this->getInput('lang');
        }

        switch($this->queriedContext) {
            case 'News':
                $uri .= '/news';
                break;
            case 'Profile':
                $uri .= '/etf-profile.html?' . http_build_query(array(
                    'isin' => strtoupper($this->getInput('isin'))
                ));
                break;
        }

        return $uri;
    }

    /**
     * Builds the feed name: bridge name, context, ISIN (Profile context only)
     * and the upper-cased language code, each part only if available.
     *
     * @return string
     */
    public function getName() {
        $name = static::NAME;

        $name .= ($this->queriedContext) ? ' - ' . $this->queriedContext : '';

        switch($this->queriedContext) {
            case 'News':
                break;
            case 'Profile':
                if($this->getInput('isin')) {
                    $name .= ' ISIN ' . strtoupper($this->getInput('isin'));
                }
                break;
        }

        if($this->getInput('lang')) {
            $name .= ' (' . strtoupper($this->getInput('lang')) . ')';
        }

        return $name;
    }

    #region Common

    /**
     * Fixes dates depending on the chosen language:
     *
     * de : dd.mm.yy
     * en : dd.mm.yy
     * it : dd/mm/yy
     *
     * Basically strtotime doesn't convert dates correctly due to formats
     * being hard to interpret. So we use the DateTime object, manually
     * fixing dates and times (set to 00:00:00.000).
     *
     * We don't know the timezone, so just assume +00:00 (or whatever
     * DateTime chooses)
     *
     * @param string $date Date text as scraped from the page
     * @return string Unix timestamp (date_format 'U')
     */
    private function fixDate($date) {
        switch($this->getInput('lang')) {
            case 'it':
                $format = 'd/m/y';
                break;
            case 'en':
            case 'de':
            default:
                // Fall back to the dotted format when no (known) language is
                // selected; a mismatch is reported below instead of crashing
                // on an undefined variable.
                $format = 'd.m.y';
                break;
        }

        // date_create_from_format() returns false if $date doesn't match
        $df = date_create_from_format($format, $date)
            or returnServerError('Unable to parse date "' . $date . '"! Layout might have changed!');

        // Fields missing from the format default to the current time, so
        // reset the time part explicitly
        date_time_set($df, 0, 0);

        return date_format($df, 'U');
    }

    /**
     * Collects the sources of all images inside the given element, skipping
     * images whose file name is 'logo.png'.
     *
     * @param object $article DOM node to search
     * @return array List of image URIs (may be empty)
     */
    private function extractImages($article) {
        // Notice: We can have zero or more images (though it should mostly be 1)
        $elements = $article->find('img');

        $images = array();

        foreach($elements as $img) {
            $src = $img->src;

            // Take everything after the last slash; a source without any
            // slash is the file name itself
            $slash = strrpos($src, '/');
            $filename = ($slash === false) ? $src : substr($src, $slash + 1);

            // Skip the logo (mostly provided part of a hidden div)
            if($filename === 'logo.png')
                continue;

            $images[] = $src;
        }

        return $images;
    }

    #endregion

    #region News

    /**
     * Creates one feed item per 'div.newsTopArticle' element. With the
     * 'full' input enabled, each article page is loaded (cached) and used for
     * author, content and enclosures; otherwise the teaser is used.
     *
     * @param object $html DOM of the news overview page
     */
    private function collectNews($html) {
        $articles = $html->find('div.newsTopArticle')
            or returnServerError('No articles found! Layout might have changed!');

        foreach($articles as $article) {

            $item = array();

            // Common data

            $item['uri'] = $this->extractNewsUri($article);
            $item['timestamp'] = $this->extractNewsDate($article);
            $item['title'] = $this->extractNewsTitle($article);

            if($this->getInput('full')) {

                $uri = $item['uri'];

                // Use a dedicated variable so the overview DOM passed in as
                // $html is not overwritten while iterating
                $articleHtml = getSimpleHTMLDOMCached($uri)
                    or returnServerError('Failed loading full article from ' . $uri);

                $fullArticle = $articleHtml->find('div.article', 0)
                    or returnServerError('No content found! Layout might have changed!');

                defaultLinkTo($fullArticle, static::URI);

                $item['author'] = $this->extractFullArticleAuthor($fullArticle);
                $item['content'] = $this->extractFullArticleContent($fullArticle);
                $item['enclosures'] = $this->extractImages($fullArticle);

            } else {

                $item['content'] = $this->extractNewsDescription($article);
                $item['enclosures'] = $this->extractImages($article);

            }

            $this->items[] = $item;
        }
    }

    /**
     * Returns the href of the first anchor inside the teaser.
     *
     * @param object $article Teaser DOM node
     * @return string
     */
    private function extractNewsUri($article) {
        $element = $article->find('a', 0)
            or returnServerError('Anchor not found!');

        return $element->href;
    }

    /**
     * Returns the teaser timestamp. The date is the part of the
     * 'div.subheadline' text in front of the first '|'.
     *
     * @param object $article Teaser DOM node
     * @return string Unix timestamp
     */
    private function extractNewsDate($article) {
        $element = $article->find('div.subheadline', 0)
            or returnServerError('Date not found!');

        $date = trim(explode('|', $element->plaintext)[0]);

        return $this->fixDate($date);
    }

    /**
     * Returns the inner HTML of the teaser text ('span.newsText') with the
     * onclick handler of its first anchor cleared.
     *
     * @param object $article Teaser DOM node
     * @return string
     */
    private function extractNewsDescription($article) {
        $element = $article->find('span.newsText', 0)
            or returnServerError('Description not found!');

        // The anchor is optional: only clear the handler if there is one
        $anchor = $element->find('a', 0);

        if($anchor) {
            $anchor->onclick = '';
        }

        return $element->innertext;
    }

    /**
     * Returns the text of the first 'h3' inside the teaser.
     *
     * @param object $article Teaser DOM node
     * @return string
     */
    private function extractNewsTitle($article) {
        $element = $article->find('h3', 0)
            or returnServerError('Title not found!');

        return $element->plaintext;
    }

    /**
     * Returns the inner HTML of 'div.article_body' after removing the teaser
     * image, self advertisements and tips, and replacing inline scripts by a
     * placeholder text.
     *
     * @param object $article Full article DOM node
     * @return string
     */
    private function extractFullArticleContent($article) {
        $element = $article->find('div.article_body', 0)
            or returnServerError('Article body not found!');

        // Remove teaser image (not every article has one)
        $teaser = $element->find('img.teaser-img', 0);

        if($teaser) {
            $teaser->outertext = '';
        }

        // Remove self advertisements
        foreach($element->find('.call-action') as $adv) {
            $adv->outertext = '';
        }

        // Remove tips
        foreach($element->find('.panel-edu') as $tip) {
            $tip->outertext = '';
        }

        // Remove inline scripts (used for i.e. interactive graphs) as they are
        // rendered as a long series of strings
        foreach($element->find('script') as $script) {
            $script->outertext = '[Content removed! Visit site to see full contents!]';
        }

        return $element->innertext;
    }

    /**
     * Returns the text of the first 'span[itemprop=name]' in the article.
     *
     * @param object $article Full article DOM node
     * @return string
     */
    private function extractFullArticleAuthor($article) {
        $element = $article->find('span[itemprop=name]', 0)
            or returnServerError('Author not found!');

        return $element->plaintext;
    }

    #endregion

    #region Profile

    /**
     * Creates the single feed item for the ETF profile page.
     *
     * @param object $html DOM of the profile page
     */
    private function collectProfile($html) {
        $item = array();

        $item['uri'] = $this->getURI();
        $item['timestamp'] = $this->extractProfileDate($html);
        $item['title'] = $this->extractProfileTitle($html);
        $item['author'] = $this->extractProfileAuthor($html);
        $item['content'] = $this->extractProfileContent($html);

        $this->items[] = $item;
    }

    /**
     * Returns the profile timestamp, taken from the second CRLF separated
     * line of the first 'div.infobox div.vallabel' element.
     *
     * @param object $html DOM of the profile page
     * @return string Unix timestamp
     */
    private function extractProfileDate($html) {
        $element = $html->find('div.infobox div.vallabel', 0)
            or returnServerError('Date not found!');

        $lines = explode("\r\n", $element->plaintext);

        // Without a second line there is no date to read
        if(!isset($lines[1])) {
            returnServerError('Date not found!');
        }

        return $this->fixDate(trim($lines[1]));
    }

    /**
     * Returns the text of the first 'span.h1' element.
     *
     * @param object $html DOM of the profile page
     * @return string
     */
    private function extractProfileTitle($html) {
        $element = $html->find('span.h1', 0)
            or returnServerError('Title not found!');

        return $element->plaintext;
    }

    /**
     * Builds the item content from up to three sections:
     * - Investment Strategy (only if the 'strategy' input is enabled)
     * - Description (only if the 'description' input is enabled)
     * - Quote (always)
     *
     * Optional sections are only looked up when requested, so a missing
     * section the user did not ask for no longer fails the whole feed.
     *
     * @param object $html DOM of the profile page
     * @return string HTML fragment
     */
    private function extractProfileContent($html) {
        $strategy_html = '';
        $description_html = '';

        if($this->getInput('strategy') === true) {
            $strategy = $html->find('div.tab-container div.col-sm-6 p', 0)
                or returnServerError('Investment Strategy not found!');

            $strategy_html = '<strong>Strategy</strong><br><p>' . $strategy . '</p><br>';
        }

        if($this->getInput('description') === true) {
            // Description requires a bit of cleanup due to lack of proper
            // identification: take the parent of the sixth headline and drop
            // all nested divs
            $description = $html->find('div.headline', 5)
                or returnServerError('Description container not found!');

            $description = $description->parent();

            foreach($description->find('div') as $div) {
                $div->outertext = '';
            }

            $description_html = '<strong>Description</strong><br><p>' . $description . '</p><br>';
        }

        $quote = $html->find('div.infobox div.val', 0)
            or returnServerError('Quote not found!');

        $quote_html = '<strong>Quote</strong><br><p>' . $quote . '</p>';

        return $strategy_html . $description_html . $quote_html;
    }

    /**
     * Returns the text of the first 'span.identfier' element, used as author.
     *
     * @param object $html DOM of the profile page
     * @return string
     */
    private function extractProfileAuthor($html) {
        // Use ISIN + WKN as author
        // Notice: "identfier" is not a typo [sic]!
        $element = $html->find('span.identfier', 0)
            or returnServerError('Author not found!');

        return $element->plaintext;
    }

    #endregion
}