1<?php
2class JustETFBridge extends BridgeAbstract {
	// Bridge metadata
	const NAME = 'justETF Bridge';
	// Base address; getURI() appends the language and context specific path
	const URI = 'https://www.justetf.com';
	// NOTE(review): this text mentions only the news feed, although a
	// 'Profile' context is declared below and handled by collectProfile().
	const DESCRIPTION = 'Currently only supports the news feed';
	const MAINTAINER = 'logmanoriginal';
	// Input parameters, grouped by context. The context names 'News' and
	// 'Profile' are the values collectData(), getURI() and getName() switch on.
	const PARAMETERS = array(
		'News' => array(
			// When set, collectNews() downloads every linked article and uses
			// its body, author and images instead of the teaser
			'full' => array(
				'name' => 'Full Article',
				'type' => 'checkbox',
				'title' => 'Enable to load full articles'
			)
		),
		'Profile' => array(
			// Upper-cased and sent as the 'isin' query parameter by getURI().
			// The pattern accepts two letters followed by ten alphanumerics.
			'isin' => array(
				'name' => 'ISIN',
				'type' => 'text',
				'required' => true,
				'pattern' => '[a-zA-Z]{2}[a-zA-Z0-9]{10}',
				'title' => 'ISIN, consisting of 2-letter country code, 9-character identifier, check character'
			),
			// extractProfileContent() adds the strategy section only if this
			// input is strictly true
			'strategy' => array(
				'name' => 'Include Strategy',
				'type' => 'checkbox',
				'defaultValue' => 'checked'
			),
			// extractProfileContent() adds the description section only if
			// this input is strictly true
			'description' => array(
				'name' => 'Include Description',
				'type' => 'checkbox',
				'defaultValue' => 'checked'
			)
		),
		// 'lang' is read by getURI(), getName() and fixDate() in both contexts
		'global' => array(
			'lang' => array(
				'name' => 'Language',
				'type' => 'list',
				// Label => value; the value becomes a path segment of the URI
				// and selects the date notation in fixDate()
				'values' => array(
					'Englisch' => 'en',
					'Deutsch'  => 'de',
					'Italiano' => 'it'
				),
				// NOTE(review): the default names the label, not the value
				// 'en' - confirm how the framework resolves list defaults
				'defaultValue' => 'Englisch'
			)
		)
	);
47
48	public function collectData() {
49		$html = getSimpleHTMLDOM($this->getURI())
50			or returnServerError('Failed loading contents from ' . $this->getURI());
51
52		defaultLinkTo($html, static::URI);
53
54		switch($this->queriedContext) {
55			case 'News':
56				$this->collectNews($html);
57				break;
58			case 'Profile':
59				$this->collectProfile($html);
60				break;
61		}
62	}
63
64	public function getURI() {
65		$uri = static::URI;
66
67		if($this->getInput('lang')) {
68			$uri .= '/' . $this->getInput('lang');
69		}
70
71		switch($this->queriedContext) {
72			case 'News':
73				$uri .= '/news';
74				break;
75			case 'Profile':
76				$uri .= '/etf-profile.html?' . http_build_query(array(
77					'isin' => strtoupper($this->getInput('isin'))
78				));
79				break;
80		}
81
82		return $uri;
83	}
84
85	public function getName() {
86		$name = static::NAME;
87
88		$name .= ($this->queriedContext) ? ' - ' . $this->queriedContext : '';
89
90		switch($this->queriedContext) {
91			case 'News': break;
92			case 'Profile':
93				if($this->getInput('isin')) {
94					$name .= ' ISIN ' . strtoupper($this->getInput('isin'));
95				}
96		}
97
98		if($this->getInput('lang')) {
99			$name .= ' (' . strtoupper($this->getInput('lang')) . ')';
100		}
101
102		return $name;
103	}
104
105	#region Common
106
107	/**
108	 * Fixes dates depending on the choosen language:
109	 *
110	 * de : dd.mm.yy
111	 * en : dd.mm.yy
112	 * it : dd/mm/yy
113	 *
114	 * Basically strtotime doesn't convert dates correctly due to formats
115	 * being hard to interpret. So we use the DateTime object, manually
116	 * fixing dates and times (set to 00:00:00.000).
117	 *
118	 * We don't know the timezone, so just assume +00:00 (or whatever
119	 * DateTime chooses)
120	 */
121	private function fixDate($date) {
122		switch($this->getInput('lang')) {
123			case 'en':
124			case 'de':
125				$df = date_create_from_format('d.m.y', $date);
126				break;
127			case 'it':
128				$df = date_create_from_format('d/m/y', $date);
129				break;
130		}
131
132		date_time_set($df, 0, 0);
133
134		// Debug::log(date_format($df, 'U'));
135
136		return date_format($df, 'U');
137	}
138
139	private function extractImages($article) {
140		// Notice: We can have zero or more images (though it should mostly be 1)
141		$elements = $article->find('img');
142
143		$images = array();
144
145		foreach($elements as $img) {
146			// Skip the logo (mostly provided part of a hidden div)
147			if(substr($img->src, strrpos($img->src, '/') + 1) === 'logo.png')
148				continue;
149
150			$images[] = $img->src;
151		}
152
153		return $images;
154	}
155
156	#endregion
157
158	#region News
159
160	private function collectNews($html) {
161		$articles = $html->find('div.newsTopArticle')
162			or returnServerError('No articles found! Layout might have changed!');
163
164		foreach($articles as $article) {
165
166			$item = array();
167
168			// Common data
169
170			$item['uri'] = $this->extractNewsUri($article);
171			$item['timestamp'] = $this->extractNewsDate($article);
172			$item['title'] = $this->extractNewsTitle($article);
173
174			if($this->getInput('full')) {
175
176				$uri = $this->extractNewsUri($article);
177
178				$html = getSimpleHTMLDOMCached($uri)
179					or returnServerError('Failed loading full article from ' . $uri);
180
181				$fullArticle = $html->find('div.article', 0)
182					or returnServerError('No content found! Layout might have changed!');
183
184				defaultLinkTo($fullArticle, static::URI);
185
186				$item['author'] = $this->extractFullArticleAuthor($fullArticle);
187				$item['content'] = $this->extractFullArticleContent($fullArticle);
188				$item['enclosures'] = $this->extractImages($fullArticle);
189
190			} else {
191
192				$item['content'] = $this->extractNewsDescription($article);
193				$item['enclosures'] = $this->extractImages($article);
194
195			}
196
197			$this->items[] = $item;
198		}
199	}
200
201	private function extractNewsUri($article) {
202		$element = $article->find('a', 0)
203			or returnServerError('Anchor not found!');
204
205		return $element->href;
206	}
207
208	private function extractNewsDate($article) {
209		$element = $article->find('div.subheadline', 0)
210			or returnServerError('Date not found!');
211
212		// Debug::log($element->plaintext);
213
214		$date = trim(explode('|', $element->plaintext)[0]);
215
216		return $this->fixDate($date);
217	}
218
219	private function extractNewsDescription($article) {
220		$element = $article->find('span.newsText', 0)
221			or returnServerError('Description not found!');
222
223		$element->find('a', 0)->onclick = '';
224
225		// Debug::log($element->innertext);
226
227		return $element->innertext;
228	}
229
230	private function extractNewsTitle($article) {
231		$element = $article->find('h3', 0)
232			or returnServerError('Title not found!');
233
234		return $element->plaintext;
235	}
236
237	private function extractFullArticleContent($article) {
238		$element = $article->find('div.article_body', 0)
239			or returnServerError('Article body not found!');
240
241		// Remove teaser image
242		$element->find('img.teaser-img', 0)->outertext = '';
243
244		// Remove self advertisements
245		foreach($element->find('.call-action') as $adv) {
246			$adv->outertext = '';
247		}
248
249		// Remove tips
250		foreach($element->find('.panel-edu') as $tip) {
251			$tip->outertext = '';
252		}
253
254		// Remove inline scripts (used for i.e. interactive graphs) as they are
255		// rendered as a long series of strings
256		foreach($element->find('script') as $script) {
257			$script->outertext = '[Content removed! Visit site to see full contents!]';
258		}
259
260		return $element->innertext;
261	}
262
263	private function extractFullArticleAuthor($article) {
264		$element = $article->find('span[itemprop=name]', 0)
265			or returnServerError('Author not found!');
266
267		return $element->plaintext;
268	}
269
270	#endregion
271
272	#region Profile
273
274	private function collectProfile($html) {
275		$item = array();
276
277		$item['uri'] = $this->getURI();
278		$item['timestamp'] = $this->extractProfileDate($html);
279		$item['title'] = $this->extractProfiletitle($html);
280		$item['author'] = $this->extractProfileAuthor($html);
281		$item['content'] = $this->extractProfileContent($html);
282
283		$this->items[] = $item;
284	}
285
286	private function extractProfileDate($html) {
287		$element = $html->find('div.infobox div.vallabel', 0)
288			or returnServerError('Date not found!');
289
290		// Debug::log($element->plaintext);
291
292		$date = trim(explode("\r\n", $element->plaintext)[1]);
293
294		return $this->fixDate($date);
295	}
296
297	private function extractProfileTitle($html) {
298		$element = $html->find('span.h1', 0)
299			or returnServerError('Title not found!');
300
301		return $element->plaintext;
302	}
303
304	private function extractProfileContent($html) {
305		// There are a few thins we are interested:
306		// - Investment Strategy
307		// - Description
308		// - Quote
309
310		$strategy = $html->find('div.tab-container div.col-sm-6 p', 0)
311			or returnServerError('Investment Strategy not found!');
312
313		// Description requires a bit of cleanup due to lack of propper identification
314
315		$description = $html->find('div.headline', 5)
316			or returnServerError('Description container not found!');
317
318		$description = $description->parent();
319
320		foreach($description->find('div') as $div) {
321			$div->outertext = '';
322		}
323
324		$quote = $html->find('div.infobox div.val', 0)
325			or returnServerError('Quote not found!');
326
327		$quote_html = '<strong>Quote</strong><br><p>' . $quote . '</p>';
328		$strategy_html = '';
329		$description_html = '';
330
331		if($this->getInput('strategy') === true) {
332			$strategy_html = '<strong>Strategy</strong><br><p>' . $strategy . '</p><br>';
333		}
334
335		if($this->getInput('description') === true) {
336			$description_html = '<strong>Description</strong><br><p>' . $description . '</p><br>';
337		}
338
339		return $strategy_html . $description_html . $quote_html;
340	}
341
342	private function extractProfileAuthor($html) {
343		// Use ISIN + WKN as author
344		// Notice: "identfier" is not a typo [sic]!
345		$element = $html->find('span.identfier', 0)
346			or returnServerError('Author not found!');
347
348		return $element->plaintext;
349	}
350
351	#endregion
352}
353