1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3#
4# Yahoo! Finance market data downloader (+fix for Pandas Datareader)
5# https://github.com/ranaroussi/yfinance
6#
7# Copyright 2017-2019 Ran Aroussi
8#
9# Licensed under the Apache License, Version 2.0 (the "License");
10# you may not use this file except in compliance with the License.
11# You may obtain a copy of the License at
12#
13#     http://www.apache.org/licenses/LICENSE-2.0
14#
15# Unless required by applicable law or agreed to in writing, software
16# distributed under the License is distributed on an "AS IS" BASIS,
17# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18# See the License for the specific language governing permissions and
19# limitations under the License.
20#
21
22from __future__ import print_function
23
24import time as _time
25import datetime as _datetime
26import requests as _requests
27import pandas as _pd
28import numpy as _np
29import re as _re
30
31try:
32    from urllib.parse import quote as urlencode
33except ImportError:
34    from urllib import quote as urlencode
35
36from . import utils
37
38# import json as _json
39# import re as _re
40# import sys as _sys
41
42from . import shared
43
# Root of the Yahoo! Finance query API; TickerBase appends the
# "/v8/finance/chart/..." and "/v1/finance/search" paths to it.
_BASE_URL_ = 'https://query2.finance.yahoo.com'
# Root of the Yahoo! Finance quote pages; TickerBase appends the ticker
# (plus "/holders", "/financials", "/analysis") when scraping.
_SCRAPE_URL_ = 'https://finance.yahoo.com/quote'
46
47class TickerBase():
48    def __init__(self, ticker, session=None):
49        self.ticker = ticker.upper()
50        self.session = session
51        self._history = None
52        self._base_url = _BASE_URL_
53        self._scrape_url = _SCRAPE_URL_
54
55        self._fundamentals = False
56        self._info = None
57        self._analysis = None
58        self._sustainability = None
59        self._recommendations = None
60        self._major_holders = None
61        self._institutional_holders = None
62        self._mutualfund_holders = None
63        self._isin = None
64        self._news = []
65
66        self._calendar = None
67        self._expirations = {}
68
69        self._earnings = {
70            "yearly": utils.empty_df(),
71            "quarterly": utils.empty_df()}
72        self._financials = {
73            "yearly": utils.empty_df(),
74            "quarterly": utils.empty_df()}
75        self._balancesheet = {
76            "yearly": utils.empty_df(),
77            "quarterly": utils.empty_df()}
78        self._cashflow = {
79            "yearly": utils.empty_df(),
80            "quarterly": utils.empty_df()}
81
82        # accept isin as ticker
83        if utils.is_isin(self.ticker):
84            self.ticker = utils.get_ticker_by_isin(self.ticker, None, session)
85
86    def stats(self, proxy=None):
87        # setup proxy in requests format
88        if proxy is not None:
89            if isinstance(proxy, dict) and "https" in proxy:
90                proxy = proxy["https"]
91            proxy = {"https": proxy}
92
93        if self._fundamentals:
94            return
95
96        ticker_url = "{}/{}".format(self._scrape_url, self.ticker)
97
98        # get info and sustainability
99        data = utils.get_json(ticker_url, proxy, self.session)
100        return data
101
102    def history(self, period="1mo", interval="1d",
103                start=None, end=None, prepost=False, actions=True,
104                auto_adjust=True, back_adjust=False,
105                proxy=None, rounding=False, tz=None, timeout=None, **kwargs):
106        """
107        :Parameters:
108            period : str
109                Valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
110                Either Use period parameter or use start and end
111            interval : str
112                Valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
113                Intraday data cannot extend last 60 days
114            start: str
115                Download start date string (YYYY-MM-DD) or _datetime.
116                Default is 1900-01-01
117            end: str
118                Download end date string (YYYY-MM-DD) or _datetime.
119                Default is now
120            prepost : bool
121                Include Pre and Post market data in results?
122                Default is False
123            auto_adjust: bool
124                Adjust all OHLC automatically? Default is True
125            back_adjust: bool
126                Back-adjusted data to mimic true historical prices
127            proxy: str
128                Optional. Proxy server URL scheme. Default is None
129            rounding: bool
130                Round values to 2 decimal places?
131                Optional. Default is False = precision suggested by Yahoo!
132            tz: str
133                Optional timezone locale for dates.
134                (default data is returned as non-localized dates)
135            timeout: None or float
136                If not None stops waiting for a response after given number of
137                seconds. (Can also be a fraction of a second e.g. 0.01)
138                Default is None.
139            **kwargs: dict
140                debug: bool
141                    Optional. If passed as False, will suppress
142                    error message printing to console.
143        """
144
145        if start or period is None or period.lower() == "max":
146            if start is None:
147                start = -631159200
148            elif isinstance(start, _datetime.datetime):
149                start = int(_time.mktime(start.timetuple()))
150            else:
151                start = int(_time.mktime(
152                    _time.strptime(str(start), '%Y-%m-%d')))
153            if end is None:
154                end = int(_time.time())
155            elif isinstance(end, _datetime.datetime):
156                end = int(_time.mktime(end.timetuple()))
157            else:
158                end = int(_time.mktime(_time.strptime(str(end), '%Y-%m-%d')))
159
160            params = {"period1": start, "period2": end}
161        else:
162            period = period.lower()
163            params = {"range": period}
164
165        params["interval"] = interval.lower()
166        params["includePrePost"] = prepost
167        params["events"] = "div,splits"
168
169        # 1) fix weired bug with Yahoo! - returning 60m for 30m bars
170        if params["interval"] == "30m":
171            params["interval"] = "15m"
172
173        # setup proxy in requests format
174        if proxy is not None:
175            if isinstance(proxy, dict) and "https" in proxy:
176                proxy = proxy["https"]
177            proxy = {"https": proxy}
178
179        # Getting data from json
180        url = "{}/v8/finance/chart/{}".format(self._base_url, self.ticker)
181
182        session = self.session or _requests
183        data = session.get(
184            url=url,
185            params=params,
186            proxies=proxy,
187            headers=utils.user_agent_headers,
188            timeout=timeout
189        )
190        if "Will be right back" in data.text:
191            raise RuntimeError("*** YAHOO! FINANCE IS CURRENTLY DOWN! ***\n"
192                               "Our engineers are working quickly to resolve "
193                               "the issue. Thank you for your patience.")
194        data = data.json()
195
196        # Work with errors
197        debug_mode = True
198        if "debug" in kwargs and isinstance(kwargs["debug"], bool):
199            debug_mode = kwargs["debug"]
200
201        err_msg = "No data found for this date range, symbol may be delisted"
202        if "chart" in data and data["chart"]["error"]:
203            err_msg = data["chart"]["error"]["description"]
204            shared._DFS[self.ticker] = utils.empty_df()
205            shared._ERRORS[self.ticker] = err_msg
206            if "many" not in kwargs and debug_mode:
207                print('- %s: %s' % (self.ticker, err_msg))
208            return shared._DFS[self.ticker]
209
210        elif "chart" not in data or data["chart"]["result"] is None or \
211                not data["chart"]["result"]:
212            shared._DFS[self.ticker] = utils.empty_df()
213            shared._ERRORS[self.ticker] = err_msg
214            if "many" not in kwargs and debug_mode:
215                print('- %s: %s' % (self.ticker, err_msg))
216            return shared._DFS[self.ticker]
217
218        # parse quotes
219        try:
220            quotes = utils.parse_quotes(data["chart"]["result"][0], tz)
221        except Exception:
222            shared._DFS[self.ticker] = utils.empty_df()
223            shared._ERRORS[self.ticker] = err_msg
224            if "many" not in kwargs and debug_mode:
225                print('- %s: %s' % (self.ticker, err_msg))
226            return shared._DFS[self.ticker]
227
228        # 2) fix weired bug with Yahoo! - returning 60m for 30m bars
229        if interval.lower() == "30m":
230            quotes2 = quotes.resample('30T')
231            quotes = _pd.DataFrame(index=quotes2.last().index, data={
232                'Open': quotes2['Open'].first(),
233                'High': quotes2['High'].max(),
234                'Low': quotes2['Low'].min(),
235                'Close': quotes2['Close'].last(),
236                'Adj Close': quotes2['Adj Close'].last(),
237                'Volume': quotes2['Volume'].sum()
238            })
239            try:
240                quotes['Dividends'] = quotes2['Dividends'].max()
241            except Exception:
242                pass
243            try:
244                quotes['Stock Splits'] = quotes2['Dividends'].max()
245            except Exception:
246                pass
247
248        try:
249            if auto_adjust:
250                quotes = utils.auto_adjust(quotes)
251            elif back_adjust:
252                quotes = utils.back_adjust(quotes)
253        except Exception as e:
254            if auto_adjust:
255                err_msg = "auto_adjust failed with %s" % e
256            else:
257                err_msg = "back_adjust failed with %s" % e
258            shared._DFS[self.ticker] = utils.empty_df()
259            shared._ERRORS[self.ticker] = err_msg
260            if "many" not in kwargs and debug_mode:
261                print('- %s: %s' % (self.ticker, err_msg))
262
263        if rounding:
264            quotes = _np.round(quotes, data[
265                "chart"]["result"][0]["meta"]["priceHint"])
266        quotes['Volume'] = quotes['Volume'].fillna(0).astype(_np.int64)
267
268        quotes.dropna(inplace=True)
269
270        # actions
271        dividends, splits = utils.parse_actions(data["chart"]["result"][0], tz)
272
273        # combine
274        df = _pd.concat([quotes, dividends, splits], axis=1, sort=True)
275        df["Dividends"].fillna(0, inplace=True)
276        df["Stock Splits"].fillna(0, inplace=True)
277
278        # index eod/intraday
279        df.index = df.index.tz_localize("UTC").tz_convert(
280            data["chart"]["result"][0]["meta"]["exchangeTimezoneName"])
281
282        if params["interval"][-1] == "m":
283            df.index.name = "Datetime"
284        elif params["interval"] == "1h":
285            pass
286        else:
287            df.index = _pd.to_datetime(df.index.date)
288            if tz is not None:
289                df.index = df.index.tz_localize(tz)
290            df.index.name = "Date"
291
292        # duplicates and missing rows cleanup
293        df.dropna(how='all', inplace=True)
294        df = df[~df.index.duplicated(keep='first')]
295
296        self._history = df.copy()
297
298        if not actions:
299            df.drop(columns=["Dividends", "Stock Splits"], inplace=True)
300
301        return df
302
303    # ------------------------
304
    def _get_fundamentals(self, kind=None, proxy=None):
        """
        Scrape the quote, holders, financials and analysis pages once and
        cache the parsed results on the instance.

        Populates (where the scraped data allows it) ``_major_holders``,
        ``_institutional_holders``, ``_mutualfund_holders``,
        ``_sustainability``, ``_info``, ``_calendar``, ``_recommendations``,
        ``_cashflow``, ``_balancesheet``, ``_financials``, ``_earnings`` and
        ``_analysis``, then sets ``_fundamentals`` to True so later calls
        return immediately. Most parsing steps swallow their exceptions and
        leave the corresponding attribute at its previous value.

        :Parameters:
            kind:
                Not used by this method.
            proxy: str or dict
                Optional. Proxy URL, or a dict whose "https" entry is used.
                Default is None
        """
        def cleanup(data):
            # Turn a list of statement records into a frame with one row per
            # line item ("Breakdown") and one column per 'endDate'.
            df = _pd.DataFrame(data).drop(columns=['maxAge'])
            # '-' placeholders become NaN
            for col in df.columns:
                df[col] = _np.where(
                    df[col].astype(str) == '-', _np.nan, df[col])

            df.set_index('endDate', inplace=True)
            # try epoch seconds first, fall back to generic date parsing
            try:
                df.index = _pd.to_datetime(df.index, unit='s')
            except ValueError:
                df.index = _pd.to_datetime(df.index)
            df = df.T
            df.columns.name = ''
            df.index.name = 'Breakdown'

            df.index = utils.camel2title(df.index)
            return df

        # setup proxy in requests format
        if proxy is not None:
            if isinstance(proxy, dict) and "https" in proxy:
                proxy = proxy["https"]
            proxy = {"https": proxy}

        # everything below runs only once per instance
        if self._fundamentals:
            return

        ticker_url = "{}/{}".format(self._scrape_url, self.ticker)

        # get info and sustainability
        data = utils.get_json(ticker_url, proxy, self.session)

        # holders
        try:
            resp = utils.get_html(ticker_url + '/holders', proxy, self.session)
            holders = _pd.read_html(resp)
        except Exception:
            holders = []

        # the tables are assigned by position: major, institutional, mutual fund
        if len(holders) >= 3:
            self._major_holders = holders[0]
            self._institutional_holders = holders[1]
            self._mutualfund_holders = holders[2]
        elif len(holders) >= 2:
            self._major_holders = holders[0]
            self._institutional_holders = holders[1]
        elif len(holders) >= 1:
            self._major_holders = holders[0]

        # self._major_holders = holders[0]
        # self._institutional_holders = holders[1]

        # parse report dates and turn "12.3%" strings into fractions
        if self._institutional_holders is not None:
            if 'Date Reported' in self._institutional_holders:
                self._institutional_holders['Date Reported'] = _pd.to_datetime(
                    self._institutional_holders['Date Reported'])
            if '% Out' in self._institutional_holders:
                self._institutional_holders['% Out'] = self._institutional_holders[
                    '% Out'].str.replace('%', '').astype(float) / 100

        if self._mutualfund_holders is not None:
            if 'Date Reported' in self._mutualfund_holders:
                self._mutualfund_holders['Date Reported'] = _pd.to_datetime(
                    self._mutualfund_holders['Date Reported'])
            if '% Out' in self._mutualfund_holders:
                self._mutualfund_holders['% Out'] = self._mutualfund_holders[
                    '% Out'].str.replace('%', '').astype(float) / 100

        # sustainability
        d = {}
        try:
            if isinstance(data.get('esgScores'), dict):
                # keep only the scalar entries of esgScores
                for item in data['esgScores']:
                    if not isinstance(data['esgScores'][item], (dict, list)):
                        d[item] = data['esgScores'][item]

                s = _pd.DataFrame(index=[0], data=d)[-1:].T
                s.columns = ['Value']
                # the index name carries the rating period as "<year>-<month>"
                s.index.name = '%.f-%.f' % (
                    s[s.index == 'ratingYear']['Value'].values[0],
                    s[s.index == 'ratingMonth']['Value'].values[0])

                self._sustainability = s[~s.index.isin(
                    ['maxAge', 'ratingYear', 'ratingMonth'])]
        except Exception:
            pass

        # info (be nice to python 2)
        # later sections overwrite keys they share with earlier ones
        self._info = {}
        try:
            items = ['summaryProfile', 'financialData', 'quoteType',
                     'defaultKeyStatistics', 'assetProfile', 'summaryDetail']
            for item in items:
                if isinstance(data.get(item), dict):
                    self._info.update(data[item])
        except Exception:
            pass

        # For ETFs, provide this valuable data: the top holdings of the ETF
        try:
            if 'topHoldings' in data:
                self._info.update(data['topHoldings'])
        except Exception:
            pass

        try:
            if not isinstance(data.get('summaryDetail'), dict):
                # For some reason summaryDetail did not give any results.
                # The price dict usually has most of the same info.
                self._info.update(data.get('price', {}))
        except Exception:
            pass

        try:
            # self._info['regularMarketPrice'] = self._info['regularMarketOpen']
            # prefer the price section, fall back to regularMarketOpen
            self._info['regularMarketPrice'] = data.get('price', {}).get(
                'regularMarketPrice', self._info.get('regularMarketOpen', None))
        except Exception:
            pass

        try:
            self._info['preMarketPrice'] = data.get('price', {}).get(
                'preMarketPrice', self._info.get('preMarketPrice', None))
        except Exception:
            pass

        # logo URL is derived from the host part of the 'website' entry;
        # it stays empty when there is no usable website
        self._info['logo_url'] = ""
        try:
            domain = self._info['website'].split(
                '://')[1].split('/')[0].replace('www.', '')
            self._info['logo_url'] = 'https://logo.clearbit.com/%s' % domain
        except Exception:
            pass

        # events
        try:
            cal = _pd.DataFrame(
                data['calendarEvents']['earnings'])
            cal['earningsDate'] = _pd.to_datetime(
                cal['earningsDate'], unit='s')
            self._calendar = cal.T
            self._calendar.index = utils.camel2title(self._calendar.index)
            self._calendar.columns = ['Value']
        except Exception:
            pass

        # analyst recommendations
        try:
            rec = _pd.DataFrame(
                data['upgradeDowngradeHistory']['history'])
            # the grade date becomes the index, which is then named 'Date'
            rec['earningsDate'] = _pd.to_datetime(
                rec['epochGradeDate'], unit='s')
            rec.set_index('earningsDate', inplace=True)
            rec.index.name = 'Date'
            rec.columns = utils.camel2title(rec.columns)
            self._recommendations = rec[[
                'Firm', 'To Grade', 'From Grade', 'Action']].sort_index()
        except Exception:
            pass

        # get fundamentals
        data = utils.get_json(ticker_url + '/financials', proxy, self.session)

        # generic patterns
        # each tuple: (target dict, key prefix of the history section,
        #              key of the statement list inside that section)
        for key in (
            (self._cashflow, 'cashflowStatement', 'cashflowStatements'),
            (self._balancesheet, 'balanceSheet', 'balanceSheetStatements'),
            (self._financials, 'incomeStatement', 'incomeStatementHistory')
        ):
            item = key[1] + 'History'
            if isinstance(data.get(item), dict):
                try:
                    key[0]['yearly'] = cleanup(data[item][key[2]])
                except Exception:
                    pass

            item = key[1] + 'HistoryQuarterly'
            if isinstance(data.get(item), dict):
                try:
                    key[0]['quarterly'] = cleanup(data[item][key[2]])
                except Exception:
                    pass

        # earnings
        if isinstance(data.get('earnings'), dict):
            try:
                earnings = data['earnings']['financialsChart']
                # currency defaults to 'USD' when the page does not state one
                earnings['financialCurrency'] = 'USD' if 'financialCurrency' not in data['earnings'] else data['earnings']['financialCurrency']
                self._earnings['financialCurrency'] = earnings['financialCurrency']
                df = _pd.DataFrame(earnings['yearly']).set_index('date')
                df.columns = utils.camel2title(df.columns)
                df.index.name = 'Year'
                self._earnings['yearly'] = df

                df = _pd.DataFrame(earnings['quarterly']).set_index('date')
                df.columns = utils.camel2title(df.columns)
                df.index.name = 'Quarter'
                self._earnings['quarterly'] = df
            except Exception:
                pass

        # Analysis
        data = utils.get_json(ticker_url + '/analysis', proxy, self.session)

        if isinstance(data.get('earningsTrend'), dict):
            try:
                analysis = _pd.DataFrame(data['earningsTrend']['trend'])
                analysis['endDate'] = _pd.to_datetime(analysis['endDate'])
                analysis.set_index('period', inplace=True)
                analysis.index = analysis.index.str.upper()
                analysis.index.name = 'Period'
                analysis.columns = utils.camel2title(analysis.columns)

                # columns whose cells hold dicts are flattened into
                # "<column> <Sub Key>" columns and dropped afterwards
                dict_cols = []

                for idx, row in analysis.iterrows():
                    for colname, colval in row.items():
                        if isinstance(colval, dict):
                            dict_cols.append(colname)
                            for k, v in colval.items():
                                new_colname = colname + ' ' + utils.camel2title([k])[0]
                                analysis.loc[idx, new_colname] = v

                self._analysis = analysis[[c for c in analysis.columns if c not in dict_cols]]
            except Exception:
                pass

        # mark as loaded so subsequent calls return early
        self._fundamentals = True
533
534    def get_recommendations(self, proxy=None, as_dict=False, *args, **kwargs):
535        self._get_fundamentals(proxy=proxy)
536        data = self._recommendations
537        if as_dict:
538            return data.to_dict()
539        return data
540
541    def get_calendar(self, proxy=None, as_dict=False, *args, **kwargs):
542        self._get_fundamentals(proxy=proxy)
543        data = self._calendar
544        if as_dict:
545            return data.to_dict()
546        return data
547
548    def get_major_holders(self, proxy=None, as_dict=False, *args, **kwargs):
549        self._get_fundamentals(proxy=proxy)
550        data = self._major_holders
551        if as_dict:
552            return data.to_dict()
553        return data
554
555    def get_institutional_holders(self, proxy=None, as_dict=False, *args, **kwargs):
556        self._get_fundamentals(proxy=proxy)
557        data = self._institutional_holders
558        if data is not None:
559            if as_dict:
560                return data.to_dict()
561            return data
562
563    def get_mutualfund_holders(self, proxy=None, as_dict=False, *args, **kwargs):
564        self._get_fundamentals(proxy=proxy)
565        data = self._mutualfund_holders
566        if data is not None:
567            if as_dict:
568                return data.to_dict()
569            return data
570
571    def get_info(self, proxy=None, as_dict=False, *args, **kwargs):
572        self._get_fundamentals(proxy=proxy)
573        data = self._info
574        if as_dict:
575            return data.to_dict()
576        return data
577
578    def get_sustainability(self, proxy=None, as_dict=False, *args, **kwargs):
579        self._get_fundamentals(proxy=proxy)
580        data = self._sustainability
581        if as_dict:
582            return data.to_dict()
583        return data
584
585    def get_earnings(self, proxy=None, as_dict=False, freq="yearly"):
586        self._get_fundamentals(proxy=proxy)
587        data = self._earnings[freq]
588        if as_dict:
589            dict_data = data.to_dict()
590            dict_data['financialCurrency'] = 'USD' if 'financialCurrency' not in self._earnings else self._earnings['financialCurrency']
591            return dict_data
592        return data
593
594    def get_analysis(self, proxy=None, as_dict=False, *args, **kwargs):
595        self._get_fundamentals(proxy=proxy)
596        data = self._analysis
597        if as_dict:
598            return data.to_dict()
599        return data
600
601    def get_financials(self, proxy=None, as_dict=False, freq="yearly"):
602        self._get_fundamentals(proxy=proxy)
603        data = self._financials[freq]
604        if as_dict:
605            return data.to_dict()
606        return data
607
608    def get_balancesheet(self, proxy=None, as_dict=False, freq="yearly"):
609        self._get_fundamentals(proxy=proxy)
610        data = self._balancesheet[freq]
611        if as_dict:
612            return data.to_dict()
613        return data
614
615    def get_balance_sheet(self, proxy=None, as_dict=False, freq="yearly"):
616        return self.get_balancesheet(proxy, as_dict, freq)
617
618    def get_cashflow(self, proxy=None, as_dict=False, freq="yearly"):
619        self._get_fundamentals(proxy=proxy)
620        data = self._cashflow[freq]
621        if as_dict:
622            return data.to_dict()
623        return data
624
625    def get_dividends(self, proxy=None):
626        if self._history is None:
627            self.history(period="max", proxy=proxy)
628        if self._history is not None and "Dividends" in self._history:
629            dividends = self._history["Dividends"]
630            return dividends[dividends != 0]
631        return []
632
633    def get_splits(self, proxy=None):
634        if self._history is None:
635            self.history(period="max", proxy=proxy)
636        if self._history is not None and "Stock Splits" in self._history:
637            splits = self._history["Stock Splits"]
638            return splits[splits != 0]
639        return []
640
641    def get_actions(self, proxy=None):
642        if self._history is None:
643            self.history(period="max", proxy=proxy)
644        if self._history is not None and "Dividends" in self._history and "Stock Splits" in self._history:
645            actions = self._history[["Dividends", "Stock Splits"]]
646            return actions[actions != 0].dropna(how='all').fillna(0)
647        return []
648
649    def get_isin(self, proxy=None):
650        # *** experimental ***
651        if self._isin is not None:
652            return self._isin
653
654        ticker = self.ticker.upper()
655
656        if "-" in ticker or "^" in ticker:
657            self._isin = '-'
658            return self._isin
659
660        # setup proxy in requests format
661        if proxy is not None:
662            if isinstance(proxy, dict) and "https" in proxy:
663                proxy = proxy["https"]
664            proxy = {"https": proxy}
665
666        q = ticker
667        self.get_info(proxy=proxy)
668        if "shortName" in self._info:
669            q = self._info['shortName']
670
671        url = 'https://markets.businessinsider.com/ajax/' \
672              'SearchController_Suggest?max_results=25&query=%s' \
673            % urlencode(q)
674        session = self.session or _requests
675        data = session.get(
676            url=url,
677            proxies=proxy,
678            headers=utils.user_agent_headers
679        ).text
680
681        search_str = '"{}|'.format(ticker)
682        if search_str not in data:
683            if q.lower() in data.lower():
684                search_str = '"|'
685                if search_str not in data:
686                    self._isin = '-'
687                    return self._isin
688            else:
689                self._isin = '-'
690                return self._isin
691
692        self._isin = data.split(search_str)[1].split('"')[0].split('|')[0]
693        return self._isin
694
695    def get_news(self, proxy=None):
696        if self._news:
697            return self._news
698
699        # setup proxy in requests format
700        if proxy is not None:
701            if isinstance(proxy, dict) and "https" in proxy:
702                proxy = proxy["https"]
703            proxy = {"https": proxy}
704
705        # Getting data from json
706        url = "{}/v1/finance/search?q={}".format(self._base_url, self.ticker)
707        session = self.session or _requests
708        data = session.get(
709            url=url,
710            proxies=proxy,
711            headers=utils.user_agent_headers
712        )
713        if "Will be right back" in data.text:
714            raise RuntimeError("*** YAHOO! FINANCE IS CURRENTLY DOWN! ***\n"
715                               "Our engineers are working quickly to resolve "
716                               "the issue. Thank you for your patience.")
717        data = data.json()
718
719        # parse news
720        self._news = data.get("news", [])
721        return self._news
722