1#!/usr/bin/env python 2# -*- coding: utf-8 -*- 3# 4# Yahoo! Finance market data downloader (+fix for Pandas Datareader) 5# https://github.com/ranaroussi/yfinance 6# 7# Copyright 2017-2019 Ran Aroussi 8# 9# Licensed under the Apache License, Version 2.0 (the "License"); 10# you may not use this file except in compliance with the License. 11# You may obtain a copy of the License at 12# 13# http://www.apache.org/licenses/LICENSE-2.0 14# 15# Unless required by applicable law or agreed to in writing, software 16# distributed under the License is distributed on an "AS IS" BASIS, 17# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18# See the License for the specific language governing permissions and 19# limitations under the License. 20# 21 22from __future__ import print_function 23 24import time as _time 25import datetime as _datetime 26import requests as _requests 27import pandas as _pd 28import numpy as _np 29import re as _re 30 31try: 32 from urllib.parse import quote as urlencode 33except ImportError: 34 from urllib import quote as urlencode 35 36from . import utils 37 38# import json as _json 39# import re as _re 40# import sys as _sys 41 42from . import shared 43 44_BASE_URL_ = 'https://query2.finance.yahoo.com' 45_SCRAPE_URL_ = 'https://finance.yahoo.com/quote' 46 47class TickerBase(): 48 def __init__(self, ticker, session=None): 49 self.ticker = ticker.upper() 50 self.session = session 51 self._history = None 52 self._base_url = _BASE_URL_ 53 self._scrape_url = _SCRAPE_URL_ 54 55 self._fundamentals = False 56 self._info = None 57 self._analysis = None 58 self._sustainability = None 59 self._recommendations = None 60 self._major_holders = None 61 self._institutional_holders = None 62 self._mutualfund_holders = None 63 self._isin = None 64 self._news = [] 65 66 self._calendar = None 67 self._expirations = {} 68 69 self._earnings = { 70 "yearly": utils.empty_df(), 71 "quarterly": utils.empty_df()} 72 self._financials = { 73 "yearly": utils.empty_df(), 74 "quarterly": utils.empty_df()} 75 self._balancesheet = { 76 "yearly": utils.empty_df(), 77 "quarterly": utils.empty_df()} 78 self._cashflow = { 79 "yearly": utils.empty_df(), 80 "quarterly": utils.empty_df()} 81 82 # accept isin as ticker 83 if utils.is_isin(self.ticker): 84 self.ticker = utils.get_ticker_by_isin(self.ticker, None, session) 85 86 def stats(self, proxy=None): 87 # setup proxy in requests format 88 if proxy is not None: 89 if isinstance(proxy, dict) and "https" in proxy: 90 proxy = proxy["https"] 91 proxy = {"https": proxy} 92 93 if self._fundamentals: 94 return 95 96 ticker_url = "{}/{}".format(self._scrape_url, self.ticker) 97 98 # get info and sustainability 99 data = utils.get_json(ticker_url, proxy, self.session) 100 return data 101 102 def history(self, period="1mo", interval="1d", 103 start=None, end=None, prepost=False, actions=True, 104 auto_adjust=True, back_adjust=False, 105 proxy=None, rounding=False, tz=None, timeout=None, **kwargs): 106 """ 107 :Parameters: 108 period : str 109 Valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max 110 Either Use period parameter or use start and end 111 interval : str 112 Valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo 113 Intraday data cannot extend last 60 days 114 start: str 115 Download start date string (YYYY-MM-DD) or _datetime. 116 Default is 1900-01-01 117 end: str 118 Download end date string (YYYY-MM-DD) or _datetime. 119 Default is now 120 prepost : bool 121 Include Pre and Post market data in results? 122 Default is False 123 auto_adjust: bool 124 Adjust all OHLC automatically? Default is True 125 back_adjust: bool 126 Back-adjusted data to mimic true historical prices 127 proxy: str 128 Optional. Proxy server URL scheme. Default is None 129 rounding: bool 130 Round values to 2 decimal places? 131 Optional. Default is False = precision suggested by Yahoo! 132 tz: str 133 Optional timezone locale for dates. 134 (default data is returned as non-localized dates) 135 timeout: None or float 136 If not None stops waiting for a response after given number of 137 seconds. (Can also be a fraction of a second e.g. 0.01) 138 Default is None. 139 **kwargs: dict 140 debug: bool 141 Optional. If passed as False, will suppress 142 error message printing to console. 143 """ 144 145 if start or period is None or period.lower() == "max": 146 if start is None: 147 start = -631159200 148 elif isinstance(start, _datetime.datetime): 149 start = int(_time.mktime(start.timetuple())) 150 else: 151 start = int(_time.mktime( 152 _time.strptime(str(start), '%Y-%m-%d'))) 153 if end is None: 154 end = int(_time.time()) 155 elif isinstance(end, _datetime.datetime): 156 end = int(_time.mktime(end.timetuple())) 157 else: 158 end = int(_time.mktime(_time.strptime(str(end), '%Y-%m-%d'))) 159 160 params = {"period1": start, "period2": end} 161 else: 162 period = period.lower() 163 params = {"range": period} 164 165 params["interval"] = interval.lower() 166 params["includePrePost"] = prepost 167 params["events"] = "div,splits" 168 169 # 1) fix weired bug with Yahoo! - returning 60m for 30m bars 170 if params["interval"] == "30m": 171 params["interval"] = "15m" 172 173 # setup proxy in requests format 174 if proxy is not None: 175 if isinstance(proxy, dict) and "https" in proxy: 176 proxy = proxy["https"] 177 proxy = {"https": proxy} 178 179 # Getting data from json 180 url = "{}/v8/finance/chart/{}".format(self._base_url, self.ticker) 181 182 session = self.session or _requests 183 data = session.get( 184 url=url, 185 params=params, 186 proxies=proxy, 187 headers=utils.user_agent_headers, 188 timeout=timeout 189 ) 190 if "Will be right back" in data.text: 191 raise RuntimeError("*** YAHOO! FINANCE IS CURRENTLY DOWN! ***\n" 192 "Our engineers are working quickly to resolve " 193 "the issue. Thank you for your patience.") 194 data = data.json() 195 196 # Work with errors 197 debug_mode = True 198 if "debug" in kwargs and isinstance(kwargs["debug"], bool): 199 debug_mode = kwargs["debug"] 200 201 err_msg = "No data found for this date range, symbol may be delisted" 202 if "chart" in data and data["chart"]["error"]: 203 err_msg = data["chart"]["error"]["description"] 204 shared._DFS[self.ticker] = utils.empty_df() 205 shared._ERRORS[self.ticker] = err_msg 206 if "many" not in kwargs and debug_mode: 207 print('- %s: %s' % (self.ticker, err_msg)) 208 return shared._DFS[self.ticker] 209 210 elif "chart" not in data or data["chart"]["result"] is None or \ 211 not data["chart"]["result"]: 212 shared._DFS[self.ticker] = utils.empty_df() 213 shared._ERRORS[self.ticker] = err_msg 214 if "many" not in kwargs and debug_mode: 215 print('- %s: %s' % (self.ticker, err_msg)) 216 return shared._DFS[self.ticker] 217 218 # parse quotes 219 try: 220 quotes = utils.parse_quotes(data["chart"]["result"][0], tz) 221 except Exception: 222 shared._DFS[self.ticker] = utils.empty_df() 223 shared._ERRORS[self.ticker] = err_msg 224 if "many" not in kwargs and debug_mode: 225 print('- %s: %s' % (self.ticker, err_msg)) 226 return shared._DFS[self.ticker] 227 228 # 2) fix weired bug with Yahoo! - returning 60m for 30m bars 229 if interval.lower() == "30m": 230 quotes2 = quotes.resample('30T') 231 quotes = _pd.DataFrame(index=quotes2.last().index, data={ 232 'Open': quotes2['Open'].first(), 233 'High': quotes2['High'].max(), 234 'Low': quotes2['Low'].min(), 235 'Close': quotes2['Close'].last(), 236 'Adj Close': quotes2['Adj Close'].last(), 237 'Volume': quotes2['Volume'].sum() 238 }) 239 try: 240 quotes['Dividends'] = quotes2['Dividends'].max() 241 except Exception: 242 pass 243 try: 244 quotes['Stock Splits'] = quotes2['Dividends'].max() 245 except Exception: 246 pass 247 248 try: 249 if auto_adjust: 250 quotes = utils.auto_adjust(quotes) 251 elif back_adjust: 252 quotes = utils.back_adjust(quotes) 253 except Exception as e: 254 if auto_adjust: 255 err_msg = "auto_adjust failed with %s" % e 256 else: 257 err_msg = "back_adjust failed with %s" % e 258 shared._DFS[self.ticker] = utils.empty_df() 259 shared._ERRORS[self.ticker] = err_msg 260 if "many" not in kwargs and debug_mode: 261 print('- %s: %s' % (self.ticker, err_msg)) 262 263 if rounding: 264 quotes = _np.round(quotes, data[ 265 "chart"]["result"][0]["meta"]["priceHint"]) 266 quotes['Volume'] = quotes['Volume'].fillna(0).astype(_np.int64) 267 268 quotes.dropna(inplace=True) 269 270 # actions 271 dividends, splits = utils.parse_actions(data["chart"]["result"][0], tz) 272 273 # combine 274 df = _pd.concat([quotes, dividends, splits], axis=1, sort=True) 275 df["Dividends"].fillna(0, inplace=True) 276 df["Stock Splits"].fillna(0, inplace=True) 277 278 # index eod/intraday 279 df.index = df.index.tz_localize("UTC").tz_convert( 280 data["chart"]["result"][0]["meta"]["exchangeTimezoneName"]) 281 282 if params["interval"][-1] == "m": 283 df.index.name = "Datetime" 284 elif params["interval"] == "1h": 285 pass 286 else: 287 df.index = _pd.to_datetime(df.index.date) 288 if tz is not None: 289 df.index = df.index.tz_localize(tz) 290 df.index.name = "Date" 291 292 # duplicates and missing rows cleanup 293 df.dropna(how='all', inplace=True) 294 df = df[~df.index.duplicated(keep='first')] 295 296 self._history = df.copy() 297 298 if not actions: 299 df.drop(columns=["Dividends", "Stock Splits"], inplace=True) 300 301 return df 302 303 # ------------------------ 304 305 def _get_fundamentals(self, kind=None, proxy=None): 306 def cleanup(data): 307 df = _pd.DataFrame(data).drop(columns=['maxAge']) 308 for col in df.columns: 309 df[col] = _np.where( 310 df[col].astype(str) == '-', _np.nan, df[col]) 311 312 df.set_index('endDate', inplace=True) 313 try: 314 df.index = _pd.to_datetime(df.index, unit='s') 315 except ValueError: 316 df.index = _pd.to_datetime(df.index) 317 df = df.T 318 df.columns.name = '' 319 df.index.name = 'Breakdown' 320 321 df.index = utils.camel2title(df.index) 322 return df 323 324 # setup proxy in requests format 325 if proxy is not None: 326 if isinstance(proxy, dict) and "https" in proxy: 327 proxy = proxy["https"] 328 proxy = {"https": proxy} 329 330 if self._fundamentals: 331 return 332 333 ticker_url = "{}/{}".format(self._scrape_url, self.ticker) 334 335 # get info and sustainability 336 data = utils.get_json(ticker_url, proxy, self.session) 337 338 # holders 339 try: 340 resp = utils.get_html(ticker_url + '/holders', proxy, self.session) 341 holders = _pd.read_html(resp) 342 except Exception: 343 holders = [] 344 345 if len(holders) >= 3: 346 self._major_holders = holders[0] 347 self._institutional_holders = holders[1] 348 self._mutualfund_holders = holders[2] 349 elif len(holders) >= 2: 350 self._major_holders = holders[0] 351 self._institutional_holders = holders[1] 352 elif len(holders) >= 1: 353 self._major_holders = holders[0] 354 355 # self._major_holders = holders[0] 356 # self._institutional_holders = holders[1] 357 358 if self._institutional_holders is not None: 359 if 'Date Reported' in self._institutional_holders: 360 self._institutional_holders['Date Reported'] = _pd.to_datetime( 361 self._institutional_holders['Date Reported']) 362 if '% Out' in self._institutional_holders: 363 self._institutional_holders['% Out'] = self._institutional_holders[ 364 '% Out'].str.replace('%', '').astype(float) / 100 365 366 if self._mutualfund_holders is not None: 367 if 'Date Reported' in self._mutualfund_holders: 368 self._mutualfund_holders['Date Reported'] = _pd.to_datetime( 369 self._mutualfund_holders['Date Reported']) 370 if '% Out' in self._mutualfund_holders: 371 self._mutualfund_holders['% Out'] = self._mutualfund_holders[ 372 '% Out'].str.replace('%', '').astype(float) / 100 373 374 # sustainability 375 d = {} 376 try: 377 if isinstance(data.get('esgScores'), dict): 378 for item in data['esgScores']: 379 if not isinstance(data['esgScores'][item], (dict, list)): 380 d[item] = data['esgScores'][item] 381 382 s = _pd.DataFrame(index=[0], data=d)[-1:].T 383 s.columns = ['Value'] 384 s.index.name = '%.f-%.f' % ( 385 s[s.index == 'ratingYear']['Value'].values[0], 386 s[s.index == 'ratingMonth']['Value'].values[0]) 387 388 self._sustainability = s[~s.index.isin( 389 ['maxAge', 'ratingYear', 'ratingMonth'])] 390 except Exception: 391 pass 392 393 # info (be nice to python 2) 394 self._info = {} 395 try: 396 items = ['summaryProfile', 'financialData', 'quoteType', 397 'defaultKeyStatistics', 'assetProfile', 'summaryDetail'] 398 for item in items: 399 if isinstance(data.get(item), dict): 400 self._info.update(data[item]) 401 except Exception: 402 pass 403 404 # For ETFs, provide this valuable data: the top holdings of the ETF 405 try: 406 if 'topHoldings' in data: 407 self._info.update(data['topHoldings']) 408 except Exception: 409 pass 410 411 try: 412 if not isinstance(data.get('summaryDetail'), dict): 413 # For some reason summaryDetail did not give any results. The price dict usually has most of the same info 414 self._info.update(data.get('price', {})) 415 except Exception: 416 pass 417 418 try: 419 # self._info['regularMarketPrice'] = self._info['regularMarketOpen'] 420 self._info['regularMarketPrice'] = data.get('price', {}).get( 421 'regularMarketPrice', self._info.get('regularMarketOpen', None)) 422 except Exception: 423 pass 424 425 try: 426 self._info['preMarketPrice'] = data.get('price', {}).get( 427 'preMarketPrice', self._info.get('preMarketPrice', None)) 428 except Exception: 429 pass 430 431 self._info['logo_url'] = "" 432 try: 433 domain = self._info['website'].split( 434 '://')[1].split('/')[0].replace('www.', '') 435 self._info['logo_url'] = 'https://logo.clearbit.com/%s' % domain 436 except Exception: 437 pass 438 439 # events 440 try: 441 cal = _pd.DataFrame( 442 data['calendarEvents']['earnings']) 443 cal['earningsDate'] = _pd.to_datetime( 444 cal['earningsDate'], unit='s') 445 self._calendar = cal.T 446 self._calendar.index = utils.camel2title(self._calendar.index) 447 self._calendar.columns = ['Value'] 448 except Exception: 449 pass 450 451 # analyst recommendations 452 try: 453 rec = _pd.DataFrame( 454 data['upgradeDowngradeHistory']['history']) 455 rec['earningsDate'] = _pd.to_datetime( 456 rec['epochGradeDate'], unit='s') 457 rec.set_index('earningsDate', inplace=True) 458 rec.index.name = 'Date' 459 rec.columns = utils.camel2title(rec.columns) 460 self._recommendations = rec[[ 461 'Firm', 'To Grade', 'From Grade', 'Action']].sort_index() 462 except Exception: 463 pass 464 465 # get fundamentals 466 data = utils.get_json(ticker_url + '/financials', proxy, self.session) 467 468 # generic patterns 469 for key in ( 470 (self._cashflow, 'cashflowStatement', 'cashflowStatements'), 471 (self._balancesheet, 'balanceSheet', 'balanceSheetStatements'), 472 (self._financials, 'incomeStatement', 'incomeStatementHistory') 473 ): 474 item = key[1] + 'History' 475 if isinstance(data.get(item), dict): 476 try: 477 key[0]['yearly'] = cleanup(data[item][key[2]]) 478 except Exception: 479 pass 480 481 item = key[1] + 'HistoryQuarterly' 482 if isinstance(data.get(item), dict): 483 try: 484 key[0]['quarterly'] = cleanup(data[item][key[2]]) 485 except Exception: 486 pass 487 488 # earnings 489 if isinstance(data.get('earnings'), dict): 490 try: 491 earnings = data['earnings']['financialsChart'] 492 earnings['financialCurrency'] = 'USD' if 'financialCurrency' not in data['earnings'] else data['earnings']['financialCurrency'] 493 self._earnings['financialCurrency'] = earnings['financialCurrency'] 494 df = _pd.DataFrame(earnings['yearly']).set_index('date') 495 df.columns = utils.camel2title(df.columns) 496 df.index.name = 'Year' 497 self._earnings['yearly'] = df 498 499 df = _pd.DataFrame(earnings['quarterly']).set_index('date') 500 df.columns = utils.camel2title(df.columns) 501 df.index.name = 'Quarter' 502 self._earnings['quarterly'] = df 503 except Exception: 504 pass 505 506 # Analysis 507 data = utils.get_json(ticker_url + '/analysis', proxy, self.session) 508 509 if isinstance(data.get('earningsTrend'), dict): 510 try: 511 analysis = _pd.DataFrame(data['earningsTrend']['trend']) 512 analysis['endDate'] = _pd.to_datetime(analysis['endDate']) 513 analysis.set_index('period', inplace=True) 514 analysis.index = analysis.index.str.upper() 515 analysis.index.name = 'Period' 516 analysis.columns = utils.camel2title(analysis.columns) 517 518 dict_cols = [] 519 520 for idx, row in analysis.iterrows(): 521 for colname, colval in row.items(): 522 if isinstance(colval, dict): 523 dict_cols.append(colname) 524 for k, v in colval.items(): 525 new_colname = colname + ' ' + utils.camel2title([k])[0] 526 analysis.loc[idx, new_colname] = v 527 528 self._analysis = analysis[[c for c in analysis.columns if c not in dict_cols]] 529 except Exception: 530 pass 531 532 self._fundamentals = True 533 534 def get_recommendations(self, proxy=None, as_dict=False, *args, **kwargs): 535 self._get_fundamentals(proxy=proxy) 536 data = self._recommendations 537 if as_dict: 538 return data.to_dict() 539 return data 540 541 def get_calendar(self, proxy=None, as_dict=False, *args, **kwargs): 542 self._get_fundamentals(proxy=proxy) 543 data = self._calendar 544 if as_dict: 545 return data.to_dict() 546 return data 547 548 def get_major_holders(self, proxy=None, as_dict=False, *args, **kwargs): 549 self._get_fundamentals(proxy=proxy) 550 data = self._major_holders 551 if as_dict: 552 return data.to_dict() 553 return data 554 555 def get_institutional_holders(self, proxy=None, as_dict=False, *args, **kwargs): 556 self._get_fundamentals(proxy=proxy) 557 data = self._institutional_holders 558 if data is not None: 559 if as_dict: 560 return data.to_dict() 561 return data 562 563 def get_mutualfund_holders(self, proxy=None, as_dict=False, *args, **kwargs): 564 self._get_fundamentals(proxy=proxy) 565 data = self._mutualfund_holders 566 if data is not None: 567 if as_dict: 568 return data.to_dict() 569 return data 570 571 def get_info(self, proxy=None, as_dict=False, *args, **kwargs): 572 self._get_fundamentals(proxy=proxy) 573 data = self._info 574 if as_dict: 575 return data.to_dict() 576 return data 577 578 def get_sustainability(self, proxy=None, as_dict=False, *args, **kwargs): 579 self._get_fundamentals(proxy=proxy) 580 data = self._sustainability 581 if as_dict: 582 return data.to_dict() 583 return data 584 585 def get_earnings(self, proxy=None, as_dict=False, freq="yearly"): 586 self._get_fundamentals(proxy=proxy) 587 data = self._earnings[freq] 588 if as_dict: 589 dict_data = data.to_dict() 590 dict_data['financialCurrency'] = 'USD' if 'financialCurrency' not in self._earnings else self._earnings['financialCurrency'] 591 return dict_data 592 return data 593 594 def get_analysis(self, proxy=None, as_dict=False, *args, **kwargs): 595 self._get_fundamentals(proxy=proxy) 596 data = self._analysis 597 if as_dict: 598 return data.to_dict() 599 return data 600 601 def get_financials(self, proxy=None, as_dict=False, freq="yearly"): 602 self._get_fundamentals(proxy=proxy) 603 data = self._financials[freq] 604 if as_dict: 605 return data.to_dict() 606 return data 607 608 def get_balancesheet(self, proxy=None, as_dict=False, freq="yearly"): 609 self._get_fundamentals(proxy=proxy) 610 data = self._balancesheet[freq] 611 if as_dict: 612 return data.to_dict() 613 return data 614 615 def get_balance_sheet(self, proxy=None, as_dict=False, freq="yearly"): 616 return self.get_balancesheet(proxy, as_dict, freq) 617 618 def get_cashflow(self, proxy=None, as_dict=False, freq="yearly"): 619 self._get_fundamentals(proxy=proxy) 620 data = self._cashflow[freq] 621 if as_dict: 622 return data.to_dict() 623 return data 624 625 def get_dividends(self, proxy=None): 626 if self._history is None: 627 self.history(period="max", proxy=proxy) 628 if self._history is not None and "Dividends" in self._history: 629 dividends = self._history["Dividends"] 630 return dividends[dividends != 0] 631 return [] 632 633 def get_splits(self, proxy=None): 634 if self._history is None: 635 self.history(period="max", proxy=proxy) 636 if self._history is not None and "Stock Splits" in self._history: 637 splits = self._history["Stock Splits"] 638 return splits[splits != 0] 639 return [] 640 641 def get_actions(self, proxy=None): 642 if self._history is None: 643 self.history(period="max", proxy=proxy) 644 if self._history is not None and "Dividends" in self._history and "Stock Splits" in self._history: 645 actions = self._history[["Dividends", "Stock Splits"]] 646 return actions[actions != 0].dropna(how='all').fillna(0) 647 return [] 648 649 def get_isin(self, proxy=None): 650 # *** experimental *** 651 if self._isin is not None: 652 return self._isin 653 654 ticker = self.ticker.upper() 655 656 if "-" in ticker or "^" in ticker: 657 self._isin = '-' 658 return self._isin 659 660 # setup proxy in requests format 661 if proxy is not None: 662 if isinstance(proxy, dict) and "https" in proxy: 663 proxy = proxy["https"] 664 proxy = {"https": proxy} 665 666 q = ticker 667 self.get_info(proxy=proxy) 668 if "shortName" in self._info: 669 q = self._info['shortName'] 670 671 url = 'https://markets.businessinsider.com/ajax/' \ 672 'SearchController_Suggest?max_results=25&query=%s' \ 673 % urlencode(q) 674 session = self.session or _requests 675 data = session.get( 676 url=url, 677 proxies=proxy, 678 headers=utils.user_agent_headers 679 ).text 680 681 search_str = '"{}|'.format(ticker) 682 if search_str not in data: 683 if q.lower() in data.lower(): 684 search_str = '"|' 685 if search_str not in data: 686 self._isin = '-' 687 return self._isin 688 else: 689 self._isin = '-' 690 return self._isin 691 692 self._isin = data.split(search_str)[1].split('"')[0].split('|')[0] 693 return self._isin 694 695 def get_news(self, proxy=None): 696 if self._news: 697 return self._news 698 699 # setup proxy in requests format 700 if proxy is not None: 701 if isinstance(proxy, dict) and "https" in proxy: 702 proxy = proxy["https"] 703 proxy = {"https": proxy} 704 705 # Getting data from json 706 url = "{}/v1/finance/search?q={}".format(self._base_url, self.ticker) 707 session = self.session or _requests 708 data = session.get( 709 url=url, 710 proxies=proxy, 711 headers=utils.user_agent_headers 712 ) 713 if "Will be right back" in data.text: 714 raise RuntimeError("*** YAHOO! FINANCE IS CURRENTLY DOWN! ***\n" 715 "Our engineers are working quickly to resolve " 716 "the issue. Thank you for your patience.") 717 data = data.json() 718 719 # parse news 720 self._news = data.get("news", []) 721 return self._news 722