1"""Various display related classes.
2
3Authors : MinRK, gregcaporaso, dannystaple
4"""
5from html import escape as html_escape
6from os.path import exists, isfile, splitext, abspath, join, isdir
7from os import walk, sep, fsdecode
8
9from IPython.core.display import DisplayObject, TextDisplayObject
10
11from typing import Tuple, Iterable
12
13__all__ = ['Audio', 'IFrame', 'YouTubeVideo', 'VimeoVideo', 'ScribdDocument',
14           'FileLink', 'FileLinks', 'Code']
15
16
class Audio(DisplayObject):
    """Create an audio object.

    When this object is returned by an input cell or passed to the
    display function, it will result in Audio controls being displayed
    in the frontend (only works in the notebook).

    Parameters
    ----------
    data : numpy array, list, unicode, str or bytes
        Can be one of

          * Numpy 1d array containing the desired waveform (mono)
          * Numpy 2d array containing waveforms for each channel.
            Shape=(NCHAN, NSAMPLES). For the standard channel order, see
            http://msdn.microsoft.com/en-us/library/windows/hardware/dn653308(v=vs.85).aspx
          * List of float or integer representing the waveform (mono)
          * String containing the filename
          * Bytestring containing raw PCM data or
          * URL pointing to a file on the web.

        If the array option is used, the waveform will be normalized.

        If a filename or url is used, the format support will be browser
        dependent.
    url : unicode
        A URL to download the data from.
    filename : unicode
        Path to a local file to load the data from.
    embed : boolean
        Should the audio data be embedded using a data URI (True) or should
        the original source be referenced. Set this to True if you want the
        audio to be playable later with no internet connection in the notebook.

        Default is `True`, unless the keyword argument `url` is set, then
        default value is `False`.
    rate : integer
        The sampling rate of the raw data.
        Only required when data parameter is being used as an array
    autoplay : bool
        Set to True if the audio should immediately start playing.
        Default is `False`.
    normalize : bool
        Whether audio should be normalized (rescaled) to the maximum possible
        range. Default is `True`. When set to `False`, `data` must be between
        -1 and 1 (inclusive), otherwise an error is raised.
        Applies only when `data` is a list or array of samples; other types of
        audio are never normalized.
    element_id : str, optional (keyword-only)
        Value emitted as the ``id`` attribute of the generated ``<audio>``
        tag. The attribute is omitted when this is empty or `None`.

    Raises
    ------
    ValueError
        If none of `data`, `filename` and `url` is given, if `embed` is
        `False` without a `url`, or if sample data is given without `rate`.

    Examples
    --------
    ::

        # Generate a sound
        import numpy as np
        framerate = 44100
        t = np.linspace(0,5,framerate*5)
        data = np.sin(2*np.pi*220*t) + np.sin(2*np.pi*224*t)
        Audio(data,rate=framerate)

        # Can also do stereo or more channels
        dataleft = np.sin(2*np.pi*220*t)
        dataright = np.sin(2*np.pi*224*t)
        Audio([dataleft, dataright],rate=framerate)

        Audio("http://www.nch.com.au/acm/8k16bitpcm.wav")  # From URL
        Audio(url="http://www.w3schools.com/html/horse.ogg")

        Audio('/path/to/sound.wav')  # From file
        Audio(filename='/path/to/sound.ogg')

        Audio(b'RAW_WAV_DATA..')  # From bytes
        Audio(data=b'RAW_WAV_DATA..')

    See Also
    --------

    See also the ``Audio`` widgets from the ``ipywidgets`` package for more flexibility and options.

    """
    _read_flags = 'rb'

    def __init__(self, data=None, filename=None, url=None, embed=None, rate=None, autoplay=False, normalize=True, *,
                 element_id=None):
        if filename is None and url is None and data is None:
            raise ValueError("No audio data found. Expecting filename, url, or data.")
        if embed is False and url is None:
            raise ValueError("No url found. Expecting url when embed=False")

        # Referencing instead of embedding is the default as soon as a URL is
        # given; an explicit embed=True still forces embedding.
        self.embed = not (url is not None and embed is not True)
        self.autoplay = autoplay
        self.element_id = element_id
        # NOTE(review): the attributes above are set before the base
        # initializer runs -- presumably because it triggers reload(), which
        # reads self.embed. Confirm against DisplayObject before reordering.
        super().__init__(data=data, url=url, filename=filename)

        # Anything that is not already a bytestring is treated as raw samples
        # and encoded into an in-memory WAV file.
        if self.data is not None and not isinstance(self.data, bytes):
            if rate is None:
                raise ValueError("rate must be specified when data is a numpy array or list of audio samples.")
            self.data = Audio._make_wav(data, rate, normalize)

    def reload(self):
        """Reload the raw data from file or URL and guess its MIME type.

        The data itself is only (re)loaded when embedding; the MIME type is
        guessed from the filename, else from the URL, else assumed to be WAV.
        """
        import mimetypes
        if self.embed:
            super().reload()

        if self.filename is not None:
            self.mimetype = mimetypes.guess_type(self.filename)[0]
        elif self.url is not None:
            self.mimetype = mimetypes.guess_type(self.url)[0]
        else:
            self.mimetype = "audio/wav"

    @staticmethod
    def _make_wav(data, rate, normalize):
        """Transform a numpy array (or list of samples) to a WAV bytestring.

        Samples are written as uncompressed 16-bit PCM at `rate` frames per
        second. Numpy is used when importable, otherwise a pure-Python
        fallback that only supports mono data.
        """
        from io import BytesIO
        import wave

        try:
            scaled, nchan = Audio._validate_and_normalize_with_numpy(data, normalize)
        except ImportError:
            scaled, nchan = Audio._validate_and_normalize_without_numpy(data, normalize)

        fp = BytesIO()
        # The context manager guarantees the writer is finalized even if one
        # of the calls below raises. It does not close ``fp`` because the
        # buffer was opened by us, not by the wave module.
        with wave.open(fp, mode='wb') as waveobj:
            waveobj.setnchannels(nchan)
            waveobj.setframerate(rate)
            waveobj.setsampwidth(2)
            waveobj.setcomptype('NONE', 'NONE')
            waveobj.writeframes(scaled)

        return fp.getvalue()

    @staticmethod
    def _validate_and_normalize_with_numpy(data, normalize) -> Tuple[bytes, int]:
        """Scale `data` to little-endian int16 bytes using numpy.

        Returns the PCM bytes and the number of channels. Raises ValueError
        for input that is neither 1D nor 2D.
        """
        import numpy as np

        data = np.array(data, dtype=float)
        if len(data.shape) == 1:
            nchan = 1
        elif len(data.shape) == 2:
            # In wave files,channels are interleaved. E.g.,
            # "L1R1L2R2..." for stereo. See
            # http://msdn.microsoft.com/en-us/library/windows/hardware/dn653308(v=vs.85).aspx
            # for channel ordering
            nchan = data.shape[0]
            data = data.T.ravel()
        else:
            raise ValueError('Array audio input must be a 1D or 2D array')

        max_abs_value = np.max(np.abs(data))
        normalization_factor = Audio._get_normalization_factor(max_abs_value, normalize)
        scaled = data / normalization_factor * 32767
        return scaled.astype("<h").tobytes(), nchan

    @staticmethod
    def _validate_and_normalize_without_numpy(data, normalize):
        """Scale mono `data` to little-endian int16 bytes without numpy.

        Returns the PCM bytes and the channel count (always 1). Raises
        TypeError when `data` is not a flat sequence of numbers.
        """
        import array
        import sys

        try:
            # Building the array is what rejects nested (multi-channel)
            # lists, so it has to live inside the ``try`` for the friendly
            # message below to be reachable.
            data = array.array('f', data)
            max_abs_value = float(max([abs(x) for x in data]))
        except TypeError:
            raise TypeError('Only lists of mono audio are '
                'supported if numpy is not installed')

        normalization_factor = Audio._get_normalization_factor(max_abs_value, normalize)
        scaled = array.array('h', [int(x / normalization_factor * 32767) for x in data])
        if sys.byteorder == 'big':
            # WAV data is little-endian.
            scaled.byteswap()
        nchan = 1
        return scaled.tobytes(), nchan

    @staticmethod
    def _get_normalization_factor(max_abs_value, normalize):
        """Return the divisor that maps the samples into [-1, 1].

        Without normalization the data must already be in range (ValueError
        otherwise) and the factor is 1.
        """
        if not normalize:
            if max_abs_value > 1:
                raise ValueError('Audio data must be between -1 and 1 when normalize=False.')
            return 1
        if max_abs_value == 0:
            # Pure silence: there is nothing to rescale, and dividing by the
            # peak would be a division by zero.
            return 1
        return max_abs_value

    def _data_and_metadata(self):
        """shortcut for returning metadata with url information, if defined"""
        if self.url:
            return self.data, {'url': self.url}
        return self.data

    def _repr_html_(self):
        """Return the ``<audio>`` element used to render this object."""
        # NOTE(review): the values are interpolated without HTML escaping;
        # element_id and url are assumed to be trusted.
        template = """
                <audio {element_id} controls="controls" {autoplay}>
                    <source src="{src}" type="{type}" />
                    Your browser does not support the audio element.
                </audio>
              """
        return template.format(src=self.src_attr(), type=self.mimetype, autoplay=self.autoplay_attr(),
                               element_id=self.element_id_attr())

    def src_attr(self):
        """Return the ``src`` value: a base64 data URI, the URL, or ``""``."""
        import base64
        if self.embed and (self.data is not None):
            encoded = base64.b64encode(self.data).decode('ascii')
            return """data:{type};base64,{base64}""".format(type=self.mimetype,
                                                            base64=encoded)
        elif self.url is not None:
            return self.url
        else:
            return ""

    def autoplay_attr(self):
        """Return the ``autoplay`` attribute text, or ``''`` when disabled."""
        if self.autoplay:
            return 'autoplay="autoplay"'
        return ''

    def element_id_attr(self):
        """Return the ``id`` attribute text, or ``''`` when no id is set."""
        if self.element_id:
            return 'id="{element_id}"'.format(element_id=self.element_id)
        return ''
245
class IFrame(object):
    """
    Generic class to embed an iframe in an IPython notebook

    Parameters
    ----------
    src : str
        Address of the page to embed.
    width, height
        Values for the iframe's ``width`` and ``height`` attributes.
    extras : iterable of str, optional
        Extra attribute strings, joined with spaces and placed verbatim
        inside the ``<iframe>`` tag.
    **kwargs
        URL-encoded and appended to `src` as query parameters.
    """

    iframe = """
        <iframe
            width="{width}"
            height="{height}"
            src="{src}{params}"
            frameborder="0"
            allowfullscreen
            {extras}
        ></iframe>
        """

    def __init__(self, src, width, height, extras: Iterable[str] = None, **kwargs):
        self.src = src
        self.width = width
        self.height = height
        # Materialize the iterable so that a one-shot iterator (e.g. a
        # generator) is still available when the object is rendered twice.
        self.extras = [] if extras is None else list(extras)
        self.params = kwargs

    def _repr_html_(self):
        """return the embed iframe"""
        params = ""
        if self.params:
            from urllib.parse import urlencode

            # Extend an existing query string instead of starting a second
            # one when ``src`` already carries parameters.
            src = str(self.src)
            if "?" not in src:
                joiner = "?"
            elif src.endswith(("?", "&")):
                joiner = ""
            else:
                joiner = "&"
            params = joiner + urlencode(self.params)
        return self.iframe.format(
            src=self.src,
            width=self.width,
            height=self.height,
            params=params,
            extras=" ".join(self.extras),
        )
289
290
class YouTubeVideo(IFrame):
    """Class for embedding a YouTube Video in an IPython session, based on its video id.

    e.g. to embed the video from https://www.youtube.com/watch?v=foo , you would
    do::

        vid = YouTubeVideo("foo")
        display(vid)

    To start from 30 seconds::

        vid = YouTubeVideo("abc", start=30)
        display(vid)

    To calculate seconds from time as hours, minutes, seconds use
    :class:`datetime.timedelta`::

        start=int(timedelta(hours=1, minutes=46, seconds=40).total_seconds())

    Other parameters can be provided as documented at
    https://developers.google.com/youtube/player_parameters#Parameters

    When converting the notebook using nbconvert, a jpeg representation of the video
    will be inserted in the document.

    Parameters
    ----------
    id : str
        The YouTube video id.
    width, height : int
        Size of the embedded player (defaults: 400 x 300).
    allow_autoplay : bool
        When true, adds ``autoplay=1`` to the URL parameters and an
        ``allow="autoplay"`` attribute to the iframe.
    **kwargs
        Forwarded to :class:`IFrame` (URL parameters and ``extras``).
    """

    def __init__(self, id, width=400, height=300, allow_autoplay=False, **kwargs):
        self.id = id
        src = "https://www.youtube.com/embed/{0}".format(id)
        if allow_autoplay:
            # ``extras`` may be absent or explicitly None; both mean "none".
            extras = list(kwargs.get("extras") or []) + ['allow="autoplay"']
            kwargs.update(autoplay=1, extras=extras)
        super().__init__(src, width, height, **kwargs)

    def _repr_jpeg_(self):
        """Return the video's thumbnail as JPEG bytes, or None if the fetch fails."""
        # Deferred import
        from urllib.request import urlopen

        thumbnail_url = "https://img.youtube.com/vi/{id}/hqdefault.jpg".format(id=self.id)
        try:
            # Close the HTTP response deterministically instead of leaking it.
            with urlopen(thumbnail_url) as response:
                return response.read()
        except IOError:
            return None
333
class VimeoVideo(IFrame):
    """
    Embed a Vimeo video in an IPython session, identified by its video id.

    The player is 400 x 300 unless ``width``/``height`` are given; any other
    keyword arguments are handed on to :class:`IFrame`.
    """

    def __init__(self, id, width=400, height=300, **kwargs):
        player_url = "https://player.vimeo.com/video/{}".format(id)
        super().__init__(player_url, width, height, **kwargs)
342
class ScribdDocument(IFrame):
    """
    Embed a Scribd document in an IPython session, identified by its id.

    Pass ``start_page`` to choose the page the document opens on and
    ``view_mode`` to choose the display type, one of
    scroll | slideshow | book.

    e.g. to display Wes' foundational paper about PANDAS in book mode from
    page 3::

        ScribdDocument(71048089, width=800, height=400, start_page=3, view_mode="book")
    """

    def __init__(self, id, width=400, height=300, **kwargs):
        embed_url = "https://www.scribd.com/embeds/{}/content".format(id)
        super().__init__(embed_url, width, height, **kwargs)
358
class FileLink(object):
    """Class for embedding a local file link in an IPython session, based on path

    e.g. to embed a link that was generated in the IPython notebook as my/data.txt

    you would do::

        local_file = FileLink("my/data.txt")
        display(local_file)

    or in the HTML notebook, just::

        FileLink("my/data.txt")
    """

    html_link_str = "<a href='%s' target='_blank'>%s</a>"

    def __init__(self,
                 path,
                 url_prefix='',
                 result_html_prefix='',
                 result_html_suffix='<br>'):
        """
        Parameters
        ----------
        path : str
            path to the file or directory that should be formatted
        url_prefix : str
            prefix to be prepended to all files to form a working link [default:
            '']
        result_html_prefix : str
            text to prepend to the link [default: '']
        result_html_suffix : str
            text to append at the end of link [default: '<br>']

        Raises
        ------
        ValueError
            If `path` is an existing directory.
        """
        if isdir(path):
            raise ValueError("Cannot display a directory using FileLink. "
              "Use FileLinks to display '%s'." % path)
        self.path = fsdecode(path)
        self.url_prefix = url_prefix
        self.result_html_prefix = result_html_prefix
        self.result_html_suffix = result_html_suffix

    def _format_path(self):
        """Return the HTML anchor for the path, wrapped in prefix and suffix."""
        # Quotes are escaped too for the href, which sits in a quoted
        # attribute; the visible link text only needs <, > and & escaped.
        href = ''.join([self.url_prefix, html_escape(self.path)])
        link = self.html_link_str % (href, html_escape(self.path, quote=False))
        return ''.join([self.result_html_prefix,
                        link,
                        self.result_html_suffix])

    def _repr_html_(self):
        """return html link to file
        """
        if not exists(self.path):
            # Escape the path here as well: this message is rendered as HTML
            # just like the link itself.
            return ("Path (<tt>%s</tt>) doesn't exist. "
                    "It may still be in the process of "
                    "being generated, or you may have the "
                    "incorrect path." % html_escape(self.path, quote=False))

        return self._format_path()

    def __repr__(self):
        """return absolute path to file
        """
        return abspath(self.path)
424
class FileLinks(FileLink):
    """Class for embedding local file links in an IPython session, based on path

    e.g. to embed links to files that were generated in the IPython notebook
    under ``my/data``, you would do::

        local_files = FileLinks("my/data")
        display(local_files)

    or in the HTML notebook, just::

        FileLinks("my/data")
    """
    def __init__(self,
                 path,
                 url_prefix='',
                 included_suffixes=None,
                 result_html_prefix='',
                 result_html_suffix='<br>',
                 notebook_display_formatter=None,
                 terminal_display_formatter=None,
                 recursive=True):
        """
        See :class:`FileLink` for the ``path``, ``url_prefix``,
        ``result_html_prefix`` and ``result_html_suffix`` parameters.

        included_suffixes : list
          Filename suffixes to include when formatting output [default: include
          all files]

        notebook_display_formatter : function
          Used to format links for display in the notebook. See discussion of
          formatter functions below.

        terminal_display_formatter : function
          Used to format links for display in the terminal. See discussion of
          formatter functions below.

        recursive : boolean
          Whether to recurse into subdirectories. Default is True.

        Formatter functions must be of the form::

            f(dirname, fnames, included_suffixes)

        dirname : str
          The name of a directory
        fnames : list
          The files in that directory
        included_suffixes : list
          The file suffixes that should be included in the output (passing None
          means to include all suffixes in the output in the built-in formatters)

        The function should return a list of lines that will be printed in the
        notebook (if passing notebook_display_formatter) or the terminal (if
        passing terminal_display_formatter). This function is iterated over for
        each directory in self.path. Default formatters are in place, can be
        passed here to support alternative formatting.

        Raises ValueError if ``path`` is an existing regular file.
        """
        if isfile(path):
            raise ValueError("Cannot display a file using FileLinks. "
              "Use FileLink to display '%s'." % path)
        self.included_suffixes = included_suffixes
        # Accept bytes / os.PathLike like FileLink does.
        path = fsdecode(path)
        # remove trailing slashes for more consistent output formatting, but
        # keep a path made only of slashes (the root directory) usable
        path = path.rstrip('/') or path

        self.path = path
        self.url_prefix = url_prefix
        self.result_html_prefix = result_html_prefix
        self.result_html_suffix = result_html_suffix

        self.notebook_display_formatter = \
             notebook_display_formatter or self._get_notebook_display_formatter()
        self.terminal_display_formatter = \
             terminal_display_formatter or self._get_terminal_display_formatter()

        self.recursive = recursive

    def _get_display_formatter(self,
                               dirname_output_format,
                               fname_output_format,
                               fp_format,
                               fp_cleaner=None):
        """ generate built-in formatter function

           this is used to define both the notebook and terminal built-in
            formatters as they only differ by some wrapper text for each entry

           dirname_output_format: string to use for formatting directory
            names, dirname will be substituted for a single "%s" which
            must appear in this string
           fname_output_format: string to use for formatting file names,
            if a single "%s" appears in the string, fname will be substituted
            if two "%s" appear in the string, the path to fname will be
             substituted for the first and fname will be substituted for the
             second
           fp_format: string to use for formatting filepaths, must contain
            exactly two "%s" and the dirname will be substituted for the first
            and fname will be substituted for the second
           fp_cleaner: optional function applied to each formatted filepath
            before it is substituted into fname_output_format
        """
        def f(dirname, fnames, included_suffixes=None):
            # begin by figuring out which filenames, if any,
            # are going to be displayed
            display_fnames = [
                fname for fname in fnames
                if isfile(join(dirname, fname))
                and (included_suffixes is None
                     or splitext(fname)[1] in included_suffixes)
            ]

            if not display_fnames:
                # if there are no filenames to display, don't print anything
                # (not even the directory name)
                return []

            # otherwise print the formatted directory name followed by
            # the formatted filenames
            result = [dirname_output_format % dirname]
            for fname in display_fnames:
                fp = fp_format % (dirname, fname)
                if fp_cleaner is not None:
                    fp = fp_cleaner(fp)
                try:
                    # output can include both a filepath and a filename...
                    fname_output_line = fname_output_format % (fp, fname)
                except TypeError:
                    # ... or just a single filepath
                    fname_output_line = fname_output_format % fname
                result.append(fname_output_line)
            return result
        return f

    def _get_notebook_display_formatter(self,
                                        spacer="&nbsp;&nbsp;"):
        """ generate function to use for notebook formatting
        """
        dirname_output_format = \
         self.result_html_prefix + "%s/" + self.result_html_suffix
        fname_output_format = \
         self.result_html_prefix + spacer + self.html_link_str + self.result_html_suffix
        fp_format = self.url_prefix + '%s/%s'
        if sep == "\\":
            # Working on a platform where the path separator is "\", so
            # must convert these to "/" for generating a URI
            def fp_cleaner(fp):
                # Replace all occurrences of backslash ("\") with a forward
                # slash ("/") - this is necessary on windows when a path is
                # provided as input, but we must link to a URI
                return fp.replace('\\','/')
        else:
            fp_cleaner = None

        return self._get_display_formatter(dirname_output_format,
                                           fname_output_format,
                                           fp_format,
                                           fp_cleaner)

    def _get_terminal_display_formatter(self,
                                        spacer="  "):
        """ generate function to use for terminal formatting
        """
        dirname_output_format = "%s/"
        fname_output_format = spacer + "%s"
        fp_format = '%s/%s'

        return self._get_display_formatter(dirname_output_format,
                                           fname_output_format,
                                           fp_format)

    def _walked_dirs(self):
        """Return the sorted ``os.walk`` entries that should be displayed.

        Only the top-level entry is returned when ``self.recursive`` is
        false.
        """
        walker = walk(self.path)
        if self.recursive:
            entries = list(walker)
        else:
            # walk() yields nothing when the top directory cannot be listed
            # (e.g. it does not exist); fall back to "no entries" rather than
            # letting StopIteration escape.
            top = next(walker, None)
            entries = [] if top is None else [top]
        entries.sort()
        return entries

    def _format_lines(self, formatter):
        """Apply `formatter` to every walked directory and join the lines."""
        result_lines = []
        for dirname, subdirs, fnames in self._walked_dirs():
            result_lines += formatter(dirname, fnames, self.included_suffixes)
        return '\n'.join(result_lines)

    def _format_path(self):
        """Return the newline-separated HTML lines for the notebook."""
        return self._format_lines(self.notebook_display_formatter)

    def __repr__(self):
        """return newline-separated absolute paths
        """
        return self._format_lines(self.terminal_display_formatter)
619
620
class Code(TextDisplayObject):
    """Display syntax-highlighted source code.

    Highlighting for the HTML and Latex representations is done by Pygments.

    Parameters
    ----------
    data : str
        The source code itself
    url : str
        A URL from which the code is fetched
    filename : str
        A local file from which the code is loaded
    language : str
        Short name of the Pygments lexer to highlight with. When omitted,
        the lexer is guessed from the filename, or failing that from the
        code. Available lexers: http://pygments.org/docs/lexers/
    """
    def __init__(self, data=None, url=None, filename=None, language=None):
        self.language = language
        super().__init__(data=data, url=url, filename=filename)

    def _get_lexer(self):
        """Choose the Pygments lexer: explicit language, filename, then guess."""
        from pygments.lexers import (
            get_lexer_by_name,
            get_lexer_for_filename,
            guess_lexer,
        )
        if self.language:
            return get_lexer_by_name(self.language)
        if self.filename:
            return get_lexer_for_filename(self.filename)
        return guess_lexer(self.data)

    def __repr__(self):
        # The plain representation is the unhighlighted source text.
        return self.data

    def _repr_html_(self):
        """Return the highlighted code as HTML, preceded by its stylesheet."""
        from pygments import highlight
        from pygments.formatters import HtmlFormatter
        formatter = HtmlFormatter()
        css = formatter.get_style_defs('.output_html')
        stylesheet = '<style>{}</style>'.format(css)
        markup = highlight(self.data, self._get_lexer(), formatter)
        return stylesheet + markup

    def _repr_latex_(self):
        """Return the highlighted code as Latex."""
        from pygments import highlight
        from pygments.formatters import LatexFormatter
        lexer = self._get_lexer()
        return highlight(self.data, lexer, LatexFormatter())
668