1"""A base class for contents managers."""
2# Copyright (c) Jupyter Development Team.
3# Distributed under the terms of the Modified BSD License.
4import itertools
5import json
6import re
7from fnmatch import fnmatch
8
9from ipython_genutils.importstring import import_item
10from nbformat import sign
11from nbformat import validate as validate_nb
12from nbformat import ValidationError
13from nbformat.v4 import new_notebook
14from tornado.web import HTTPError
15from tornado.web import RequestHandler
16from traitlets import Any
17from traitlets import Bool
18from traitlets import default
19from traitlets import Dict
20from traitlets import Instance
21from traitlets import List
22from traitlets import TraitError
23from traitlets import Type
24from traitlets import Unicode
25from traitlets import validate
26from traitlets.config.configurable import LoggingConfigurable
27
28from ...files.handlers import FilesHandler
29from .checkpoints import AsyncCheckpoints
30from .checkpoints import Checkpoints
31from jupyter_server.transutils import _i18n
32from jupyter_server.utils import ensure_async
33
34
# Matches the "-Copy<N>." marker (N optional) that sits right before a file
# extension. `copy` substitutes "." for it so that copying a copy does not
# stack markers before the name is re-numbered.
copy_pat = re.compile(r"\-Copy\d*\.")
36
37
class ContentsManager(LoggingConfigurable):
    """Base class for serving files and directories.

    This serves any text or binary file,
    as well as directories,
    with special handling for JSON notebook documents.

    Most APIs take a path argument,
    which is always an API-style unicode path,
    and always refers to a directory.

    - unicode, not url-escaped
    - '/'-separated
    - leading and trailing '/' will be stripped
    - if unspecified, path defaults to '',
      indicating the root path.

    """

    root_dir = Unicode("/", config=True)

    allow_hidden = Bool(False, config=True, help="Allow access to hidden files")

    notary = Instance(sign.NotebookNotary)

    def _notary_default(self):
        """Build the default notary, parented to this manager."""
        return sign.NotebookNotary(parent=self)

    hide_globs = List(
        Unicode(),
        [
            "__pycache__",
            "*.pyc",
            "*.pyo",
            ".DS_Store",
            "*.so",
            "*.dylib",
            "*~",
        ],
        config=True,
        help="""
        Glob patterns to hide in file and directory listings.
    """,
    )

    untitled_notebook = Unicode(
        _i18n("Untitled"), config=True, help="The base name used when creating untitled notebooks."
    )

    untitled_file = Unicode(
        "untitled", config=True, help="The base name used when creating untitled files."
    )

    untitled_directory = Unicode(
        "Untitled Folder",
        config=True,
        help="The base name used when creating untitled directories.",
    )

    pre_save_hook = Any(
        None,
        config=True,
        allow_none=True,
        help="""Python callable or importstring thereof

        To be called on a contents model prior to save.

        This can be used to process the structure,
        such as removing notebook outputs or other side effects that
        should not be saved.

        It will be called as (all arguments passed by keyword)::

            hook(path=path, model=model, contents_manager=self)

        - model: the model to be saved. Includes file contents.
          Modifying this dict will affect the file that is stored.
        - path: the API path of the save destination
        - contents_manager: this ContentsManager instance
        """,
    )

    @validate("pre_save_hook")
    def _validate_pre_save_hook(self, proposal):
        """Resolve and validate a proposed ``pre_save_hook`` value.

        A string is treated as an import string and resolved with
        ``import_item``. The resulting object must be callable.

        Raises
        ------
        TraitError
            If the (resolved) value is not callable.
        """
        value = proposal["value"]
        if isinstance(value, str):
            # Resolve the *proposed* value. The validator runs before the
            # assignment takes effect, so self.pre_save_hook would still be
            # the previous value here.
            value = import_item(value)
        if not callable(value):
            raise TraitError("pre_save_hook must be callable")
        return value

    def run_pre_save_hook(self, model, path, **kwargs):
        """Run the pre-save hook if defined, and log errors

        HTTPError raised by the hook propagates to the caller; any other
        exception is logged and swallowed.
        """
        if self.pre_save_hook:
            try:
                self.log.debug("Running pre-save hook on %s", path)
                self.pre_save_hook(model=model, path=path, contents_manager=self, **kwargs)
            except HTTPError:
                # allow custom HTTPErrors to raise,
                # rejecting the save with a message.
                raise
            except Exception:
                # unhandled errors don't prevent saving,
                # which could cause frustrating data loss
                self.log.error("Pre-save hook failed on %s", path, exc_info=True)

    checkpoints_class = Type(Checkpoints, config=True)
    checkpoints = Instance(Checkpoints, config=True)
    checkpoints_kwargs = Dict(config=True)

    @default("checkpoints")
    def _default_checkpoints(self):
        """Instantiate ``checkpoints_class`` with ``checkpoints_kwargs``."""
        return self.checkpoints_class(**self.checkpoints_kwargs)

    @default("checkpoints_kwargs")
    def _default_checkpoints_kwargs(self):
        """Default constructor kwargs: this manager as parent, plus its logger."""
        return dict(
            parent=self,
            log=self.log,
        )

    files_handler_class = Type(
        FilesHandler,
        klass=RequestHandler,
        allow_none=True,
        config=True,
        help="""handler class to use when serving raw file requests.

        Default is a fallback that talks to the ContentsManager API,
        which may be inefficient, especially for large files.

        Local files-based ContentsManagers can use a StaticFileHandler subclass,
        which will be much more efficient.

        Access to these files should be Authenticated.
        """,
    )

    files_handler_params = Dict(
        config=True,
        help="""Extra parameters to pass to files_handler_class.

        For example, StaticFileHandlers generally expect a `path` argument
        specifying the root directory from which to serve files.
        """,
    )

    def get_extra_handlers(self):
        """Return additional handlers

        Default: self.files_handler_class on /files/.*

        Returns an empty list when ``files_handler_class`` is unset.
        """
        handlers = []
        if self.files_handler_class:
            handlers.append((r"/files/(.*)", self.files_handler_class, self.files_handler_params))
        return handlers

    # ContentsManager API part 1: methods that must be
    # implemented in subclasses.

    def dir_exists(self, path):
        """Does a directory exist at the given path?

        Like os.path.isdir

        Override this method in subclasses.

        Parameters
        ----------
        path : string
            The path to check

        Returns
        -------
        exists : bool
            Whether the path does indeed exist.
        """
        raise NotImplementedError

    def is_hidden(self, path):
        """Is path a hidden directory or file?

        Override this method in subclasses.

        Parameters
        ----------
        path : string
            The path to check. This is an API path (`/` separated,
            relative to root dir).

        Returns
        -------
        hidden : bool
            Whether the path is hidden.

        """
        raise NotImplementedError

    def file_exists(self, path=""):
        """Does a file exist at the given path?

        Like os.path.isfile

        Override this method in subclasses.

        Parameters
        ----------
        path : string
            The API path of a file to check for.

        Returns
        -------
        exists : bool
            Whether the file exists.
        """
        raise NotImplementedError("must be implemented in a subclass")

    def exists(self, path):
        """Does a file or directory exist at the given path?

        Like os.path.exists

        Parameters
        ----------
        path : string
            The API path of a file or directory to check for.

        Returns
        -------
        exists : bool
            Whether the target exists.
        """
        return self.file_exists(path) or self.dir_exists(path)

    def get(self, path, content=True, type=None, format=None):
        """Get a file or directory model.

        Override this method in subclasses.
        """
        raise NotImplementedError("must be implemented in a subclass")

    def save(self, model, path):
        """
        Save a file or directory model to path.

        Should return the saved model with no content.  Save implementations
        should call self.run_pre_save_hook(model=model, path=path) prior to
        writing any data.
        """
        raise NotImplementedError("must be implemented in a subclass")

    def delete_file(self, path):
        """Delete the file or directory at path.

        Override this method in subclasses.
        """
        raise NotImplementedError("must be implemented in a subclass")

    def rename_file(self, old_path, new_path):
        """Rename a file or directory.

        Override this method in subclasses.
        """
        raise NotImplementedError("must be implemented in a subclass")

    # ContentsManager API part 2: methods that have useable default
    # implementations, but can be overridden in subclasses.

    def delete(self, path):
        """Delete a file/directory and any associated checkpoints.

        Raises
        ------
        HTTPError
            400 if ``path`` is empty once surrounding '/' are stripped
            (i.e. the root).
        """
        path = path.strip("/")
        if not path:
            raise HTTPError(400, "Can't delete root")
        self.delete_file(path)
        self.checkpoints.delete_all_checkpoints(path)

    def rename(self, old_path, new_path):
        """Rename a file and any checkpoints associated with that file."""
        self.rename_file(old_path, new_path)
        self.checkpoints.rename_all_checkpoints(old_path, new_path)

    def update(self, model, path):
        """Update the file's path

        For use in PATCH requests, to enable renaming a file without
        re-uploading its contents. Only used for renaming at the moment.

        Returns the model (without content) found at the resulting path.
        """
        path = path.strip("/")
        # A model without a "path" key leaves the file where it is.
        new_path = model.get("path", path).strip("/")
        if path != new_path:
            self.rename(path, new_path)
        model = self.get(new_path, content=False)
        return model

    def info_string(self):
        """Return the short status string that `log_info` writes to the log."""
        return "Serving contents"

    def get_kernel_path(self, path, model=None):
        """Return the API path for the kernel

        KernelManagers can turn this value into a filesystem path,
        or ignore it altogether.

        The default value here will start kernels in the directory of the
        notebook server. FileContentsManager overrides this to use the
        directory containing the notebook.
        """
        return ""

    def increment_filename(self, filename, path="", insert=""):
        """Increment a filename until it is unique.

        Parameters
        ----------
        filename : unicode
            The name of a file, including extension
        path : unicode
            The API path of the target's directory
        insert : unicode
            The characters to insert after the base filename

        Returns
        -------
        name : unicode
            A filename that is unique, based on the input filename.
        """
        path = path.strip("/")
        # Notebooks split on the last dot, so only ".ipynb" is the suffix.
        # Everything else splits on the first dot, which keeps multi-part
        # extensions (e.g. ".tar.gz") together.
        basename, dot, ext = filename.rpartition(".")
        if ext != "ipynb":
            basename, dot, ext = filename.partition(".")

        suffix = dot + ext

        for i in itertools.count():
            # The first candidate (i == 0) is the unmodified filename.
            insert_i = "{}{}".format(insert, i) if i else ""
            name = "{basename}{insert}{suffix}".format(
                basename=basename, insert=insert_i, suffix=suffix
            )
            if not self.exists("{}/{}".format(path, name)):
                break
        return name

    def validate_notebook_model(self, model):
        """Add failed-validation message to model

        The notebook in ``model["content"]`` is validated; on failure the
        error is recorded under ``model["message"]`` rather than raised.
        The (possibly annotated) model is returned.
        """
        try:
            validate_nb(model["content"])
        except ValidationError as e:
            model["message"] = "Notebook validation failed: {}:\n{}".format(
                e.message,
                # Anything json cannot serialize is rendered as a placeholder.
                json.dumps(e.instance, indent=1, default=lambda obj: "<UNKNOWN>"),
            )
        return model

    def new_untitled(self, path="", type="", ext=""):
        """Create a new untitled file or directory in path

        path must be a directory

        File extension can be specified.

        Use `new` to create files with a fully specified path (including filename).

        Raises
        ------
        HTTPError
            404 if ``path`` is not an existing directory; 400 if the
            resulting model type is not directory, notebook or file.
        """
        path = path.strip("/")
        if not self.dir_exists(path):
            raise HTTPError(404, "No such directory: %s" % path)

        model = {}
        if type:
            model["type"] = type

        # Without an explicit type, infer it from the extension.
        if ext == ".ipynb":
            model.setdefault("type", "notebook")
        else:
            model.setdefault("type", "file")

        insert = ""
        if model["type"] == "directory":
            untitled = self.untitled_directory
            insert = " "
        elif model["type"] == "notebook":
            untitled = self.untitled_notebook
            ext = ".ipynb"
        elif model["type"] == "file":
            untitled = self.untitled_file
        else:
            raise HTTPError(400, "Unexpected model type: %r" % model["type"])

        name = self.increment_filename(untitled + ext, path, insert=insert)
        path = "{0}/{1}".format(path, name)
        return self.new(model, path)

    def new(self, model=None, path=""):
        """Create a new file or directory and return its model with no content.

        To create a new untitled entity in a directory, use `new_untitled`.

        A ``model`` passed in by the caller is filled in place with default
        ``type``/``content``/``format`` values before being saved.
        """
        path = path.strip("/")
        if model is None:
            model = {}

        if path.endswith(".ipynb"):
            model.setdefault("type", "notebook")
        else:
            model.setdefault("type", "file")

        # no content, not a directory, so fill out new-file model
        if "content" not in model and model["type"] != "directory":
            if model["type"] == "notebook":
                model["content"] = new_notebook()
                model["format"] = "json"
            else:
                model["content"] = ""
                model["type"] = "file"
                model["format"] = "text"

        model = self.save(model, path)
        return model

    def copy(self, from_path, to_path=None):
        """Copy an existing file and return its new model.

        If to_path not specified, it will be the parent directory of from_path.
        If to_path is a directory, filename will increment `from_path-Copy#.ext`.
        Considering multi-part extensions, the Copy# part will be placed before the first dot for all the extensions except `ipynb`.
        For easier manual searching in case of notebooks, the Copy# part will be placed before the last dot.

        from_path must be a full path to a file.

        Raises
        ------
        HTTPError
            400 if ``from_path`` refers to a directory.
        """
        path = from_path.strip("/")
        if to_path is not None:
            to_path = to_path.strip("/")

        if "/" in path:
            from_dir, from_name = path.rsplit("/", 1)
        else:
            from_dir = ""
            from_name = path

        model = self.get(path)
        # Drop the source location so the saved copy gets its own.
        model.pop("path", None)
        model.pop("name", None)
        if model["type"] == "directory":
            raise HTTPError(400, "Can't copy directories")

        if to_path is None:
            to_path = from_dir
        if self.dir_exists(to_path):
            # Collapse an existing "-Copy<N>." marker so copies of copies
            # are re-numbered instead of stacking markers.
            name = copy_pat.sub(".", from_name)
            to_name = self.increment_filename(name, to_path, insert="-Copy")
            to_path = "{0}/{1}".format(to_path, to_name)

        model = self.save(model, to_path)
        return model

    def log_info(self):
        """Log `info_string()` at info level."""
        self.log.info(self.info_string())

    def trust_notebook(self, path):
        """Explicitly trust a notebook

        Parameters
        ----------
        path : string
            The path of a notebook
        """
        model = self.get(path)
        nb = model["content"]
        self.log.warning("Trusting notebook %s", path)
        self.notary.mark_cells(nb, True)
        self.check_and_sign(nb, path)

    def check_and_sign(self, nb, path=""):
        """Check for trusted cells, and sign the notebook.

        Called as a part of saving notebooks.

        Parameters
        ----------
        nb : dict
            The notebook dict
        path : string
            The notebook's path (for logging)
        """
        if self.notary.check_cells(nb):
            self.notary.sign(nb)
        else:
            self.log.warning("Notebook %s is not trusted", path)

    def mark_trusted_cells(self, nb, path=""):
        """Mark cells as trusted if the notebook signature matches.

        Called as a part of loading notebooks.

        Parameters
        ----------
        nb : dict
            The notebook object (in current nbformat)
        path : string
            The notebook's path (for logging)
        """
        trusted = self.notary.check_signature(nb)
        if not trusted:
            self.log.warning("Notebook %s is not trusted", path)
        self.notary.mark_cells(nb, trusted)

    def should_list(self, name):
        """Should this file/directory name be displayed in a listing?

        Returns False when ``name`` matches any pattern in ``hide_globs``.
        """
        return not any(fnmatch(name, glob) for glob in self.hide_globs)

    # Part 3: Checkpoints API
    def create_checkpoint(self, path):
        """Create a checkpoint."""
        return self.checkpoints.create_checkpoint(self, path)

    def restore_checkpoint(self, checkpoint_id, path):
        """
        Restore a checkpoint.
        """
        self.checkpoints.restore_checkpoint(self, checkpoint_id, path)

    def list_checkpoints(self, path):
        """Return what ``self.checkpoints`` lists for ``path``."""
        return self.checkpoints.list_checkpoints(path)

    def delete_checkpoint(self, checkpoint_id, path):
        """Delete checkpoint ``checkpoint_id`` of ``path`` via ``self.checkpoints``."""
        return self.checkpoints.delete_checkpoint(checkpoint_id, path)
556
557
class AsyncContentsManager(ContentsManager):
    """Asynchronous flavour of the contents manager base class.

    The content-facing methods are redefined here as coroutines, and the
    checkpoints traits are retargeted at ``AsyncCheckpoints``.
    """

    checkpoints_class = Type(AsyncCheckpoints, config=True)
    checkpoints = Instance(AsyncCheckpoints, config=True)
    checkpoints_kwargs = Dict(config=True)

    @default("checkpoints")
    def _default_checkpoints(self):
        """Build the checkpoints object from the configured class and kwargs."""
        factory = self.checkpoints_class
        return factory(**self.checkpoints_kwargs)

    @default("checkpoints_kwargs")
    def _default_checkpoints_kwargs(self):
        """Constructor kwargs used by default: parent manager and its logger."""
        return {"parent": self, "log": self.log}

    # ContentsManager API part 1: methods that must be
    # implemented in subclasses.

    async def dir_exists(self, path):
        """Report whether ``path`` is an existing directory (cf. os.path.isdir).

        Subclasses must override this.

        Parameters
        ----------
        path : string
            The path to check

        Returns
        -------
        exists : bool
            True when a directory exists at ``path``.
        """
        raise NotImplementedError

    async def is_hidden(self, path):
        """Report whether ``path`` names a hidden file or directory.

        Subclasses must override this.

        Parameters
        ----------
        path : string
            API path to check (`/` separated, relative to root dir).

        Returns
        -------
        hidden : bool
            True when the path is hidden.
        """
        raise NotImplementedError

    async def file_exists(self, path=""):
        """Report whether ``path`` is an existing file (cf. os.path.isfile).

        Subclasses must override this.

        Parameters
        ----------
        path : string
            The API path of a file to check for.

        Returns
        -------
        exists : bool
            True when a file exists at ``path``.
        """
        raise NotImplementedError("must be implemented in a subclass")

    async def exists(self, path):
        """Report whether a file or directory exists at ``path``.

        Like os.path.exists. The file check runs first; the directory check
        only runs when the file check comes back falsy.

        Parameters
        ----------
        path : string
            The API path of a file or directory to check for.

        Returns
        -------
        exists : bool
            Whether the target exists.
        """
        as_file = await ensure_async(self.file_exists(path))
        if as_file:
            return as_file
        return await ensure_async(self.dir_exists(path))

    async def get(self, path, content=True, type=None, format=None):
        """Fetch the model of a file or directory. Subclasses must override."""
        raise NotImplementedError("must be implemented in a subclass")

    async def save(self, model, path):
        """Persist a file or directory model at ``path``.

        Implementations are expected to return the saved model without its
        content, and to call self.run_pre_save_hook(model=model, path=path)
        before any data is written.
        """
        raise NotImplementedError("must be implemented in a subclass")

    async def delete_file(self, path):
        """Remove the file or directory at ``path``. Subclasses must override."""
        raise NotImplementedError("must be implemented in a subclass")

    async def rename_file(self, old_path, new_path):
        """Move a file or directory. Subclasses must override."""
        raise NotImplementedError("must be implemented in a subclass")

    # ContentsManager API part 2: methods that have useable default
    # implementations, but can be overridden in subclasses.

    async def delete(self, path):
        """Remove a file/directory together with all of its checkpoints.

        Raises HTTPError 400 when ``path`` is the root (empty after
        stripping '/').
        """
        target = path.strip("/")
        if not target:
            raise HTTPError(400, "Can't delete root")

        await self.delete_file(target)
        await self.checkpoints.delete_all_checkpoints(target)

    async def rename(self, old_path, new_path):
        """Move a file, then move its checkpoints along with it."""
        await self.rename_file(old_path, new_path)
        await self.checkpoints.rename_all_checkpoints(old_path, new_path)

    async def update(self, model, path):
        """Apply a path change described by ``model``.

        Serves PATCH requests, so a file can be renamed without uploading
        its contents again; renaming is the only update performed. The
        content-less model at the resulting path is returned.
        """
        current = path.strip("/")
        # No "path" key in the model means the file keeps its location.
        destination = model.get("path", current).strip("/")
        if current != destination:
            await self.rename(current, destination)
        return await self.get(destination, content=False)

    async def increment_filename(self, filename, path="", insert=""):
        """Find a variant of ``filename`` that is not yet taken in ``path``.

        Parameters
        ----------
        filename : unicode
            The name of a file, including extension
        path : unicode
            The API path of the target's directory
        insert : unicode
            The characters to insert after the base filename

        Returns
        -------
        name : unicode
            The first candidate for which `exists` reports nothing.
        """
        directory = path.strip("/")
        # ".ipynb" names split on the last dot; all others split on the first
        # dot so that multi-part extensions such as ".tar.gz" stay intact.
        stem, sep, extension = filename.rpartition(".")
        if extension != "ipynb":
            stem, sep, extension = filename.partition(".")
        tail = sep + extension

        counter = itertools.count()
        while True:
            index = next(counter)
            # Index 0 tries the name untouched; later rounds append the number.
            numbered = "{}{}".format(insert, index) if index else ""
            candidate = u"{basename}{insert}{suffix}".format(
                basename=stem, insert=numbered, suffix=tail
            )
            taken = await ensure_async(self.exists(u"{}/{}".format(directory, candidate)))
            if not taken:
                return candidate

    async def new_untitled(self, path="", type="", ext=""):
        """Create an untitled file, notebook or directory inside ``path``.

        ``path`` has to be an existing directory; an extension may be given.
        For a fully specified target path (including filename) use `new`.

        Raises HTTPError 404 for a missing directory and 400 for a model
        type other than directory, notebook or file.
        """
        directory = path.strip("/")
        if not await ensure_async(self.dir_exists(directory)):
            raise HTTPError(404, "No such directory: %s" % directory)

        # An explicit type wins; otherwise the extension decides.
        kind = type or ("notebook" if ext == ".ipynb" else "file")
        model = {"type": kind}

        separator = ""
        if kind == "directory":
            base = self.untitled_directory
            separator = " "
        elif kind == "notebook":
            base = self.untitled_notebook
            ext = ".ipynb"
        elif kind == "file":
            base = self.untitled_file
        else:
            raise HTTPError(400, "Unexpected model type: %r" % kind)

        name = await self.increment_filename(base + ext, directory, insert=separator)
        return await self.new(model, u"{0}/{1}".format(directory, name))

    async def new(self, model=None, path=""):
        """Create a file or directory and return its content-less model.

        Use `new_untitled` when the name should be generated. A model
        supplied by the caller is completed in place before saving.
        """
        target = path.strip("/")
        if model is None:
            model = {}

        model.setdefault("type", "notebook" if target.endswith(".ipynb") else "file")

        # Only non-directories that arrive without content get defaults.
        wants_defaults = "content" not in model and model["type"] != "directory"
        if wants_defaults:
            if model["type"] == "notebook":
                model.update(content=new_notebook(), format="json")
            else:
                model.update(content="", type="file", format="text")

        return await self.save(model, target)

    async def copy(self, from_path, to_path=None):
        """Duplicate an existing file and return the model of the copy.

        Without ``to_path`` the copy lands in the parent directory of
        ``from_path``. When ``to_path`` is a directory, the name is
        incremented as `from_path-Copy#.ext`: for multi-part extensions the
        Copy# part goes before the first dot, except for `ipynb`, where it
        goes before the last dot to ease manual searching.

        ``from_path`` must be a full path to a file; directories are
        rejected with HTTPError 400.
        """
        source = from_path.strip("/")
        if to_path is not None:
            to_path = to_path.strip("/")

        # Everything before the last '/' is the directory ('' at the root).
        source_dir, _, source_name = source.rpartition("/")

        model = await self.get(source)
        # Forget where the model came from; saving assigns the new location.
        for stale_key in ("path", "name"):
            model.pop(stale_key, None)
        if model["type"] == "directory":
            raise HTTPError(400, "Can't copy directories")

        destination = source_dir if to_path is None else to_path
        if await ensure_async(self.dir_exists(destination)):
            # Strip an earlier "-Copy<N>." marker before numbering again.
            clean_name = copy_pat.sub(u".", source_name)
            unique_name = await self.increment_filename(clean_name, destination, insert="-Copy")
            destination = u"{0}/{1}".format(destination, unique_name)

        return await self.save(model, destination)

    async def trust_notebook(self, path):
        """Mark every cell of a notebook as trusted, then sign it.

        Parameters
        ----------
        path : string
            The path of a notebook
        """
        notebook = (await self.get(path))["content"]
        self.log.warning("Trusting notebook %s", path)
        self.notary.mark_cells(notebook, True)
        self.check_and_sign(notebook, path)

    # Part 3: Checkpoints API
    async def create_checkpoint(self, path):
        """Create a checkpoint for ``path`` and return the result."""
        created = await self.checkpoints.create_checkpoint(self, path)
        return created

    async def restore_checkpoint(self, checkpoint_id, path):
        """Restore ``path`` from the checkpoint ``checkpoint_id``."""
        await self.checkpoints.restore_checkpoint(self, checkpoint_id, path)

    async def list_checkpoints(self, path):
        """Return what ``self.checkpoints`` lists for ``path``."""
        listing = await self.checkpoints.list_checkpoints(path)
        return listing

    async def delete_checkpoint(self, checkpoint_id, path):
        """Delete checkpoint ``checkpoint_id`` of ``path`` via ``self.checkpoints``."""
        outcome = await self.checkpoints.delete_checkpoint(checkpoint_id, path)
        return outcome
874