"""A base class for contents managers."""
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.
import itertools
import json
import re
from fnmatch import fnmatch

from ipython_genutils.importstring import import_item
from nbformat import sign
from nbformat import validate as validate_nb
from nbformat import ValidationError
from nbformat.v4 import new_notebook
from tornado.web import HTTPError
from tornado.web import RequestHandler
from traitlets import Any
from traitlets import Bool
from traitlets import default
from traitlets import Dict
from traitlets import Instance
from traitlets import List
from traitlets import TraitError
from traitlets import Type
from traitlets import Unicode
from traitlets import validate
from traitlets.config.configurable import LoggingConfigurable

from ...files.handlers import FilesHandler
from .checkpoints import AsyncCheckpoints
from .checkpoints import Checkpoints
from jupyter_server.transutils import _i18n
from jupyter_server.utils import ensure_async


# Matches a previously inserted "-Copy<N>." marker so that copying a copy
# does not accumulate "-Copy1-Copy1..." in the generated name.
copy_pat = re.compile(r"\-Copy\d*\.")


class ContentsManager(LoggingConfigurable):
    """Base class for serving files and directories.

    This serves any text or binary file,
    as well as directories,
    with special handling for JSON notebook documents.

    Most APIs take a path argument,
    which is always an API-style unicode path,
    and always refers to a directory.

    - unicode, not url-escaped
    - '/'-separated
    - leading and trailing '/' will be stripped
    - if unspecified, path defaults to '',
      indicating the root path.

    """

    root_dir = Unicode("/", config=True)

    allow_hidden = Bool(False, config=True, help="Allow access to hidden files")

    notary = Instance(sign.NotebookNotary)

    @default("notary")
    def _notary_default(self):
        """Build the default notary, parented to this manager."""
        return sign.NotebookNotary(parent=self)

    hide_globs = List(
        Unicode(),
        [
            "__pycache__",
            "*.pyc",
            "*.pyo",
            ".DS_Store",
            "*.so",
            "*.dylib",
            "*~",
        ],
        config=True,
        help="""
        Glob patterns to hide in file and directory listings.
        """,
    )

    untitled_notebook = Unicode(
        _i18n("Untitled"),
        config=True,
        help="The base name used when creating untitled notebooks.",
    )

    untitled_file = Unicode(
        "untitled",
        config=True,
        help="The base name used when creating untitled files.",
    )

    untitled_directory = Unicode(
        "Untitled Folder",
        config=True,
        help="The base name used when creating untitled directories.",
    )

    pre_save_hook = Any(
        None,
        config=True,
        allow_none=True,
        help="""Python callable or importstring thereof

        To be called on a contents model prior to save.

        This can be used to process the structure,
        such as removing notebook outputs or other side effects that
        should not be saved.

        It will be called as (all arguments passed by keyword)::

            hook(path=path, model=model, contents_manager=self)

        - model: the model to be saved. Includes file contents.
          Modifying this dict will affect the file that is stored.
        - path: the API path of the save destination
        - contents_manager: this ContentsManager instance
        """,
    )

    @validate("pre_save_hook")
    def _validate_pre_save_hook(self, proposal):
        """Resolve an import string to an object and require it to be callable.

        Raises
        ------
        TraitError
            If the (resolved) value is not callable.
        """
        value = proposal["value"]
        if isinstance(value, str):
            # Resolve the *proposed* value: while the validator runs,
            # self.pre_save_hook still holds the previous value.
            value = import_item(value)
        if not callable(value):
            raise TraitError("pre_save_hook must be callable")
        return value

    def run_pre_save_hook(self, model, path, **kwargs):
        """Run the pre-save hook if defined, and log errors"""
        if self.pre_save_hook:
            try:
                self.log.debug("Running pre-save hook on %s", path)
                self.pre_save_hook(model=model, path=path, contents_manager=self, **kwargs)
            except HTTPError:
                # allow custom HTTPErrors to raise,
                # rejecting the save with a message.
                raise
            except Exception:
                # unhandled errors don't prevent saving,
                # which could cause frustrating data loss
                self.log.error("Pre-save hook failed on %s", path, exc_info=True)

    checkpoints_class = Type(Checkpoints, config=True)
    checkpoints = Instance(Checkpoints, config=True)
    checkpoints_kwargs = Dict(config=True)

    @default("checkpoints")
    def _default_checkpoints(self):
        """Instantiate ``checkpoints_class`` with ``checkpoints_kwargs``."""
        return self.checkpoints_class(**self.checkpoints_kwargs)

    @default("checkpoints_kwargs")
    def _default_checkpoints_kwargs(self):
        """Default constructor arguments: this manager as parent, plus its logger."""
        return dict(
            parent=self,
            log=self.log,
        )

    files_handler_class = Type(
        FilesHandler,
        klass=RequestHandler,
        allow_none=True,
        config=True,
        help="""handler class to use when serving raw file requests.

        Default is a fallback that talks to the ContentsManager API,
        which may be inefficient, especially for large files.

        Local files-based ContentsManagers can use a StaticFileHandler subclass,
        which will be much more efficient.

        Access to these files should be Authenticated.
        """,
    )

    files_handler_params = Dict(
        config=True,
        help="""Extra parameters to pass to files_handler_class.

        For example, StaticFileHandlers generally expect a `path` argument
        specifying the root directory from which to serve files.
        """,
    )

    def get_extra_handlers(self):
        """Return additional handlers

        Default: self.files_handler_class on /files/.*
        """
        handlers = []
        if self.files_handler_class:
            handlers.append((r"/files/(.*)", self.files_handler_class, self.files_handler_params))
        return handlers

    # ContentsManager API part 1: methods that must be
    # implemented in subclasses.

    def dir_exists(self, path):
        """Does a directory exist at the given path?

        Like os.path.isdir

        Override this method in subclasses.

        Parameters
        ----------
        path : string
            The path to check

        Returns
        -------
        exists : bool
            Whether the path does indeed exist.
        """
        raise NotImplementedError

    def is_hidden(self, path):
        """Is path a hidden directory or file?

        Parameters
        ----------
        path : string
            The path to check. This is an API path (`/` separated,
            relative to root dir).

        Returns
        -------
        hidden : bool
            Whether the path is hidden.

        """
        raise NotImplementedError

    def file_exists(self, path=""):
        """Does a file exist at the given path?

        Like os.path.isfile

        Override this method in subclasses.

        Parameters
        ----------
        path : string
            The API path of a file to check for.

        Returns
        -------
        exists : bool
            Whether the file exists.
        """
        raise NotImplementedError("must be implemented in a subclass")

    def exists(self, path):
        """Does a file or directory exist at the given path?

        Like os.path.exists

        Parameters
        ----------
        path : string
            The API path of a file or directory to check for.

        Returns
        -------
        exists : bool
            Whether the target exists.
        """
        return self.file_exists(path) or self.dir_exists(path)

    def get(self, path, content=True, type=None, format=None):
        """Get a file or directory model."""
        raise NotImplementedError("must be implemented in a subclass")

    def save(self, model, path):
        """
        Save a file or directory model to path.

        Should return the saved model with no content.  Save implementations
        should call self.run_pre_save_hook(model=model, path=path) prior to
        writing any data.
        """
        raise NotImplementedError("must be implemented in a subclass")

    def delete_file(self, path):
        """Delete the file or directory at path."""
        raise NotImplementedError("must be implemented in a subclass")

    def rename_file(self, old_path, new_path):
        """Rename a file or directory."""
        raise NotImplementedError("must be implemented in a subclass")

    # ContentsManager API part 2: methods that have useable default
    # implementations, but can be overridden in subclasses.

    def delete(self, path):
        """Delete a file/directory and any associated checkpoints."""
        path = path.strip("/")
        if not path:
            raise HTTPError(400, "Can't delete root")
        self.delete_file(path)
        self.checkpoints.delete_all_checkpoints(path)

    def rename(self, old_path, new_path):
        """Rename a file and any checkpoints associated with that file."""
        self.rename_file(old_path, new_path)
        self.checkpoints.rename_all_checkpoints(old_path, new_path)

    def update(self, model, path):
        """Update the file's path

        For use in PATCH requests, to enable renaming a file without
        re-uploading its contents. Only used for renaming at the moment.
        """
        path = path.strip("/")
        new_path = model.get("path", path).strip("/")
        if path != new_path:
            self.rename(path, new_path)
        model = self.get(new_path, content=False)
        return model

    def info_string(self):
        """Return the one-line description logged by ``log_info``."""
        return "Serving contents"

    def get_kernel_path(self, path, model=None):
        """Return the API path for the kernel

        KernelManagers can turn this value into a filesystem path,
        or ignore it altogether.

        The default value here will start kernels in the directory of the
        notebook server. FileContentsManager overrides this to use the
        directory containing the notebook.
        """
        return ""

    def increment_filename(self, filename, path="", insert=""):
        """Increment a filename until it is unique.

        Parameters
        ----------
        filename : unicode
            The name of a file, including extension
        path : unicode
            The API path of the target's directory
        insert : unicode
            The characters to insert after the base filename

        Returns
        -------
        name : unicode
            A filename that is unique, based on the input filename.
        """
        # Extract the full suffix from the filename (e.g. .tar.gz)
        path = path.strip("/")
        basename, dot, ext = filename.rpartition(".")
        if ext != "ipynb":
            # Non-notebooks: split at the first dot so multi-part
            # extensions stay together in the suffix.
            basename, dot, ext = filename.partition(".")

        suffix = dot + ext

        for i in itertools.count():
            # The first candidate (i == 0) is the unmodified filename.
            if i:
                insert_i = "{}{}".format(insert, i)
            else:
                insert_i = ""
            name = "{basename}{insert}{suffix}".format(
                basename=basename, insert=insert_i, suffix=suffix
            )
            if not self.exists("{}/{}".format(path, name)):
                break
        return name

    def validate_notebook_model(self, model):
        """Add failed-validation message to model"""
        try:
            validate_nb(model["content"])
        except ValidationError as e:
            model["message"] = "Notebook validation failed: {}:\n{}".format(
                e.message,
                json.dumps(e.instance, indent=1, default=lambda obj: "<UNKNOWN>"),
            )
        return model

    def new_untitled(self, path="", type="", ext=""):
        """Create a new untitled file or directory in path

        path must be a directory

        File extension can be specified.

        Use `new` to create files with a fully specified path (including filename).
        """
        path = path.strip("/")
        if not self.dir_exists(path):
            raise HTTPError(404, "No such directory: %s" % path)

        model = {}
        if type:
            model["type"] = type

        if ext == ".ipynb":
            model.setdefault("type", "notebook")
        else:
            model.setdefault("type", "file")

        insert = ""
        if model["type"] == "directory":
            untitled = self.untitled_directory
            insert = " "
        elif model["type"] == "notebook":
            untitled = self.untitled_notebook
            ext = ".ipynb"
        elif model["type"] == "file":
            untitled = self.untitled_file
        else:
            raise HTTPError(400, "Unexpected model type: %r" % model["type"])

        name = self.increment_filename(untitled + ext, path, insert=insert)
        path = "{0}/{1}".format(path, name)
        return self.new(model, path)

    def new(self, model=None, path=""):
        """Create a new file or directory and return its model with no content.

        To create a new untitled entity in a directory, use `new_untitled`.
        """
        path = path.strip("/")
        if model is None:
            model = {}

        if path.endswith(".ipynb"):
            model.setdefault("type", "notebook")
        else:
            model.setdefault("type", "file")

        # no content, not a directory, so fill out new-file model
        if "content" not in model and model["type"] != "directory":
            if model["type"] == "notebook":
                model["content"] = new_notebook()
                model["format"] = "json"
            else:
                model["content"] = ""
                model["type"] = "file"
                model["format"] = "text"

        model = self.save(model, path)
        return model

    def copy(self, from_path, to_path=None):
        """Copy an existing file and return its new model.

        If to_path not specified, it will be the parent directory of from_path.
        If to_path is a directory, filename will increment `from_path-Copy#.ext`.
        Considering multi-part extensions, the Copy# part will be placed before
        the first dot for all the extensions except `ipynb`.
        For easier manual searching in case of notebooks, the Copy# part will be
        placed before the last dot.

        from_path must be a full path to a file.
        """
        path = from_path.strip("/")
        if to_path is not None:
            to_path = to_path.strip("/")

        if "/" in path:
            from_dir, from_name = path.rsplit("/", 1)
        else:
            from_dir = ""
            from_name = path

        model = self.get(path)
        model.pop("path", None)
        model.pop("name", None)
        if model["type"] == "directory":
            raise HTTPError(400, "Can't copy directories")

        if to_path is None:
            to_path = from_dir
        if self.dir_exists(to_path):
            # Drop an existing "-Copy<N>" marker before generating a new one.
            name = copy_pat.sub(".", from_name)
            to_name = self.increment_filename(name, to_path, insert="-Copy")
            to_path = "{0}/{1}".format(to_path, to_name)

        model = self.save(model, to_path)
        return model

    def log_info(self):
        """Log ``info_string()`` at INFO level."""
        self.log.info(self.info_string())

    def trust_notebook(self, path):
        """Explicitly trust a notebook

        Parameters
        ----------
        path : string
            The path of a notebook
        """
        model = self.get(path)
        nb = model["content"]
        self.log.warning("Trusting notebook %s", path)
        self.notary.mark_cells(nb, True)
        self.check_and_sign(nb, path)

    def check_and_sign(self, nb, path=""):
        """Check for trusted cells, and sign the notebook.

        Called as a part of saving notebooks.

        Parameters
        ----------
        nb : dict
            The notebook dict
        path : string
            The notebook's path (for logging)
        """
        if self.notary.check_cells(nb):
            self.notary.sign(nb)
        else:
            self.log.warning("Notebook %s is not trusted", path)

    def mark_trusted_cells(self, nb, path=""):
        """Mark cells as trusted if the notebook signature matches.

        Called as a part of loading notebooks.

        Parameters
        ----------
        nb : dict
            The notebook object (in current nbformat)
        path : string
            The notebook's path (for logging)
        """
        trusted = self.notary.check_signature(nb)
        if not trusted:
            self.log.warning("Notebook %s is not trusted", path)
        self.notary.mark_cells(nb, trusted)

    def should_list(self, name):
        """Should this file/directory name be displayed in a listing?"""
        return not any(fnmatch(name, glob) for glob in self.hide_globs)

    # Part 3: Checkpoints API
    def create_checkpoint(self, path):
        """Create a checkpoint."""
        return self.checkpoints.create_checkpoint(self, path)

    def restore_checkpoint(self, checkpoint_id, path):
        """
        Restore a checkpoint.
        """
        self.checkpoints.restore_checkpoint(self, checkpoint_id, path)

    def list_checkpoints(self, path):
        """Return the result of ``checkpoints.list_checkpoints`` for path."""
        return self.checkpoints.list_checkpoints(path)

    def delete_checkpoint(self, checkpoint_id, path):
        """Delete one checkpoint of path via ``checkpoints.delete_checkpoint``."""
        return self.checkpoints.delete_checkpoint(checkpoint_id, path)


class AsyncContentsManager(ContentsManager):
    """Base class for serving files and directories asynchronously."""

    checkpoints_class = Type(AsyncCheckpoints, config=True)
    checkpoints = Instance(AsyncCheckpoints, config=True)
    checkpoints_kwargs = Dict(config=True)

    @default("checkpoints")
    def _default_checkpoints(self):
        """Instantiate ``checkpoints_class`` with ``checkpoints_kwargs``."""
        return self.checkpoints_class(**self.checkpoints_kwargs)

    @default("checkpoints_kwargs")
    def _default_checkpoints_kwargs(self):
        """Default constructor arguments: this manager as parent, plus its logger."""
        return dict(
            parent=self,
            log=self.log,
        )

    # ContentsManager API part 1: methods that must be
    # implemented in subclasses.

    async def dir_exists(self, path):
        """Does a directory exist at the given path?

        Like os.path.isdir

        Override this method in subclasses.

        Parameters
        ----------
        path : string
            The path to check

        Returns
        -------
        exists : bool
            Whether the path does indeed exist.
        """
        raise NotImplementedError

    async def is_hidden(self, path):
        """Is path a hidden directory or file?

        Parameters
        ----------
        path : string
            The path to check. This is an API path (`/` separated,
            relative to root dir).

        Returns
        -------
        hidden : bool
            Whether the path is hidden.

        """
        raise NotImplementedError

    async def file_exists(self, path=""):
        """Does a file exist at the given path?

        Like os.path.isfile

        Override this method in subclasses.

        Parameters
        ----------
        path : string
            The API path of a file to check for.

        Returns
        -------
        exists : bool
            Whether the file exists.
        """
        raise NotImplementedError("must be implemented in a subclass")

    async def exists(self, path):
        """Does a file or directory exist at the given path?

        Like os.path.exists

        Parameters
        ----------
        path : string
            The API path of a file or directory to check for.

        Returns
        -------
        exists : bool
            Whether the target exists.
        """
        return await ensure_async(self.file_exists(path)) or await ensure_async(
            self.dir_exists(path)
        )

    async def get(self, path, content=True, type=None, format=None):
        """Get a file or directory model."""
        raise NotImplementedError("must be implemented in a subclass")

    async def save(self, model, path):
        """
        Save a file or directory model to path.

        Should return the saved model with no content.  Save implementations
        should call self.run_pre_save_hook(model=model, path=path) prior to
        writing any data.
        """
        raise NotImplementedError("must be implemented in a subclass")

    async def delete_file(self, path):
        """Delete the file or directory at path."""
        raise NotImplementedError("must be implemented in a subclass")

    async def rename_file(self, old_path, new_path):
        """Rename a file or directory."""
        raise NotImplementedError("must be implemented in a subclass")

    # ContentsManager API part 2: methods that have useable default
    # implementations, but can be overridden in subclasses.

    async def delete(self, path):
        """Delete a file/directory and any associated checkpoints."""
        path = path.strip("/")
        if not path:
            raise HTTPError(400, "Can't delete root")

        await self.delete_file(path)
        await self.checkpoints.delete_all_checkpoints(path)

    async def rename(self, old_path, new_path):
        """Rename a file and any checkpoints associated with that file."""
        await self.rename_file(old_path, new_path)
        await self.checkpoints.rename_all_checkpoints(old_path, new_path)

    async def update(self, model, path):
        """Update the file's path

        For use in PATCH requests, to enable renaming a file without
        re-uploading its contents. Only used for renaming at the moment.
        """
        path = path.strip("/")
        new_path = model.get("path", path).strip("/")
        if path != new_path:
            await self.rename(path, new_path)
        model = await self.get(new_path, content=False)
        return model

    async def increment_filename(self, filename, path="", insert=""):
        """Increment a filename until it is unique.

        Parameters
        ----------
        filename : unicode
            The name of a file, including extension
        path : unicode
            The API path of the target's directory
        insert : unicode
            The characters to insert after the base filename

        Returns
        -------
        name : unicode
            A filename that is unique, based on the input filename.
        """
        # Extract the full suffix from the filename (e.g. .tar.gz)
        path = path.strip("/")
        basename, dot, ext = filename.rpartition(".")
        if ext != "ipynb":
            # Non-notebooks: split at the first dot so multi-part
            # extensions stay together in the suffix.
            basename, dot, ext = filename.partition(".")

        suffix = dot + ext

        for i in itertools.count():
            # The first candidate (i == 0) is the unmodified filename.
            if i:
                insert_i = "{}{}".format(insert, i)
            else:
                insert_i = ""
            name = "{basename}{insert}{suffix}".format(
                basename=basename, insert=insert_i, suffix=suffix
            )
            file_exists = await ensure_async(self.exists("{}/{}".format(path, name)))
            if not file_exists:
                break
        return name

    async def new_untitled(self, path="", type="", ext=""):
        """Create a new untitled file or directory in path

        path must be a directory

        File extension can be specified.

        Use `new` to create files with a fully specified path (including filename).
        """
        path = path.strip("/")
        dir_exists = await ensure_async(self.dir_exists(path))
        if not dir_exists:
            raise HTTPError(404, "No such directory: %s" % path)

        model = {}
        if type:
            model["type"] = type

        if ext == ".ipynb":
            model.setdefault("type", "notebook")
        else:
            model.setdefault("type", "file")

        insert = ""
        if model["type"] == "directory":
            untitled = self.untitled_directory
            insert = " "
        elif model["type"] == "notebook":
            untitled = self.untitled_notebook
            ext = ".ipynb"
        elif model["type"] == "file":
            untitled = self.untitled_file
        else:
            raise HTTPError(400, "Unexpected model type: %r" % model["type"])

        name = await self.increment_filename(untitled + ext, path, insert=insert)
        path = "{0}/{1}".format(path, name)
        return await self.new(model, path)

    async def new(self, model=None, path=""):
        """Create a new file or directory and return its model with no content.

        To create a new untitled entity in a directory, use `new_untitled`.
        """
        path = path.strip("/")
        if model is None:
            model = {}

        if path.endswith(".ipynb"):
            model.setdefault("type", "notebook")
        else:
            model.setdefault("type", "file")

        # no content, not a directory, so fill out new-file model
        if "content" not in model and model["type"] != "directory":
            if model["type"] == "notebook":
                model["content"] = new_notebook()
                model["format"] = "json"
            else:
                model["content"] = ""
                model["type"] = "file"
                model["format"] = "text"

        model = await self.save(model, path)
        return model

    async def copy(self, from_path, to_path=None):
        """Copy an existing file and return its new model.

        If to_path not specified, it will be the parent directory of from_path.
        If to_path is a directory, filename will increment `from_path-Copy#.ext`.
        Considering multi-part extensions, the Copy# part will be placed before
        the first dot for all the extensions except `ipynb`.
        For easier manual searching in case of notebooks, the Copy# part will be
        placed before the last dot.

        from_path must be a full path to a file.
        """
        path = from_path.strip("/")
        if to_path is not None:
            to_path = to_path.strip("/")

        if "/" in path:
            from_dir, from_name = path.rsplit("/", 1)
        else:
            from_dir = ""
            from_name = path

        model = await self.get(path)
        model.pop("path", None)
        model.pop("name", None)
        if model["type"] == "directory":
            raise HTTPError(400, "Can't copy directories")
        if to_path is None:
            to_path = from_dir
        if await ensure_async(self.dir_exists(to_path)):
            # Drop an existing "-Copy<N>" marker before generating a new one.
            name = copy_pat.sub(".", from_name)
            to_name = await self.increment_filename(name, to_path, insert="-Copy")
            to_path = "{0}/{1}".format(to_path, to_name)

        model = await self.save(model, to_path)
        return model

    async def trust_notebook(self, path):
        """Explicitly trust a notebook

        Parameters
        ----------
        path : string
            The path of a notebook
        """
        model = await self.get(path)
        nb = model["content"]
        self.log.warning("Trusting notebook %s", path)
        self.notary.mark_cells(nb, True)
        self.check_and_sign(nb, path)

    # Part 3: Checkpoints API
    async def create_checkpoint(self, path):
        """Create a checkpoint."""
        return await self.checkpoints.create_checkpoint(self, path)

    async def restore_checkpoint(self, checkpoint_id, path):
        """
        Restore a checkpoint.
        """
        await self.checkpoints.restore_checkpoint(self, checkpoint_id, path)

    async def list_checkpoints(self, path):
        """Return the awaited result of ``checkpoints.list_checkpoints`` for path."""
        return await self.checkpoints.list_checkpoints(path)

    async def delete_checkpoint(self, checkpoint_id, path):
        """Delete one checkpoint of path via ``checkpoints.delete_checkpoint``."""
        return await self.checkpoints.delete_checkpoint(checkpoint_id, path)