1import json 2import time 3import re 4import os 5import copy 6import base64 7import sys 8 9import gevent 10 11from Debug import Debug 12from Crypt import CryptHash 13from Config import config 14from util import helper 15from util import Diff 16from util import SafeRe 17from Peer import PeerHashfield 18from .ContentDbDict import ContentDbDict 19from Plugin import PluginManager 20 21 22class VerifyError(Exception): 23 pass 24 25 26class SignError(Exception): 27 pass 28 29 30@PluginManager.acceptPlugins 31class ContentManager(object): 32 33 def __init__(self, site): 34 self.site = site 35 self.log = self.site.log 36 self.contents = ContentDbDict(site) 37 self.hashfield = PeerHashfield() 38 self.has_optional_files = False 39 40 # Load all content.json files 41 def loadContents(self): 42 if len(self.contents) == 0: 43 self.log.debug("ContentDb not initialized, load files from filesystem") 44 self.loadContent(add_bad_files=False, delete_removed_files=False) 45 self.site.settings["size"], self.site.settings["size_optional"] = self.getTotalSize() 46 47 # Load hashfield cache 48 if "hashfield" in self.site.settings.get("cache", {}): 49 self.hashfield.frombytes(base64.b64decode(self.site.settings["cache"]["hashfield"])) 50 del self.site.settings["cache"]["hashfield"] 51 elif self.contents.get("content.json") and self.site.settings["size_optional"] > 0: 52 self.site.storage.updateBadFiles() # No hashfield cache created yet 53 self.has_optional_files = bool(self.hashfield) 54 55 self.contents.db.initSite(self.site) 56 57 def getFileChanges(self, old_files, new_files): 58 deleted = {key: val for key, val in old_files.items() if key not in new_files} 59 deleted_hashes = {val.get("sha512"): key for key, val in old_files.items() if key not in new_files} 60 added = {key: val for key, val in new_files.items() if key not in old_files} 61 renamed = {} 62 for relative_path, node in added.items(): 63 hash = node.get("sha512") 64 if hash in deleted_hashes: 65 relative_path_old = 
deleted_hashes[hash] 66 renamed[relative_path_old] = relative_path 67 del(deleted[relative_path_old]) 68 return list(deleted), renamed 69 70 # Load content.json to self.content 71 # Return: Changed files ["index.html", "data/messages.json"], Deleted files ["old.jpg"] 72 def loadContent(self, content_inner_path="content.json", add_bad_files=True, delete_removed_files=True, load_includes=True, force=False): 73 content_inner_path = content_inner_path.strip("/") # Remove / from beginning 74 old_content = self.contents.get(content_inner_path) 75 content_path = self.site.storage.getPath(content_inner_path) 76 content_dir = helper.getDirname(self.site.storage.getPath(content_inner_path)) 77 content_inner_dir = helper.getDirname(content_inner_path) 78 79 if os.path.isfile(content_path): 80 try: 81 # Check if file is newer than what we have 82 if not force and old_content and not self.site.settings.get("own"): 83 for line in open(content_path): 84 if '"modified"' not in line: 85 continue 86 match = re.search(r"([0-9\.]+),$", line.strip(" \r\n")) 87 if match and float(match.group(1)) <= old_content.get("modified", 0): 88 self.log.debug("%s loadContent same json file, skipping" % content_inner_path) 89 return [], [] 90 91 new_content = self.site.storage.loadJson(content_inner_path) 92 except Exception as err: 93 self.log.warning("%s load error: %s" % (content_path, Debug.formatException(err))) 94 return [], [] 95 else: 96 self.log.debug("Content.json not exist: %s" % content_path) 97 return [], [] # Content.json not exist 98 99 try: 100 # Get the files where the sha512 changed 101 changed = [] 102 deleted = [] 103 # Check changed 104 for relative_path, info in new_content.get("files", {}).items(): 105 if "sha512" in info: 106 hash_type = "sha512" 107 else: # Backward compatibility 108 hash_type = "sha1" 109 110 new_hash = info[hash_type] 111 if old_content and old_content["files"].get(relative_path): # We have the file in the old content 112 old_hash = 
old_content["files"][relative_path].get(hash_type) 113 else: # The file is not in the old content 114 old_hash = None 115 if old_hash != new_hash: 116 changed.append(content_inner_dir + relative_path) 117 118 # Check changed optional files 119 for relative_path, info in new_content.get("files_optional", {}).items(): 120 file_inner_path = content_inner_dir + relative_path 121 new_hash = info["sha512"] 122 if old_content and old_content.get("files_optional", {}).get(relative_path): 123 # We have the file in the old content 124 old_hash = old_content["files_optional"][relative_path].get("sha512") 125 if old_hash != new_hash and self.site.isDownloadable(file_inner_path): 126 changed.append(file_inner_path) # Download new file 127 elif old_hash != new_hash and self.hashfield.hasHash(old_hash) and not self.site.settings.get("own"): 128 try: 129 old_hash_id = self.hashfield.getHashId(old_hash) 130 self.optionalRemoved(file_inner_path, old_hash_id, old_content["files_optional"][relative_path]["size"]) 131 self.optionalDelete(file_inner_path) 132 self.log.debug("Deleted changed optional file: %s" % file_inner_path) 133 except Exception as err: 134 self.log.warning("Error deleting file %s: %s" % (file_inner_path, Debug.formatException(err))) 135 else: # The file is not in the old content 136 if self.site.isDownloadable(file_inner_path): 137 changed.append(file_inner_path) # Download new file 138 139 # Check deleted 140 if old_content: 141 old_files = dict( 142 old_content.get("files", {}), 143 **old_content.get("files_optional", {}) 144 ) 145 146 new_files = dict( 147 new_content.get("files", {}), 148 **new_content.get("files_optional", {}) 149 ) 150 151 deleted, renamed = self.getFileChanges(old_files, new_files) 152 153 for relative_path_old, relative_path_new in renamed.items(): 154 self.log.debug("Renaming: %s -> %s" % (relative_path_old, relative_path_new)) 155 if relative_path_new in new_content.get("files_optional", {}): 156 self.optionalRenamed(content_inner_dir + 
relative_path_old, content_inner_dir + relative_path_new) 157 if self.site.storage.isFile(relative_path_old): 158 try: 159 self.site.storage.rename(relative_path_old, relative_path_new) 160 if relative_path_new in changed: 161 changed.remove(relative_path_new) 162 self.log.debug("Renamed: %s -> %s" % (relative_path_old, relative_path_new)) 163 except Exception as err: 164 self.log.warning("Error renaming file: %s -> %s %s" % (relative_path_old, relative_path_new, err)) 165 166 if deleted and not self.site.settings.get("own"): 167 # Deleting files that no longer in content.json 168 for file_relative_path in deleted: 169 file_inner_path = content_inner_dir + file_relative_path 170 try: 171 # Check if the deleted file is optional 172 if old_content.get("files_optional") and old_content["files_optional"].get(file_relative_path): 173 self.optionalDelete(file_inner_path) 174 old_hash = old_content["files_optional"][file_relative_path].get("sha512") 175 if self.hashfield.hasHash(old_hash): 176 old_hash_id = self.hashfield.getHashId(old_hash) 177 self.optionalRemoved(file_inner_path, old_hash_id, old_content["files_optional"][file_relative_path]["size"]) 178 else: 179 self.site.storage.delete(file_inner_path) 180 181 self.log.debug("Deleted file: %s" % file_inner_path) 182 except Exception as err: 183 self.log.debug("Error deleting file %s: %s" % (file_inner_path, Debug.formatException(err))) 184 185 # Cleanup empty dirs 186 tree = {root: [dirs, files] for root, dirs, files in os.walk(self.site.storage.getPath(content_inner_dir))} 187 for root in sorted(tree, key=len, reverse=True): 188 dirs, files = tree[root] 189 if dirs == [] and files == []: 190 root_inner_path = self.site.storage.getInnerPath(root.replace("\\", "/")) 191 self.log.debug("Empty directory: %s, cleaning up." 
% root_inner_path) 192 try: 193 self.site.storage.deleteDir(root_inner_path) 194 # Remove from tree dict to reflect changed state 195 tree[os.path.dirname(root)][0].remove(os.path.basename(root)) 196 except Exception as err: 197 self.log.debug("Error deleting empty directory %s: %s" % (root_inner_path, err)) 198 199 # Check archived 200 if old_content and "user_contents" in new_content and "archived" in new_content["user_contents"]: 201 old_archived = old_content.get("user_contents", {}).get("archived", {}) 202 new_archived = new_content.get("user_contents", {}).get("archived", {}) 203 self.log.debug("old archived: %s, new archived: %s" % (len(old_archived), len(new_archived))) 204 archived_changed = { 205 key: date_archived 206 for key, date_archived in new_archived.items() 207 if old_archived.get(key) != new_archived[key] 208 } 209 if archived_changed: 210 self.log.debug("Archived changed: %s" % archived_changed) 211 for archived_dirname, date_archived in archived_changed.items(): 212 archived_inner_path = content_inner_dir + archived_dirname + "/content.json" 213 if self.contents.get(archived_inner_path, {}).get("modified", 0) < date_archived: 214 self.removeContent(archived_inner_path) 215 deleted += archived_inner_path 216 self.site.settings["size"], self.site.settings["size_optional"] = self.getTotalSize() 217 218 # Check archived before 219 if old_content and "user_contents" in new_content and "archived_before" in new_content["user_contents"]: 220 old_archived_before = old_content.get("user_contents", {}).get("archived_before", 0) 221 new_archived_before = new_content.get("user_contents", {}).get("archived_before", 0) 222 if old_archived_before != new_archived_before: 223 self.log.debug("Archived before changed: %s -> %s" % (old_archived_before, new_archived_before)) 224 225 # Remove downloaded archived files 226 num_removed_contents = 0 227 for archived_inner_path in self.listModified(before=new_archived_before): 228 if 
archived_inner_path.startswith(content_inner_dir) and archived_inner_path != content_inner_path: 229 self.removeContent(archived_inner_path) 230 num_removed_contents += 1 231 self.site.settings["size"], self.site.settings["size_optional"] = self.getTotalSize() 232 233 # Remove archived files from download queue 234 num_removed_bad_files = 0 235 for bad_file in list(self.site.bad_files.keys()): 236 if bad_file.endswith("content.json"): 237 del self.site.bad_files[bad_file] 238 num_removed_bad_files += 1 239 240 if num_removed_bad_files > 0: 241 self.site.worker_manager.removeSolvedFileTasks(mark_as_good=False) 242 gevent.spawn(self.site.update, since=0) 243 244 self.log.debug("Archived removed contents: %s, removed bad files: %s" % (num_removed_contents, num_removed_bad_files)) 245 246 # Load includes 247 if load_includes and "includes" in new_content: 248 for relative_path, info in list(new_content["includes"].items()): 249 include_inner_path = content_inner_dir + relative_path 250 if self.site.storage.isFile(include_inner_path): # Content.json exists, load it 251 include_changed, include_deleted = self.loadContent( 252 include_inner_path, add_bad_files=add_bad_files, delete_removed_files=delete_removed_files 253 ) 254 if include_changed: 255 changed += include_changed # Add changed files 256 if include_deleted: 257 deleted += include_deleted # Add changed files 258 else: # Content.json not exist, add to changed files 259 self.log.debug("Missing include: %s" % include_inner_path) 260 changed += [include_inner_path] 261 262 # Load blind user includes (all subdir) 263 if load_includes and "user_contents" in new_content: 264 for relative_dir in os.listdir(content_dir): 265 include_inner_path = content_inner_dir + relative_dir + "/content.json" 266 if not self.site.storage.isFile(include_inner_path): 267 continue # Content.json not exist 268 include_changed, include_deleted = self.loadContent( 269 include_inner_path, add_bad_files=add_bad_files, 
delete_removed_files=delete_removed_files, 270 load_includes=False 271 ) 272 if include_changed: 273 changed += include_changed # Add changed files 274 if include_deleted: 275 deleted += include_deleted # Add changed files 276 277 # Save some memory 278 new_content["signs"] = None 279 if "cert_sign" in new_content: 280 new_content["cert_sign"] = None 281 282 if new_content.get("files_optional"): 283 self.has_optional_files = True 284 # Update the content 285 self.contents[content_inner_path] = new_content 286 except Exception as err: 287 self.log.warning("%s parse error: %s" % (content_inner_path, Debug.formatException(err))) 288 return [], [] # Content.json parse error 289 290 # Add changed files to bad files 291 if add_bad_files: 292 for inner_path in changed: 293 self.site.bad_files[inner_path] = self.site.bad_files.get(inner_path, 0) + 1 294 for inner_path in deleted: 295 if inner_path in self.site.bad_files: 296 del self.site.bad_files[inner_path] 297 self.site.worker_manager.removeSolvedFileTasks() 298 299 if new_content.get("modified", 0) > self.site.settings.get("modified", 0): 300 # Dont store modifications in the far future (more than 10 minute) 301 self.site.settings["modified"] = min(time.time() + 60 * 10, new_content["modified"]) 302 303 return changed, deleted 304 305 def removeContent(self, inner_path): 306 inner_dir = helper.getDirname(inner_path) 307 try: 308 content = self.contents[inner_path] 309 files = dict( 310 content.get("files", {}), 311 **content.get("files_optional", {}) 312 ) 313 except Exception as err: 314 self.log.debug("Error loading %s for removeContent: %s" % (inner_path, Debug.formatException(err))) 315 files = {} 316 files["content.json"] = True 317 # Deleting files that no longer in content.json 318 for file_relative_path in files: 319 file_inner_path = inner_dir + file_relative_path 320 try: 321 self.site.storage.delete(file_inner_path) 322 self.log.debug("Deleted file: %s" % file_inner_path) 323 except Exception as err: 324 
self.log.debug("Error deleting file %s: %s" % (file_inner_path, err)) 325 try: 326 self.site.storage.deleteDir(inner_dir) 327 except Exception as err: 328 self.log.debug("Error deleting dir %s: %s" % (inner_dir, err)) 329 330 try: 331 del self.contents[inner_path] 332 except Exception as err: 333 self.log.debug("Error key from contents: %s" % inner_path) 334 335 # Get total size of site 336 # Return: 32819 (size of files in kb) 337 def getTotalSize(self, ignore=None): 338 return self.contents.db.getTotalSize(self.site, ignore) 339 340 def listModified(self, after=None, before=None): 341 return self.contents.db.listModified(self.site, after=after, before=before) 342 343 def listContents(self, inner_path="content.json", user_files=False): 344 if inner_path not in self.contents: 345 return [] 346 back = [inner_path] 347 content_inner_dir = helper.getDirname(inner_path) 348 for relative_path in list(self.contents[inner_path].get("includes", {}).keys()): 349 include_inner_path = content_inner_dir + relative_path 350 back += self.listContents(include_inner_path) 351 return back 352 353 # Returns if file with the given modification date is archived or not 354 def isArchived(self, inner_path, modified): 355 match = re.match(r"(.*)/(.*?)/", inner_path) 356 if not match: 357 return False 358 user_contents_inner_path = match.group(1) + "/content.json" 359 relative_directory = match.group(2) 360 361 file_info = self.getFileInfo(user_contents_inner_path) 362 if file_info: 363 time_archived_before = file_info.get("archived_before", 0) 364 time_directory_archived = file_info.get("archived", {}).get(relative_directory, 0) 365 if modified <= time_archived_before or modified <= time_directory_archived: 366 return True 367 else: 368 return False 369 else: 370 return False 371 372 def isDownloaded(self, inner_path, hash_id=None): 373 if not hash_id: 374 file_info = self.getFileInfo(inner_path) 375 if not file_info or "sha512" not in file_info: 376 return False 377 hash_id = 
self.hashfield.getHashId(file_info["sha512"]) 378 return hash_id in self.hashfield 379 380 # Is modified since signing 381 def isModified(self, inner_path): 382 s = time.time() 383 if inner_path.endswith("content.json"): 384 try: 385 is_valid = self.verifyFile(inner_path, self.site.storage.open(inner_path), ignore_same=False) 386 if is_valid: 387 is_modified = False 388 else: 389 is_modified = True 390 except VerifyError: 391 is_modified = True 392 else: 393 try: 394 self.verifyFile(inner_path, self.site.storage.open(inner_path), ignore_same=False) 395 is_modified = False 396 except VerifyError: 397 is_modified = True 398 return is_modified 399 400 # Find the file info line from self.contents 401 # Return: { "sha512": "c29d73d...21f518", "size": 41 , "content_inner_path": "content.json"} 402 def getFileInfo(self, inner_path, new_file=False): 403 dirs = inner_path.split("/") # Parent dirs of content.json 404 inner_path_parts = [dirs.pop()] # Filename relative to content.json 405 while True: 406 content_inner_path = "%s/content.json" % "/".join(dirs) 407 content_inner_path = content_inner_path.strip("/") 408 content = self.contents.get(content_inner_path) 409 410 # Check in files 411 if content and "files" in content: 412 back = content["files"].get("/".join(inner_path_parts)) 413 if back: 414 back["content_inner_path"] = content_inner_path 415 back["optional"] = False 416 back["relative_path"] = "/".join(inner_path_parts) 417 return back 418 419 # Check in optional files 420 if content and "files_optional" in content: # Check if file in this content.json 421 back = content["files_optional"].get("/".join(inner_path_parts)) 422 if back: 423 back["content_inner_path"] = content_inner_path 424 back["optional"] = True 425 back["relative_path"] = "/".join(inner_path_parts) 426 return back 427 428 # Return the rules if user dir 429 if content and "user_contents" in content: 430 back = content["user_contents"] 431 content_inner_path_dir = 
helper.getDirname(content_inner_path) 432 relative_content_path = inner_path[len(content_inner_path_dir):] 433 user_auth_address_match = re.match(r"([A-Za-z0-9]+)/.*", relative_content_path) 434 if user_auth_address_match: 435 user_auth_address = user_auth_address_match.group(1) 436 back["content_inner_path"] = "%s%s/content.json" % (content_inner_path_dir, user_auth_address) 437 else: 438 back["content_inner_path"] = content_inner_path_dir + "content.json" 439 back["optional"] = None 440 back["relative_path"] = "/".join(inner_path_parts) 441 return back 442 443 if new_file and content: 444 back = {} 445 back["content_inner_path"] = content_inner_path 446 back["relative_path"] = "/".join(inner_path_parts) 447 back["optional"] = None 448 return back 449 450 # No inner path in this dir, lets try the parent dir 451 if dirs: 452 inner_path_parts.insert(0, dirs.pop()) 453 else: # No more parent dirs 454 break 455 456 # Not found 457 return False 458 459 # Get rules for the file 460 # Return: The rules for the file or False if not allowed 461 def getRules(self, inner_path, content=None): 462 if not inner_path.endswith("content.json"): # Find the files content.json first 463 file_info = self.getFileInfo(inner_path) 464 if not file_info: 465 return False # File not found 466 inner_path = file_info["content_inner_path"] 467 468 if inner_path == "content.json": # Root content.json 469 rules = {} 470 rules["signers"] = self.getValidSigners(inner_path, content) 471 return rules 472 473 dirs = inner_path.split("/") # Parent dirs of content.json 474 inner_path_parts = [dirs.pop()] # Filename relative to content.json 475 inner_path_parts.insert(0, dirs.pop()) # Dont check in self dir 476 while True: 477 content_inner_path = "%s/content.json" % "/".join(dirs) 478 parent_content = self.contents.get(content_inner_path.strip("/")) 479 if parent_content and "includes" in parent_content: 480 return parent_content["includes"].get("/".join(inner_path_parts)) 481 elif parent_content and 
"user_contents" in parent_content: 482 return self.getUserContentRules(parent_content, inner_path, content) 483 else: # No inner path in this dir, lets try the parent dir 484 if dirs: 485 inner_path_parts.insert(0, dirs.pop()) 486 else: # No more parent dirs 487 break 488 489 return False 490 491 # Get rules for a user file 492 # Return: The rules of the file or False if not allowed 493 def getUserContentRules(self, parent_content, inner_path, content): 494 user_contents = parent_content["user_contents"] 495 496 # Delivered for directory 497 if "inner_path" in parent_content: 498 parent_content_dir = helper.getDirname(parent_content["inner_path"]) 499 user_address = re.match(r"([A-Za-z0-9]*?)/", inner_path[len(parent_content_dir):]).group(1) 500 else: 501 user_address = re.match(r".*/([A-Za-z0-9]*?)/.*?$", inner_path).group(1) 502 503 try: 504 if not content: 505 content = self.site.storage.loadJson(inner_path) # Read the file if no content specified 506 user_urn = "%s/%s" % (content["cert_auth_type"], content["cert_user_id"]) # web/nofish@zeroid.bit 507 cert_user_id = content["cert_user_id"] 508 except Exception: # Content.json not exist 509 user_urn = "n-a/n-a" 510 cert_user_id = "n-a" 511 512 if user_address in user_contents["permissions"]: 513 rules = copy.copy(user_contents["permissions"].get(user_address, {})) # Default rules based on address 514 else: 515 rules = copy.copy(user_contents["permissions"].get(cert_user_id, {})) # Default rules based on username 516 517 if rules is False: 518 banned = True 519 rules = {} 520 else: 521 banned = False 522 if "signers" in rules: 523 rules["signers"] = rules["signers"][:] # Make copy of the signers 524 for permission_pattern, permission_rules in list(user_contents["permission_rules"].items()): # Regexp rules 525 if not SafeRe.match(permission_pattern, user_urn): 526 continue # Rule is not valid for user 527 # Update rules if its better than current recorded ones 528 for key, val in permission_rules.items(): 529 if 
key not in rules: 530 if type(val) is list: 531 rules[key] = val[:] # Make copy 532 else: 533 rules[key] = val 534 elif type(val) is int: # Int, update if larger 535 if val > rules[key]: 536 rules[key] = val 537 elif hasattr(val, "startswith"): # String, update if longer 538 if len(val) > len(rules[key]): 539 rules[key] = val 540 elif type(val) is list: # List, append 541 rules[key] += val 542 543 # Accepted cert signers 544 rules["cert_signers"] = user_contents.get("cert_signers", {}) 545 rules["cert_signers_pattern"] = user_contents.get("cert_signers_pattern") 546 547 if "signers" not in rules: 548 rules["signers"] = [] 549 550 if not banned: 551 rules["signers"].append(user_address) # Add user as valid signer 552 rules["user_address"] = user_address 553 rules["includes_allowed"] = False 554 555 return rules 556 557 # Get diffs for changed files 558 def getDiffs(self, inner_path, limit=30 * 1024, update_files=True): 559 if inner_path not in self.contents: 560 return {} 561 diffs = {} 562 content_inner_path_dir = helper.getDirname(inner_path) 563 for file_relative_path in self.contents[inner_path].get("files", {}): 564 file_inner_path = content_inner_path_dir + file_relative_path 565 if self.site.storage.isFile(file_inner_path + "-new"): # New version present 566 diffs[file_relative_path] = Diff.diff( 567 list(self.site.storage.open(file_inner_path)), 568 list(self.site.storage.open(file_inner_path + "-new")), 569 limit=limit 570 ) 571 if update_files: 572 self.site.storage.delete(file_inner_path) 573 self.site.storage.rename(file_inner_path + "-new", file_inner_path) 574 if self.site.storage.isFile(file_inner_path + "-old"): # Old version present 575 diffs[file_relative_path] = Diff.diff( 576 list(self.site.storage.open(file_inner_path + "-old")), 577 list(self.site.storage.open(file_inner_path)), 578 limit=limit 579 ) 580 if update_files: 581 self.site.storage.delete(file_inner_path + "-old") 582 return diffs 583 584 def hashFile(self, dir_inner_path, 
file_relative_path, optional=False): 585 back = {} 586 file_inner_path = dir_inner_path + "/" + file_relative_path 587 588 file_path = self.site.storage.getPath(file_inner_path) 589 file_size = os.path.getsize(file_path) 590 sha512sum = CryptHash.sha512sum(file_path) # Calculate sha512 sum of file 591 if optional and not self.hashfield.hasHash(sha512sum): 592 self.optionalDownloaded(file_inner_path, self.hashfield.getHashId(sha512sum), file_size, own=True) 593 594 back[file_relative_path] = {"sha512": sha512sum, "size": os.path.getsize(file_path)} 595 return back 596 597 def isValidRelativePath(self, relative_path): 598 if ".." in relative_path.replace("\\", "/").split("/"): 599 return False 600 elif len(relative_path) > 255: 601 return False 602 else: 603 return re.match(r"^[a-z\[\]\(\) A-Z0-9~_@=\.\+-/]+$", relative_path) 604 605 def sanitizePath(self, inner_path): 606 return re.sub("[^a-z\[\]\(\) A-Z0-9_@=\.\+-/]", "", inner_path) 607 608 # Hash files in directory 609 def hashFiles(self, dir_inner_path, ignore_pattern=None, optional_pattern=None): 610 files_node = {} 611 files_optional_node = {} 612 if dir_inner_path and not self.isValidRelativePath(dir_inner_path): 613 ignored = True 614 self.log.error("- [ERROR] Only ascii encoded directories allowed: %s" % dir_inner_path) 615 616 for file_relative_path in self.site.storage.walk(dir_inner_path, ignore_pattern): 617 file_name = helper.getFilename(file_relative_path) 618 619 ignored = optional = False 620 if file_name == "content.json": 621 ignored = True 622 elif file_name.startswith(".") or file_name.endswith("-old") or file_name.endswith("-new"): 623 ignored = True 624 elif not self.isValidRelativePath(file_relative_path): 625 ignored = True 626 self.log.error("- [ERROR] Invalid filename: %s" % file_relative_path) 627 elif dir_inner_path == "" and self.site.storage.getDbFile() and file_relative_path.startswith(self.site.storage.getDbFile()): 628 ignored = True 629 elif optional_pattern and 
SafeRe.match(optional_pattern, file_relative_path): 630 optional = True 631 632 if ignored: # Ignore content.json, defined regexp and files starting with . 633 self.log.info("- [SKIPPED] %s" % file_relative_path) 634 else: 635 if optional: 636 self.log.info("- [OPTIONAL] %s" % file_relative_path) 637 files_optional_node.update( 638 self.hashFile(dir_inner_path, file_relative_path, optional=True) 639 ) 640 else: 641 self.log.info("- %s" % file_relative_path) 642 files_node.update( 643 self.hashFile(dir_inner_path, file_relative_path) 644 ) 645 return files_node, files_optional_node 646 647 # Create and sign a content.json 648 # Return: The new content if filewrite = False 649 def sign(self, inner_path="content.json", privatekey=None, filewrite=True, update_changed_files=False, extend=None, remove_missing_optional=False): 650 if not inner_path.endswith("content.json"): 651 raise SignError("Invalid file name, you can only sign content.json files") 652 653 if inner_path in self.contents: 654 content = self.contents.get(inner_path) 655 if content and content.get("cert_sign", False) is None and self.site.storage.isFile(inner_path): 656 # Recover cert_sign from file 657 content["cert_sign"] = self.site.storage.loadJson(inner_path).get("cert_sign") 658 else: 659 content = None 660 if not content: # Content not exist yet, load default one 661 self.log.info("File %s not exist yet, loading default values..." 
% inner_path) 662 663 if self.site.storage.isFile(inner_path): 664 content = self.site.storage.loadJson(inner_path) 665 if "files" not in content: 666 content["files"] = {} 667 if "signs" not in content: 668 content["signs"] = {} 669 else: 670 content = {"files": {}, "signs": {}} # Default content.json 671 672 if inner_path == "content.json": # It's the root content.json, add some more fields 673 content["title"] = "%s - ZeroNet_" % self.site.address 674 content["description"] = "" 675 content["signs_required"] = 1 676 content["ignore"] = "" 677 678 if extend: 679 # Add extend keys if not exists 680 for key, val in list(extend.items()): 681 if not content.get(key): 682 content[key] = val 683 self.log.info("Extending content.json with: %s" % key) 684 685 directory = helper.getDirname(self.site.storage.getPath(inner_path)) 686 inner_directory = helper.getDirname(inner_path) 687 self.log.info("Opening site data directory: %s..." % directory) 688 689 changed_files = [inner_path] 690 files_node, files_optional_node = self.hashFiles( 691 helper.getDirname(inner_path), content.get("ignore"), content.get("optional") 692 ) 693 694 if not remove_missing_optional: 695 for file_inner_path, file_details in content.get("files_optional", {}).items(): 696 if file_inner_path not in files_optional_node: 697 files_optional_node[file_inner_path] = file_details 698 699 # Find changed files 700 files_merged = files_node.copy() 701 files_merged.update(files_optional_node) 702 for file_relative_path, file_details in files_merged.items(): 703 old_hash = content.get("files", {}).get(file_relative_path, {}).get("sha512") 704 new_hash = files_merged[file_relative_path]["sha512"] 705 if old_hash != new_hash: 706 changed_files.append(inner_directory + file_relative_path) 707 708 self.log.debug("Changed files: %s" % changed_files) 709 if update_changed_files: 710 for file_path in changed_files: 711 self.site.storage.onUpdated(file_path) 712 713 # Generate new content.json 714 
        self.log.info("Adding timestamp and sha512sums to new content.json...")

        new_content = content.copy()  # Create a copy of current content.json
        new_content["files"] = files_node  # Add files sha512 hash
        if files_optional_node:
            new_content["files_optional"] = files_optional_node
        elif "files_optional" in new_content:
            del new_content["files_optional"]

        new_content["modified"] = int(time.time())  # Add timestamp
        if inner_path == "content.json":
            new_content["zeronet_version"] = config.version
            new_content["signs_required"] = content.get("signs_required", 1)

        new_content["address"] = self.site.address
        new_content["inner_path"] = inner_path

        # Verify private key
        from Crypt import CryptBitcoin
        self.log.info("Verifying private key...")
        privatekey_address = CryptBitcoin.privatekeyToAddress(privatekey)
        valid_signers = self.getValidSigners(inner_path, new_content)
        if privatekey_address not in valid_signers:
            raise SignError(
                "Private key invalid! Valid signers: %s, Private key address: %s" %
                (valid_signers, privatekey_address)
            )
        self.log.info("Correct %s in valid signers: %s" % (privatekey_address, valid_signers))

        if inner_path == "content.json" and privatekey_address == self.site.address:
            # If signing using the root key, then sign the valid signers
            signers_data = "%s:%s" % (new_content["signs_required"], ",".join(valid_signers))
            new_content["signers_sign"] = CryptBitcoin.sign(str(signers_data), privatekey)
            if not new_content["signers_sign"]:
                self.log.info("Old style address, signers_sign is none")

        self.log.info("Signing %s..." % inner_path)

        if "signs" in new_content:
            del(new_content["signs"])  # Delete old signs
        if "sign" in new_content:
            del(new_content["sign"])  # Delete old sign (backward compatibility)

        # Sign the canonical (sorted-keys) json dump without the signature fields
        sign_content = json.dumps(new_content, sort_keys=True)
        sign = CryptBitcoin.sign(sign_content, privatekey)
        # new_content["signs"] = content.get("signs", {}) # TODO: Multisig
        if sign:  # If signing is successful (not an old address)
            new_content["signs"] = {}
            new_content["signs"][privatekey_address] = sign

        self.verifyContent(inner_path, new_content)

        if filewrite:
            self.log.info("Saving to %s..." % inner_path)
            self.site.storage.writeJson(inner_path, new_content)
            self.contents[inner_path] = new_content

        self.log.info("File %s signed!" % inner_path)

        if filewrite:  # Written to file
            return True
        else:  # Return the new content
            return new_content

    # The valid signers of content.json file
    # Return: ["1KRxE1s3oDyNDawuYWpzbLUwNm8oDbeEp6", "13ReyhCsjhpuCVahn1DHdf6eMqqEVev162"]
    def getValidSigners(self, inner_path, content=None):
        valid_signers = []
        if inner_path == "content.json":  # Root content.json
            if "content.json" in self.contents and "signers" in self.contents["content.json"]:
                valid_signers += self.contents["content.json"]["signers"][:]
        else:  # Included content.json: signers come from the include rules
            rules = self.getRules(inner_path, content)
            if rules and "signers" in rules:
                valid_signers += rules["signers"]

        if self.site.address not in valid_signers:
            valid_signers.append(self.site.address)  # Site address always valid
        return valid_signers

    # Return: The required number of valid signs for the content.json
    def getSignsRequired(self, inner_path, content=None):
        return 1  # Todo: Multisig

    # Check the certificate referenced by content["cert_user_id"] against the
    # cert signers allowed by the site rules.
    # Return: True if no cert is needed or the cert signature verifies
    # Raises: VerifyError on a missing or invalid cert
    def verifyCert(self, inner_path, content):
        from Crypt import CryptBitcoin

        rules = self.getRules(inner_path, content)

        if not rules:
            raise VerifyError("No rules for this file")

        if not rules.get("cert_signers") and not rules.get("cert_signers_pattern"):
            return True  # Does not need cert

        if "cert_user_id" not in content:
            raise VerifyError("Missing cert_user_id")

        if content["cert_user_id"].count("@") != 1:
            raise VerifyError("Invalid domain in cert_user_id")

        name, domain = content["cert_user_id"].rsplit("@", 1)
        cert_address = rules["cert_signers"].get(domain)
        if not cert_address:  # Unknown Cert signer
            if rules.get("cert_signers_pattern") and SafeRe.match(rules["cert_signers_pattern"], domain):
                cert_address = domain  # The domain itself acts as the signer address
            else:
                raise VerifyError("Invalid cert signer: %s" % domain)

        try:
            cert_subject = "%s#%s/%s" % (rules["user_address"], content["cert_auth_type"], name)
            result = CryptBitcoin.verify(cert_subject, cert_address, content["cert_sign"])
        except Exception as err:
            raise VerifyError("Certificate verify error: %s" % err)
        return result

    # Checks if the content.json content is valid
    # Return: True or False
    # Raises: VerifyError with the reason when invalid
    def verifyContent(self, inner_path, content):
        content_size = len(json.dumps(content, indent=1)) + sum([file["size"] for file in list(content["files"].values()) if file["size"] >= 0])  # Size of new content
        # Calculate old content size
        old_content = self.contents.get(inner_path)
        if old_content:
            old_content_size = len(json.dumps(old_content, indent=1)) + sum([file["size"] for file in list(old_content.get("files", {}).values())])
            old_content_size_optional = sum([file["size"] for file in list(old_content.get("files_optional", {}).values())])
        else:
            old_content_size = 0
            old_content_size_optional = 0

        # Reset site size on first content.json
        if not old_content and inner_path == "content.json":
            self.site.settings["size"] = 0

        content_size_optional = sum([file["size"] for file in list(content.get("files_optional", {}).values()) if file["size"] >= 0])
        site_size = self.site.settings["size"] - old_content_size + content_size  # Site size without old content plus the new
        site_size_optional = self.site.settings["size_optional"] - old_content_size_optional + content_size_optional  # Site size without old content plus the new

        site_size_limit = self.site.getSizeLimit() * 1024 * 1024

        # Check site address
        if content.get("address") and content["address"] != self.site.address:
            raise VerifyError("Wrong site address: %s != %s" % (content["address"], self.site.address))

        # Check file inner path
        if content.get("inner_path") and content["inner_path"] != inner_path:
            raise VerifyError("Wrong inner_path: %s" % content["inner_path"])

        # Check total site size limit
        if site_size > site_size_limit:
            if inner_path == "content.json" and self.site.settings["size"] == 0:
                # First content.json download, save site size to display warning
                self.site.settings["size"] = site_size
            task = self.site.worker_manager.findTask(inner_path)
            if task:  # Don't try to download from other peers
                self.site.worker_manager.failTask(task)
            raise VerifyError("Content too large %sB > %sB, aborting task..." % (site_size, site_size_limit))

        # Verify valid filenames
        for file_relative_path in list(content.get("files", {}).keys()) + list(content.get("files_optional", {}).keys()):
            if not self.isValidRelativePath(file_relative_path):
                raise VerifyError("Invalid relative path: %s" % file_relative_path)

        if inner_path == "content.json":
            self.site.settings["size"] = site_size
            self.site.settings["size_optional"] = site_size_optional
            return True  # Root content.json is passed
        else:
            if self.verifyContentInclude(inner_path, content, content_size, content_size_optional):
                self.site.settings["size"] = site_size
                self.site.settings["size_optional"] = site_size_optional
                return True
            else:
                return False

    # Validate an included content.json against its rules: size limits,
    # allowed filenames and whether further includes are permitted.
    # Raises: VerifyError on any rule violation
    def verifyContentInclude(self, inner_path, content, content_size, content_size_optional):
        # Load include details
        rules = self.getRules(inner_path, content)
        if not rules:
            raise VerifyError("No rules")

        # Check include size limit
        if rules.get("max_size") is not None:  # Include size limit
            if content_size > rules["max_size"]:
                raise VerifyError("Include too large %sB > %sB" % (content_size, rules["max_size"]))

        if rules.get("max_size_optional") is not None:  # Include optional files limit
            if content_size_optional > rules["max_size_optional"]:
                raise VerifyError("Include optional files too large %sB > %sB" % (
                    content_size_optional, rules["max_size_optional"])
                )

        # Filename limit
        if rules.get("files_allowed"):
            for file_inner_path in list(content["files"].keys()):
                if not SafeRe.match(r"^%s$" % rules["files_allowed"], file_inner_path):
                    raise VerifyError("File not allowed: %s" % file_inner_path)

        if rules.get("files_allowed_optional"):
            for file_inner_path in list(content.get("files_optional", {}).keys()):
                if not SafeRe.match(r"^%s$" % rules["files_allowed_optional"], file_inner_path):
                    raise VerifyError("Optional file not allowed: %s" % file_inner_path)

        # Check if content includes allowed
        if rules.get("includes_allowed") is False and content.get("includes"):
            raise VerifyError("Includes not allowed")

        return True  # All good

    # Verify file validity
    # Return: None = Same as before, False = Invalid, True = Valid
    def verifyFile(self, inner_path, file, ignore_same=True):
        if inner_path.endswith("content.json"):  # content.json: Check using sign
            from Crypt import CryptBitcoin
            try:
                if type(file) is dict:  # Already parsed
                    new_content = file
                else:
                    if sys.version_info.major == 3 and sys.version_info.minor < 6:
                        new_content = json.loads(file.read().decode("utf8"))
                    else:
                        new_content = json.load(file)
                if inner_path in self.contents:
                    old_content = self.contents.get(inner_path, {"modified": 0})
                    # Check if it's newer than ours
                    if old_content["modified"] == new_content["modified"] and ignore_same:  # Ignore, have the same content.json
                        return None
                    elif old_content["modified"] > new_content["modified"]:  # We have newer
                        raise VerifyError(
                            "We have newer (Our: %s, Sent: %s)" %
                            (old_content["modified"], new_content["modified"])
                        )
                if new_content["modified"] > time.time() + 60 * 60 * 24:  # Content modified in the far future (allow 1 day+)
                    raise VerifyError("Modify timestamp is in the far future!")
                if self.isArchived(inner_path, new_content["modified"]):
                    if inner_path in self.site.bad_files:
                        del self.site.bad_files[inner_path]
                    raise VerifyError("This file is archived!")
                # Check sign
                sign = new_content.get("sign")
                signs = new_content.get("signs", {})
                if "sign" in new_content:
                    del(new_content["sign"])  # The file signed without the sign
                if "signs" in new_content:
                    del(new_content["signs"])  # The file signed without the signs

                sign_content = json.dumps(new_content, sort_keys=True)  # Dump the json to string to remove whitespace

                # Fix float representation error on 
Android 962 modified = new_content["modified"] 963 if config.fix_float_decimals and type(modified) is float and not str(modified).endswith(".0"): 964 modified_fixed = "{:.6f}".format(modified).strip("0.") 965 sign_content = sign_content.replace( 966 '"modified": %s' % repr(modified), 967 '"modified": %s' % modified_fixed 968 ) 969 970 if signs: # New style signing 971 valid_signers = self.getValidSigners(inner_path, new_content) 972 signs_required = self.getSignsRequired(inner_path, new_content) 973 974 if inner_path == "content.json" and len(valid_signers) > 1: # Check signers_sign on root content.json 975 signers_data = "%s:%s" % (signs_required, ",".join(valid_signers)) 976 if not CryptBitcoin.verify(signers_data, self.site.address, new_content["signers_sign"]): 977 raise VerifyError("Invalid signers_sign!") 978 979 if inner_path != "content.json" and not self.verifyCert(inner_path, new_content): # Check if cert valid 980 raise VerifyError("Invalid cert!") 981 982 valid_signs = 0 983 for address in valid_signers: 984 if address in signs: 985 valid_signs += CryptBitcoin.verify(sign_content, address, signs[address]) 986 if valid_signs >= signs_required: 987 break # Break if we has enough signs 988 if valid_signs < signs_required: 989 raise VerifyError("Valid signs: %s/%s" % (valid_signs, signs_required)) 990 else: 991 return self.verifyContent(inner_path, new_content) 992 else: # Old style signing 993 raise VerifyError("Invalid old-style sign") 994 995 except Exception as err: 996 self.log.warning("%s: verify sign error: %s" % (inner_path, Debug.formatException(err))) 997 raise err 998 999 else: # Check using sha512 hash 1000 file_info = self.getFileInfo(inner_path) 1001 if file_info: 1002 if CryptHash.sha512sum(file) != file_info.get("sha512", ""): 1003 raise VerifyError("Invalid hash") 1004 1005 if file_info.get("size", 0) != file.tell(): 1006 raise VerifyError( 1007 "File size does not match %s <> %s" % 1008 (inner_path, file.tell(), file_info.get("size", 0)) 
1009 ) 1010 1011 return True 1012 1013 else: # File not in content.json 1014 raise VerifyError("File not in content.json") 1015 1016 def optionalDelete(self, inner_path): 1017 self.site.storage.delete(inner_path) 1018 1019 def optionalDownloaded(self, inner_path, hash_id, size=None, own=False): 1020 if size is None: 1021 size = self.site.storage.getSize(inner_path) 1022 1023 done = self.hashfield.appendHashId(hash_id) 1024 self.site.settings["optional_downloaded"] += size 1025 return done 1026 1027 def optionalRemoved(self, inner_path, hash_id, size=None): 1028 if size is None: 1029 size = self.site.storage.getSize(inner_path) 1030 done = self.hashfield.removeHashId(hash_id) 1031 1032 self.site.settings["optional_downloaded"] -= size 1033 return done 1034 1035 def optionalRenamed(self, inner_path_old, inner_path_new): 1036 return True 1037