1import json
2import time
3import re
4import os
5import copy
6import base64
7import sys
8
9import gevent
10
11from Debug import Debug
12from Crypt import CryptHash
13from Config import config
14from util import helper
15from util import Diff
16from util import SafeRe
17from Peer import PeerHashfield
18from .ContentDbDict import ContentDbDict
19from Plugin import PluginManager
20
21
class VerifyError(Exception):
    """Raised when a file or content.json fails verification (see verifyFile callers)."""
24
25
class SignError(Exception):
    """Raised when a content.json cannot be signed (e.g. invalid file name)."""
28
29
30@PluginManager.acceptPlugins
31class ContentManager(object):
32
33    def __init__(self, site):
34        self.site = site
35        self.log = self.site.log
36        self.contents = ContentDbDict(site)
37        self.hashfield = PeerHashfield()
38        self.has_optional_files = False
39
    # Load all content.json files
    def loadContents(self):
        """Initialize contents from db/filesystem, restore the hashfield cache, then init the db for this site."""
        if len(self.contents) == 0:
            # First run for this site: no db rows yet, parse content.json files from disk
            self.log.debug("ContentDb not initialized, load files from filesystem")
            self.loadContent(add_bad_files=False, delete_removed_files=False)
        self.site.settings["size"], self.site.settings["size_optional"] = self.getTotalSize()

        # Load hashfield cache
        if "hashfield" in self.site.settings.get("cache", {}):
            self.hashfield.frombytes(base64.b64decode(self.site.settings["cache"]["hashfield"]))
            del self.site.settings["cache"]["hashfield"]  # One-shot cache entry, drop after restore
        elif self.contents.get("content.json") and self.site.settings["size_optional"] > 0:
            self.site.storage.updateBadFiles()  # No hashfield cache created yet
        self.has_optional_files = bool(self.hashfield)

        self.contents.db.initSite(self.site)
56
57    def getFileChanges(self, old_files, new_files):
58        deleted = {key: val for key, val in old_files.items() if key not in new_files}
59        deleted_hashes = {val.get("sha512"): key for key, val in old_files.items() if key not in new_files}
60        added = {key: val for key, val in new_files.items() if key not in old_files}
61        renamed = {}
62        for relative_path, node in added.items():
63            hash = node.get("sha512")
64            if hash in deleted_hashes:
65                relative_path_old = deleted_hashes[hash]
66                renamed[relative_path_old] = relative_path
67                del(deleted[relative_path_old])
68        return list(deleted), renamed
69
70    # Load content.json to self.content
71    # Return: Changed files ["index.html", "data/messages.json"], Deleted files ["old.jpg"]
72    def loadContent(self, content_inner_path="content.json", add_bad_files=True, delete_removed_files=True, load_includes=True, force=False):
73        content_inner_path = content_inner_path.strip("/")  # Remove / from beginning
74        old_content = self.contents.get(content_inner_path)
75        content_path = self.site.storage.getPath(content_inner_path)
76        content_dir = helper.getDirname(self.site.storage.getPath(content_inner_path))
77        content_inner_dir = helper.getDirname(content_inner_path)
78
79        if os.path.isfile(content_path):
80            try:
81                # Check if file is newer than what we have
82                if not force and old_content and not self.site.settings.get("own"):
83                    for line in open(content_path):
84                        if '"modified"' not in line:
85                            continue
86                        match = re.search(r"([0-9\.]+),$", line.strip(" \r\n"))
87                        if match and float(match.group(1)) <= old_content.get("modified", 0):
88                            self.log.debug("%s loadContent same json file, skipping" % content_inner_path)
89                            return [], []
90
91                new_content = self.site.storage.loadJson(content_inner_path)
92            except Exception as err:
93                self.log.warning("%s load error: %s" % (content_path, Debug.formatException(err)))
94                return [], []
95        else:
96            self.log.debug("Content.json not exist: %s" % content_path)
97            return [], []  # Content.json not exist
98
99        try:
100            # Get the files where the sha512 changed
101            changed = []
102            deleted = []
103            # Check changed
104            for relative_path, info in new_content.get("files", {}).items():
105                if "sha512" in info:
106                    hash_type = "sha512"
107                else:  # Backward compatibility
108                    hash_type = "sha1"
109
110                new_hash = info[hash_type]
111                if old_content and old_content["files"].get(relative_path):  # We have the file in the old content
112                    old_hash = old_content["files"][relative_path].get(hash_type)
113                else:  # The file is not in the old content
114                    old_hash = None
115                if old_hash != new_hash:
116                    changed.append(content_inner_dir + relative_path)
117
118            # Check changed optional files
119            for relative_path, info in new_content.get("files_optional", {}).items():
120                file_inner_path = content_inner_dir + relative_path
121                new_hash = info["sha512"]
122                if old_content and old_content.get("files_optional", {}).get(relative_path):
123                    # We have the file in the old content
124                    old_hash = old_content["files_optional"][relative_path].get("sha512")
125                    if old_hash != new_hash and self.site.isDownloadable(file_inner_path):
126                        changed.append(file_inner_path)  # Download new file
127                    elif old_hash != new_hash and self.hashfield.hasHash(old_hash) and not self.site.settings.get("own"):
128                        try:
129                            old_hash_id = self.hashfield.getHashId(old_hash)
130                            self.optionalRemoved(file_inner_path, old_hash_id, old_content["files_optional"][relative_path]["size"])
131                            self.optionalDelete(file_inner_path)
132                            self.log.debug("Deleted changed optional file: %s" % file_inner_path)
133                        except Exception as err:
134                            self.log.warning("Error deleting file %s: %s" % (file_inner_path, Debug.formatException(err)))
135                else:  # The file is not in the old content
136                    if self.site.isDownloadable(file_inner_path):
137                        changed.append(file_inner_path)  # Download new file
138
139            # Check deleted
140            if old_content:
141                old_files = dict(
142                    old_content.get("files", {}),
143                    **old_content.get("files_optional", {})
144                )
145
146                new_files = dict(
147                    new_content.get("files", {}),
148                    **new_content.get("files_optional", {})
149                )
150
151                deleted, renamed = self.getFileChanges(old_files, new_files)
152
153                for relative_path_old, relative_path_new in renamed.items():
154                    self.log.debug("Renaming: %s -> %s" % (relative_path_old, relative_path_new))
155                    if relative_path_new in new_content.get("files_optional", {}):
156                        self.optionalRenamed(content_inner_dir + relative_path_old, content_inner_dir + relative_path_new)
157                    if self.site.storage.isFile(relative_path_old):
158                        try:
159                            self.site.storage.rename(relative_path_old, relative_path_new)
160                            if relative_path_new in changed:
161                                changed.remove(relative_path_new)
162                            self.log.debug("Renamed: %s -> %s" % (relative_path_old, relative_path_new))
163                        except Exception as err:
164                            self.log.warning("Error renaming file: %s -> %s %s" % (relative_path_old, relative_path_new, err))
165
166                if deleted and not self.site.settings.get("own"):
167                    # Deleting files that no longer in content.json
168                    for file_relative_path in deleted:
169                        file_inner_path = content_inner_dir + file_relative_path
170                        try:
171                            # Check if the deleted file is optional
172                            if old_content.get("files_optional") and old_content["files_optional"].get(file_relative_path):
173                                self.optionalDelete(file_inner_path)
174                                old_hash = old_content["files_optional"][file_relative_path].get("sha512")
175                                if self.hashfield.hasHash(old_hash):
176                                    old_hash_id = self.hashfield.getHashId(old_hash)
177                                    self.optionalRemoved(file_inner_path, old_hash_id, old_content["files_optional"][file_relative_path]["size"])
178                            else:
179                                self.site.storage.delete(file_inner_path)
180
181                            self.log.debug("Deleted file: %s" % file_inner_path)
182                        except Exception as err:
183                            self.log.debug("Error deleting file %s: %s" % (file_inner_path, Debug.formatException(err)))
184
185                    # Cleanup empty dirs
186                    tree = {root: [dirs, files] for root, dirs, files in os.walk(self.site.storage.getPath(content_inner_dir))}
187                    for root in sorted(tree, key=len, reverse=True):
188                        dirs, files = tree[root]
189                        if dirs == [] and files == []:
190                            root_inner_path = self.site.storage.getInnerPath(root.replace("\\", "/"))
191                            self.log.debug("Empty directory: %s, cleaning up." % root_inner_path)
192                            try:
193                                self.site.storage.deleteDir(root_inner_path)
194                                # Remove from tree dict to reflect changed state
195                                tree[os.path.dirname(root)][0].remove(os.path.basename(root))
196                            except Exception as err:
197                                self.log.debug("Error deleting empty directory %s: %s" % (root_inner_path, err))
198
199            # Check archived
200            if old_content and "user_contents" in new_content and "archived" in new_content["user_contents"]:
201                old_archived = old_content.get("user_contents", {}).get("archived", {})
202                new_archived = new_content.get("user_contents", {}).get("archived", {})
203                self.log.debug("old archived: %s, new archived: %s" % (len(old_archived), len(new_archived)))
204                archived_changed = {
205                    key: date_archived
206                    for key, date_archived in new_archived.items()
207                    if old_archived.get(key) != new_archived[key]
208                }
209                if archived_changed:
210                    self.log.debug("Archived changed: %s" % archived_changed)
211                    for archived_dirname, date_archived in archived_changed.items():
212                        archived_inner_path = content_inner_dir + archived_dirname + "/content.json"
213                        if self.contents.get(archived_inner_path, {}).get("modified", 0) < date_archived:
214                            self.removeContent(archived_inner_path)
215                            deleted += archived_inner_path
216                    self.site.settings["size"], self.site.settings["size_optional"] = self.getTotalSize()
217
218            # Check archived before
219            if old_content and "user_contents" in new_content and "archived_before" in new_content["user_contents"]:
220                old_archived_before = old_content.get("user_contents", {}).get("archived_before", 0)
221                new_archived_before = new_content.get("user_contents", {}).get("archived_before", 0)
222                if old_archived_before != new_archived_before:
223                    self.log.debug("Archived before changed: %s -> %s" % (old_archived_before, new_archived_before))
224
225                    # Remove downloaded archived files
226                    num_removed_contents = 0
227                    for archived_inner_path in self.listModified(before=new_archived_before):
228                        if archived_inner_path.startswith(content_inner_dir) and archived_inner_path != content_inner_path:
229                            self.removeContent(archived_inner_path)
230                            num_removed_contents += 1
231                    self.site.settings["size"], self.site.settings["size_optional"] = self.getTotalSize()
232
233                    # Remove archived files from download queue
234                    num_removed_bad_files = 0
235                    for bad_file in list(self.site.bad_files.keys()):
236                        if bad_file.endswith("content.json"):
237                            del self.site.bad_files[bad_file]
238                            num_removed_bad_files += 1
239
240                    if num_removed_bad_files > 0:
241                        self.site.worker_manager.removeSolvedFileTasks(mark_as_good=False)
242                        gevent.spawn(self.site.update, since=0)
243
244                    self.log.debug("Archived removed contents: %s, removed bad files: %s" % (num_removed_contents, num_removed_bad_files))
245
246            # Load includes
247            if load_includes and "includes" in new_content:
248                for relative_path, info in list(new_content["includes"].items()):
249                    include_inner_path = content_inner_dir + relative_path
250                    if self.site.storage.isFile(include_inner_path):  # Content.json exists, load it
251                        include_changed, include_deleted = self.loadContent(
252                            include_inner_path, add_bad_files=add_bad_files, delete_removed_files=delete_removed_files
253                        )
254                        if include_changed:
255                            changed += include_changed  # Add changed files
256                        if include_deleted:
257                            deleted += include_deleted  # Add changed files
258                    else:  # Content.json not exist, add to changed files
259                        self.log.debug("Missing include: %s" % include_inner_path)
260                        changed += [include_inner_path]
261
262            # Load blind user includes (all subdir)
263            if load_includes and "user_contents" in new_content:
264                for relative_dir in os.listdir(content_dir):
265                    include_inner_path = content_inner_dir + relative_dir + "/content.json"
266                    if not self.site.storage.isFile(include_inner_path):
267                        continue  # Content.json not exist
268                    include_changed, include_deleted = self.loadContent(
269                        include_inner_path, add_bad_files=add_bad_files, delete_removed_files=delete_removed_files,
270                        load_includes=False
271                    )
272                    if include_changed:
273                        changed += include_changed  # Add changed files
274                    if include_deleted:
275                        deleted += include_deleted  # Add changed files
276
277            # Save some memory
278            new_content["signs"] = None
279            if "cert_sign" in new_content:
280                new_content["cert_sign"] = None
281
282            if new_content.get("files_optional"):
283                self.has_optional_files = True
284            # Update the content
285            self.contents[content_inner_path] = new_content
286        except Exception as err:
287            self.log.warning("%s parse error: %s" % (content_inner_path, Debug.formatException(err)))
288            return [], []  # Content.json parse error
289
290        # Add changed files to bad files
291        if add_bad_files:
292            for inner_path in changed:
293                self.site.bad_files[inner_path] = self.site.bad_files.get(inner_path, 0) + 1
294            for inner_path in deleted:
295                if inner_path in self.site.bad_files:
296                    del self.site.bad_files[inner_path]
297                self.site.worker_manager.removeSolvedFileTasks()
298
299        if new_content.get("modified", 0) > self.site.settings.get("modified", 0):
300            # Dont store modifications in the far future (more than 10 minute)
301            self.site.settings["modified"] = min(time.time() + 60 * 10, new_content["modified"])
302
303        return changed, deleted
304
305    def removeContent(self, inner_path):
306        inner_dir = helper.getDirname(inner_path)
307        try:
308            content = self.contents[inner_path]
309            files = dict(
310                content.get("files", {}),
311                **content.get("files_optional", {})
312            )
313        except Exception as err:
314            self.log.debug("Error loading %s for removeContent: %s" % (inner_path, Debug.formatException(err)))
315            files = {}
316        files["content.json"] = True
317        # Deleting files that no longer in content.json
318        for file_relative_path in files:
319            file_inner_path = inner_dir + file_relative_path
320            try:
321                self.site.storage.delete(file_inner_path)
322                self.log.debug("Deleted file: %s" % file_inner_path)
323            except Exception as err:
324                self.log.debug("Error deleting file %s: %s" % (file_inner_path, err))
325        try:
326            self.site.storage.deleteDir(inner_dir)
327        except Exception as err:
328            self.log.debug("Error deleting dir %s: %s" % (inner_dir, err))
329
330        try:
331            del self.contents[inner_path]
332        except Exception as err:
333            self.log.debug("Error key from contents: %s" % inner_path)
334
    # Get total size of site
    # Return: 32819 (size of files in kb)
    def getTotalSize(self, ignore=None):
        # Delegated to the content db; "ignore" is an inner path excluded from the sum
        return self.contents.db.getTotalSize(self.site, ignore)
339
    def listModified(self, after=None, before=None):
        # Inner paths of content.json files modified inside the given time window
        # (delegated to the content db)
        return self.contents.db.listModified(self.site, after=after, before=before)
342
343    def listContents(self, inner_path="content.json", user_files=False):
344        if inner_path not in self.contents:
345            return []
346        back = [inner_path]
347        content_inner_dir = helper.getDirname(inner_path)
348        for relative_path in list(self.contents[inner_path].get("includes", {}).keys()):
349            include_inner_path = content_inner_dir + relative_path
350            back += self.listContents(include_inner_path)
351        return back
352
353    # Returns if file with the given modification date is archived or not
354    def isArchived(self, inner_path, modified):
355        match = re.match(r"(.*)/(.*?)/", inner_path)
356        if not match:
357            return False
358        user_contents_inner_path = match.group(1) + "/content.json"
359        relative_directory = match.group(2)
360
361        file_info = self.getFileInfo(user_contents_inner_path)
362        if file_info:
363            time_archived_before = file_info.get("archived_before", 0)
364            time_directory_archived = file_info.get("archived", {}).get(relative_directory, 0)
365            if modified <= time_archived_before or modified <= time_directory_archived:
366                return True
367            else:
368                return False
369        else:
370            return False
371
372    def isDownloaded(self, inner_path, hash_id=None):
373        if not hash_id:
374            file_info = self.getFileInfo(inner_path)
375            if not file_info or "sha512" not in file_info:
376                return False
377            hash_id = self.hashfield.getHashId(file_info["sha512"])
378        return hash_id in self.hashfield
379
380    # Is modified since signing
381    def isModified(self, inner_path):
382        s = time.time()
383        if inner_path.endswith("content.json"):
384            try:
385                is_valid = self.verifyFile(inner_path, self.site.storage.open(inner_path), ignore_same=False)
386                if is_valid:
387                    is_modified = False
388                else:
389                    is_modified = True
390            except VerifyError:
391                is_modified = True
392        else:
393            try:
394                self.verifyFile(inner_path, self.site.storage.open(inner_path), ignore_same=False)
395                is_modified = False
396            except VerifyError:
397                is_modified = True
398        return is_modified
399
    # Find the file info line from self.contents
    # Return: { "sha512": "c29d73d...21f518", "size": 41 , "content_inner_path": "content.json"}
    def getFileInfo(self, inner_path, new_file=False):
        """Locate the content.json entry describing inner_path.

        Walks from the file's own directory up to the site root, checking each
        level's content.json. Returns the stored info dict augmented with
        "content_inner_path", "relative_path" and "optional"; the user_contents
        rules for files under a user dir; a skeleton dict when new_file is True
        and a covering content.json exists; or False when nothing matches.
        NOTE: the extra keys are assigned into the dict cached in self.contents,
        so lookups mutate the cached content as a side effect.
        """
        dirs = inner_path.split("/")  # Parent dirs of content.json
        inner_path_parts = [dirs.pop()]  # Filename relative to content.json
        while True:
            content_inner_path = "%s/content.json" % "/".join(dirs)
            content_inner_path = content_inner_path.strip("/")
            content = self.contents.get(content_inner_path)

            # Check in files
            if content and "files" in content:
                back = content["files"].get("/".join(inner_path_parts))
                if back:
                    back["content_inner_path"] = content_inner_path
                    back["optional"] = False
                    back["relative_path"] = "/".join(inner_path_parts)
                    return back

            # Check in optional files
            if content and "files_optional" in content:  # Check if file in this content.json
                back = content["files_optional"].get("/".join(inner_path_parts))
                if back:
                    back["content_inner_path"] = content_inner_path
                    back["optional"] = True
                    back["relative_path"] = "/".join(inner_path_parts)
                    return back

            # Return the rules if user dir
            if content and "user_contents" in content:
                back = content["user_contents"]
                content_inner_path_dir = helper.getDirname(content_inner_path)
                relative_content_path = inner_path[len(content_inner_path_dir):]
                # First path segment under the user dir is the user's auth address
                user_auth_address_match = re.match(r"([A-Za-z0-9]+)/.*", relative_content_path)
                if user_auth_address_match:
                    user_auth_address = user_auth_address_match.group(1)
                    back["content_inner_path"] = "%s%s/content.json" % (content_inner_path_dir, user_auth_address)
                else:
                    back["content_inner_path"] = content_inner_path_dir + "content.json"
                back["optional"] = None
                back["relative_path"] = "/".join(inner_path_parts)
                return back

            # New file: no entry yet, but a covering content.json exists
            if new_file and content:
                back = {}
                back["content_inner_path"] = content_inner_path
                back["relative_path"] = "/".join(inner_path_parts)
                back["optional"] = None
                return back

            # No inner path in this dir, lets try the parent dir
            if dirs:
                inner_path_parts.insert(0, dirs.pop())
            else:  # No more parent dirs
                break

        # Not found
        return False
458
459    # Get rules for the file
460    # Return: The rules for the file or False if not allowed
461    def getRules(self, inner_path, content=None):
462        if not inner_path.endswith("content.json"):  # Find the files content.json first
463            file_info = self.getFileInfo(inner_path)
464            if not file_info:
465                return False  # File not found
466            inner_path = file_info["content_inner_path"]
467
468        if inner_path == "content.json":  # Root content.json
469            rules = {}
470            rules["signers"] = self.getValidSigners(inner_path, content)
471            return rules
472
473        dirs = inner_path.split("/")  # Parent dirs of content.json
474        inner_path_parts = [dirs.pop()]  # Filename relative to content.json
475        inner_path_parts.insert(0, dirs.pop())  # Dont check in self dir
476        while True:
477            content_inner_path = "%s/content.json" % "/".join(dirs)
478            parent_content = self.contents.get(content_inner_path.strip("/"))
479            if parent_content and "includes" in parent_content:
480                return parent_content["includes"].get("/".join(inner_path_parts))
481            elif parent_content and "user_contents" in parent_content:
482                return self.getUserContentRules(parent_content, inner_path, content)
483            else:  # No inner path in this dir, lets try the parent dir
484                if dirs:
485                    inner_path_parts.insert(0, dirs.pop())
486                else:  # No more parent dirs
487                    break
488
489        return False
490
    # Get rules for a user file
    # Return: The rules of the file or False if not allowed
    def getUserContentRules(self, parent_content, inner_path, content):
        """Build the effective rules for a user's content.json.

        Starts from the per-address (or per-cert-user-id) permissions of the
        parent's user_contents, then merges every permission_rules pattern that
        matches the user's urn, keeping the more permissive value per key.
        A permissions value of False marks the user as banned.
        """
        user_contents = parent_content["user_contents"]

        # Delivered for directory
        if "inner_path" in parent_content:
            parent_content_dir = helper.getDirname(parent_content["inner_path"])
            user_address = re.match(r"([A-Za-z0-9]*?)/", inner_path[len(parent_content_dir):]).group(1)
        else:
            # Fall back to extracting the address from the inner path itself
            user_address = re.match(r".*/([A-Za-z0-9]*?)/.*?$", inner_path).group(1)

        try:
            if not content:
                content = self.site.storage.loadJson(inner_path)  # Read the file if no content specified
            user_urn = "%s/%s" % (content["cert_auth_type"], content["cert_user_id"])  # web/nofish@zeroid.bit
            cert_user_id = content["cert_user_id"]
        except Exception:  # Content.json not exist
            user_urn = "n-a/n-a"
            cert_user_id = "n-a"

        if user_address in user_contents["permissions"]:
            rules = copy.copy(user_contents["permissions"].get(user_address, {}))  # Default rules based on address
        else:
            rules = copy.copy(user_contents["permissions"].get(cert_user_id, {}))  # Default rules based on username

        # An explicit False permission means the user is banned
        if rules is False:
            banned = True
            rules = {}
        else:
            banned = False
        if "signers" in rules:
            rules["signers"] = rules["signers"][:]  # Make copy of the signers
        for permission_pattern, permission_rules in list(user_contents["permission_rules"].items()):  # Regexp rules
            if not SafeRe.match(permission_pattern, user_urn):
                continue  # Rule is not valid for user
            # Update rules if its better than current recorded ones
            for key, val in permission_rules.items():
                if key not in rules:
                    if type(val) is list:
                        rules[key] = val[:]  # Make copy
                    else:
                        rules[key] = val
                elif type(val) is int:  # Int, update if larger
                    if val > rules[key]:
                        rules[key] = val
                elif hasattr(val, "startswith"):  # String, update if longer
                    if len(val) > len(rules[key]):
                        rules[key] = val
                elif type(val) is list:  # List, append
                    rules[key] += val

        # Accepted cert signers
        rules["cert_signers"] = user_contents.get("cert_signers", {})
        rules["cert_signers_pattern"] = user_contents.get("cert_signers_pattern")

        if "signers" not in rules:
            rules["signers"] = []

        if not banned:
            rules["signers"].append(user_address)  # Add user as valid signer
        rules["user_address"] = user_address
        rules["includes_allowed"] = False

        return rules
556
    # Get diffs for changed files
    def getDiffs(self, inner_path, limit=30 * 1024, update_files=True):
        """Collect diffs for files that have a staged "-new" or "-old" sibling on disk.

        Returns {relative_path: diff_actions}. With update_files=True the staged
        versions are promoted/cleaned up on disk as a side effect.
        """
        if inner_path not in self.contents:
            return {}
        diffs = {}
        content_inner_path_dir = helper.getDirname(inner_path)
        for file_relative_path in self.contents[inner_path].get("files", {}):
            file_inner_path = content_inner_path_dir + file_relative_path
            if self.site.storage.isFile(file_inner_path + "-new"):  # New version present
                # Diff current -> staged new version
                diffs[file_relative_path] = Diff.diff(
                    list(self.site.storage.open(file_inner_path)),
                    list(self.site.storage.open(file_inner_path + "-new")),
                    limit=limit
                )
                if update_files:
                    # Promote the "-new" file to be the current version
                    self.site.storage.delete(file_inner_path)
                    self.site.storage.rename(file_inner_path + "-new", file_inner_path)
            if self.site.storage.isFile(file_inner_path + "-old"):  # Old version present
                # Diff kept old version -> current
                diffs[file_relative_path] = Diff.diff(
                    list(self.site.storage.open(file_inner_path + "-old")),
                    list(self.site.storage.open(file_inner_path)),
                    limit=limit
                )
                if update_files:
                    self.site.storage.delete(file_inner_path + "-old")
        return diffs
583
584    def hashFile(self, dir_inner_path, file_relative_path, optional=False):
585        back = {}
586        file_inner_path = dir_inner_path + "/" + file_relative_path
587
588        file_path = self.site.storage.getPath(file_inner_path)
589        file_size = os.path.getsize(file_path)
590        sha512sum = CryptHash.sha512sum(file_path)  # Calculate sha512 sum of file
591        if optional and not self.hashfield.hasHash(sha512sum):
592            self.optionalDownloaded(file_inner_path, self.hashfield.getHashId(sha512sum), file_size, own=True)
593
594        back[file_relative_path] = {"sha512": sha512sum, "size": os.path.getsize(file_path)}
595        return back
596
597    def isValidRelativePath(self, relative_path):
598        if ".." in relative_path.replace("\\", "/").split("/"):
599            return False
600        elif len(relative_path) > 255:
601            return False
602        else:
603            return re.match(r"^[a-z\[\]\(\) A-Z0-9~_@=\.\+-/]+$", relative_path)
604
605    def sanitizePath(self, inner_path):
606        return re.sub("[^a-z\[\]\(\) A-Z0-9_@=\.\+-/]", "", inner_path)
607
    # Hash files in directory
    def hashFiles(self, dir_inner_path, ignore_pattern=None, optional_pattern=None):
        """Hash every file under dir_inner_path.

        Returns (files_node, files_optional_node): dicts of
        relative_path -> {"sha512": ..., "size": ...}; files matching
        optional_pattern land in the second dict.
        """
        files_node = {}
        files_optional_node = {}
        if dir_inner_path and not self.isValidRelativePath(dir_inner_path):
            # NOTE(review): this flag is unconditionally overwritten at the top of
            # the loop below, so an invalid directory is only logged, not actually
            # skipped - confirm whether an early return was intended
            ignored = True
            self.log.error("- [ERROR] Only ascii encoded directories allowed: %s" % dir_inner_path)

        for file_relative_path in self.site.storage.walk(dir_inner_path, ignore_pattern):
            file_name = helper.getFilename(file_relative_path)

            ignored = optional = False
            if file_name == "content.json":
                ignored = True
            elif file_name.startswith(".") or file_name.endswith("-old") or file_name.endswith("-new"):
                # Hidden files and staged diff versions are never published
                ignored = True
            elif not self.isValidRelativePath(file_relative_path):
                ignored = True
                self.log.error("- [ERROR] Invalid filename: %s" % file_relative_path)
            elif dir_inner_path == "" and self.site.storage.getDbFile() and file_relative_path.startswith(self.site.storage.getDbFile()):
                # Never include the site's own database file in the content
                ignored = True
            elif optional_pattern and SafeRe.match(optional_pattern, file_relative_path):
                optional = True

            if ignored:  # Ignore content.json, defined regexp and files starting with .
                self.log.info("- [SKIPPED] %s" % file_relative_path)
            else:
                if optional:
                    self.log.info("- [OPTIONAL] %s" % file_relative_path)
                    files_optional_node.update(
                        self.hashFile(dir_inner_path, file_relative_path, optional=True)
                    )
                else:
                    self.log.info("- %s" % file_relative_path)
                    files_node.update(
                        self.hashFile(dir_inner_path, file_relative_path)
                    )
        return files_node, files_optional_node
646
    # Create and sign a content.json
    # Return: The new content if filewrite = False
    def sign(self, inner_path="content.json", privatekey=None, filewrite=True, update_changed_files=False, extend=None, remove_missing_optional=False):
        """Rebuild file hashes for a content.json and sign it with *privatekey*.

        Args:
            inner_path: Site-relative path of the content.json to sign.
            privatekey: Private key (WIF); its address must be a valid signer.
            filewrite: When True, write the signed content.json back to storage.
            update_changed_files: When True, fire storage.onUpdated() for every
                file whose hash changed.
            extend: Optional dict whose keys are copied into the content if not
                already present.
            remove_missing_optional: When True, drop files_optional entries
                whose file is no longer on disk.

        Returns:
            True when filewrite is set, otherwise the new content dict.

        Raises:
            SignError: If inner_path is not a content.json or the private key's
                address is not among the valid signers.
        """
        if not inner_path.endswith("content.json"):
            raise SignError("Invalid file name, you can only sign content.json files")

        if inner_path in self.contents:
            content = self.contents.get(inner_path)
            if content and content.get("cert_sign", False) is None and self.site.storage.isFile(inner_path):
                # Recover cert_sign from file (the db copy may lack it)
                content["cert_sign"] = self.site.storage.loadJson(inner_path).get("cert_sign")
        else:
            content = None
        if not content:  # Content not exist yet, load default one
            self.log.info("File %s not exist yet, loading default values..." % inner_path)

            if self.site.storage.isFile(inner_path):
                content = self.site.storage.loadJson(inner_path)
                if "files" not in content:
                    content["files"] = {}
                if "signs" not in content:
                    content["signs"] = {}
            else:
                content = {"files": {}, "signs": {}}  # Default content.json

            if inner_path == "content.json":  # It's the root content.json, add some more fields
                content["title"] = "%s - ZeroNet_" % self.site.address
                content["description"] = ""
                content["signs_required"] = 1
                content["ignore"] = ""

        if extend:
            # Add extend keys if not exists
            for key, val in list(extend.items()):
                if not content.get(key):
                    content[key] = val
                    self.log.info("Extending content.json with: %s" % key)

        directory = helper.getDirname(self.site.storage.getPath(inner_path))
        inner_directory = helper.getDirname(inner_path)
        self.log.info("Opening site data directory: %s..." % directory)

        changed_files = [inner_path]
        # Re-hash every file below this content.json, honoring the content's
        # own "ignore" and "optional" patterns
        files_node, files_optional_node = self.hashFiles(
            helper.getDirname(inner_path), content.get("ignore"), content.get("optional")
        )

        if not remove_missing_optional:
            # Keep entries for optional files that are no longer on disk
            for file_inner_path, file_details in content.get("files_optional", {}).items():
                if file_inner_path not in files_optional_node:
                    files_optional_node[file_inner_path] = file_details

        # Find changed files (by comparing sha512 against the old content)
        files_merged = files_node.copy()
        files_merged.update(files_optional_node)
        for file_relative_path, file_details in files_merged.items():
            old_hash = content.get("files", {}).get(file_relative_path, {}).get("sha512")
            new_hash = files_merged[file_relative_path]["sha512"]
            if old_hash != new_hash:
                changed_files.append(inner_directory + file_relative_path)

        self.log.debug("Changed files: %s" % changed_files)
        if update_changed_files:
            for file_path in changed_files:
                self.site.storage.onUpdated(file_path)

        # Generate new content.json
        self.log.info("Adding timestamp and sha512sums to new content.json...")

        new_content = content.copy()  # Create a copy of current content.json
        new_content["files"] = files_node  # Add files sha512 hash
        if files_optional_node:
            new_content["files_optional"] = files_optional_node
        elif "files_optional" in new_content:
            del new_content["files_optional"]

        new_content["modified"] = int(time.time())  # Add timestamp
        if inner_path == "content.json":
            new_content["zeronet_version"] = config.version
            new_content["signs_required"] = content.get("signs_required", 1)

        new_content["address"] = self.site.address
        new_content["inner_path"] = inner_path

        # Verify private key
        from Crypt import CryptBitcoin
        self.log.info("Verifying private key...")
        privatekey_address = CryptBitcoin.privatekeyToAddress(privatekey)
        valid_signers = self.getValidSigners(inner_path, new_content)
        if privatekey_address not in valid_signers:
            raise SignError(
                "Private key invalid! Valid signers: %s, Private key address: %s" %
                (valid_signers, privatekey_address)
            )
        self.log.info("Correct %s in valid signers: %s" % (privatekey_address, valid_signers))

        if inner_path == "content.json" and privatekey_address == self.site.address:
            # If signing using the root key, then sign the valid signers
            signers_data = "%s:%s" % (new_content["signs_required"], ",".join(valid_signers))
            new_content["signers_sign"] = CryptBitcoin.sign(str(signers_data), privatekey)
            if not new_content["signers_sign"]:
                self.log.info("Old style address, signers_sign is none")

        self.log.info("Signing %s..." % inner_path)

        if "signs" in new_content:
            del(new_content["signs"])  # Delete old signs
        if "sign" in new_content:
            del(new_content["sign"])  # Delete old sign (backward compatibility)

        # Sign the canonical (sorted-keys, signature-free) json dump
        sign_content = json.dumps(new_content, sort_keys=True)
        sign = CryptBitcoin.sign(sign_content, privatekey)
        # new_content["signs"] = content.get("signs", {}) # TODO: Multisig
        if sign:  # If signing is successful (not an old address)
            new_content["signs"] = {}
            new_content["signs"][privatekey_address] = sign

        # Make sure the freshly signed content passes our own verification
        self.verifyContent(inner_path, new_content)

        if filewrite:
            self.log.info("Saving to %s..." % inner_path)
            self.site.storage.writeJson(inner_path, new_content)
            self.contents[inner_path] = new_content

        self.log.info("File %s signed!" % inner_path)

        if filewrite:  # Written to file
            return True
        else:  # Return the new content
            return new_content
777
778    # The valid signers of content.json file
779    # Return: ["1KRxE1s3oDyNDawuYWpzbLUwNm8oDbeEp6", "13ReyhCsjhpuCVahn1DHdf6eMqqEVev162"]
780    def getValidSigners(self, inner_path, content=None):
781        valid_signers = []
782        if inner_path == "content.json":  # Root content.json
783            if "content.json" in self.contents and "signers" in self.contents["content.json"]:
784                valid_signers += self.contents["content.json"]["signers"][:]
785        else:
786            rules = self.getRules(inner_path, content)
787            if rules and "signers" in rules:
788                valid_signers += rules["signers"]
789
790        if self.site.address not in valid_signers:
791            valid_signers.append(self.site.address)  # Site address always valid
792        return valid_signers
793
794    # Return: The required number of valid signs for the content.json
795    def getSignsRequired(self, inner_path, content=None):
796        return 1  # Todo: Multisig
797
798    def verifyCert(self, inner_path, content):
799        from Crypt import CryptBitcoin
800
801        rules = self.getRules(inner_path, content)
802
803        if not rules:
804            raise VerifyError("No rules for this file")
805
806        if not rules.get("cert_signers") and not rules.get("cert_signers_pattern"):
807            return True  # Does not need cert
808
809        if "cert_user_id" not in content:
810            raise VerifyError("Missing cert_user_id")
811
812        if content["cert_user_id"].count("@") != 1:
813            raise VerifyError("Invalid domain in cert_user_id")
814
815        name, domain = content["cert_user_id"].rsplit("@", 1)
816        cert_address = rules["cert_signers"].get(domain)
817        if not cert_address:  # Unknown Cert signer
818            if rules.get("cert_signers_pattern") and SafeRe.match(rules["cert_signers_pattern"], domain):
819                cert_address = domain
820            else:
821                raise VerifyError("Invalid cert signer: %s" % domain)
822
823        try:
824            cert_subject = "%s#%s/%s" % (rules["user_address"], content["cert_auth_type"], name)
825            result = CryptBitcoin.verify(cert_subject, cert_address, content["cert_sign"])
826        except Exception as err:
827            raise VerifyError("Certificate verify error: %s" % err)
828        return result
829
    # Checks if the content.json content is valid
    # Return: True or False
    def verifyContent(self, inner_path, content):
        """Validate a content.json dict against site size limits and path rules.

        Side effects: on success updates self.site.settings["size"] and
        ["size_optional"]; when the site size limit is exceeded it also fails
        any running download task for this file.

        Raises:
            VerifyError: On wrong site address, wrong inner_path, size-limit
                violation or invalid relative file path.
        """
        # Size of the new content: the json dump itself plus every listed file
        # (negative sizes mark deleted/placeholder entries and are excluded)
        content_size = len(json.dumps(content, indent=1)) + sum([file["size"] for file in list(content["files"].values()) if file["size"] >= 0])  # Size of new content
        # Calculate old content size
        old_content = self.contents.get(inner_path)
        if old_content:
            old_content_size = len(json.dumps(old_content, indent=1)) + sum([file["size"] for file in list(old_content.get("files", {}).values())])
            old_content_size_optional = sum([file["size"] for file in list(old_content.get("files_optional", {}).values())])
        else:
            old_content_size = 0
            old_content_size_optional = 0

        # Reset site size on first root content.json
        if not old_content and inner_path == "content.json":
            self.site.settings["size"] = 0

        content_size_optional = sum([file["size"] for file in list(content.get("files_optional", {}).values()) if file["size"] >= 0])
        site_size = self.site.settings["size"] - old_content_size + content_size  # Site size without old content plus the new
        site_size_optional = self.site.settings["size_optional"] - old_content_size_optional + content_size_optional  # Site size without old content plus the new

        site_size_limit = self.site.getSizeLimit() * 1024 * 1024

        # Check site address
        if content.get("address") and content["address"] != self.site.address:
            raise VerifyError("Wrong site address: %s != %s" % (content["address"], self.site.address))

        # Check file inner path
        if content.get("inner_path") and content["inner_path"] != inner_path:
            raise VerifyError("Wrong inner_path: %s" % content["inner_path"])

        # Check total site size limit
        if site_size > site_size_limit:
            if inner_path == "content.json" and self.site.settings["size"] == 0:
                # First content.json download, save site size to display warning
                self.site.settings["size"] = site_size
            task = self.site.worker_manager.findTask(inner_path)
            if task:  # Dont try to download from other peers
                self.site.worker_manager.failTask(task)
            raise VerifyError("Content too large %sB > %sB, aborting task..." % (site_size, site_size_limit))

        # Verify valid filenames (both normal and optional file lists)
        for file_relative_path in list(content.get("files", {}).keys()) + list(content.get("files_optional", {}).keys()):
            if not self.isValidRelativePath(file_relative_path):
                raise VerifyError("Invalid relative path: %s" % file_relative_path)

        if inner_path == "content.json":
            self.site.settings["size"] = site_size
            self.site.settings["size_optional"] = site_size_optional
            return True  # Root content.json is passed
        else:
            # Included content.json: also check it against its include rules
            if self.verifyContentInclude(inner_path, content, content_size, content_size_optional):
                self.site.settings["size"] = site_size
                self.site.settings["size_optional"] = site_size_optional
                return True
            else:
                return False
887
888    def verifyContentInclude(self, inner_path, content, content_size, content_size_optional):
889        # Load include details
890        rules = self.getRules(inner_path, content)
891        if not rules:
892            raise VerifyError("No rules")
893
894        # Check include size limit
895        if rules.get("max_size") is not None:  # Include size limit
896            if content_size > rules["max_size"]:
897                raise VerifyError("Include too large %sB > %sB" % (content_size, rules["max_size"]))
898
899        if rules.get("max_size_optional") is not None:  # Include optional files limit
900            if content_size_optional > rules["max_size_optional"]:
901                raise VerifyError("Include optional files too large %sB > %sB" % (
902                    content_size_optional, rules["max_size_optional"])
903                )
904
905        # Filename limit
906        if rules.get("files_allowed"):
907            for file_inner_path in list(content["files"].keys()):
908                if not SafeRe.match(r"^%s$" % rules["files_allowed"], file_inner_path):
909                    raise VerifyError("File not allowed: %s" % file_inner_path)
910
911        if rules.get("files_allowed_optional"):
912            for file_inner_path in list(content.get("files_optional", {}).keys()):
913                if not SafeRe.match(r"^%s$" % rules["files_allowed_optional"], file_inner_path):
914                    raise VerifyError("Optional file not allowed: %s" % file_inner_path)
915
916        # Check if content includes allowed
917        if rules.get("includes_allowed") is False and content.get("includes"):
918            raise VerifyError("Includes not allowed")
919
920        return True  # All good
921
922    # Verify file validity
923    # Return: None = Same as before, False = Invalid, True = Valid
924    def verifyFile(self, inner_path, file, ignore_same=True):
925        if inner_path.endswith("content.json"):  # content.json: Check using sign
926            from Crypt import CryptBitcoin
927            try:
928                if type(file) is dict:
929                    new_content = file
930                else:
931                    if sys.version_info.major == 3 and sys.version_info.minor < 6:
932                        new_content = json.loads(file.read().decode("utf8"))
933                    else:
934                        new_content = json.load(file)
935                if inner_path in self.contents:
936                    old_content = self.contents.get(inner_path, {"modified": 0})
937                    # Checks if its newer the ours
938                    if old_content["modified"] == new_content["modified"] and ignore_same:  # Ignore, have the same content.json
939                        return None
940                    elif old_content["modified"] > new_content["modified"]:  # We have newer
941                        raise VerifyError(
942                            "We have newer (Our: %s, Sent: %s)" %
943                            (old_content["modified"], new_content["modified"])
944                        )
945                if new_content["modified"] > time.time() + 60 * 60 * 24:  # Content modified in the far future (allow 1 day+)
946                    raise VerifyError("Modify timestamp is in the far future!")
947                if self.isArchived(inner_path, new_content["modified"]):
948                    if inner_path in self.site.bad_files:
949                        del self.site.bad_files[inner_path]
950                    raise VerifyError("This file is archived!")
951                # Check sign
952                sign = new_content.get("sign")
953                signs = new_content.get("signs", {})
954                if "sign" in new_content:
955                    del(new_content["sign"])  # The file signed without the sign
956                if "signs" in new_content:
957                    del(new_content["signs"])  # The file signed without the signs
958
959                sign_content = json.dumps(new_content, sort_keys=True)  # Dump the json to string to remove whitepsace
960
961                # Fix float representation error on Android
962                modified = new_content["modified"]
963                if config.fix_float_decimals and type(modified) is float and not str(modified).endswith(".0"):
964                    modified_fixed = "{:.6f}".format(modified).strip("0.")
965                    sign_content = sign_content.replace(
966                        '"modified": %s' % repr(modified),
967                        '"modified": %s' % modified_fixed
968                    )
969
970                if signs:  # New style signing
971                    valid_signers = self.getValidSigners(inner_path, new_content)
972                    signs_required = self.getSignsRequired(inner_path, new_content)
973
974                    if inner_path == "content.json" and len(valid_signers) > 1:  # Check signers_sign on root content.json
975                        signers_data = "%s:%s" % (signs_required, ",".join(valid_signers))
976                        if not CryptBitcoin.verify(signers_data, self.site.address, new_content["signers_sign"]):
977                            raise VerifyError("Invalid signers_sign!")
978
979                    if inner_path != "content.json" and not self.verifyCert(inner_path, new_content):  # Check if cert valid
980                        raise VerifyError("Invalid cert!")
981
982                    valid_signs = 0
983                    for address in valid_signers:
984                        if address in signs:
985                            valid_signs += CryptBitcoin.verify(sign_content, address, signs[address])
986                        if valid_signs >= signs_required:
987                            break  # Break if we has enough signs
988                    if valid_signs < signs_required:
989                        raise VerifyError("Valid signs: %s/%s" % (valid_signs, signs_required))
990                    else:
991                        return self.verifyContent(inner_path, new_content)
992                else:  # Old style signing
993                    raise VerifyError("Invalid old-style sign")
994
995            except Exception as err:
996                self.log.warning("%s: verify sign error: %s" % (inner_path, Debug.formatException(err)))
997                raise err
998
999        else:  # Check using sha512 hash
1000            file_info = self.getFileInfo(inner_path)
1001            if file_info:
1002                if CryptHash.sha512sum(file) != file_info.get("sha512", ""):
1003                    raise VerifyError("Invalid hash")
1004
1005                if file_info.get("size", 0) != file.tell():
1006                    raise VerifyError(
1007                        "File size does not match %s <> %s" %
1008                        (inner_path, file.tell(), file_info.get("size", 0))
1009                    )
1010
1011                return True
1012
1013            else:  # File not in content.json
1014                raise VerifyError("File not in content.json")
1015
1016    def optionalDelete(self, inner_path):
1017        self.site.storage.delete(inner_path)
1018
1019    def optionalDownloaded(self, inner_path, hash_id, size=None, own=False):
1020        if size is None:
1021            size = self.site.storage.getSize(inner_path)
1022
1023        done = self.hashfield.appendHashId(hash_id)
1024        self.site.settings["optional_downloaded"] += size
1025        return done
1026
1027    def optionalRemoved(self, inner_path, hash_id, size=None):
1028        if size is None:
1029            size = self.site.storage.getSize(inner_path)
1030        done = self.hashfield.removeHashId(hash_id)
1031
1032        self.site.settings["optional_downloaded"] -= size
1033        return done
1034
1035    def optionalRenamed(self, inner_path_old, inner_path_new):
1036        return True
1037