1# Copyright (c) 2012-2016 Seafile Ltd.
2# encoding: utf-8
3from functools import partial
4import os
5import re
6import urllib.request, urllib.parse, urllib.error
7import urllib.request, urllib.error, urllib.parse
8import uuid
9import logging
10import hashlib
11import tempfile
12import locale
13import configparser
14import mimetypes
15import contextlib
16from datetime import datetime
17from urllib.parse import urlparse, urljoin
18import json
19
20from constance import config
21import seaserv
22from seaserv import seafile_api
23
24from django.urls import reverse
25from django.core.mail import EmailMessage
26from django.shortcuts import render
27from django.template import Context, loader
28from django.utils.translation import ugettext as _
29from django.http import HttpResponseRedirect, HttpResponse, HttpResponseNotModified
30from django.utils.http import urlquote
31from django.utils.html import escape
32from django.utils.timezone import make_naive, is_aware
33from django.views.static import serve as django_static_serve
34
35from seahub.auth import REDIRECT_FIELD_NAME
36from seahub.api2.models import Token, TokenV2
37import seahub.settings
38from seahub.settings import SITE_NAME, MEDIA_URL, LOGO_PATH, \
39        MEDIA_ROOT, CUSTOM_LOGO_PATH
40try:
41    from seahub.settings import EVENTS_CONFIG_FILE
42except ImportError:
43    EVENTS_CONFIG_FILE = None
44try:
45    from seahub.settings import EMAIL_HOST
46    IS_EMAIL_CONFIGURED = True
47except ImportError:
48    IS_EMAIL_CONFIGURED = False
49try:
50    from seahub.settings import CLOUD_MODE
51except ImportError:
52    CLOUD_MODE = False
53try:
54    from seahub.settings import ENABLE_INNER_FILESERVER
55except ImportError:
56    ENABLE_INNER_FILESERVER = True
57try:
58    from seahub.settings import CHECK_SHARE_LINK_TRAFFIC
59except ImportError:
60    CHECK_SHARE_LINK_TRAFFIC = False
61
62logger = logging.getLogger(__name__)
63
64# init Seafevents API
if EVENTS_CONFIG_FILE:
    # An events config file is configured: try to load and initialise the
    # real seafevents API.
    try:
        from seafevents import seafevents_api
        seafevents_api.init(EVENTS_CONFIG_FILE)
    except ImportError:
        logging.exception('Failed to import seafevents package.')
        # Leave a ``None`` placeholder so the name always exists at module
        # level; attribute access on it raises AttributeError.
        seafevents_api = None
else:
    class RPCProxy(object):
        """Stand-in for ``seafevents_api`` when no events config is set.

        Any attribute lookup yields a callable that accepts arbitrary
        arguments and returns ``None``.
        """

        def __getattr__(self, name):
            # Bind the requested attribute name as the first argument of
            # ``method_missing``.
            return partial(self.method_missing, name)

        def method_missing(self, name, *args, **kwargs):
            # Every "missing" method is a no-op.
            return None
    seafevents_api = RPCProxy()
80
def is_pro_version():
    """Tell whether this instance is running as the pro edition.

    In DEBUG mode a truthy ``IS_PRO_VERSION`` setting forces ``True``.
    Otherwise the answer comes from ``seafevents_api.is_pro()``; if that
    attribute lookup fails with AttributeError the result is ``False``.
    """
    debug_override = seahub.settings.DEBUG and \
        getattr(seahub.settings, 'IS_PRO_VERSION', False)
    if debug_override:
        return True

    try:
        pro = seafevents_api.is_pro()
    except AttributeError:
        return False
    return bool(pro)
91
def is_cluster_mode():
    """Read ``seafile.conf`` and report whether ``[cluster] enabled`` is on.

    The config directory is taken from ``SEAFILE_CENTRAL_CONF_DIR`` when that
    environment variable exists, otherwise from ``SEAFILE_CONF_DIR`` (a
    KeyError is raised if neither is set). A missing option means ``False``.
    """
    conf_dir = os.environ.get('SEAFILE_CENTRAL_CONF_DIR')
    if conf_dir is None:
        conf_dir = os.environ['SEAFILE_CONF_DIR']

    parser = configparser.ConfigParser()
    parser.read(os.path.join(conf_dir, 'seafile.conf'))

    enabled = False
    if parser.has_option('cluster', 'enabled'):
        enabled = parser.getboolean('cluster', 'enabled')

    logging.debug('cluster mode is enabled' if enabled
                  else 'cluster mode is disabled')
    return enabled
111
112CLUSTER_MODE = is_cluster_mode()
113
114try:
115    from seahub.settings import OFFICE_CONVERTOR_ROOT
116except ImportError:
117    OFFICE_CONVERTOR_ROOT = ''
118
119from seahub.utils.file_types import *
120from seahub.utils.htmldiff import HtmlDiff # used in views/files.py
121
# 40 zero characters (the length of a hex SHA-1 digest).
EMPTY_SHA1 = '0000000000000000000000000000000000000000'
# 2**31 - 1.
MAX_INT = 2147483647

# Maps a file type to the tuple of lower-case extensions (without the dot)
# that belong to it. The type keys (IMAGE, DOCUMENT, ...) are presumably
# provided by the star import from seahub.utils.file_types -- verify there.
PREVIEW_FILEEXT = {
    IMAGE: ('gif', 'jpeg', 'jpg', 'png', 'ico', 'bmp', 'tif', 'tiff', 'psd'),
    DOCUMENT: ('doc', 'docx', 'ppt', 'pptx', 'odt', 'fodt', 'odp', 'fodp'),
    SPREADSHEET: ('xls', 'xlsx', 'ods', 'fods'),
    SVG: ('svg',),
    PDF: ('pdf', 'ai'),
    MARKDOWN: ('markdown', 'md'),
    VIDEO: ('mp4', 'ogv', 'webm', 'mov'),
    AUDIO: ('mp3', 'oga', 'ogg'),
    #'3D': ('stl', 'obj'),
    XMIND: ('xmind',),
    CDOC: ('cdoc',),
}
138
def gen_fileext_type_map():
    """Invert ``PREVIEW_FILEEXT`` into an ``extension -> file type`` dict.

    If an extension is listed under several types, the type that comes last
    in ``PREVIEW_FILEEXT`` wins.
    """
    return {
        ext: file_type
        for file_type, extensions in PREVIEW_FILEEXT.items()
        for ext in extensions
    }
FILEEXT_TYPE_MAP = gen_fileext_type_map()
151
def render_permission_error(request, msg=None, extra_ctx=None):
    """Render ``error.html`` for a permission failure.

    ``msg`` becomes ``error_msg`` in the template context; when falsy, the
    translated default 'permission error' is used. Entries of ``extra_ctx``
    (if given) are copied into the context and may override ``error_msg``.
    """
    context = {'error_msg': msg or _('permission error')}
    if extra_ctx:
        context.update({key: extra_ctx[key] for key in extra_ctx})

    return render(request, 'error.html', context)
165
def render_error(request, msg=None, extra_ctx=None):
    """Render ``error.html`` for a generic error.

    ``msg`` becomes ``error_msg`` in the template context; when falsy, the
    translated default 'Internal Server Error' is used. Entries of
    ``extra_ctx`` (if given) are copied into the context and may override
    ``error_msg``.
    """
    context = {'error_msg': msg or _('Internal Server Error')}
    if extra_ctx:
        context.update({key: extra_ctx[key] for key in extra_ctx})

    return render(request, 'error.html', context)
179
def list_to_string(l):
    """
    Join the items of ``l`` into one comma-separated string.

    No space is inserted after the comma. Because ``str.join`` is used,
    every item must already be a string.
    """
    return ','.join(l)
186
def get_fileserver_root():
    """Return the fileserver root configured as ``FILE_SERVER_ROOT``.

    The value is read from the ``constance`` config object on every call.

    Returns:
        The configured fileserver root.
    """
    return config.FILE_SERVER_ROOT
194
def get_inner_fileserver_root():
    """Return the inner fileserver root from ``INNER_FILE_SERVER_ROOT``.

    The inner fileserver root lets Seahub reach the fileserver through a
    local address, avoiding the overhead of DNS queries as well as related
    issues (for example, a server that cannot ping itself).

    Returns:
        The value of ``seahub.settings.INNER_FILE_SERVER_ROOT``,
        e.g. http://127.0.0.1:<port>
    """

    return seahub.settings.INNER_FILE_SERVER_ROOT
207
def gen_token(max_length=5):
    """
    Return the first ``max_length`` hex characters of a fresh UUID4.

    A UUID's hex form has 32 characters, so longer requests still yield
    at most 32 characters.
    """
    hex_digits = uuid.uuid4().hex
    return hex_digits[:max_length]
215
def normalize_cache_key(value, prefix=None, token=None, max_length=200):
    """Build a cache key from ``prefix`` + ``value`` (+ ``'_'`` + ``token``).

    The assembled key is URL-quoted so that it contains no control characters
    or whitespace, then truncated to ``max_length`` characters.
    """
    key = value
    if prefix is not None:
        key = prefix + key
    if token is not None:
        key = key + '_' + token
    quoted = urlquote(key)
    return quoted[:max_length]
223
def get_repo_last_modify(repo):
    """Get the last modification time of a repo.

    If the repo carries its head commit id, that commit is looked up
    directly. Otherwise this falls back to fetching the first entry of the
    repo's commit list, which is more time consuming.

    Returns:
        ``ctime`` of the last commit, or 0 when no commit could be found.
    """
    if repo.head_cmmt_id is not None:
        last_cmmt = seaserv.get_commit(repo.id, repo.version, repo.head_cmmt_id)
    else:
        logger.info('[repo %s] head_cmmt_id is missing.', repo.id)
        # The commit list may be empty; indexing it blindly would raise
        # IndexError instead of reaching the "no commit -> 0" fallback below.
        cmmts = seafile_api.get_commit_list(repo.id, 0, 1)
        last_cmmt = cmmts[0] if cmmts else None
    return last_cmmt.ctime if last_cmmt else 0
238
def calculate_repos_last_modify(repo_list):
    """Set ``latest_modify`` on every repo in ``repo_list``.

    The value comes from ``get_repo_last_modify``. The repos are mutated in
    place and nothing is returned.
    """
    for repo in repo_list:
        repo.latest_modify = get_repo_last_modify(repo)
244
def normalize_dir_path(path):
    """Return ``path`` with exactly one leading and one trailing '/'.

    All leading and trailing slashes are stripped first; an empty result
    (e.g. '' or '///') normalizes to the root '/'.
    """
    trimmed = path.strip('/')
    return '/' + trimmed + '/' if trimmed else '/'
256
def normalize_file_path(path):
    """Return ``path`` with one leading '/' and no trailing '/'.

    All leading and trailing slashes are stripped first. If nothing remains
    (e.g. '' or '/'), an empty string is returned rather than '/'.
    """
    trimmed = path.strip('/')
    return '/' + trimmed if trimmed else ''
268
269# modified from django1.5:/core/validators, and remove the support for single
270# quote in email address
email_re = re.compile(
    r"(^[-!#$%&*+/=?^_`{}|~0-9A-Z]+(\.[-!#$%&*+/=?^_`{}|~0-9A-Z]+)*"  # dot-atom
    # quoted-string, see also http://tools.ietf.org/html/rfc2822#section-3.2.5
    r'|^"([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\001-\011\013\014\016-\177])*"'
    r')@((?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)$)'  # domain
    r'|\[(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}\]$', re.IGNORECASE)  # literal form, ipv4 address (SMTP 4.1.3)

def is_valid_email(email):
    """Return True when ``email`` matches ``email_re``, else False.

    The pattern is case-insensitive and does not allow a single quote in
    the local part.
    """
    matched = email_re.match(email)
    return matched is not None
282
def is_valid_username(username):
    """Check whether ``username`` is valid.

    Currently only an email address can be a username, so this simply
    returns the result of ``is_valid_email(username)``.
    """
    return is_valid_email(username)
287
def is_valid_username2(username):
    """Newer username check: reject a leading or trailing space.

    The older ``is_valid_username`` is still used by many callers and is
    left untouched. Only the space character is checked here, not other
    whitespace.
    """
    has_edge_space = username.startswith(' ') or username.endswith(' ')
    return not has_edge_space
292
def is_valid_dirent_name(name):
    """Check whether a repo/dir/file name is valid.

    Returns whatever ``seafile_api.is_valid_filename`` returns for ``name``.
    """
    # `repo_id` parameter is not used in seafile api, so a placeholder
    # value is passed.
    return seafile_api.is_valid_filename('fake_repo_id', name)
298
def is_ldap_user(user):
    """Return True when ``user.source`` is 'LDAP' or 'LDAPImport'."""
    return user.source in ('LDAP', 'LDAPImport')
303
def get_no_duplicate_obj_name(obj_name, exist_obj_names):
    """Return a name based on ``obj_name`` that is not in ``exist_obj_names``.

    If ``obj_name`` is unused it is returned unchanged. Otherwise " (1)",
    " (2)", ... is inserted before the file extension (or appended when
    there is none) until an unused name is found.
    """
    def is_taken(candidate):
        return any(existing == candidate for existing in exist_obj_names)

    if not is_taken(obj_name):
        return obj_name

    # With no extension ``suffix`` is '' and ``stem`` is the whole name, so
    # one template covers both cases.
    stem, suffix = os.path.splitext(obj_name)
    counter = 1
    candidate = "%s (%d)%s" % (stem, counter, suffix)
    while is_taken(candidate):
        counter += 1
        candidate = "%s (%d)%s" % (stem, counter, suffix)
    return candidate
330
def check_filename_with_rename(repo_id, parent_dir, obj_name):
    """Return a variant of ``obj_name`` that does not clash in ``parent_dir``.

    The directory is listed at the repo's latest commit and the listing is
    passed to ``get_no_duplicate_obj_name``. Returns '' when the repo has no
    commits.
    """
    commits = seafile_api.get_commit_list(repo_id, 0, 1)
    head = commits[0] if commits else None
    if not head:
        return ''

    # TODO: what if parent_dir does not exist?
    entries = seafile_api.list_dir_by_commit_and_path(repo_id, head.id,
                                                      parent_dir)
    taken_names = [entry.obj_name for entry in entries]
    return get_no_duplicate_obj_name(obj_name, taken_names)
342
def get_user_repos(username, org_id=None):
    """
    Collect every repo the user can access: owned, shared-in, group and
    public repos.

    When ``org_id`` is not None the organization-scoped APIs are used.

    Returns a 4-tuple ``(owned_repos, shared_repos, groups_repos,
    public_repos)``.
    """
    if org_id is None:
        owned_repos = seafile_api.get_owned_repo_list(username)
        shared_repos = seafile_api.get_share_in_repo_list(username, -1, -1)
        groups_repos = seafile_api.get_group_repos_by_user(username)
        # Public repos are not listed in cloud mode.
        public_repos = [] if CLOUD_MODE \
            else seafile_api.get_inner_pub_repo_list()
        needs_aliases = shared_repos + public_repos
    else:
        owned_repos = seafile_api.get_org_owned_repo_list(org_id, username)
        shared_repos = seafile_api.get_org_share_in_repo_list(
            org_id, username, -1, -1)
        groups_repos = seafile_api.get_org_group_repos_by_user(
            username, org_id)
        public_repos = seaserv.seafserv_threaded_rpc.list_org_inner_pub_repos(
            org_id)
        # In the org case group repos get the aliases as well.
        needs_aliases = shared_repos + groups_repos + public_repos

    # These repo structs use different column names than owned repos, so
    # mirror them onto the attribute names used for owned repos.
    for repo in needs_aliases:
        repo.id = repo.repo_id
        repo.name = repo.repo_name
        repo.last_modify = repo.last_modified

    return (owned_repos, shared_repos, groups_repos, public_repos)
381
def get_conf_text_ext():
    """
    Return the extensions listed in the ``TEXT_PREVIEW_EXT`` constance
    setting.

    The setting is split on commas and each item is stripped of surrounding
    whitespace. An empty list is returned when the setting does not exist.
    """
    if not hasattr(config, 'TEXT_PREVIEW_EXT'):
        return []
    raw_value = getattr(config, 'TEXT_PREVIEW_EXT')
    return [ext.strip() for ext in raw_value.split(',')]
390
def get_file_type_and_ext(filename):
    """
    Return ``(file type, extension)`` for ``filename``.

    The extension is lower-cased and has no leading dot. Extensions from the
    text-preview setting map to ``TEXT``; otherwise ``FILEEXT_TYPE_MAP`` is
    consulted, and anything not found there gets the type 'Unknown'.
    """
    _, dotted_ext = os.path.splitext(filename)
    ext = dotted_ext[1:].lower()

    if ext in get_conf_text_ext():
        return (TEXT, ext)

    return (FILEEXT_TYPE_MAP.get(ext) or 'Unknown', ext)
405
def get_file_revision_id_size(repo_id, commit_id, path):
    """Look up a file inside a commit and return ``(obj_id, size)``.

    The parent directory of ``path`` is listed at ``commit_id`` and searched
    for an entry with the file's name. Returns ``(None, None)`` when no such
    entry exists.
    """
    repo = seafile_api.get_repo(repo_id)
    parent_dir, file_name = os.path.dirname(path), os.path.basename(path)
    entries = seafile_api.list_dir_by_commit_and_path(repo_id, commit_id,
                                                      parent_dir)

    match = next((d for d in entries if d.obj_name == file_name), None)
    if match is None:
        return None, None

    size = seafile_api.get_file_size(repo.store_id, repo.version,
                                     match.obj_id)
    return match.obj_id, size
422
def new_merge_with_no_conflict(commit):
    """Tell whether ``commit`` is a conflict-free "new merge".

    That requires a second parent, ``new_merge`` being exactly ``True`` and
    ``conflict`` being exactly ``False`` (identity checks, so truthy values
    such as 1 do not count).

    Arguments:
    - `commit`: object with ``second_parent_id``, ``new_merge`` and
      ``conflict`` attributes.
    """
    has_second_parent = commit.second_parent_id is not None
    return (has_second_parent
            and commit.new_merge is True
            and commit.conflict is False)
434
def get_commit_before_new_merge(commit):
    """Walk up from ``commit`` until reaching a commit that is not a
    conflict-free new merge, and return it.

    At every merge both parents are loaded and the walk continues with the
    one whose ``ctime`` is larger (the second parent on a tie).

    Pre-condition: ``commit`` must itself be a conflict-free new merge
    (enforced with ``assert``).

    Arguments:
    - `commit`: the merge commit to start from.
    """
    assert new_merge_with_no_conflict(commit) is True

    current = commit
    while new_merge_with_no_conflict(current):
        first = seaserv.get_commit(current.repo_id, current.version,
                                   current.parent_id)
        second = seaserv.get_commit(current.repo_id, current.version,
                                    current.second_parent_id)
        if first.ctime > second.ctime:
            current = first
        else:
            current = second

    assert new_merge_with_no_conflict(current) is False

    return current
453
def gen_inner_file_get_url(token, filename):
    """Build the file download URL on the inner fileserver.

    When ``ENABLE_INNER_FILESERVER`` is False (it defaults to True) the
    outer fileserver URL from ``gen_file_get_url`` is returned instead.

    Arguments:
    - `token`: access token placed in the URL.
    - `filename`: file name; URL-quoted before being appended.

    Returns:
        e.g., http://127.0.0.1:<port>/files/<token>/<filename>
    """
    if not ENABLE_INNER_FILESERVER:
        return gen_file_get_url(token, filename)

    inner_root = get_inner_fileserver_root()
    return '%s/files/%s/%s' % (inner_root, token, urlquote(filename))
472
def gen_inner_file_upload_url(op, token):
    """Build the upload URL on the inner fileserver.

    When ``ENABLE_INNER_FILESERVER`` is False (it defaults to True) the
    outer fileserver URL from ``gen_file_upload_url`` is returned instead.

    Arguments:
    - `op`: operation path segment, e.g. ``update-api``.
    - `token`: access token placed in the URL.

    Returns:
        e.g., http://127.0.0.1:<port>/<op>/<token>
        http://127.0.0.1:8082/update-api/80c69afa-9438-4ee6-a297-a24fadb10750
    """
    if not ENABLE_INNER_FILESERVER:
        return gen_file_upload_url(token, op)

    inner_root = get_inner_fileserver_root()
    return '%s/%s/%s' % (inner_root, op, token)
491
def get_max_upload_file_size():
    """Return ``seaserv.MAX_UPLOAD_FILE_SIZE``.

    Per the original note, the limit comes from the config file and
    defaults to no limit: ``None`` is returned if the value is not set.
    """
    return seaserv.MAX_UPLOAD_FILE_SIZE
498
def gen_block_get_url(token, blkid):
    """
    Build the fileserver block URL.

    Format: http://<domain:port>/blks/<token>/<blkid>
    When ``blkid`` is falsy the URL ends right after the token's slash.
    """
    prefix = '%s/blks/%s/' % (get_fileserver_root(), token)
    if not blkid:
        return prefix
    return '%s%s' % (prefix, blkid)
508
def gen_file_get_url(token, filename):
    """
    Generate the fileserver file download URL.

    ``filename`` is URL-quoted before being appended.
    Format: http://<domain:port>/files/<token>/<filename>
    """
    return '%s/files/%s/%s' % (get_fileserver_root(), token, urlquote(filename))
515
def gen_file_upload_url(token, op, replace=False):
    """Build the fileserver upload URL.

    Format: http://<domain:port>/<op>/<token>
    The query string ``?replace=1`` is appended only when ``replace`` is
    exactly ``True``.
    """
    query = '?replace=1' if replace is True else ''
    return '%s/%s/%s' % (get_fileserver_root(), op, token) + query
521
def gen_dir_zip_download_url(token):
    """
    Generate the fileserver zip download URL.

    Format: http://<domain:port>/zip/<token>
    """
    return '%s/zip/%s' % (get_fileserver_root(), token)
528
def string2list(string):
    """
    Split a string joined with ',', ';', '\\n' or '\\r' into a list.

    Each item is stripped of surrounding spaces; empty items and duplicates
    are dropped. The result keeps the order in which items first appear, so
    the output is deterministic.
    """
    normalized = string.replace(';', ',').replace('\n', ',').replace('\r', ',')
    stripped_items = (item.strip(' ') for item in normalized.split(','))
    # dict.fromkeys removes duplicates while preserving insertion order.
    return list(dict.fromkeys(item for item in stripped_items if item))
543
def is_org_context(request):
    """Tell whether ``request`` runs inside an organization context.

    An organization context is a virtual private Seafile instance on a
    cloud service: the request must be in cloud mode and its user must have
    an ``org``. ``request.user`` is only read when cloud mode is on.

    Arguments:
    - `request`: object with ``cloud_mode`` and ``user.org`` attributes.
    """
    cloud_mode = request.cloud_mode
    return cloud_mode and request.user.org is not None
552
553# events related
if EVENTS_CONFIG_FILE:
    # Parse the events config file once at import time and keep the parser
    # at module level.
    parsed_events_conf = configparser.ConfigParser()
    parsed_events_conf.read(EVENTS_CONFIG_FILE)

    try:
        import seafevents
        EVENTS_ENABLED = True
        # Session factory used by the event helpers defined in this branch.
        SeafEventsSession = seafevents.init_db_session_class(EVENTS_CONFIG_FILE)
    except ImportError:
        logging.exception('Failed to import seafevents package.')
        # NOTE(review): SeafEventsSession stays undefined on this path, yet
        # the helpers below are still defined -- callers presumably check
        # EVENTS_ENABLED first; confirm.
        seafevents = None
        EVENTS_ENABLED = False
566
567    @contextlib.contextmanager
568    def _get_seafevents_session():
569        try:
570            session = SeafEventsSession()
571            yield session
572        finally:
573           session.close()
574
575    def _same_events(e1, e2):
576        """Two events are equal should follow two rules:
577        1. event1.repo_id = event2.repo_id
578        2. event1.commit.creator = event2.commit.creator
579        3. event1.commit.desc = event2.commit.desc
580        """
581        if hasattr(e1, 'commit') and hasattr(e2, 'commit'):
582            if e1.repo_id == e2.repo_id and \
583               e1.commit.desc == e2.commit.desc and \
584               e1.commit.creator_name == e2.commit.creator_name:
585                return True
586        return False
587
    def _get_events(username, start, count, org_id=None):
        """Collect up to ``count`` de-duplicated events for ``username``.

        Events are fetched in batches through ``_get_events_inner`` starting
        at offset ``start``. An event is dropped when it duplicates one
        already collected (see ``_same_events``) or when its commit is a
        conflict-free new merge.

        Returns:
            ``(valid_events, new_start)`` where ``new_start`` is ``start``
            plus the number of fetched events that were examined, including
            the dropped ones.
        """
        ev_session = SeafEventsSession()

        valid_events = []
        # Number of events examined so far, whether kept or dropped.
        total_used = 0
        try:
            next_start = start
            while True:
                events = _get_events_inner(ev_session, username, next_start,
                                           count, org_id)
                if not events:
                    # No more events to read.
                    break

                # filter out duplicate commits and merged commits
                for e1 in events:
                    duplicate = False
                    for e2 in valid_events:
                        if _same_events(e1, e2): duplicate = True; break

                    new_merge = False
                    if hasattr(e1, 'commit') and e1.commit and \
                       new_merge_with_no_conflict(e1.commit):
                        new_merge = True

                    if not duplicate and not new_merge:
                        valid_events.append(e1)
                    total_used = total_used + 1
                    if len(valid_events) == count:
                        break

                if len(valid_events) == count:
                    break
                # Continue after the batch that was just consumed.
                next_start = next_start + len(events)
        finally:
            # Always release the session, even if fetching raised.
            ev_session.close()

        for e in valid_events:            # parse commit description
            if hasattr(e, 'commit'):
                e.commit.converted_cmmt_desc = convert_cmmt_desc_link(e.commit)
                e.commit.more_files = more_files_in_commit(e.commit)
        return valid_events, start + total_used
629
630    def _get_activities(username, start, count):
631        ev_session = SeafEventsSession()
632
633        events, total_count = [], 0
634        try:
635            events = seafevents.get_user_activities(ev_session,
636                    username, start, count)
637        finally:
638            ev_session.close()
639
640        return events
641
    def _get_events_inner(ev_session, username, start, limit, org_id=None):
        '''Read events from seafevents database, and remove events that are
        no longer valid

        For 'repo-update' events the repo and commit are attached to the
        event as ``ev.repo`` and ``ev.commit``; if the repo no longer exists
        the event is deleted through seafevents and skipped.

        Return 'limit' events or less than 'limit' events if no more events remain
        '''
        valid_events = []
        next_start = start
        while True:
            # A positive org_id selects the organization-scoped query.
            if org_id and org_id > 0:
                events = seafevents.get_org_user_events(ev_session, org_id,
                                                        username, next_start,
                                                        limit)
            else:
                events = seafevents.get_user_events(ev_session, username,
                                                    next_start, limit)
            if not events:
                break

            for ev in events:
                if ev.etype == 'repo-update':
                    repo = seafile_api.get_repo(ev.repo_id)
                    if not repo:
                        # delete the update event for repo which has been deleted
                        seafevents.delete_event(ev_session, ev.uuid)
                        continue
                    if repo.encrypted:
                        repo.password_set = seafile_api.is_password_set(
                            repo.id, username)
                    ev.repo = repo
                    ev.commit = seaserv.get_commit(repo.id, repo.version, ev.commit_id)

                valid_events.append(ev)
                if len(valid_events) == limit:
                    break

            if len(valid_events) == limit:
                break
            # NOTE(review): len(valid_events) is the cumulative count across
            # all batches, so from the third batch on this offset advances by
            # more than the last batch contributed -- confirm whether
            # ``start + len(valid_events)`` was intended.
            next_start = next_start + len(valid_events)

        return valid_events
683
684
    def get_user_events(username, start, count):
        """Return user events list and a new start.

        Thin wrapper around ``_get_events`` without an organization id; the
        result is the ``(events, new_start)`` tuple that function returns.

        For example:
        ``get_user_events('foo@example.com', 0, 10)`` returns the first 10
        events.
        ``get_user_events('foo@example.com', 5, 10)`` returns the 6th through
        15th events.
        """
        return _get_events(username, start, count)
695
    def get_user_activities(username, start, count):
        """Return the user's activities list.

        Thin wrapper around ``_get_activities``; unlike ``get_user_events``
        only the list is returned, not a new start offset.

        For example:
        ``get_user_activities('foo@example.com', 0, 10)`` returns the first 10
        activities.
        ``get_user_activities('foo@example.com', 5, 10)`` returns the 6th through
        15th activities.
        """
        return _get_activities(username, start, count)
704
705    def get_user_activity_stats_by_day(start, end, offset):
706        """
707        """
708        with _get_seafevents_session() as session:
709            res = seafevents.get_user_activity_stats_by_day(session, start, end, offset)
710        return res
711
    def get_org_user_events(org_id, username, start, count):
        """Return the ``(events, new_start)`` tuple from ``_get_events`` for
        ``username`` within organization ``org_id``.
        """
        return _get_events(username, start, count, org_id=org_id)
714
715    def get_file_history(repo_id, path, start, count, history_limit=-1):
716        """Return file histories
717        """
718        with _get_seafevents_session() as session:
719            res = seafevents.get_file_history(session, repo_id, path, start, count, history_limit)
720        return res
721
722    def get_log_events_by_time(log_type, tstart, tend):
723        """Return log events list by start/end timestamp. (If no logs, return 'None')
724        """
725        with _get_seafevents_session() as session:
726            events = seafevents.get_event_log_by_time(session, log_type, tstart, tend)
727
728        return events if events else None
729
730    def generate_file_audit_event_type(e):
731
732        event_type_dict = {
733            'file-download-web': ('web', ''),
734            'file-download-share-link': ('share-link', ''),
735            'file-download-api': ('API', e.device),
736            'repo-download-sync': ('download-sync', e.device),
737            'repo-upload-sync': ('upload-sync', e.device),
738            'seadrive-download-file': ('seadrive-download', e.device),
739        }
740
741        if e.etype not in event_type_dict:
742            event_type_dict[e.etype] = (e.etype, e.device if e.device else '')
743
744        return event_type_dict[e.etype]
745
746    def get_file_audit_events_by_path(email, org_id, repo_id, file_path, start, limit):
747        """Return file audit events list by file path. (If no file audit, return 'None')
748
749        For example:
750        ``get_file_audit_events_by_path(email, org_id, repo_id, file_path, 0, 10)`` returns the first 10
751        events.
752        ``get_file_audit_events_by_path(email, org_id, repo_id, file_path, 5, 10)`` returns the 6th through
753        15th events.
754        """
755        with _get_seafevents_session() as session:
756            events = seafevents.get_file_audit_events_by_path(session,
757                email, org_id, repo_id, file_path, start, limit)
758
759        return events if events else None
760
761    def get_file_audit_events(email, org_id, repo_id, start, limit):
762        """Return file audit events list. (If no file audit, return 'None')
763
764        For example:
765        ``get_file_audit_events(email, org_id, repo_id, 0, 10)`` returns the first 10
766        events.
767        ``get_file_audit_events(email, org_id, repo_id, 5, 10)`` returns the 6th through
768        15th events.
769        """
770        with _get_seafevents_session() as session:
771            events = seafevents.get_file_audit_events(session, email, org_id, repo_id, start, limit)
772
773        return events if events else None
774
775    def get_file_ops_stats_by_day(start, end, offset):
776        """ return file audit record of sepcifiy time group by day.
777        """
778        with _get_seafevents_session() as session:
779            res = seafevents.get_file_ops_stats_by_day(session, start, end, offset)
780        return res
781
782    def get_total_storage_stats_by_day(start, end, offset):
783        """
784        """
785        with _get_seafevents_session() as session:
786            res = seafevents.get_total_storage_stats_by_day(session, start, end, offset)
787        return res
788
789    def get_system_traffic_by_day(start, end, offset, op_type='all'):
790        with _get_seafevents_session() as session:
791            res = seafevents.get_system_traffic_by_day(session, start, end, offset, op_type)
792        return res
793
794    def get_org_traffic_by_day(org_id, start, end, offset, op_type='all'):
795        with _get_seafevents_session() as session:
796            res = seafevents.get_org_traffic_by_day(session, org_id, start, end, offset, op_type)
797        return res
798
799    def get_file_update_events(email, org_id, repo_id, start, limit):
800        """Return file update events list. (If no file update, return 'None')
801
802        For example:
803        ``get_file_update_events(email, org_id, repo_id, 0, 10)`` returns the first 10
804        events.
805        ``get_file_update_events(email, org_id, repo_id, 5, 10)`` returns the 6th through
806        15th events.
807        """
808        with _get_seafevents_session() as session:
809            events = seafevents.get_file_update_events(session, email, org_id, repo_id, start, limit)
810        return events if events else None
811
812    def get_perm_audit_events(email, org_id, repo_id, start, limit):
813        """Return repo perm events list. (If no repo perm, return 'None')
814
815        For example:
816        ``get_repo_perm_events(email, org_id, repo_id, 0, 10)`` returns the first 10
817        events.
818        ``get_repo_perm_events(email, org_id, repo_id, 5, 10)`` returns the 6th through
819        15th events.
820        """
821        with _get_seafevents_session() as session:
822            events = seafevents.get_perm_audit_events(session, email, org_id, repo_id, start, limit)
823
824        return events if events else None
825
826    def get_virus_files(repo_id=None, has_handled=None, start=-1, limit=-1):
827        with _get_seafevents_session() as session:
828            r = seafevents.get_virus_files(session, repo_id, has_handled, start, limit)
829        return r if r else []
830
831    def delete_virus_file(vid):
832        with _get_seafevents_session() as session:
833            return True if seafevents.delete_virus_file(session, vid) == 0 else False
834
835    def operate_virus_file(vid, ignore):
836        with _get_seafevents_session() as session:
837            return True if seafevents.operate_virus_file(session, vid, ignore) == 0 else False
838
    def get_virus_file_by_vid(vid):
        """Return the result of ``seafevents.get_virus_file_by_vid`` for
        ``vid``; the call runs inside a session that is closed afterwards.
        """
        with _get_seafevents_session() as session:
            return seafevents.get_virus_file_by_vid(session, vid)
842
843    def get_file_scan_record(start=-1, limit=-1):
844        records = seafevents_api.get_content_scan_results(start, limit)
845        return records if records else []
846
847    def get_user_activities_by_timestamp(username, start, end):
848        events = seafevents.get_user_activities_by_timestamp(username, start, end)
849        return events if events else []
850
else:
    # No events config file: events are disabled. The names below are still
    # defined so they exist at module level, but each one is a no-op that
    # returns None.
    # NOTE(review): apart from get_virus_file_by_vid these stubs take no
    # parameters, so calling them with the arguments of their real
    # counterparts raises TypeError -- callers presumably check
    # EVENTS_ENABLED first; confirm.
    EVENTS_ENABLED = False
    def get_user_events():
        pass
    def get_user_activity_stats_by_day():
        pass
    def get_log_events_by_time():
        pass
    def get_org_user_events():
        pass
    def get_user_activities():
        pass
    def get_file_history():
        pass
    def generate_file_audit_event_type():
        pass
    def get_file_audit_events_by_path():
        pass
    def get_file_audit_events():
        pass
    def get_file_ops_stats_by_day():
        pass
    def get_total_storage_stats_by_day():
        pass
    def get_system_traffic_by_day():
        pass
    def get_org_traffic_by_day():
        pass
    def get_file_update_events():
        pass
    def get_perm_audit_events():
        pass
    def get_virus_files():
        pass
    def delete_virus_file():
        pass
    def operate_virus_file():
        pass
    def get_virus_file_by_vid(vid):
        pass
    def get_file_scan_record():
        pass
    def get_user_activities_by_timestamp():
        pass
895
896
def calc_file_path_hash(path, bits=12):
    """Return a short MD5-based hash of the URL-quoted ``path``.

    Arguments:
    - `path`: ``str`` or ``bytes``; a ``str`` is encoded as UTF-8 first.
    - `bits`: number of leading hex characters of the digest to keep
      (despite the name, this counts characters, not bits).
    """
    if isinstance(path, str):
        path = path.encode('UTF-8')

    # urllib.parse.quote() returns ``str`` even for ``bytes`` input, while
    # hashlib.md5() only accepts bytes, so the quoted text has to be encoded
    # before hashing.  The quoted text is pure ASCII.
    quoted_path = urllib.parse.quote(path)
    path_hash = hashlib.md5(quoted_path.encode('utf-8')).hexdigest()[:bits]

    return path_hash
904
def get_service_url():
    """Return the ``SERVICE_URL`` value read from ``config``.
    """
    service_url = config.SERVICE_URL
    return service_url
909
def get_server_id():
    """Return ``seaserv.SERVER_ID``, or ``'-'`` when seaserv has no such
    attribute.
    """
    try:
        return seaserv.SERVER_ID
    except AttributeError:
        return '-'
914
def get_site_scheme_and_netloc():
    """Return the ``scheme://netloc`` part of the service url.

    For example, when the service url is
    ``https://example.com:8000/seafile/`` the result is
    ``https://example.com:8000``.
    """
    # The first two fields of the parse result are scheme and netloc.
    scheme, netloc = urlparse(get_service_url())[:2]
    return "%s://%s" % (scheme, netloc)
926
def get_site_name():
    """Return the ``SITE_NAME`` value read from ``config``.
    """
    site_name = config.SITE_NAME
    return site_name
931
def send_html_email(subject, con_template, con_context, from_email, to_email,
                    reply_to=None):
    """Render ``con_template`` and send the result as an HTML email.

    ``con_context`` is updated in place with the site-wide values
    (``url_base``, ``site_name``, ``media_url``, ``logo_path``); these
    overwrite any keys of the same name supplied by the caller.  A
    ``Reply-to`` header is added only when email is configured and
    ``reply_to`` is not ``None``.
    """

    # Use the custom logo when the file exists under MEDIA_ROOT.
    if os.path.exists(os.path.join(MEDIA_ROOT, CUSTOM_LOGO_PATH)):
        logo_path = CUSTOM_LOGO_PATH
    else:
        logo_path = LOGO_PATH

    site_context = {
        'url_base': get_site_scheme_and_netloc(),
        'site_name': get_site_name(),
        'media_url': MEDIA_URL,
        'logo_path': logo_path,
    }
    template = loader.get_template(con_template)
    con_context.update(site_context)

    if IS_EMAIL_CONFIGURED and reply_to is not None:
        headers = {'Reply-to': reply_to}
    else:
        headers = {}

    body = template.render(con_context)
    msg = EmailMessage(subject, body, from_email, to_email, headers=headers)
    msg.content_subtype = "html"
    msg.send()
961
def gen_dir_share_link(token):
    """Return the share link for a directory (link type ``'d'``).
    """
    dir_link = gen_shared_link(token, 'd')
    return dir_link
966
def gen_file_share_link(token):
    """Return the share link for a file (link type ``'f'``).
    """
    file_link = gen_shared_link(token, 'f')
    return file_link
971
def gen_shared_link(token, s_type):
    """Build a share link under the service url.

    ``s_type == 'f'`` yields ``<service_url>/f/<token>/``; any other value
    yields ``<service_url>/d/<token>/``.
    """
    service_url = get_service_url()
    assert service_url is not None

    base_url = service_url.rstrip('/')
    link_template = '%s/f/%s/' if s_type == 'f' else '%s/d/%s/'
    return link_template % (base_url, token)
981
def gen_shared_upload_link(token):
    """Return the upload link ``<service_url>/u/d/<token>/``.
    """
    service_url = get_service_url()
    assert service_url is not None

    base_url = service_url.rstrip('/')
    return '%s/u/d/%s/' % (base_url, token)
988
989
def show_delete_days(request):
    """Read the ``days`` parameter from a GET or POST request.

    Returns the parameter as an int, or 7 when it is missing, is not a valid
    integer, or the request uses another HTTP method.
    """
    method = request.method
    if method == 'POST':
        raw_days = request.POST.get('days', '')
    elif method == 'GET':
        raw_days = request.GET.get('days', '')
    else:
        raw_days = ''

    try:
        return int(raw_days)
    except ValueError:
        return 7
1004
def is_textual_file(file_type):
    """
    Return ``True`` when ``file_type`` equals ``TEXT`` or ``MARKDOWN``.
    """
    return file_type in (TEXT, MARKDOWN)
1013
def redirect_to_login(request):
    """Redirect to ``settings.LOGIN_URL``, carrying the current full path
    (URL-quoted) in the ``REDIRECT_FIELD_NAME`` query parameter.
    """
    from django.conf import settings
    login_url = settings.LOGIN_URL
    next_path = urlquote(request.get_full_path())
    target = '%s?%s=%s' % (login_url, REDIRECT_FIELD_NAME, next_path)
    return HttpResponseRedirect(target)
1020
def mkstemp():
    '''Create a temporary file and return ``(fd, filepath)``.

    This is a thin wrapper: the pair produced by ``tempfile.mkstemp()`` is
    returned unchanged.

    '''
    return tempfile.mkstemp()
1029
1030# File or directory operations
# Operation names as they appear at the start of a commit description.
FILE_OP = (
    'Added or modified',
    'Added',
    'Modified',
    'Renamed',
    'Moved',
    'Added directory',
    'Renamed directory',
    'Moved directory',
)

# Regex alternation over all operation names.
OPS = '|'.join(FILE_OP)
# Groups: (1) operation, (2) quoted file/dir name, (3) optional "and N more ..." tail.
CMMT_DESC_PATT = re.compile(
    r'(%s) "(.*)"\s?(and \d+ more (?:files|directories))?' % OPS
)

# Same pattern with 'Deleted' and 'Removed' added; kept as an uncompiled string.
API_OPS = '|'.join(FILE_OP + ('Deleted', 'Removed'))
API_CMMT_DESC_PATT = (
    r'(%s) "(.*)"\s?(and \d+ more (?:files|directories))?' % API_OPS
)
1039
1040
def convert_cmmt_desc_link(commit):
    """Return ``commit.desc`` with each file/folder name wrapped in ``<a></a>``.

    Every match of ``CMMT_DESC_PATT`` is rewritten so the quoted name becomes
    a link to the ``convert_cmmt_desc_link`` view, carrying the repo id,
    commit id and the URL-quoted name; the visible name is HTML-escaped.
    """
    repo_id = commit.repo_id
    cmmt_id = commit.id
    conv_link_url = reverse('convert_cmmt_desc_link')

    def link_repl(matchobj):
        op, file_or_dir, remaining = matchobj.group(1, 2, 3)

        tmp_str = '%s "<a href="%s?repo_id=%s&cmmt_id=%s&nm=%s" class="normal">%s</a>"'
        linked = tmp_str % (op, conv_link_url, repo_id, cmmt_id,
                            urlquote(file_or_dir), escape(file_or_dir))
        if not remaining:
            return linked
        # Keep the trailing "and N more files/directories" text after the link.
        return linked + ' ' + remaining

    return re.sub(CMMT_DESC_PATT, link_repl, commit.desc)
1061
def api_tsstr_sec(value):
    """Turn a timestamp into a ``"%Y-%m-%d %H:%M:%S"`` local-time string.

    ``value`` is first interpreted as seconds since the epoch.  If
    ``datetime.fromtimestamp`` rejects it as out of range, it is divided by
    1000000 (i.e. treated as microseconds) and formatted again.

    Errors other than range errors (for example ``TypeError`` for a
    non-numeric ``value``) propagate to the caller.
    """
    fmt = "%Y-%m-%d %H:%M:%S"
    try:
        return datetime.fromtimestamp(value).strftime(fmt)
    except (ValueError, OverflowError, OSError):
        # Only out-of-range errors trigger the fallback.  A bare ``except``
        # here would also swallow KeyboardInterrupt and SystemExit.
        return datetime.fromtimestamp(value / 1000000).strftime(fmt)
1068
# Matches the "and N more files/directories" tail of a commit description.
MORE_PATT = r'and \d+ more (?:files|directories)'
def more_files_in_commit(commit):
    """Return ``True`` when ``commit.desc`` contains an
    "and N more files/directories" phrase, otherwise ``False``.
    """
    return re.search(MORE_PATT, commit.desc) is not None
1074
1075# file audit related
FILE_AUDIT_ENABLED = False
if EVENTS_CONFIG_FILE:
    def check_file_audit_enabled():
        """Return what ``seafevents.is_audit_enabled`` reports.

        Returns ``False`` without logging when seafevents has no
        ``is_audit_enabled`` function.
        """
        if not hasattr(seafevents, 'is_audit_enabled'):
            return False

        enabled = seafevents.is_audit_enabled(parsed_events_conf)
        if enabled:
            logging.debug('file audit: enabled')
        else:
            logging.debug('file audit: not enabled')
        return enabled

    FILE_AUDIT_ENABLED = check_file_audit_enabled()
1090
1091# office convert related
HAS_OFFICE_CONVERTER = False
if EVENTS_CONFIG_FILE:
    def check_office_converter_enabled():
        """Return what ``seafevents.is_office_converter_enabled`` reports.

        Returns ``False`` without logging when seafevents has no
        ``is_office_converter_enabled`` function.
        """
        if not hasattr(seafevents, 'is_office_converter_enabled'):
            return False

        enabled = seafevents.is_office_converter_enabled(parsed_events_conf)
        if enabled:
            logging.debug('office converter: enabled')
        else:
            logging.debug('office converter: not enabled')
        return enabled

    def get_office_converter_html_dir():
        """Return the office converter directory for the ``'html'`` kind."""
        html_dir = seafevents.get_office_converter_dir(parsed_events_conf, 'html')
        return html_dir

    def get_office_converter_pdf_dir():
        """Return the office converter directory for the ``'pdf'`` kind."""
        pdf_dir = seafevents.get_office_converter_dir(parsed_events_conf, 'pdf')
        return pdf_dir

    def get_office_converter_limit():
        """Return the converter limit read by seafevents from the events conf."""
        limit = seafevents.get_office_converter_limit(parsed_events_conf)
        return limit

    HAS_OFFICE_CONVERTER = check_office_converter_enabled()
1115
# Default preview size limit (2 * 1024 * 1024; presumably bytes — confirm).
# It is replaced below when the office converter is enabled.
OFFICE_PREVIEW_MAX_SIZE = 2 * 1024 * 1024
if HAS_OFFICE_CONVERTER:

    # These modules are imported only when the office converter is enabled.
    import time
    import requests
    import jwt

    # Output directories and limits come from the seafevents configuration.
    OFFICE_HTML_DIR = get_office_converter_html_dir()
    OFFICE_PDF_DIR = get_office_converter_pdf_dir()
    # The limit helper yields a (max size, max pages) pair.
    OFFICE_PREVIEW_MAX_SIZE, OFFICE_PREVIEW_MAX_PAGES = get_office_converter_limit()
1126
1127    def add_office_convert_task(file_id, doctype, raw_path):
1128        payload = {'exp': int(time.time()) + 300, }
1129        token = jwt.encode(payload, seahub.settings.SECRET_KEY, algorithm='HS256')
1130        headers = {"Authorization": "Token %s" % token}
1131        params = {'file_id': file_id, 'doctype': doctype, 'raw_path': raw_path}
1132        url = urljoin(OFFICE_CONVERTOR_ROOT, '/add-task')
1133        requests.get(url, params, headers=headers)
1134        return {'exists': False}
1135
1136    def query_office_convert_status(file_id, doctype):
1137        payload = {'exp': int(time.time()) + 300, }
1138        token = jwt.encode(payload, seahub.settings.SECRET_KEY, algorithm='HS256')
1139        headers = {"Authorization": "Token %s" % token}
1140        params = {'file_id': file_id, 'doctype': doctype}
1141        url = urljoin(OFFICE_CONVERTOR_ROOT, '/query-status')
1142        d = requests.get(url, params, headers=headers)
1143        d = d.json()
1144        ret = {}
1145        if 'error' in d:
1146            ret['error'] = d['error']
1147            ret['status'] = 'ERROR'
1148        else:
1149            ret['success'] = True
1150            ret['status'] = d['status']
1151        return ret
1152
1153    def get_office_converted_page(request, static_filename, file_id):
1154        office_out_dir = OFFICE_HTML_DIR
1155        filepath = os.path.join(file_id, static_filename)
1156        if static_filename.endswith('.pdf'):
1157            office_out_dir = OFFICE_PDF_DIR
1158            filepath = static_filename
1159        return django_static_serve(request,
1160                                   filepath,
1161                                   document_root=office_out_dir)
1162
1163    def cluster_get_office_converted_page(path, static_filename, file_id):
1164        url = urljoin(OFFICE_CONVERTOR_ROOT, '/get-converted-page')
1165        payload = {'exp': int(time.time()) + 300, }
1166        token = jwt.encode(payload, seahub.settings.SECRET_KEY, algorithm='HS256')
1167        headers = {"Authorization": "Token %s" % token}
1168        params = {'static_filename': static_filename, 'file_id': file_id}
1169        try:
1170            ret = requests.get(url, params, headers=headers)
1171        except urllib.error.HTTPError as e:
1172            raise Exception(e)
1173
1174        content_type = ret.headers.get('content-type', None)
1175        if content_type is None:
1176            dummy, ext = os.path.splitext(os.path.basename(path))
1177            content_type = mimetypes.types_map.get(ext, 'application/octet-stream')
1178
1179        resp = HttpResponse(ret, content_type=content_type)
1180        if 'last-modified' in ret.headers:
1181            resp['Last-Modified'] = ret.headers.get('last-modified')
1182
1183        return resp
1184
1185    def prepare_converted_html(raw_path, obj_id, doctype, ret_dict):
1186        try:
1187            add_office_convert_task(obj_id, doctype, raw_path)
1188        except Exception as e:
1189            print(e)
1190            logging.exception('failed to add_office_convert_task: %s' % e)
1191            return _('Internal Server Error')
1192        return None
1193
# search related
HAS_FILE_SEARCH = False
if EVENTS_CONFIG_FILE:
    def check_search_enabled():
        """Return what ``seafevents.is_search_enabled`` reports.

        Returns ``False`` without logging when seafevents has no
        ``is_search_enabled`` function.
        """
        if not hasattr(seafevents, 'is_search_enabled'):
            return False

        enabled = seafevents.is_search_enabled(parsed_events_conf)
        if enabled:
            logging.debug('search: enabled')
        else:
            logging.debug('search: not enabled')
        return enabled

    HAS_FILE_SEARCH = check_search_enabled()
1209
1210
def user_traffic_over_limit(username):
    """Return ``True`` if the user's share-link traffic for the current month
    has reached the plan limit, otherwise ``False``.

    Always ``False`` when ``CHECK_SHARE_LINK_TRAFFIC`` is off or when there
    is no traffic record yet.  If the traffic stat cannot be fetched, the
    error is logged and ``True`` is returned.
    """
    if not CHECK_SHARE_LINK_TRAFFIC:
        return False

    from seahub_extra.plan.models import UserPlan
    from seahub_extra.plan.settings import PLAN
    user_plan = UserPlan.objects.get_valid_plan_by_user(username)
    plan_name = user_plan.plan_type if user_plan is not None else 'Free'
    # The plan value is multiplied by 1024 ** 3 to get the limit.
    traffic_limit = int(PLAN[plan_name]['share_link_traffic']) * 1024 * 1024 * 1024

    try:
        stat = seafevents_api.get_user_traffic_by_month(username, datetime.now())
    except Exception:
        logging.getLogger(__name__).error(
            'Failed to get user traffic stat: %s' % username, exc_info=True)
        return True

    if not stat:            # No traffic record yet
        return False

    month_traffic = stat['link_file_upload'] + stat['link_file_download']
    return month_traffic >= traffic_limit
1236
def is_user_password_strong(password):
    """Return ``True`` if the password is STRONG, otherwise ``False``.

    A password shorter than ``config.USER_PASSWORD_MIN_LENGTH`` is never
    strong.  Otherwise it is strong when it uses at least
    ``config.USER_PASSWORD_STRENGTH_LEVEL`` of these character kinds:
    digits, upper-case letters, lower-case letters, other symbols.
    """
    if len(password) < config.USER_PASSWORD_MIN_LENGTH:
        return False

    # OR together one flag bit per kind of character seen.
    kinds_mask = 0
    for code_point in map(ord, password):
        kinds_mask |= get_char_mode(code_point)

    return calculate_bitwise(kinds_mask) >= config.USER_PASSWORD_STRENGTH_LEVEL
1256
def get_char_mode(n):
    """Map a character code to a flag for its kind:
       1: digit (48-57),
       2: upper-case letter (65-90),
       4: lower-case letter (97-122),
       8: anything else
    """
    if 48 <= n <= 57:
        return 1
    if 65 <= n <= 90:
        return 2
    if 97 <= n <= 122:
        return 4
    return 8
1272
def calculate_bitwise(num):
    """Return how many of the four lowest bits of ``num`` are set.
    """
    return sum((num >> shift) & 1 for shift in range(4))
1284
def do_md5(s):
    """Return the MD5 hex digest of ``s``; a ``str`` is encoded as UTF-8 first.
    """
    data = s.encode('UTF-8') if isinstance(s, str) else s
    return hashlib.md5(data).hexdigest()
1289
def do_urlopen(url, data=None, headers=None):
    """Open ``url`` with ``urllib`` and return the response object.

    ``data`` and ``headers`` are passed to ``urllib.request.Request``; a falsy
    ``headers`` is replaced by an empty dict.
    """
    request = urllib.request.Request(url, data=data, headers=headers or {})
    return urllib.request.urlopen(request)
1295
def clear_token(username):
    '''
    Delete the user's web api tokens (``Token`` and ``TokenV2``) and repo
    sync tokens; used when a user is deleted or deactivated.
    '''
    for token_model in (Token, TokenV2):
        token_model.objects.filter(user=username).delete()
    seafile_api.delete_repo_tokens_by_email(username)
1304
def send_perm_audit_msg(etype, from_user, to, repo_id, path, perm):
    """Publish a repo permission audit message on the ``seahub.audit`` channel.

    The message is the tab-separated record
    ``perm-change, etype, from_user, to, repo_id, path, perm``.  A failure
    to publish is logged and not re-raised.

    Arguments:
    - `etype`: add/modify/delete-repo-perm
    - `from_user`: email
    - `to`: email or group_id or all(public)
    - `repo_id`: origin repo id
    - `path`: dir path
    - `perm`: r or rw
    """
    fields = (etype, from_user, to, repo_id, path, perm)
    msg = 'perm-change\t%s\t%s\t%s\t%s\t%s\t%s' % fields

    try:
        seafile_api.publish_event('seahub.audit', msg)
    except Exception as e:
        error_text = "Error when sending perm-audit-%s message: %s" % (etype, str(e))
        logger.error(error_text)
1324
def get_origin_repo_info(repo_id):
    """Return ``(origin_repo_id, origin_path)`` for the repo ``repo_id``.

    Returns ``(None, None)`` when the repo's ``origin_repo_id`` is ``None``.
    """
    repo = seafile_api.get_repo(repo_id)
    origin_repo_id = repo.origin_repo_id
    if origin_repo_id is None:
        return (None, None)

    return (origin_repo_id, repo.origin_path)
1333
def within_time_range(d1, d2, maxdiff_seconds):
    '''Return true if two datetime.datetime objects differ by less than
    ``maxdiff_seconds`` seconds.

    Aware datetimes are converted with ``make_naive`` before comparing.
    '''
    first = make_naive(d1) if is_aware(d1) else d1
    second = make_naive(d2) if is_aware(d2) else d2

    delta = abs(second - first)
    # Difference in seconds, computed by hand from the timedelta fields.
    whole_seconds = delta.seconds + delta.days*24*3600
    diff = (delta.microseconds + whole_seconds * 1e6) / 1e6
    return diff < maxdiff_seconds
1346
def get_system_admins():
    """Return the ``DB`` and ``LDAPImport`` users whose ``is_staff`` is true.
    """
    db_users = seaserv.get_emailusers('DB', -1, -1)
    ldap_imported_users = seaserv.get_emailusers('LDAPImport', -1, -1)

    return [user for user in db_users + ldap_imported_users if user.is_staff]
1357
def is_windows_operating_system(request):
    """Return ``True`` when the request's ``HTTP_USER_AGENT`` contains
    "windows" (case-insensitive); ``False`` otherwise or when it is absent.
    """
    user_agent = request.META.get('HTTP_USER_AGENT', '')
    return 'windows' in user_agent.lower()
1366
def get_folder_permission_recursively(username, repo_id, path):
    """ Get folder permission recursively

    Walk from ``path`` up towards the root directory until an existing
    directory is found, then return the user's permission on it.  Raises
    ``Exception('path invalid.')`` when the path is empty or not a string,
    which is also what happens once the walk runs out of parents.
    """
    while True:
        if not path or not isinstance(path, str):
            raise Exception('path invalid.')

        if seafile_api.get_dir_id_by_path(repo_id, path):
            return seafile_api.check_permission_by_path(
                    repo_id, path, username)

        # This directory does not exist: retry with its parent.
        path = os.path.dirname(path.rstrip('/'))
1384
def is_valid_org_id(org_id):
    """Return ``True`` when ``org_id`` is truthy and greater than 0.
    """
    return bool(org_id and org_id > 0)
1390
1391
def encrypt_with_sha1(origin_str):
    """Return the SHA-1 hex digest of ``origin_str``.

    The string is encoded with ``str.encode()``'s default encoding first.
    Despite the name, this is a one-way hash, not encryption.
    """
    digest = hashlib.sha1()
    digest.update(origin_str.encode())
    return digest.hexdigest()
1395