1import ast
2import collections
3import errno
4import functools
5import hashlib
6import json
7import keyword
8import logging
9import multiprocessing
10import shutil
11from contextlib import contextmanager
12
13from generator3.constants import *
14
15try:
16    import inspect
17except ImportError:
18    inspect = None
19
# Chunk size (64 KiB) for incremental reads of binary streams (see sha256_digest()).
BIN_READ_BLOCK = 64 * 1024
21
22
def create_named_tuple():   #TODO: user-skeleton
    """Return the source text of a mock base class for named tuples (skeleton helper)."""
    return """
class __namedtuple(tuple):
    '''A mock base class for named tuples.'''

    __slots__ = ()
    _fields = ()

    def __new__(cls, *args, **kwargs):
        'Create a new instance of the named tuple.'
        return tuple.__new__(cls, *args)

    @classmethod
    def _make(cls, iterable, new=tuple.__new__, len=len):
        'Make a new named tuple object from a sequence or iterable.'
        return new(cls, iterable)

    def __repr__(self):
        return ''

    def _asdict(self):
        'Return a new dict which maps field types to their values.'
        return {}

    def _replace(self, **kwargs):
        'Return a new named tuple object replacing specified fields with new values.'
        return self

    def __getnewargs__(self):
        return tuple(self)
"""
54
def create_generator():
    """Return the source text of a mock class for <type 'generator'>."""
    # Python 2 spells the iterator-advance method "next"; Python 3 renamed it to "__next__".
    next_name = "next" if version[0] < 3 else "__next__"
    parts = ["""
class __generator(object):
    '''A mock class representing the generator function type.'''
    def __init__(self):
        self.gi_code = None
        self.gi_frame = None
        self.gi_running = 0

    def __iter__(self):
        '''Defined to support iteration over container.'''
        pass

    def %s(self):
        '''Return the next item from the container.'''
        pass
""" % (next_name,)]
    # close()/send()/throw() appeared with PEP 342 in Python 2.5.
    if version[0] >= 3 or (version[0] == 2 and version[1] >= 5):
        parts.append("""
    def close(self):
        '''Raises new GeneratorExit exception inside the generator to terminate the iteration.'''
        pass

    def send(self, value):
        '''Resumes the generator and "sends" a value that becomes the result of the current yield-expression.'''
        pass

    def throw(self, type, value=None, traceback=None):
        '''Used to raise an exception inside the generator.'''
        pass
""")
    return "".join(parts)
92
def create_async_generator():
    """Return the source text of a mock class for <type 'asyncgenerator'>."""
    # Fake <type 'asyncgenerator'>
    txt = """
class __asyncgenerator(object):
    '''A mock class representing the async generator function type.'''
    def __init__(self):
        '''Create an async generator object.'''
        self.__name__ = ''
        self.__qualname__ = ''
        self.ag_await = None
        self.ag_frame = None
        self.ag_running = False
        self.ag_code = None

    def __aiter__(self):
        '''Defined to support iteration over container.'''
        pass

    def __anext__(self):
        '''Returns an awaitable, that performs one asynchronous generator iteration when awaited.'''
        pass

    def aclose(self):
        '''Returns an awaitable, that throws a GeneratorExit exception into generator.'''
        pass

    def asend(self, value):
        '''Returns an awaitable, that pushes the value object in generator.'''
        pass

    def athrow(self, type, value=None, traceback=None):
        '''Returns an awaitable, that throws an exception into generator.'''
        pass
"""
    return txt
128
def create_function():
    """Return the source text of a mock class for the function type, with attributes
    appropriate for the running interpreter version."""
    parts = ["""
class __function(object):
    '''A mock class representing function type.'''

    def __init__(self):
        self.__name__ = ''
        self.__doc__ = ''
        self.__dict__ = ''
        self.__module__ = ''
"""]
    # Legacy func_* aliases exist only on Python 2.
    if version[0] == 2:
        parts.append("""
        self.func_defaults = {}
        self.func_globals = {}
        self.func_closure = None
        self.func_code = None
        self.func_name = ''
        self.func_doc = ''
        self.func_dict = ''
""")
    # Dunder equivalents appeared in Python 2.6.
    if version[0] >= 3 or (version[0] == 2 and version[1] >= 6):
        parts.append("""
        self.__defaults__ = {}
        self.__globals__ = {}
        self.__closure__ = None
        self.__code__ = None
        self.__name__ = ''
""")
    # Python 3 only attributes.
    if version[0] >= 3:
        parts.append("""
        self.__annotations__ = {}
        self.__kwdefaults__ = {}
""")
    # __qualname__ appeared in Python 3.3.
    if version[0] >= 3 and version[1] >= 3:
        parts.append("""
        self.__qualname__ = ''
""")
    return "".join(parts)
168
def create_method():
    """Return the source text of a mock class for the method type, with attributes
    appropriate for the running interpreter version."""
    parts = ["""
class __method(object):
    '''A mock class representing method type.'''

    def __init__(self):
"""]
    # im_* aliases exist only on Python 2.
    if version[0] == 2:
        parts.append("""
        self.im_class = None
        self.im_self = None
        self.im_func = None
""")
    # __func__/__self__ appeared in Python 2.6.
    if version[0] >= 3 or (version[0] == 2 and version[1] >= 6):
        parts.append("""
        self.__func__ = None
        self.__self__ = None
""")
    return "".join(parts)
188
189
def create_coroutine():
    """Return the source text of a mock coroutine class, or '' before Python 3.5."""
    # Coroutine objects only exist since Python 3.5 (PEP 492).
    if version[0] != 3 or version[1] < 5:
        return ""
    return """
class __coroutine(object):
    '''A mock class representing coroutine type.'''

    def __init__(self):
        self.__name__ = ''
        self.__qualname__ = ''
        self.cr_await = None
        self.cr_frame = None
        self.cr_running = False
        self.cr_code = None

    def __await__(self):
        return []

    def close(self):
        pass

    def send(self, value):
        pass

    def throw(self, type, value=None, traceback=None):
        pass
"""
217
218
219def _searchbases(cls, accum):
220    # logic copied from inspect.py
221    if cls not in accum:
222        accum.append(cls)
223        for x in cls.__bases__:
224            _searchbases(x, accum)
225
226
def get_mro(a_class):
    """Returns a tuple of MRO classes."""
    # Logic copied from inspect.py: prefer the real MRO, fall back to a
    # depth-first walk of __bases__ for old-style classes.
    if hasattr(a_class, "__mro__"):
        return a_class.__mro__
    if hasattr(a_class, "__bases__"):
        collected = []
        _searchbases(a_class, collected)
        return tuple(collected)
    return tuple()
238
239
def get_bases(a_class): # TODO: test for classes that don't fit this scheme
    """Returns a sequence of class's bases (empty tuple when absent)."""
    return getattr(a_class, "__bases__", ())
246
247
def is_callable(x):
    """Return True if *x* exposes a __call__ attribute.

    Deliberately uses hasattr() rather than callable() so that instances with an
    instance-level __call__ attribute are also accepted.
    """
    return hasattr(x, "__call__")
250
251
def sorted_no_case(p_array):
    """Sort an array case insensitively, returns a sorted copy"""
    return sorted(p_array, key=lambda item: item.upper())
257
258
def cleanup(value):
    """Escape CR/LF and replace control or non-ASCII characters with '?'.

    Newlines become the two characters '\\n', carriage returns '\\r'; any other
    character outside the printable ASCII range ' '..chr(127) becomes '?'.
    """
    last_ascii = chr(127)
    pieces = []
    for char in value:
        if char == '\n':
            pieces.append('\\n')
        elif char == '\r':
            pieces.append('\\r')
        elif char < ' ' or char > last_ascii:
            # NOTE: such chars are rare; long swaths could be processed differently
            pieces.append('?')
        else:
            pieces.append(char)
    return "".join(pieces)
280
281
def is_valid_expr(s):
    """Return True if *s* parses as a single Python expression."""
    try:
        # ast.parse(..., mode='eval') is compile(..., 'eval', PyCF_ONLY_AST).
        ast.parse(s, '<unknown>', 'eval')
    except SyntaxError:
        return False
    return True
288
289
# Types recognized as "properties" by is_property(): the builtin property plus,
# when available, the C-level getset/member descriptor types.
# NOTE(review): 'types' is not imported in this module directly — presumably
# re-exported via 'from generator3.constants import *'; the bare excepts below
# deliberately swallow both AttributeError and NameError so this works anywhere.
_prop_types = [type(property())]
#noinspection PyBroadException
try:
    _prop_types.append(types.GetSetDescriptorType)
except:
    pass

#noinspection PyBroadException
try:
    _prop_types.append(types.MemberDescriptorType)
except:
    pass

# Frozen into a tuple so it can be passed to isinstance().
_prop_types = tuple(_prop_types)
304
305
def is_property(x):
    """Return True if *x* is a property or a C-level descriptor (see _prop_types)."""
    return isinstance(x, _prop_types)
308
309
def reliable_repr(value):
    """repr() that is safe against broken __repr__ overrides in subclasses of built-ins."""
    # some subclasses of built-in types (see PyGtk) may provide invalid __repr__ implementations,
    # so we need to sanitize the output
    # The 'type(bool) == type' guard skips this branch on ancient Pythons where
    # bool() was a plain function rather than a type.
    if type(bool) == type and isinstance(value, bool):
        return repr(bool(value))
    # Coerce to the exact built-in numeric type so the subclass's repr is bypassed.
    # NUM_TYPES comes from generator3.constants.
    for num_type in NUM_TYPES:
        if isinstance(value, num_type):
            return repr(num_type(value))
    return repr(value)
319
320
def sanitize_value(p_value):
    """Returns p_value or its part if it represents a sane simple value, else returns 'None'"""
    if isinstance(p_value, STR_TYPES):
        match = SIMPLE_VALUE_RE.match(p_value)
        if not match:
            return 'None'
        # The last participating group holds the matched simple value.
        return match.group(match.lastindex)
    if isinstance(p_value, NUM_TYPES):
        return reliable_repr(p_value)
    if p_value is None:
        return 'None'
    if hasattr(p_value, "__name__") and hasattr(p_value, "__module__") and p_value.__module__ == BUILTIN_MOD_NAME:
        return p_value.__name__ # float -> "float"
    return repr(repr(p_value)) # function -> "<function ...>", etc
338
339
def report(msg, *data):
    """Say something at error level (stderr)"""
    sys.stderr.write(msg % data + "\n")
344
345
def say(msg, *data):
    """Say something at info level (stdout)"""
    sys.stdout.write(msg % data + "\n")
    # Flush so that output interleaves correctly with any subprocess output.
    sys.stdout.flush()
351
352
def flatten(seq):
    """Transforms tree lists like ['a', ['b', 'c'], 'd'] to strings like '(a, (b, c), d)', enclosing each tree level in parens."""
    rendered = [flatten(item) if type(item) is list else item for item in seq]
    return "(" + ", ".join(rendered) + ")"
362
363
def make_names_unique(seq, name_map=None):
    """
    Returns a copy of tree list seq where all clashing names are modified by numeric suffixes:
    ['a', 'b', 'a', 'b'] becomes ['a', 'b', 'a_1', 'b_1'].
    Each repeating name has its own counter in the name_map.
    Keywords get a trailing underscore; nested lists are processed recursively
    sharing the same counters.
    """
    ret = []
    if not name_map:
        name_map = {}
    for one in seq:
        if type(one) is list:
            ret.append(make_names_unique(one, name_map))
        else:
            if keyword.iskeyword(one):
                one += "_"
            # Starred parameters are unique sans stars. Fixed to use the str method:
            # a free function lstrip() is not defined anywhere in this module.
            one_key = one.lstrip("*")
            if one_key in name_map:
                old_one = one_key
                one = one + "_" + str(name_map[old_one])
                name_map[old_one] += 1
            else:
                name_map[one_key] = 1
            ret.append(one)
    return ret
388
389
def out_docstring(out_func, docstring, indent):
    """Emit *docstring* through out_func at *indent* as a triple-quoted literal.

    Single-line docstrings are emitted inline; multi-line ones get opening and
    closing quote lines. Non-string docstrings are silently ignored.
    """
    if not isinstance(docstring, str):
        return
    lines = docstring.strip().split("\n")
    if not lines:
        return
    if len(lines) == 1:
        out_func(indent, '""" ' + lines[0] + ' """')
        return
    out_func(indent, '"""')
    for line in lines:
        try:
            out_func(indent, line)
        except UnicodeEncodeError:
            # Drop lines that cannot be encoded to the output stream.
            continue
    out_func(indent, '"""')
404
def out_doc_attr(out_func, p_object, indent, p_class=None):
    """Emit p_object's docstring via out_func, or a '# no doc' marker when absent."""
    the_doc = getattr(p_object, "__doc__", None)
    if not the_doc:
        out_func(indent, "# no doc")
        return
    # The stock object.__init__ docstring is useless on a real __init__: substitute
    # the owning class's docstring and mark it as copied.
    if p_class and the_doc == object.__init__.__doc__ and p_object is not object.__init__ and p_class.__doc__:
        the_doc = str(p_class.__doc__)  # make it a certain string
        the_doc += "\n# (copied from class doc)"
    out_docstring(out_func, the_doc, indent)
414
def is_skipped_in_module(p_module, p_value):
    """
    Returns True if p_value's value must be skipped for module p_module.

    Checks both the module-specific and the wildcard ('*') skip lists.
    """
    return (p_value in SKIP_VALUE_IN_MODULE.get(p_module, ()) or
            p_value in SKIP_VALUE_IN_MODULE.get("*", ()))
426
def restore_predefined_builtin(class_name, func_name):
    """Return (spec, note) for a builtin whose signature is hard-coded in PREDEFINED_BUILTIN_SIGS."""
    spec = func_name + PREDEFINED_BUILTIN_SIGS[(class_name, func_name)]
    qualifier = class_name + "." if class_name else ""
    note = "known special case of " + qualifier + func_name
    return spec, note
431
def restore_by_inspect(p_func):
    """
    Returns paramlist restored by inspect.
    """
    args, varg, kwarg, defaults, kwonlyargs, kwonlydefaults, _ = getfullargspec(p_func)
    args = args or []
    defaults = defaults or ()
    # Defaults align with the tail of the positional argument list.
    first_defaulted = len(args) - len(defaults)
    spec = []
    for index, arg in enumerate(args):
        if index >= first_defaulted:
            arg += "=" + sanitize_value(defaults[index - first_defaulted])
        spec.append(arg)
    if varg:
        spec.append("*" + varg)
    elif kwonlyargs:
        # A bare star is required to introduce keyword-only parameters.
        spec.append("*")

    kwonlydefaults = kwonlydefaults or {}
    for arg in kwonlyargs:
        if arg in kwonlydefaults:
            spec.append(arg + '=' + sanitize_value(kwonlydefaults[arg]))
        else:
            spec.append(arg)

    if kwarg:
        spec.append("**" + kwarg)
    return flatten(spec)
464
def restore_parameters_for_overloads(parameter_lists):
    """Merge several overload parameter-name lists into one parameter list.

    Positions where overloads disagree collapse the rest into '*__args'; positions
    missing from shorter overloads become optional ('name=None'). Note: consumed
    entries are removed from *parameter_lists* in place.
    """
    params = []
    some_overload_ended = False
    index = 0
    while True:
        # Drop overloads that ran out of parameters; remaining positions are optional.
        for pl in list(parameter_lists):
            if index >= len(pl):
                parameter_lists.remove(pl)
                some_overload_ended = True
        if not parameter_lists:
            return params
        name = parameter_lists[0][index]
        if any(pl[index] != name for pl in parameter_lists[1:]):
            # Overloads disagree at this position: fall back to a star parameter.
            params.append("*__args")
            return params
        if some_overload_ended and '=' not in name:
            params.append(name + '=None')
        else:
            params.append(name)
        index += 1
492
def build_signature(p_name, params):
    """Render 'name(p1, p2, ...)' from a name and a sequence of parameter strings."""
    return "%s(%s)" % (p_name, ", ".join(params))
495
496
def propose_first_param(deco):
    """@return: name of missing first parameter, considering a decorator"""
    if deco is None:
        return "self"
    if deco == "classmethod":
        return "cls"
    # staticmethod (and any unknown decorator): no implicit first parameter.
    return None
505
def qualifier_of(cls, qualifiers_to_skip):
    """Return cls.__module__ for qualifying its name, or '' when it should be skipped."""
    module = getattr(cls, "__module__", None)
    return "" if module in qualifiers_to_skip else module
511
def handle_error_func(item_name, out):
    """Report the exception currently being handled (stderr) and also write it into
    the generated skeleton as a comment via *out*."""
    exctype, value = sys.exc_info()[:2]
    msg = "Error generating skeleton for function %s: %s"
    args = item_name, value
    report(msg, *args)
    out(0, "# " + msg % args)
    out(0, "")
519
def format_accessors(accessor_line, getter, setter, deleter):
    """Nicely format accessors, like 'getter, fdel=deleter'"""
    pieces = []
    # Accessors may be passed positionally only while no earlier one is missing.
    positional_ok = True
    for key, keyword_name, value in (('r', 'fget', getter),
                                     ('w', 'fset', setter),
                                     ('d', 'fdel', deleter)):
        if key not in accessor_line:
            positional_ok = False
            continue
        pieces.append(value if positional_ok else keyword_name + "=" + value)
    return ", ".join(pieces)
533
534
def has_regular_python_ext(file_name):
    """Does name end with .py?"""
    # Note that the standard library on MacOS X 10.6 is shipped only as .pyc files, so we need to
    # have them processed by the generator in order to have any code insight for the standard library.
    return file_name.endswith(".py")
540
541
def detect_constructor(p_class):
    """Return a comma-separated parameter string for p_class.__init__ if it can be
    introspected, else None. Keyword-only args are rendered as 'name=name'."""
    constructor = getattr(p_class, "__init__")
    if not (constructor and inspect and inspect.isfunction(constructor)):
        return None
    args, _, _, _, kwonlyargs, _, _ = getfullargspec(constructor)
    return ", ".join(args + [name + '=' + name for name in kwonlyargs])
550
##############  notes, actions #################################################################
# Whether note() prints anything; flipped by set_verbose().
_is_verbose = False # controlled by -v

# Human-readable description of the generator's current activity; updated by action()
# so that error reports can mention what was being done.
CURRENT_ACTION = "nothing yet"
555
def action(msg, *data):
    """Record the current activity in CURRENT_ACTION (for error reporting) and log it."""
    global CURRENT_ACTION
    CURRENT_ACTION = msg % data
    note(msg, *data)
560
561
def set_verbose(verbose):
    """Enable or disable the debug output produced by note()."""
    global _is_verbose
    _is_verbose = verbose
565
566
def note(msg, *data):
    """Say something at debug info level (stderr)"""
    # Only printed in verbose mode (see set_verbose()).
    if _is_verbose:
        sys.stderr.write(msg % data)
        sys.stderr.write("\n")
572
573
##############  platform-specific methods    #######################################################
575import sys
576if sys.platform == 'cli':
577    #noinspection PyUnresolvedReferences
578    import clr
579
# http://blogs.msdn.com/curth/archive/2009/03/29/an-ironpython-profiler.aspx
def print_profile():
    """Dump IronPython CLR profiler data to stdout, sorted by exclusive time (descending)."""
    data = []
    data.extend(clr.GetProfilerData())
    # Python 2 style comparator sort — this code path runs on IronPython 2 only
    # (where 'clr' is importable), so cmp() and list.sort(cmp) are available.
    data.sort(lambda x, y: -cmp(x.ExclusiveTime, y.ExclusiveTime))

    for pd in data:
        say('%s\t%d\t%d\t%d', pd.Name, pd.InclusiveTime, pd.ExclusiveTime, pd.Calls)
588
def is_clr_type(clr_type):
    """Return True if *clr_type* is recognized by the CLR type system (IronPython only)."""
    if not clr_type:
        return False
    try:
        clr.GetClrType(clr_type)
    except TypeError:
        return False
    return True
596
def restore_clr(p_name, p_class):
    """
    Restore the function signature by the CLR type signature
    :return (is_static, spec, sig_note)
    """
    clr_type = clr.GetClrType(p_class)
    if p_name == '__new__':
        # Python __new__ maps onto the CLR constructors.
        methods = [c for c in clr_type.GetConstructors()]
        if not methods:
            return False, p_name + '(self, *args)', 'cannot find CLR constructor' # "self" is always first argument of any non-static method
    else:
        methods = [m for m in clr_type.GetMethods() if m.Name == p_name]
        if not methods:
            bases = p_class.__bases__
            if len(bases) == 1 and p_name in dir(bases[0]):
                # skip inherited methods
                return False, None, None
            return False, p_name + '(self, *args)', 'cannot find CLR method'
            # "self" is always first argument of any non-static method

    # Collect the parameter-name list of every overload and merge them.
    parameter_lists = []
    for m in methods:
        parameter_lists.append([p.Name for p in m.GetParameters()])
    params = restore_parameters_for_overloads(parameter_lists)
    is_static = False
    # Staticness is taken from the first overload only.
    if not methods[0].IsStatic:
        params = ['self'] + params
    else:
        is_static = True
    return is_static, build_signature(p_name, params), None
627
628
def build_pkg_structure(base_dir, qname):
    """Create the package directory chain for dotted name *qname* under *base_dir*.

    Every level becomes a directory containing an __init__.py; a pre-existing
    module 'part.py' is promoted to 'part/__init__.py' keeping its content.
    Returns the deepest directory created (or *base_dir* for an empty qname).
    """
    if not qname:
        return base_dir

    current_dir = base_dir
    for part in qname.split("."):
        current_dir = os.path.join(current_dir, part)
        if not os.path.isdir(current_dir):
            action("creating subdir %r", current_dir)
            os.makedirs(current_dir)
        init_py = os.path.join(current_dir, "__init__.py")
        module_py = current_dir + ".py"
        if os.path.isfile(module_py):
            # Promote an existing module to a package.
            os.rename(module_py, init_py)
        elif not os.path.isfile(init_py):
            fopen(init_py, "w").close()

    return current_dir
646
647
def is_valid_implicit_namespace_package_name(s):
    """
    Checks whether provided string could represent implicit namespace package name.
    :param s: string to check
    :return: True if provided string could represent implicit namespace package name and False otherwise
    """
    return not keyword.iskeyword(s) and isidentifier(s)
655
656
def isidentifier(s):
    """
    Checks whether provided string complies Python identifier syntax requirements.
    :param s: string to check
    :return: True if provided string comply Python identifier syntax requirements and False otherwise
    """
    if version[0] >= 3:
        return s.isidentifier()
    else:
        # quick test on provided string to comply major Python identifier syntax requirements
        # NOTE: for a falsy s this returns s itself (e.g. ''), which callers treat as False.
        return (s and
                not s[:1].isdigit() and
                "-" not in s and
                " " not in s)
671
672
@contextmanager
def ignored_os_errors(*error_numbers):
    """Context manager suppressing OS errors whose errno is in *error_numbers*.

    Any other OS error is re-raised. The parameter was renamed from 'errno'
    because it shadowed the module-level 'errno' import.
    """
    try:
        yield
    # Since Python 3.3 IOError and OSError were merged into OSError
    except EnvironmentError as e:
        if e.errno not in error_numbers:
            raise
681
682
def mkdir(path):
    """Create *path* including parents; an already-existing directory is not an error."""
    try:
        os.makedirs(path)
    except EnvironmentError as e:
        # Tolerate only "already exists as a directory"; anything else propagates.
        already_a_dir = e.errno == errno.EEXIST and os.path.isdir(path)
        if not already_a_dir:
            raise
689
690
def copy(src, dst, merge=False, pre_copy_hook=None, conflict_handler=None, post_copy_hook=None):
    """Copy a file or a directory tree from *src* to *dst*.

    :param merge: when dst is an existing directory, merge contents into it
        instead of requiring a fresh destination
    :param pre_copy_hook: (src, dst) -> bool; a falsy result skips this entry entirely
    :param conflict_handler: (src, dst) -> bool; truthy means an EEXIST conflict
        on a child was handled and copying continues
    :param post_copy_hook: (src, dst) callback invoked after a successful copy
    """
    if pre_copy_hook is None:
        def pre_copy_hook(p1, p2):
            return True

    if conflict_handler is None:
        def conflict_handler(p1, p2):
            return False

    if post_copy_hook is None:
        def post_copy_hook(p1, p2):
            pass

    if not pre_copy_hook(src, dst):
        return

    # Note about shutil.copy vs shutil.copy2.
    # There is an open CPython bug which breaks copy2 on NFS when it tries to copy the xattr.
    # https://bugs.python.org/issue24564
    # https://youtrack.jetbrains.com/issue/PY-37523
    # However, in all our use cases, we do not care about the xattr,
    # so just always use shutil.copy to avoid this problem.
    if os.path.isdir(src):
        if not merge:
            if version[0] >= 3:
                shutil.copytree(src, dst, copy_function=shutil.copy)
            else:
                # Python 2 copytree has no copy_function parameter.
                shutil.copytree(src, dst)
        else:
            mkdir(dst)
            for child in os.listdir(src):
                child_src = os.path.join(src, child)
                child_dst = os.path.join(dst, child)
                try:
                    copy(child_src, child_dst, merge=merge,
                         pre_copy_hook=pre_copy_hook,
                         conflict_handler=conflict_handler,
                         post_copy_hook=post_copy_hook)
                except OSError as e:
                    # A dir-over-dir EEXIST is fine (merged); anything else is a
                    # genuine conflict offered to the handler first.
                    if e.errno == errno.EEXIST and not (os.path.isdir(child_src) and os.path.isdir(child_dst)):
                        if conflict_handler(child_src, child_dst):
                            continue
                    raise
    else:
        mkdir(os.path.dirname(dst))
        shutil.copy(src, dst)
    post_copy_hook(src, dst)
738
739
def copy_skeletons(src_dir, dst_dir, new_origin=None):
    """Merge-copy a generated skeletons tree from *src_dir* into *dst_dir*.

    Conflicting entries are overwritten; a module and a package with the same
    import name cannot coexist, so the stale counterpart is removed; when
    *new_origin* is given, the '# from <path>' header stamp of every copied
    .py skeleton is rewritten to point at it.
    """
    def overwrite(src, dst):
        delete(dst)
        copy(src, dst)
        return True

    # Remove packages/modules with the same import name
    def mod_pkg_cleanup(src, dst):
        dst_dir = os.path.dirname(dst)
        name, ext = os.path.splitext(os.path.basename(src))
        if ext == '.py':
            delete(os.path.join(dst_dir, name))
        elif not ext:
            delete(dst + '.py')

    def override_origin_stamp(src, dst):
        _, ext = os.path.splitext(dst)
        if ext == '.py' and new_origin:
            with fopen(dst, 'r') as f:
                lines = f.readlines()
                for i, line in enumerate(lines):
                    # The origin stamp must appear within the leading comment block.
                    if not line.startswith('#'):
                        return

                    m = SKELETON_HEADER_ORIGIN_LINE.match(line)
                    if m:
                        break
                else:
                    return
            with fopen(dst, 'w') as f:
                # 'i' intentionally carries over from the loop above: it is the
                # index of the matched origin line to replace.
                lines[i] = '# from ' + new_origin + '\n'
                f.writelines(lines)

    def post_copy_hook(src, dst):
        override_origin_stamp(src, dst)
        mod_pkg_cleanup(src, dst)

    def ignore_failed_version_stamps(src, dst):
        return not os.path.basename(src).startswith(FAILED_VERSION_STAMP_PREFIX)

    copy(src_dir, dst_dir, merge=True,
         pre_copy_hook=ignore_failed_version_stamps,
         conflict_handler=overwrite,
         post_copy_hook=post_copy_hook)
784
785
def delete(path, content=False):
    """Remove *path* (file or whole directory tree). With content=True, empty the
    directory but keep the directory itself. A missing path is not an error.
    """
    try:
        if os.path.isdir(path):
            if not content:
                shutil.rmtree(path)
            else:
                for child in os.listdir(path):
                    # Bug fix: os.listdir() yields bare names, so the child must be
                    # anchored to *path*; previously delete(child) resolved against
                    # the CWD and the ENOENT was silently swallowed, leaving the
                    # directory contents in place.
                    delete(os.path.join(path, child))
        else:
            os.remove(path)
    except EnvironmentError as e:
        # Equivalent of ignored_os_errors(errno.ENOENT), inlined.
        if e.errno != errno.ENOENT:
            raise
796
797
def cached(func):
    """Memoize *func* by its positional arguments.

    Results are stored in an unbounded dict attached to the function itself
    (func._results), so cached values are inspectable and can be cleared.
    """
    func._results = {}
    missing = object()  # sentinel: None may be a legitimate cached result

    # noinspection PyProtectedMember
    @functools.wraps(func)
    def wrapper(*args):
        value = func._results.get(args, missing)
        if value is missing:
            value = func(*args)
            func._results[args] = value
        return value

    return wrapper
811
812
def sha256_digest(binary_or_file):
    """Return the hex SHA-256 of a bytes object or of a binary file-like object.

    File-like objects are consumed in BIN_READ_BLOCK-sized chunks.
    """
    # "bytes" type is available in Python 2.7
    if isinstance(binary_or_file, bytes):
        return hashlib.sha256(binary_or_file).hexdigest()
    digest = hashlib.sha256()
    while True:
        chunk = binary_or_file.read(BIN_READ_BLOCK)
        if not chunk:
            break
        digest.update(chunk)
    return digest.hexdigest()
825
826
def get_relative_path_by_qname(abs_path, qname):
    """Return the trailing part of *abs_path* corresponding to dotted name *qname*.

    For a regular module the result keeps as many path components as *qname* has;
    for a package's __init__ file one extra component is kept for the file itself.

    Bug fix: os.path.split() only returns a (head, tail) pair, so taking more than
    two trailing components used to yield the absolute head path instead of a
    relative one; the path is now split into all of its components.
    """
    abs_path_components = os.path.normpath(abs_path).split(os.sep)
    qname_components_count = len(qname.split('.'))
    if os.path.splitext(abs_path_components[-1])[0] == '__init__':
        rel_path_components_count = qname_components_count + 1
    else:
        rel_path_components_count = qname_components_count
    return os.path.join(*abs_path_components[-rel_path_components_count:])
835
def is_text_file(path):
    """
    Verify that some path is a text file (not a binary file).
    Ideally there should be usage of libmagic but it can be not
    installed on a target machine.

    Actually this algorithm is inspired by function `file_encoding`
    from libmagic.
    """
    try:
        with open(path, 'rb') as stream:
            # Same buffer size as libmagic uses.
            sample = stream.read(256 * 1024)
    except EnvironmentError:
        return False

    # Accept anything decodable as ASCII/UTF-8/UTF-16; a decode error caused only
    # by a chopped-off final character still counts as text.
    for encoding in ('utf-8', 'utf-16', 'utf-16-be', 'utf-16-le'):
        try:
            sample.decode(encoding)
            return True
        except UnicodeDecodeError as err:
            if err.args[0].endswith(('truncated data', 'unexpected end of data')):
                return True

    # Otherwise treat it as ISO-8859 / non-ISO extended ASCII text unless it
    # contains bytes that never appear in text.
    return not any(byte in _bytes_that_never_appears_in_text for byte in sample)
864
865
# Control bytes that do not occur in text (used by is_text_file); note this allows
# \a \b \t \n \f \r and ESC(27) but treats \v(11) and DEL(127) as binary markers.
_bytes_that_never_appears_in_text = set(range(7)) | {11} | set(range(14, 27)) | set(range(28, 32)) | {127}
867
868
# This wrapper is intentionally made top-level: local functions can't be pickled.
def _multiprocessing_wrapper(data, func, *args, **kwargs):
    """Child-process entry point: re-apply logging config and pipe func's result back.

    :param data: _MainProcessData with the result pipe end and the parent's root log level
    """
    configure_logging(data.root_logger_level)
    data.result_conn.send(func(*args, **kwargs))
873
874
# Data handed from the parent to the child process: the pipe end used to send the
# result back, and the parent's root logger level to replicate in the child.
_MainProcessData = collections.namedtuple('_MainProcessData', ['result_conn', 'root_logger_level'])
876
877
def execute_in_subprocess_synchronously(name, func, args, kwargs, failure_result=None):
    """Run func(*args, **kwargs) in a child process and return its result.

    :param name: name assigned to the child process
    :param failure_result: value returned when the child died without sending a
        result (e.g. it was killed by a segfault)
    """
    import multiprocessing as mp

    extra_process_kwargs = {}
    if sys.version_info[0] >= 3:
        # The 'daemon' Process kwarg only exists on Python 3.
        extra_process_kwargs['daemon'] = True

    # There is no need to use a full-blown queue for single producer/single consumer scenario.
    # Also, Pipes don't suffer from issues such as https://bugs.python.org/issue35797.
    # TODO experiment with a shared queue maintained by multiprocessing.Manager
    #  (it will require an additional service process)
    recv_conn, send_conn = mp.Pipe(duplex=False)
    data = _MainProcessData(result_conn=send_conn,
                            root_logger_level=logging.getLogger().level)
    p = mp.Process(name=name,
                   target=_multiprocessing_wrapper,
                   args=(data, func) + args,
                   kwargs=kwargs,
                   **extra_process_kwargs)
    p.start()
    # This is actually against the multiprocessing guidelines
    # https://docs.python.org/3/library/multiprocessing.html#programming-guidelines
    # but allows us to fail-fast if the child process terminated abnormally with a segfault
    # (otherwise we would have to wait by timeout on acquiring the result) and should work
    # fine for small result values such as generation status.
    p.join()
    if recv_conn.poll():
        return recv_conn.recv()
    else:
        return failure_result
908
909
def configure_logging(root_level):
    """(Re)configure the root logger to emit JSON records to stdout at *root_level*."""
    logging.addLevelName(logging.DEBUG - 1, 'TRACE')

    root = logging.getLogger()
    root.setLevel(root_level)

    # In environments where fork is implemented entire logging configuration is already
    # inherited by child processes. Configuring it twice will lead to duplicated records.
    # Reset the logger similarly to how it's done in logging.config.
    for existing_handler in list(root.handlers):
        root.removeHandler(existing_handler)
    for existing_filter in list(root.filters):
        root.removeFilter(existing_filter)

    class JsonFormatter(logging.Formatter):
        def format(self, record):
            message = super(JsonFormatter, self).format(record)
            return json.dumps({
                'type': 'log',
                'level': record.levelname.lower(),
                'message': message
            })

    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setFormatter(JsonFormatter())
    root.addHandler(stdout_handler)
938