1"""Consts and function to handle target format.
2ALL_SUPPORTED_FORMATS - list of supported formats
get_decompress_function - returns stream decompress function for a current
    format (specified or autodetected), together with the chunk that was
    read from the input during autodetection (None for a specified format)
get_compress_function - returns compress function for a current format
    (specified or default)
7"""
8from __future__ import absolute_import
9
10from .snappy import (
11    stream_compress, stream_decompress, check_format, UncompressError)
12from .hadoop_snappy import (
13    stream_compress as hadoop_stream_compress,
14    stream_decompress as hadoop_stream_decompress,
15    check_format as hadoop_check_format)
16
17
# Identifier of the snappy framing format.
FRAMING_FORMAT = 'framing'

# Identifier of the hadoop snappy format.
HADOOP_FORMAT = 'hadoop_snappy'

# Means format auto detection.
# For compression the framing format will be used.
# For decompression we will try to detect the format from the input stream
# header.
FORMAT_AUTO = 'auto'

# Format used when the caller does not choose one.
DEFAULT_FORMAT = FORMAT_AUTO

# Every format name accepted by this module, including the auto mode.
ALL_SUPPORTED_FORMATS = [FRAMING_FORMAT, HADOOP_FORMAT, FORMAT_AUTO]

# Maps an explicit format name to its stream compression function.
# FORMAT_AUTO is intentionally absent: get_compress_function resolves it to
# _DEFAULT_COMPRESS_FORMAT before the lookup.
_COMPRESS_METHODS = {
    FRAMING_FORMAT: stream_compress,
    HADOOP_FORMAT: hadoop_stream_compress,
}

# Maps an explicit format name to its stream decompression function.
# FORMAT_AUTO is intentionally absent: get_decompress_function handles it by
# inspecting the input instead of looking it up here.
_DECOMPRESS_METHODS = {
    FRAMING_FORMAT: stream_decompress,
    HADOOP_FORMAT: hadoop_stream_decompress,
}

# We will use the framing format as the default for compression.
# For decompression, if the format is not defined explicitly, we will try to
# guess it from the file header.
_DEFAULT_COMPRESS_FORMAT = FRAMING_FORMAT

# The tuple contains an ordered sequence of pairs: a format checking function
# and the matching format-specific decompression function.
# The framing format has its own header, which can be recognized.
# The hadoop snappy format has no special header; it contains only the
# uncompressed block length integer and the length of the compressed subblock.
# So we check for the framing format first and, if it is not the case, then
# check for the hadoop snappy format.
_DECOMPRESS_FORMAT_FUNCS = (
    (check_format, stream_decompress),
    (hadoop_check_format, hadoop_stream_decompress),
)
58
59
def guess_format_by_header(fin):
    """Detect the compression format of the input file from its header.

    Every (check function, decompress function) pair listed in
    _DECOMPRESS_FORMAT_FUNCS is tried in order. The chunk handed back by one
    check is passed on to the next one, and the first format whose check
    succeeds wins.

    :param fin: input file object passed to the format checking functions.
    :return: tuple of the decompression function for the detected format and
        the chunk that was taken from the input for format detection.
    :raises UncompressError: if none of the known formats matches.
    """
    probed = None
    for detector, decompressor in _DECOMPRESS_FORMAT_FUNCS:
        matched, probed = detector(fin=fin, chunk=probed)
        if matched:
            return decompressor, probed
    raise UncompressError("Can't detect archive format")
73
74
def get_decompress_function(specified_format, fin):
    """Pick the stream decompression function for the requested format.

    :param specified_format: format name; FORMAT_AUTO asks for detection
        from the header of the input.
    :param fin: input file object, used only for auto detection.
    :return: tuple of the decompression function and the chunk that was read
        from the input while detecting the format (None when the format was
        specified explicitly, since nothing is read in that case).
    :raises KeyError: if the format is neither FORMAT_AUTO nor a key of
        _DECOMPRESS_METHODS.
    """
    if specified_format == FORMAT_AUTO:
        # The detector already yields the (function, chunk) pair we return.
        return guess_format_by_header(fin)
    decompressor = _DECOMPRESS_METHODS[specified_format]
    return decompressor, None
80
81
def get_compress_function(specified_format):
    """Pick the stream compression function for the requested format.

    :param specified_format: format name; FORMAT_AUTO selects the default
        compression format (_DEFAULT_COMPRESS_FORMAT).
    :return: the compression function registered for the resolved format.
    :raises KeyError: if the format is neither FORMAT_AUTO nor a key of
        _COMPRESS_METHODS.
    """
    fmt = specified_format
    if fmt == FORMAT_AUTO:
        # There is nothing to detect when compressing, so use the default.
        fmt = _DEFAULT_COMPRESS_FORMAT
    return _COMPRESS_METHODS[fmt]
86