1 /*  $Id: blob_splitter_params.cpp 200566 2010-08-10 16:32:31Z vasilche $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author:  Eugene Vasilchenko
27 *
28 * File Description:
*   Application for splitting blobs within ID1 cache
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
35 #include <objmgr/split/blob_splitter_params.hpp>
36 #include <objmgr/split/id2_compress.hpp>
37 #include <objmgr/split/split_exceptions.hpp>
38 #include <util/compress/zlib.hpp>
39 
40 BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)41 BEGIN_SCOPE(objects)
42 
43 
44 #define DISABLE_SPLIT_DESCRIPTIONS false
45 #define DISABLE_SPLIT_ASSEMBLY     true
46 
47 
48 SSplitterParams::SSplitterParams(void)
49     : m_MinChunkCount(kDefaultMinChunkCount),
50       m_Compression(eCompression_none),
51       m_Verbose(0),
52       m_DisableSplitDescriptions(DISABLE_SPLIT_DESCRIPTIONS),
53       m_DisableSplitSequence(false),
54       m_DisableSplitAnnotations(false),
55       m_DisableSplitAssembly(DISABLE_SPLIT_ASSEMBLY),
56       m_JoinSmallChunks(false),
57       m_SplitWholeBioseqs(true),
58       m_SplitNonFeatureSeqTables(kDefaultSplitNonFeatureSeqTables)
59 {
60     SetChunkSize(kDefaultChunkSize);
61 }
62 
63 
SetChunkSize(size_t size)64 void SSplitterParams::SetChunkSize(size_t size)
65 {
66     m_ChunkSize = size;
67     m_MinChunkSize = size_t(double(size) * 0.8);
68     m_MaxChunkSize = size_t(double(size) * 1.2);
69 }
70 
71 
72 static const size_t kChunkSize = 32*1024;
73 
74 
Compress(const SSplitterParams & params,list<vector<char> * > & dst,const char * data,size_t size)75 void CId2Compressor::Compress(const SSplitterParams& params,
76                               list<vector<char>*>& dst,
77                               const char* data, size_t size)
78 {
79     vector<char>* vec;
80     dst.push_back(vec = new vector<char>);
81     CompressHeader(params, *vec, size);
82     while ( size ) {
83         size_t chunk_size = min(size, kChunkSize);
84         CompressChunk(params, *vec, data, chunk_size);
85         data += chunk_size;
86         size -= chunk_size;
87         if ( size ) { // another vector<char> for next chunk
88             dst.push_back(vec = new vector<char>);
89         }
90     }
91     CompressFooter(params, *vec, size);
92 }
93 
94 
Compress(const SSplitterParams & params,vector<char> & dst,const char * data,size_t size)95 void CId2Compressor::Compress(const SSplitterParams& params,
96                               vector<char>& dst,
97                               const char* data, size_t size)
98 {
99     CompressHeader(params, dst, size);
100     CompressChunk(params, dst, data, size);
101     CompressFooter(params, dst, size);
102 }
103 
104 
CompressChunk(const SSplitterParams & params,vector<char> & dst,const char * data,size_t size)105 void CId2Compressor::CompressChunk(const SSplitterParams& params,
106                                    vector<char>& dst,
107                                    const char* data, size_t size)
108 {
109     switch ( params.m_Compression ) {
110     case SSplitterParams::eCompression_none:
111         sx_Append(dst, data, size);
112         break;
113     case SSplitterParams::eCompression_nlm_zip:
114     {{
115         size_t pos = dst.size();
116         CZipCompression compr(CCompression::eLevel_Default);
117         dst.resize(pos + 32 + size_t(double(size)*1.01));
118         size_t real_size = 0;
119         if ( !compr.CompressBuffer(data, size,
120                                    &dst[pos+8], dst.size()-(pos+8),
121                                    &real_size) ) {
122             NCBI_THROW(CSplitException, eCompressionError,
123                        "zip compression failed");
124         }
125         for ( size_t i = 0, s = real_size; i < 4; ++i, s <<= 8 ) {
126             dst[pos+i] = char(s >> 24);
127         }
128         for ( size_t i = 0, s = size; i < 4; ++i, s <<= 8 ) {
129             dst[pos+4+i] = char(s >> 24);
130         }
131         dst.resize(pos+8+real_size);
132         break;
133     }}
134     default:
135         NCBI_THROW(CSplitException, eNotImplemented,
136                    "compression method is not implemented");
137     }
138 }
139 
140 
CompressHeader(const SSplitterParams & params,vector<char> & dst,size_t)141 void CId2Compressor::CompressHeader(const SSplitterParams& params,
142                                     vector<char>& dst,
143                                     size_t)
144 {
145     switch ( params.m_Compression ) {
146     case SSplitterParams::eCompression_none:
147         break;
148     case SSplitterParams::eCompression_nlm_zip:
149         sx_Append(dst, "ZIP", 4);
150         break;
151     default:
152         NCBI_THROW(CSplitException, eNotImplemented,
153                    "compression method is not implemented");
154     }
155 }
156 
157 
CompressFooter(const SSplitterParams &,vector<char> &,size_t)158 void CId2Compressor::CompressFooter(const SSplitterParams& ,
159                                     vector<char>& ,
160                                     size_t)
161 {
162 }
163 
164 
sx_Append(vector<char> & dst,const char * data,size_t size)165 void CId2Compressor::sx_Append(vector<char>& dst,
166                                const char* data, size_t size)
167 {
168     size_t pos = dst.size();
169     dst.resize(pos + size);
170     memcpy(&dst[pos], data, size);
171 }
172 
173 
174 END_SCOPE(objects)
175 END_NCBI_SCOPE
176