1 /* $Id
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  */
27 #include <ncbi_pch.hpp>
28 #include <objects/genomecoll/cached_assembly.hpp>
29 #include <sstream>
30 
31 BEGIN_NCBI_SCOPE
32 USING_SCOPE(objects);
33 
CCachedAssembly(CRef<CGC_Assembly> assembly)34 CCachedAssembly::CCachedAssembly(CRef<CGC_Assembly> assembly)
35         : m_assembly(assembly)
36 {}
37 
CCachedAssembly(const string & blob)38 CCachedAssembly::CCachedAssembly(const string& blob)
39         : m_blob(blob)
40 {}
41 
CCachedAssembly(const vector<char> & blob)42 CCachedAssembly::CCachedAssembly(const vector<char>& blob)
43         : m_blob(blob.begin(), blob.end())
44 {}
45 
46 static
UncomressAndCreate(const string & blob,CCompressStream::EMethod method)47 CRef<CGC_Assembly> UncomressAndCreate(const string& blob, CCompressStream::EMethod method) {
48     CStopWatch sw(CStopWatch::eStart);
49 
50     CNcbiIstrstream in(blob);
51     CDecompressIStream decompress(in, method);
52 
53     CRef<CGC_Assembly> m_assembly(new CGC_Assembly);
54     decompress >> MSerial_AsnBinary
55                 >> MSerial_SkipUnknownMembers(eSerialSkipUnknown_Yes)   // Make reading cache backward compatible
56                 >> MSerial_SkipUnknownVariants(eSerialSkipUnknown_Yes)
57                 >> (*m_assembly);
58 
59     sw.Stop();
60     LOG_POST(Info << "Assembly uncomressed and created in (sec): " << sw.Elapsed());
61     GetDiagContext().Extra().Print("Create-assembly-from-blob-time", sw.Elapsed() * 1000) // need millisecond
62                             .Print("compress-method", method)
63                             .Print("blob-size", blob.size());
64     return m_assembly;
65 }
66 
67 //static
68 //void Uncomress(const string& blob, CCompressStream::EMethod m) {
69 //    CStopWatch g(CStopWatch::eStart);
70 //
71 //    CNcbiIstrstream in(blob.data(), blob.size());
72 //    CDecompressIStream lzip(in, m);
73 //
74 //    size_t n = 1024*1024;
75 //    char* buf = new char[n];
76 //    while (!lzip.eof()) lzip.read(buf, n);
77 //    delete [] buf;
78 //
79 //    LOG_POST(Info << "processed: " << lzip.GetProcessedSize() << ", out: " << lzip.GetOutputSize());
80 //    LOG_POST(Info << "Assebmly uncomressed in (sec): " << g.Elapsed());
81 //}
82 
Compression(const string & blob)83 CCompressStream::EMethod CCachedAssembly::Compression(const string& blob)
84 {
85     if (!CCachedAssembly::ValidBlob(blob.size()))
86         NCBI_THROW(CCoreException, eCore, "Invalid blob size detected: " + blob.size());
87     const char bzip2Header[] = {0x42, 0x5a, 0x68};
88     const char zlibHeader[] = {0x78};
89     if (NStr::StartsWith(blob, CTempString(bzip2Header, sizeof(bzip2Header))))
90         return CCompressStream::eBZip2;
91     if (NStr::StartsWith(blob, CTempString(zlibHeader, sizeof(zlibHeader))))
92         return CCompressStream::eZip;
93     NCBI_THROW(CCoreException, eInvalidArg, "Cant determine compression method: " + blob.substr(0, 10));
94 }
95 
Assembly()96 CRef<CGC_Assembly> CCachedAssembly::Assembly()
97 {
98     if (m_assembly.NotNull()) {
99         return m_assembly;
100     }
101 
102     if (ValidBlob(m_blob.size())) {
103         m_assembly = UncomressAndCreate(m_blob, Compression(m_blob));
104     }
105     return m_assembly;
106 }
107 
108 static
CompressAssembly(string & blob,CRef<CGC_Assembly> assembly,CCompressStream::EMethod method)109 void CompressAssembly(string& blob, CRef<CGC_Assembly> assembly, CCompressStream::EMethod method)
110 {
111     CStopWatch sw(CStopWatch::eStart);
112 
113     LOG_POST(Info << "Creating blob with compression: " << method);
114 
115     CNcbiOstrstream out;
116     CCompressOStream compress(out, method);
117 
118     compress << MSerial_AsnBinary << (*assembly);
119     compress.Finalize();
120 
121     blob = CNcbiOstrstreamToString(out);
122 
123     sw.Stop();
124     GetDiagContext().Extra().Print("Compress-assembly-to-blob-time", sw.Elapsed() * 1000) // need millisecond
125                             .Print("compress-method", method)
126                             .Print("blob-size", blob.size());
127 }
128 
Blob()129 const string& CCachedAssembly::Blob()
130 {
131     if (ValidBlob(m_blob.size()))
132         return m_blob;
133 
134     if (m_assembly)
135         CompressAssembly(m_blob, m_assembly, CCompressStream::eZip);
136     else
137         m_blob.clear();
138 
139     return m_blob;
140 }
141 
ValidBlob(size_t blobSize)142 bool CCachedAssembly::ValidBlob(size_t blobSize)
143 {
144     const int kSmallestZip = 200; // No assembly, let alone a compressed one, will be smaller than this.
145     return blobSize >= kSmallestZip;
146 }
147 
148 END_NCBI_SCOPE
149