1 /* $Id
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 */
27 #include <ncbi_pch.hpp>
28 #include <objects/genomecoll/cached_assembly.hpp>
29 #include <sstream>
30
31 BEGIN_NCBI_SCOPE
32 USING_SCOPE(objects);
33
CCachedAssembly(CRef<CGC_Assembly> assembly)34 CCachedAssembly::CCachedAssembly(CRef<CGC_Assembly> assembly)
35 : m_assembly(assembly)
36 {}
37
CCachedAssembly(const string & blob)38 CCachedAssembly::CCachedAssembly(const string& blob)
39 : m_blob(blob)
40 {}
41
CCachedAssembly(const vector<char> & blob)42 CCachedAssembly::CCachedAssembly(const vector<char>& blob)
43 : m_blob(blob.begin(), blob.end())
44 {}
45
46 static
UncomressAndCreate(const string & blob,CCompressStream::EMethod method)47 CRef<CGC_Assembly> UncomressAndCreate(const string& blob, CCompressStream::EMethod method) {
48 CStopWatch sw(CStopWatch::eStart);
49
50 CNcbiIstrstream in(blob);
51 CDecompressIStream decompress(in, method);
52
53 CRef<CGC_Assembly> m_assembly(new CGC_Assembly);
54 decompress >> MSerial_AsnBinary
55 >> MSerial_SkipUnknownMembers(eSerialSkipUnknown_Yes) // Make reading cache backward compatible
56 >> MSerial_SkipUnknownVariants(eSerialSkipUnknown_Yes)
57 >> (*m_assembly);
58
59 sw.Stop();
60 LOG_POST(Info << "Assembly uncomressed and created in (sec): " << sw.Elapsed());
61 GetDiagContext().Extra().Print("Create-assembly-from-blob-time", sw.Elapsed() * 1000) // need millisecond
62 .Print("compress-method", method)
63 .Print("blob-size", blob.size());
64 return m_assembly;
65 }
66
67 //static
68 //void Uncomress(const string& blob, CCompressStream::EMethod m) {
69 // CStopWatch g(CStopWatch::eStart);
70 //
71 // CNcbiIstrstream in(blob.data(), blob.size());
72 // CDecompressIStream lzip(in, m);
73 //
74 // size_t n = 1024*1024;
75 // char* buf = new char[n];
76 // while (!lzip.eof()) lzip.read(buf, n);
77 // delete [] buf;
78 //
79 // LOG_POST(Info << "processed: " << lzip.GetProcessedSize() << ", out: " << lzip.GetOutputSize());
80 // LOG_POST(Info << "Assebmly uncomressed in (sec): " << g.Elapsed());
81 //}
82
Compression(const string & blob)83 CCompressStream::EMethod CCachedAssembly::Compression(const string& blob)
84 {
85 if (!CCachedAssembly::ValidBlob(blob.size()))
86 NCBI_THROW(CCoreException, eCore, "Invalid blob size detected: " + blob.size());
87 const char bzip2Header[] = {0x42, 0x5a, 0x68};
88 const char zlibHeader[] = {0x78};
89 if (NStr::StartsWith(blob, CTempString(bzip2Header, sizeof(bzip2Header))))
90 return CCompressStream::eBZip2;
91 if (NStr::StartsWith(blob, CTempString(zlibHeader, sizeof(zlibHeader))))
92 return CCompressStream::eZip;
93 NCBI_THROW(CCoreException, eInvalidArg, "Cant determine compression method: " + blob.substr(0, 10));
94 }
95
Assembly()96 CRef<CGC_Assembly> CCachedAssembly::Assembly()
97 {
98 if (m_assembly.NotNull()) {
99 return m_assembly;
100 }
101
102 if (ValidBlob(m_blob.size())) {
103 m_assembly = UncomressAndCreate(m_blob, Compression(m_blob));
104 }
105 return m_assembly;
106 }
107
108 static
CompressAssembly(string & blob,CRef<CGC_Assembly> assembly,CCompressStream::EMethod method)109 void CompressAssembly(string& blob, CRef<CGC_Assembly> assembly, CCompressStream::EMethod method)
110 {
111 CStopWatch sw(CStopWatch::eStart);
112
113 LOG_POST(Info << "Creating blob with compression: " << method);
114
115 CNcbiOstrstream out;
116 CCompressOStream compress(out, method);
117
118 compress << MSerial_AsnBinary << (*assembly);
119 compress.Finalize();
120
121 blob = CNcbiOstrstreamToString(out);
122
123 sw.Stop();
124 GetDiagContext().Extra().Print("Compress-assembly-to-blob-time", sw.Elapsed() * 1000) // need millisecond
125 .Print("compress-method", method)
126 .Print("blob-size", blob.size());
127 }
128
Blob()129 const string& CCachedAssembly::Blob()
130 {
131 if (ValidBlob(m_blob.size()))
132 return m_blob;
133
134 if (m_assembly)
135 CompressAssembly(m_blob, m_assembly, CCompressStream::eZip);
136 else
137 m_blob.clear();
138
139 return m_blob;
140 }
141
ValidBlob(size_t blobSize)142 bool CCachedAssembly::ValidBlob(size_t blobSize)
143 {
144 const int kSmallestZip = 200; // No assembly, let alone a compressed one, will be smaller than this.
145 return blobSize >= kSmallestZip;
146 }
147
148 END_NCBI_SCOPE
149