1 #ifndef PACK_STRING__HPP_INCLUDED
2 #define PACK_STRING__HPP_INCLUDED
3 
4 /*  $Id: pack_string.hpp 151707 2009-02-06 16:05:12Z ucko $
5 * ===========================================================================
6 *                            PUBLIC DOMAIN NOTICE
7 *               National Center for Biotechnology Information
8 *
9 *  This software/database is a "United States Government Work" under the
10 *  terms of the United States Copyright Act.  It was written as part of
11 *  the author's official duties as a United States Government employee and
12 *  thus cannot be copyrighted.  This software/database is freely available
13 *  to the public for use. The National Library of Medicine and the U.S.
14 *  Government have not placed any restriction on its use or reproduction.
15 *
16 *  Although all reasonable efforts have been taken to ensure the accuracy
17 *  and reliability of the software and data, the NLM and the U.S.
18 *  Government do not and cannot warrant the performance or results that
19 *  may be obtained by using this software or data. The NLM and the U.S.
20 *  Government disclaim all warranties, express or implied, including
21 *  warranties of performance, merchantability or fitness for any particular
22 *  purpose.
23 *
24 *  Please cite the author in any work or product based on this material.
25 * ===========================================================================
26 *
27 *  Author:  Eugene Vasilchenko
28 *
*  File Description: String cache (CPackString) used to share repeated string
*                    values, and object stream read hooks that apply it
30 *
31 */
32 
33 #include <serial/objhook.hpp>
34 #include <serial/impl/objecttype.hpp>
35 #include <serial/objistr.hpp>
36 
37 #include <string>
38 #include <set>
39 
40 BEGIN_NCBI_SCOPE
41 
42 class NCBI_XSERIAL_EXPORT CPackString
43 {
44 public:
45     CPackString(void);
46     CPackString(size_t length_limit, size_t count_limit);
47     ~CPackString(void);
48 
49     struct SNode {
SNodeCPackString::SNode50         SNode(const string& s)
51             : m_Length(s.size()),
52               m_Chars(s.data()),
53               m_CompressedIn(0)
54             {
55             }
SNodeCPackString::SNode56         SNode(const SNode& n)
57             : m_Length(n.m_Length),
58               m_Chars(n.m_Chars),
59               m_CompressedIn(0)
60             {
61             }
SNodeCPackString::SNode62         SNode(const char* str, size_t len)
63             : m_Length(len),
64               m_Chars(str),
65               m_CompressedIn(0)
66             {
67             }
68 
x_CompareCPackString::SNode69         int x_Compare(const char* ptr) const
70             {
71                 return memcmp(m_Chars, ptr, m_Length);
72             }
73 
operator <CPackString::SNode74         bool operator<(const SNode& n) const
75             {
76                 return m_Length < n.m_Length ||
77                     (m_Length == n.m_Length && x_Compare(n.m_Chars) < 0);
78             }
operator ==CPackString::SNode79         bool operator==(const SNode& n) const
80             {
81                 return m_Length == n.m_Length && x_Compare(n.m_Chars) == 0;
82             }
83 
84         void AssignTo(string& s) const;
85 
86         void SetString(const string& s) const;
87         void SetString(void) const;
88 
GetStringCPackString::SNode89         const string& GetString(void) const
90             {
91                 return m_String;
92             }
GetCountCPackString::SNode93         size_t GetCount(void) const
94             {
95                 return m_CompressedIn;
96             }
97 
98     private:
99         SNode& operator=(const SNode&);
100 
101         size_t m_Length;
102         const char* m_Chars;
103         string m_String;
104         mutable size_t m_CompressedIn;
105     };
106 
107     typedef SNode TKey;
108     typedef set<TKey> TStrings;
109     typedef TStrings::iterator iterator;
110 
111     void ReadString(CObjectIStream& in, string& s);
112 
113     // return true if src was updated
114     static bool Assign(string& s, const string& src);
115 
116     size_t GetLengthLimit(void) const;
117     size_t GetCountLimit(void) const;
118     size_t GetCount(void) const;
119 
120     // return true if the string is new in cache
121     bool Pack(string& s);
122     bool Pack(string& s, const char* data, size_t size);
123 
124     pair<iterator, bool> Locate(const char* data, size_t size);
125     void AddOld(string& s, const iterator& iter);
126     bool AddNew(string& s, const char* data, size_t size, iterator iter);
127     void Skipped(void);
128 
129     static bool s_GetEnvFlag(const char* env, bool def_val);
130 
131     static bool TryStringPack(void);
132 
133     CNcbiOstream& DumpStatistics(CNcbiOstream& out) const;
134 
135 private:
136     CPackString(const CPackString&);
137     CPackString& operator=(const CPackString&);
138 
139     static void x_RefCounterError(void);
140     // return true if src was updated
141     static bool x_Assign(string& s, const string& src);
142 
143     size_t m_LengthLimit;
144     size_t m_CountLimit;
145     size_t m_Skipped;
146     size_t m_CompressedIn;
147     size_t m_CompressedOut;
148     set<SNode> m_Strings;
149 };
150 
151 
152 class NCBI_XSERIAL_EXPORT CPackStringClassHook : public CReadClassMemberHook
153 {
154 public:
155     CPackStringClassHook(void);
156     CPackStringClassHook(size_t length_limit, size_t count_limit);
157     ~CPackStringClassHook(void);
158 
159     void ReadClassMember(CObjectIStream& in, const CObjectInfoMI& member);
160 
161 private:
162     CPackString m_PackString;
163 };
164 
165 
166 class NCBI_XSERIAL_EXPORT CPackStringChoiceHook : public CReadChoiceVariantHook
167 {
168 public:
169     CPackStringChoiceHook(void);
170     CPackStringChoiceHook(size_t length_limit, size_t count_limit);
171     ~CPackStringChoiceHook(void);
172 
173     void ReadChoiceVariant(CObjectIStream& in, const CObjectInfoCV& variant);
174 
175 private:
176     CPackString m_PackString;
177 };
178 
179 
180 /////////////////////////////////////////////////////////////////////////////
181 // CPackString
182 /////////////////////////////////////////////////////////////////////////////
183 
184 inline
GetLengthLimit(void) const185 size_t CPackString::GetLengthLimit(void) const
186 {
187     return m_LengthLimit;
188 }
189 
190 
191 inline
GetCountLimit(void) const192 size_t CPackString::GetCountLimit(void) const
193 {
194     return m_CountLimit;
195 }
196 
197 
198 inline
GetCount(void) const199 size_t CPackString::GetCount(void) const
200 {
201     return m_CompressedOut;
202 }
203 
204 
205 inline
Assign(string & s,const string & src)206 bool CPackString::Assign(string& s, const string& src)
207 {
208     s = src;
209     if ( s.data() != src.data() ) {
210         return x_Assign(s, src);
211     }
212     else {
213         return false;
214     }
215 }
216 
217 
218 inline
AssignTo(string & s) const219 void CPackString::SNode::AssignTo(string& s) const
220 {
221     ++m_CompressedIn;
222     if ( CPackString::Assign(s, m_String) ) {
223         const_cast<SNode*>(this)->m_Chars = m_String.data();
224     }
225 }
226 
227 
228 inline
SetString(const string & s) const229 void CPackString::SNode::SetString(const string& s) const
230 {
231     _ASSERT(m_String.empty());
232     _ASSERT(s.size() == m_Length && x_Compare(s.data()) == 0);
233     const_cast<SNode*>(this)->m_String = s;
234     const_cast<SNode*>(this)->m_Chars = m_String.data();
235 }
236 
237 
238 inline
SetString(void) const239 void CPackString::SNode::SetString(void) const
240 {
241     _ASSERT(m_String.empty());
242     const_cast<SNode*>(this)->m_String.assign(m_Chars, m_Length);
243     const_cast<SNode*>(this)->m_Chars = m_String.data();
244 }
245 
246 
247 inline
ReadString(CObjectIStream & in,string & s)248 void CPackString::ReadString(CObjectIStream& in, string& s)
249 {
250     in.ReadPackedString(s, *this);
251 }
252 
253 
254 inline
255 pair<CPackString::iterator, bool>
Locate(const char * data,size_t size)256 CPackString::Locate(const char* data, size_t size)
257 {
258     pair<iterator, bool> ret;
259     _ASSERT(size <= GetLengthLimit());
260     SNode key(data, size);
261     ret.first = m_Strings.lower_bound(key);
262     ret.second = ret.first != m_Strings.end() && *ret.first == key;
263     return ret;
264 }
265 
266 
267 inline
AddOld(string & s,const iterator & iter)268 void CPackString::AddOld(string& s, const iterator& iter)
269 {
270     ++m_CompressedIn;
271     iter->AssignTo(s);
272 }
273 
274 
275 inline
Skipped(void)276 void CPackString::Skipped(void)
277 {
278     ++m_Skipped;
279 }
280 
281 
282 inline
operator <<(CNcbiOstream & out,const CPackString & pack_string)283 CNcbiOstream& operator<<(CNcbiOstream& out, const CPackString& pack_string)
284 {
285     return pack_string.DumpStatistics(out);
286 }
287 
288 
289 inline
ReadClassMember(CObjectIStream & in,const CObjectInfoMI & member)290 void CPackStringClassHook::ReadClassMember(CObjectIStream& in,
291                                            const CObjectInfoMI& member)
292 {
293     m_PackString.ReadString(in, *CType<string>::GetUnchecked(*member));
294 }
295 
296 
297 inline
ReadChoiceVariant(CObjectIStream & in,const CObjectInfoCV & variant)298 void CPackStringChoiceHook::ReadChoiceVariant(CObjectIStream& in,
299                                               const CObjectInfoCV& variant)
300 {
301     m_PackString.ReadString(in, *CType<string>::GetUnchecked(*variant));
302 }
303 
304 
305 END_NCBI_SCOPE
306 
307 #endif // PACK_STRING__HPP_INCLUDED
308