1 #ifndef PACK_STRING__HPP_INCLUDED
2 #define PACK_STRING__HPP_INCLUDED
3
4 /* $Id: pack_string.hpp 151707 2009-02-06 16:05:12Z ucko $
5 * ===========================================================================
6 * PUBLIC DOMAIN NOTICE
7 * National Center for Biotechnology Information
8 *
9 * This software/database is a "United States Government Work" under the
10 * terms of the United States Copyright Act. It was written as part of
11 * the author's official duties as a United States Government employee and
12 * thus cannot be copyrighted. This software/database is freely available
13 * to the public for use. The National Library of Medicine and the U.S.
14 * Government have not placed any restriction on its use or reproduction.
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * Please cite the author in any work or product based on this material.
25 * ===========================================================================
26 *
27 * Author: Eugene Vasilchenko
28 *
29 * File Description: String cache (CPackString) and the class-member / choice-variant read hooks that read strings through it
30 *
31 */
32
33 #include <serial/objhook.hpp>
34 #include <serial/impl/objecttype.hpp>
35 #include <serial/objistr.hpp>
36
37 #include <string>
38 #include <set>
39
40 BEGIN_NCBI_SCOPE
41
42 class NCBI_XSERIAL_EXPORT CPackString
43 {
44 public:
45 CPackString(void);
46 CPackString(size_t length_limit, size_t count_limit);
47 ~CPackString(void);
48
49 struct SNode {
SNodeCPackString::SNode50 SNode(const string& s)
51 : m_Length(s.size()),
52 m_Chars(s.data()),
53 m_CompressedIn(0)
54 {
55 }
SNodeCPackString::SNode56 SNode(const SNode& n)
57 : m_Length(n.m_Length),
58 m_Chars(n.m_Chars),
59 m_CompressedIn(0)
60 {
61 }
SNodeCPackString::SNode62 SNode(const char* str, size_t len)
63 : m_Length(len),
64 m_Chars(str),
65 m_CompressedIn(0)
66 {
67 }
68
x_CompareCPackString::SNode69 int x_Compare(const char* ptr) const
70 {
71 return memcmp(m_Chars, ptr, m_Length);
72 }
73
operator <CPackString::SNode74 bool operator<(const SNode& n) const
75 {
76 return m_Length < n.m_Length ||
77 (m_Length == n.m_Length && x_Compare(n.m_Chars) < 0);
78 }
operator ==CPackString::SNode79 bool operator==(const SNode& n) const
80 {
81 return m_Length == n.m_Length && x_Compare(n.m_Chars) == 0;
82 }
83
84 void AssignTo(string& s) const;
85
86 void SetString(const string& s) const;
87 void SetString(void) const;
88
GetStringCPackString::SNode89 const string& GetString(void) const
90 {
91 return m_String;
92 }
GetCountCPackString::SNode93 size_t GetCount(void) const
94 {
95 return m_CompressedIn;
96 }
97
98 private:
99 SNode& operator=(const SNode&);
100
101 size_t m_Length;
102 const char* m_Chars;
103 string m_String;
104 mutable size_t m_CompressedIn;
105 };
106
107 typedef SNode TKey;
108 typedef set<TKey> TStrings;
109 typedef TStrings::iterator iterator;
110
111 void ReadString(CObjectIStream& in, string& s);
112
113 // return true if src was updated
114 static bool Assign(string& s, const string& src);
115
116 size_t GetLengthLimit(void) const;
117 size_t GetCountLimit(void) const;
118 size_t GetCount(void) const;
119
120 // return true if the string is new in cache
121 bool Pack(string& s);
122 bool Pack(string& s, const char* data, size_t size);
123
124 pair<iterator, bool> Locate(const char* data, size_t size);
125 void AddOld(string& s, const iterator& iter);
126 bool AddNew(string& s, const char* data, size_t size, iterator iter);
127 void Skipped(void);
128
129 static bool s_GetEnvFlag(const char* env, bool def_val);
130
131 static bool TryStringPack(void);
132
133 CNcbiOstream& DumpStatistics(CNcbiOstream& out) const;
134
135 private:
136 CPackString(const CPackString&);
137 CPackString& operator=(const CPackString&);
138
139 static void x_RefCounterError(void);
140 // return true if src was updated
141 static bool x_Assign(string& s, const string& src);
142
143 size_t m_LengthLimit;
144 size_t m_CountLimit;
145 size_t m_Skipped;
146 size_t m_CompressedIn;
147 size_t m_CompressedOut;
148 set<SNode> m_Strings;
149 };
150
151
152 class NCBI_XSERIAL_EXPORT CPackStringClassHook : public CReadClassMemberHook
153 {
154 public:
155 CPackStringClassHook(void);
156 CPackStringClassHook(size_t length_limit, size_t count_limit);
157 ~CPackStringClassHook(void);
158
159 void ReadClassMember(CObjectIStream& in, const CObjectInfoMI& member);
160
161 private:
162 CPackString m_PackString;
163 };
164
165
166 class NCBI_XSERIAL_EXPORT CPackStringChoiceHook : public CReadChoiceVariantHook
167 {
168 public:
169 CPackStringChoiceHook(void);
170 CPackStringChoiceHook(size_t length_limit, size_t count_limit);
171 ~CPackStringChoiceHook(void);
172
173 void ReadChoiceVariant(CObjectIStream& in, const CObjectInfoCV& variant);
174
175 private:
176 CPackString m_PackString;
177 };
178
179
180 /////////////////////////////////////////////////////////////////////////////
181 // CPackString
182 /////////////////////////////////////////////////////////////////////////////
183
184 inline
GetLengthLimit(void) const185 size_t CPackString::GetLengthLimit(void) const
186 {
187 return m_LengthLimit;
188 }
189
190
191 inline
GetCountLimit(void) const192 size_t CPackString::GetCountLimit(void) const
193 {
194 return m_CountLimit;
195 }
196
197
198 inline
GetCount(void) const199 size_t CPackString::GetCount(void) const
200 {
201 return m_CompressedOut;
202 }
203
204
205 inline
Assign(string & s,const string & src)206 bool CPackString::Assign(string& s, const string& src)
207 {
208 s = src;
209 if ( s.data() != src.data() ) {
210 return x_Assign(s, src);
211 }
212 else {
213 return false;
214 }
215 }
216
217
218 inline
AssignTo(string & s) const219 void CPackString::SNode::AssignTo(string& s) const
220 {
221 ++m_CompressedIn;
222 if ( CPackString::Assign(s, m_String) ) {
223 const_cast<SNode*>(this)->m_Chars = m_String.data();
224 }
225 }
226
227
228 inline
SetString(const string & s) const229 void CPackString::SNode::SetString(const string& s) const
230 {
231 _ASSERT(m_String.empty());
232 _ASSERT(s.size() == m_Length && x_Compare(s.data()) == 0);
233 const_cast<SNode*>(this)->m_String = s;
234 const_cast<SNode*>(this)->m_Chars = m_String.data();
235 }
236
237
238 inline
SetString(void) const239 void CPackString::SNode::SetString(void) const
240 {
241 _ASSERT(m_String.empty());
242 const_cast<SNode*>(this)->m_String.assign(m_Chars, m_Length);
243 const_cast<SNode*>(this)->m_Chars = m_String.data();
244 }
245
246
247 inline
ReadString(CObjectIStream & in,string & s)248 void CPackString::ReadString(CObjectIStream& in, string& s)
249 {
250 in.ReadPackedString(s, *this);
251 }
252
253
254 inline
255 pair<CPackString::iterator, bool>
Locate(const char * data,size_t size)256 CPackString::Locate(const char* data, size_t size)
257 {
258 pair<iterator, bool> ret;
259 _ASSERT(size <= GetLengthLimit());
260 SNode key(data, size);
261 ret.first = m_Strings.lower_bound(key);
262 ret.second = ret.first != m_Strings.end() && *ret.first == key;
263 return ret;
264 }
265
266
267 inline
AddOld(string & s,const iterator & iter)268 void CPackString::AddOld(string& s, const iterator& iter)
269 {
270 ++m_CompressedIn;
271 iter->AssignTo(s);
272 }
273
274
275 inline
Skipped(void)276 void CPackString::Skipped(void)
277 {
278 ++m_Skipped;
279 }
280
281
282 inline
operator <<(CNcbiOstream & out,const CPackString & pack_string)283 CNcbiOstream& operator<<(CNcbiOstream& out, const CPackString& pack_string)
284 {
285 return pack_string.DumpStatistics(out);
286 }
287
288
289 inline
ReadClassMember(CObjectIStream & in,const CObjectInfoMI & member)290 void CPackStringClassHook::ReadClassMember(CObjectIStream& in,
291 const CObjectInfoMI& member)
292 {
293 m_PackString.ReadString(in, *CType<string>::GetUnchecked(*member));
294 }
295
296
297 inline
ReadChoiceVariant(CObjectIStream & in,const CObjectInfoCV & variant)298 void CPackStringChoiceHook::ReadChoiceVariant(CObjectIStream& in,
299 const CObjectInfoCV& variant)
300 {
301 m_PackString.ReadString(in, *CType<string>::GetUnchecked(*variant));
302 }
303
304
305 END_NCBI_SCOPE
306
307 #endif // PACK_STRING__HPP_INCLUDED
308