1 #ifndef NCBI_OBJMGR_SPLIT_OBJECT_SPLITINFO__HPP
2 #define NCBI_OBJMGR_SPLIT_OBJECT_SPLITINFO__HPP
3 
4 /*  $Id: object_splitinfo.hpp 594873 2019-10-10 16:05:19Z vasilche $
5 * ===========================================================================
6 *
7 *                            PUBLIC DOMAIN NOTICE
8 *               National Center for Biotechnology Information
9 *
10 *  This software/database is a "United States Government Work" under the
11 *  terms of the United States Copyright Act.  It was written as part of
12 *  the author's official duties as a United States Government employee and
13 *  thus cannot be copyrighted.  This software/database is freely available
14 *  to the public for use. The National Library of Medicine and the U.S.
15 *  Government have not placed any restriction on its use or reproduction.
16 *
17 *  Although all reasonable efforts have been taken to ensure the accuracy
18 *  and reliability of the software and data, the NLM and the U.S.
19 *  Government do not and cannot warrant the performance or results that
20 *  may be obtained by using this software or data. The NLM and the U.S.
21 *  Government disclaim all warranties, express or implied, including
22 *  warranties of performance, merchantability or fitness for any particular
23 *  purpose.
24 *
25 *  Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Author:  Eugene Vasilchenko
30 *
31 * File Description:
*   Application for splitting blobs within ID1 cache
33 *
34 * ===========================================================================
35 */
36 
37 
38 #include <corelib/ncbistd.hpp>
39 #include <corelib/ncbiobj.hpp>
40 
41 #include <objects/seq/Seq_annot.hpp>
42 #include <objects/seq/Seq_inst.hpp>
43 #include <objects/seq/Seq_data.hpp>
44 #include <objects/seq/Seq_descr.hpp>
45 #include <objects/seq/Bioseq.hpp>
46 #include <objects/seq/Seq_hist.hpp>
47 #include <objects/seqalign/Seq_align.hpp>
48 
49 #include <objmgr/annot_name.hpp>
50 #include <objects/seq/seq_id_handle.hpp>
51 
52 #include <memory>
53 #include <map>
54 #include <vector>
55 
56 #include <objmgr/split/id_range.hpp>
57 #include <objmgr/split/size.hpp>
58 #include <objmgr/split/place_id.hpp>
59 
60 BEGIN_NCBI_SCOPE
61 
62 class CObjectOStream;
63 
64 BEGIN_SCOPE(objects)
65 
66 class CSeq_entry;
67 class CBioseq;
68 class CBioseq_set;
69 class CSeq_annot;
70 class CSeq_feat;
71 class CSeq_align;
72 class CSeq_graph;
73 class CSeq_data;
74 class CSeq_inst;
75 class CSeq_descr;
76 class CID2S_Split_Info;
77 class CID2S_Chunk_Id;
78 class CID2S_Chunk;
79 class CBlobSplitter;
80 class CBlobSplitterImpl;
81 struct SSplitterParams;
82 
83 
84 enum EAnnotPriority
85 {
86     eAnnotPriority_skeleton = 0,
87     eAnnotPriority_landmark,
88     eAnnotPriority_regular,
89     eAnnotPriority_low,
90     eAnnotPriority_lowest,
91     eAnnotPriority_zoomed,
92     eAnnotPriority_max = kMax_Int
93 };
94 typedef unsigned TAnnotPriority;
95 
96 
97 class CAnnotObject_SplitInfo
98 {
99 public:
CAnnotObject_SplitInfo(void)100     CAnnotObject_SplitInfo(void)
101         : m_ObjectType(0)
102         {
103         }
104     CAnnotObject_SplitInfo(const CSeq_feat& obj,
105                            const CBlobSplitterImpl& impl,
106                            CSize::TSizeRatio ratio);
107     CAnnotObject_SplitInfo(const CSeq_align& obj,
108                            const CBlobSplitterImpl& impl,
109                            CSize::TSizeRatio ratio);
110     CAnnotObject_SplitInfo(const CSeq_graph& obj,
111                            const CBlobSplitterImpl& impl,
112                            CSize::TSizeRatio ratio);
113     CAnnotObject_SplitInfo(const CSeq_table& obj,
114                            const CBlobSplitterImpl& impl,
115                            CSize::TSizeRatio ratio);
116 
117     TAnnotPriority GetPriority(void) const;
118     TAnnotPriority CalcPriority(void) const;
119 
120     int Compare(const CAnnotObject_SplitInfo& other) const;
121 
122     int         m_ObjectType;
123     CConstRef<CObject> m_Object;
124 
125     TAnnotPriority m_Priority;
126 
127     CSize       m_Size;
128     CSeqsRange  m_Location;
129 };
130 
131 
132 class CLocObjects_SplitInfo : public CObject
133 {
134 public:
135     typedef vector<CAnnotObject_SplitInfo> TObjects;
136     typedef TObjects::const_iterator const_iterator;
137 
138     void Add(const CAnnotObject_SplitInfo& obj);
139     CNcbiOstream& Print(CNcbiOstream& out) const;
140 
empty(void) const141     bool empty(void) const
142         {
143             return m_Objects.empty();
144         }
size(void) const145     size_t size(void) const
146         {
147             return m_Objects.size();
148         }
clear(void)149     void clear(void)
150         {
151             m_Objects.clear();
152             m_Size.clear();
153             m_Location.clear();
154         }
begin(void) const155     const_iterator begin(void) const
156         {
157             return m_Objects.begin();
158         }
end(void) const159     const_iterator end(void) const
160         {
161             return m_Objects.end();
162         }
163 
164     TObjects    m_Objects;
165 
166     CSize       m_Size;
167     CSeqsRange  m_Location;
168 };
169 
170 
171 inline
operator <<(CNcbiOstream & out,const CLocObjects_SplitInfo & info)172 CNcbiOstream& operator<<(CNcbiOstream& out, const CLocObjects_SplitInfo& info)
173 {
174     return info.Print(out);
175 }
176 
177 
/// Split bookkeeping for one Seq-annot: the source annot, its name,
/// priority values, groups of per-object records, and size and location
/// members for the annot.
/// All member functions are defined out of line.
class CSeq_annot_SplitInfo : public CObject
{
public:
    /// Groups of object records, each a ref-counted CLocObjects_SplitInfo.
    /// NOTE(review): how the vector is indexed is decided by Add() in the
    /// implementation file -- presumably by priority; confirm.
    typedef vector< CRef<CLocObjects_SplitInfo> > TObjects;

    CSeq_annot_SplitInfo(void);

    /// Fill this record from @a annot using the splitter parameters
    /// and the splitter implementation object.
    void SetSeq_annot(const CSeq_annot& annot,
                      const SSplitterParams& params,
                      const CBlobSplitterImpl& impl);
    /// Add a single annotation-object record.
    void Add(const CAnnotObject_SplitInfo& obj);

    /// Write a textual description to @a out.
    CNcbiOstream& Print(CNcbiOstream& out) const;

    /// Static helpers operating on a Seq-annot without needing an instance.
    static CAnnotName GetName(const CSeq_annot& annot);
    static size_t CountAnnotObjects(const CSeq_annot& annot);

    /// Priority of the annot as a whole / of one object within this annot.
    TAnnotPriority GetPriority(void) const;
    TAnnotPriority GetPriority(const CAnnotObject_SplitInfo& obj) const;

    /// Three-way comparison with another annot record (int result).
    int Compare(const CSeq_annot_SplitInfo& other) const;

    /// The Seq-annot this record describes.
    CConstRef<CSeq_annot> m_Src_annot;
    /// Annotation name associated with the annot.
    CAnnotName      m_Name;

    /// NOTE(review): presumably the best priority among added objects and
    /// a priority derived from the annot name, respectively -- confirm.
    TAnnotPriority  m_TopPriority;
    TAnnotPriority  m_NamePriority;
    /// Object record groups (see TObjects).
    TObjects        m_Objects;

    /// Size and location members for the annot.
    CSize           m_Size;
    CSeqsRange      m_Location;
};
210 
211 
212 inline
operator <<(CNcbiOstream & out,const CSeq_annot_SplitInfo & info)213 CNcbiOstream& operator<<(CNcbiOstream& out, const CSeq_annot_SplitInfo& info)
214 {
215     return info.Print(out);
216 }
217 
218 
/// Split bookkeeping for a Seq-descr attached to a place (CPlaceId).
/// All member functions are defined out of line.
class CSeq_descr_SplitInfo : public CObject
{
public:
    /// Build a record for @a descr located at @a place_id.
    /// @param seq_length  sequence length supplied by the caller
    /// @param params      splitter parameters
    CSeq_descr_SplitInfo(const CPlaceId& place_id,
                         TSeqPos seq_length,
                         const CSeq_descr& descr,
                         const SSplitterParams& params);

    TAnnotPriority GetPriority(void) const;

    /// Three-way comparison with another descr record (int result).
    int Compare(const CSeq_descr_SplitInfo& other) const;

    /// The descriptor set this record describes.
    CConstRef<CSeq_descr> m_Descr;

    TAnnotPriority m_Priority;

    /// Size and location members for the descriptor set.
    CSize       m_Size;
    CSeqsRange  m_Location;
    /// Child records of the same type. Declared mutable, so it can be
    /// modified through a const record.
    /// NOTE(review): presumably filled when a large descr is broken into
    /// smaller pieces -- confirm in the splitter.
    mutable vector< CRef<CSeq_descr_SplitInfo> > m_SubPieces;
};
239 
240 
/// Split bookkeeping for Seq-hist assembly data attached to a place.
/// All member functions are defined out of line.
class CSeq_hist_SplitInfo : public CObject
{
public:
    /// Build a record from a Seq-hist.
    CSeq_hist_SplitInfo(const CPlaceId& place_id,
                        const CSeq_hist& hist,
                        const SSplitterParams& params);
    /// Build a record from a single alignment.
    CSeq_hist_SplitInfo(const CPlaceId& place_id,
                        const CSeq_align& align,
                        const SSplitterParams& params);

    TAnnotPriority GetPriority(void) const;

    /// Same container type as CSeq_hist uses for its assembly.
    typedef CSeq_hist::TAssembly TAssembly;

    /// Assembly data held by this record.
    TAssembly      m_Assembly;
    TAnnotPriority m_Priority;
    /// Size and location members for the assembly data.
    CSize          m_Size;
    CSeqsRange     m_Location;
};
260 
261 
/// Split bookkeeping for one piece of sequence data (Seq-data).
///
/// NOTE(review): no constructor is declared, so nothing in this header
/// initializes m_Priority; presumably SetSeq_data() must be called before
/// GetPriority() is used -- confirm.
class CSeq_data_SplitInfo : public CObject
{
public:
    /// Position range on the sequence.
    typedef CRange<TSeqPos> TRange;
    /// Fill this record for @a data covering @a range at @a place_id
    /// (defined out of line).
    /// @param seq_length  sequence length supplied by the caller
    /// @param params      splitter parameters
    void SetSeq_data(const CPlaceId& place_id, const TRange& range,
                     TSeqPos seq_length,
                     const CSeq_data& data,
                     const SSplitterParams& params);

    TAnnotPriority GetPriority(void) const;

    /// Range of this piece (defined out of line).
    TRange GetRange(void) const;

    /// The sequence data this record describes.
    CConstRef<CSeq_data> m_Data;

    TAnnotPriority m_Priority;

    /// Size and location members for the piece.
    CSize       m_Size;
    CSeqsRange  m_Location;
};
282 
283 
/// Split bookkeeping for a Seq-inst: the instance itself plus the
/// per-piece sequence data records that belong to it.
class CSeq_inst_SplitInfo : public CObject
{
public:
    /// Sequence data records, stored by value.
    typedef vector<CSeq_data_SplitInfo> TSeq_data;

    /// Add one sequence data record (defined out of line).
    void Add(const CSeq_data_SplitInfo& data);

    /// The Seq-inst this record describes.
    CConstRef<CSeq_inst> m_Seq_inst;

    /// Collected sequence data records.
    TSeq_data m_Seq_data;
};
295 
296 
/// Split bookkeeping for a whole Bioseq.
/// All member functions are defined out of line.
class CBioseq_SplitInfo : public CObject
{
public:
    /// Build a record for @a bioseq using the splitter parameters.
    CBioseq_SplitInfo(const CBioseq& bioseq, const SSplitterParams& params);

    /// Whether this Bioseq may be split off; the criteria are defined
    /// in the implementation file.
    bool CanSplit(void) const;
    TAnnotPriority GetPriority(void) const;

    /// The Bioseq this record describes.
    CConstRef<CBioseq> m_Bioseq;

    TAnnotPriority m_Priority;

    /// Size and location members for the Bioseq.
    CSize              m_Size;
    CSeqsRange         m_Location;
};
312 
313 
/// Everything collected for one place (identified by CPlaceId):
/// descriptors, annotations, sequence instance, history and Bioseqs.
/// Unlike the other *_SplitInfo classes in this header, it does not
/// derive from CObject.
class CPlace_SplitInfo
{
public:
    /// Annot records keyed by the Seq-annot they describe;
    /// the records are stored by value in the map.
    typedef map<CConstRef<CSeq_annot>, CSeq_annot_SplitInfo> TSeq_annots;
    /// Bioseq records, stored by value.
    typedef vector<CBioseq_SplitInfo> TBioseqs;

    /// Constructor and destructor are defined out of line.
    CPlace_SplitInfo(void);
    ~CPlace_SplitInfo(void);

    /// NOTE(review): presumably only one of these two is set, depending on
    /// whether the place is a Bioseq or a Bioseq-set -- confirm.
    CRef<CBioseq> m_Bioseq;
    CRef<CBioseq_set> m_Bioseq_set;

    /// Identifier of this place.
    CPlaceId                    m_PlaceId;
    /// Descriptor record (ref-counted handle).
    CRef<CSeq_descr_SplitInfo>  m_Descr;
    /// Annotation records for this place.
    TSeq_annots                 m_Annots;
    /// Sequence instance record (ref-counted handle).
    CRef<CSeq_inst_SplitInfo>   m_Inst;
    /// Sequence history record (ref-counted handle).
    CRef<CSeq_hist_SplitInfo>   m_Hist;
    /// Bioseq records for this place.
    TBioseqs                    m_Bioseqs;
};
333 
334 
335 END_SCOPE(objects)
336 END_NCBI_SCOPE
337 
338 #endif//NCBI_OBJMGR_SPLIT_OBJECT_SPLITINFO__HPP
339