1 /*  $Id: graphread.cpp 618599 2020-10-22 17:38:51Z grichenk $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors:  Eugene Vasilchenko
27  *
28  * File Description:
29  *   Access to WGS files
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <sra/readers/sra/graphread.hpp>
35 #include <sra/readers/ncbi_traces_path.hpp>
36 #include <corelib/ncbistr.hpp>
37 #include <corelib/ncbi_param.hpp>
38 #include <objects/general/general__.hpp>
39 #include <objects/seq/seq__.hpp>
40 #include <objects/seqset/seqset__.hpp>
41 #include <objects/seqloc/seqloc__.hpp>
42 #include <objects/seqalign/seqalign__.hpp>
43 #include <objects/seqres/seqres__.hpp>
44 #include <objects/seqtable/seqtable__.hpp>
45 #include <serial/objistrasnb.hpp>
46 #include <serial/serial.hpp>
47 #include <sra/error_codes.hpp>
48 
49 #include <sra/readers/sra/kdbread.hpp>
50 
51 BEGIN_NCBI_NAMESPACE;
52 
53 #define NCBI_USE_ERRCODE_X   VDBGraphReader
54 NCBI_DEFINE_ERR_SUBCODE_X(1);
55 
56 BEGIN_NAMESPACE(objects);
57 
58 
59 NCBI_PARAM_DECL(bool, VDBGRAPH, USE_VDB_INDEX);
60 NCBI_PARAM_DEF_EX(bool, VDBGRAPH, USE_VDB_INDEX, true,
61                   eParam_NoThread, VDBGRAPH_USE_VDB_INDEX);
62 
63 
LookupIsInMemory(ELookupType lookup_type)64 bool CVDBGraphDb_Impl::LookupIsInMemory(ELookupType lookup_type)
65 {
66     if ( lookup_type == eLookupDefault ) {
67         static bool use_vdb_index = NCBI_PARAM_TYPE(VDBGRAPH, USE_VDB_INDEX)::GetDefault();
68         return !use_vdb_index;
69     }
70     else {
71         return lookup_type == eLookupInMemory;
72     }
73 }
74 
75 
76 /////////////////////////////////////////////////////////////////////////////
77 // CVDBGraphDb_Impl
78 /////////////////////////////////////////////////////////////////////////////
79 
80 
SGraphTableCursor(const CVDBTable & table)81 CVDBGraphDb_Impl::SGraphTableCursor::SGraphTableCursor(const CVDBTable& table)
82     : m_Cursor(table),
83       INIT_VDB_COLUMN(SID),
84       INIT_VDB_COLUMN(START),
85       INIT_VDB_COLUMN(LEN),
86       INIT_VDB_COLUMN(GR_Q0),
87       INIT_VDB_COLUMN(GR_Q10),
88       INIT_VDB_COLUMN(GR_Q50),
89       INIT_VDB_COLUMN(GR_Q90),
90       INIT_VDB_COLUMN(GR_Q100),
91       INIT_OPTIONAL_VDB_COLUMN(GR_ZOOM_Q0),
92       INIT_OPTIONAL_VDB_COLUMN(GR_ZOOM_Q10),
93       INIT_OPTIONAL_VDB_COLUMN(GR_ZOOM_Q50),
94       INIT_OPTIONAL_VDB_COLUMN(GR_ZOOM_Q90),
95       INIT_OPTIONAL_VDB_COLUMN(GR_ZOOM_Q100),
96       INIT_VDB_COLUMN(GRAPH),
97       INIT_VDB_COLUMN(SCALE),
98       INIT_OPTIONAL_VDB_COLUMN(NUM_SWITCHES)
99 {
100 }
101 
102 
CVDBGraphDb_Impl(CVDBMgr & mgr,CTempString path,ELookupType lookup_type)103 CVDBGraphDb_Impl::CVDBGraphDb_Impl(CVDBMgr& mgr, CTempString path, ELookupType lookup_type)
104     : m_Mgr(mgr),
105       m_Path(path)
106 {
107     // VDB graph are plain VDB table objects.
108     // However, there could be other VDBs in the same namespace (NA*)
109     // so we have to check this situation and return normal eNotFoundDb error.
110     try {
111         m_GraphTable = CVDBTable(mgr, path);
112     }
113     catch ( CSraException& exc ) {
114         bool another_vdb = false;
115         if ( exc.GetErrCode() != exc.eNotFoundTable ) {
116             // check if the accession refers some other VDB object
117             try {
118                 CVDB db(mgr, path);
119                 another_vdb = true;
120             }
121             catch ( CSraException& /*exc2*/ ) {
122             }
123         }
124         if ( another_vdb || exc.GetErrCode() == exc.eNotFoundTable ) {
125             // It's either some other VDB object, or not an VDB object at all
126             // report eNotFoundDb with original rc
127             NCBI_THROW2_FMT(CSraException, eNotFoundDb,
128                             "Cannot open VDB graph table: "<<path,
129                             exc.GetRC());
130         }
131         else {
132             // neither vdbgraph table nor another VDB
133             // report original exception
134             throw;
135         }
136     }
137     CRef<SGraphTableCursor> curs = Graph();
138 
139     TVDBRowId last_row = curs->m_Cursor.GetMaxRowId();
140     SSeqInfo info;
141     CVDBTableIndex idx(GraphTable(), "sid", CVDBTableIndex::eMissing_Allow);
142     if ( !idx ) {
143         ERR_POST(Warning<<"CVDBGraphDb: sid index not found. Scanning sequentially.");
144         for ( TVDBRowId row = 1; row <= last_row; ++row ) {
145             // read range and names
146             TSeqPos start = *curs->START(row);
147             TSeqPos len = *curs->LEN(row);
148             CVDBStringValue seq_id = curs->SID(row);
149             if ( *seq_id == info.m_SeqId ) {
150                 // continuation of graph
151                 info.m_RowLast = row;
152                 info.m_SeqLength = start + len;
153                 continue;
154             }
155             if ( !info.m_SeqId.empty() ) {
156                 m_SeqList.push_back(info);
157             }
158             info.m_SeqId = *seq_id;
159             info.m_Seq_id_Handle = CSeq_id_Handle::GetHandle(info.m_SeqId);
160             info.m_RowFirst = row;
161             info.m_RowSize = len;
162             info.m_SeqLength = len;
163         }
164         if ( !info.m_SeqId.empty() ) {
165             info.m_RowLast = last_row;
166             m_SeqList.push_back(info);
167         }
168     }
169     else if ( LookupIsInMemory(lookup_type) ) {
170         for ( TVDBRowId row = 1; row <= last_row; ++row ) {
171             CVDBStringValue seq_id = curs->SID(row);
172             info.m_SeqId = *seq_id;
173             info.m_Seq_id_Handle = CSeq_id_Handle::GetHandle(info.m_SeqId);
174             info.m_RowSize = *curs->LEN(row);
175             info.m_SeqLength = *curs->START(row) + info.m_RowSize;
176             info.m_RowLast = info.m_RowFirst = row;
177             TVDBRowIdRange range = idx.Find(info.m_SeqId);
178             _ASSERT(row == range.first);
179             _ASSERT(range.second);
180             if ( range.second > 1 ) {
181                 row += range.second-1;
182                 info.m_RowLast = row;
183                 info.m_SeqLength = *curs->START(row)+*curs->LEN(row);
184             }
185             m_SeqList.push_back(info);
186         }
187     }
188     else {
189         m_LookupIndex = idx;
190     }
191     Put(curs);
192 
193     NON_CONST_ITERATE ( TSeqInfoList, it, m_SeqList ) {
194         m_SeqMapByFirstRow.insert
195             (TSeqInfoMapByFirstRow::value_type(it->m_RowFirst, it));
196         m_SeqMapBySeq_id.insert
197             (TSeqInfoMapBySeq_id::value_type(it->m_Seq_id_Handle, it));
198     }
199 }
200 
201 
~CVDBGraphDb_Impl(void)202 CVDBGraphDb_Impl::~CVDBGraphDb_Impl(void)
203 {
204 }
205 
206 
Graph(void)207 CRef<CVDBGraphDb_Impl::SGraphTableCursor> CVDBGraphDb_Impl::Graph(void)
208 {
209     CRef<SGraphTableCursor> curs = m_Graph.Get();
210     if ( !curs ) {
211         curs = new SGraphTableCursor(GraphTable());
212     }
213     return curs;
214 }
215 
216 
HasMidZoomGraphs(void)217 bool CVDBGraphDb_Impl::HasMidZoomGraphs(void)
218 {
219     CRef<SGraphTableCursor> curs = Graph();
220     bool ret = curs->m_GR_ZOOM_Q100;
221     Put(curs);
222     return ret;
223 }
224 
225 
GetSeqInfoAtRow(TVDBRowId first_row)226 CVDBGraphDb_Impl::SSeqInfo CVDBGraphDb_Impl::GetSeqInfoAtRow(TVDBRowId first_row)
227 {
228     CMutexGuard guard(m_SeqInfoMutex);
229     auto iter = m_SeqMapByFirstRow.find(first_row);
230     if ( iter != m_SeqMapByFirstRow.end() ) {
231         return *iter->second;
232     }
233     if ( m_LookupIndex ) {
234         auto curs = Graph();
235         CVDBStringValue id = curs->SID(first_row, CVDBValue::eMissing_Allow);
236         if ( id.empty() ) {
237             Put(curs);
238             return SSeqInfo();
239         }
240         SSeqInfo info;
241         info.m_SeqId = *id;
242         info.m_Seq_id_Handle = CSeq_id_Handle::GetHandle(info.m_SeqId);
243         info.m_RowFirst = first_row;
244         TSeqPos first_len = *curs->LEN(first_row);
245         info.m_RowSize = first_len;
246         TVDBRowIdRange range = m_LookupIndex.Find(info.m_SeqId);
247         _ASSERT(first_row == range.first);
248         _ASSERT(range.second);
249         if ( range.second > 1 ) {
250             // multi page
251             TVDBRowId last_row = first_row + range.second - 1;
252             info.m_RowLast = last_row;
253             TSeqPos last_start = *curs->START(last_row);
254             TSeqPos last_len = *curs->LEN(last_row);
255             info.m_SeqLength = last_start + last_len;
256         }
257         else {
258             // single page
259             info.m_RowLast = first_row;
260             TSeqPos first_start = *curs->START(first_row);
261             info.m_SeqLength = first_start + first_len;
262         }
263         Put(curs);
264         return info;
265     }
266     return SSeqInfo();
267 }
268 
269 
GetSeqInfo(const CSeq_id_Handle & idh)270 CVDBGraphDb_Impl::SSeqInfo CVDBGraphDb_Impl::GetSeqInfo(const CSeq_id_Handle& idh)
271 {
272     CMutexGuard guard(m_SeqInfoMutex);
273     auto iter = m_SeqMapBySeq_id.find(idh);
274     if ( iter != m_SeqMapBySeq_id.end() ) {
275         return *iter->second;
276     }
277     if ( m_LookupIndex ) {
278         auto seq_id = idh.GetSeqId();
279         const CTextseq_id* text_id = seq_id->GetTextseq_Id();
280         if ( !text_id ||
281              text_id->IsSetName() || text_id->IsSetRelease() ||
282              !text_id->IsSetAccession() || !text_id->IsSetVersion() ) {
283             return SSeqInfo();
284         }
285         string id = text_id->GetAccession()+'.'+NStr::NumericToString(text_id->GetVersion());
286         NStr::ToUpper(id);
287         auto curs = Graph();
288         auto range = m_LookupIndex.Find(id);
289         if ( !range.second ) {
290             Put(curs);
291             return SSeqInfo();
292         }
293         auto first_row = range.first;
294         SSeqInfo info;
295         info.m_SeqId = id;
296         info.m_Seq_id_Handle = idh;
297         info.m_RowFirst = first_row;
298         TSeqPos first_len = *curs->LEN(first_row);
299         info.m_RowSize = first_len;
300         if ( range.second > 1 ) {
301             // multi page
302             TVDBRowId last_row = first_row + range.second - 1;
303             info.m_RowLast = last_row;
304             TSeqPos last_start = *curs->START(last_row);
305             TSeqPos last_len = *curs->LEN(last_row);
306             info.m_SeqLength = last_start + last_len;
307         }
308         else {
309             // single page
310             info.m_RowLast = first_row;
311             TSeqPos first_start = *curs->START(first_row);
312             info.m_SeqLength = first_start + first_len;
313         }
314         Put(curs);
315         return info;
316     }
317     return SSeqInfo();
318 }
319 
320 
321 /////////////////////////////////////////////////////////////////////////////
322 // CVDBGraphSeqIterator
323 /////////////////////////////////////////////////////////////////////////////
324 
CVDBGraphSeqIterator(const CVDBGraphDb & db)325 CVDBGraphSeqIterator::CVDBGraphSeqIterator(const CVDBGraphDb& db)
326     : m_Db(db)
327 {
328     m_Info = db.GetNCObject().GetSeqInfoAtRow(1);
329 }
330 
331 
CVDBGraphSeqIterator(const CVDBGraphDb & db,const CSeq_id_Handle & seq_id)332 CVDBGraphSeqIterator::CVDBGraphSeqIterator(const CVDBGraphDb& db,
333                                            const CSeq_id_Handle& seq_id)
334     : m_Db(db)
335 {
336     m_Info = db.GetNCObject().GetSeqInfo(seq_id);
337 }
338 
339 
operator ++(void)340 CVDBGraphSeqIterator& CVDBGraphSeqIterator::operator++(void)
341 {
342     m_Info = m_Db->GetSeqInfoAtRow(GetInfo().m_RowLast+1);
343     return *this;
344 }
345 
346 
GetInfo(void) const347 const CVDBGraphDb_Impl::SSeqInfo& CVDBGraphSeqIterator::GetInfo(void) const
348 {
349     if ( !*this ) {
350         NCBI_THROW(CSraException, eInvalidState,
351                    "CVDBGraphSeqIterator is invalid");
352     }
353     return m_Info;
354 }
355 
356 
357 template<class DstVector, class SrcVector>
sx_Assign(DstVector & dst,const SrcVector & src)358 static void sx_Assign(DstVector& dst, const SrcVector& src)
359 {
360     dst.clear();
361     dst.reserve(src.size());
362     ITERATE ( typename SrcVector, it, src ) {
363         dst.push_back(*it);
364     }
365 }
366 
367 
368 CRef<CSeq_graph>
x_MakeGraph(const string & annot_name,CSeq_loc & loc,const SSeqInfo & info,const COpenRange<TSeqPos> & range,TSeqPos step,SGraphTableCursor & cursor,CVDBColumn & column,int level) const369 CVDBGraphSeqIterator::x_MakeGraph(const string& annot_name,
370                                   CSeq_loc& loc,
371                                   const SSeqInfo& info,
372                                   const COpenRange<TSeqPos>& range,
373                                   TSeqPos step,
374                                   SGraphTableCursor& cursor,
375                                   CVDBColumn& column,
376                                   int level) const
377 {
378     if ( !column ) {
379         return null;
380     }
381 
382     CRef<CSeq_graph> graph(new CSeq_graph);
383     if ( !annot_name.empty() ) {
384         graph->SetTitle(annot_name);
385     }
386     graph->SetLoc(loc);
387     if ( level >= 0 ) {
388         graph->SetComment(NStr::IntToString(level)+"%");
389     }
390 
391     typedef SGraphTableCursor::TGraphQ TValue;
392     const TValue kMinIntValue = kMin_I4;
393     const TValue kMaxIntValue = kMax_I4;
394     const TValue kMinByteValue = 0;
395     const TValue kMaxByteValue = kMax_UI1;
396 
397     TValue max_v = numeric_limits<TValue>::min();
398     TValue min_v = numeric_limits<TValue>::max();
399     CInt_graph* int_graph = &graph->SetGraph().SetInt();
400     CReal_graph* real_graph = 0;
401     CInt_graph::TValues* int_vv = &int_graph->SetValues();
402     CReal_graph::TValues* real_vv = 0;
403     TSeqPos row_size = info.m_RowSize;
404     TSeqPos pos = range.GetFrom();
405     for ( TVDBRowId row = info.m_RowFirst + pos/row_size;
406           pos < range.GetToOpen();
407           ++row ) {
408         CVDBValueFor<TValue> values(cursor.m_Cursor, row, column);
409         for ( size_t index = pos%row_size/step;
410               index < values.size() && pos < range.GetToOpen();
411               ++index, pos += step ) {
412             TValue v = values[index];
413             bool switch_to_real = false;
414             if ( v < min_v ) {
415                 min_v = v;
416                 switch_to_real = v < kMinIntValue;
417             }
418             if ( v > max_v ) {
419                 max_v = v;
420                 switch_to_real = v > kMaxIntValue;
421             }
422             if ( switch_to_real && int_vv ) {
423                 // switch to real graph
424                 CRef<CInt_graph> save_int_graph(int_graph);
425                 real_graph = &graph->SetGraph().SetReal();
426                 int_graph = 0;
427                 real_vv = &real_graph->SetValues();
428                 sx_Assign(*real_vv, *int_vv);
429                 int_vv = 0;
430             }
431             if ( int_vv ) {
432                 int_vv->push_back(int(v));
433             }
434             else {
435                 real_vv->push_back(double(v));
436             }
437         }
438         if ( pos < range.GetToOpen() &&
439              pos < (row-info.m_RowFirst+1)*row_size ) {
440             NCBI_THROW(CSraException, eNotFoundValue,
441                        "CVDBGraphSeqIterator: graph data array is too short");
442         }
443     }
444     size_t numval = 0;
445     if ( min_v >= kMinByteValue && max_v <= kMaxByteValue ) {
446         // use smaller byte representation
447         numval = int_vv->size();
448         CRef<CByte_graph> byte_graph(new CByte_graph);
449         byte_graph->SetAxis(0);
450         byte_graph->SetMin(int(min_v));
451         byte_graph->SetMax(int(max_v));
452         sx_Assign(byte_graph->SetValues(), *int_vv);
453         graph->SetGraph().SetByte(*byte_graph);
454         int_graph = 0;
455         int_vv = 0;
456     }
457     else if ( real_graph ) {
458         // need bigger double representation
459         numval = real_vv->size();
460         real_graph->SetAxis(0);
461         real_graph->SetMin(double(min_v));
462         real_graph->SetMax(double(max_v));
463     }
464     else {
465         // int graph
466         numval = int_vv->size();
467         int_graph->SetAxis(0);
468         int_graph->SetMin(int(min_v));
469         int_graph->SetMax(int(max_v));
470     }
471     if ( step > 1 ) {
472         graph->SetComp(step);
473     }
474     uint32_t scale = cursor.SCALE(info.m_RowFirst);
475     if ( scale != 1 ) {
476         graph->SetA(1./scale);
477     }
478     if ( numval > size_t(kMax_Int) ) {
479         NCBI_THROW(CSraException, eDataError,
480                    "CVDBGraphSeqIterator::x_MakeGraph: graph too big");
481     }
482     graph->SetNumval(int(numval));
483     return graph;
484 }
485 
486 
487 CRef<CSeq_table>
x_MakeTable(const string & annot_name,CSeq_loc & loc,const SSeqInfo & info,const COpenRange<TSeqPos> & range,SGraphTableCursor & cursor) const488 CVDBGraphSeqIterator::x_MakeTable(const string& annot_name,
489                                   CSeq_loc& loc,
490                                   const SSeqInfo& info,
491                                   const COpenRange<TSeqPos>& range,
492                                   SGraphTableCursor& cursor) const
493 {
494     TSeqPos size = range.GetLength();
495     TSeqPos row_size = info.m_RowSize;
496     typedef SGraphTableCursor::TGraphQ TValue;
497     const TValue kMinIntValue = kMin_I4;
498     const TValue kMaxIntValue = kMax_I4;
499 
500     TValue min_v = 0, max_v = 0;
501     vector<TValue> vv;
502     vv.reserve(size);
503     TSeqPos pos = range.GetFrom();
504     TVDBRowId row = pos/row_size;
505     for ( ; pos < range.GetToOpen(); ++row, pos += row_size ) {
506         CVDBValueFor<TValue> vv_arr(cursor.GRAPH(info.m_RowFirst+row));
507         TSeqPos off = TSeqPos(pos - row*row_size);
508         TSeqPos cnt = min(row_size-off, range.GetToOpen()-pos);
509         for ( TSeqPos i = 0; i < cnt; ++i ) {
510             TValue v = vv_arr[off+i];
511             vv.push_back(v);
512             if ( v > max_v ) {
513                 max_v = v;
514             }
515             if ( v < min_v ) {
516                 min_v = v;
517             }
518         }
519     }
520 
521     CRef<CSeq_table> table(new CSeq_table);
522     table->SetFeat_type(0);
523 
524     { // Seq-table location
525         CRef<CSeqTable_column> col_id(new CSeqTable_column);
526         table->SetColumns().push_back(col_id);
527         col_id->SetHeader().SetField_name("Seq-table location");
528         col_id->SetDefault().SetLoc(loc);
529     }
530     { // Seq-id
531         CRef<CSeqTable_column> col_id(new CSeqTable_column);
532         table->SetColumns().push_back(col_id);
533         col_id->SetHeader().SetField_id(CSeqTable_column_info::eField_id_location_id);
534         col_id->SetDefault().SetId(loc.SetInt().SetId());
535     }
536 
537     // position
538     CRef<CSeqTable_column> col_pos(new CSeqTable_column);
539     table->SetColumns().push_back(col_pos);
540     col_pos->SetHeader().SetField_id(CSeqTable_column_info::eField_id_location_from);
541     CSeqTable_multi_data::TInt& arr_pos = col_pos->SetData().SetInt();
542 
543     // span
544     CRef<CSeqTable_column> col_span(new CSeqTable_column);
545     table->SetColumns().push_back(col_span);
546     col_span->SetHeader().SetField_name("span");
547     CSeqTable_multi_data::TInt& arr_span = col_span->SetData().SetInt();
548 
549     CRef<CSeqTable_column> col_val(new CSeqTable_column);
550     table->SetColumns().push_back(col_val);
551     col_val->SetHeader().SetField_name("values");
552 
553     TSeqPos cur_i = 0;
554     TValue cur_v = vv[0];
555     if ( min_v < kMinIntValue || max_v > kMaxIntValue ) {
556         CSeqTable_multi_data::TReal& arr_vv = col_val->SetData().SetReal();
557         for ( TSeqPos i = 0; i < size; ++i ) {
558             TValue v = vv[i];
559             if ( v != cur_v ) {
560                 arr_pos.push_back(cur_i+range.GetFrom());
561                 arr_span.push_back(i-cur_i);
562                 arr_vv.push_back(double(cur_v));
563                 cur_i = i;
564                 cur_v = v;
565             }
566         }
567         arr_pos.push_back(cur_i+range.GetFrom());
568         arr_span.push_back(size-cur_i);
569         arr_vv.push_back(double(cur_v));
570     }
571     else {
572         CSeqTable_multi_data::TInt& arr_vv = col_val->SetData().SetInt();
573         for ( TSeqPos i = 0; i < size; ++i ) {
574             TValue v = vv[i];
575             if ( v != cur_v ) {
576                 arr_pos.push_back(cur_i+range.GetFrom());
577                 arr_span.push_back(i-cur_i);
578                 arr_vv.push_back(int(cur_v));
579                 cur_i = i;
580                 cur_v = v;
581             }
582         }
583         arr_pos.push_back(cur_i+range.GetFrom());
584         arr_span.push_back(size-cur_i);
585         arr_vv.push_back(int(cur_v));
586     }
587 
588     uint32_t scale = cursor.SCALE(info.m_RowFirst);
589     if ( scale != 1 ) {
590         CRef<CSeqTable_column> col_step(new CSeqTable_column);
591         table->SetColumns().push_back(col_step);
592         col_step->SetHeader().SetField_name("value_step");
593         col_step->SetDefault().SetReal(1./scale);
594     }
595     if ( arr_pos.size() > size_t(kMax_Int) ) {
596         NCBI_THROW(CSraException, eDataError,
597                    "CVDBGraphSeqIterator::x_MakeTable: graph too big");
598     }
599     table->SetNum_rows(int(arr_pos.size()));
600     return table;
601 }
602 
603 
x_SeqTableIsSmaller(COpenRange<TSeqPos> range,SGraphTableCursor & cursor) const604 bool CVDBGraphSeqIterator::x_SeqTableIsSmaller(COpenRange<TSeqPos> range,
605                                                SGraphTableCursor& cursor) const
606 {
607     if ( !cursor.m_NUM_SWITCHES ) {
608         // no info about value changes
609         return false;
610     }
611     typedef SGraphTableCursor::TGraphV TValue;
612     const TValue kMinIntValue = kMin_I4;
613     const TValue kMaxIntValue = kMax_I4;
614     const TValue kMinByteValue = 0;
615     const TValue kMaxByteValue = kMax_UI1;
616     TValue min_v = 0, max_v = 0;
617     size_t values = 0;
618     uint64_t switches = 0;
619     TSeqPos pos = range.GetFrom();
620     const SSeqInfo& info = GetInfo();
621     TSeqPos row_size = info.m_RowSize;
622     TVDBRowId row = pos/row_size;
623     for ( ; pos < range.GetToOpen(); ++row, pos += row_size ) {
624         values += row_size;
625         switches += *cursor.NUM_SWITCHES(info.m_RowFirst+row);
626         TValue v = cursor.GR_Q100(info.m_RowFirst+row);
627         if ( v > max_v ) {
628             max_v = v;
629         }
630         if ( v < min_v ) {
631             min_v = v;
632         }
633     }
634     size_t table_value_size =
635         min_v < kMinIntValue || max_v > kMaxIntValue? sizeof(double): sizeof(int);
636     size_t graph_value_size =
637         min_v < kMinByteValue || max_v > kMaxByteValue? table_value_size: 1;
638     uint64_t table_size =
639         (table_value_size+2*sizeof(int))*switches; //+pos+span
640     size_t graph_size =
641         graph_value_size*values;
642     return table_size < graph_size;
643 }
644 
645 
SeqTableIsSmaller(COpenRange<TSeqPos> range) const646 bool CVDBGraphSeqIterator::SeqTableIsSmaller(COpenRange<TSeqPos> range) const
647 {
648     const SSeqInfo& info = GetInfo();
649     if ( range.GetToOpen() > info.m_SeqLength ) {
650         range.SetToOpen(info.m_SeqLength);
651     }
652     if ( range.Empty() ) {
653         return false;
654     }
655     CRef<SGraphTableCursor> curs(GetDb().Graph());
656     bool seq_table_is_smaller = x_SeqTableIsSmaller(range, *curs);
657     GetDb().Put(curs);
658     return seq_table_is_smaller;
659 }
660 
661 
662 CRef<CSeq_annot>
GetAnnot(COpenRange<TSeqPos> range0,const string & annot_name,TContentFlags content) const663 CVDBGraphSeqIterator::GetAnnot(COpenRange<TSeqPos> range0,
664                                const string& annot_name,
665                                TContentFlags content) const
666 {
667     const SSeqInfo& info = GetInfo();
668     if ( range0.GetToOpen() > info.m_SeqLength ) {
669         range0.SetToOpen(info.m_SeqLength);
670     }
671     if ( range0.Empty() ) {
672         return null;
673     }
674 
675     CRef<CSeq_annot> annot(new CSeq_annot);
676 
677     if ( !annot_name.empty() ) {
678         CRef<CAnnotdesc> desc(new CAnnotdesc);
679         desc->SetName(annot_name);
680         annot->SetDesc().Set().push_back(desc);
681     }
682 
683     CRef<SGraphTableCursor> curs(GetDb().Graph());
684 
685     if ( content & fGraphQAll ) {
686         if ( content & (fGraphZoomQAll|fGraphMain) ) {
687             NCBI_THROW(CSraException, eInvalidArg, "CVDBGraphSeqIterator: "
688                        "several zoom tracks are requested");
689         }
690 
691         TSeqPos step = info.m_RowSize;
692 
693         COpenRange<TSeqPos> range = range0;
694         if ( TSeqPos adjust = range.GetFrom() % step ) {
695             range.SetFrom(range.GetFrom() - adjust);
696         }
697         if ( TSeqPos adjust = range.GetToOpen() % step ) {
698             range.SetToOpen(min(info.m_SeqLength,
699                                 range.GetToOpen() + (step - adjust)));
700         }
701 
702         CRef<CSeq_loc> loc(new CSeq_loc);
703         loc->SetInt().SetId(*SerialClone(*info.m_Seq_id_Handle.GetSeqId()));
704         loc->SetInt().SetFrom(range.GetFrom());
705         loc->SetInt().SetTo(range.GetTo());
706 
707         // describe statistics
708         {
709             CRef<CAnnotdesc> desc(new CAnnotdesc);
710             CUser_object& obj = desc->SetUser();
711             obj.SetType().SetStr("AnnotationTrack");
712             obj.AddField("ZoomLevel", int(step));
713             obj.AddField("StatisticsType", "Percentiles");
714             annot->SetDesc().Set().push_back(desc);
715         }
716 
717         for ( TContentFlags f = fGraphQ0; f & fGraphQAll; f <<= 1 ) {
718             if ( !(content & f) ) {
719                 continue;
720             }
721             CRef<CSeq_graph> graph;
722             switch ( f ) {
723             case fGraphQ0  :
724                 graph = x_MakeGraph(annot_name, *loc, info, range,
725                                     step, *curs, curs->m_GR_Q0  ,   0);
726                 break;
727             case fGraphQ10 :
728                 graph = x_MakeGraph(annot_name, *loc, info, range,
729                                     step, *curs, curs->m_GR_Q10 ,  10);
730                 break;
731             case fGraphQ50 :
732                 graph = x_MakeGraph(annot_name, *loc, info, range,
733                                     step, *curs, curs->m_GR_Q50 ,  50);
734                 break;
735             case fGraphQ90 :
736                 graph = x_MakeGraph(annot_name, *loc, info, range,
737                                     step, *curs, curs->m_GR_Q90 ,  90);
738                 break;
739             case fGraphQ100:
740                 graph = x_MakeGraph(annot_name, *loc, info, range,
741                                     step, *curs, curs->m_GR_Q100, 100);
742                 break;
743             default:
744                 break;
745             }
746             if ( graph ) {
747                 annot->SetData().SetGraph().push_back(graph);
748             }
749         }
750     }
751 
752     if ( content & fGraphZoomQAll ) {
753         if ( content & (fGraphQAll|fGraphMain) ) {
754             NCBI_THROW(CSraException, eInvalidArg, "CVDBGraphSeqIterator: "
755                        "several zoom tracks are requested");
756         }
757 
758         TSeqPos step = 100;
759 
760         COpenRange<TSeqPos> range = range0;
761         if ( TSeqPos adjust = range.GetFrom() % step ) {
762             range.SetFrom(range.GetFrom() - adjust);
763         }
764         if ( TSeqPos adjust = range.GetToOpen() % step ) {
765             range.SetToOpen(min(info.m_SeqLength,
766                                 range.GetToOpen() + (step - adjust)));
767         }
768 
769         CRef<CSeq_loc> loc(new CSeq_loc);
770         loc->SetInt().SetId(*SerialClone(*info.m_Seq_id_Handle.GetSeqId()));
771         loc->SetInt().SetFrom(range.GetFrom());
772         loc->SetInt().SetTo(range.GetTo());
773 
774         // describe statistics
775         {
776             CRef<CAnnotdesc> desc(new CAnnotdesc);
777             CUser_object& obj = desc->SetUser();
778             obj.SetType().SetStr("AnnotationTrack");
779             obj.AddField("ZoomLevel", int(step));
780             obj.AddField("StatisticsType", "Percentiles");
781             annot->SetDesc().Set().push_back(desc);
782         }
783 
784         for ( TContentFlags f = fGraphZoomQ0; f & fGraphZoomQAll; f <<= 1 ) {
785             if ( !(content & f) ) {
786                 continue;
787             }
788             CRef<CSeq_graph> graph;
789             switch ( f ) {
790             case fGraphZoomQ0  :
791                 graph = x_MakeGraph(annot_name, *loc, info, range,
792                                     step, *curs, curs->m_GR_ZOOM_Q0  ,   0);
793                 break;
794             case fGraphZoomQ10 :
795                 graph = x_MakeGraph(annot_name, *loc, info, range,
796                                     step, *curs, curs->m_GR_ZOOM_Q10 ,  10);
797                 break;
798             case fGraphZoomQ50 :
799                 graph = x_MakeGraph(annot_name, *loc, info, range,
800                                     step, *curs, curs->m_GR_ZOOM_Q50 ,  50);
801                 break;
802             case fGraphZoomQ90 :
803                 graph = x_MakeGraph(annot_name, *loc, info, range,
804                                     step, *curs, curs->m_GR_ZOOM_Q90 ,  90);
805                 break;
806             case fGraphZoomQ100:
807                 graph = x_MakeGraph(annot_name, *loc, info, range,
808                                     step, *curs, curs->m_GR_ZOOM_Q100, 100);
809                 break;
810             default:
811                 break;
812             }
813             if ( graph ) {
814                 annot->SetData().SetGraph().push_back(graph);
815             }
816         }
817     }
818 
819     if ( content & fGraphMain ) {
820         COpenRange<TSeqPos> range = range0;
821 
822         CRef<CSeq_loc> loc(new CSeq_loc);
823         loc->SetInt().SetId(*SerialClone(*info.m_Seq_id_Handle.GetSeqId()));
824         loc->SetInt().SetFrom(range.GetFrom());
825         loc->SetInt().SetTo(range.GetTo());
826 
827         bool as_table = false;
828         if ( content & fGraphMainAsTable ) {
829             if ( content & fGraphMainAsGraph ) {
830                 // select best
831                 as_table = x_SeqTableIsSmaller(range, *curs);
832             }
833             else {
834                 as_table = true;
835             }
836         }
837         if ( as_table ) {
838             {
839                 CRef<CAnnotdesc> desc(new CAnnotdesc);
840                 CUser_object& obj = desc->SetUser();
841                 obj.SetType().SetStr("Track Data");
842                 obj.AddField("track type", "graph");
843                 annot->SetDesc().Set().push_back(desc);
844             }
845 
846             CRef<CSeq_table> table = x_MakeTable(annot_name, *loc, info, range,
847                                                  *curs);
848             annot->SetData().SetSeq_table(*table);
849         }
850         else {
851             CRef<CSeq_graph> graph = x_MakeGraph(annot_name, *loc, info, range,
852                                                  1, *curs, curs->m_GRAPH, -1);
853             annot->SetData().SetGraph().push_back(graph);
854         }
855     }
856 
857     GetDb().Put(curs);
858 
859     if ( !annot->IsSetData() ) {
860         return null;
861     }
862 
863     return annot;
864 }
865 
866 
867 END_NAMESPACE(objects);
868 END_NCBI_NAMESPACE;
869