1 /* $Id: graphread.cpp 618599 2020-10-22 17:38:51Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Eugene Vasilchenko
27 *
28 * File Description:
29 * Access to WGS files
30 *
31 */
32
33 #include <ncbi_pch.hpp>
34 #include <sra/readers/sra/graphread.hpp>
35 #include <sra/readers/ncbi_traces_path.hpp>
36 #include <corelib/ncbistr.hpp>
37 #include <corelib/ncbi_param.hpp>
38 #include <objects/general/general__.hpp>
39 #include <objects/seq/seq__.hpp>
40 #include <objects/seqset/seqset__.hpp>
41 #include <objects/seqloc/seqloc__.hpp>
42 #include <objects/seqalign/seqalign__.hpp>
43 #include <objects/seqres/seqres__.hpp>
44 #include <objects/seqtable/seqtable__.hpp>
45 #include <serial/objistrasnb.hpp>
46 #include <serial/serial.hpp>
47 #include <sra/error_codes.hpp>
48
49 #include <sra/readers/sra/kdbread.hpp>
50
51 BEGIN_NCBI_NAMESPACE;
52
53 #define NCBI_USE_ERRCODE_X VDBGraphReader
54 NCBI_DEFINE_ERR_SUBCODE_X(1);
55
56 BEGIN_NAMESPACE(objects);
57
58
59 NCBI_PARAM_DECL(bool, VDBGRAPH, USE_VDB_INDEX);
60 NCBI_PARAM_DEF_EX(bool, VDBGRAPH, USE_VDB_INDEX, true,
61 eParam_NoThread, VDBGRAPH_USE_VDB_INDEX);
62
63
LookupIsInMemory(ELookupType lookup_type)64 bool CVDBGraphDb_Impl::LookupIsInMemory(ELookupType lookup_type)
65 {
66 if ( lookup_type == eLookupDefault ) {
67 static bool use_vdb_index = NCBI_PARAM_TYPE(VDBGRAPH, USE_VDB_INDEX)::GetDefault();
68 return !use_vdb_index;
69 }
70 else {
71 return lookup_type == eLookupInMemory;
72 }
73 }
74
75
76 /////////////////////////////////////////////////////////////////////////////
77 // CVDBGraphDb_Impl
78 /////////////////////////////////////////////////////////////////////////////
79
80
SGraphTableCursor(const CVDBTable & table)81 CVDBGraphDb_Impl::SGraphTableCursor::SGraphTableCursor(const CVDBTable& table)
82 : m_Cursor(table),
83 INIT_VDB_COLUMN(SID),
84 INIT_VDB_COLUMN(START),
85 INIT_VDB_COLUMN(LEN),
86 INIT_VDB_COLUMN(GR_Q0),
87 INIT_VDB_COLUMN(GR_Q10),
88 INIT_VDB_COLUMN(GR_Q50),
89 INIT_VDB_COLUMN(GR_Q90),
90 INIT_VDB_COLUMN(GR_Q100),
91 INIT_OPTIONAL_VDB_COLUMN(GR_ZOOM_Q0),
92 INIT_OPTIONAL_VDB_COLUMN(GR_ZOOM_Q10),
93 INIT_OPTIONAL_VDB_COLUMN(GR_ZOOM_Q50),
94 INIT_OPTIONAL_VDB_COLUMN(GR_ZOOM_Q90),
95 INIT_OPTIONAL_VDB_COLUMN(GR_ZOOM_Q100),
96 INIT_VDB_COLUMN(GRAPH),
97 INIT_VDB_COLUMN(SCALE),
98 INIT_OPTIONAL_VDB_COLUMN(NUM_SWITCHES)
99 {
100 }
101
102
CVDBGraphDb_Impl(CVDBMgr & mgr,CTempString path,ELookupType lookup_type)103 CVDBGraphDb_Impl::CVDBGraphDb_Impl(CVDBMgr& mgr, CTempString path, ELookupType lookup_type)
104 : m_Mgr(mgr),
105 m_Path(path)
106 {
107 // VDB graph are plain VDB table objects.
108 // However, there could be other VDBs in the same namespace (NA*)
109 // so we have to check this situation and return normal eNotFoundDb error.
110 try {
111 m_GraphTable = CVDBTable(mgr, path);
112 }
113 catch ( CSraException& exc ) {
114 bool another_vdb = false;
115 if ( exc.GetErrCode() != exc.eNotFoundTable ) {
116 // check if the accession refers some other VDB object
117 try {
118 CVDB db(mgr, path);
119 another_vdb = true;
120 }
121 catch ( CSraException& /*exc2*/ ) {
122 }
123 }
124 if ( another_vdb || exc.GetErrCode() == exc.eNotFoundTable ) {
125 // It's either some other VDB object, or not an VDB object at all
126 // report eNotFoundDb with original rc
127 NCBI_THROW2_FMT(CSraException, eNotFoundDb,
128 "Cannot open VDB graph table: "<<path,
129 exc.GetRC());
130 }
131 else {
132 // neither vdbgraph table nor another VDB
133 // report original exception
134 throw;
135 }
136 }
137 CRef<SGraphTableCursor> curs = Graph();
138
139 TVDBRowId last_row = curs->m_Cursor.GetMaxRowId();
140 SSeqInfo info;
141 CVDBTableIndex idx(GraphTable(), "sid", CVDBTableIndex::eMissing_Allow);
142 if ( !idx ) {
143 ERR_POST(Warning<<"CVDBGraphDb: sid index not found. Scanning sequentially.");
144 for ( TVDBRowId row = 1; row <= last_row; ++row ) {
145 // read range and names
146 TSeqPos start = *curs->START(row);
147 TSeqPos len = *curs->LEN(row);
148 CVDBStringValue seq_id = curs->SID(row);
149 if ( *seq_id == info.m_SeqId ) {
150 // continuation of graph
151 info.m_RowLast = row;
152 info.m_SeqLength = start + len;
153 continue;
154 }
155 if ( !info.m_SeqId.empty() ) {
156 m_SeqList.push_back(info);
157 }
158 info.m_SeqId = *seq_id;
159 info.m_Seq_id_Handle = CSeq_id_Handle::GetHandle(info.m_SeqId);
160 info.m_RowFirst = row;
161 info.m_RowSize = len;
162 info.m_SeqLength = len;
163 }
164 if ( !info.m_SeqId.empty() ) {
165 info.m_RowLast = last_row;
166 m_SeqList.push_back(info);
167 }
168 }
169 else if ( LookupIsInMemory(lookup_type) ) {
170 for ( TVDBRowId row = 1; row <= last_row; ++row ) {
171 CVDBStringValue seq_id = curs->SID(row);
172 info.m_SeqId = *seq_id;
173 info.m_Seq_id_Handle = CSeq_id_Handle::GetHandle(info.m_SeqId);
174 info.m_RowSize = *curs->LEN(row);
175 info.m_SeqLength = *curs->START(row) + info.m_RowSize;
176 info.m_RowLast = info.m_RowFirst = row;
177 TVDBRowIdRange range = idx.Find(info.m_SeqId);
178 _ASSERT(row == range.first);
179 _ASSERT(range.second);
180 if ( range.second > 1 ) {
181 row += range.second-1;
182 info.m_RowLast = row;
183 info.m_SeqLength = *curs->START(row)+*curs->LEN(row);
184 }
185 m_SeqList.push_back(info);
186 }
187 }
188 else {
189 m_LookupIndex = idx;
190 }
191 Put(curs);
192
193 NON_CONST_ITERATE ( TSeqInfoList, it, m_SeqList ) {
194 m_SeqMapByFirstRow.insert
195 (TSeqInfoMapByFirstRow::value_type(it->m_RowFirst, it));
196 m_SeqMapBySeq_id.insert
197 (TSeqInfoMapBySeq_id::value_type(it->m_Seq_id_Handle, it));
198 }
199 }
200
201
~CVDBGraphDb_Impl(void)202 CVDBGraphDb_Impl::~CVDBGraphDb_Impl(void)
203 {
204 }
205
206
Graph(void)207 CRef<CVDBGraphDb_Impl::SGraphTableCursor> CVDBGraphDb_Impl::Graph(void)
208 {
209 CRef<SGraphTableCursor> curs = m_Graph.Get();
210 if ( !curs ) {
211 curs = new SGraphTableCursor(GraphTable());
212 }
213 return curs;
214 }
215
216
HasMidZoomGraphs(void)217 bool CVDBGraphDb_Impl::HasMidZoomGraphs(void)
218 {
219 CRef<SGraphTableCursor> curs = Graph();
220 bool ret = curs->m_GR_ZOOM_Q100;
221 Put(curs);
222 return ret;
223 }
224
225
GetSeqInfoAtRow(TVDBRowId first_row)226 CVDBGraphDb_Impl::SSeqInfo CVDBGraphDb_Impl::GetSeqInfoAtRow(TVDBRowId first_row)
227 {
228 CMutexGuard guard(m_SeqInfoMutex);
229 auto iter = m_SeqMapByFirstRow.find(first_row);
230 if ( iter != m_SeqMapByFirstRow.end() ) {
231 return *iter->second;
232 }
233 if ( m_LookupIndex ) {
234 auto curs = Graph();
235 CVDBStringValue id = curs->SID(first_row, CVDBValue::eMissing_Allow);
236 if ( id.empty() ) {
237 Put(curs);
238 return SSeqInfo();
239 }
240 SSeqInfo info;
241 info.m_SeqId = *id;
242 info.m_Seq_id_Handle = CSeq_id_Handle::GetHandle(info.m_SeqId);
243 info.m_RowFirst = first_row;
244 TSeqPos first_len = *curs->LEN(first_row);
245 info.m_RowSize = first_len;
246 TVDBRowIdRange range = m_LookupIndex.Find(info.m_SeqId);
247 _ASSERT(first_row == range.first);
248 _ASSERT(range.second);
249 if ( range.second > 1 ) {
250 // multi page
251 TVDBRowId last_row = first_row + range.second - 1;
252 info.m_RowLast = last_row;
253 TSeqPos last_start = *curs->START(last_row);
254 TSeqPos last_len = *curs->LEN(last_row);
255 info.m_SeqLength = last_start + last_len;
256 }
257 else {
258 // single page
259 info.m_RowLast = first_row;
260 TSeqPos first_start = *curs->START(first_row);
261 info.m_SeqLength = first_start + first_len;
262 }
263 Put(curs);
264 return info;
265 }
266 return SSeqInfo();
267 }
268
269
GetSeqInfo(const CSeq_id_Handle & idh)270 CVDBGraphDb_Impl::SSeqInfo CVDBGraphDb_Impl::GetSeqInfo(const CSeq_id_Handle& idh)
271 {
272 CMutexGuard guard(m_SeqInfoMutex);
273 auto iter = m_SeqMapBySeq_id.find(idh);
274 if ( iter != m_SeqMapBySeq_id.end() ) {
275 return *iter->second;
276 }
277 if ( m_LookupIndex ) {
278 auto seq_id = idh.GetSeqId();
279 const CTextseq_id* text_id = seq_id->GetTextseq_Id();
280 if ( !text_id ||
281 text_id->IsSetName() || text_id->IsSetRelease() ||
282 !text_id->IsSetAccession() || !text_id->IsSetVersion() ) {
283 return SSeqInfo();
284 }
285 string id = text_id->GetAccession()+'.'+NStr::NumericToString(text_id->GetVersion());
286 NStr::ToUpper(id);
287 auto curs = Graph();
288 auto range = m_LookupIndex.Find(id);
289 if ( !range.second ) {
290 Put(curs);
291 return SSeqInfo();
292 }
293 auto first_row = range.first;
294 SSeqInfo info;
295 info.m_SeqId = id;
296 info.m_Seq_id_Handle = idh;
297 info.m_RowFirst = first_row;
298 TSeqPos first_len = *curs->LEN(first_row);
299 info.m_RowSize = first_len;
300 if ( range.second > 1 ) {
301 // multi page
302 TVDBRowId last_row = first_row + range.second - 1;
303 info.m_RowLast = last_row;
304 TSeqPos last_start = *curs->START(last_row);
305 TSeqPos last_len = *curs->LEN(last_row);
306 info.m_SeqLength = last_start + last_len;
307 }
308 else {
309 // single page
310 info.m_RowLast = first_row;
311 TSeqPos first_start = *curs->START(first_row);
312 info.m_SeqLength = first_start + first_len;
313 }
314 Put(curs);
315 return info;
316 }
317 return SSeqInfo();
318 }
319
320
321 /////////////////////////////////////////////////////////////////////////////
322 // CVDBGraphSeqIterator
323 /////////////////////////////////////////////////////////////////////////////
324
CVDBGraphSeqIterator(const CVDBGraphDb & db)325 CVDBGraphSeqIterator::CVDBGraphSeqIterator(const CVDBGraphDb& db)
326 : m_Db(db)
327 {
328 m_Info = db.GetNCObject().GetSeqInfoAtRow(1);
329 }
330
331
CVDBGraphSeqIterator(const CVDBGraphDb & db,const CSeq_id_Handle & seq_id)332 CVDBGraphSeqIterator::CVDBGraphSeqIterator(const CVDBGraphDb& db,
333 const CSeq_id_Handle& seq_id)
334 : m_Db(db)
335 {
336 m_Info = db.GetNCObject().GetSeqInfo(seq_id);
337 }
338
339
operator ++(void)340 CVDBGraphSeqIterator& CVDBGraphSeqIterator::operator++(void)
341 {
342 m_Info = m_Db->GetSeqInfoAtRow(GetInfo().m_RowLast+1);
343 return *this;
344 }
345
346
GetInfo(void) const347 const CVDBGraphDb_Impl::SSeqInfo& CVDBGraphSeqIterator::GetInfo(void) const
348 {
349 if ( !*this ) {
350 NCBI_THROW(CSraException, eInvalidState,
351 "CVDBGraphSeqIterator is invalid");
352 }
353 return m_Info;
354 }
355
356
357 template<class DstVector, class SrcVector>
sx_Assign(DstVector & dst,const SrcVector & src)358 static void sx_Assign(DstVector& dst, const SrcVector& src)
359 {
360 dst.clear();
361 dst.reserve(src.size());
362 ITERATE ( typename SrcVector, it, src ) {
363 dst.push_back(*it);
364 }
365 }
366
367
368 CRef<CSeq_graph>
x_MakeGraph(const string & annot_name,CSeq_loc & loc,const SSeqInfo & info,const COpenRange<TSeqPos> & range,TSeqPos step,SGraphTableCursor & cursor,CVDBColumn & column,int level) const369 CVDBGraphSeqIterator::x_MakeGraph(const string& annot_name,
370 CSeq_loc& loc,
371 const SSeqInfo& info,
372 const COpenRange<TSeqPos>& range,
373 TSeqPos step,
374 SGraphTableCursor& cursor,
375 CVDBColumn& column,
376 int level) const
377 {
378 if ( !column ) {
379 return null;
380 }
381
382 CRef<CSeq_graph> graph(new CSeq_graph);
383 if ( !annot_name.empty() ) {
384 graph->SetTitle(annot_name);
385 }
386 graph->SetLoc(loc);
387 if ( level >= 0 ) {
388 graph->SetComment(NStr::IntToString(level)+"%");
389 }
390
391 typedef SGraphTableCursor::TGraphQ TValue;
392 const TValue kMinIntValue = kMin_I4;
393 const TValue kMaxIntValue = kMax_I4;
394 const TValue kMinByteValue = 0;
395 const TValue kMaxByteValue = kMax_UI1;
396
397 TValue max_v = numeric_limits<TValue>::min();
398 TValue min_v = numeric_limits<TValue>::max();
399 CInt_graph* int_graph = &graph->SetGraph().SetInt();
400 CReal_graph* real_graph = 0;
401 CInt_graph::TValues* int_vv = &int_graph->SetValues();
402 CReal_graph::TValues* real_vv = 0;
403 TSeqPos row_size = info.m_RowSize;
404 TSeqPos pos = range.GetFrom();
405 for ( TVDBRowId row = info.m_RowFirst + pos/row_size;
406 pos < range.GetToOpen();
407 ++row ) {
408 CVDBValueFor<TValue> values(cursor.m_Cursor, row, column);
409 for ( size_t index = pos%row_size/step;
410 index < values.size() && pos < range.GetToOpen();
411 ++index, pos += step ) {
412 TValue v = values[index];
413 bool switch_to_real = false;
414 if ( v < min_v ) {
415 min_v = v;
416 switch_to_real = v < kMinIntValue;
417 }
418 if ( v > max_v ) {
419 max_v = v;
420 switch_to_real = v > kMaxIntValue;
421 }
422 if ( switch_to_real && int_vv ) {
423 // switch to real graph
424 CRef<CInt_graph> save_int_graph(int_graph);
425 real_graph = &graph->SetGraph().SetReal();
426 int_graph = 0;
427 real_vv = &real_graph->SetValues();
428 sx_Assign(*real_vv, *int_vv);
429 int_vv = 0;
430 }
431 if ( int_vv ) {
432 int_vv->push_back(int(v));
433 }
434 else {
435 real_vv->push_back(double(v));
436 }
437 }
438 if ( pos < range.GetToOpen() &&
439 pos < (row-info.m_RowFirst+1)*row_size ) {
440 NCBI_THROW(CSraException, eNotFoundValue,
441 "CVDBGraphSeqIterator: graph data array is too short");
442 }
443 }
444 size_t numval = 0;
445 if ( min_v >= kMinByteValue && max_v <= kMaxByteValue ) {
446 // use smaller byte representation
447 numval = int_vv->size();
448 CRef<CByte_graph> byte_graph(new CByte_graph);
449 byte_graph->SetAxis(0);
450 byte_graph->SetMin(int(min_v));
451 byte_graph->SetMax(int(max_v));
452 sx_Assign(byte_graph->SetValues(), *int_vv);
453 graph->SetGraph().SetByte(*byte_graph);
454 int_graph = 0;
455 int_vv = 0;
456 }
457 else if ( real_graph ) {
458 // need bigger double representation
459 numval = real_vv->size();
460 real_graph->SetAxis(0);
461 real_graph->SetMin(double(min_v));
462 real_graph->SetMax(double(max_v));
463 }
464 else {
465 // int graph
466 numval = int_vv->size();
467 int_graph->SetAxis(0);
468 int_graph->SetMin(int(min_v));
469 int_graph->SetMax(int(max_v));
470 }
471 if ( step > 1 ) {
472 graph->SetComp(step);
473 }
474 uint32_t scale = cursor.SCALE(info.m_RowFirst);
475 if ( scale != 1 ) {
476 graph->SetA(1./scale);
477 }
478 if ( numval > size_t(kMax_Int) ) {
479 NCBI_THROW(CSraException, eDataError,
480 "CVDBGraphSeqIterator::x_MakeGraph: graph too big");
481 }
482 graph->SetNumval(int(numval));
483 return graph;
484 }
485
486
487 CRef<CSeq_table>
x_MakeTable(const string & annot_name,CSeq_loc & loc,const SSeqInfo & info,const COpenRange<TSeqPos> & range,SGraphTableCursor & cursor) const488 CVDBGraphSeqIterator::x_MakeTable(const string& annot_name,
489 CSeq_loc& loc,
490 const SSeqInfo& info,
491 const COpenRange<TSeqPos>& range,
492 SGraphTableCursor& cursor) const
493 {
494 TSeqPos size = range.GetLength();
495 TSeqPos row_size = info.m_RowSize;
496 typedef SGraphTableCursor::TGraphQ TValue;
497 const TValue kMinIntValue = kMin_I4;
498 const TValue kMaxIntValue = kMax_I4;
499
500 TValue min_v = 0, max_v = 0;
501 vector<TValue> vv;
502 vv.reserve(size);
503 TSeqPos pos = range.GetFrom();
504 TVDBRowId row = pos/row_size;
505 for ( ; pos < range.GetToOpen(); ++row, pos += row_size ) {
506 CVDBValueFor<TValue> vv_arr(cursor.GRAPH(info.m_RowFirst+row));
507 TSeqPos off = TSeqPos(pos - row*row_size);
508 TSeqPos cnt = min(row_size-off, range.GetToOpen()-pos);
509 for ( TSeqPos i = 0; i < cnt; ++i ) {
510 TValue v = vv_arr[off+i];
511 vv.push_back(v);
512 if ( v > max_v ) {
513 max_v = v;
514 }
515 if ( v < min_v ) {
516 min_v = v;
517 }
518 }
519 }
520
521 CRef<CSeq_table> table(new CSeq_table);
522 table->SetFeat_type(0);
523
524 { // Seq-table location
525 CRef<CSeqTable_column> col_id(new CSeqTable_column);
526 table->SetColumns().push_back(col_id);
527 col_id->SetHeader().SetField_name("Seq-table location");
528 col_id->SetDefault().SetLoc(loc);
529 }
530 { // Seq-id
531 CRef<CSeqTable_column> col_id(new CSeqTable_column);
532 table->SetColumns().push_back(col_id);
533 col_id->SetHeader().SetField_id(CSeqTable_column_info::eField_id_location_id);
534 col_id->SetDefault().SetId(loc.SetInt().SetId());
535 }
536
537 // position
538 CRef<CSeqTable_column> col_pos(new CSeqTable_column);
539 table->SetColumns().push_back(col_pos);
540 col_pos->SetHeader().SetField_id(CSeqTable_column_info::eField_id_location_from);
541 CSeqTable_multi_data::TInt& arr_pos = col_pos->SetData().SetInt();
542
543 // span
544 CRef<CSeqTable_column> col_span(new CSeqTable_column);
545 table->SetColumns().push_back(col_span);
546 col_span->SetHeader().SetField_name("span");
547 CSeqTable_multi_data::TInt& arr_span = col_span->SetData().SetInt();
548
549 CRef<CSeqTable_column> col_val(new CSeqTable_column);
550 table->SetColumns().push_back(col_val);
551 col_val->SetHeader().SetField_name("values");
552
553 TSeqPos cur_i = 0;
554 TValue cur_v = vv[0];
555 if ( min_v < kMinIntValue || max_v > kMaxIntValue ) {
556 CSeqTable_multi_data::TReal& arr_vv = col_val->SetData().SetReal();
557 for ( TSeqPos i = 0; i < size; ++i ) {
558 TValue v = vv[i];
559 if ( v != cur_v ) {
560 arr_pos.push_back(cur_i+range.GetFrom());
561 arr_span.push_back(i-cur_i);
562 arr_vv.push_back(double(cur_v));
563 cur_i = i;
564 cur_v = v;
565 }
566 }
567 arr_pos.push_back(cur_i+range.GetFrom());
568 arr_span.push_back(size-cur_i);
569 arr_vv.push_back(double(cur_v));
570 }
571 else {
572 CSeqTable_multi_data::TInt& arr_vv = col_val->SetData().SetInt();
573 for ( TSeqPos i = 0; i < size; ++i ) {
574 TValue v = vv[i];
575 if ( v != cur_v ) {
576 arr_pos.push_back(cur_i+range.GetFrom());
577 arr_span.push_back(i-cur_i);
578 arr_vv.push_back(int(cur_v));
579 cur_i = i;
580 cur_v = v;
581 }
582 }
583 arr_pos.push_back(cur_i+range.GetFrom());
584 arr_span.push_back(size-cur_i);
585 arr_vv.push_back(int(cur_v));
586 }
587
588 uint32_t scale = cursor.SCALE(info.m_RowFirst);
589 if ( scale != 1 ) {
590 CRef<CSeqTable_column> col_step(new CSeqTable_column);
591 table->SetColumns().push_back(col_step);
592 col_step->SetHeader().SetField_name("value_step");
593 col_step->SetDefault().SetReal(1./scale);
594 }
595 if ( arr_pos.size() > size_t(kMax_Int) ) {
596 NCBI_THROW(CSraException, eDataError,
597 "CVDBGraphSeqIterator::x_MakeTable: graph too big");
598 }
599 table->SetNum_rows(int(arr_pos.size()));
600 return table;
601 }
602
603
x_SeqTableIsSmaller(COpenRange<TSeqPos> range,SGraphTableCursor & cursor) const604 bool CVDBGraphSeqIterator::x_SeqTableIsSmaller(COpenRange<TSeqPos> range,
605 SGraphTableCursor& cursor) const
606 {
607 if ( !cursor.m_NUM_SWITCHES ) {
608 // no info about value changes
609 return false;
610 }
611 typedef SGraphTableCursor::TGraphV TValue;
612 const TValue kMinIntValue = kMin_I4;
613 const TValue kMaxIntValue = kMax_I4;
614 const TValue kMinByteValue = 0;
615 const TValue kMaxByteValue = kMax_UI1;
616 TValue min_v = 0, max_v = 0;
617 size_t values = 0;
618 uint64_t switches = 0;
619 TSeqPos pos = range.GetFrom();
620 const SSeqInfo& info = GetInfo();
621 TSeqPos row_size = info.m_RowSize;
622 TVDBRowId row = pos/row_size;
623 for ( ; pos < range.GetToOpen(); ++row, pos += row_size ) {
624 values += row_size;
625 switches += *cursor.NUM_SWITCHES(info.m_RowFirst+row);
626 TValue v = cursor.GR_Q100(info.m_RowFirst+row);
627 if ( v > max_v ) {
628 max_v = v;
629 }
630 if ( v < min_v ) {
631 min_v = v;
632 }
633 }
634 size_t table_value_size =
635 min_v < kMinIntValue || max_v > kMaxIntValue? sizeof(double): sizeof(int);
636 size_t graph_value_size =
637 min_v < kMinByteValue || max_v > kMaxByteValue? table_value_size: 1;
638 uint64_t table_size =
639 (table_value_size+2*sizeof(int))*switches; //+pos+span
640 size_t graph_size =
641 graph_value_size*values;
642 return table_size < graph_size;
643 }
644
645
SeqTableIsSmaller(COpenRange<TSeqPos> range) const646 bool CVDBGraphSeqIterator::SeqTableIsSmaller(COpenRange<TSeqPos> range) const
647 {
648 const SSeqInfo& info = GetInfo();
649 if ( range.GetToOpen() > info.m_SeqLength ) {
650 range.SetToOpen(info.m_SeqLength);
651 }
652 if ( range.Empty() ) {
653 return false;
654 }
655 CRef<SGraphTableCursor> curs(GetDb().Graph());
656 bool seq_table_is_smaller = x_SeqTableIsSmaller(range, *curs);
657 GetDb().Put(curs);
658 return seq_table_is_smaller;
659 }
660
661
662 CRef<CSeq_annot>
GetAnnot(COpenRange<TSeqPos> range0,const string & annot_name,TContentFlags content) const663 CVDBGraphSeqIterator::GetAnnot(COpenRange<TSeqPos> range0,
664 const string& annot_name,
665 TContentFlags content) const
666 {
667 const SSeqInfo& info = GetInfo();
668 if ( range0.GetToOpen() > info.m_SeqLength ) {
669 range0.SetToOpen(info.m_SeqLength);
670 }
671 if ( range0.Empty() ) {
672 return null;
673 }
674
675 CRef<CSeq_annot> annot(new CSeq_annot);
676
677 if ( !annot_name.empty() ) {
678 CRef<CAnnotdesc> desc(new CAnnotdesc);
679 desc->SetName(annot_name);
680 annot->SetDesc().Set().push_back(desc);
681 }
682
683 CRef<SGraphTableCursor> curs(GetDb().Graph());
684
685 if ( content & fGraphQAll ) {
686 if ( content & (fGraphZoomQAll|fGraphMain) ) {
687 NCBI_THROW(CSraException, eInvalidArg, "CVDBGraphSeqIterator: "
688 "several zoom tracks are requested");
689 }
690
691 TSeqPos step = info.m_RowSize;
692
693 COpenRange<TSeqPos> range = range0;
694 if ( TSeqPos adjust = range.GetFrom() % step ) {
695 range.SetFrom(range.GetFrom() - adjust);
696 }
697 if ( TSeqPos adjust = range.GetToOpen() % step ) {
698 range.SetToOpen(min(info.m_SeqLength,
699 range.GetToOpen() + (step - adjust)));
700 }
701
702 CRef<CSeq_loc> loc(new CSeq_loc);
703 loc->SetInt().SetId(*SerialClone(*info.m_Seq_id_Handle.GetSeqId()));
704 loc->SetInt().SetFrom(range.GetFrom());
705 loc->SetInt().SetTo(range.GetTo());
706
707 // describe statistics
708 {
709 CRef<CAnnotdesc> desc(new CAnnotdesc);
710 CUser_object& obj = desc->SetUser();
711 obj.SetType().SetStr("AnnotationTrack");
712 obj.AddField("ZoomLevel", int(step));
713 obj.AddField("StatisticsType", "Percentiles");
714 annot->SetDesc().Set().push_back(desc);
715 }
716
717 for ( TContentFlags f = fGraphQ0; f & fGraphQAll; f <<= 1 ) {
718 if ( !(content & f) ) {
719 continue;
720 }
721 CRef<CSeq_graph> graph;
722 switch ( f ) {
723 case fGraphQ0 :
724 graph = x_MakeGraph(annot_name, *loc, info, range,
725 step, *curs, curs->m_GR_Q0 , 0);
726 break;
727 case fGraphQ10 :
728 graph = x_MakeGraph(annot_name, *loc, info, range,
729 step, *curs, curs->m_GR_Q10 , 10);
730 break;
731 case fGraphQ50 :
732 graph = x_MakeGraph(annot_name, *loc, info, range,
733 step, *curs, curs->m_GR_Q50 , 50);
734 break;
735 case fGraphQ90 :
736 graph = x_MakeGraph(annot_name, *loc, info, range,
737 step, *curs, curs->m_GR_Q90 , 90);
738 break;
739 case fGraphQ100:
740 graph = x_MakeGraph(annot_name, *loc, info, range,
741 step, *curs, curs->m_GR_Q100, 100);
742 break;
743 default:
744 break;
745 }
746 if ( graph ) {
747 annot->SetData().SetGraph().push_back(graph);
748 }
749 }
750 }
751
752 if ( content & fGraphZoomQAll ) {
753 if ( content & (fGraphQAll|fGraphMain) ) {
754 NCBI_THROW(CSraException, eInvalidArg, "CVDBGraphSeqIterator: "
755 "several zoom tracks are requested");
756 }
757
758 TSeqPos step = 100;
759
760 COpenRange<TSeqPos> range = range0;
761 if ( TSeqPos adjust = range.GetFrom() % step ) {
762 range.SetFrom(range.GetFrom() - adjust);
763 }
764 if ( TSeqPos adjust = range.GetToOpen() % step ) {
765 range.SetToOpen(min(info.m_SeqLength,
766 range.GetToOpen() + (step - adjust)));
767 }
768
769 CRef<CSeq_loc> loc(new CSeq_loc);
770 loc->SetInt().SetId(*SerialClone(*info.m_Seq_id_Handle.GetSeqId()));
771 loc->SetInt().SetFrom(range.GetFrom());
772 loc->SetInt().SetTo(range.GetTo());
773
774 // describe statistics
775 {
776 CRef<CAnnotdesc> desc(new CAnnotdesc);
777 CUser_object& obj = desc->SetUser();
778 obj.SetType().SetStr("AnnotationTrack");
779 obj.AddField("ZoomLevel", int(step));
780 obj.AddField("StatisticsType", "Percentiles");
781 annot->SetDesc().Set().push_back(desc);
782 }
783
784 for ( TContentFlags f = fGraphZoomQ0; f & fGraphZoomQAll; f <<= 1 ) {
785 if ( !(content & f) ) {
786 continue;
787 }
788 CRef<CSeq_graph> graph;
789 switch ( f ) {
790 case fGraphZoomQ0 :
791 graph = x_MakeGraph(annot_name, *loc, info, range,
792 step, *curs, curs->m_GR_ZOOM_Q0 , 0);
793 break;
794 case fGraphZoomQ10 :
795 graph = x_MakeGraph(annot_name, *loc, info, range,
796 step, *curs, curs->m_GR_ZOOM_Q10 , 10);
797 break;
798 case fGraphZoomQ50 :
799 graph = x_MakeGraph(annot_name, *loc, info, range,
800 step, *curs, curs->m_GR_ZOOM_Q50 , 50);
801 break;
802 case fGraphZoomQ90 :
803 graph = x_MakeGraph(annot_name, *loc, info, range,
804 step, *curs, curs->m_GR_ZOOM_Q90 , 90);
805 break;
806 case fGraphZoomQ100:
807 graph = x_MakeGraph(annot_name, *loc, info, range,
808 step, *curs, curs->m_GR_ZOOM_Q100, 100);
809 break;
810 default:
811 break;
812 }
813 if ( graph ) {
814 annot->SetData().SetGraph().push_back(graph);
815 }
816 }
817 }
818
819 if ( content & fGraphMain ) {
820 COpenRange<TSeqPos> range = range0;
821
822 CRef<CSeq_loc> loc(new CSeq_loc);
823 loc->SetInt().SetId(*SerialClone(*info.m_Seq_id_Handle.GetSeqId()));
824 loc->SetInt().SetFrom(range.GetFrom());
825 loc->SetInt().SetTo(range.GetTo());
826
827 bool as_table = false;
828 if ( content & fGraphMainAsTable ) {
829 if ( content & fGraphMainAsGraph ) {
830 // select best
831 as_table = x_SeqTableIsSmaller(range, *curs);
832 }
833 else {
834 as_table = true;
835 }
836 }
837 if ( as_table ) {
838 {
839 CRef<CAnnotdesc> desc(new CAnnotdesc);
840 CUser_object& obj = desc->SetUser();
841 obj.SetType().SetStr("Track Data");
842 obj.AddField("track type", "graph");
843 annot->SetDesc().Set().push_back(desc);
844 }
845
846 CRef<CSeq_table> table = x_MakeTable(annot_name, *loc, info, range,
847 *curs);
848 annot->SetData().SetSeq_table(*table);
849 }
850 else {
851 CRef<CSeq_graph> graph = x_MakeGraph(annot_name, *loc, info, range,
852 1, *curs, curs->m_GRAPH, -1);
853 annot->SetData().SetGraph().push_back(graph);
854 }
855 }
856
857 GetDb().Put(curs);
858
859 if ( !annot->IsSetData() ) {
860 return null;
861 }
862
863 return annot;
864 }
865
866
867 END_NAMESPACE(objects);
868 END_NCBI_NAMESPACE;
869