1 /* $Id: bed_reader.cpp 632526 2021-06-02 17:25:01Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Frank Ludwig
27 *
28 * File Description:
29 * BED file reader
30 *
31 */
32
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35
36 #include <objects/general/Object_id.hpp>
37 #include <objects/general/User_object.hpp>
38 #include <objects/seqloc/Seq_point.hpp>
39 #include <objects/seqfeat/SeqFeatXref.hpp>
40 #include <objects/seq/Annotdesc.hpp>
41 #include <objects/seq/Annot_descr.hpp>
42 #include <objects/seqfeat/SeqFeatData.hpp>
43 #include <objects/seqfeat/Seq_feat.hpp>
44 #include <objects/seqfeat/Feat_id.hpp>
45
46 #include <objtools/readers/bed_reader.hpp>
47 #include "bed_autosql.hpp"
48 #include "reader_message_handler.hpp"
49 #include "bed_column_data.hpp"
50
51 #include <algorithm>
52 #include <deque>
53
54 BEGIN_NCBI_SCOPE
55 BEGIN_objects_SCOPE // namespace ncbi::objects::
56
57 // ============================================================================
58 class CLinePreBuffer
59 // ============================================================================
60 {
61 public:
62 using LinePreIt = deque<string>::const_iterator;
63
CLinePreBuffer(ILineReader & lineReader)64 CLinePreBuffer(
65 ILineReader& lineReader):
66 mLineReader(lineReader),
67 mLineNumber(0)
68 {};
69
~CLinePreBuffer()70 virtual ~CLinePreBuffer() {};
71
FillBuffer(size_t numLines)72 bool FillBuffer(
73 size_t numLines)
74 {
75 string line;
76 while (numLines && !mLineReader.AtEOF()) {
77 line = *++mLineReader;
78 CLinePreBuffer::StripSpaceCharsInPlace(line);
79 mBuffer.push_back(line);
80 if (!IsCommentLine(line)) {
81 --numLines;
82 }
83 }
84 return true;
85 }
86
IsCommentLine(const CTempString & line)87 virtual bool IsCommentLine(
88 const CTempString& line)
89 {
90 if (NStr::StartsWith(line, "#")) {
91 return true;
92 }
93 if (NStr::IsBlank(line)) {
94 return true;
95 }
96 return false;
97 };
98
GetLine(string & line)99 bool GetLine(
100 string& line)
101 {
102 while (!mBuffer.empty() || !mLineReader.AtEOF()) {
103 string temp;
104 if (!mBuffer.empty()) {
105 temp = mBuffer.front();
106 mBuffer.pop_front();
107 }
108 else {
109 temp = *++mLineReader;
110 CLinePreBuffer::StripSpaceCharsInPlace(temp);
111 }
112 if (!IsCommentLine(temp)) {
113 line = temp;
114 ++mLineNumber;
115 return true;
116 }
117 }
118 return false;
119 };
120
UngetLine(const string & line)121 bool UngetLine(
122 const string& line)
123 {
124 mBuffer.push_front(line);
125 --mLineNumber;
126 return true;
127 }
128
LineNumber() const129 int LineNumber() const
130 {
131 return mLineNumber;
132 };
133
begin()134 LinePreIt begin()
135 {
136 return mBuffer.begin();
137 };
138
end()139 LinePreIt end()
140 {
141 return mBuffer.end();
142 };
143
144 void
AssignReader(ILineReader & lineReader)145 AssignReader(
146 ILineReader& lineReader) {
147 if (&mLineReader != &lineReader) {
148 mLineReader = lineReader;
149 mBuffer.clear();
150 mLineNumber = 0;
151 }
152 };
153
154 static void
StripSpaceCharsInPlace(string & str)155 StripSpaceCharsInPlace(
156 string& str)
157 {
158 if (str.empty()) {
159 return;
160 }
161 auto newFirst = 0;
162 while (str[newFirst] == ' ') {
163 ++newFirst;
164 }
165 auto newLast = str.length() - 1;
166 while (str[newLast] == ' ') {
167 --newLast;
168 }
169 str = str.substr(newFirst, newLast - newFirst + 1);
170 };
171
172 protected:
173 ILineReader& mLineReader;
174 deque<string> mBuffer;
175 int mLineNumber;
176 };
177
178
179 // ----------------------------------------------------------------------------
180 void
SetInterval(CSeq_id & id,unsigned int start,unsigned int stop,ENa_strand strand)181 CRawBedRecord::SetInterval(
182 // ----------------------------------------------------------------------------
183 CSeq_id& id,
184 unsigned int start,
185 unsigned int stop,
186 ENa_strand strand)
187 {
188 m_pInterval.Reset(new CSeq_interval());
189 m_pInterval->SetId(id);
190 m_pInterval->SetFrom(start);
191 m_pInterval->SetTo(stop-1);
192 m_pInterval->SetStrand(strand);
193 };
194
195 // ----------------------------------------------------------------------------
196 void
SetScore(unsigned int score)197 CRawBedRecord::SetScore(
198 unsigned int score)
199 // ----------------------------------------------------------------------------
200 {
201 m_score = score;
202 };
203
204 // ----------------------------------------------------------------------------
205 void
Dump(CNcbiOstream & ostr) const206 CRawBedRecord::Dump(
207 CNcbiOstream& ostr) const
208 // ----------------------------------------------------------------------------
209 {
210 ostr << " [CRawBedRecord" << endl;
211 ostr << "id=\"" << m_pInterval->GetId().AsFastaString() << "\" ";
212 ostr << "start=" << m_pInterval->GetFrom() << " ";
213 ostr << "stop=" << m_pInterval->GetTo() << " ";
214 ostr << "strand=" <<
215 (m_pInterval->GetStrand() == eNa_strand_minus ? "-" : "+") << " ";
216 if (m_score >= 0) {
217 ostr << "score=" << m_score << " ";
218 }
219 ostr << "]" << endl;
220 };
221
222 // ----------------------------------------------------------------------------
223 void
Dump(CNcbiOstream & ostr) const224 CRawBedTrack::Dump(
225 CNcbiOstream& ostr) const
226 // ----------------------------------------------------------------------------
227 {
228 ostr << "[CRawBedTrack" << endl;
229 for (vector<CRawBedRecord>::const_iterator it = m_Records.begin();
230 it != m_Records.end(); ++it) {
231 it->Dump(ostr);
232 }
233 ostr << "]" << std::endl;
234 }
235
// ----------------------------------------------------------------------------
/// Construct a BED reader.
///
/// `flags`, `annotName`, `annotTitle` and `pRL` are forwarded to CReaderBase
/// together with CReadUtil::AsSeqId; `flags` is also handed to the
/// CBedAutoSql helper created here.
CBedReader::CBedReader(
    int flags,
    const string& annotName,
    const string& annotTitle,
    CReaderListener* pRL ) :
// ----------------------------------------------------------------------------
    CReaderBase(flags, annotName, annotTitle, CReadUtil::AsSeqId, pRL),
    m_currentId(""),
    mColumnSeparator(""),
    mColumnSplitFlags(0),
    // column counts stay 0 until xDetermineLikelyColumnCount() has run
    mRealColumnCount(0),
    mValidColumnCount(0),
    mAssumeErrorsAreRecordLevel(true),
    m_CurrentFeatureCount(0),
    m_usescore(false),
    m_CurBatchSize(0),
    m_MaxBatchSize(10000),
    // the line pre-buffer is created lazily by xGetData()
    mLinePreBuffer(nullptr),
    mpAutoSql(new CBedAutoSql(flags))
{
}
258
259 // ----------------------------------------------------------------------------
~CBedReader()260 CBedReader::~CBedReader()
261 // ----------------------------------------------------------------------------
262 {
263 }
264
265 // ----------------------------------------------------------------------------
266 CRef< CSeq_annot >
ReadSeqAnnot(ILineReader & lineReader,ILineErrorListener * pEC)267 CBedReader::ReadSeqAnnot(
268 ILineReader& lineReader,
269 ILineErrorListener* pEC )
270 // ----------------------------------------------------------------------------
271 {
272 m_CurrentFeatureCount = 0;
273 return CReaderBase::ReadSeqAnnot(lineReader, pEC);
274 }
275
276 // ----------------------------------------------------------------------------
277 bool
SetAutoSql(const string & fileName)278 CBedReader::SetAutoSql(
279 const string& fileName)
280 // ----------------------------------------------------------------------------
281 {
282 CNcbiIfstream istr;
283 try {
284 auto origExceptions = istr.exceptions();
285 istr.exceptions(std::istream::failbit);
286 istr.open(fileName);
287 istr.exceptions(origExceptions);
288 }
289 catch (CException& e) {
290 cerr << e.GetMsg() << endl;
291 return false;
292 }
293 m_iFlags |= CBedReader::fAutoSql;
294 return SetAutoSql(istr);
295 }
296
297 // ----------------------------------------------------------------------------
298 bool
SetAutoSql(CNcbiIstream & istr)299 CBedReader::SetAutoSql(
300 CNcbiIstream& istr)
301 // ----------------------------------------------------------------------------
302 {
303 return mpAutoSql->Load(istr, *m_pMessageHandler);
304 }
305
306 // ----------------------------------------------------------------------------
307 CRef<CSeq_annot>
xCreateSeqAnnot()308 CBedReader::xCreateSeqAnnot()
309 // ----------------------------------------------------------------------------
310 {
311 CRef<CSeq_annot> pAnnot(new CSeq_annot);
312 if (!m_AnnotName.empty()) {
313 pAnnot->SetNameDesc(m_AnnotName);
314 }
315 if (!m_AnnotTitle.empty()) {
316 pAnnot->SetTitleDesc(m_AnnotTitle);
317 }
318 CRef<CAnnot_descr> pDescr(new CAnnot_descr);
319 pAnnot->SetDesc(*pDescr);
320 return pAnnot;
321 }
322
323 // ----------------------------------------------------------------------------
324 void
xGetData(ILineReader & lr,TReaderData & readerData)325 CBedReader::xGetData(
326 ILineReader& lr,
327 TReaderData& readerData)
328 // ----------------------------------------------------------------------------
329 {
330 if (!mLinePreBuffer) {
331 mLinePreBuffer.reset(new CLinePreBuffer(lr));
332 }
333 if (mRealColumnCount == 0) {
334 xDetermineLikelyColumnCount(*mLinePreBuffer, nullptr);
335 }
336
337 readerData.clear();
338 string line;
339 if (!mLinePreBuffer->GetLine(line)) {
340 return;
341 }
342 bool isBrowserLine = NStr::StartsWith(line, "browser ");
343 bool isTrackLine = NStr::StartsWith(line, "track ");
344 if (xIsTrackLine(line) && m_uDataCount != 0) {
345 mLinePreBuffer->UngetLine(line);
346 return;
347 }
348 m_uLineNumber = mLinePreBuffer->LineNumber();
349 readerData.push_back(TReaderLine{m_uLineNumber, line});
350 if (!isBrowserLine && !isTrackLine) {
351 ++m_uDataCount;
352 }
353 }
354
355 // ----------------------------------------------------------------------------
356 void
xProcessData(const TReaderData & readerData,CSeq_annot & annot)357 CBedReader::xProcessData(
358 const TReaderData& readerData,
359 CSeq_annot& annot)
360 // ----------------------------------------------------------------------------
361 {
362 for (const auto& lineData: readerData) {
363 string line = lineData.mData;
364 if (xParseTrackLine(line)) {
365 return;
366 }
367 if (xParseBrowserLine(line, annot)) {
368 return;
369 }
370 xParseFeature(lineData, annot, nullptr);
371 ++m_CurrentFeatureCount;
372 }
373 }
374
375 // ----------------------------------------------------------------------------
xDetermineLikelyColumnCount(CLinePreBuffer & preBuffer,ILineErrorListener * pEc)376 bool CBedReader::xDetermineLikelyColumnCount(
377 CLinePreBuffer& preBuffer,
378 ILineErrorListener* pEc)
379 // ----------------------------------------------------------------------------
380 {
381 if (this->m_iFlags & fAutoSql) {
382 mValidColumnCount = mRealColumnCount = mpAutoSql->ColumnCount();;
383 return true;
384 }
385
386 using LineIt = CLinePreBuffer::LinePreIt;
387 int bufferLineNumber = 0;
388 CReaderMessage fatalColumns(
389 eDiag_Fatal,
390 0,
391 "Bad data line: Inconsistent column count.");
392
393 CReaderMessage fatalChroms(
394 eDiag_Fatal,
395 0,
396 "Bad data line: Invalid chrom boundaries.");
397
398 const size_t MIN_SAMPLE_SIZE = 50;
399 preBuffer.FillBuffer(MIN_SAMPLE_SIZE);
400
401 mRealColumnCount = mValidColumnCount = 0;
402 vector<string>::size_type realColumnCount = 0;
403 vector<string>::size_type validColumnCount = 0;
404 for (LineIt lineIt = preBuffer.begin(); lineIt != preBuffer.end(); ++lineIt) {
405 bufferLineNumber++;
406 const auto& line = *lineIt;
407 if (preBuffer.IsCommentLine(line)) {
408 continue;
409 }
410 if (this->xIsTrackLine(line)) {
411 continue;
412 }
413 if (this->xIsBrowserLine(line)) {
414 continue;
415 }
416
417 CBedColumnData columnData(SReaderLine(bufferLineNumber, line));
418 if (realColumnCount == 0 ) {
419 realColumnCount = columnData.ColumnCount();
420 }
421 if (realColumnCount != columnData.ColumnCount()) {
422 fatalColumns.SetLineNumber(bufferLineNumber);
423 throw(fatalColumns);
424 }
425
426 if (validColumnCount == 0) {
427 validColumnCount = realColumnCount;
428 if (validColumnCount > 12) {
429 validColumnCount = 12;
430 }
431 }
432 unsigned long chromStart = 0, chromEnd = 0;
433 try {
434 chromStart = NStr::StringToULong(columnData[1]);
435 chromEnd = NStr::StringToULong(columnData[2]);
436 }
437 catch (CException&) {
438 fatalChroms.SetLineNumber(bufferLineNumber);
439 throw(fatalChroms);
440 }
441 if (validColumnCount >= 7) {
442 try {
443 auto thickStart = NStr::StringToULong(columnData[6]);
444 if (thickStart < chromStart || chromEnd < thickStart) {
445 validColumnCount = 6;
446 }
447 }
448 catch(CException&) {
449 validColumnCount = 6;
450 }
451 }
452 if (validColumnCount >= 8) {
453 try {
454 auto thickEnd = NStr::StringToULong(columnData[7]);
455 if (thickEnd < chromStart || chromEnd < thickEnd) {
456 validColumnCount = 6;
457 }
458 }
459 catch(CException&) {
460 validColumnCount = 6;
461 }
462 }
463
464 int blockCount;
465 if (validColumnCount >= 10) {
466 try {
467 blockCount = NStr::StringToInt(
468 columnData[9], NStr::fDS_ProhibitFractions);
469 if (blockCount < 1) {
470 validColumnCount = 9;
471 }
472 }
473 catch(CException&) {
474 validColumnCount = 9;
475 }
476 }
477 if (validColumnCount >= 11) {
478 vector<string> blockSizes;
479 auto col10 = columnData[10];
480 if (NStr::EndsWith(col10, ",")) {
481 col10 = col10.substr(0, col10.size()-1);
482 }
483 NStr::Split(col10, ",", blockSizes, NStr::fSplit_MergeDelimiters);
484 if (blockSizes.size() != blockCount) {
485 validColumnCount = 9;
486 }
487 else {
488 try {
489 for (auto blockSize: blockSizes) {
490 NStr::StringToULong(blockSize);
491 }
492 }
493 catch(CException&) {
494 validColumnCount = 9;
495 }
496 }
497 }
498 if (validColumnCount >= 12) {
499 vector<string> blockStarts;
500 auto col11 = columnData[11];
501 if (NStr::EndsWith(col11, ",")) {
502 col11 = col11.substr(0, col11.size()-1);
503 }
504 NStr::Split(col11, ",", blockStarts, NStr::fSplit_MergeDelimiters);
505 if (blockStarts.size() != blockCount) {
506 validColumnCount = 9;
507 }
508 else {
509 try {
510 for (auto blockStart: blockStarts) {
511 NStr::StringToULong(blockStart);
512 }
513 }
514 catch(CException&) {
515 validColumnCount = 9;
516 }
517 }
518 }
519 }
520 mRealColumnCount = realColumnCount;
521 mValidColumnCount = validColumnCount;
522 mAssumeErrorsAreRecordLevel = (
523 validColumnCount == realColumnCount &&
524 validColumnCount != 7 &&
525 validColumnCount != 10 &&
526 validColumnCount != 11);
527
528 return true;
529 }
530
531 // ----------------------------------------------------------------------------
xPostProcessAnnot(CSeq_annot & annot)532 void CBedReader::xPostProcessAnnot(
533 CSeq_annot& annot)
534 // ----------------------------------------------------------------------------
535 {
536 xAddConversionInfo(annot, nullptr);
537 xAssignTrackData(annot);
538 xAssignBedColumnCount(annot);
539 }
540
541 // ----------------------------------------------------------------------------
542 bool
xParseTrackLine(const string & strLine)543 CBedReader::xParseTrackLine(
544 const string& strLine)
545 // ----------------------------------------------------------------------------
546 {
547 CReaderMessage warning(
548 eDiag_Warning,
549 m_uLineNumber,
550 "Bad track line: Expected \"track key1=value1 key2=value2 ...\". Ignored.");
551
552 if ( ! NStr::StartsWith( strLine, "track" ) ) {
553 return false;
554 }
555 vector<string> parts;
556 CReadUtil::Tokenize( strLine, " \t", parts );
557 if (parts.size() >= 3) {
558 const string digits("0123456789");
559 bool col2_is_numeric =
560 (string::npos == parts[1].find_first_not_of(digits));
561 bool col3_is_numeric =
562 (string::npos == parts[2].find_first_not_of(digits));
563 if (col2_is_numeric && col3_is_numeric) {
564 return false;
565 }
566 }
567 m_currentId.clear();
568 if (!CReaderBase::xParseTrackLine(strLine)) {
569 m_pMessageHandler->Report(warning);
570 }
571 return true;
572 }
573
574 // ----------------------------------------------------------------------------
575 bool
xParseFeature(const SReaderLine & lineData,CSeq_annot & annot,ILineErrorListener * pEC)576 CBedReader::xParseFeature(
577 const SReaderLine& lineData,
578 CSeq_annot& annot,
579 ILineErrorListener* pEC)
580 // ----------------------------------------------------------------------------
581 {
582 CBedColumnData columnData(lineData);
583 if (columnData.ColumnCount()!= mRealColumnCount) {
584 CReaderMessage error(
585 eDiag_Error,
586 m_uLineNumber,
587 "Bad data line: Inconsistent column count.");
588 throw error;
589 }
590
591 if (m_iFlags & CBedReader::fThreeFeatFormat) {
592 return xParseFeatureThreeFeatFormat(columnData, annot, pEC);
593 }
594 else if (m_iFlags & CBedReader::fDirectedFeatureModel) {
595 return xParseFeatureGeneModelFormat(columnData, annot, pEC);
596 }
597 else if (m_iFlags & CBedReader::fAutoSql) {
598 return xParseFeatureAutoSql(columnData, annot, pEC);
599 }
600 else {
601 return xParseFeatureUserFormat(columnData, annot, pEC);
602 }
603 return false;
604 }
605
606 // ----------------------------------------------------------------------------
xParseFeatureThreeFeatFormat(const CBedColumnData & columnData,CSeq_annot & annot,ILineErrorListener * pEC)607 bool CBedReader::xParseFeatureThreeFeatFormat(
608 const CBedColumnData& columnData,
609 CSeq_annot& annot,
610 ILineErrorListener* pEC)
611 // ----------------------------------------------------------------------------
612 {
613 unsigned int baseId = 3*m_CurrentFeatureCount;
614
615 if (!xAppendFeatureChrom(columnData, annot, baseId, pEC)) {
616 return false;
617 }
618 if (xContainsThickFeature(columnData) &&
619 !xAppendFeatureThick(columnData, annot, baseId, pEC)) {
620 return false;
621 }
622 if (xContainsBlockFeature(columnData) &&
623 !xAppendFeatureBlock(columnData, annot, baseId, pEC)) {
624 return false;
625 }
626 return true;
627 }
628
629 // ----------------------------------------------------------------------------
xParseFeatureGeneModelFormat(const CBedColumnData & columnData,CSeq_annot & annot,ILineErrorListener * pEC)630 bool CBedReader::xParseFeatureGeneModelFormat(
631 const CBedColumnData& columnData,
632 CSeq_annot& annot,
633 ILineErrorListener* pEC)
634 // ----------------------------------------------------------------------------
635 {
636 unsigned int baseId = 3*m_CurrentFeatureCount;
637
638 CRef<CSeq_feat> pGene = xAppendFeatureGene(columnData, annot, baseId, pEC);
639 if (!pGene) {
640 return false;
641 }
642
643 CRef<CSeq_feat> pRna;
644 if (xContainsRnaFeature(columnData)) {//blocks
645 pRna = xAppendFeatureRna(columnData, annot, baseId, pEC);
646 if (!pRna) {
647 return false;
648 }
649 }
650
651 CRef<CSeq_feat> pCds;
652 if (xContainsCdsFeature(columnData)) {//thick
653 pCds = xAppendFeatureCds(columnData, annot, baseId, pEC);
654 if (!pCds) {
655 return false;
656 }
657 }
658
659 if (pRna && pCds) {
660 CRef<CSeq_loc> pRnaLoc(new CSeq_loc);
661 CRef<CSeq_loc> pClippedLoc = pRna->GetLocation().Intersect(pCds->GetLocation(), 0, 0);
662 pCds->SetLocation(*pClippedLoc);
663 }
664 return true;
665 }
666
667 // ----------------------------------------------------------------------------
xAppendFeatureChrom(const CBedColumnData & columnData,CSeq_annot & annot,unsigned int baseId,ILineErrorListener * pEC)668 bool CBedReader::xAppendFeatureChrom(
669 const CBedColumnData& columnData,
670 CSeq_annot& annot,
671 unsigned int baseId,
672 ILineErrorListener* pEC)
673 // ----------------------------------------------------------------------------
674 {
675 CSeq_annot::C_Data::TFtable& ftable = annot.SetData().SetFtable();
676 CRef<CSeq_feat> feature;
677 feature.Reset(new CSeq_feat);
678
679 xSetFeatureLocationChrom(feature, columnData);
680 xSetFeatureIdsChrom(feature, columnData, baseId);
681 xSetFeatureBedData(feature, columnData, pEC);
682
683 ftable.push_back(feature);
684 m_currentId = columnData[0];
685 return true;
686 }
687
688 // ----------------------------------------------------------------------------
xAppendFeatureGene(const CBedColumnData & columnData,CSeq_annot & annot,unsigned int baseId,ILineErrorListener * pEC)689 CRef<CSeq_feat> CBedReader::xAppendFeatureGene(
690 const CBedColumnData& columnData,
691 CSeq_annot& annot,
692 unsigned int baseId,
693 ILineErrorListener* pEC)
694 // ----------------------------------------------------------------------------
695 {
696 CSeq_annot::C_Data::TFtable& ftable = annot.SetData().SetFtable();
697 CRef<CSeq_feat> feature;
698 feature.Reset(new CSeq_feat);
699
700 xSetFeatureLocationGene(feature, columnData);
701 xSetFeatureIdsGene(feature, columnData, baseId);
702 xSetFeatureBedData(feature, columnData, pEC);
703
704 ftable.push_back(feature);
705 m_currentId = columnData[0];
706 return feature;
707 }
708
709 // ----------------------------------------------------------------------------
xAppendFeatureThick(const CBedColumnData & columnData,CSeq_annot & annot,unsigned int baseId,ILineErrorListener * pEC)710 bool CBedReader::xAppendFeatureThick(
711 const CBedColumnData& columnData,
712 CSeq_annot& annot,
713 unsigned int baseId,
714 ILineErrorListener* pEC)
715 // ----------------------------------------------------------------------------
716 {
717 CSeq_annot::C_Data::TFtable& ftable = annot.SetData().SetFtable();
718 CRef<CSeq_feat> feature;
719 feature.Reset(new CSeq_feat);
720
721 xSetFeatureLocationThick(feature, columnData);
722 xSetFeatureIdsThick(feature, columnData, baseId);
723 xSetFeatureBedData(feature, columnData, pEC);
724
725 ftable.push_back(feature);
726 return true;
727 }
728
729 // ----------------------------------------------------------------------------
xAppendFeatureCds(const CBedColumnData & columnData,CSeq_annot & annot,unsigned int baseId,ILineErrorListener * pEC)730 CRef<CSeq_feat> CBedReader::xAppendFeatureCds(
731 const CBedColumnData& columnData,
732 CSeq_annot& annot,
733 unsigned int baseId,
734 ILineErrorListener* pEC)
735 // ----------------------------------------------------------------------------
736 {
737 CSeq_annot::C_Data::TFtable& ftable = annot.SetData().SetFtable();
738 CRef<CSeq_feat> feature;
739 feature.Reset(new CSeq_feat);
740
741 xSetFeatureLocationCds(feature, columnData);
742 xSetFeatureIdsCds(feature, columnData, baseId);
743 xSetFeatureBedData(feature, columnData, pEC);
744
745 ftable.push_back(feature);
746 return feature;
747 }
748
749 // ----------------------------------------------------------------------------
xAppendFeatureBlock(const CBedColumnData & columnData,CSeq_annot & annot,unsigned int baseId,ILineErrorListener * pEC)750 bool CBedReader::xAppendFeatureBlock(
751 const CBedColumnData& columnData,
752 CSeq_annot& annot,
753 unsigned int baseId,
754 ILineErrorListener* pEC)
755 // ----------------------------------------------------------------------------
756 {
757 CSeq_annot::C_Data::TFtable& ftable = annot.SetData().SetFtable();
758 CRef<CSeq_feat> feature;
759 feature.Reset(new CSeq_feat);
760
761 xSetFeatureLocationBlock(feature, columnData);
762 xSetFeatureIdsBlock(feature, columnData, baseId);
763 xSetFeatureBedData(feature, columnData, pEC);
764
765 ftable.push_back(feature);
766 return true;
767 }
768
769 // ----------------------------------------------------------------------------
xAppendFeatureRna(const CBedColumnData & columnData,CSeq_annot & annot,unsigned int baseId,ILineErrorListener * pEC)770 CRef<CSeq_feat> CBedReader::xAppendFeatureRna(
771 const CBedColumnData& columnData,
772 CSeq_annot& annot,
773 unsigned int baseId,
774 ILineErrorListener* pEC)
775 // ----------------------------------------------------------------------------
776 {
777 CSeq_annot::C_Data::TFtable& ftable = annot.SetData().SetFtable();
778 CRef<CSeq_feat> feature;
779 feature.Reset(new CSeq_feat);
780
781 xSetFeatureLocationRna(feature, columnData);
782 xSetFeatureIdsRna(feature, columnData, baseId);
783 xSetFeatureBedData(feature, columnData, pEC);
784
785 ftable.push_back(feature);
786 return feature;
787 }
788
789
790 // ----------------------------------------------------------------------------
xParseFeatureUserFormat(const CBedColumnData & columnData,CSeq_annot & annot,ILineErrorListener * pEC)791 bool CBedReader::xParseFeatureUserFormat(
792 const CBedColumnData& columnData,
793 CSeq_annot& annot,
794 ILineErrorListener* pEC)
795 // ----------------------------------------------------------------------------
796 {
797 // assign
798 CSeq_annot::C_Data::TFtable& ftable = annot.SetData().SetFtable();
799 CRef<CSeq_feat> feature;
800 feature.Reset( new CSeq_feat );
801
802 xSetFeatureTitle(feature, columnData);
803 xSetFeatureLocation(feature, columnData);
804 xSetFeatureDisplayData(feature, columnData);
805
806 ftable.push_back( feature );
807 m_currentId = columnData[0];
808 return true;
809 }
810
811 // ----------------------------------------------------------------------------
xParseFeatureAutoSql(const CBedColumnData & columnData,CSeq_annot & annot,ILineErrorListener * pEC)812 bool CBedReader::xParseFeatureAutoSql(
813 const CBedColumnData& columnData,
814 CSeq_annot& annot,
815 ILineErrorListener* pEC)
816 // ----------------------------------------------------------------------------
817 {
818 CRef<CSeq_feat> pFeat(new CSeq_feat);;
819 if (!mpAutoSql->ReadSeqFeat(columnData, *pFeat, *m_pMessageHandler)) {
820 return false;
821 }
822 CSeq_annot::C_Data::TFtable& ftable = annot.SetData().SetFtable();
823 ftable.push_back(pFeat);
824 m_currentId = columnData[0];
825 return true;
826 }
827
828
829 // ----------------------------------------------------------------------------
xSetFeatureDisplayData(CRef<CSeq_feat> & feature,const CBedColumnData & columnData)830 void CBedReader::xSetFeatureDisplayData(
831 CRef<CSeq_feat>& feature,
832 const CBedColumnData& columnData)
833 // ----------------------------------------------------------------------------
834 {
835 CRef<CUser_object> display_data( new CUser_object );
836 display_data->SetType().SetStr( "Display Data" );
837 if (mValidColumnCount >= 4) {
838 display_data->AddField( "name", columnData[3] );
839 }
840 else {
841 display_data->AddField( "name", string("") );
842 feature->SetData().SetUser( *display_data );
843 return;
844 }
845 if (mValidColumnCount >= 5) {
846 if ( !m_usescore ) {
847 display_data->AddField(
848 "score",
849 NStr::StringToInt(columnData[4],
850 NStr::fConvErr_NoThrow|NStr::fAllowTrailingSymbols) );
851 }
852 else {
853 display_data->AddField(
854 "greylevel",
855 NStr::StringToInt(columnData[4],
856 NStr::fConvErr_NoThrow|NStr::fAllowTrailingSymbols) );
857 }
858 }
859 if (mValidColumnCount >= 7) {
860 display_data->AddField(
861 "thickStart",
862 NStr::StringToInt(columnData[6], NStr::fDS_ProhibitFractions) );
863 }
864 if (mValidColumnCount >= 8) {
865 display_data->AddField(
866 "thickEnd",
867 NStr::StringToInt(columnData[7], NStr::fDS_ProhibitFractions) - 1 );
868 }
869 if (mValidColumnCount >= 9) {
870 display_data->AddField(
871 "itemRGB",
872 columnData[8]);
873 }
874 if (mValidColumnCount >= 10) {
875 display_data->AddField(
876 "blockCount",
877 NStr::StringToInt(columnData[9], NStr::fDS_ProhibitFractions) );
878 }
879 if (mValidColumnCount >= 11) {
880 display_data->AddField( "blockSizes", columnData[10] );
881 }
882 if (mValidColumnCount >= 12) {
883 display_data->AddField( "blockStarts", columnData[11] );
884 }
885 feature->SetData().SetUser( *display_data );
886 }
887
888 // ----------------------------------------------------------------------------
xSetFeatureLocationChrom(CRef<CSeq_feat> & feature,const CBedColumnData & columnData)889 void CBedReader::xSetFeatureLocationChrom(
890 CRef<CSeq_feat>& feature,
891 const CBedColumnData& columnData)
892 // ----------------------------------------------------------------------------
893 {
894 xSetFeatureLocation(feature, columnData);
895
896 CRef<CUser_object> pBed(new CUser_object());
897 pBed->SetType().SetStr("BED");
898 pBed->AddField("location", "chrom");
899 CSeq_feat::TExts& exts = feature->SetExts();
900 exts.push_back(pBed);
901 }
902
903 // ----------------------------------------------------------------------------
xSetFeatureLocationGene(CRef<CSeq_feat> & feature,const CBedColumnData & columnData)904 void CBedReader::xSetFeatureLocationGene(
905 CRef<CSeq_feat>& feature,
906 const CBedColumnData& columnData)
907 // ----------------------------------------------------------------------------
908 {
909 xSetFeatureLocation(feature, columnData);
910
911 CRef<CUser_object> pBed(new CUser_object());
912 pBed->SetType().SetStr("BED");
913 pBed->AddField("location", "chrom");
914 CSeq_feat::TExts& exts = feature->SetExts();
915 exts.push_back(pBed);
916 }
917
918 // ----------------------------------------------------------------------------
xSetFeatureLocationThick(CRef<CSeq_feat> & feature,const CBedColumnData & columnData)919 void CBedReader::xSetFeatureLocationThick(
920 CRef<CSeq_feat>& feature,
921 const CBedColumnData& columnData)
922 // ----------------------------------------------------------------------------
923 {
924 CRef<CSeq_loc> location(new CSeq_loc);
925 int from, to;
926 from = to = -1;
927
928 //already established: We got at least three columns
929 try {
930 from = NStr::StringToInt(columnData[6]);
931 }
932 catch (std::exception&) {
933 CReaderMessage error(
934 eDiag_Error,
935 m_uLineNumber,
936 "Invalid data line: Bad \"ThickStart\" value.");
937 throw error;
938 }
939 try {
940 to = NStr::StringToInt(columnData[7]) - 1;
941 }
942 catch (std::exception&) {
943 CReaderMessage error(
944 eDiag_Error,
945 m_uLineNumber,
946 "Invalid data line: Bad \"ThickStop\" value.");
947 throw error;
948 }
949 if (from == to) {
950 location->SetPnt().SetPoint(from);
951 }
952 else if (from < to) {
953 location->SetInt().SetFrom(from);
954 location->SetInt().SetTo(to);
955 }
956 else if (from > to) {
957 //below: flip commenting to switch from null locations to impossible
958 // intervals
959 //location->SetInt().SetFrom(from);
960 //location->SetInt().SetTo(to);
961 location->SetNull();
962 }
963
964 if (!location->IsNull()) {
965 location->SetStrand(xGetStrand(columnData));
966 }
967 CRef<CSeq_id> id = CReadUtil::AsSeqId(columnData[0], m_iFlags, false);
968 location->SetId(*id);
969 feature->SetLocation(*location);
970
971 CRef<CUser_object> pBed(new CUser_object());
972 pBed->SetType().SetStr("BED");
973 pBed->AddField("location", "thick");
974 CSeq_feat::TExts& exts = feature->SetExts();
975 exts.push_back(pBed);
976 }
977
978 // ----------------------------------------------------------------------------
xSetFeatureLocationCds(CRef<CSeq_feat> & feature,const CBedColumnData & columnData)979 void CBedReader::xSetFeatureLocationCds(
980 CRef<CSeq_feat>& feature,
981 const CBedColumnData& columnData)
982 // ----------------------------------------------------------------------------
983 {
984 CRef<CSeq_loc> location(new CSeq_loc);
985 int from, to;
986 from = to = -1;
987
988 //already established: We got at least three columns
989 try {
990 from = NStr::StringToInt(columnData[6]);
991 }
992 catch (std::exception&) {
993 CReaderMessage error(
994 eDiag_Error,
995 m_uLineNumber,
996 "Invalid data line: Bad \"ThickStart\" value.");
997 throw error;
998 }
999 try {
1000 to = NStr::StringToInt(columnData[7]) - 1;
1001 }
1002 catch (std::exception&) {
1003 CReaderMessage error(
1004 eDiag_Error,
1005 m_uLineNumber,
1006 "Invalid data line: Bad \"ThickStop\" value.");
1007 throw error;
1008 }
1009 if (from == to) {
1010 location->SetPnt().SetPoint(from);
1011 }
1012 else if (from < to) {
1013 location->SetInt().SetFrom(from);
1014 location->SetInt().SetTo(to);
1015 }
1016 else if (from > to) {
1017 //below: flip commenting to switch from null locations to impossible
1018 // intervals
1019 //location->SetInt().SetFrom(from);
1020 //location->SetInt().SetTo(to);
1021 location->SetNull();
1022 }
1023
1024 if (!location->IsNull()) {
1025 location->SetStrand(xGetStrand(columnData));
1026 }
1027 CRef<CSeq_id> id = CReadUtil::AsSeqId(columnData[0], m_iFlags, false);
1028 location->SetId(*id);
1029 feature->SetLocation(*location);
1030
1031 CRef<CUser_object> pBed(new CUser_object());
1032 pBed->SetType().SetStr("BED");
1033 pBed->AddField("location", "thick");
1034 CSeq_feat::TExts& exts = feature->SetExts();
1035 exts.push_back(pBed);
1036 }
1037
1038 // ----------------------------------------------------------------------------
xGetStrand(const CBedColumnData & columnData) const1039 ENa_strand CBedReader::xGetStrand(
1040 const CBedColumnData& columnData) const
1041 // ----------------------------------------------------------------------------
1042 {
1043 size_t strand_field = 5;
1044 if (columnData.ColumnCount() == 5 &&
1045 (columnData[4] == "-" || columnData[4] == "+")) {
1046 strand_field = 4;
1047 }
1048 if (strand_field < columnData.ColumnCount()) {
1049 string strand = columnData[strand_field];
1050 if (strand != "+" && strand != "-" && strand != ".") {
1051 CReaderMessage error(
1052 eDiag_Error,
1053 m_uLineNumber,
1054 "Invalid data line: Invalid strand character.");
1055 throw error;
1056 }
1057 }
1058 return (columnData[strand_field] == "-" ? eNa_strand_minus : eNa_strand_plus);
1059 }
1060
1061 // ----------------------------------------------------------------------------
xSetFeatureLocationBlock(CRef<CSeq_feat> & feature,const CBedColumnData & columnData)1062 void CBedReader::xSetFeatureLocationBlock(
1063 CRef<CSeq_feat>& feature,
1064 const CBedColumnData& columnData)
1065 // ----------------------------------------------------------------------------
1066 {
1067 //already established: there are sufficient columns to do this
1068 size_t blockCount = NStr::StringToUInt(columnData[9]);
1069 vector<size_t> blockSizes;
1070 vector<size_t> blockStarts;
1071 {{
1072 blockSizes.reserve(blockCount);
1073 vector<string> vals;
1074 NStr::Split(columnData[10], ",", vals);
1075 if (vals.back() == "") {
1076 vals.erase(vals.end()-1);
1077 }
1078 if (vals.size() != blockCount) {
1079 CReaderMessage error(
1080 eDiag_Error,
1081 columnData.LineNo(),
1082 "Invalid data line: Bad value count in \"blockSizes\".");
1083 throw error;
1084 }
1085 try {
1086 for (size_t i=0; i < blockCount; ++i) {
1087 blockSizes.push_back(NStr::StringToUInt(vals[i]));
1088 }
1089 }
1090 catch (std::exception&) {
1091 CReaderMessage error(
1092 eDiag_Error,
1093 columnData.LineNo(),
1094 "Invalid data line: Malformed \"blockSizes\" column.");
1095 throw error;
1096 }
1097 }}
1098 {{
1099 blockStarts.reserve(blockCount);
1100 vector<string> vals;
1101 size_t baseStart = NStr::StringToUInt(columnData[1]);
1102 NStr::Split(columnData[11], ",", vals);
1103 if (vals.back() == "") {
1104 vals.erase(vals.end()-1);
1105 }
1106 if (vals.size() != blockCount) {
1107 CReaderMessage error(
1108 eDiag_Error,
1109 columnData.LineNo(),
1110 "Invalid data line: Bad value count in \"blockStarts\".");
1111 throw error;
1112 }
1113 try {
1114 for (size_t i=0; i < blockCount; ++i) {
1115 blockStarts.push_back(baseStart + NStr::StringToUInt(vals[i]));
1116 }
1117 }
1118 catch (std::exception&) {
1119 CReaderMessage error(
1120 eDiag_Error,
1121 columnData.LineNo(),
1122 "Invalid data line: Malformed \"blockStarts\" column.");
1123 throw error;
1124 }
1125 }}
1126
1127 CPacked_seqint& location = feature->SetLocation().SetPacked_int();
1128 ENa_strand strand = xGetStrand(columnData);
1129 CRef<CSeq_id> pId = CReadUtil::AsSeqId(columnData[0], m_iFlags, false);
1130
1131 bool negative = columnData[5] == "-";
1132
1133 CPacked_seqint::Tdata& blocks = location.Set();
1134
1135 for (size_t i=0; i < blockCount; ++i) {
1136 CRef<CSeq_interval> pInterval(new CSeq_interval);
1137 pInterval->SetId(*pId);
1138 pInterval->SetFrom(static_cast<CSeq_interval::TFrom>(blockStarts[i]));
1139 pInterval->SetTo(static_cast<CSeq_interval::TTo>(
1140 blockStarts[i] + blockSizes[i] - 1));
1141 pInterval->SetStrand(strand);
1142 if (negative)
1143 blocks.insert(blocks.begin(), pInterval);
1144 else
1145 blocks.push_back(pInterval);
1146 }
1147
1148 CRef<CUser_object> pBed(new CUser_object());
1149 pBed->SetType().SetStr("BED");
1150 pBed->AddField("location", "block");
1151 CSeq_feat::TExts& exts = feature->SetExts();
1152 exts.push_back(pBed);
1153 }
1154
1155 // ----------------------------------------------------------------------------
xSetFeatureLocationRna(CRef<CSeq_feat> & feature,const CBedColumnData & columnData)1156 void CBedReader::xSetFeatureLocationRna(
1157 CRef<CSeq_feat>& feature,
1158 const CBedColumnData& columnData)
1159 // ----------------------------------------------------------------------------
1160 {
1161 //already established: there are sufficient columns to do this
1162 size_t blockCount = NStr::StringToUInt(columnData[9]);
1163 vector<size_t> blockSizes;
1164 vector<size_t> blockStarts;
1165 {{
1166 blockSizes.reserve(blockCount);
1167 vector<string> vals;
1168 NStr::Split(columnData[10], ",", vals);
1169 if (vals.back() == "") {
1170 vals.erase(vals.end()-1);
1171 }
1172 if (vals.size() != blockCount) {
1173 CReaderMessage error(
1174 eDiag_Error,
1175 columnData.LineNo(),
1176 "Invalid data line: Bad value count in \"blockSizes\".");
1177 throw error;
1178 }
1179 try {
1180 for (size_t i=0; i < blockCount; ++i) {
1181 blockSizes.push_back(NStr::StringToUInt(vals[i]));
1182 }
1183 }
1184 catch (std::exception&) {
1185 CReaderMessage error(
1186 eDiag_Error,
1187 columnData.LineNo(),
1188 "Invalid data line: Malformed \"blockSizes\" column.");
1189 throw error;
1190 }
1191 }}
1192 {{
1193 blockStarts.reserve(blockCount);
1194 vector<string> vals;
1195 size_t baseStart = NStr::StringToUInt(columnData[1]);
1196 NStr::Split(columnData[11], ",", vals);
1197 if (vals.back() == "") {
1198 vals.erase(vals.end()-1);
1199 }
1200 if (vals.size() != blockCount) {
1201 CReaderMessage error(
1202 eDiag_Error,
1203 columnData.LineNo(),
1204 "Invalid data line: Bad value count in \"blockStarts\".");
1205 throw error;
1206 }
1207 try {
1208 for (size_t i=0; i < blockCount; ++i) {
1209 blockStarts.push_back(baseStart + NStr::StringToUInt(vals[i]));
1210 }
1211 }
1212 catch (std::exception&) {
1213 CReaderMessage error(
1214 eDiag_Error,
1215 columnData.LineNo(),
1216 "Invalid data line: Malformed \"blockStarts\" column.");
1217 throw error;
1218 }
1219 }}
1220
1221 CPacked_seqint& location = feature->SetLocation().SetPacked_int();
1222 ENa_strand strand = xGetStrand(columnData);
1223 CRef<CSeq_id> pId = CReadUtil::AsSeqId(columnData[0], m_iFlags, false);
1224
1225 bool negative = columnData[5] == "-";
1226
1227 CPacked_seqint::Tdata& blocks = location.Set();
1228
1229 for (size_t i=0; i < blockCount; ++i) {
1230 CRef<CSeq_interval> pInterval(new CSeq_interval);
1231 pInterval->SetId(*pId);
1232 pInterval->SetFrom(static_cast<CSeq_interval::TFrom>(blockStarts[i]));
1233 pInterval->SetTo(static_cast<CSeq_interval::TTo>(
1234 blockStarts[i] + blockSizes[i] -1));
1235 pInterval->SetStrand(strand);
1236 if (negative)
1237 blocks.insert(blocks.begin(), pInterval);
1238 else
1239 blocks.push_back(pInterval);
1240 }
1241
1242 CRef<CUser_object> pBed(new CUser_object());
1243 pBed->SetType().SetStr("BED");
1244 pBed->AddField("location", "block");
1245 CSeq_feat::TExts& exts = feature->SetExts();
1246 exts.push_back(pBed);
1247 }
1248
1249 // ----------------------------------------------------------------------------
xSetFeatureIdsChrom(CRef<CSeq_feat> & feature,const CBedColumnData & columnData,unsigned int baseId)1250 void CBedReader::xSetFeatureIdsChrom(
1251 CRef<CSeq_feat>& feature,
1252 const CBedColumnData& columnData,
1253 unsigned int baseId)
1254 // ----------------------------------------------------------------------------
1255 {
1256 baseId++; //0-based to 1-based
1257 feature->SetId().SetLocal().SetId(baseId);
1258
1259 if (xContainsThickFeature(columnData)) {
1260 CRef<CFeat_id> pIdThick(new CFeat_id);
1261 pIdThick->SetLocal().SetId(baseId+1);
1262 CRef<CSeqFeatXref> pXrefThick(new CSeqFeatXref);
1263 pXrefThick->SetId(*pIdThick);
1264 feature->SetXref().push_back(pXrefThick);
1265 }
1266
1267 if (xContainsBlockFeature(columnData)) {
1268 CRef<CFeat_id> pIdBlock(new CFeat_id);
1269 pIdBlock->SetLocal().SetId(baseId+2);
1270 CRef<CSeqFeatXref> pXrefBlock(new CSeqFeatXref);
1271 pXrefBlock->SetId(*pIdBlock);
1272 feature->SetXref().push_back(pXrefBlock);
1273 }
1274 }
1275
1276 // ----------------------------------------------------------------------------
xSetFeatureIdsGene(CRef<CSeq_feat> & feature,const CBedColumnData & columnData,unsigned int baseId)1277 void CBedReader::xSetFeatureIdsGene(
1278 CRef<CSeq_feat>& feature,
1279 const CBedColumnData& columnData,
1280 unsigned int baseId)
1281 // ----------------------------------------------------------------------------
1282 {
1283 baseId++; //0-based to 1-based
1284 feature->SetId().SetLocal().SetId(baseId);
1285 }
1286
1287 // ----------------------------------------------------------------------------
xSetFeatureIdsThick(CRef<CSeq_feat> & feature,const CBedColumnData & columnData,unsigned int baseId)1288 void CBedReader::xSetFeatureIdsThick(
1289 CRef<CSeq_feat>& feature,
1290 const CBedColumnData& columnData,
1291 unsigned int baseId)
1292 // ----------------------------------------------------------------------------
1293 {
1294 baseId++; //0-based to 1-based
1295 feature->SetId().SetLocal().SetId(baseId+1);
1296
1297 CRef<CFeat_id> pIdChrom(new CFeat_id);
1298 pIdChrom->SetLocal().SetId(baseId);
1299 CRef<CSeqFeatXref> pXrefChrom(new CSeqFeatXref);
1300 pXrefChrom->SetId(*pIdChrom);
1301 feature->SetXref().push_back(pXrefChrom);
1302
1303 if (xContainsBlockFeature(columnData)) {
1304 CRef<CFeat_id> pIdBlock(new CFeat_id);
1305 pIdBlock->SetLocal().SetId(baseId+2);
1306 CRef<CSeqFeatXref> pXrefBlock(new CSeqFeatXref);
1307 pXrefBlock->SetId(*pIdBlock);
1308 feature->SetXref().push_back(pXrefBlock);
1309 }
1310 }
1311
1312 // ----------------------------------------------------------------------------
xSetFeatureIdsCds(CRef<CSeq_feat> & feature,const CBedColumnData & columnData,unsigned int baseId)1313 void CBedReader::xSetFeatureIdsCds(
1314 CRef<CSeq_feat>& feature,
1315 const CBedColumnData& columnData,
1316 unsigned int baseId)
1317 // ----------------------------------------------------------------------------
1318 {
1319 baseId++; //0-based to 1-based
1320 feature->SetId().SetLocal().SetId(baseId+1);
1321
1322 if (xContainsBlockFeature(columnData)) {
1323 CRef<CFeat_id> pIdBlock(new CFeat_id);
1324 pIdBlock->SetLocal().SetId(baseId+2);
1325 CRef<CSeqFeatXref> pXrefBlock(new CSeqFeatXref);
1326 pXrefBlock->SetId(*pIdBlock);
1327 feature->SetXref().push_back(pXrefBlock);
1328 }
1329 else {
1330 CRef<CFeat_id> pIdChrom(new CFeat_id);
1331 pIdChrom->SetLocal().SetId(baseId);
1332 CRef<CSeqFeatXref> pXrefChrom(new CSeqFeatXref);
1333 pXrefChrom->SetId(*pIdChrom);
1334 feature->SetXref().push_back(pXrefChrom);
1335 }
1336 }
1337
1338 // ----------------------------------------------------------------------------
xSetFeatureIdsBlock(CRef<CSeq_feat> & feature,const CBedColumnData & columnData,unsigned int baseId)1339 void CBedReader::xSetFeatureIdsBlock(
1340 CRef<CSeq_feat>& feature,
1341 const CBedColumnData& columnData,
1342 unsigned int baseId)
1343 // ----------------------------------------------------------------------------
1344 {
1345 baseId++; //0-based to 1-based
1346 feature->SetId().SetLocal().SetId(baseId+2);
1347
1348 CRef<CFeat_id> pIdChrom(new CFeat_id);
1349 pIdChrom->SetLocal().SetId(baseId);
1350 CRef<CSeqFeatXref> pXrefChrom(new CSeqFeatXref);
1351 pXrefChrom->SetId(*pIdChrom);
1352 feature->SetXref().push_back(pXrefChrom);
1353
1354 if (xContainsThickFeature(columnData)) {
1355 CRef<CFeat_id> pIdThick(new CFeat_id);
1356 pIdThick->SetLocal().SetId(baseId+1);
1357 CRef<CSeqFeatXref> pXrefBlock(new CSeqFeatXref);
1358 pXrefBlock->SetId(*pIdThick);
1359 feature->SetXref().push_back(pXrefBlock);
1360 }
1361 }
1362
1363 // ----------------------------------------------------------------------------
xSetFeatureIdsRna(CRef<CSeq_feat> & feature,const CBedColumnData & columnData,unsigned int baseId)1364 void CBedReader::xSetFeatureIdsRna(
1365 CRef<CSeq_feat>& feature,
1366 const CBedColumnData& columnData,
1367 unsigned int baseId)
1368 // ----------------------------------------------------------------------------
1369 {
1370 baseId++; //0-based to 1-based
1371 feature->SetId().SetLocal().SetId(baseId+2);
1372
1373 CRef<CFeat_id> pIdChrom(new CFeat_id);
1374 pIdChrom->SetLocal().SetId(baseId);
1375 CRef<CSeqFeatXref> pXrefChrom(new CSeqFeatXref);
1376 pXrefChrom->SetId(*pIdChrom);
1377 feature->SetXref().push_back(pXrefChrom);
1378 }
1379
1380 // ----------------------------------------------------------------------------
xSetFeatureTitle(CRef<CSeq_feat> & feature,const CBedColumnData & columnData)1381 void CBedReader::xSetFeatureTitle(
1382 CRef<CSeq_feat>& feature,
1383 const CBedColumnData& columnData)
1384 // ----------------------------------------------------------------------------
1385 {
1386 if (columnData.ColumnCount() >= 4 &&
1387 !columnData[3].empty() && columnData[3] != ".") {
1388 feature->SetTitle(columnData[0]);
1389 }
1390 else {
1391 feature->SetTitle(string("line_") + NStr::IntToString(m_uLineNumber));
1392 }
1393 }
1394
1395
1396 // ----------------------------------------------------------------------------
xSetFeatureScore(CRef<CUser_object> pDisplayData,const CBedColumnData & columnData)1397 void CBedReader::xSetFeatureScore(
1398 CRef<CUser_object> pDisplayData,
1399 const CBedColumnData& columnData)
1400 // ----------------------------------------------------------------------------
1401 {
1402 CReaderMessage error(
1403 eDiag_Error,
1404 columnData.LineNo(),
1405 "Invalid data line: Bad \"score\" value.");
1406
1407 string trackUseScore = m_pTrackDefaults->ValueOf("useScore");
1408 if (columnData.ColumnCount() < 5 || trackUseScore == "1") {
1409 //record does not carry score information
1410 return;
1411 }
1412
1413 int int_score = NStr::StringToInt(columnData[4], NStr::fConvErr_NoThrow );
1414 double d_score = 0;
1415
1416 if (int_score == 0 && columnData[4].compare("0") != 0) {
1417 try {
1418 d_score = NStr::StringToDouble(columnData[4]);
1419 }
1420 catch(std::exception&) {
1421 throw error;
1422 }
1423 }
1424
1425 if (d_score < 0 || int_score < 0) {
1426 throw error;
1427 }
1428 else if (d_score > 0) {
1429 pDisplayData->AddField("score", d_score);
1430 }
1431 else {
1432 pDisplayData->AddField("score", int_score);
1433 }
1434 }
1435
1436
1437 // ----------------------------------------------------------------------------
xSetFeatureColor(CRef<CUser_object> pDisplayData,const CBedColumnData & columnData,ILineErrorListener * pEC)1438 void CBedReader::xSetFeatureColor(
1439 CRef<CUser_object> pDisplayData,
1440 const CBedColumnData& columnData,
1441 ILineErrorListener* pEC )
1442 // ----------------------------------------------------------------------------
1443 {
1444 //1: if track line itemRgb is set, try that first:
1445 string trackItemRgb = m_pTrackDefaults->ValueOf("itemRgb");
1446 if (trackItemRgb == "On" && columnData.ColumnCount() >= 9) {
1447 string featItemRgb = columnData[8];
1448 if (featItemRgb != ".") {
1449 xSetFeatureColorFromItemRgb(pDisplayData, featItemRgb, pEC);
1450 return;
1451 }
1452 }
1453
1454 //2: if track useScore is set, try that next:
1455 string trackUseScore = m_pTrackDefaults->ValueOf("useScore");
1456 if (trackUseScore == "1" && columnData.ColumnCount() >= 5) {
1457 string featScore = columnData[4];
1458 if (featScore != ".") {
1459 xSetFeatureColorFromScore(pDisplayData, featScore);
1460 return;
1461 }
1462 }
1463
1464 //3: if track colorByStrand is set, try that next:
1465 string trackColorByStrand = m_pTrackDefaults->ValueOf("colorByStrand");
1466 if (!trackColorByStrand.empty() && columnData.ColumnCount() >= 6) {
1467 ENa_strand strand =
1468 (columnData[5] == "-") ? eNa_strand_minus : eNa_strand_plus;
1469 xSetFeatureColorByStrand(pDisplayData, trackColorByStrand, strand, pEC);
1470 return;
1471 }
1472 //4: if none of the track color attributes are set, attempt feature itemRgb:
1473 if (columnData.ColumnCount() >= 9) {
1474 string featItemRgb = columnData[8];
1475 if (featItemRgb != ".") {
1476 xSetFeatureColorFromItemRgb(pDisplayData, featItemRgb, pEC);
1477 return;
1478 }
1479 }
1480
1481 //5: if still here, assign default color:
1482 xSetFeatureColorDefault(pDisplayData);
1483 }
1484
1485 // ----------------------------------------------------------------------------
xSetFeatureColorDefault(CRef<CUser_object> pDisplayData)1486 void CBedReader::xSetFeatureColorDefault(
1487 CRef<CUser_object> pDisplayData)
1488 // ----------------------------------------------------------------------------
1489 {
1490 const string colorDefault("0 0 0");
1491 pDisplayData->AddField("color", colorDefault);
1492 }
1493
1494 // ----------------------------------------------------------------------------
xSetFeatureColorByStrand(CRef<CUser_object> pDisplayData,const string & trackColorByStrand,ENa_strand strand,ILineErrorListener * pEC)1495 void CBedReader::xSetFeatureColorByStrand(
1496 CRef<CUser_object> pDisplayData,
1497 const string& trackColorByStrand,
1498 ENa_strand strand,
1499 ILineErrorListener* pEC)
1500 // ----------------------------------------------------------------------------
1501 {
1502 try {
1503 string colorPlus, colorMinus;
1504 NStr::SplitInTwo(trackColorByStrand, " ", colorPlus, colorMinus);
1505 string useColor = (strand == eNa_strand_minus) ? colorMinus : colorPlus;
1506 xSetFeatureColorFromItemRgb(pDisplayData, useColor, pEC);
1507 }
1508 catch (std::exception&) {
1509 CReaderMessage error(
1510 eDiag_Error,
1511 m_uLineNumber,
1512 "Invalid track line: Bad colorByStrand value.");
1513 throw error;
1514 }
1515 }
1516
1517 // ----------------------------------------------------------------------------
xSetFeatureColorFromScore(CRef<CUser_object> pDisplayData,const string & featScore)1518 void CBedReader::xSetFeatureColorFromScore(
1519 CRef<CUser_object> pDisplayData,
1520 const string& featScore )
1521 // ----------------------------------------------------------------------------
1522 {
1523 CReaderMessage error(
1524 eDiag_Error,
1525 m_uLineNumber,
1526 "Invalid data line: Bad score value to be used for color.");
1527
1528 int score = 0;
1529 try {
1530 score = static_cast<int>(NStr::StringToDouble(featScore));
1531 }
1532 catch (const std::exception&) {
1533 throw error;
1534 }
1535 if (score < 0 || 1000 < score) {
1536 throw error;
1537 }
1538 string greyValue = NStr::DoubleToString(255 - (score/4));
1539 vector<string> srgb{ greyValue, greyValue, greyValue};
1540 string rgbValue = NStr::Join(srgb, " ");
1541 pDisplayData->AddField("color", rgbValue);
1542 }
1543
1544 // ----------------------------------------------------------------------------
xSetFeatureColorFromItemRgb(CRef<CUser_object> pDisplayData,const string & itemRgb,ILineErrorListener * pEC)1545 void CBedReader::xSetFeatureColorFromItemRgb(
1546 CRef<CUser_object> pDisplayData,
1547 const string& itemRgb,
1548 ILineErrorListener* pEC )
1549 // ----------------------------------------------------------------------------
1550 {
1551 CReaderMessage warning(
1552 eDiag_Warning,
1553 m_uLineNumber,
1554 "Bad color value - converted to BLACK.");
1555 const string rgbDefault = "0 0 0";
1556
1557 //optimization for common case:
1558 if (itemRgb == "0") {
1559 pDisplayData->AddField("color", rgbDefault);
1560 return;
1561 }
1562
1563 vector<string> srgb;
1564 NStr::Split(itemRgb, ",", srgb);
1565
1566 if (srgb.size() == 3) {
1567 auto valuesOk = true;
1568 for (auto i=0; i<3; ++i) {
1569 int test;
1570 try {
1571 test = NStr::StringToInt(srgb[i], NStr::fDS_ProhibitFractions);
1572 }
1573 catch(CException&) {
1574 valuesOk = false;
1575 break;
1576 }
1577 if ((test < 0) || (256 <= test)) {
1578 valuesOk = false;
1579 break;
1580 }
1581 }
1582 if (!valuesOk) {
1583 m_pMessageHandler->Report(warning);
1584 pDisplayData->AddField("color", rgbDefault);
1585 return;
1586 }
1587 auto outValue = srgb[0] + " " + srgb[1] + " " + srgb[2];
1588 pDisplayData->AddField("color", outValue);
1589 return;
1590 }
1591
1592 if (srgb.size() == 1) {
1593 auto assumeHex = false;
1594 string itemRgbCopy(itemRgb);
1595 if (NStr::StartsWith(itemRgbCopy, "0x")) {
1596 assumeHex = true;
1597 itemRgbCopy = itemRgb.substr(2);
1598 }
1599 else if (NStr::StartsWith(itemRgbCopy, "#")) {
1600 assumeHex = true;
1601 itemRgbCopy = itemRgbCopy.substr(1);
1602 }
1603 unsigned long colorValue;
1604 int radix = (assumeHex ? 16 : 10);
1605 try {
1606 colorValue = NStr::StringToULong(
1607 itemRgbCopy, NStr::fDS_ProhibitFractions, radix);
1608 }
1609 catch (CStringException&) {
1610 m_pMessageHandler->Report(warning);
1611 pDisplayData->AddField("color", rgbDefault);
1612 return;
1613 }
1614 int blue = colorValue & 0xFF;
1615 colorValue >>= 8;
1616 int green = colorValue & 0xFF;
1617 colorValue >>= 8;
1618 int red = colorValue & 0xFF;
1619 auto outValue = NStr::IntToString(red) + " " + NStr::IntToString(green) +
1620 " " + NStr::IntToString(blue);
1621 pDisplayData->AddField("color", outValue);
1622 return;
1623 }
1624
1625 m_pMessageHandler->Report(warning);
1626 pDisplayData->AddField("color", rgbDefault);
1627 return;
1628 }
1629
1630 // ----------------------------------------------------------------------------
xSetFeatureBedData(CRef<CSeq_feat> & feature,const CBedColumnData & columnData,ILineErrorListener * pEc)1631 void CBedReader::xSetFeatureBedData(
1632 CRef<CSeq_feat>& feature,
1633 const CBedColumnData& columnData,
1634 ILineErrorListener* pEc )
1635 // ----------------------------------------------------------------------------
1636 {
1637 CSeqFeatData& data = feature->SetData();
1638 if (columnData.ColumnCount() >= 4 && columnData[3] != ".") {
1639 data.SetRegion() = columnData[3];
1640 }
1641 else {
1642 data.SetRegion() = columnData[0];
1643 }
1644
1645 CRef<CUser_object> pDisplayData(new CUser_object());
1646
1647 CSeq_feat::TExts& exts = feature->SetExts();
1648 pDisplayData->SetType().SetStr("DisplaySettings");
1649 exts.push_front(pDisplayData);
1650
1651 xSetFeatureScore(pDisplayData, columnData);
1652 xSetFeatureColor(pDisplayData, columnData, pEc);
1653 }
1654
1655 // ----------------------------------------------------------------------------
xSetFeatureLocation(CRef<CSeq_feat> & feature,const CBedColumnData & columnData)1656 void CBedReader::xSetFeatureLocation(
1657 CRef<CSeq_feat>& feature,
1658 const CBedColumnData& columnData )
1659 // ----------------------------------------------------------------------------
1660 {
1661 //
1662 // Note:
1663 // BED convention for specifying intervals is 0-based, first in, first out.
1664 // ASN convention for specifying intervals is 0-based, first in, last in.
1665 // Hence, conversion BED->ASN leaves the first leaves the "from" coordinate
1666 // unchanged, and decrements the "to" coordinate by one.
1667 //
1668
1669 CRef<CSeq_loc> location(new CSeq_loc);
1670 int from, to;
1671 from = to = -1;
1672
1673 //already established: We got at least three columns
1674 try {
1675 from = NStr::StringToInt(columnData[1]);
1676 }
1677 catch(std::exception&) {
1678 CReaderMessage error(
1679 eDiag_Error,
1680 columnData.LineNo(),
1681 "Invalid data line: Bad \"SeqStart\" value.");
1682 throw error;
1683 }
1684 try {
1685 to = NStr::StringToInt(columnData[2]) - 1;
1686 }
1687 catch(std::exception&) {
1688 CReaderMessage error(
1689 eDiag_Error,
1690 columnData.LineNo(),
1691 "Invalid data line: Bad \"SeqStop\" value.");
1692 throw error;
1693 }
1694 if (from == to) {
1695 location->SetPnt().SetPoint(from);
1696 }
1697 else if (from < to) {
1698 location->SetInt().SetFrom(from);
1699 location->SetInt().SetTo(to);
1700 }
1701 else {
1702 CReaderMessage error(
1703 eDiag_Error,
1704 columnData.LineNo(),
1705 "Invalid data line: \"SeqStop\" less than \"SeqStart\".");
1706 throw error;
1707 }
1708
1709 size_t strand_field = 5;
1710 if (columnData.ColumnCount() == 5 &&
1711 (columnData[4] == "-" || columnData[4] == "+")) {
1712 strand_field = 4;
1713 }
1714 if (strand_field < columnData.ColumnCount()) {
1715 string strand = columnData[strand_field];
1716 if (strand != "+" && strand != "-" && strand != ".") {
1717 CReaderMessage error(
1718 eDiag_Error,
1719 columnData.LineNo(),
1720 "Invalid data line: Invalid strand character.");
1721 throw error;
1722 }
1723 location->SetStrand(( columnData[strand_field] == "+" ) ?
1724 eNa_strand_plus : eNa_strand_minus );
1725 }
1726
1727 CRef<CSeq_id> id = CReadUtil::AsSeqId(columnData[0], m_iFlags, false);
1728 location->SetId(*id);
1729 feature->SetLocation(*location);
1730 }
1731
1732 // ----------------------------------------------------------------------------
1733 bool
ReadTrackData(ILineReader & lr,CRawBedTrack & rawdata,ILineErrorListener * pMessageListener)1734 CBedReader::ReadTrackData(
1735 ILineReader& lr,
1736 CRawBedTrack& rawdata,
1737 ILineErrorListener* pMessageListener)
1738 // ----------------------------------------------------------------------------
1739 {
1740 if (m_CurBatchSize == m_MaxBatchSize) {
1741 m_CurBatchSize = 0;
1742 return xReadBedDataRaw(lr, rawdata, pMessageListener);
1743 }
1744
1745 string line;
1746 while (xGetLine(lr, line)) {
1747 m_CurBatchSize = 0;
1748 if (line == "browser" || NStr::StartsWith(line, "browser ")) {
1749 continue;
1750 }
1751 if (line == "track" || NStr::StartsWith(line, "track ")) {
1752 continue;
1753 }
1754 //data line
1755 lr.UngetLine();
1756 return xReadBedDataRaw(lr, rawdata, pMessageListener);
1757 }
1758 return false;
1759 }
1760
1761 // ----------------------------------------------------------------------------
1762 bool
xReadBedRecordRaw(const string & line,CRawBedRecord & record,ILineErrorListener * pMessageListener)1763 CBedReader::xReadBedRecordRaw(
1764 const string& line,
1765 CRawBedRecord& record,
1766 ILineErrorListener* pMessageListener)
1767 // ----------------------------------------------------------------------------
1768 {
1769 if (line == "browser" || NStr::StartsWith(line, "browser ")
1770 || NStr::StartsWith(line, "browser\t")) {
1771 return false;
1772 }
1773 if (line == "track" || NStr::StartsWith(line, "track ")
1774 || NStr::StartsWith(line, "track\t")) {
1775 return false;
1776 }
1777
1778 vector<string> columns;
1779 string linecopy = line;
1780 NStr::TruncateSpacesInPlace(linecopy);
1781
1782 // parse
1783 NStr::Split(linecopy, " \t", columns, NStr::fSplit_MergeDelimiters);
1784 xCleanColumnValues(columns);
1785 if (columns.size() != mRealColumnCount) {
1786 CReaderMessage error(
1787 eDiag_Error,
1788 m_uLineNumber,
1789 "Invalid data line: Inconsistent column count.");
1790 m_pMessageHandler->Report(error);
1791 return false;
1792 }
1793
1794 //assign columns to record:
1795 CRef<CSeq_id> id = CReadUtil::AsSeqId(columns[0], m_iFlags, false);
1796
1797 unsigned int start;
1798 try {
1799 start = NStr::StringToInt(columns[1]);
1800 }
1801 catch(std::exception&) {
1802 CReaderMessage error(
1803 eDiag_Error,
1804 m_uLineNumber,
1805 "Invalid data line: Invalid \"SeqStart\" (column 2) value.");
1806 m_pMessageHandler->Report(error);
1807 return false;
1808 }
1809
1810 unsigned int stop;
1811 try {
1812 stop = NStr::StringToInt(columns[2]);
1813 }
1814 catch(std::exception&) {
1815 CReaderMessage error(
1816 eDiag_Error,
1817 m_uLineNumber,
1818 "Invalid data line: Invalid \"SeqStop\" (column 3) value.");
1819 m_pMessageHandler->Report(error);
1820 return false;
1821 }
1822
1823 int score(-1);
1824 if (mValidColumnCount >= 5 && columns[4] != ".") {
1825 try {
1826 score = NStr::StringToInt(columns[4],
1827 NStr::fConvErr_NoThrow|NStr::fAllowTrailingSymbols);
1828 }
1829 catch(std::exception&) {
1830 CReaderMessage error(
1831 eDiag_Error,
1832 m_uLineNumber,
1833 "Invalid data line: Invalid \"Score\" (column 5) value.");
1834 m_pMessageHandler->Report(error);
1835 return false;
1836 }
1837 }
1838 ENa_strand strand = eNa_strand_plus;
1839 if (mValidColumnCount >= 6) {
1840 if (columns[5] == "-") {
1841 strand = eNa_strand_minus;
1842 }
1843 }
1844 record.SetInterval(*id, start, stop, strand);
1845 if (score >= 0) {
1846 record.SetScore(score);
1847 }
1848 return true;
1849 }
1850
1851 // ----------------------------------------------------------------------------
1852 bool
xContainsThickFeature(const CBedColumnData & columnData) const1853 CBedReader::xContainsThickFeature(
1854 const CBedColumnData& columnData) const
1855 // ----------------------------------------------------------------------------
1856 {
1857 if (columnData.ColumnCount() < 8 || mValidColumnCount < 8) {
1858 return false;
1859 }
1860
1861 int start = -1, from = -1, to = -1;
1862 try {
1863 start = NStr::StringToInt(columnData[1]);
1864 from = NStr::StringToInt(columnData[6]);
1865 to = NStr::StringToInt(columnData[7]);
1866 }
1867 catch (std::exception&) {
1868 CReaderMessage error(
1869 eDiag_Error,
1870 columnData.LineNo(),
1871 "Invalid data line: Bad \"Start/ThickStart/ThickStop\" values.");
1872 throw error;
1873 }
1874 if (start == from && from == to) {
1875 return false;
1876 }
1877 return true;
1878 }
1879
1880
1881 // ----------------------------------------------------------------------------
1882 bool
xContainsRnaFeature(const CBedColumnData & columnData) const1883 CBedReader::xContainsRnaFeature(
1884 const CBedColumnData& columnData) const
1885 // ----------------------------------------------------------------------------
1886 {
1887 if (columnData.ColumnCount() < 12 || mValidColumnCount < 12) {
1888 return false;
1889 }
1890
1891 int start = -1, from = -1, to = -1;
1892 try {
1893 start = NStr::StringToInt(columnData[1]);
1894 from = NStr::StringToInt(columnData[6]);
1895 to = NStr::StringToInt(columnData[7]);
1896 }
1897 catch (std::exception&) {
1898 CReaderMessage error(
1899 eDiag_Error,
1900 columnData.LineNo(),
1901 "Invalid data line: Bad \"Start/ThickStart/ThickStop\" values.");
1902 throw error;
1903 }
1904 if (start == from && from == to) {
1905 return false;
1906 }
1907 return true;
1908 }
1909
1910
1911 // ----------------------------------------------------------------------------
1912 bool
xContainsBlockFeature(const CBedColumnData & columnData) const1913 CBedReader::xContainsBlockFeature(
1914 const CBedColumnData& columnData) const
1915 // ----------------------------------------------------------------------------
1916 {
1917 return (columnData.ColumnCount() >= 12 && mValidColumnCount >= 12);
1918 }
1919
1920
1921 // ----------------------------------------------------------------------------
1922 bool
xContainsCdsFeature(const CBedColumnData & columnData) const1923 CBedReader::xContainsCdsFeature(
1924 const CBedColumnData& columnData) const
1925 // ----------------------------------------------------------------------------
1926 {
1927 return (columnData.ColumnCount() >= 8 && mValidColumnCount >= 8);
1928 }
1929
1930
1931 // ----------------------------------------------------------------------------
1932 bool
xReadBedDataRaw(ILineReader & lr,CRawBedTrack & rawdata,ILineErrorListener * pMessageListener)1933 CBedReader::xReadBedDataRaw(
1934 ILineReader& lr,
1935 CRawBedTrack& rawdata,
1936 ILineErrorListener* pMessageListener)
1937 // ----------------------------------------------------------------------------
1938 {
1939 rawdata.Reset();
1940 string line;
1941 while (xGetLine(lr, line)) {
1942 CRawBedRecord record;
1943 if (!xReadBedRecordRaw(line, record, pMessageListener)) {
1944 lr.UngetLine();
1945 break;
1946 }
1947 rawdata.AddRecord(record);
1948 ++m_CurBatchSize;
1949 if (m_CurBatchSize == m_MaxBatchSize) {
1950 return rawdata.HasData();
1951 }
1952 }
1953
1954 return rawdata.HasData();
1955 }
1956
1957 // ----------------------------------------------------------------------------
1958 void
xCleanColumnValues(vector<string> & columns)1959 CBedReader::xCleanColumnValues(
1960 vector<string>& columns)
1961 // ----------------------------------------------------------------------------
1962 {
1963 string fixup;
1964
1965 if (NStr::EqualNocase(columns[0], "chr") && columns.size() > 1) {
1966 columns[1] = columns[0] + columns[1];
1967 columns.erase(columns.begin());
1968 }
1969 if (columns.size() < 3) {
1970 CReaderMessage error(
1971 eDiag_Error,
1972 0,
1973 "Invalid data line: Insufficient column count.");
1974 throw error;
1975 }
1976
1977 try {
1978 NStr::Replace(columns[1], ",", "", fixup);
1979 columns[1] = fixup;
1980 }
1981 catch(std::exception&) {
1982 CReaderMessage error(
1983 eDiag_Error,
1984 0,
1985 "Invalid data line: Invalid \"SeqStart\" (column 2) value.");
1986 throw error;
1987 }
1988
1989 try {
1990 NStr::Replace(columns[2], ",", "", fixup);
1991 columns[2] = fixup;
1992 }
1993 catch(std::exception&) {
1994 CReaderMessage error(
1995 eDiag_Error,
1996 0,
1997 "Invalid data line: Invalid \"SeqStop\" (column 3) value.");
1998 throw error;
1999 }
2000 }
2001
2002 // ----------------------------------------------------------------------------
2003 void
xAssignBedColumnCount(CSeq_annot & annot)2004 CBedReader::xAssignBedColumnCount(
2005 CSeq_annot& annot)
2006 // ----------------------------------------------------------------------------
2007 {
2008 if(mValidColumnCount < 3) {
2009 return;
2010 }
2011 CRef<CUser_object> columnCountUser(new CUser_object());
2012 columnCountUser->SetType().SetStr("NCBI_BED_COLUMN_COUNT");
2013 columnCountUser->AddField("NCBI_BED_COLUMN_COUNT", int (mValidColumnCount));
2014
2015 CRef<CAnnotdesc> userDesc(new CAnnotdesc());
2016 userDesc->SetUser().Assign(*columnCountUser);
2017 annot.SetDesc().Set().push_back(userDesc);
2018 }
2019
2020 END_objects_SCOPE
2021 END_NCBI_SCOPE
2022