1 /* $Id: gtf_reader.cpp 632531 2021-06-02 17:25:37Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Frank Ludwig
27 *
28 * File Description:
29 * GFF file reader
30 *
31 */
32
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35
36 #include <util/line_reader.hpp>
37
38 #include <objects/general/Object_id.hpp>
39 #include <objects/general/User_object.hpp>
40 #include <objects/general/Dbtag.hpp>
41
42 #include <objects/seqloc/Seq_id.hpp>
43 #include <objects/seqloc/Seq_loc.hpp>
44 #include <objects/seqloc/Seq_interval.hpp>
45 #include <objects/seqloc/Seq_point.hpp>
46
47 #include <objects/seq/Seq_annot.hpp>
48 #include <objects/seq/Annot_id.hpp>
49 #include <objects/seq/Annot_descr.hpp>
50 #include <objects/seqfeat/SeqFeatData.hpp>
51 #include <objects/seqfeat/SeqFeatXref.hpp>
52
53 #include <objects/seqfeat/Seq_feat.hpp>
54 #include <objects/seqfeat/Gene_ref.hpp>
55 #include <objects/seqfeat/Genetic_code.hpp>
56 #include <objects/seqfeat/RNA_ref.hpp>
57 #include <objects/seqfeat/Gb_qual.hpp>
58 #include <objects/seqfeat/Feat_id.hpp>
59
60 #include <objtools/readers/gtf_reader.hpp>
61 #include "gtf_location_merger.hpp"
62 #include "reader_message_handler.hpp"
63
64 #include <algorithm>
65
66 BEGIN_NCBI_SCOPE
67 BEGIN_objects_SCOPE // namespace ncbi::objects::
68
69 // ----------------------------------------------------------------------------
xAssignAttributesFromGff(const string & strGtfType,const string & strRawAttributes)70 bool CGtfReadRecord::xAssignAttributesFromGff(
71 const string& strGtfType,
72 const string& strRawAttributes )
73 // ----------------------------------------------------------------------------
74 {
75 vector< string > attributes;
76 xSplitGffAttributes(strRawAttributes, attributes);
77
78 for ( size_t u=0; u < attributes.size(); ++u ) {
79 string key, value;
80 string attribute(attributes[u]);
81 if (!NStr::SplitInTwo(attribute, "=", key, value)) {
82 if (!NStr::SplitInTwo(attribute, " ", key, value)) {
83 if (strGtfType == "gene") {
84 mAttributes.AddValue(
85 "gene_id", xNormalizedAttributeValue(attribute));
86 continue;
87 }
88 if (strGtfType == "transcript") {
89 string gid, tid;
90 if (!NStr::SplitInTwo(attribute, ".", gid, tid)) {
91 return false;
92 }
93 mAttributes.AddValue(
94 "gene_id", xNormalizedAttributeValue(gid));
95 mAttributes.AddValue(
96 "transcript_id", xNormalizedAttributeValue(attribute));
97 continue;
98 }
99 }
100 }
101 key = xNormalizedAttributeKey(key);
102 value = xNormalizedAttributeValue(value);
103 if ( key.empty() && value.empty() ) {
104 // Probably due to trailing "; ". Sequence Ontology generates such
105 // things.
106 continue;
107 }
108 if (NStr::StartsWith(value, "\"")) {
109 value = value.substr(1, string::npos);
110 }
111 if (NStr::EndsWith(value, "\"")) {
112 value = value.substr(0, value.length() - 1);
113 }
114 mAttributes.AddValue(key, value);
115 }
116 return true;
117 }
118
119 // ----------------------------------------------------------------------------
CGtfReader(unsigned int uFlags,const string & strAnnotName,const string & strAnnotTitle,SeqIdResolver resolver,CReaderListener * pRL)120 CGtfReader::CGtfReader(
121 unsigned int uFlags,
122 const string& strAnnotName,
123 const string& strAnnotTitle,
124 SeqIdResolver resolver,
125 CReaderListener* pRL):
126 // ----------------------------------------------------------------------------
127 CGff2Reader( uFlags, strAnnotName, strAnnotTitle, resolver, pRL)
128 {
129 mpLocations.reset(new CGtfLocationMerger(uFlags, resolver));
130 }
131
132 // ----------------------------------------------------------------------------
CGtfReader(unsigned int uFlags,CReaderListener * pRL)133 CGtfReader::CGtfReader(
134 unsigned int uFlags,
135 CReaderListener* pRL):
136 // ----------------------------------------------------------------------------
137 CGtfReader( uFlags, "", "", CReadUtil::AsSeqId, pRL)
138 {
139 }
140
141
142 // ----------------------------------------------------------------------------
~CGtfReader()143 CGtfReader::~CGtfReader()
144 // ----------------------------------------------------------------------------
145 {
146 }
147
148 // ----------------------------------------------------------------------------
149 CRef<CSeq_annot>
ReadSeqAnnot(ILineReader & lineReader,ILineErrorListener * pEC)150 CGtfReader::ReadSeqAnnot(
151 ILineReader& lineReader,
152 ILineErrorListener* pEC )
153 // ----------------------------------------------------------------------------
154 {
155 mCurrentFeatureCount = 0;
156 return CReaderBase::ReadSeqAnnot(lineReader, pEC);
157 }
158
159 // ----------------------------------------------------------------------------
160 void
xProcessData(const TReaderData & readerData,CSeq_annot & annot)161 CGtfReader::xProcessData(
162 const TReaderData& readerData,
163 CSeq_annot& annot)
164 // ----------------------------------------------------------------------------
165 {
166 for (const auto& lineData: readerData) {
167 const auto& line = lineData.mData;
168 if (xIsTrackTerminator(line)) {
169 continue;
170 }
171 if (xParseStructuredComment(line)) {
172 continue;
173 }
174 if (xParseBrowserLine(line, annot)) {
175 continue;
176 }
177 if (xParseFeature(line, annot, nullptr)) {
178 continue;
179 }
180 }
181 }
182
183 // ----------------------------------------------------------------------------
xUpdateAnnotFeature(const CGff2Record & record,CSeq_annot & annot,ILineErrorListener * pEC)184 bool CGtfReader::xUpdateAnnotFeature(
185 const CGff2Record& record,
186 CSeq_annot& annot,
187 ILineErrorListener* pEC)
188 // ----------------------------------------------------------------------------
189 {
190 const CGtfReadRecord& gff = dynamic_cast<const CGtfReadRecord&>(record);
191 auto recType = gff.NormalizedType();
192
193 using TYPEHANDLER = bool (CGtfReader::*)(const CGtfReadRecord&, CSeq_annot&);
194 using HANDLERMAP = map<string, TYPEHANDLER>;
195
196 HANDLERMAP typeHandlers = {
197 {"cds", &CGtfReader::xUpdateAnnotCds},
198 {"start_codon", &CGtfReader::xUpdateAnnotCds},
199 {"stop_codon", &CGtfReader::xUpdateAnnotCds},
200 {"5utr", &CGtfReader::xUpdateAnnotTranscript},
201 {"3utr", &CGtfReader::xUpdateAnnotTranscript},
202 {"exon", &CGtfReader::xUpdateAnnotTranscript},
203 {"initial", &CGtfReader::xUpdateAnnotTranscript},
204 {"internal", &CGtfReader::xUpdateAnnotTranscript},
205 {"terminal", &CGtfReader::xUpdateAnnotTranscript},
206 {"single", &CGtfReader::xUpdateAnnotTranscript},
207 };
208
209 //
210 // Handle officially recognized GTF types:
211 //
212 HANDLERMAP::iterator it = typeHandlers.find(recType);
213 if (it != typeHandlers.end()) {
214 TYPEHANDLER handler = it->second;
215 return (this->*handler)(gff, annot);
216 }
217
218 //
219 // Every other type is not officially sanctioned GTF, and per spec we are
220 // supposed to ignore it. In the spirit of being lenient on input we may
221 // try to salvage some of it anyway.
222 //
223 if (recType == "gene") {
224 return xCreateParentGene(gff, annot);
225 }
226 if (recType == "mrna" || recType == "transcript") {
227 return xCreateParentMrna(gff, annot);
228 }
229 return true;
230 }
231
232 // ----------------------------------------------------------------------------
xUpdateAnnotCds(const CGtfReadRecord & gff,CSeq_annot & annot)233 bool CGtfReader::xUpdateAnnotCds(
234 const CGtfReadRecord& gff,
235 CSeq_annot& annot )
236 // ----------------------------------------------------------------------------
237 {
238 auto featId = mpLocations->GetFeatureIdFor(gff, "cds");
239 mpLocations->AddRecordForId(featId, gff) ;
240 return (xFindFeatById(featId) || xCreateParentCds(gff, annot));
241 }
242
243 // ----------------------------------------------------------------------------
xUpdateAnnotTranscript(const CGtfReadRecord & gff,CSeq_annot & annot)244 bool CGtfReader::xUpdateAnnotTranscript(
245 const CGtfReadRecord& gff,
246 CSeq_annot& annot )
247 // ----------------------------------------------------------------------------
248 {
249 //
250 // If there is no gene feature to go with this CDS then make one. Otherwise,
251 // make sure the existing gene feature includes the location of the CDS.
252 //
253 auto geneFeatId = mpLocations->GetFeatureIdFor(gff, "gene");
254 CRef< CSeq_feat > pGene = xFindFeatById(geneFeatId);
255 if (!pGene) {
256 if (!xCreateParentGene(gff, annot)) {
257 return false;
258 }
259 mpLocations->AddRecordForId(geneFeatId, gff);
260 }
261 else {
262 mpLocations->AddRecordForId(geneFeatId, gff);
263 if (!xFeatureTrimQualifiers(gff, *pGene)) {
264 return false;
265 }
266 }
267
268 //
269 // If there is no mRNA feature with this gene_id|transcript_id then make one.
270 // Otherwise, fix up the location of the existing one.
271 //
272 auto transcriptFeatId = mpLocations->GetFeatureIdFor(gff, "transcript");
273 CRef<CSeq_feat> pMrna = xFindFeatById(transcriptFeatId);
274 if (!pMrna) {
275 //
276 // Create a brand new CDS feature:
277 //
278 if (!xCreateParentMrna(gff, annot)) {
279 return false;
280 }
281 mpLocations->AddRecordForId(transcriptFeatId, gff);
282 }
283 else {
284 //
285 // Update an already existing CDS features:
286 //
287 mpLocations->AddRecordForId(transcriptFeatId, gff);
288 if (!xFeatureTrimQualifiers(gff, *pMrna)) {
289 return false;
290 }
291 }
292 return true;
293 }
294
295 // ----------------------------------------------------------------------------
xCreateFeatureId(const CGtfReadRecord & record,const string & prefix,CSeq_feat & feature)296 bool CGtfReader::xCreateFeatureId(
297 const CGtfReadRecord& record,
298 const string& prefix,
299 CSeq_feat& feature )
300 // ----------------------------------------------------------------------------
301 {
302 static int seqNum(1);
303
304 string strFeatureId = prefix;
305 if (strFeatureId.empty()) {
306 strFeatureId = "id";
307 }
308 strFeatureId += "_";
309 strFeatureId += NStr::IntToString(seqNum++);
310 feature.SetId().SetLocal().SetStr(strFeatureId);
311 return true;
312 }
313
314 // -----------------------------------------------------------------------------
xCreateParentGene(const CGtfReadRecord & gff,CSeq_annot & annot)315 bool CGtfReader::xCreateParentGene(
316 const CGtfReadRecord& gff,
317 CSeq_annot& annot )
318 // -----------------------------------------------------------------------------
319 {
320 auto featId = mpLocations->GetFeatureIdFor(gff, "gene");
321 if (m_MapIdToFeature.find(featId) != m_MapIdToFeature.end()) {
322 return true;
323 }
324
325 CRef<CSeq_feat> pFeature( new CSeq_feat );
326
327 if (!xFeatureSetDataGene(gff, *pFeature)) {
328 return false;
329 }
330 if (!xCreateFeatureId(gff, "gene", *pFeature)) {
331 return false;
332 }
333 if ( !xFeatureSetQualifiersGene(gff, *pFeature)) {
334 return false;
335 }
336
337 (gff.Type() == "gene") ?
338 mpLocations->AddRecordForId(featId, gff) :
339 mpLocations->AddStubForId(featId);
340 m_MapIdToFeature[featId] = pFeature;
341 xAddFeatureToAnnot(pFeature, annot);
342 return true;
343 }
344
345 // ----------------------------------------------------------------------------
xFeatureSetQualifiersGene(const CGtfReadRecord & record,CSeq_feat & feature)346 bool CGtfReader::xFeatureSetQualifiersGene(
347 const CGtfReadRecord& record,
348 CSeq_feat& feature )
349 // ----------------------------------------------------------------------------
350 {
351 list<string> ignoredAttrs = {
352 "locus_tag", "transcript_id"
353 };
354 //
355 // Create GB qualifiers for the record attributes:
356 //
357
358 const auto& attrs = record.GtfAttributes().Get();
359 auto it = attrs.begin();
360 for (/*NOOP*/; it != attrs.end(); ++it) {
361 auto cit = std::find(ignoredAttrs.begin(), ignoredAttrs.end(), it->first);
362 if (cit != ignoredAttrs.end()) {
363 continue;
364 }
365 // special case some well-known attributes
366 if (xProcessQualifierSpecialCase(it->first, it->second, feature)) {
367 continue;
368 }
369
370 // turn everything else into a qualifier
371 xFeatureAddQualifiers(it->first, it->second, feature);
372 }
373 return true;
374 }
375
376 // ----------------------------------------------------------------------------
xFeatureSetQualifiersRna(const CGtfReadRecord & record,CSeq_feat & feature)377 bool CGtfReader::xFeatureSetQualifiersRna(
378 const CGtfReadRecord& record,
379 CSeq_feat& feature )
380 // ----------------------------------------------------------------------------
381 {
382 list<string> ignoredAttrs = {
383 "locus_tag"
384 };
385
386 const auto& attrs = record.GtfAttributes().Get();
387 auto it = attrs.begin();
388 for (/*NOOP*/; it != attrs.end(); ++it) {
389 auto cit = std::find(ignoredAttrs.begin(), ignoredAttrs.end(), it->first);
390 if (cit != ignoredAttrs.end()) {
391 continue;
392 }
393 // special case some well-known attributes
394 if (xProcessQualifierSpecialCase(it->first, it->second, feature)) {
395 continue;
396 }
397
398 // turn everything else into a qualifier
399 xFeatureAddQualifiers(it->first, it->second, feature);
400 }
401 return true;
402 }
403
404 // ----------------------------------------------------------------------------
xFeatureSetQualifiersCds(const CGtfReadRecord & record,CSeq_feat & feature)405 bool CGtfReader::xFeatureSetQualifiersCds(
406 const CGtfReadRecord& record,
407 CSeq_feat& feature )
408 // ----------------------------------------------------------------------------
409 {
410 list<string> ignoredAttrs = {
411 "locus_tag"
412 };
413
414 const auto& attrs = record.GtfAttributes().Get();
415 auto it = attrs.begin();
416 for (/*NOOP*/; it != attrs.end(); ++it) {
417 auto cit = std::find(ignoredAttrs.begin(), ignoredAttrs.end(), it->first);
418 if (cit != ignoredAttrs.end()) {
419 continue;
420 }
421 // special case some well-known attributes
422 if (xProcessQualifierSpecialCase(it->first, it->second, feature)) {
423 continue;
424 }
425
426 // turn everything else into a qualifier
427 xFeatureAddQualifiers(it->first, it->second, feature);
428 }
429 return true;
430 }
431
432 // -----------------------------------------------------------------------------
xCreateParentCds(const CGtfReadRecord & gff,CSeq_annot & annot)433 bool CGtfReader::xCreateParentCds(
434 const CGtfReadRecord& gff,
435 CSeq_annot& annot )
436 // -----------------------------------------------------------------------------
437 {
438 auto featId = mpLocations->GetFeatureIdFor(gff, "cds");
439 if (m_MapIdToFeature.find(featId) != m_MapIdToFeature.end()) {
440 return true;
441 }
442
443 CRef<CSeq_feat> pFeature(new CSeq_feat);
444
445 if (!xFeatureSetDataCds(gff, *pFeature)) {
446 return false;
447 }
448 if (!xCreateFeatureId(gff, "cds", *pFeature)) {
449 return false;
450 }
451 if (!xFeatureSetQualifiersCds(gff, *pFeature)) {
452 return false;
453 }
454 m_MapIdToFeature[featId] = pFeature;
455 return xAddFeatureToAnnot(pFeature, annot);
456 }
457
458 // -----------------------------------------------------------------------------
xCreateParentMrna(const CGtfReadRecord & gff,CSeq_annot & annot)459 bool CGtfReader::xCreateParentMrna(
460 const CGtfReadRecord& gff,
461 CSeq_annot& annot )
462 // -----------------------------------------------------------------------------
463 {
464 auto featId = mpLocations->GetFeatureIdFor(gff, "transcript");
465 if (m_MapIdToFeature.find(featId) != m_MapIdToFeature.end()) {
466 return true;
467 }
468
469 CRef< CSeq_feat > pFeature( new CSeq_feat );
470
471 if (!xFeatureSetDataMrna(gff, *pFeature)) {
472 return false;
473 }
474 if (!xCreateFeatureId(gff, "mrna", *pFeature)) {
475 return false;
476 }
477 if ( ! xFeatureSetQualifiersRna( gff, *pFeature ) ) {
478 return false;
479 }
480
481 mpLocations->AddStubForId(featId);
482 m_MapIdToFeature[featId] = pFeature;
483
484 return xAddFeatureToAnnot( pFeature, annot );
485 }
486
487 // ----------------------------------------------------------------------------
xFindFeatById(const string & featId)488 CRef<CSeq_feat> CGtfReader::xFindFeatById(
489 const string& featId)
490 // ----------------------------------------------------------------------------
491 {
492 auto featIt = m_MapIdToFeature.find(featId);
493 if (featIt == m_MapIdToFeature.end()) {
494 return CRef<CSeq_feat>();
495 }
496 return featIt->second;
497 }
498
499 // ----------------------------------------------------------------------------
xFeatureSetDataGene(const CGtfReadRecord & record,CSeq_feat & feature)500 bool CGtfReader::xFeatureSetDataGene(
501 const CGtfReadRecord& record,
502 CSeq_feat& feature )
503 // ----------------------------------------------------------------------------
504 {
505 CGene_ref& gene = feature.SetData().SetGene();
506
507 const auto& attributes = record.GtfAttributes();
508 string geneSynonym = attributes.ValueOf("gene_synonym");
509 if (!geneSynonym.empty()) {
510 gene.SetSyn().push_back(geneSynonym);
511 }
512 string locusTag = attributes.ValueOf("locus_tag");
513 if (!locusTag.empty()) {
514 gene.SetLocus_tag(locusTag);
515 }
516 return true;
517 }
518
519 // ----------------------------------------------------------------------------
xFeatureSetDataMrna(const CGtfReadRecord & record,CSeq_feat & feature)520 bool CGtfReader::xFeatureSetDataMrna(
521 const CGtfReadRecord& record,
522 CSeq_feat& feature)
523 // ----------------------------------------------------------------------------
524 {
525 if (!xFeatureSetDataRna(record, feature, CSeqFeatData::eSubtype_mRNA)) {
526 return false;
527 }
528 CRNA_ref& rna = feature.SetData().SetRna();
529
530 string product = record.GtfAttributes().ValueOf("product");
531 if (!product.empty()) {
532 rna.SetExt().SetName(product);
533 }
534 return true;
535 }
536
537 // ----------------------------------------------------------------------------
xFeatureSetDataRna(const CGtfReadRecord & record,CSeq_feat & feature,CSeqFeatData::ESubtype subType)538 bool CGtfReader::xFeatureSetDataRna(
539 const CGtfReadRecord& record,
540 CSeq_feat& feature,
541 CSeqFeatData::ESubtype subType)
542 // ----------------------------------------------------------------------------
543 {
544 CRNA_ref& rnaRef = feature.SetData().SetRna();
545 switch (subType){
546 default:
547 rnaRef.SetType(CRNA_ref::eType_miscRNA);
548 break;
549 case CSeqFeatData::eSubtype_mRNA:
550 rnaRef.SetType(CRNA_ref::eType_mRNA);
551 break;
552 case CSeqFeatData::eSubtype_rRNA:
553 rnaRef.SetType(CRNA_ref::eType_rRNA);
554 break;
555 }
556 return true;
557 }
558
559 // ----------------------------------------------------------------------------
xFeatureSetDataCds(const CGtfReadRecord & record,CSeq_feat & feature)560 bool CGtfReader::xFeatureSetDataCds(
561 const CGtfReadRecord& record,
562 CSeq_feat& feature )
563 // ----------------------------------------------------------------------------
564 {
565 CCdregion& cdr = feature.SetData().SetCdregion();
566 const auto& attributes = record.GtfAttributes();
567
568 string proteinId = attributes.ValueOf("protein_id");
569 if (!proteinId.empty()) {
570 CRef<CSeq_id> pId = mSeqIdResolve(proteinId, m_iFlags, true);
571 if (pId->IsGenbank()) {
572 feature.SetProduct().SetWhole(*pId);
573 }
574 }
575 string ribosomalSlippage = attributes.ValueOf("ribosomal_slippage");
576 if (!ribosomalSlippage.empty()) {
577 feature.SetExcept( true );
578 feature.SetExcept_text("ribosomal slippage");
579 }
580 string transTable = attributes.ValueOf("transl_table");
581 if (!transTable.empty()) {
582 CRef< CGenetic_code::C_E > pGc( new CGenetic_code::C_E );
583 pGc->SetId(NStr::StringToUInt(transTable));
584 cdr.SetCode().Set().push_back(pGc);
585 }
586 return true;
587 }
588
589 // ----------------------------------------------------------------------------
xFeatureTrimQualifiers(const CGtfReadRecord & record,CSeq_feat & feature)590 bool CGtfReader::xFeatureTrimQualifiers(
591 const CGtfReadRecord& record,
592 CSeq_feat& feature )
593 // ----------------------------------------------------------------------------
594 {
595 typedef CSeq_feat::TQual TQual;
596 //task:
597 // for each attribute of the new piece check if we already got a feature
598 // qualifier
599 // if so, and with the same value, then the qualifier is allowed to live
600 // otherwise it is subfeature specific and hence removed from the feature
601 TQual& quals = feature.SetQual();
602 for (TQual::iterator it = quals.begin(); it != quals.end(); /**/) {
603 const string& qualKey = (*it)->GetQual();
604 if (NStr::StartsWith(qualKey, "gff_")) {
605 it++;
606 continue;
607 }
608 if (qualKey == "locus_tag") {
609 it++;
610 continue;
611 }
612 if (qualKey == "old_locus_tag") {
613 it++;
614 continue;
615 }
616 if (qualKey == "product") {
617 it++;
618 continue;
619 }
620 if (qualKey == "protein_id") {
621 it++;
622 continue;
623 }
624 const string& qualVal = (*it)->GetVal();
625 if (!record.GtfAttributes().HasValue(qualKey, qualVal)) {
626 //superfluous qualifier- squish
627 it = quals.erase(it);
628 continue;
629 }
630 it++;
631 }
632 return true;
633 }
634
635 // ----------------------------------------------------------------------------
xProcessQualifierSpecialCase(const string & key,const CGtfAttributes::MultiValue & values,CSeq_feat & feature)636 bool CGtfReader::xProcessQualifierSpecialCase(
637 const string& key,
638 const CGtfAttributes::MultiValue& values,
639 CSeq_feat& feature )
640 // ----------------------------------------------------------------------------
641 {
642 CRef<CGb_qual> pQual(0);
643
644 if (0 == NStr::CompareNocase(key, "exon_id")) {
645 return true;
646 }
647 if (0 == NStr::CompareNocase(key, "exon_number")) {
648 return true;
649 }
650 if ( 0 == NStr::CompareNocase(key, "note") ) {
651 feature.SetComment(NStr::Join(values, ";"));
652 return true;
653 }
654 if ( 0 == NStr::CompareNocase(key, "dbxref") ||
655 0 == NStr::CompareNocase(key, "db_xref"))
656 {
657 for (auto value: values) {
658 vector< string > tags;
659 NStr::Split(value, ";", tags );
660 for (auto it = tags.begin(); it != tags.end(); ++it ) {
661 feature.SetDbxref().push_back(x_ParseDbtag(*it));
662 }
663 }
664 return true;
665 }
666
667 if ( 0 == NStr::CompareNocase(key, "pseudo")) {
668 feature.SetPseudo( true );
669 return true;
670 }
671 if ( 0 == NStr::CompareNocase(key, "partial")) {
672 // RW-1108 - ignore partial attribute in Genbank mode
673 if (m_iFlags & CGtfReader::fGenbankMode) {
674 return true;
675 }
676 }
677 return false;
678 }
679
680 // ----------------------------------------------------------------------------
xFeatureAddQualifiers(const string & key,const CGtfAttributes::MultiValue & values,CSeq_feat & feature)681 void CGtfReader::xFeatureAddQualifiers(
682 const string& key,
683 const CGtfAttributes::MultiValue& values,
684 CSeq_feat& feature)
685 // ----------------------------------------------------------------------------
686 {
687 for (auto value: values) {
688 feature.AddQualifier(key, value);
689 }
690 };
691
692 // ============================================================================
xSetAncestorXrefs(CSeq_feat & descendent,CSeq_feat & ancestor)693 void CGtfReader::xSetAncestorXrefs(
694 CSeq_feat& descendent,
695 CSeq_feat& ancestor)
696 // ============================================================================
697 {
698 xSetXrefFromTo(descendent, ancestor);
699 if (m_iFlags & CGtfReader::fGenerateChildXrefs) {
700 xSetXrefFromTo(ancestor, descendent);
701 }
702 }
703
704 // ----------------------------------------------------------------------------
xPostProcessAnnot(CSeq_annot & annot)705 void CGtfReader::xPostProcessAnnot(
706 CSeq_annot& annot)
707 // ----------------------------------------------------------------------------
708 {
709 //location fixup:
710 for (auto itLocation: mpLocations->LocationMap()) {
711 auto id = itLocation.first;
712 auto itFeature = m_MapIdToFeature.find(id);
713 if (itFeature == m_MapIdToFeature.end()) {
714 continue;
715 }
716 CRef<CSeq_feat> pFeature = itFeature->second;
717 auto featSubType = pFeature->GetData().GetSubtype();
718 CRef<CSeq_loc> pNewLoc = mpLocations->MergeLocation(
719 featSubType, itLocation.second);
720 pFeature->SetLocation(*pNewLoc);
721 }
722
723 //generate xrefs:
724 for (auto itLocation: mpLocations->LocationMap()) {
725 auto id = itLocation.first;
726 auto itFeature = m_MapIdToFeature.find(id);
727 if (itFeature == m_MapIdToFeature.end()) {
728 continue;
729 }
730 CRef<CSeq_feat> pFeature = itFeature->second;
731 auto featSubType = pFeature->GetData().GetSubtype();
732 switch(featSubType) {
733 default: {
734 break;
735 }
736 case CSeqFeatData::eSubtype_mRNA: {
737 auto parentGeneFeatId = string("gene:") + pFeature->GetNamedQual("gene_id");
738 CRef<CSeq_feat> pParentGene;
739 if (x_GetFeatureById(parentGeneFeatId, pParentGene)) {
740 xSetAncestorXrefs(*pFeature, *pParentGene);
741 }
742 break;
743 }
744 case CSeqFeatData::eSubtype_cdregion: {
745 auto parentRnaFeatId = string("transcript:") + pFeature->GetNamedQual("gene_id") +
746 "_" + pFeature->GetNamedQual("transcript_id");
747 CRef<CSeq_feat> pParentRna;
748 if (x_GetFeatureById(parentRnaFeatId, pParentRna)) {
749 xSetAncestorXrefs(*pFeature, *pParentRna);
750 }
751 auto parentGeneFeatId = string("gene:") + pFeature->GetNamedQual("gene_id");
752 CRef<CSeq_feat> pParentGene;
753 if (x_GetFeatureById(parentGeneFeatId, pParentGene)) {
754 xSetAncestorXrefs(*pFeature, *pParentGene);
755 }
756 break;
757 }
758 }
759 }
760 return CGff2Reader::xPostProcessAnnot(annot);
761 }
762
763 END_objects_SCOPE
764 END_NCBI_SCOPE
765