1 /* $Id: src_quals.cpp 636817 2021-08-31 18:41:12Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 *
27 * Author: Sergiy Gotvyanskyy, NCBI
28 * File Description:
29 * High level reader for source qualifiers
30 *
31 * ===========================================================================
32 */
33
34 #include <ncbi_pch.hpp>
35
36 #include <corelib/ncbifile.hpp>
37 #include <objmgr/object_manager.hpp>
38 #include <objmgr/scope.hpp>
39 #include <objmgr/bioseq_ci.hpp>
40 #include <objects/seq/Seq_descr.hpp>
41 #include <objtools/readers/line_error.hpp>
42 #include <objtools/readers/message_listener.hpp>
43 #include <objtools/readers/mod_reader.hpp>
44 #include "src_quals.hpp"
45 #include "visitors.hpp"
46 #include <sstream>
47
48
49 BEGIN_NCBI_SCOPE
50 USING_SCOPE(objects);
51
52
sPostError(ILineErrorListener * pEC,const string & message,const CTempString & seqId,size_t lineNum=0)53 static void sPostError(
54 ILineErrorListener* pEC,
55 const string& message,
56 const CTempString& seqId,
57 size_t lineNum=0)
58 {
59 _ASSERT(pEC);
60
61 AutoPtr<CLineErrorEx> pErr(
62 CLineErrorEx::Create(
63 ILineError::eProblem_GeneralParsingError,
64 eDiag_Error,
65 0, 0, // code and subcode
66 seqId,
67 lineNum, // lineNumber,
68 message));
69
70 pEC->PutError(*pErr);
71
72 }
73
sReportMissingMods(ILineErrorListener * pEC,const string & fileName,const CBioseq & bioseq)74 static void sReportMissingMods(
75 ILineErrorListener* pEC,
76 const string& fileName,
77 const CBioseq& bioseq)
78 {
79
80 string seqId = bioseq.GetId().front()->AsFastaString();
81 string message =
82 fileName +
83 " doesn't contain qualifiers for sequence id " +
84 seqId +
85 ".";
86
87 sPostError(pEC, message, seqId);
88 }
89
90
sReportMultipleMatches(ILineErrorListener * pEC,const string & fileName,size_t lineNum,const CBioseq & bioseq)91 static void sReportMultipleMatches(
92 ILineErrorListener* pEC,
93 const string& fileName,
94 size_t lineNum,
95 const CBioseq& bioseq)
96 {
97 string seqId = bioseq.GetId().front()->AsFastaString();
98 ostringstream message;
99 message
100 << "Multiple potential matches for line "
101 << lineNum
102 << " of "
103 << fileName
104 << ". Unable to match sequence id "
105 << seqId
106 << " to a previously matched entry.";
107
108 sPostError(pEC, message.str(), seqId);
109 }
110
111
s_PostProcessID(string & id)112 static void s_PostProcessID(string& id)
113 {
114 if (id.empty()) {
115 return;
116 }
117
118 if (id.back() == '|') {
119 id.pop_back();
120 }
121 NStr::ToLower(id);
122 }
123
124
sReportDuplicateIds(ILineErrorListener * pEC,const string & fileName,size_t currentLine,size_t previousLine,const CTempString & seqId)125 static void sReportDuplicateIds(
126 ILineErrorListener* pEC,
127 const string& fileName,
128 size_t currentLine,
129 size_t previousLine,
130 const CTempString& seqId)
131 {
132
133 ostringstream message;
134 message
135 << "Sequence id "
136 << seqId
137 << " on line "
138 << currentLine
139 << " of " << fileName
140 << " duplicates id on line "
141 << previousLine
142 << ". Skipping line "
143 << currentLine
144 << ".";
145
146 sPostError(pEC, message.str(), seqId, currentLine);
147 }
148
149
sReportUnusedMods(ILineErrorListener * pEC,const string & fileName,size_t lineNum,const CTempString & seqId)150 static void sReportUnusedMods(
151 ILineErrorListener* pEC,
152 const string& fileName,
153 size_t lineNum,
154 const CTempString& seqId)
155 {
156 _ASSERT(pEC);
157
158 string message =
159 fileName +
160 " contains qualifiers for sequence id " +
161 seqId +
162 ", but no sequence with that id was found.";
163
164 AutoPtr<CLineErrorEx> pErr(
165 CLineErrorEx::Create(
166 ILineError::eProblem_GeneralParsingError,
167 eDiag_Error,
168 0, 0, // code and subcode
169 seqId,
170 lineNum, // lineNumber,
171 message));
172
173 pEC->PutError(*pErr);
174 }
175
176
Empty() const177 bool CMemorySrcFileMap::Empty() const
178 {
179 return m_LineMap.empty();
180 }
181
182
Mapped() const183 bool CMemorySrcFileMap::Mapped() const
184 {
185 return m_FileMapped;
186 }
187
188
GetMods(const CBioseq & bioseq,TModList & mods,bool isVerbose)189 bool CMemorySrcFileMap::GetMods(const CBioseq& bioseq, TModList& mods, bool isVerbose)
190 {
191 mods.clear();
192 if (!m_FileMapped) {
193 return false;
194 }
195
196 list<string> id_strings;
197 for (const auto& pId : bioseq.GetId()) {
198 string id;
199 pId->GetLabel(&id, nullptr, CSeq_id::eFasta);
200 s_PostProcessID(id);
201 id_strings.push_back(id);
202 CTempString type, content;
203 NStr::SplitInTwo(id, "|", type, content);
204 id_strings.push_back(content);
205 if (pId->IsGeneral()) {
206 CTempString db, tag;
207 NStr::SplitInTwo(content, "|", db, tag);
208 id_strings.push_back(tag);
209 }
210 else {
211 auto pTextSeqId = pId->GetTextseq_Id();
212 if (pTextSeqId && pTextSeqId->IsSetVersion()) {
213 size_t pointPos = id.rfind('.');
214 if (pointPos != string::npos) {
215 CTempString versionlessId(id, 0, pointPos);
216 NStr::SplitInTwo(versionlessId, "|", type, content);
217 id_strings.push_back(versionlessId);
218 id_strings.push_back(content);
219 }
220 }
221 }
222 }
223
224 /*
225 for (const auto& id : id_strings) {
226 auto it = m_LineMap.find(id);
227 if (it != m_LineMap.end()) {
228 x_ProcessLine(it->second.line, mods);
229 auto lineNum = it->second.lineNum;
230 m_ProcessedIdsToLineNum.emplace(id, lineNum);
231 for (const auto& pEquivIt : it->second.equiv) {
232 m_LineMap.erase(pEquivIt->val);
233 }
234 m_LineMap.erase(it);
235 return true;
236 }
237 }
238 */
239
240 for (const auto& id : id_strings) {
241 auto it = m_LineMap.find(id);
242 if (it != m_LineMap.end()) {
243 CTempString* linePtr = it->second.linePtr;
244 CTempString& line = linePtr ? *linePtr : it->second.line;
245 if (!line.empty()) {
246 x_ProcessLine(line, mods);
247 auto lineNum = it->second.lineNum;
248 m_ProcessedIdsToLineNum.emplace(id, lineNum);
249 line.clear();
250 return true;
251 }
252 }
253 }
254
255
256
257 for (const auto& id : id_strings) {
258 auto it = m_ProcessedIdsToLineNum.find(id);
259 if (it != end(m_ProcessedIdsToLineNum)) {
260 sReportMultipleMatches(m_pEC, m_pFileMap->GetFileName(), it->second, bioseq);
261 return false;
262 }
263 }
264
265 if (isVerbose) {
266 sReportMissingMods(m_pEC, m_pFileMap->GetFileName(), bioseq);
267 }
268 return false;
269 }
270
271
ReportUnusedIds()272 void CMemorySrcFileMap::ReportUnusedIds()
273 {
274 if (!Empty()) {
275 map<size_t, CTempString> unusedLines;
276 for (const auto& entry : m_LineMap) {
277 if (!entry.second.line.empty()) {
278 unusedLines.emplace(entry.second.lineNum, entry.second.line);
279 }
280 }
281
282 for (const auto& entry : unusedLines) {
283 CTempString seqId, remainder;
284 NStr::SplitInTwo(entry.second, "\t", seqId, remainder);
285 sReportUnusedMods(m_pEC,
286 m_pFileMap->GetFileName(),
287 entry.first,
288 NStr::TruncateSpaces_Unsafe(seqId));
289 }
290 }
291 }
292
x_ProcessLine(const CTempString & line,TModList & mods)293 void CMemorySrcFileMap::x_ProcessLine(const CTempString& line, TModList& mods)
294 {
295 vector<CTempString> tokens;
296 NStr::Split(line, "\t", tokens);
297 for (size_t i=1; i < tokens.size() && i < m_ColumnNames.size(); ++i) {
298 auto value=NStr::TruncateSpaces_Unsafe(tokens[i]);
299 if (!NStr::IsBlank(value)) {
300 mods.emplace_back(m_ColumnNames[i], value);
301 }
302 }
303 }
304
305 static pair<size_t,size_t>
s_IdTypeToNumFields(CSeq_id::E_Choice choice)306 s_IdTypeToNumFields(CSeq_id::E_Choice choice)
307 {
308 switch(choice) {
309 case CSeq_id::e_Local:
310 case CSeq_id::e_Gibbsq:
311 case CSeq_id::e_Gibbmt:
312 case CSeq_id::e_Giim:
313 case CSeq_id::e_Gi:
314 return make_pair<size_t,size_t>(1,1);
315 case CSeq_id::e_Patent:
316 return make_pair<size_t,size_t>(3,3);
317 case CSeq_id::e_General:
318 return make_pair<size_t,size_t>(2,2);
319 default:
320 break;
321 }
322 return make_pair<size_t,size_t>(1,3);
323 }
324
325
326 static bool
s_ParseFastaIdString(const CTempString & fastaString,set<CTempString,PNocase_Generic<CTempString>> & idStrings)327 s_ParseFastaIdString(const CTempString& fastaString,
328 set<CTempString, PNocase_Generic<CTempString>>& idStrings)
329 {
330 idStrings.clear();
331
332 static const size_t minStubLength=2;
333 static const size_t maxStubLength=3;
334
335 using size_type = CTempString::size_type;
336 size_type fastaLength = fastaString.size();
337 size_type currentPos=0;
338 size_type idStartPos=0;
339 size_t currentField=0;
340 size_t currentMinField=0;
341 size_t currentMaxField=0;
342
343 while (currentPos < fastaLength) {
344 if (idStartPos == currentPos) {
345 auto nextBarPos = fastaString.find('|', currentPos);
346 if (nextBarPos == NPOS) {
347 return false;
348 }
349 const auto stubLength = nextBarPos - currentPos;
350 if (stubLength<minStubLength || stubLength>maxStubLength) {
351 return false;
352 }
353 const auto idType =
354 CSeq_id::WhichInverseSeqId(fastaString.substr(currentPos, stubLength));
355 if (idType == CSeq_id::e_not_set) {
356 return false;
357 }
358 auto numFields = s_IdTypeToNumFields(idType);
359 currentMinField = numFields.first;
360 currentMaxField = numFields.second;
361 currentPos=nextBarPos+1;
362 continue;
363 }
364
365 _ASSERT(currentMinField <= currentMaxField);
366 if (currentField < currentMaxField) {
367 auto nextBarPos = fastaString.find('|', currentPos);
368 if (nextBarPos == NPOS) {
369 if (currentField < currentMinField-1) {
370 return false;
371 }
372 idStrings.emplace(fastaString.substr(idStartPos));
373 return true;
374 }
375 if (currentField >= currentMinField) {
376 auto length = nextBarPos-currentPos;
377 if (length>=minStubLength && length<=maxStubLength) {
378 const auto idType =
379 CSeq_id::WhichInverseSeqId(fastaString.substr(currentPos, length));
380 if (idType != CSeq_id::e_not_set) {
381 auto numFields = s_IdTypeToNumFields(idType);
382 currentMinField = numFields.first;
383 currentMaxField = numFields.second;
384 idStartPos=currentPos;
385 currentField=0;
386 currentPos=nextBarPos+1;
387 continue;
388 }
389 }
390 }
391 currentPos=nextBarPos+1;
392 ++currentField;
393 }
394 else {
395 _ASSERT(currentField == currentMaxField);
396 idStrings.emplace(fastaString.substr(idStartPos, (currentPos-idStartPos)-1));
397 idStartPos=currentPos;
398 currentField=0;
399 }
400 }
401
402 if (currentField < currentMinField) {
403 return false;
404 }
405
406 if (fastaString[fastaLength-1] == '|') {
407 if (currentField < currentMaxField) {
408 _ASSERT(currentPos == fastaLength);
409 idStrings.emplace(fastaString.substr(idStartPos, (currentPos-idStartPos)-1));
410 return true;
411 }
412 return false;
413 }
414
415 return true;
416 }
417
418
x_RegisterLine(size_t lineNum,const CTempString & line,bool allowAcc)419 void CMemorySrcFileMap::x_RegisterLine(size_t lineNum, const CTempString& line, bool allowAcc)
420 {
421 CTempString idString, remainder;
422 NStr::SplitInTwo(line, "\t", idString, remainder);
423 NStr::TruncateSpacesInPlace(idString);
424 if (idString.empty()) {
425 return;
426 }
427
428 if (count(begin(idString), end(idString), '|')<2) { // idString encodes a single id
429 auto rval = m_LineMap.emplace(idString, SLineInfo{lineNum, line});
430 if (!rval.second) {
431 CTempString seqId, remainder; // revisit this
432 NStr::SplitInTwo(line, "\t", seqId, remainder);
433 sReportDuplicateIds(m_pEC,
434 m_pFileMap->GetFileName(),
435 lineNum,
436 rval.first->second.lineNum,
437 NStr::TruncateSpaces_Unsafe(seqId));
438 }
439 return;
440 }
441
442 set<CTempString, PNocase_Generic<CTempString>> parsedIDs;
443 if (!s_ParseFastaIdString(idString, parsedIDs)){
444 sPostError(m_pEC,
445 "In " + m_pFileMap->GetFileName() +
446 ". Unable to parse " + idString + ".",
447 "",
448 lineNum);
449 return;
450 }
451
452 CTempString* linePtr=nullptr;
453 for (auto id : parsedIDs) {
454 pair<TLineMap::iterator,bool> rval;
455 if (linePtr) {
456 rval = m_LineMap.emplace(id, SLineInfo{lineNum});
457 rval.first->second.linePtr = linePtr;
458 }
459 else {
460 rval = m_LineMap.emplace(id, SLineInfo{lineNum, line});
461 linePtr = &rval.first->second.line;
462 }
463
464 if (!rval.second) {
465 CTempString seqId, remainder; // revisit this
466 NStr::SplitInTwo(line, "\t", seqId, remainder);
467 sReportDuplicateIds(m_pEC,
468 m_pFileMap->GetFileName(),
469 lineNum,
470 rval.first->second.lineNum,
471 NStr::TruncateSpaces_Unsafe(seqId));
472
473 linePtr->clear();
474 break;
475 }
476 }
477 }
478
479
480
MapFile(const string & fileName,bool allowAcc)481 void CMemorySrcFileMap::MapFile(const string& fileName, bool allowAcc)
482 {
483 if (m_FileMapped ||
484 m_pFileMap ||
485 !m_LineMap.empty()) {
486 return;
487 }
488
489 m_pFileMap.reset(new CMemoryFileMap(fileName));
490
491 size_t fileSize = m_pFileMap->GetFileSize();
492 const char* ptr = (const char*)m_pFileMap->Map(0, fileSize);
493 const char* end = ptr + fileSize;
494
495 size_t lineNum = 0;
496 while (ptr < end)
497 {
498 // search for next non empty line
499 if (*ptr == '\r' || *ptr == '\n') {
500 ++ptr;
501 continue;
502 }
503
504 const char* start = ptr;
505 // search for end of line
506 const char* endline = (const char*)memchr(ptr, '\n', end - ptr);
507 if (endline == nullptr) endline = end;
508
509 ptr = endline + 1;
510 endline--;
511
512 while (start < endline && *endline == '\r')
513 endline--;
514
515 // compose line control structure
516 if (start < endline)
517 {
518 ++lineNum;
519 CTempString line(start, endline-start+1);
520 if (m_ColumnNames.empty())
521 NStr::Split(line, "\t", m_ColumnNames);
522 else // parse regular line
523 x_RegisterLine(lineNum, line, allowAcc);
524 }
525 }
526
527 if (m_ColumnNames.empty()) {
528 NCBI_THROW(CArgException, eConstraint,
529 "source modifiers file header line is not valid");
530 }
531
532 m_FileMapped = true;
533 }
534
535
536
537
538
539
s_AppendMods(const CModHandler::TModList & mods,string & title)540 static void s_AppendMods(
541 const CModHandler::TModList& mods,
542 string& title)
543 {
544 for (const auto& mod : mods) {
545
546 title.append(" ["
547 + mod.GetName()
548 + "="
549 + mod.GetValue()
550 + "]");
551 }
552 }
553
554
sReportError(ILineErrorListener * pEC,EDiagSev severity,int subcode,const string & seqId,const string & message,ILineError::EProblem problemType=ILineError::eProblem_GeneralParsingError)555 static void sReportError(
556 ILineErrorListener* pEC,
557 EDiagSev severity,
558 int subcode,
559 const string& seqId,
560 const string& message,
561 ILineError::EProblem problemType=ILineError::eProblem_GeneralParsingError)
562 {
563 _ASSERT(pEC);
564
565 AutoPtr<CLineErrorEx> pErr(
566 CLineErrorEx::Create(
567 problemType,
568 severity,
569 EReaderCode::eReader_Mods,
570 subcode,
571 seqId,
572 0, // lineNumber,
573 message));
574
575 pEC->PutError(*pErr);
576 }
577
578
579
s_PreprocessNoteMods(CModHandler::TModList & mods)580 static void s_PreprocessNoteMods(CModHandler::TModList& mods)
581 {
582 for (auto& mod : mods) {
583 if (CModHandler::GetCanonicalName(mod.GetName()) == "note"){
584 string new_value = mod.GetValue();
585 NStr::ReplaceInPlace(new_value, "<", "[");
586 NStr::ReplaceInPlace(new_value, ">", "]");
587 mod.SetValue(new_value);
588 }
589 }
590 }
591
592
593 class CApplyMods
594 {
595 public:
596 using TModList = CModHandler::TModList;
597 using TMods = CModHandler::TMods;
598 using TMergePolicy = CModHandler::EHandleExisting;
599
600 CApplyMods(const TMods& commandLineMods,
601 const string& m_CommandLineRemainder,
602 CMemorySrcFileMap* pNamedSrcFileMap,
603 CMemorySrcFileMap* pDefaultSrcFileMap,
604 ILineErrorListener* pMessageListener,
605 bool readModsFromTitle,
606 bool isVerbose,
607 TMergePolicy mergePolicy=CModHandler::ePreserve);
608
609 void operator()(CBioseq& bioseq);
610
611 private:
612 void x_GetModsFromFileMap(
613 CMemorySrcFileMap& fileMap,
614 const CBioseq& bioseq,
615 CModHandler::FReportError fReportError,
616 CModHandler& mod_handler,
617 string& remainder);
618
619
620 TMods m_CommandLineMods;
621 const string m_CommandLineRemainder;
622
623 CMemorySrcFileMap* m_pNamedSrcFileMap=nullptr;
624 CMemorySrcFileMap* m_pDefaultSrcFileMap=nullptr;
625 ILineErrorListener* m_pMessageListener=nullptr;
626
627 bool m_ReadModsFromTitle = false;
628 bool m_IsVerbose=false; // can set this in CMemorySrcFileMap
629 TMergePolicy m_MergePolicy;
630 };
631
632
CApplyMods(const TMods & commandLineMods,const string & commandLineRemainder,CMemorySrcFileMap * pNamedSrcFileMap,CMemorySrcFileMap * pDefaultSrcFileMap,ILineErrorListener * pMessageListener,bool readModsFromTitle,bool isVerbose,TMergePolicy mergePolicy)633 CApplyMods::CApplyMods(
634 const TMods& commandLineMods,
635 const string& commandLineRemainder,
636 CMemorySrcFileMap* pNamedSrcFileMap,
637 CMemorySrcFileMap* pDefaultSrcFileMap,
638 ILineErrorListener* pMessageListener,
639 bool readModsFromTitle,
640 bool isVerbose,
641 TMergePolicy mergePolicy) :
642 m_CommandLineMods(commandLineMods),
643 m_CommandLineRemainder(commandLineRemainder),
644 m_pNamedSrcFileMap(pNamedSrcFileMap),
645 m_pDefaultSrcFileMap(pDefaultSrcFileMap),
646 m_pMessageListener(pMessageListener),
647 m_ReadModsFromTitle(readModsFromTitle),
648 m_IsVerbose(isVerbose),
649 m_MergePolicy(mergePolicy)
650 {}
651
652
653
x_GetModsFromFileMap(CMemorySrcFileMap & fileMap,const CBioseq & bioseq,CModHandler::FReportError fReportError,CModHandler & mod_handler,string & remainder)654 void CApplyMods::x_GetModsFromFileMap(
655 CMemorySrcFileMap& fileMap,
656 const CBioseq& bioseq,
657 CModHandler::FReportError fReportError,
658 CModHandler& mod_handler,
659 string& remainder)
660 {
661 CApplyMods::TModList mods;
662 if (!fileMap.GetMods(bioseq, mods, m_IsVerbose)) {
663 return;
664 }
665
666 s_PreprocessNoteMods(mods); // RW-928
667 CApplyMods::TModList rejectedMods;
668
669 mod_handler.AddMods(mods,
670 m_MergePolicy,
671 rejectedMods,
672 fReportError);
673 s_AppendMods(rejectedMods, remainder);
674 }
675
676
operator ()(CBioseq & bioseq)677 void CApplyMods::operator()(CBioseq& bioseq)
678 {
679 CModHandler mod_handler;
680 mod_handler.SetExcludedMods({"lineage"});
681
682 mod_handler.SetMods(m_CommandLineMods);
683 string remainder = m_CommandLineRemainder;
684 TModList rejectedMods;
685
686 string seqId = bioseq.GetId().front()->AsFastaString();
687 auto fReportError =
688 [&](const CModData& /* mod */, const string& msg, EDiagSev /* sev */,
689 EModSubcode subcode) {
690 return sReportError(m_pMessageListener, eDiag_Warning, subcode, seqId, msg);
691 };
692
693 if (m_pNamedSrcFileMap && m_pNamedSrcFileMap->Mapped()) {
694 x_GetModsFromFileMap(
695 *m_pNamedSrcFileMap,
696 bioseq,
697 fReportError,
698 mod_handler,
699 remainder);
700 }
701
702 if (m_pDefaultSrcFileMap && m_pDefaultSrcFileMap->Mapped()) {
703 x_GetModsFromFileMap(
704 *m_pDefaultSrcFileMap,
705 bioseq,
706 fReportError,
707 mod_handler,
708 remainder);
709 }
710
711 CRef<CSeqdesc> pTitleDesc;
712 CSeq_descr::Tdata* pDescriptors = nullptr;
713 if ((bioseq.IsSetDescr() &&
714 bioseq.GetDescr().IsSet()) ||
715 !NStr::IsBlank(remainder)) {
716 pDescriptors = &(bioseq.SetDescr().Set());
717 }
718
719 CSeq_descr::Tdata::iterator title_it;
720 if (pDescriptors) {
721 title_it =
722 find_if(pDescriptors->begin(), pDescriptors->end(),
723 [](CRef<CSeqdesc> pDesc) { return pDesc->IsTitle(); });
724 if (title_it != pDescriptors->end()) {
725 pTitleDesc = *title_it;
726 if (m_ReadModsFromTitle) {
727 auto& title = (*title_it)->SetTitle();
728 string titleRemainder;
729 TModList mods;
730 CTitleParser::Apply(title, mods, titleRemainder);
731 title.clear();
732 mod_handler.AddMods(mods,
733 m_MergePolicy,
734 rejectedMods,
735 fReportError);
736 s_AppendMods(rejectedMods, titleRemainder);
737 remainder = titleRemainder + remainder;
738 }
739 }
740 }
741
742
743 CModAdder::Apply(mod_handler, bioseq, rejectedMods, fReportError);
744 s_AppendMods(rejectedMods, remainder);
745
746
747 NStr::TruncateSpacesInPlace(remainder);
748 if (!remainder.empty()) {
749 if (!pTitleDesc) {
750 pTitleDesc = Ref(new CSeqdesc());
751 pDescriptors->push_back(pTitleDesc);
752 pTitleDesc->SetTitle() = remainder;
753 }
754 else {
755 string current_title =
756 NStr::TruncateSpaces(
757 pTitleDesc->GetTitle(),
758 NStr::eTrunc_End);
759 pTitleDesc->SetTitle() = current_title.empty() ?
760 remainder :
761 current_title + " " + remainder;
762 }
763 }
764 else // remainder.empty()
765 if (pDescriptors) {
766 if (title_it != pDescriptors->end() &&
767 (*title_it)->GetTitle().empty()) {
768 pDescriptors->erase(title_it);
769 }
770
771 if (pDescriptors->empty()) {
772 bioseq.ResetDescr();
773 }
774 }
775 }
776
777
g_ApplyMods(CMemorySrcFileMap * pNamedSrcFileMap,CMemorySrcFileMap * pDefaultSrcFileMap,const string & commandLineStr,bool readModsFromTitle,bool isVerbose,CModHandler::EHandleExisting mergePolicy,ILineErrorListener * pEC,CSeq_entry & entry)778 void g_ApplyMods(
779 CMemorySrcFileMap* pNamedSrcFileMap,
780 CMemorySrcFileMap* pDefaultSrcFileMap,
781 const string& commandLineStr,
782 bool readModsFromTitle,
783 bool isVerbose,
784 CModHandler::EHandleExisting mergePolicy,
785 ILineErrorListener* pEC,
786 CSeq_entry& entry)
787 {
788 using TModList = CModHandler::TModList;
789 using TMods = CModHandler::TMods;
790
791 string commandLineRemainder;
792 TMods commandLineMods;
793
794 if (!NStr::IsBlank(commandLineStr)) {
795 TModList mods;
796 CTitleParser::Apply(commandLineStr, mods, commandLineRemainder);
797 s_PreprocessNoteMods(mods); // RW-928
798
799 auto fReportCommandLineError =
800 [&](const CModData& /* mod */, const string& msg, EDiagSev /* sev */,
801 EModSubcode subcode) {
802 return sReportError(pEC, eDiag_Warning, subcode, "", msg);
803 };
804
805 TModList rejectedMods;
806 CModHandler mod_handler;
807 mod_handler.AddMods(mods,
808 CModHandler::ePreserve,
809 rejectedMods,
810 fReportCommandLineError);
811 s_AppendMods(rejectedMods, commandLineRemainder);
812 commandLineMods = mod_handler.GetMods();
813 }
814
815 CApplyMods applyMods(commandLineMods,
816 commandLineRemainder,
817 pNamedSrcFileMap,
818 pDefaultSrcFileMap,
819 pEC,
820 readModsFromTitle,
821 isVerbose,
822 mergePolicy);
823
824 VisitAllBioseqs(entry, applyMods);
825
826 if (isVerbose && pDefaultSrcFileMap) {
827 //pDefaultSrcFileMap->ReportUnusedIds();
828 }
829 }
830
831
832 END_NCBI_SCOPE
833