1 /*  $Id: src_quals.cpp 636817 2021-08-31 18:41:12Z ivanov $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 *
27 * Author:  Sergiy Gotvyanskyy, NCBI
28 * File Description:
29 *   High level reader for source qualifiers
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
35 
36 #include <corelib/ncbifile.hpp>
37 #include <objmgr/object_manager.hpp>
38 #include <objmgr/scope.hpp>
39 #include <objmgr/bioseq_ci.hpp>
40 #include <objects/seq/Seq_descr.hpp>
41 #include <objtools/readers/line_error.hpp>
42 #include <objtools/readers/message_listener.hpp>
43 #include <objtools/readers/mod_reader.hpp>
44 #include "src_quals.hpp"
45 #include "visitors.hpp"
46 #include <sstream>
47 
48 
49 BEGIN_NCBI_SCOPE
50 USING_SCOPE(objects);
51 
52 
sPostError(ILineErrorListener * pEC,const string & message,const CTempString & seqId,size_t lineNum=0)53 static void sPostError(
54         ILineErrorListener* pEC,
55         const string& message,
56         const CTempString& seqId,
57         size_t lineNum=0)
58 {
59     _ASSERT(pEC);
60 
61     AutoPtr<CLineErrorEx> pErr(
62             CLineErrorEx::Create(
63                 ILineError::eProblem_GeneralParsingError,
64                 eDiag_Error,
65                 0, 0, // code and subcode
66                 seqId,
67                 lineNum, // lineNumber,
68                 message));
69 
70     pEC->PutError(*pErr);
71 
72 }
73 
sReportMissingMods(ILineErrorListener * pEC,const string & fileName,const CBioseq & bioseq)74 static void sReportMissingMods(
75         ILineErrorListener* pEC,
76         const string& fileName,
77         const CBioseq& bioseq)
78 {
79 
80     string seqId = bioseq.GetId().front()->AsFastaString();
81     string message =
82         fileName +
83         " doesn't contain qualifiers for sequence id " +
84         seqId +
85        ".";
86 
87     sPostError(pEC, message, seqId);
88 }
89 
90 
sReportMultipleMatches(ILineErrorListener * pEC,const string & fileName,size_t lineNum,const CBioseq & bioseq)91 static void sReportMultipleMatches(
92         ILineErrorListener* pEC,
93         const string& fileName,
94         size_t lineNum,
95         const CBioseq& bioseq)
96 {
97     string seqId = bioseq.GetId().front()->AsFastaString();
98     ostringstream message;
99     message
100         << "Multiple potential matches for line "
101         << lineNum
102         << " of "
103         << fileName
104         << ". Unable to match sequence id "
105         << seqId
106         << " to a previously matched entry.";
107 
108     sPostError(pEC, message.str(), seqId);
109 }
110 
111 
s_PostProcessID(string & id)112 static void s_PostProcessID(string& id)
113 {
114     if (id.empty()) {
115         return;
116     }
117 
118     if (id.back() == '|') {
119         id.pop_back();
120     }
121     NStr::ToLower(id);
122 }
123 
124 
sReportDuplicateIds(ILineErrorListener * pEC,const string & fileName,size_t currentLine,size_t previousLine,const CTempString & seqId)125 static void sReportDuplicateIds(
126     ILineErrorListener* pEC,
127     const string& fileName,
128     size_t currentLine,
129     size_t previousLine,
130     const CTempString& seqId)
131 {
132 
133     ostringstream message;
134     message
135         <<  "Sequence id "
136         <<  seqId
137         << " on line "
138         << currentLine
139         << " of " << fileName
140         << " duplicates id on line "
141         << previousLine
142         << ". Skipping line "
143         << currentLine
144         << ".";
145 
146     sPostError(pEC, message.str(), seqId, currentLine);
147 }
148 
149 
sReportUnusedMods(ILineErrorListener * pEC,const string & fileName,size_t lineNum,const CTempString & seqId)150 static void sReportUnusedMods(
151     ILineErrorListener* pEC,
152     const string& fileName,
153     size_t lineNum,
154     const CTempString& seqId)
155 {
156     _ASSERT(pEC);
157 
158     string message =
159         fileName +
160         " contains qualifiers for sequence id " +
161         seqId +
162         ", but no sequence with that id was found.";
163 
164     AutoPtr<CLineErrorEx> pErr(
165         CLineErrorEx::Create(
166             ILineError::eProblem_GeneralParsingError,
167             eDiag_Error,
168             0, 0, // code and subcode
169             seqId,
170             lineNum, // lineNumber,
171             message));
172 
173     pEC->PutError(*pErr);
174 }
175 
176 
Empty() const177 bool CMemorySrcFileMap::Empty() const
178 {
179     return m_LineMap.empty();
180 }
181 
182 
Mapped() const183 bool CMemorySrcFileMap::Mapped() const
184 {
185     return m_FileMapped;
186 }
187 
188 
GetMods(const CBioseq & bioseq,TModList & mods,bool isVerbose)189 bool CMemorySrcFileMap::GetMods(const CBioseq& bioseq, TModList& mods, bool isVerbose)
190 {
191     mods.clear();
192     if (!m_FileMapped) {
193         return false;
194     }
195 
196     list<string> id_strings;
197     for (const auto& pId : bioseq.GetId()) {
198         string id;
199         pId->GetLabel(&id, nullptr, CSeq_id::eFasta);
200         s_PostProcessID(id);
201         id_strings.push_back(id);
202         CTempString type, content;
203         NStr::SplitInTwo(id, "|", type, content);
204         id_strings.push_back(content);
205         if (pId->IsGeneral()) {
206             CTempString db, tag;
207             NStr::SplitInTwo(content, "|", db, tag);
208             id_strings.push_back(tag);
209         }
210         else {
211             auto pTextSeqId = pId->GetTextseq_Id();
212             if (pTextSeqId && pTextSeqId->IsSetVersion()) {
213                 size_t pointPos = id.rfind('.');
214                 if (pointPos != string::npos) {
215                     CTempString versionlessId(id, 0, pointPos);
216                     NStr::SplitInTwo(versionlessId, "|", type, content);
217                     id_strings.push_back(versionlessId);
218                     id_strings.push_back(content);
219                 }
220             }
221         }
222     }
223 
224 /*
225     for (const auto& id : id_strings) {
226         auto it = m_LineMap.find(id);
227         if (it != m_LineMap.end()) {
228             x_ProcessLine(it->second.line, mods);
229             auto lineNum = it->second.lineNum;
230             m_ProcessedIdsToLineNum.emplace(id, lineNum);
231             for (const auto& pEquivIt : it->second.equiv) {
232                 m_LineMap.erase(pEquivIt->val);
233             }
234             m_LineMap.erase(it);
235             return true;
236         }
237     }
238 */
239 
240     for (const auto& id : id_strings) {
241         auto it = m_LineMap.find(id);
242         if (it != m_LineMap.end()) {
243             CTempString* linePtr = it->second.linePtr;
244             CTempString& line = linePtr ? *linePtr : it->second.line;
245             if (!line.empty()) {
246                 x_ProcessLine(line, mods);
247                 auto lineNum = it->second.lineNum;
248                 m_ProcessedIdsToLineNum.emplace(id, lineNum);
249                 line.clear();
250                 return true;
251             }
252         }
253     }
254 
255 
256 
257     for (const auto& id : id_strings) {
258         auto it = m_ProcessedIdsToLineNum.find(id);
259         if (it != end(m_ProcessedIdsToLineNum)) {
260             sReportMultipleMatches(m_pEC, m_pFileMap->GetFileName(), it->second, bioseq);
261             return false;
262         }
263     }
264 
265     if (isVerbose) {
266         sReportMissingMods(m_pEC, m_pFileMap->GetFileName(), bioseq);
267     }
268     return false;
269 }
270 
271 
ReportUnusedIds()272 void CMemorySrcFileMap::ReportUnusedIds()
273 {
274     if (!Empty()) {
275         map<size_t, CTempString> unusedLines;
276         for (const auto& entry : m_LineMap) {
277             if (!entry.second.line.empty()) {
278                 unusedLines.emplace(entry.second.lineNum, entry.second.line);
279             }
280         }
281 
282         for (const auto& entry : unusedLines) {
283             CTempString seqId, remainder;
284             NStr::SplitInTwo(entry.second, "\t", seqId, remainder);
285             sReportUnusedMods(m_pEC,
286                     m_pFileMap->GetFileName(),
287                     entry.first,
288                     NStr::TruncateSpaces_Unsafe(seqId));
289         }
290     }
291 }
292 
x_ProcessLine(const CTempString & line,TModList & mods)293 void CMemorySrcFileMap::x_ProcessLine(const CTempString& line, TModList& mods)
294 {
295     vector<CTempString> tokens;
296     NStr::Split(line, "\t", tokens);
297     for (size_t i=1; i < tokens.size() && i < m_ColumnNames.size(); ++i) {
298         auto value=NStr::TruncateSpaces_Unsafe(tokens[i]);
299         if (!NStr::IsBlank(value)) {
300             mods.emplace_back(m_ColumnNames[i], value);
301         }
302     }
303 }
304 
305 static pair<size_t,size_t>
s_IdTypeToNumFields(CSeq_id::E_Choice choice)306 s_IdTypeToNumFields(CSeq_id::E_Choice choice)
307 {
308     switch(choice) {
309     case CSeq_id::e_Local:
310     case CSeq_id::e_Gibbsq:
311     case CSeq_id::e_Gibbmt:
312     case CSeq_id::e_Giim:
313     case CSeq_id::e_Gi:
314         return make_pair<size_t,size_t>(1,1);
315     case CSeq_id::e_Patent:
316         return make_pair<size_t,size_t>(3,3);
317     case CSeq_id::e_General:
318         return make_pair<size_t,size_t>(2,2);
319     default:
320         break;
321     }
322     return make_pair<size_t,size_t>(1,3);
323 }
324 
325 
326 static bool
s_ParseFastaIdString(const CTempString & fastaString,set<CTempString,PNocase_Generic<CTempString>> & idStrings)327 s_ParseFastaIdString(const CTempString& fastaString,
328     set<CTempString, PNocase_Generic<CTempString>>& idStrings)
329 {
330     idStrings.clear();
331 
332     static const size_t minStubLength=2;
333     static const size_t maxStubLength=3;
334 
335     using size_type = CTempString::size_type;
336     size_type fastaLength = fastaString.size();
337     size_type currentPos=0;
338     size_type idStartPos=0;
339     size_t currentField=0;
340     size_t currentMinField=0;
341     size_t currentMaxField=0;
342 
343     while (currentPos < fastaLength) {
344         if (idStartPos == currentPos) {
345             auto nextBarPos = fastaString.find('|', currentPos);
346             if (nextBarPos == NPOS) {
347                 return false;
348             }
349             const auto stubLength = nextBarPos - currentPos;
350             if (stubLength<minStubLength || stubLength>maxStubLength) {
351                 return false;
352             }
353             const auto idType =
354                 CSeq_id::WhichInverseSeqId(fastaString.substr(currentPos, stubLength));
355             if (idType == CSeq_id::e_not_set) {
356                 return false;
357             }
358             auto numFields = s_IdTypeToNumFields(idType);
359             currentMinField = numFields.first;
360             currentMaxField = numFields.second;
361             currentPos=nextBarPos+1;
362             continue;
363         }
364 
365         _ASSERT(currentMinField <= currentMaxField);
366         if (currentField < currentMaxField) {
367             auto nextBarPos = fastaString.find('|', currentPos);
368             if (nextBarPos == NPOS) {
369                 if (currentField < currentMinField-1) {
370                     return false;
371                 }
372                 idStrings.emplace(fastaString.substr(idStartPos));
373                 return true;
374             }
375             if (currentField >= currentMinField) {
376                 auto length = nextBarPos-currentPos;
377                 if (length>=minStubLength && length<=maxStubLength) {
378                     const auto idType =
379                         CSeq_id::WhichInverseSeqId(fastaString.substr(currentPos, length));
380                     if (idType != CSeq_id::e_not_set) {
381                         auto numFields = s_IdTypeToNumFields(idType);
382                         currentMinField = numFields.first;
383                         currentMaxField = numFields.second;
384                         idStartPos=currentPos;
385                         currentField=0;
386                         currentPos=nextBarPos+1;
387                         continue;
388                     }
389                 }
390             }
391             currentPos=nextBarPos+1;
392             ++currentField;
393         }
394         else {
395             _ASSERT(currentField == currentMaxField);
396             idStrings.emplace(fastaString.substr(idStartPos, (currentPos-idStartPos)-1));
397             idStartPos=currentPos;
398             currentField=0;
399         }
400     }
401 
402     if (currentField < currentMinField) {
403         return false;
404     }
405 
406     if (fastaString[fastaLength-1] == '|') {
407         if (currentField < currentMaxField) {
408             _ASSERT(currentPos == fastaLength);
409             idStrings.emplace(fastaString.substr(idStartPos, (currentPos-idStartPos)-1));
410             return true;
411         }
412         return false;
413     }
414 
415     return true;
416 }
417 
418 
x_RegisterLine(size_t lineNum,const CTempString & line,bool allowAcc)419 void CMemorySrcFileMap::x_RegisterLine(size_t lineNum, const CTempString& line, bool allowAcc)
420 {
421     CTempString idString, remainder;
422     NStr::SplitInTwo(line, "\t", idString, remainder);
423     NStr::TruncateSpacesInPlace(idString);
424     if (idString.empty()) {
425         return;
426     }
427 
428     if (count(begin(idString), end(idString), '|')<2) { // idString encodes a single id
429         auto rval = m_LineMap.emplace(idString, SLineInfo{lineNum, line});
430         if (!rval.second) {
431             CTempString seqId, remainder; // revisit this
432             NStr::SplitInTwo(line, "\t", seqId, remainder);
433             sReportDuplicateIds(m_pEC,
434                 m_pFileMap->GetFileName(),
435                 lineNum,
436                 rval.first->second.lineNum,
437                 NStr::TruncateSpaces_Unsafe(seqId));
438         }
439         return;
440     }
441 
442     set<CTempString, PNocase_Generic<CTempString>> parsedIDs;
443     if (!s_ParseFastaIdString(idString, parsedIDs)){
444         sPostError(m_pEC,
445                 "In " + m_pFileMap->GetFileName() +
446                 ". Unable to parse " + idString + ".",
447                 "",
448                 lineNum);
449         return;
450     }
451 
452     CTempString* linePtr=nullptr;
453     for (auto id : parsedIDs) {
454         pair<TLineMap::iterator,bool> rval;
455         if (linePtr) {
456             rval = m_LineMap.emplace(id, SLineInfo{lineNum});
457             rval.first->second.linePtr = linePtr;
458         }
459         else {
460             rval = m_LineMap.emplace(id, SLineInfo{lineNum, line});
461             linePtr = &rval.first->second.line;
462         }
463 
464         if (!rval.second) {
465             CTempString seqId, remainder; // revisit this
466             NStr::SplitInTwo(line, "\t", seqId, remainder);
467             sReportDuplicateIds(m_pEC,
468                 m_pFileMap->GetFileName(),
469                 lineNum,
470                 rval.first->second.lineNum,
471                 NStr::TruncateSpaces_Unsafe(seqId));
472 
473             linePtr->clear();
474             break;
475         }
476     }
477 }
478 
479 
480 
MapFile(const string & fileName,bool allowAcc)481 void CMemorySrcFileMap::MapFile(const string& fileName, bool allowAcc)
482 {
483     if (m_FileMapped ||
484         m_pFileMap ||
485         !m_LineMap.empty()) {
486         return;
487     }
488 
489     m_pFileMap.reset(new CMemoryFileMap(fileName));
490 
491     size_t fileSize = m_pFileMap->GetFileSize();
492     const char* ptr = (const char*)m_pFileMap->Map(0, fileSize);
493     const char* end = ptr + fileSize;
494 
495     size_t lineNum = 0;
496     while (ptr < end)
497     {
498         // search for next non empty line
499         if (*ptr == '\r' || *ptr == '\n') {
500             ++ptr;
501             continue;
502         }
503 
504         const char* start = ptr;
505         // search for end of line
506         const char* endline = (const char*)memchr(ptr, '\n', end - ptr);
507         if (endline == nullptr) endline = end;
508 
509         ptr = endline + 1;
510         endline--;
511 
512         while (start < endline && *endline == '\r')
513             endline--;
514 
515         // compose line control structure
516         if (start < endline)
517         {
518             ++lineNum;
519             CTempString line(start, endline-start+1);
520             if (m_ColumnNames.empty())
521                 NStr::Split(line, "\t", m_ColumnNames);
522             else // parse regular line
523                 x_RegisterLine(lineNum, line, allowAcc);
524         }
525     }
526 
527     if (m_ColumnNames.empty()) {
528         NCBI_THROW(CArgException, eConstraint,
529         "source modifiers file header line is not valid");
530     }
531 
532     m_FileMapped = true;
533 }
534 
535 
536 
537 
538 
539 
s_AppendMods(const CModHandler::TModList & mods,string & title)540 static void s_AppendMods(
541     const CModHandler::TModList& mods,
542     string& title)
543 {
544     for (const auto& mod : mods) {
545 
546         title.append(" ["
547                 + mod.GetName()
548                 + "="
549                 + mod.GetValue()
550                 + "]");
551     }
552 }
553 
554 
sReportError(ILineErrorListener * pEC,EDiagSev severity,int subcode,const string & seqId,const string & message,ILineError::EProblem problemType=ILineError::eProblem_GeneralParsingError)555 static void sReportError(
556         ILineErrorListener* pEC,
557         EDiagSev severity,
558         int subcode,
559         const string& seqId,
560         const string& message,
561         ILineError::EProblem problemType=ILineError::eProblem_GeneralParsingError)
562 {
563     _ASSERT(pEC);
564 
565     AutoPtr<CLineErrorEx> pErr(
566             CLineErrorEx::Create(
567                 problemType,
568                 severity,
569                 EReaderCode::eReader_Mods,
570                 subcode,
571                 seqId,
572                 0, // lineNumber,
573                 message));
574 
575     pEC->PutError(*pErr);
576 }
577 
578 
579 
s_PreprocessNoteMods(CModHandler::TModList & mods)580 static void s_PreprocessNoteMods(CModHandler::TModList& mods)
581 {
582     for (auto& mod : mods) {
583         if (CModHandler::GetCanonicalName(mod.GetName()) == "note"){
584             string new_value = mod.GetValue();
585             NStr::ReplaceInPlace(new_value, "<", "[");
586             NStr::ReplaceInPlace(new_value, ">", "]");
587             mod.SetValue(new_value);
588         }
589     }
590 }
591 
592 
593 class CApplyMods
594 {
595 public:
596     using TModList = CModHandler::TModList;
597     using TMods = CModHandler::TMods;
598     using TMergePolicy = CModHandler::EHandleExisting;
599 
600     CApplyMods(const TMods& commandLineMods,
601                const string& m_CommandLineRemainder,
602                CMemorySrcFileMap* pNamedSrcFileMap,
603                CMemorySrcFileMap* pDefaultSrcFileMap,
604                ILineErrorListener* pMessageListener,
605                bool readModsFromTitle,
606                bool isVerbose,
607                TMergePolicy mergePolicy=CModHandler::ePreserve);
608 
609     void operator()(CBioseq& bioseq);
610 
611 private:
612     void x_GetModsFromFileMap(
613         CMemorySrcFileMap& fileMap,
614         const CBioseq& bioseq,
615         CModHandler::FReportError fReportError,
616         CModHandler& mod_handler,
617         string& remainder);
618 
619 
620     TMods m_CommandLineMods;
621     const string m_CommandLineRemainder;
622 
623     CMemorySrcFileMap* m_pNamedSrcFileMap=nullptr;
624     CMemorySrcFileMap* m_pDefaultSrcFileMap=nullptr;
625     ILineErrorListener* m_pMessageListener=nullptr;
626 
627     bool m_ReadModsFromTitle = false;
628     bool m_IsVerbose=false; // can set this in CMemorySrcFileMap
629     TMergePolicy m_MergePolicy;
630 };
631 
632 
CApplyMods(const TMods & commandLineMods,const string & commandLineRemainder,CMemorySrcFileMap * pNamedSrcFileMap,CMemorySrcFileMap * pDefaultSrcFileMap,ILineErrorListener * pMessageListener,bool readModsFromTitle,bool isVerbose,TMergePolicy mergePolicy)633 CApplyMods::CApplyMods(
634         const TMods& commandLineMods,
635         const string& commandLineRemainder,
636         CMemorySrcFileMap* pNamedSrcFileMap,
637         CMemorySrcFileMap* pDefaultSrcFileMap,
638         ILineErrorListener* pMessageListener,
639         bool readModsFromTitle,
640         bool isVerbose,
641         TMergePolicy mergePolicy) :
642     m_CommandLineMods(commandLineMods),
643     m_CommandLineRemainder(commandLineRemainder),
644     m_pNamedSrcFileMap(pNamedSrcFileMap),
645     m_pDefaultSrcFileMap(pDefaultSrcFileMap),
646     m_pMessageListener(pMessageListener),
647     m_ReadModsFromTitle(readModsFromTitle),
648     m_IsVerbose(isVerbose),
649     m_MergePolicy(mergePolicy)
650 {}
651 
652 
653 
x_GetModsFromFileMap(CMemorySrcFileMap & fileMap,const CBioseq & bioseq,CModHandler::FReportError fReportError,CModHandler & mod_handler,string & remainder)654 void CApplyMods::x_GetModsFromFileMap(
655         CMemorySrcFileMap& fileMap,
656         const CBioseq& bioseq,
657         CModHandler::FReportError fReportError,
658         CModHandler& mod_handler,
659         string& remainder)
660 {
661     CApplyMods::TModList mods;
662     if (!fileMap.GetMods(bioseq, mods, m_IsVerbose)) {
663         return;
664     }
665 
666     s_PreprocessNoteMods(mods); // RW-928
667     CApplyMods::TModList rejectedMods;
668 
669     mod_handler.AddMods(mods,
670             m_MergePolicy,
671             rejectedMods,
672             fReportError);
673     s_AppendMods(rejectedMods, remainder);
674 }
675 
676 
operator ()(CBioseq & bioseq)677 void CApplyMods::operator()(CBioseq& bioseq)
678 {
679     CModHandler mod_handler;
680     mod_handler.SetExcludedMods({"lineage"});
681 
682     mod_handler.SetMods(m_CommandLineMods);
683     string remainder = m_CommandLineRemainder;
684     TModList rejectedMods;
685 
686     string seqId = bioseq.GetId().front()->AsFastaString();
687     auto fReportError =
688         [&](const CModData& /* mod */, const string& msg, EDiagSev /* sev */,
689             EModSubcode subcode) {
690             return sReportError(m_pMessageListener, eDiag_Warning, subcode, seqId, msg);
691         };
692 
693     if (m_pNamedSrcFileMap && m_pNamedSrcFileMap->Mapped()) {
694         x_GetModsFromFileMap(
695                 *m_pNamedSrcFileMap,
696                 bioseq,
697                 fReportError,
698                 mod_handler,
699                 remainder);
700     }
701 
702     if (m_pDefaultSrcFileMap && m_pDefaultSrcFileMap->Mapped()) {
703         x_GetModsFromFileMap(
704                 *m_pDefaultSrcFileMap,
705                 bioseq,
706                 fReportError,
707                 mod_handler,
708                 remainder);
709     }
710 
711     CRef<CSeqdesc> pTitleDesc;
712     CSeq_descr::Tdata* pDescriptors = nullptr;
713     if ((bioseq.IsSetDescr() &&
714         bioseq.GetDescr().IsSet()) ||
715         !NStr::IsBlank(remainder)) {
716         pDescriptors = &(bioseq.SetDescr().Set());
717     }
718 
719     CSeq_descr::Tdata::iterator title_it;
720     if (pDescriptors) {
721         title_it =
722             find_if(pDescriptors->begin(), pDescriptors->end(),
723                     [](CRef<CSeqdesc> pDesc) { return pDesc->IsTitle(); });
724         if (title_it != pDescriptors->end()) {
725             pTitleDesc = *title_it;
726             if (m_ReadModsFromTitle) {
727                 auto& title = (*title_it)->SetTitle();
728                 string titleRemainder;
729                 TModList mods;
730                 CTitleParser::Apply(title, mods, titleRemainder);
731                 title.clear();
732                 mod_handler.AddMods(mods,
733                     m_MergePolicy,
734                     rejectedMods,
735                     fReportError);
736                 s_AppendMods(rejectedMods, titleRemainder);
737                 remainder = titleRemainder +  remainder;
738             }
739         }
740     }
741 
742 
743     CModAdder::Apply(mod_handler, bioseq, rejectedMods, fReportError);
744     s_AppendMods(rejectedMods, remainder);
745 
746 
747     NStr::TruncateSpacesInPlace(remainder);
748     if (!remainder.empty()) {
749         if (!pTitleDesc) {
750             pTitleDesc = Ref(new CSeqdesc());
751             pDescriptors->push_back(pTitleDesc);
752             pTitleDesc->SetTitle() = remainder;
753         }
754         else {
755             string current_title =
756                 NStr::TruncateSpaces(
757                     pTitleDesc->GetTitle(),
758                     NStr::eTrunc_End);
759             pTitleDesc->SetTitle() = current_title.empty() ?
760                 remainder :
761                 current_title + " " + remainder;
762         }
763     }
764     else // remainder.empty()
765     if (pDescriptors) {
766         if (title_it != pDescriptors->end() &&
767             (*title_it)->GetTitle().empty()) {
768             pDescriptors->erase(title_it);
769         }
770 
771         if (pDescriptors->empty()) {
772             bioseq.ResetDescr();
773         }
774     }
775 }
776 
777 
g_ApplyMods(CMemorySrcFileMap * pNamedSrcFileMap,CMemorySrcFileMap * pDefaultSrcFileMap,const string & commandLineStr,bool readModsFromTitle,bool isVerbose,CModHandler::EHandleExisting mergePolicy,ILineErrorListener * pEC,CSeq_entry & entry)778 void g_ApplyMods(
779     CMemorySrcFileMap* pNamedSrcFileMap,
780     CMemorySrcFileMap* pDefaultSrcFileMap,
781     const string& commandLineStr,
782     bool readModsFromTitle,
783     bool isVerbose,
784     CModHandler::EHandleExisting mergePolicy,
785     ILineErrorListener* pEC,
786     CSeq_entry& entry)
787 {
788     using TModList = CModHandler::TModList;
789     using TMods = CModHandler::TMods;
790 
791     string commandLineRemainder;
792     TMods commandLineMods;
793 
794     if (!NStr::IsBlank(commandLineStr)) {
795         TModList mods;
796         CTitleParser::Apply(commandLineStr, mods, commandLineRemainder);
797         s_PreprocessNoteMods(mods); // RW-928
798 
799         auto fReportCommandLineError =
800             [&](const CModData& /* mod */, const string& msg, EDiagSev /* sev */,
801                 EModSubcode subcode) {
802                 return sReportError(pEC, eDiag_Warning, subcode, "", msg);
803             };
804 
805         TModList rejectedMods;
806         CModHandler mod_handler;
807         mod_handler.AddMods(mods,
808             CModHandler::ePreserve,
809             rejectedMods,
810             fReportCommandLineError);
811         s_AppendMods(rejectedMods, commandLineRemainder);
812         commandLineMods = mod_handler.GetMods();
813     }
814 
815     CApplyMods applyMods(commandLineMods,
816                          commandLineRemainder,
817                          pNamedSrcFileMap,
818                          pDefaultSrcFileMap,
819                          pEC,
820                          readModsFromTitle,
821                          isVerbose,
822                          mergePolicy);
823 
824     VisitAllBioseqs(entry, applyMods);
825 
826     if (isVerbose && pDefaultSrcFileMap) {
827         //pDefaultSrcFileMap->ReportUnusedIds();
828     }
829 }
830 
831 
832 END_NCBI_SCOPE
833