1 /*  $Id: ngalign_app.cpp 548810 2017-10-18 13:38:41Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Nathan Bouk, Mike DiCuccio
27  *
28  * File Description:
29  *
30  */
31 
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbiapp.hpp>
35 #include <corelib/ncbienv.hpp>
36 #include <corelib/ncbiargs.hpp>
37 
38 #include <objmgr/object_manager.hpp>
39 #include <objtools/data_loaders/genbank/gbloader.hpp>
40 #include <objmgr/scope.hpp>
41 #include <objects/general/User_object.hpp>
42 #include <objects/general/User_field.hpp>
43 #include <objects/general/Object_id.hpp>
44 #include <objects/seqalign/Seq_align.hpp>
45 #include <objects/seq/Bioseq.hpp>
46 #include <objects/seq/Annot_descr.hpp>
47 #include <objects/seq/Annotdesc.hpp>
48 #include <objects/seq/seq__.hpp>
49 #include <objtools/readers/agp_read.hpp>
50 #include <objtools/readers/agp_seq_entry.hpp>
51 #include <objtools/data_loaders/blastdb/bdbloader.hpp>
52 #include <objtools/readers/fasta.hpp>
53 
54 #include <util/line_reader.hpp>
55 #include <objects/genomecoll/genome_collection__.hpp>
56 
57 #include <algo/align/ngalign/ngalign.hpp>
58 #include <algo/align/ngalign/alignment_filterer.hpp>
59 #include <algo/align/ngalign/sequence_set.hpp>
60 #include <algo/align/ngalign/blast_aligner.hpp>
61 #include <algo/align/ngalign/banded_aligner.hpp>
62 #include <algo/align/ngalign/merge_aligner.hpp>
63 //#include <algo/align/ngalign/coverage_aligner.hpp>
64 //#include <algo/align/ngalign/overlap_aligner.hpp>
65 #include <algo/align/ngalign/inversion_merge_aligner.hpp>
66 #include <algo/align/ngalign/alignment_scorer.hpp>
67 #include <algo/align/ngalign/unordered_spliter.hpp>
68 
69 #include <algo/align/ngalign/merge_tree_aligner.hpp>
70 
71 #include <algo/align/ngalign/ngalign_interface.hpp>
72 
73 #include <objtools/data_loaders/asn_cache/Cache_blob.hpp>
74 #include <objtools/data_loaders/asn_cache/asn_index.hpp>
75 #include <objtools/data_loaders/asn_cache/chunk_file.hpp>
76 #include <objtools/data_loaders/asn_cache/asn_cache_util.hpp>
77 #include <objtools/data_loaders/asn_cache/seq_id_chunk_file.hpp>
78 #include <objtools/data_loaders/asn_cache/asn_cache_loader.hpp>
79 
80 
81 BEGIN_NCBI_SCOPE
82 USING_SCOPE(objects);
83 
84 class CFileLoadAligner : public IAlignmentFactory
85 {
86 public:
87 
CFileLoadAligner(const string & name)88     CFileLoadAligner(const string& name) : FileName(name) { ; }
89 private:
90     string FileName;
91 
92 public:
93 
GetName() const94     string GetName() const { return "file_load_aligner"; }
95 
GenerateAlignments(objects::CScope & Scope,ISequenceSet * QuerySet,ISequenceSet * SubjectSet,TAlignResultsRef AccumResults)96     TAlignResultsRef GenerateAlignments(objects::CScope& Scope,
97                                             ISequenceSet* QuerySet,
98                                             ISequenceSet* SubjectSet,
99                                             TAlignResultsRef AccumResults)
100     {
101         TAlignResultsRef NewResults(new CAlignResultsSet);
102 
103         CNcbiIfstream In(FileName.c_str());
104         while(In) {
105             size_t Pos = In.tellg();
106             try {
107                 CRef<CSeq_align> Temp(new CSeq_align);
108                 In >> MSerial_AsnText >> *Temp;
109                 NewResults->Insert(Temp);
110                 cerr << "FileLoad Aligner: 1 Seq-align" << endl;
111                 continue;
112             } catch(...) {
113                 In.seekg(Pos);
114             }
115             try {
116                 CRef<CSeq_align_set> Temp(new CSeq_align_set);
117                 In >> MSerial_AsnText >> *Temp;
118                 NewResults->Insert(*Temp);
119                 cerr << "FileLoad Aligner: " << Temp->Get().size() << " Seq-align-set" << endl;
120                 continue;
121             } catch(...) {
122                 In.seekg(Pos);
123             }
124             try {
125                 CRef<CSeq_annot> Temp(new CSeq_annot);
126                 In >> MSerial_AsnText >> *Temp;
127                 CRef<CSeq_align_set> TempSet(new CSeq_align_set);
128                 TempSet->Set().insert(TempSet->Set().end(), Temp->SetData().SetAlign().begin(), Temp->SetData().SetAlign().end());
129                 NewResults->Insert(*TempSet);
130 
131                 cerr << "FileLoad Aligner: " << TempSet->Get().size() << " Seq-annot" << endl;
132                 continue;
133             } catch(...) {
134                 In.seekg(Pos);
135             }
136             break;
137         }
138 
139         return NewResults;
140     }
141 };
142 
143 
144 
145 class CNgAlignApp : public CNcbiApplication
146 {
147 private:
148     void Init();
149     int Run();
150 
151 
152 	CRef<CScope> m_Scope;
153 
154 	auto_ptr<CUnorderedSplitter> m_Splitter;
155 
156 	list<CRef<CSeq_id> > m_LoadedIds;
157 	list<CRef<CSeq_id> >::const_iterator LoadedIdsIter;
158 
159     int BatchCounter;
160 
161 	bool x_CreateNgAlignRun(CNgAligner& NgAligner, IRegistry* RunRegistry);
162 	CRef<ISequenceSet> x_CreateSequenceSet(IRegistry* RunRegistry,
163 										const string& Category);
164 
165 	void x_LoadExternalSequences(IRegistry* RunRegistry,
166 								const string& Category,
167 								list<CRef<CSeq_id> >& LoadedIds);
168 
169 	void x_AddScorers(CNgAligner& NgAligner, IRegistry* RunRegistry);
170 	void x_AddFilters(CNgAligner& NgAligner, IRegistry* RunRegistry);
171 	void x_AddAligners(CNgAligner& NgAligner, IRegistry* RunRegistry);
172 
173 	CRef<CBlastAligner> x_CreateBlastAligner(IRegistry* RunRegistry, const string& Name);
174 	CRef<CRemoteBlastAligner> x_CreateRemoteBlastAligner(IRegistry* RunRegistry, const string& Name);
175 	CRef<CMergeAligner> x_CreateMergeAligner(IRegistry* RunRegistry, const string& Name);
176 //	CRef<CCoverageAligner> x_CreateCoverageAligner(IRegistry* RunRegistry, const string& Name);
177 //	CRef<CMergeTreeAligner> x_CreateMergeTreeAligner(IRegistry* RunRegistry, const string& Name);
178 	CRef<CInversionMergeAligner> x_CreateInversionMergeAligner(IRegistry* RunRegistry, const string& Name);
179 	CRef<CSplitSeqAlignMerger> x_CreateSplitSeqMergeAligner(IRegistry* RunRegistry, const string& Name);
180 
181     CRef<CFileLoadAligner> x_CreateFileLoadAligner(IRegistry* RunRegistry, const string& Name);
182 
183 
184 	void x_RecurseSeqEntry(CRef<CSeq_entry> Top, list<CRef<CSeq_id> >& SeqIdList);
185 
186 	CRef<CGC_Assembly> x_LoadAssembly(CNcbiIstream& In);
187 
188 	void x_PrintNoHitList(CNcbiOstream& Out, const CSeq_align_set& Alignments);
189 };
190 
191 
Init()192 void CNgAlignApp::Init()
193 {
194     auto_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
195     arg_desc->SetUsageContext(GetArguments().GetProgramName(),
196                               "Generic app for creating an NgAlign "
197                               "run, and running it.");
198 
199 	arg_desc->AddFlag("info", "info level logging");
200 
201     arg_desc->AddDefaultKey("o", "OutputFile",
202                             "File for results",
203                             CArgDescriptions::eOutputFile,
204                             "-");
205     arg_desc->AddFlag("b", "Output binary ASN.1");
206 
207 	arg_desc->AddFlag("dup", "allow dupes");
208 
209 	arg_desc->AddKey("run", "InputFile", "ngalign run ini file",
210 					CArgDescriptions::eInputFile);
211 
212     arg_desc->AddOptionalKey("asn_cache", "string", "comma seperated paths",
213                             CArgDescriptions::eString);
214 	arg_desc->AddFlag("G", "No genbank");
215 
216 	arg_desc->AddOptionalKey("query", "string", "single seq-id",
217 					CArgDescriptions::eString);
218 	arg_desc->AddOptionalKey("subject", "string", "single seq-id",
219 					CArgDescriptions::eString);
220 	arg_desc->AddOptionalKey("qidlist", "file", "seq-id list file",
221 					CArgDescriptions::eInputFile);
222 	arg_desc->AddOptionalKey("sidlist", "file", "seq-id list file",
223 					CArgDescriptions::eInputFile);
224 	arg_desc->AddOptionalKey("idlist", "file", "fasta seq-id list file",
225 					CArgDescriptions::eInputFile);
226 	arg_desc->AddOptionalKey("qloclist", "file", "seq-loc list file",
227 					CArgDescriptions::eInputFile);
228 	arg_desc->AddOptionalKey("sloclist", "file", "seq-loc list file",
229 					CArgDescriptions::eInputFile);
230     arg_desc->AddOptionalKey("blastdb", "blastdb", "Blastdb Name",
231 					CArgDescriptions::eInputFile);
232 	arg_desc->AddOptionalKey("softfilter", "string", "blastdb soft filter, int or string",
233 					CArgDescriptions::eString);
234 	arg_desc->AddOptionalKey("fasta", "file", "fasta file",
235 					CArgDescriptions::eInputFile);
236 
237 	arg_desc->AddOptionalKey("seqentry", "file", "seq-entry file",
238 					CArgDescriptions::eInputFile);
239 	arg_desc->AddOptionalKey("agp", "file", "agp file",
240 					CArgDescriptions::eInputFile);
241 
242 	arg_desc->AddOptionalKey("gc", "file", "gencoll asn.1 Assembly, for subject side. Allows for seperate ranking by assembly-unit.",
243 					CArgDescriptions::eInputFile);
244 
245 	arg_desc->AddDefaultKey("batch", "int", "batch size for loaded ids",
246 							CArgDescriptions::eInteger, "100");
247 
248 	arg_desc->AddOptionalKey("nohit", "outfile", "List of nohit IDs",
249 							CArgDescriptions::eOutputFile);
250 
251     arg_desc->AddOptionalKey("filters", "string",
252                             "semi-colon seperated list of filters, overrides the ini file",
253                             CArgDescriptions::eString);
254     arg_desc->AddOptionalKey("scorers", "string",
255                             "semi-colon seperated list of scorers, overrides the ini file",
256                             CArgDescriptions::eString);
257 
258 
259     SetupArgDescriptions(arg_desc.release());
260 }
261 
262 
Run()263 int CNgAlignApp::Run()
264 {
265     const CArgs& args = GetArgs();
266 
267 	if(args["info"].HasValue())
268 		SetDiagPostLevel(eDiag_Info);
269 
270     // Scope things
271     CRef<CObjectManager> om(CObjectManager::GetInstance());
272 
273 	if(!args["G"].HasValue())
274 		CGBDataLoader::RegisterInObjectManager(*om);
275 
276 	if(args["blastdb"].HasValue()) {
277 		string Orig = args["blastdb"].AsString();
278         vector<string> DBs;
279         NStr::Split(Orig, ",", DBs);
280         ITERATE(vector<string>, DBIter, DBs) {
281             cerr << *DBIter << endl;
282             CBlastDbDataLoader::RegisterInObjectManager
283         	    (*om, *DBIter,
284 	    		    CBlastDbDataLoader::eNucleotide,
285 				    true, CObjectManager::eDefault, 100);
286         }
287 	}
288 
289 
290     if(args["asn_cache"].HasValue()) {
291 	    vector<string> Tokens;
292         NStr::Split(args["asn_cache"].AsString(), ",", Tokens);
293         int Priority = 1;
294         ITERATE(vector<string>, CacheIter, Tokens) {
295             CFile CacheFile( *CacheIter + "/asn_cache.idx"  );
296 		    if(CacheFile.Exists()) {
297 			    CAsnCache_DataLoader::RegisterInObjectManager(*om, *CacheIter,
298 													CObjectManager::eDefault, Priority);
299                 Priority++;
300 		    }
301         }
302     }
303 
304 
305 	m_Scope.Reset(new CScope(*om));
306 	m_Scope->AddDefaults();
307 
308     // end scope
309 
310 	m_Splitter.reset(new CUnorderedSplitter(*m_Scope));
311 
312     CNcbiRegistry RunRegistry(args["run"].AsInputFile());
313 
314 	bool AllowDupes = false;
315 	if(RunRegistry.Get("ngalign", "allow_dupes") == "true")
316 		AllowDupes = true;
317 	if(args["dup"].HasValue() && args["dup"].AsBoolean())
318 		AllowDupes = true;
319 ERR_POST(Info << "Dupes: " << AllowDupes);
320 
321 	CRef<CGC_Assembly> Assembly;
322 	if(args["gc"].HasValue()) {
323 		Assembly = x_LoadAssembly(args["gc"].AsInputFile());
324 	}
325 
326 
327     BatchCounter = 0;
328 
329 	x_LoadExternalSequences(&RunRegistry, "", m_LoadedIds);
330 	LoadedIdsIter = m_LoadedIds.begin();
331 
332 
333 	CRef<CSeq_align_set> Alignments(new CSeq_align_set);
334 
335 	do {
336 		CNgAligner NgAligner(*m_Scope, Assembly, AllowDupes);
337 
338 		bool Created;
339 		Created = x_CreateNgAlignRun(NgAligner, &RunRegistry);
340 
341 		if(!Created) {
342 			cerr << "Failed to create Run." << endl;
343 			return 1;
344 		}
345 
346 		CRef<CSeq_align_set> CurrAligns;
347 		try {
348             CurrAligns = NgAligner.Align();
349 		} catch(CException& e) {
350             ERR_POST(Warning << e.ReportAll());
351             break;
352         }
353 
354         if(CurrAligns) {
355 			Alignments->Set().insert(Alignments->Set().end(),
356 									CurrAligns->Get().begin(),
357 									CurrAligns->Get().end());
358 		}
359         //break;
360         ERR_POST(Info << "Looping" );
361 	} while(!m_LoadedIds.empty() && LoadedIdsIter != m_LoadedIds.end() );
362 
363 	if(!Alignments.IsNull()) {
364 
365 		/*{{
366 			CRef<CSeq_annot> Old(new CSeq_annot), New(new CSeq_annot), Blast(new CSeq_annot);
367 			Blast->SetTitle("Blast Aligner");
368 			Blast->AddName("Blast Aligner");
369 			Old->SetTitle("Old Merge Aligner");
370 			Old->AddName("Old Merge Aligner");
371 			New->SetTitle("New Merge Aligner");
372 			New->AddName("New Merge Aligner");
373 			ITERATE(CSeq_align_set::Tdata, AlignIter, Alignments->Get()) {
374 				int BlastScore=0, OldScore=0, NewScore=0;
375 				(*AlignIter)->GetNamedScore("blast_aligner", BlastScore);
376 				(*AlignIter)->GetNamedScore("merge_aligner", OldScore);
377 				(*AlignIter)->GetNamedScore("new_merge_aligner", NewScore);
378 
379 				if(BlastScore)
380 					Blast->SetData().SetAlign().push_back(*AlignIter);
381 				else if(OldScore)
382 					Old->SetData().SetAlign().push_back(*AlignIter);
383 				else if(NewScore)
384 					New->SetData().SetAlign().push_back(*AlignIter);
385 			}
386 			CNcbiOstream& ostr = args["o"].AsOutputFile();
387 			ostr << MSerial_AsnText << *Old;
388 			ostr << MSerial_AsnText << *New;
389 			ostr << MSerial_AsnText << *Blast;
390 			return 1;
391 		}}*/
392 
393 		CNcbiOstream& ostr = args["o"].AsOutputFile();
394 		if(args["b"].HasValue() && args["b"].AsBoolean())
395 			ostr << MSerial_AsnBinary << *Alignments;
396 		else
397 			ostr << MSerial_AsnText << *Alignments;
398 
399 		if(args["nohit"].HasValue()) {
400 			x_PrintNoHitList(args["nohit"].AsOutputFile(), *Alignments);
401 		}
402 	} else {
403 		cerr << "No alignments found." << endl;
404 		return 1;
405 	}
406 
407     return 0;
408 }
409 
410 
x_CreateNgAlignRun(CNgAligner & NgAligner,IRegistry * RunRegistry)411 bool CNgAlignApp::x_CreateNgAlignRun(CNgAligner& NgAligner, IRegistry* RunRegistry)
412 {
413 	CRef<ISequenceSet> Query, Subject;
414 	Query = x_CreateSequenceSet(RunRegistry, "query");
415 	Subject = x_CreateSequenceSet(RunRegistry, "subject");
416 
417 	if(Query.IsNull() || Subject.IsNull())
418 		return false;
419 
420 	NgAligner.SetQuery(Query);
421 	NgAligner.SetSubject(Subject);
422 
423 	x_AddScorers(NgAligner, RunRegistry);
424 	x_AddFilters(NgAligner, RunRegistry);
425 
426 	x_AddAligners(NgAligner, RunRegistry);
427 
428 	return true;
429 }
430 
431 
432 CRef<ISequenceSet>
x_CreateSequenceSet(IRegistry * RunRegistry,const string & Category)433 CNgAlignApp::x_CreateSequenceSet(IRegistry* RunRegistry,
434 								const string& Category)
435 {
436     CRef<ISequenceSet> Result;
437 	const string Type = RunRegistry->Get(Category, "type");
438 
439 	const string Source = RunRegistry->Get(Category, "source");
440 
441     const string Mask = RunRegistry->Get(Category, "mask");
442     CSeqMasker* Masker = NULL;
443     if(!Mask.empty()) {
444         string NMer = "/panfs/pan1/gpipe/ThirdParty/WindowMasker/data/" + Mask;
445         Masker = new CSeqMasker(NMer,
446                         0, 1, 1, 0, 0, 0, 0, 0, 0, false, 0, 0, 0, 0, "mean", 0, false, 0, false);
447     }
448 
449     const CArgs& Args = GetArgs();
450 
451 
452 	if(Type == "seqidlist") {
453 		CRef<CSeqIdListSet> IdList(new CSeqIdListSet);
454 
455 		if(Args["query"].HasValue() && Source.empty() && Category == "query") {
456 			const string& Id = Args["query"].AsString();
457 			CRef<CSeq_id> QueryId(new CSeq_id(Id));
458 			IdList->SetIdList().push_back(QueryId);
459 		}
460 		if(Args["subject"].HasValue() && Source.empty() && Category == "subject") {
461 			const string& Id = Args["subject"].AsString();
462 			CRef<CSeq_id> SubjectId(new CSeq_id(Id));
463 			IdList->SetIdList().push_back(SubjectId);
464 		}
465 		if(Args["qidlist"].HasValue() && Source.empty() && Category == "query") {
466 			CNcbiIstream& In = Args["qidlist"].AsInputFile();
467 			CStreamLineReader Reader(In);
468 			while(!Reader.AtEOF()) {
469 				++Reader;
470 				string Line = *Reader;
471 				if(!Line.empty() && Line[0] != '#') {
472 					Line = Line.substr(0, Line.find(" "));
473 					CRef<CSeq_id> QueryId(new CSeq_id(Line));
474 					IdList->SetIdList().push_back(QueryId);
475 				}
476 			}
477 		}
478 		if(Args["sidlist"].HasValue() && Source.empty() && Category == "subject") {
479 			CNcbiIstream& In = Args["sidlist"].AsInputFile();
480 			CStreamLineReader Reader(In);
481 			while(!Reader.AtEOF()) {
482 				++Reader;
483 				string Line = *Reader;
484 				if(!Line.empty() && Line[0] != '#') {
485 					Line = Line.substr(0, Line.find(" "));
486 					CRef<CSeq_id> SubjectId(new CSeq_id(Line));
487 					IdList->SetIdList().push_back(SubjectId);
488 				}
489 			}
490 		}
491 
492 		if(Args["idlist"].HasValue() && Source.empty() && m_LoadedIds.empty()) {
493 			CNcbiIstream& In = Args["idlist"].AsInputFile();
494 			CStreamLineReader Reader(In);
495 			while(!Reader.AtEOF()) {
496 				++Reader;
497 				string Line = *Reader;
498 				if(!Line.empty() && Line[0] != '#') {
499 					Line = Line.substr(0, Line.find(" "));
500 					CRef<CSeq_id> QueryId(new CSeq_id(Line));
501 					m_LoadedIds.push_back(QueryId);
502 					//IdList->SetIdList().push_back(QueryId);
503 				}
504 			}
505 			LoadedIdsIter = m_LoadedIds.begin();
506 		}
507 
508 		if(IdList->SetIdList().empty() && !m_LoadedIds.empty()) {
509 			int BatchSize = Args["batch"].AsInteger();
510 			for(int Count = 0;
511 				Count < BatchSize && LoadedIdsIter != m_LoadedIds.end();
512 				Count++) {
513 			//cerr << __LINE__ << (*LoadedIdsIter)->AsFastaString() << endl;
514                 IdList->SetIdList().push_back(*LoadedIdsIter);
515 				++LoadedIdsIter;
516 			}
517 		}
518 
519         if(Masker != NULL)
520             IdList->SetSeqMasker(Masker);
521 
522 		Result.Reset(IdList.GetPointer());
523 	}
524     else if(Type == "seqloclist") {
525         CRef<CSeqLocListSet> LocList(new CSeqLocListSet);
526 
527         if(Args["query"].HasValue() && Source.empty() && Category == "query" ) {
528 			string QueryString = Args["query"].AsString();
529 			//CStreamLineReader Reader(In);
530 			//while(!Reader.AtEOF()) {
531 			//	++Reader;
532 				string Line = QueryString;
533 				if(!Line.empty() && Line[0] != '#') {
534 					vector<string> Tokens;
535                     NStr::Split(Line, "\t -.", Tokens, NStr::fSplit_Tokenize);
536                     CRef<CSeq_loc> Loc(new CSeq_loc);
537 
538                     Loc->SetInt().SetId().Set(Tokens[0]);
539                     if(Loc->GetInt().GetId().IsGi() && Loc->GetInt().GetId().GetGi() < GI_CONST(50)) {
540                         Loc->SetInt().SetId().SetLocal().SetId() = NStr::StringToInt(Tokens[0]);
541                     }
542 
543                     Loc->SetInt().SetFrom() = NStr::StringToInt(Tokens[1])-1;
544                     Loc->SetInt().SetTo() = NStr::StringToInt(Tokens[2])-1;
545 				    LocList->SetLocList().push_back(Loc);
546                 }
547 			//}
548 		}
549         else if(Args["qloclist"].HasValue() && Source.empty() && Category == "query" ) {
550 			CNcbiIstream& In = Args["qloclist"].AsInputFile();
551 			CStreamLineReader Reader(In);
552 			while(!Reader.AtEOF()) {
553 				++Reader;
554 				string Line = *Reader;
555 				if(!Line.empty() && Line[0] != '#') {
556 					vector<string> Tokens;
557                     NStr::Split(Line, "\t -.", Tokens, NStr::fSplit_Tokenize);
558                     CRef<CSeq_loc> Loc(new CSeq_loc);
559 
560                     Loc->SetInt().SetId().Set(Tokens[0]);
561                     if(Loc->GetInt().GetId().IsGi() && Loc->GetInt().GetId().GetGi() < GI_CONST(50)) {
562                         Loc->SetInt().SetId().SetLocal().SetId() = NStr::StringToInt(Tokens[0]);
563                     }
564 
565                     Loc->SetInt().SetFrom() = NStr::StringToInt(Tokens[1])-1;
566                     Loc->SetInt().SetTo() = NStr::StringToInt(Tokens[2])-1;
567 				    LocList->SetLocList().push_back(Loc);
568                 }
569 			}
570 		}
571 
572         else if(Args["subject"].HasValue() && Source.empty() && Category == "subject" ) {
573 			string SubjectString = Args["subject"].AsString();
574 			//CStreamLineReader Reader(In);
575 			//while(!Reader.AtEOF()) {
576 			//	++Reader;
577 				string Line = SubjectString;
578 				if(!Line.empty() && Line[0] != '#') {
579 					vector<string> Tokens;
580                     NStr::Split(Line, "\t -.", Tokens, NStr::fSplit_Tokenize);
581                     CRef<CSeq_loc> Loc(new CSeq_loc);
582                     Loc->SetInt().SetId().Set(Tokens[0]);
583                     if(Loc->GetInt().GetId().IsGi() && Loc->GetInt().GetId().GetGi() < GI_CONST(50)) {
584                         Loc->SetInt().SetId().SetLocal().SetId() = NStr::StringToInt(Tokens[0]);
585                     }
586 
587                     Loc->SetInt().SetFrom() = NStr::StringToInt(Tokens[1])-1;
588                     Loc->SetInt().SetTo() = NStr::StringToInt(Tokens[2])-1;
589 				    LocList->SetLocList().push_back(Loc);
590                 }
591 			//}
592 		}
593 
594         else if(Args["sloclist"].HasValue() && Source.empty() && Category == "subject" ) {
595 			CNcbiIstream& In = Args["sloclist"].AsInputFile();
596 			CStreamLineReader Reader(In);
597 			while(!Reader.AtEOF()) {
598 				++Reader;
599 				string Line = *Reader;
600 				if(!Line.empty() && Line[0] != '#') {
601 					vector<string> Tokens;
602                     NStr::Split(Line, "\t -.", Tokens, NStr::fSplit_Tokenize);
603                     CRef<CSeq_loc> Loc(new CSeq_loc);
604                     Loc->SetInt().SetId().Set(Tokens[0]);
605                     if(Loc->GetInt().GetId().IsGi() && Loc->GetInt().GetId().GetGi() < GI_CONST(50)) {
606                         Loc->SetInt().SetId().SetLocal().SetId() = NStr::StringToInt(Tokens[0]);
607                     }
608 
609                     Loc->SetInt().SetFrom() = NStr::StringToInt(Tokens[1])-1;
610                     Loc->SetInt().SetTo() = NStr::StringToInt(Tokens[2])-1;
611 				    LocList->SetLocList().push_back(Loc);
612                 }
613 			}
614 		}
615 
616         if(Masker != NULL)
617             LocList->SetSeqMasker(Masker);
618 
619 		Result.Reset(LocList.GetPointer());
620     }
621 	else if(Type == "blastdb") {
622 		string Orig = Args["blastdb"].AsString();
623         vector<string> DBs;
624         NStr::Split(Orig, ",", DBs);
625         ITERATE(vector<string>, DBIter, DBs) {
626             CRef<CBlastDbSet> BlastDb(new CBlastDbSet(*DBIter));
627 		    if(Args["softfilter"].HasValue()) {
628 			    BlastDb->SetSoftFiltering(Args["softfilter"].AsString());
629 		    }
630 		    Result.Reset(BlastDb.GetPointer());
631 	        break;
632         }
633     }
634 	else if(Type == "fasta") {
635 		string FileName;
636 		FileName = RunRegistry->Get(Category, "fasta");
637 		if(Args["fasta"].HasValue())
638 			FileName = Args["fasta"].AsString();
639         int Batch = -1;
640         if(Args["batch"].HasValue())
641             Batch = Args["batch"].AsInteger();
642 		ERR_POST(Info << "Fasta File Name: " << FileName );
643 		// yes, its being leaked.
644 		CNcbiIfstream* FastaIn = new CNcbiIfstream(FileName.c_str());
645 		CRef<CFastaFileSet> FastaSet;
646         if(Batch != -1) {
647             FastaSet.Reset(new CFastaFileSet(FastaIn, BatchCounter, Batch));
648             BatchCounter += Batch;
649             for(int i = 0; i < Batch; i++) {
650                 if(LoadedIdsIter != m_LoadedIds.end())
651                     ++LoadedIdsIter;
652             }
653         } else {
654             FastaSet.Reset(new CFastaFileSet(FastaIn));
655 		}
656         Result.Reset(FastaSet.GetPointer());
657 	}
658 	else if(Type == "splitseqidlist") {
659 		CRef<CSplitSeqIdListSet> IdList(new CSplitSeqIdListSet(m_Splitter.get()));
660 
661 		if(Args["query"].HasValue()) {
662 			const string& Id = Args["query"].AsString();
663 			CRef<CSeq_id> QueryId(new CSeq_id(Id));
664 			IdList->AddSeqId(QueryId);
665 		}
666 
667         /*if(Args["idlist"].HasValue()) {
668 			CNcbiIstream& In = Args["idlist"].AsInputFile();
669 			CStreamLineReader Reader(In);
670 			while(!Reader.AtEOF()) {
671 				++Reader;
672 				string Line = *Reader;
673 				if(!Line.empty() && Line[0] != '#') {
674 					Line = Line.substr(0, Line.find(" "));
675 					CRef<CSeq_id> QueryId(new CSeq_id(Line));
676 					IdList->AddSeqId(QueryId);
677 				}
678 			}
679 		}*/
680 
681         if(Args["idlist"].HasValue() && Source.empty() && m_LoadedIds.empty()) {
682 			CNcbiIstream& In = Args["idlist"].AsInputFile();
683 			CStreamLineReader Reader(In);
684 			while(!Reader.AtEOF()) {
685 				++Reader;
686 				string Line = *Reader;
687 				if(!Line.empty() && Line[0] != '#') {
688 					Line = Line.substr(0, Line.find(" "));
689 					CRef<CSeq_id> QueryId(new CSeq_id(Line));
690 					m_LoadedIds.push_back(QueryId);
691 					//IdList->SetIdList().push_back(QueryId);
692 				}
693 			}
694 			LoadedIdsIter = m_LoadedIds.begin();
695 		}
696 
697         cerr << __LINE__ << endl;
698 		if(IdList->Empty() && !m_LoadedIds.empty()) {
699 			int BatchSize = Args["batch"].AsInteger();
700 			for(int Count = 0;
701 				Count < BatchSize && LoadedIdsIter != m_LoadedIds.end();
702 				Count++) {
703 			cerr << __LINE__ << (*LoadedIdsIter)->AsFastaString() << endl;
704 				IdList->AddSeqId(*LoadedIdsIter);
705                 //IdList->SetIdList().push_back(*LoadedIdsIter);
706 				++LoadedIdsIter;
707 			}
708 		}
709 
710 
711         if(IdList->Empty()) {
712             ERR_POST(Error << " Split Seq Id List is empty, maybe all gap? ");
713         }
714 
715         if(Masker != NULL)
716             IdList->SetSeqMasker(Masker);
717 
718 		Result.Reset(IdList.GetPointer());
719 	}
720     else if(Type == "splitseqloclist") {
721 		CRef<CSplitSeqLocListSet> LocList(new CSplitSeqLocListSet(m_Splitter.get()));
722 
723         if(Args["qloclist"].HasValue() && Source.empty() && Category == "query" ) {
724 			CNcbiIstream& In = Args["qloclist"].AsInputFile();
725 			CStreamLineReader Reader(In);
726 			while(!Reader.AtEOF()) {
727 				++Reader;
728 				string Line = *Reader;
729 				if(!Line.empty() && Line[0] != '#') {
730 					vector<string> Tokens;
731                     NStr::Split(Line, "\t ", Tokens, NStr::fSplit_Tokenize);
732                     CRef<CSeq_loc> Loc(new CSeq_loc);
733                     if(Tokens.size() >= 3) {
734                         Loc->SetInt().SetId().Set(Tokens[0]);
735                         Loc->SetInt().SetFrom() = NStr::StringToInt(Tokens[1])-1;
736                         Loc->SetInt().SetTo() = NStr::StringToInt(Tokens[2])-1;
737                     } else {
738                         Loc->SetWhole().Set(Tokens[0]);
739                     }
740                     LocList->AddSeqLoc(Loc);
741                 }
742 			}
743 		}
744 
745         if(Args["sloclist"].HasValue() && Source.empty() && Category == "subject" ) {
746 			CNcbiIstream& In = Args["sloclist"].AsInputFile();
747 			CStreamLineReader Reader(In);
748 			while(!Reader.AtEOF()) {
749 				++Reader;
750 				string Line = *Reader;
751 				if(!Line.empty() && Line[0] != '#') {
752 					vector<string> Tokens;
753                     NStr::Split(Line, "\t ", Tokens, NStr::fSplit_Tokenize);
754                     CRef<CSeq_loc> Loc(new CSeq_loc);
755                     if(Tokens.size() >= 3) {
756                         Loc->SetInt().SetId().Set(Tokens[0]);
757                         Loc->SetInt().SetFrom() = NStr::StringToInt(Tokens[1])-1;
758                         Loc->SetInt().SetTo() = NStr::StringToInt(Tokens[2])-1;
759                     } else {
760                         Loc->SetWhole().Set(Tokens[0]);
761                     }
762                     LocList->AddSeqLoc(Loc);
763                 }
764 			}
765 		}
766         if(LocList->Empty()) {
767             ERR_POST(Error << " Split Seq Loc List is empty, maybe all gap? ");
768         }
769 
770         if(Masker != NULL)
771             LocList->SetSeqMasker(Masker);
772 
773 		Result.Reset(LocList.GetPointer());
774 	}
775 
776 	return Result;
777 }
778 
779 
x_LoadExternalSequences(IRegistry * RunRegistry,const string & Category,list<CRef<CSeq_id>> & LoadedIds)780 void CNgAlignApp::x_LoadExternalSequences(IRegistry* RunRegistry,
781 								const string& Category,
782 								list<CRef<CSeq_id> >& LoadedIds)
783 {
784     const CArgs& Args = GetArgs();
785 
786     if(Args["fasta"].HasValue()) {
787         CNcbiIstream& In = Args["fasta"].AsInputFile();
788         CFastaReader FastaReader(In);
789 
790         while(!FastaReader.AtEOF()) {
791             CRef<CSeq_entry> Entry;
792             try{
793                 Entry = FastaReader.ReadOneSeq();
794             } catch(...) {
795                 break;
796             }
797             m_Scope->AddTopLevelSeqEntry(*Entry);
798 			LoadedIds.push_back( Entry->GetSeq().GetId().front() );
799         }
800         cerr << __LINE__ << "\t" << LoadedIds.size() << endl;
801     }
802 
803 
804 	if(Args["seqentry"].HasValue()) {
805 		CNcbiIstream& In = Args["seqentry"].AsInputFile();
806 
807 		while(!In.eof()) {
808 			CRef<CSeq_entry> Top(new CSeq_entry);
809 
810 			size_t Pos = In.tellg();
811 			try {
812 				In >> MSerial_AsnText >> *Top;
813 			} catch(...) {
814 				if(In.eof())
815 					continue;
816 				In.clear();
817 				In.seekg(Pos);
818 				try {
819 					In >> MSerial_AsnBinary >> *Top;
820 				} catch(...) {
821 					if(!In.eof())
822 						cerr << "Read Failure" << endl;
823 				}
824 			}
825 			if(In.eof())
826 				continue;
827 
828 			x_RecurseSeqEntry(Top, LoadedIds);
829 		}
830 	}
831 
832 	if(Args["agp"].HasValue()) {
833 		CNcbiIstream& In = Args["agp"].AsInputFile();
834 
835 		vector< CRef< CSeq_entry > > SeqEntries;
836 	    try {
837             CAgpToSeqEntry Reader(CAgpToSeqEntry::fSetSeqGap );
838             Reader.SetVersion( eAgpVersion_1_1);
839             Reader.ReadStream(In);
840             SeqEntries.clear();
841             SeqEntries = Reader.GetResult();
842 			//AgpRead(In, SeqEntries, eAgpRead_ParseId, true);
843 		} catch(CException& e) {
844 			cerr << "AgpRead Exception: " << e.ReportAll() << endl;
845 		}
846 
847 		ITERATE(vector<CRef<CSeq_entry> >, SeqEntryIter, SeqEntries) {
848 			//cerr << "Loading AGP: " << (*SeqEntryIter)->GetSeq().GetId().front()->AsFastaString() << endl;
849             m_Scope->AddTopLevelSeqEntry(**SeqEntryIter);
850 			LoadedIds.push_back( (*SeqEntryIter)->GetSeq().GetId().front() );
851 		}
852 	}
853 
854 }
855 
856 
x_RecurseSeqEntry(CRef<CSeq_entry> Top,list<CRef<CSeq_id>> & SeqIdList)857 void CNgAlignApp::x_RecurseSeqEntry(CRef<CSeq_entry> Top,
858 									list<CRef<CSeq_id> >& SeqIdList)
859 {
860 	if(Top->IsSet()) {
861 		int Loop = 0;
862 		ITERATE(CBioseq_set::TSeq_set, SeqIter, Top->GetSet().GetSeq_set()) {
863 			//if(Loop > 1927)
864 			//	break;
865 			//if(Loop > 1900)
866 				x_RecurseSeqEntry(*SeqIter, SeqIdList);
867 			Loop++;
868 		}
869 	} else if(Top->IsSeq()) {
870 		CRef<CSeq_id> Id;
871 		Id = Top->GetSeq().GetId().front();
872 		SeqIdList.push_back(Id);
873 		try {
874 			m_Scope->AddTopLevelSeqEntry(*Top);
875 		} catch(...) { ; }
876 	}
877 }
878 
879 
x_AddScorers(CNgAligner & NgAligner,IRegistry * RunRegistry)880 void CNgAlignApp::x_AddScorers(CNgAligner& NgAligner, IRegistry* RunRegistry)
881 {
882 	string ScorerNames;
883 
884     if(GetArgs()["scorers"].HasValue())
885         ScorerNames = GetArgs()["scorers"].AsString();
886     else
887         ScorerNames = RunRegistry->Get("scorers", "names");
888 
889     vector<string> Names;
890 	NStr::Split(ScorerNames, " \t;", Names);
891 
892 	ITERATE(vector<string>, NameIter, Names) {
893 
894 		if(*NameIter == "blast")
895 			NgAligner.AddScorer(
896                 new CBlastScorer(CBlastScorer::eSkipUnsupportedAlignments));
897 		else if(*NameIter == "pctident")
898 			NgAligner.AddScorer(new CPctIdentScorer);
899 		else if(*NameIter == "pctcov")
900 			NgAligner.AddScorer(new CPctCoverageScorer);
901 		else if(*NameIter == "comcomp")
902 			NgAligner.AddScorer(new CCommonComponentScorer);
903 		else if(*NameIter == "expansion")
904 			NgAligner.AddScorer(new CExpansionScorer);
905 		else if(*NameIter == "weighted") {
906             double K = 0.04;
907             K = RunRegistry->GetDouble("scorers", "sw_cvg", 0.04);
908 			NgAligner.AddScorer(new CWeightedIdentityScorer(K));
909 		}
910         else if(*NameIter == "hang")
911 			NgAligner.AddScorer(new CHangScorer);
912 		else if(*NameIter == "overlap")
913 			NgAligner.AddScorer(new COverlapScorer);
914 		else if(*NameIter == "clip") {
915 	        double K = 0.04;
916             K = RunRegistry->GetDouble("scorers", "sw_cvg", 0.04);
917             NgAligner.AddScorer(new CClippedScorer(K));
918         }
919 
920 	}
921 }
922 
923 
x_AddFilters(CNgAligner & NgAligner,IRegistry * RunRegistry)924 void CNgAlignApp::x_AddFilters(CNgAligner& NgAligner, IRegistry* RunRegistry)
925 {
926     const CArgs& args = GetArgs();
927 
928     int Rank = 0;
929 
930     if(args["filters"].HasValue()) {
931         string OrigFilters = args["filters"].AsString();
932         vector<string> SplitFilters;
933         NStr::Split(OrigFilters, ";", SplitFilters);
934         ITERATE(vector<string>, FilterIter, SplitFilters) {
935             const string FilterStr = *FilterIter;
936             try {
937                 CRef<CQueryFilter> Filter(new CQueryFilter(Rank, FilterStr));
938                 Rank++;
939                 NgAligner.AddFilter(Filter);
940             } catch( CException& e) {
941                 cerr << "x_AddFilters" << "  :  "  << Rank << "  :  " << FilterStr << endl;
942                 cerr << e.ReportAll() << endl;
943             }
944         }
945     } else {
946         string FilterNames = RunRegistry->Get("filters", "names");
947         vector<string> Names;
948         NStr::Split(FilterNames, " \t", Names);
949         ITERATE(vector<string>, NameIter, Names) {
950             string FilterStr = RunRegistry->Get("filters", *NameIter);
951             try {
952                 CRef<CQueryFilter> Filter(new CQueryFilter(Rank, FilterStr));
953                 Rank++;
954                 NgAligner.AddFilter(Filter);
955             } catch( CException& e) {
956                 cerr << "x_AddFilters" << "  :  "  << *NameIter << "  :  " << FilterStr << endl;
957                 cerr << e.ReportAll() << endl;
958             }
959         }
960     }
961 }
962 
963 
x_AddAligners(CNgAligner & NgAligner,IRegistry * RunRegistry)964 void CNgAlignApp::x_AddAligners(CNgAligner& NgAligner, IRegistry* RunRegistry)
965 {
966 	string FilterNames = RunRegistry->Get("aligners", "names");
967 	vector<string> Names;
968 	NStr::Split(FilterNames, " \t", Names);
969 
970 	ITERATE(vector<string>, NameIter, Names) {
971 
972 		string Type = RunRegistry->Get(*NameIter, "type");
973 
974 		CRef<IAlignmentFactory> Aligner;
975 		if(Type == "blast")
976 			Aligner = x_CreateBlastAligner(RunRegistry, *NameIter);
977 		else if(Type == "remote_blast")
978 			Aligner = x_CreateRemoteBlastAligner(RunRegistry, *NameIter);
979 		else if(Type == "merge")
980 			Aligner = x_CreateMergeAligner(RunRegistry, *NameIter);
981 		//else if(Type == "coverage")
982 		//	Aligner = x_CreateCoverageAligner(RunRegistry, *NameIter);
983 		else if(Type == "inversion")
984 			Aligner = x_CreateInversionMergeAligner(RunRegistry, *NameIter);
985 		else if(Type == "split")
986 			Aligner = x_CreateSplitSeqMergeAligner(RunRegistry, *NameIter);
987 		else if(Type == "file")
988 			Aligner = x_CreateFileLoadAligner(RunRegistry, *NameIter);
989 
990 		if(!Aligner.IsNull())
991 			NgAligner.AddAligner(Aligner);
992 	}
993 }
994 
995 
996 CRef<CBlastAligner>
x_CreateBlastAligner(IRegistry * RunRegistry,const string & Name)997 CNgAlignApp::x_CreateBlastAligner(IRegistry* RunRegistry, const string& Name)
998 {
999 	string Params = RunRegistry->Get(Name, "params");
1000 	int Threshold = RunRegistry->GetInt(Name, "threshold", 0);
1001 	string Filter = RunRegistry->Get(Name, "filter");
1002     bool UseNegatives = RunRegistry->GetBool(Name, "useneg", true);
1003 
1004 	const CArgs& Args = GetArgs();
1005 
1006 
1007 	if(Args["softfilter"].HasValue())
1008 		Filter = Args["softfilter"].AsString();
1009 
1010 
1011 
1012 	CRef<CBlastAligner> Blaster;
1013 	Blaster.Reset(new CBlastAligner(Params, Threshold));
1014 
1015 	//if(Filter != -1)
1016 		Blaster->SetSoftFiltering(Filter);
1017 
1018     Blaster->SetUseNegativeGiList(UseNegatives);
1019 
1020 	return Blaster;
1021 }
1022 
1023 
1024 CRef<CRemoteBlastAligner>
x_CreateRemoteBlastAligner(IRegistry * RunRegistry,const string & Name)1025 CNgAlignApp::x_CreateRemoteBlastAligner(IRegistry* RunRegistry, const string& Name)
1026 {
1027 	string Params = RunRegistry->Get(Name, "params");
1028 	int Threshold = RunRegistry->GetInt(Name, "threshold", 0);
1029 	//int Filter = RunRegistry->GetInt(Name, "filter", -1);
1030 
1031 	//const CArgs& Args = GetArgs();
1032 
1033 
1034 	CRef<CRemoteBlastAligner> Blaster;
1035 	Blaster.Reset(new CRemoteBlastAligner(Params, Threshold));
1036 
1037 	//if(Filter != -1)
1038 	//	Blaster->SetSoftFiltering(Filter);
1039 
1040 	return Blaster;
1041 }
1042 
1043 
1044 CRef<CMergeAligner>
x_CreateMergeAligner(IRegistry * RunRegistry,const string & Name)1045 CNgAlignApp::x_CreateMergeAligner(IRegistry* RunRegistry, const string& Name)
1046 {
1047 	int Threshold = RunRegistry->GetInt(Name, "threshold", 0);
1048 	double Clip = RunRegistry->GetDouble(Name, "clip", 3.0);
1049 	string ModeStr = RunRegistry->Get(Name, "mode");
1050     CMergeAligner::TMode Mode = CMergeAligner::eDefault;
1051     if(ModeStr == "cleanup")
1052         Mode = CMergeAligner::eAlignCleanup;
1053     else if(ModeStr == "tree")
1054         Mode = CMergeAligner::eTreeAlignMerger;
1055 
1056     return CRef<CMergeAligner>(new CMergeAligner(Threshold, Mode/*, Clip*/));
1057 }
1058 
1059 /*CRef<CCoverageAligner>
1060 CNgAlignApp::x_CreateCoverageAligner(IRegistry* RunRegistry, const string& Name)
1061 {
1062 	int Threshold = RunRegistry->GetInt(Name, "threshold", 0);
1063 	return CRef<CCoverageAligner>(new CCoverageAligner(Threshold));
1064 }*/
1065 
1066 
1067 CRef<CInversionMergeAligner>
x_CreateInversionMergeAligner(IRegistry * RunRegistry,const string & Name)1068 CNgAlignApp::x_CreateInversionMergeAligner(IRegistry* RunRegistry, const string& Name)
1069 {
1070 	int Threshold = RunRegistry->GetInt(Name, "threshold", 0);
1071 	return CRef<CInversionMergeAligner>(new CInversionMergeAligner(Threshold));
1072 }
1073 
1074 
1075 CRef<CSplitSeqAlignMerger>
x_CreateSplitSeqMergeAligner(IRegistry * RunRegistry,const string & Name)1076 CNgAlignApp::x_CreateSplitSeqMergeAligner(IRegistry* RunRegistry, const string& Name)
1077 {
1078 	return CRef<CSplitSeqAlignMerger>(new CSplitSeqAlignMerger(m_Splitter.get()));
1079 }
1080 
1081 
1082 CRef<CFileLoadAligner>
x_CreateFileLoadAligner(IRegistry * RunRegistry,const string & Name)1083 CNgAlignApp::x_CreateFileLoadAligner(IRegistry* RunRegistry, const string& Name)
1084 {
1085 	//int Threshold = RunRegistry->GetInt(Name, "threshold", 0);
1086 	string FileNameStr = RunRegistry->Get(Name, "filename");
1087   cerr << "x_CreateFileLoadAligner : " << FileNameStr << endl;
1088     return CRef<CFileLoadAligner>(new CFileLoadAligner(FileNameStr));
1089 }
1090 
1091 
x_LoadAssembly(CNcbiIstream & In)1092 CRef<CGC_Assembly> CNgAlignApp::x_LoadAssembly(CNcbiIstream& In)
1093 {
1094 	CRef<CGC_Assembly> Assembly(new CGC_Assembly);
1095 
1096 	try {
1097 		In >> MSerial_AsnText >> *Assembly;
1098 	} catch(...) {
1099 		In.clear();
1100 		In.seekg(0);
1101 		try {
1102 			In >> MSerial_AsnBinary >> *Assembly;
1103 		} catch(...) {
1104 			Assembly.Reset(NULL);
1105 		}
1106 	}
1107 
1108 	return Assembly;
1109 }
1110 
1111 
x_PrintNoHitList(CNcbiOstream & Out,const CSeq_align_set & Alignments)1112 void CNgAlignApp::x_PrintNoHitList(CNcbiOstream& Out, const CSeq_align_set& Alignments)
1113 {
1114 	set<CSeq_id_Handle> GivenIds;
1115 
1116 	ITERATE(list<CRef<CSeq_id> >, IdIter, m_LoadedIds) {
1117 		CSeq_id_Handle Handle = CSeq_id_Handle::GetHandle(**IdIter);
1118 		GivenIds.insert(Handle);
1119 	}
1120 
1121 	ITERATE(CSeq_align_set::Tdata, AlignIter, Alignments.Get()) {
1122 		CSeq_id_Handle AlignIdHandle = CSeq_id_Handle::GetHandle((*AlignIter)->GetSeq_id(0));
1123 		set<CSeq_id_Handle>::iterator Found = GivenIds.find(AlignIdHandle);
1124 		if(Found != GivenIds.end())
1125 			GivenIds.erase(Found);
1126 	}
1127 
1128 	ITERATE(set<CSeq_id_Handle>, IdIter, GivenIds) {
1129 		Out << IdIter->AsString() << endl;
1130 	}
1131 
1132 }
1133 
1134 
1135 END_NCBI_SCOPE
1136 USING_SCOPE(ncbi);
1137 
1138 
main(int argc,char ** argv)1139 int main(int argc, char** argv)
1140 {
1141     return CNgAlignApp().AppMain(argc, argv, 0, eDS_Default, 0);
1142 }
1143