1 /*  $Id: local_finder.cpp 467698 2015-05-15 15:51:24Z vasilche $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors:  Alexandre Souvorov
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbiapp.hpp>
34 #include <corelib/ncbienv.hpp>
35 #include <corelib/ncbiargs.hpp>
36 
37 #include <algo/gnomon/gnomon.hpp>
38 #include <serial/serial.hpp>
39 #include <serial/objostr.hpp>
40 #include <objtools/readers/fasta.hpp>
41 #include <objects/seqfeat/Imp_feat.hpp>
42 #include <objmgr/object_manager.hpp>
43 
44 USING_SCOPE(ncbi);
45 USING_SCOPE(ncbi::objects);
46 USING_SCOPE(ncbi::gnomon);
47 
48 class CLocalFinderApp : public CNcbiApplication
49 {
50 public:
51     void Init(void);
52     int Run(void);
53 };
54 
55 
56 
Init(void)57 void CLocalFinderApp::Init(void)
58 {
59     // Prepare command line descriptions
60     auto_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
61 
62     arg_desc->AddKey("input", "FastaFile",
63                      "File containing FASTA-format sequence",
64                      CArgDescriptions::eInputFile);
65 
66     arg_desc->AddDefaultKey("from", "From",
67                             "From",
68                             CArgDescriptions::eInteger,
69                             "0");
70 
71     arg_desc->AddDefaultKey("to", "To",
72                             "To",
73                             CArgDescriptions::eInteger,
74                             "1000000000");
75 
76     arg_desc->AddKey("model", "ModelData",
77                      "Model Data",
78                      CArgDescriptions::eInputFile);
79 
80     arg_desc->AddOptionalKey("align", "Alignments",
81                             "Alignments",
82                             CArgDescriptions::eInputFile);
83 
84     arg_desc->AddFlag("rep", "Repeats");
85 
86 
87     // Pass argument descriptions to the application
88     //
89 
90     SetupArgDescriptions(arg_desc.release());
91 }
92 
93 
Run(void)94 int CLocalFinderApp::Run(void)
95 {
96     const CArgs& myargs = GetArgs();
97 
98     int left            = myargs["from"].AsInteger();
99     int right           = myargs["to"].AsInteger();
100     bool repeats        = myargs["rep"];
101 
102 
103     //
104     // read our sequence data
105     //
106     CFastaReader fastareader(myargs["input"].AsString());
107     CRef<CSeq_loc> masked_regions;
108     masked_regions = fastareader.SaveMask();
109     CRef<CSeq_entry> se = fastareader.ReadOneSeq();
110 
111     if(masked_regions) {
112         CBioseq& bioseq = se->SetSeq();     // assumes that reader gets only one sequence per fasta id (no [] in file)
113         CRef<CSeq_annot> seq_annot(new CSeq_annot);
114         seq_annot->SetNameDesc("NCBI-FASTA-Lowercase");
115         bioseq.SetAnnot().push_back(seq_annot);
116         CSeq_annot::C_Data::TFtable* feature_table = &seq_annot->SetData().SetFtable();
117         for(CSeq_loc_CI i(*masked_regions); i; ++i) {
118             CRef<CSeq_feat> repeat(new CSeq_feat);
119             CRef<CSeq_id> id(new CSeq_id);
120             id->Assign(i.GetSeq_id());
121             CRef<CSeq_loc> loc(new CSeq_loc(*id, i.GetRange().GetFrom(), i.GetRange().GetTo()));
122             repeat->SetLocation(*loc);
123             repeat->SetData().SetImp().SetKey("repeat_region");
124             feature_table->push_back(repeat);
125         }
126     }
127 
128     CRef<CObjectManager> objmgr = CObjectManager::GetInstance();
129     CScope scope(*objmgr);
130     scope.AddTopLevelSeqEntry(*se);
131 
132     CRef<CSeq_id> cntg(new CSeq_id);
133     cntg->Assign(*se->GetSeq().GetFirstId());
134     CSeq_loc loc;
135     loc.SetWhole(*cntg);
136     CSeqVector vec(loc, scope);
137     vec.SetIupacCoding();
138 
139     CResidueVec seq;
140     ITERATE(CSeqVector,i,vec)
141         seq.push_back(*i);
142 
143     // read the alignment information
144     TGeneModelList alignments;
145     if(myargs["align"]) {
146         CNcbiIstream& alignmentfile = myargs["align"].AsInputFile();
147         string our_contig = cntg->GetSeqIdString(true);
148         string cur_contig;
149         CAlignModel algn;
150 
151         while(alignmentfile >> algn >> getcontig(cur_contig)) {
152             if (cur_contig==our_contig)
153                 alignments.push_back(algn);
154         }
155     }
156 
157     // create engine
158     CRef<CHMMParameters> hmm_params(new CHMMParameters(myargs["model"].AsInputFile()));
159     CGnomonEngine gnomon(hmm_params, seq, TSignedSeqRange(left, right));
160 
161     // run!
162     gnomon.Run(alignments, repeats, true, true, false, false, 10.0);
163 
164     // dump the annotation
165     CRef<CSeq_annot> annot = gnomon.GetAnnot(*cntg);
166     auto_ptr<CObjectOStream> os(CObjectOStream::Open(eSerial_AsnText, cout));
167     *os << *annot;
168 
169     return 0;
170 
171 }
172 
173 
174 /////////////////////////////////////////////////////////////////////////////
175 //  MAIN
176 
177 
main(int argc,const char * argv[])178 int main(int argc, const char* argv[])
179 {
180     return CLocalFinderApp().AppMain(argc, argv);
181 }
182 
183