1 #include "myutils.h"
2 #include "chime.h"
3 #include "seqdb.h"
4 #include "dp.h"
5 #include "ultra.h"
6 #include "hspfinder.h"
7 #include <algorithm>
8 #include <set>
9 
10 bool SearchChime(Ultra &U, const SeqData &QSD, float QAb,
11   const AlnParams &AP, const AlnHeuristics &AH, HSPFinder &HF,
12   float MinFractId, ChimeHit2 &Hit);
13 
14 FILE *g_fUChime;
15 FILE *g_fUChimeAlns;
16 const vector<float> *g_SortVecFloat;
17 bool g_UchimeDeNovo = false;
18 
Usage()19 void Usage()
20 	{
21 	printf("\n");
22 	printf("UCHIME %s by Robert C. Edgar\n", MY_VERSION);
23 	printf("http://www.drive5.com/uchime\n");
24 	printf("\n");
25 	printf("This software is donated to the public domain\n");
26 	printf("\n");
27 
28 	printf(
29 #include "help.h"
30 		);
31 	}
32 
SetBLOSUM62()33 void SetBLOSUM62()
34 	{
35 	Die("SetBLOSUM62 not implemented");
36 	}
37 
ReadSubstMx(const string &,Mx<float> &)38 void ReadSubstMx(const string &/*FileName*/, Mx<float> &/*Mxf*/)
39 	{
40 	Die("ReadSubstMx not implemented");
41 	}
42 
LogAllocs()43 void LogAllocs()
44 	{
45 	/*empty*/
46 	}
47 
CmpDescVecFloat(unsigned i,unsigned j)48 static bool CmpDescVecFloat(unsigned i, unsigned j)
49 	{
50 	return (*g_SortVecFloat)[i] > (*g_SortVecFloat)[j];
51 	}
52 
Range(vector<unsigned> & v,unsigned N)53 void Range(vector<unsigned> &v, unsigned N)
54 	{
55 	v.clear();
56 	v.reserve(N);
57 	for (unsigned i = 0; i < N; ++i)
58 		v.push_back(i);
59 	}
60 
SortDescending(const vector<float> & Values,vector<unsigned> & Order)61 void SortDescending(const vector<float> &Values, vector<unsigned> &Order)
62 	{
63 	StartTimer(Sort);
64 	const unsigned N = SIZE(Values);
65 	Range(Order, N);
66 	g_SortVecFloat = &Values;
67 	sort(Order.begin(), Order.end(), CmpDescVecFloat);
68 	EndTimer(Sort);
69 	}
70 
GetAbFromLabel(const string & Label)71 float GetAbFromLabel(const string &Label)
72 	{
73 	vector<string> Fields;
74 	Split(Label, Fields, '/');
75 	const unsigned N = SIZE(Fields);
76 	for (unsigned i = 0; i < N; ++i)
77 		{
78 		const string &Field = Fields[i];
79 		if (Field.substr(0, 3) == "ab=")
80 			{
81 			string a = Field.substr(3, string::npos);
82 			return (float) atof(a.c_str());
83 			}
84 		}
85 	if (g_UchimeDeNovo)
86 		Die("Missing abundance /ab=xx/ in label >%s", Label.c_str());
87 	return 0.0;
88 	}
89 
main(int argc,char * argv[])90 int main(int argc, char *argv[])
91 	{
92 
93 	MyCmdLine(argc, argv);
94 
95 	if (argc < 2)
96 		{
97 		Usage();
98 		return 0;
99 		}
100 
101 	if (opt_version)
102 		{
103 		printf("uchime v" MY_VERSION ".%s\n", SVN_VERSION);
104 		return 0;
105 		}
106 
107 	printf("uchime v" MY_VERSION ".%s\n", SVN_VERSION);
108 	printf("by Robert C. Edgar\n");
109 	printf("http://drive5.com/uchime\n");
110 	printf("This code is donated to the public domain.\n");
111 	printf("\n");
112 	if (!optset_w)
113 		opt_w = 8;
114 
115 	float MinFractId = 0.95f;
116 	if (optset_id)
117 		MinFractId = (float) opt_id;
118 
119 	Log("%8.2f  minh\n", opt_minh);
120 	Log("%8.2f  xn\n", opt_xn);
121 	Log("%8.2f  dn\n", opt_dn);
122 	Log("%8.2f  xa\n", opt_xa);
123 	Log("%8.2f  mindiv\n", opt_mindiv);
124 	Log("%8u  maxp\n", opt_maxp);
125 
126 	if (opt_input == "" && opt_uchime != "")
127 		opt_input = opt_uchime;
128 
129 	if (opt_input == "")
130 		Die("Missing --input");
131 
132 	g_UchimeDeNovo = (opt_db == "");
133 
134 	if (opt_uchimeout != "")
135 		g_fUChime = CreateStdioFile(opt_uchimeout);
136 
137 	if (opt_uchimealns != "")
138 		g_fUChimeAlns = CreateStdioFile(opt_uchimealns);
139 
140 	SeqDB Input;
141 	SeqDB DB;
142 
143 	Input.FromFasta(opt_input);
144 	if (!Input.IsNucleo())
145 		Die("Input contains amino acid sequences");
146 
147 	const unsigned QuerySeqCount = Input.GetSeqCount();
148 	vector<unsigned> Order;
149 	for (unsigned i = 0; i < QuerySeqCount; ++i)
150 		Order.push_back(i);
151 
152 	if (g_UchimeDeNovo)
153 		{
154 		vector<float> Abs;
155 		for (unsigned i = 0; i < QuerySeqCount; ++i)
156 			{
157 			const char *Label = Input.GetLabel(i);
158 			float Ab = GetAbFromLabel(Label);
159 			Abs.push_back(Ab);
160 			}
161 		SortDescending(Abs, Order);
162 		DB.m_IsNucleoSet = true;
163 		DB.m_IsNucleo = true;
164 		}
165 	else
166 		{
167 		DB.FromFasta(opt_db);
168 		if (!DB.IsNucleo())
169 			Die("Database contains amino acid sequences");
170 		}
171 
172 	vector<ChimeHit2> Hits;
173 	unsigned HitCount = 0;
174 	for (unsigned i = 0; i < QuerySeqCount; ++i)
175 		{
176 		unsigned QuerySeqIndex = Order[i];
177 
178 		SeqData QSD;
179 		Input.GetSeqData(QuerySeqIndex, QSD);
180 
181 		float QAb = -1.0;
182 		if (g_UchimeDeNovo)
183 			QAb = GetAbFromLabel(QSD.Label);
184 
185 		ChimeHit2 Hit;
186 		AlnParams &AP = *(AlnParams *) 0;
187 		AlnHeuristics &AH = *(AlnHeuristics *) 0;
188 		HSPFinder &HF = *(HSPFinder *) 0;
189 		bool Found = SearchChime(DB, QSD, QAb, AP, AH, HF, MinFractId, Hit);
190 		if (Found)
191 			++HitCount;
192 		else
193 			{
194 			if (g_UchimeDeNovo)
195 				DB.AddSeq(QSD.Label, QSD.Seq, QSD.L);
196 			}
197 
198 		WriteChimeHit(g_fUChime, Hit);
199 
200 		ProgressStep(i, QuerySeqCount, "%u/%u chimeras found (%.1f%%)", HitCount, i, Pct(HitCount, i+1));
201 		}
202 
203 	Log("\n");
204 	Log("%s: %u/%u chimeras found (%.1f%%)\n",
205 	  opt_input.c_str(), HitCount, QuerySeqCount, Pct(HitCount, QuerySeqCount));
206 
207 	CloseStdioFile(g_fUChime);
208 	CloseStdioFile(g_fUChimeAlns);
209 
210 	ProgressExit();
211 	return 0;
212 	}
213