1 /*
2  *  chimeraslayer.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/25/09.
6  *  Copyright 2009 Pschloss Lab. All rights reserved.
7  *
8  */
9 
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
12 #include "kmerdb.hpp"
13 #include "blastdb.hpp"
14 
15 //***************************************************************************************************************
ChimeraSlayer(string file,string temp,bool trim,string mode,int k,int ms,int mms,int win,float div,int minsim,int mincov,int minbs,int minsnp,int par,int it,int inc,int numw,bool r,string blas,int tid)16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div,
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid) : MothurChimera()  {
18 	try {
19 		fastafile = file;
20 		templateFileName = temp; templateSeqs = readSeqs(temp);
21 		searchMethod = mode;
22 		kmerSize = k;
23 		match = ms;
24 		misMatch = mms;
25 		window = win;
26 		divR = div;
27 		minSim = minsim;
28 		minCov = mincov;
29 		minBS = minbs;
30 		minSNP = minsnp;
31 		parents = par;
32 		iters = it;
33 		increment = inc;
34 		numWanted = numw;
35 		realign = r;
36 		trimChimera = trim;
37 		numNoParents = 0;
38 		blastlocation = blas;
39 		threadID = tid;
40 
41 		doPrep();
42 	}
43 	catch(exception& e) {
44 		m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
45 		exit(1);
46 	}
47 }
48 //***************************************************************************************************************
49 //template=self
ChimeraSlayer(string file,string temp,bool trim,map<string,int> & prior,string mode,int k,int ms,int mms,int win,float div,int minsim,int mincov,int minbs,int minsnp,int par,int it,int inc,int numw,bool r,string blas,int tid,bool bg)50 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div,
51 							 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid, bool bg) : MothurChimera()  {
52 	try {
53 		byGroup = bg;
54 		fastafile = file; templateSeqs = readSeqs(fastafile);
55 		templateFileName = temp;
56 		searchMethod = mode;
57 		kmerSize = k;
58 		match = ms;
59 		misMatch = mms;
60 		window = win;
61 		divR = div;
62 		minSim = minsim;
63 		minCov = mincov;
64 		minBS = minbs;
65 		minSNP = minsnp;
66 		parents = par;
67 		iters = it;
68 		increment = inc;
69 		numWanted = numw;
70 		realign = r;
71 		trimChimera = trim;
72 		priority = prior;
73 		numNoParents = 0;
74 		blastlocation = blas;
75 		threadID = tid;
76 
77 
78 		createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
79 
80 		if (searchMethod == "distance") {
81 			//createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
82 
83 			//run filter on template copying templateSeqs into filteredTemplateSeqs
84 			for (int i = 0; i < templateSeqs.size(); i++) {
85 				if (m->getControl_pressed()) {  break; }
86 
87 				Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
88 				runFilter(newSeq);
89 				filteredTemplateSeqs.push_back(newSeq);
90 			}
91 		}
92 	}
93 	catch(exception& e) {
94 		m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
95 		exit(1);
96 	}
97 }
98 //***************************************************************************************************************
99 //template=self
ChimeraSlayer(string file,string temp,bool trim,map<string,int> & prior,string mode,int k,int ms,int mms,int win,float div,int minsim,int mincov,int minbs,int minsnp,int par,int it,int inc,int numw,bool r,string blas,int tid)100 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div,
101 							 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid) : MothurChimera()  {
102 	try {
103 		fastafile = file; templateSeqs = readSeqs(fastafile);
104 		templateFileName = temp;
105 		searchMethod = mode;
106 		kmerSize = k;
107 		match = ms;
108 		misMatch = mms;
109 		window = win;
110 		divR = div;
111 		minSim = minsim;
112 		minCov = mincov;
113 		minBS = minbs;
114 		minSNP = minsnp;
115 		parents = par;
116 		iters = it;
117 		increment = inc;
118 		numWanted = numw;
119 		realign = r;
120 		trimChimera = trim;
121 		priority = prior;
122 		numNoParents = 0;
123 		blastlocation = blas;
124 		threadID = tid;
125 
126 
127 		createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
128 
129 		if (searchMethod == "distance") {
130 			//createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
131 
132 			//run filter on template copying templateSeqs into filteredTemplateSeqs
133 			for (int i = 0; i < templateSeqs.size(); i++) {
134 				if (m->getControl_pressed()) {  break; }
135 
136 				Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
137 				runFilter(newSeq);
138 				filteredTemplateSeqs.push_back(newSeq);
139 			}
140 		}
141 	}
142 	catch(exception& e) {
143 		m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
144 		exit(1);
145 	}
146 }
147 //***************************************************************************************************************
doPrep()148 int ChimeraSlayer::doPrep() {
149 	try {
150 		if (searchMethod == "distance") {
151 			//read in all query seqs
152 			vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
153 
154 			vector<Sequence*> temp = templateSeqs;
155 			for (int i = 0; i < tempQuerySeqs.size(); i++) {  temp.push_back(tempQuerySeqs[i]);  }
156 
157 			createFilter(temp, 0.0); //just removed columns where all seqs have a gap
158 
159 			for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i];  }
160 
161 			if (m->getControl_pressed()) {  return 0; }
162 
163 			//run filter on template copying templateSeqs into filteredTemplateSeqs
164 			for (int i = 0; i < templateSeqs.size(); i++) {
165 				if (m->getControl_pressed()) {  return 0; }
166 
167 				Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
168 				runFilter(newSeq);
169 				filteredTemplateSeqs.push_back(newSeq);
170 			}
171 		}
172 		string 	kmerDBNameLeft;
173 		string 	kmerDBNameRight;
174         Utils util;
175 		//generate the kmerdb to pass to maligner
176 		if (searchMethod == "kmer") {
177 			string templatePath = util.hasPath(templateFileName);
178 			string rightTemplateFileName = templatePath + "right." + util.getRootName(util.getSimpleName(templateFileName));
179 			databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
180 
181 			string leftTemplateFileName = templatePath + "left." + util.getRootName(util.getSimpleName(templateFileName));
182 			databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
183 
184 			//leftside
185 			kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
186 			ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
187 			bool needToGenerateLeft = true;
188 
189 			if(kmerFileTestLeft){
190                 string line = util.getline(kmerFileTestLeft);
191 				bool GoodFile = util.checkReleaseVersion(line, current->getVersion());
192 				if (GoodFile) {  needToGenerateLeft = false;	}
193 			}
194 
195 			if(needToGenerateLeft){
196 
197 				for (int i = 0; i < templateSeqs.size(); i++) {
198 
199 					if (m->getControl_pressed()) { return 0; }
200 
201 					string leftFrag = templateSeqs[i]->getUnaligned();
202 					leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
203 
204 					Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
205 					databaseLeft->addSequence(leftTemp);
206 				}
207 				databaseLeft->generateDB();
208 
209 			}else {
210 				databaseLeft->readDB(kmerFileTestLeft);
211 			}
212 			kmerFileTestLeft.close();
213 
214 			databaseLeft->setNumSeqs(templateSeqs.size());
215 
216 			//rightside
217 			kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
218 			ifstream kmerFileTestRight(kmerDBNameRight.c_str());
219 			bool needToGenerateRight = true;
220 
221 			if(kmerFileTestRight){
222                 string line = util.getline(kmerFileTestRight);
223 				bool GoodFile = util.checkReleaseVersion(line, current->getVersion());
224 				if (GoodFile) {  needToGenerateRight = false;	}
225 			}
226 
227 			if(needToGenerateRight){
228 
229 				for (int i = 0; i < templateSeqs.size(); i++) {
230 					if (m->getControl_pressed()) { return 0; }
231 
232 					string rightFrag = templateSeqs[i]->getUnaligned();
233 					rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
234 
235 					Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
236 					databaseRight->addSequence(rightTemp);
237 				}
238 				databaseRight->generateDB();
239 
240 			}else {	 databaseRight->readDB(kmerFileTestRight);	 }
241 			kmerFileTestRight.close();
242 
243 			databaseRight->setNumSeqs(templateSeqs.size());
244 
245 		}else if (searchMethod == "blast") {
246 
247 			//generate blastdb
248 			databaseLeft = new BlastDB(util.getRootName(util.getSimpleName(fastafile)), -1.0, -1.0, 1, -3, blastlocation, threadID);
249 
250 			if (m->getControl_pressed()) { return 0; }
251 
252 			for (int i = 0; i < templateSeqs.size(); i++) { 	databaseLeft->addSequence(*templateSeqs[i]);	}
253 			databaseLeft->generateDB();
254 			databaseLeft->setNumSeqs(templateSeqs.size());
255 		}
256 
257 		return 0;
258 
259 	}
260 	catch(exception& e) {
261 		m->errorOut(e, "ChimeraSlayer", "doprep");
262 		exit(1);
263 	}
264 }
265 //***************************************************************************************************************
getTemplate(Sequence q,vector<Sequence * > & userTemplateFiltered)266 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence q, vector<Sequence*>& userTemplateFiltered) {
267 	try {
268 
269 		//when template=self, the query file is sorted from most abundance to least abundant
270 		//userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
271 		vector<Sequence*> userTemplate;
272 
273 		int myAbund = priority[q.getName()];
274 
275 		for (int i = 0; i < templateSeqs.size(); i++) {
276 
277 			if (m->getControl_pressed()) { return userTemplate; }
278 
279 			//have I reached a sequence with the same abundance as myself?
280 			if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
281 
282 			//if its am not chimeric add it
283 			if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) {
284 				userTemplate.push_back(templateSeqs[i]);
285 				if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); }
286 			}
287 		}
288 
289 		//avoids nuisance error from formatdb for making blank blast database
290 		if (userTemplate.size() == 0) {
291 			return userTemplate;
292 		}
293 
294 		string 	kmerDBNameLeft;
295 		string 	kmerDBNameRight;
296 
297 		//generate the kmerdb to pass to maligner
298 		if (searchMethod == "kmer") {
299             Utils util;
300 			string templatePath = util.hasPath(templateFileName);
301 			string rightTemplateFileName = templatePath + "right." + util.getRootName(util.getSimpleName(templateFileName));
302 			databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
303 
304 			string leftTemplateFileName = templatePath + "left." + util.getRootName(util.getSimpleName(templateFileName));
305 			databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
306 
307 
308 			for (int i = 0; i < userTemplate.size(); i++) {
309 
310 				if (m->getControl_pressed()) { return userTemplate; }
311 
312 				string leftFrag = userTemplate[i]->getUnaligned();
313 				leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
314 
315 				Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
316 				databaseLeft->addSequence(leftTemp);
317 			}
318 			databaseLeft->generateDB();
319 			databaseLeft->setNumSeqs(userTemplate.size());
320 
321 			for (int i = 0; i < userTemplate.size(); i++) {
322 				if (m->getControl_pressed()) { return userTemplate; }
323 
324 				string rightFrag = userTemplate[i]->getUnaligned();
325 				rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
326 
327 				Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
328 				databaseRight->addSequence(rightTemp);
329 			}
330 			databaseRight->generateDB();
331 			databaseRight->setNumSeqs(userTemplate.size());
332 
333 		}else if (searchMethod == "blast") {
334 
335 			//generate blastdb
336             Utils util;
337 			databaseLeft = new BlastDB(util.getRootName(util.getSimpleName(templateFileName)), -1.0, -1.0, 1, -3, blastlocation, threadID);
338 
339 			if (m->getControl_pressed()) { return userTemplate; }
340 
341 			for (int i = 0; i < userTemplate.size(); i++) { if (m->getControl_pressed()) { return userTemplate; }   databaseLeft->addSequence(*userTemplate[i]);	}
342 			databaseLeft->generateDB();
343 			databaseLeft->setNumSeqs(userTemplate.size());
344 		}
345 
346 		return userTemplate;
347 
348 	}
349 	catch(exception& e) {
350 		m->errorOut(e, "ChimeraSlayer", "getTemplate");
351 		exit(1);
352 	}
353 }
354 
355 //***************************************************************************************************************
~ChimeraSlayer()356 ChimeraSlayer::~ChimeraSlayer() {
357 	if (templateFileName != "self") {
358 		if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }
359 		else if (searchMethod == "blast") {  delete databaseLeft; }
360 	}
361 }
362 //***************************************************************************************************************
printHeader(ostream & out)363 void ChimeraSlayer::printHeader(ostream& out) {
364 
365 	m->mothurOut("\nOnly reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.\n");
366 
367 	out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
368 }
369 //***************************************************************************************************************
print(ostream & out,ostream & outAcc)370 Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc) {
371 	try {
372 		Sequence trim;
373 		if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); }
374 
375 		if (chimeraFlags == "yes") {
376 			string chimeraFlag = "no";
377 			if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
378 			   ||
379 			   (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
380 
381 
382 			if (chimeraFlag == "yes") {
383 				if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
384 					m->mothurOutJustToScreen(querySeq.getName() + "\tyes\n");
385 					outAcc << querySeq.getName() << endl;
386 
387 					if (templateFileName == "self") {  chimericSeqs.insert(querySeq.getName()); }
388 
389 					if (trimChimera) {
390 						int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
391 						int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
392 
393 						string newAligned = trim.getAligned();
394 
395 						if (lengthLeft > lengthRight) { //trim right
396 							for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
397 						}else { //trim left
398 							for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; }
399 						}
400 						trim.setAligned(newAligned);
401 					}
402 				}
403 			}
404 
405 			printBlock(chimeraResults[0], chimeraFlag, out);
406 		}else {
407 			out << querySeq.getName() << "\tno" << endl;
408 		}
409 
410 		return trim;
411 
412 	}
413 	catch(exception& e) {
414 		m->errorOut(e, "ChimeraSlayer", "print");
415 		exit(1);
416 	}
417 }
418 //***************************************************************************************************************
print(ostream & out,ostream & outAcc,data_results leftPiece,data_results rightPiece)419 Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
420 	try {
421 		Sequence trim;
422 
423 		if (trimChimera) {
424 			string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
425 			trim.setName(leftPiece.trimQuery.getName()); trim.setAligned(aligned);
426 		}
427 
428 		if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
429 
430 			string chimeraFlag = "no";
431 			if (leftPiece.flag == "yes") {
432 
433 				if(  (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
434 					||
435 					(leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
436 			}
437 
438 			if (rightPiece.flag == "yes") {
439 				if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
440 				 ||
441 				 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
442 			}
443 
444 			bool rightChimeric = false;
445 			bool leftChimeric = false;
446 
447 			if (chimeraFlag == "yes") {
448 				//which peice is chimeric or are both
449 				if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
450 				if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS))	{ leftChimeric = true;	} }
451 
452 				if (rightChimeric || leftChimeric) {
453 					m->mothurOutJustToScreen(querySeq.getName() + "\tyes\n");
454 					outAcc << querySeq.getName() << endl;
455 
456 					if (templateFileName == "self") {  chimericSeqs.insert(querySeq.getName()); }
457 
458 					if (trimChimera) {
459 						string newAligned = trim.getAligned();
460 
461 						//right side is fine so keep that
462 						if ((leftChimeric) && (!rightChimeric)) {
463 							for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
464 						}else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
465 							for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
466 						}else { //both sides are chimeric, keep longest piece
467 
468 							int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
469 							int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
470 
471 							int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
472 							int length = lengthLeftLeft;
473 							if (lengthLeftLeft < lengthLeftRight) { longest = 2;  length = lengthLeftRight; }
474 
475 							int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
476 							int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
477 
478 							if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft;  }
479 							if (lengthRightRight > length) { longest = 4; }
480 
481 							if (longest == 1) { //leftleft
482 								for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
483 							}else if (longest == 2) { //leftright
484 								//get rid of leftleft
485 								for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
486 								//get rid of right
487 								for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
488 							}else if (longest == 3) { //rightleft
489 								//get rid of left
490 								for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
491 								//get rid of rightright
492 								for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
493 							}else { //rightright
494 								//get rid of left
495 								for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
496 								//get rid of rightleft
497 								for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
498 							}
499 						}
500 
501 						trim.setAligned(newAligned);
502 					}
503 
504 				}
505 			}
506 
507 			printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
508 		}else {
509 			out << querySeq.getName() << "\tno" << endl;
510 		}
511 
512 		return trim;
513 
514 	}
515 	catch(exception& e) {
516 		m->errorOut(e, "ChimeraSlayer", "print");
517 		exit(1);
518 	}
519 }
520 
521 //***************************************************************************************************************
getChimeras(Sequence * query)522 int ChimeraSlayer::getChimeras(Sequence* query) {
523 	try {
524 
525 		trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
526 		printResults.trimQuery = trimQuery;
527 
528 		chimeraFlags = "no";
529 		printResults.flag = "no";
530 
531 		querySeq = *query;
532 
533 		//you must create a template
534 		vector<Sequence*> thisTemplate;
535 		vector<Sequence*> thisFilteredTemplate;
536 		if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; }
537 		else {  thisTemplate = getTemplate(*query, thisFilteredTemplate);  } //fills this template and creates the databases
538 
539 		if (m->getControl_pressed()) {  return 0;  }
540 		if (thisTemplate.size() == 0) {  return 0; } //not chimeric
541 
542 		//moved this out of maligner - 4/29/11
543 		vector<Sequence> refSeqs = getRefSeqs(*query, thisTemplate, thisFilteredTemplate);
544 
545 		Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov);
546 		Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS);
547 
548 		if (templateFileName == "self") {
549 			if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }
550 			else if (searchMethod == "blast") {  delete databaseLeft; }
551 		}
552 
553 		if (m->getControl_pressed()) {  return 0;  }
554 
555 		string chimeraFlag = maligner.getResults(*query, decalc);
556 
557 		if (m->getControl_pressed()) {  return 0;  }
558 
559 		vector<results> Results = maligner.getOutput();
560 
561 		//for (int i = 0; i < refSeqs.size(); i++) {  delete refSeqs[i];	}
562 
563 		if (chimeraFlag == "yes") {
564 
565 			if (realign) {
566 				vector<string> parents;
567 				for (int i = 0; i < Results.size(); i++) {
568 					parents.push_back(Results[i].parentAligned);
569 				}
570 
571 				ChimeraReAligner realigner;
572 				realigner.reAlign(query, parents);
573 
574 			}
575 
576 			//get sequence that were given from maligner results
577 			vector<SeqCompare> seqs;
578 			map<string, float> removeDups;
579 			map<string, float>::iterator itDup;
580 			map<string, string> parentNameSeq;
581 			map<string, string>::iterator itSeq;
582 			for (int j = 0; j < Results.size(); j++) {
583 
584 				float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
585 				//only add if you are not a duplicate
586 
587 				if(Results[j].queryToParentLocal >= 90){	//local match has to be over 90% similarity
588 
589 					itDup = removeDups.find(Results[j].parent);
590 					if (itDup == removeDups.end()) { //this is not duplicate
591 						removeDups[Results[j].parent] = dist;
592 						parentNameSeq[Results[j].parent] = Results[j].parentAligned;
593 					}else if (dist > itDup->second) { //is this a stronger number for this parent
594 						removeDups[Results[j].parent] = dist;
595 						parentNameSeq[Results[j].parent] = Results[j].parentAligned;
596 					}
597 
598 				}
599 
600 			}
601 
602 			for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
603 				itSeq = parentNameSeq.find(itDup->first);
604 				Sequence seq(itDup->first, itSeq->second);
605 
606 				SeqCompare member;
607 				member.seq = seq;
608 				member.dist = itDup->second;
609 				seqs.push_back(member);
610 			}
611 
612 			//limit number of parents to explore - default 3
613 			if (Results.size() > parents) {
614 				//sort by distance
615 				sort(seqs.begin(), seqs.end(), compareSeqCompare);
616 				//prioritize larger more similiar sequence fragments
617 				reverse(seqs.begin(), seqs.end());
618 
619 				//for (int k = seqs.size()-1; k > (parents-1); k--)  {
620 				//	delete seqs[k].seq;
621 					//seqs.pop_back();
622 				//}
623 			}
624 
625 			//put seqs into vector to send to slayer
626 
627 
628 			vector<Sequence> seqsForSlayer;
629 			for (int k = 0; k < seqs.size(); k++) { seqsForSlayer.push_back(seqs[k].seq);	 }
630 
631 			if (m->getControl_pressed()) {  return 0;  }
632 
633 			//send to slayer
634 			chimeraFlags = slayer.getResults(*query, seqsForSlayer);
635 			if (m->getControl_pressed()) {  return 0;  }
636 			chimeraResults = slayer.getOutput();
637 
638 			printResults.flag = chimeraFlags;
639 			printResults.results = chimeraResults;
640 
641 			//free memory
642 			//for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
643 		}
644 
645 		return 0;
646 	}
647 	catch(exception& e) {
648 		m->errorOut(e, "ChimeraSlayer", "getChimeras");
649 		exit(1);
650 	}
651 }
652 //***************************************************************************************************************
printBlock(data_struct data,string flag,ostream & out)653 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
654 	try {
655 		out << querySeq.getName();
656 		out << '\t' << data.parentA.getName() << "\t" << data.parentB.getName();
657 
658 		out << '\t' <<  data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa;
659 		out << '\t' <<  data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb ;
660 
661 		out << '\t' <<  flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\n';
662 
663 	}
664 	catch(exception& e) {
665 		m->errorOut(e, "ChimeraSlayer", "printBlock");
666 		exit(1);
667 	}
668 }
669 //***************************************************************************************************************
printBlock(data_results leftdata,data_results rightdata,bool leftChimeric,bool rightChimeric,string flag,ostream & out)670 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
671 	try {
672 
673 		if ((leftChimeric) && (!rightChimeric)) { //print left
674 			out << querySeq.getName();
675 			out << '\t' << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName();
676 			out << '\t' << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa;
677 			out << '\t' << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb;
678             out << '\t' << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << endl;
679 
680 		}else if ((!leftChimeric) && (rightChimeric)) {  //print right
681 			out << querySeq.getName();
682 			out << '\t' << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName();
683 			out << '\t' << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa;
684 			out << '\t' << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb;
685 			out << '\t' << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << endl;
686 
687 		}else  { //print both results
688 			if (leftdata.flag == "yes") {
689 				out << querySeq.getName() + "_LEFT";
690 				out << '\t' << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName();
691 				out << '\t' << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa;
692 				out << '\t' << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb;
693                 out << '\t' << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << endl;
694 			}
695 
696 			if (rightdata.flag == "yes") {
697 				out << querySeq.getName() + "_RIGHT";
698 				out << '\t' << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName();
699 				out << '\t' << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa;
700 				out << '\t' << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb;
701 				out << '\t' << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\n';
702 
703 			}
704 		}
705 	}
706 	catch(exception& e) {
707 		m->errorOut(e, "ChimeraSlayer", "printBlock");
708 		exit(1);
709 	}
710 }
711 //***************************************************************************************************************
getBlock(data_results leftdata,data_results rightdata,bool leftChimeric,bool rightChimeric,string flag)712 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
713 	try {
714 
715 		string out = "";
716 
717 		if ((leftChimeric) && (!rightChimeric)) { //get left
718 			out += querySeq.getName();
719 			out += "\t" + leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName();
720 			out += "\t" + toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa);
721 			out += "\t" + toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb);
722 			out += "\t" + flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\n";
723 
724 		}else if ((!leftChimeric) && (rightChimeric)) {  //print right
725 			out += querySeq.getName();
726 			out += "\t" + rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName();
727 			out += "\t" + toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa);
728 			out += "\t" + toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb);
729 			out += "\t" + flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\n";
730 
731 		}else  { //print both results
732 			if (leftdata.flag == "yes") {
733 				out += querySeq.getName() + "_LEFT";
734 				out += "\t" + leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName();
735 				out += "\t" + toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa);
736 				out += "\t" + toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb);
737 				out += "\t" + flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\n";
738 			}
739 
740 			if (rightdata.flag == "yes") {
741 				out +=  querySeq.getName() + "_RIGHT";
742 				out += "\t" + rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName();
743 				out += "\t" + toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa);
744 				out += "\t" + toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb);
745 				out += "\t" + flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\n";
746 			}
747 		}
748 
749 		return out;
750 
751 	}
752 	catch(exception& e) {
753 		m->errorOut(e, "ChimeraSlayer", "getBlock");
754 		exit(1);
755 	}
756 }
757 //***************************************************************************************************************
getBlock(data_struct data,string flag)758 string ChimeraSlayer::getBlock(data_struct data, string flag){
759 	try {
760 
761 		string outputString = "";
762 		outputString += querySeq.getName();
763 		outputString +=  "\t" + data.parentA.getName() + "\t" + data.parentB.getName();
764 		outputString +=  "\t" + toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa);
765 		outputString +=  "\t" + toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb);
766 		outputString +=  "\t" + flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\n";
767 
768 		return outputString;
769 	}
770 	catch(exception& e) {
771 		m->errorOut(e, "ChimeraSlayer", "getBlock");
772 		exit(1);
773 	}
774 }
775 //***************************************************************************************************************
getRefSeqs(Sequence q,vector<Sequence * > & thisTemplate,vector<Sequence * > & thisFilteredTemplate)776 vector<Sequence> ChimeraSlayer::getRefSeqs(Sequence q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
777 	try {
778 
779 		vector<Sequence> refSeqs;
780 
781 		if (searchMethod == "distance") {
782 			//find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
783 			Sequence* newSeq = new Sequence(q.getName(), q.getAligned());
784 			runFilter(newSeq);
785 			refSeqs = decalc.findClosest(*newSeq, thisTemplate, thisFilteredTemplate, numWanted, minSim);
786 			delete newSeq;
787 		}else if (searchMethod == "blast")  {
788 			refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes
789 		}else if (searchMethod == "kmer") {
790 			refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes
791 		}else { m->mothurOut("not valid search."); exit(1);  } //should never get here
792 
793 		return refSeqs;
794 	}
795 	catch(exception& e) {
796 		m->errorOut(e, "ChimeraSlayer", "getRefSeqs");
797 		exit(1);
798 	}
799 }
800 //***************************************************************************************************************/
getBlastSeqs(Sequence q,vector<Sequence * > & db,int num)801 vector<Sequence> ChimeraSlayer::getBlastSeqs(Sequence q, vector<Sequence*>& db, int num) {
802 	try {
803 
804 		vector<Sequence> refResults;
805 
806 		//get parts of query
807 		string queryUnAligned = q.getUnaligned();
808 		string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
809 		string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
810 
811 		Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
812 		Sequence* queryRight = new Sequence(q.getName(), rightQuery);
813 
814 
815 		vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim);
816 		vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim);
817 
818 		//merge results
819 		map<int, int> seen;
820 		map<int, int>::iterator it;
821 		vector<int> mergedResults;
822 
823 		int index = 0;
824 //		for (int i = 0; i < smaller.size(); i++) {
825 		while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
826 
827 			if (m->getControl_pressed()) { delete queryRight; delete queryLeft; return refResults; }
828 
829 			//add left if you havent already
830 			it = seen.find(tempIndexesLeft[index]);
831 			if (it == seen.end()) {
832 				mergedResults.push_back(tempIndexesLeft[index]);
833 				seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
834 			}
835 
836 			//add right if you havent already
837 			it = seen.find(tempIndexesRight[index]);
838 			if (it == seen.end()) {
839 				mergedResults.push_back(tempIndexesRight[index]);
840 				seen[tempIndexesRight[index]] = tempIndexesRight[index];
841 			}
842 			index++;
843 		}
844 
845 
846 		for (int i = index; i < tempIndexesLeft.size(); i++) {
847 			if (m->getControl_pressed()) { delete queryRight; delete queryLeft; return refResults; }
848 
849 			//add right if you havent already
850 			it = seen.find(tempIndexesLeft[i]);
851 			if (it == seen.end()) {
852 				mergedResults.push_back(tempIndexesLeft[i]);
853 				seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
854 			}
855 		}
856 
857 		for (int i = index; i < tempIndexesRight.size(); i++) {
858 			if (m->getControl_pressed()) { delete queryRight; delete queryLeft; return refResults; }
859 
860 			//add right if you havent already
861 			it = seen.find(tempIndexesRight[i]);
862 			if (it == seen.end()) {
863 				mergedResults.push_back(tempIndexesRight[i]);
864 				seen[tempIndexesRight[i]] = tempIndexesRight[i];
865 			}
866 		}
867 		//string qname = q->getName().substr(0, q->getName().find_last_of('_'));
868 
869 
870 		if (mergedResults.size() == 0) { numNoParents++; }
871 
872 		for (int i = 0; i < mergedResults.size(); i++) {
873 
874 			if (db[mergedResults[i]]->getName() != q.getName()) {
875 				Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
876 				refResults.push_back(temp);
877 			}
878 		}
879 
880 		delete queryRight;
881 		delete queryLeft;
882 
883 		return refResults;
884 	}
885 	catch(exception& e) {
886 		m->errorOut(e, "ChimeraSlayer", "getBlastSeqs");
887 		exit(1);
888 	}
889 }
890 //***************************************************************************************************************
getKmerSeqs(Sequence q,vector<Sequence * > & db,int num)891 vector<Sequence> ChimeraSlayer::getKmerSeqs(Sequence q, vector<Sequence*>& db, int num) {
892 	try {
893 		vector<Sequence> refResults;
894 
895 		//get parts of query
896 		string queryUnAligned = q.getUnaligned();
897 		string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
898 		string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
899 
900 		Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
901 		Sequence* queryRight = new Sequence(q.getName(), rightQuery);
902 
903         vector<float> Scores;
904 		vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num, Scores);
905 		vector<int> tempIndexesRight = databaseRight->findClosestSequences(queryRight, num, Scores);
906 
907 		//merge results
908 		map<int, int> seen;
909 		map<int, int>::iterator it;
910 		vector<int> mergedResults;
911 
912 		int index = 0;
913 		//		for (int i = 0; i < smaller.size(); i++) {
914 		while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
915 
916 			if (m->getControl_pressed()) { delete queryRight; delete queryLeft; return refResults; }
917 
918 			//add left if you havent already
919 			it = seen.find(tempIndexesLeft[index]);
920 			if (it == seen.end()) {
921 				mergedResults.push_back(tempIndexesLeft[index]);
922 				seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
923 			}
924 
925 			//add right if you havent already
926 			it = seen.find(tempIndexesRight[index]);
927 			if (it == seen.end()) {
928 				mergedResults.push_back(tempIndexesRight[index]);
929 				seen[tempIndexesRight[index]] = tempIndexesRight[index];
930 			}
931 			index++;
932 		}
933 
934 
935 		for (int i = index; i < tempIndexesLeft.size(); i++) {
936 			if (m->getControl_pressed()) { delete queryRight; delete queryLeft; return refResults; }
937 
938 			//add right if you havent already
939 			it = seen.find(tempIndexesLeft[i]);
940 			if (it == seen.end()) {
941 				mergedResults.push_back(tempIndexesLeft[i]);
942 				seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
943 			}
944 		}
945 
946 		for (int i = index; i < tempIndexesRight.size(); i++) {
947 			if (m->getControl_pressed()) { delete queryRight; delete queryLeft; return refResults; }
948 
949 			//add right if you havent already
950 			it = seen.find(tempIndexesRight[i]);
951 			if (it == seen.end()) {
952 				mergedResults.push_back(tempIndexesRight[i]);
953 				seen[tempIndexesRight[i]] = tempIndexesRight[i];
954 			}
955 		}
956 
957 		for (int i = 0; i < mergedResults.size(); i++) {
958 
959 			if (db[mergedResults[i]]->getName() != q.getName()) {
960 				Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
961 				refResults.push_back(temp);
962 
963 			}
964 		}
965 
966 		delete queryRight;
967 		delete queryLeft;
968 
969 		return refResults;
970 	}
971 	catch(exception& e) {
972 		m->errorOut(e, "ChimeraSlayer", "getKmerSeqs");
973 		exit(1);
974 	}
975 }
976 //***************************************************************************************************************
977 
978 
979