1 /*
2 * chimeraslayer.cpp
3 * Mothur
4 *
5 * Created by westcott on 9/25/09.
6 * Copyright 2009 Pschloss Lab. All rights reserved.
7 *
8 */
9
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
12 #include "kmerdb.hpp"
13 #include "blastdb.hpp"
14
15 //***************************************************************************************************************
ChimeraSlayer(string file,string temp,bool trim,string mode,int k,int ms,int mms,int win,float div,int minsim,int mincov,int minbs,int minsnp,int par,int it,int inc,int numw,bool r,string blas,int tid)16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div,
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid) : MothurChimera() {
18 try {
19 fastafile = file;
20 templateFileName = temp; templateSeqs = readSeqs(temp);
21 searchMethod = mode;
22 kmerSize = k;
23 match = ms;
24 misMatch = mms;
25 window = win;
26 divR = div;
27 minSim = minsim;
28 minCov = mincov;
29 minBS = minbs;
30 minSNP = minsnp;
31 parents = par;
32 iters = it;
33 increment = inc;
34 numWanted = numw;
35 realign = r;
36 trimChimera = trim;
37 numNoParents = 0;
38 blastlocation = blas;
39 threadID = tid;
40
41 doPrep();
42 }
43 catch(exception& e) {
44 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
45 exit(1);
46 }
47 }
48 //***************************************************************************************************************
49 //template=self
ChimeraSlayer(string file,string temp,bool trim,map<string,int> & prior,string mode,int k,int ms,int mms,int win,float div,int minsim,int mincov,int minbs,int minsnp,int par,int it,int inc,int numw,bool r,string blas,int tid,bool bg)50 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div,
51 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid, bool bg) : MothurChimera() {
52 try {
53 byGroup = bg;
54 fastafile = file; templateSeqs = readSeqs(fastafile);
55 templateFileName = temp;
56 searchMethod = mode;
57 kmerSize = k;
58 match = ms;
59 misMatch = mms;
60 window = win;
61 divR = div;
62 minSim = minsim;
63 minCov = mincov;
64 minBS = minbs;
65 minSNP = minsnp;
66 parents = par;
67 iters = it;
68 increment = inc;
69 numWanted = numw;
70 realign = r;
71 trimChimera = trim;
72 priority = prior;
73 numNoParents = 0;
74 blastlocation = blas;
75 threadID = tid;
76
77
78 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
79
80 if (searchMethod == "distance") {
81 //createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
82
83 //run filter on template copying templateSeqs into filteredTemplateSeqs
84 for (int i = 0; i < templateSeqs.size(); i++) {
85 if (m->getControl_pressed()) { break; }
86
87 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
88 runFilter(newSeq);
89 filteredTemplateSeqs.push_back(newSeq);
90 }
91 }
92 }
93 catch(exception& e) {
94 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
95 exit(1);
96 }
97 }
98 //***************************************************************************************************************
99 //template=self
ChimeraSlayer(string file,string temp,bool trim,map<string,int> & prior,string mode,int k,int ms,int mms,int win,float div,int minsim,int mincov,int minbs,int minsnp,int par,int it,int inc,int numw,bool r,string blas,int tid)100 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div,
101 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid) : MothurChimera() {
102 try {
103 fastafile = file; templateSeqs = readSeqs(fastafile);
104 templateFileName = temp;
105 searchMethod = mode;
106 kmerSize = k;
107 match = ms;
108 misMatch = mms;
109 window = win;
110 divR = div;
111 minSim = minsim;
112 minCov = mincov;
113 minBS = minbs;
114 minSNP = minsnp;
115 parents = par;
116 iters = it;
117 increment = inc;
118 numWanted = numw;
119 realign = r;
120 trimChimera = trim;
121 priority = prior;
122 numNoParents = 0;
123 blastlocation = blas;
124 threadID = tid;
125
126
127 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
128
129 if (searchMethod == "distance") {
130 //createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
131
132 //run filter on template copying templateSeqs into filteredTemplateSeqs
133 for (int i = 0; i < templateSeqs.size(); i++) {
134 if (m->getControl_pressed()) { break; }
135
136 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
137 runFilter(newSeq);
138 filteredTemplateSeqs.push_back(newSeq);
139 }
140 }
141 }
142 catch(exception& e) {
143 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
144 exit(1);
145 }
146 }
147 //***************************************************************************************************************
doPrep()148 int ChimeraSlayer::doPrep() {
149 try {
150 if (searchMethod == "distance") {
151 //read in all query seqs
152 vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
153
154 vector<Sequence*> temp = templateSeqs;
155 for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); }
156
157 createFilter(temp, 0.0); //just removed columns where all seqs have a gap
158
159 for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; }
160
161 if (m->getControl_pressed()) { return 0; }
162
163 //run filter on template copying templateSeqs into filteredTemplateSeqs
164 for (int i = 0; i < templateSeqs.size(); i++) {
165 if (m->getControl_pressed()) { return 0; }
166
167 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
168 runFilter(newSeq);
169 filteredTemplateSeqs.push_back(newSeq);
170 }
171 }
172 string kmerDBNameLeft;
173 string kmerDBNameRight;
174 Utils util;
175 //generate the kmerdb to pass to maligner
176 if (searchMethod == "kmer") {
177 string templatePath = util.hasPath(templateFileName);
178 string rightTemplateFileName = templatePath + "right." + util.getRootName(util.getSimpleName(templateFileName));
179 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
180
181 string leftTemplateFileName = templatePath + "left." + util.getRootName(util.getSimpleName(templateFileName));
182 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
183
184 //leftside
185 kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
186 ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
187 bool needToGenerateLeft = true;
188
189 if(kmerFileTestLeft){
190 string line = util.getline(kmerFileTestLeft);
191 bool GoodFile = util.checkReleaseVersion(line, current->getVersion());
192 if (GoodFile) { needToGenerateLeft = false; }
193 }
194
195 if(needToGenerateLeft){
196
197 for (int i = 0; i < templateSeqs.size(); i++) {
198
199 if (m->getControl_pressed()) { return 0; }
200
201 string leftFrag = templateSeqs[i]->getUnaligned();
202 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
203
204 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
205 databaseLeft->addSequence(leftTemp);
206 }
207 databaseLeft->generateDB();
208
209 }else {
210 databaseLeft->readDB(kmerFileTestLeft);
211 }
212 kmerFileTestLeft.close();
213
214 databaseLeft->setNumSeqs(templateSeqs.size());
215
216 //rightside
217 kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
218 ifstream kmerFileTestRight(kmerDBNameRight.c_str());
219 bool needToGenerateRight = true;
220
221 if(kmerFileTestRight){
222 string line = util.getline(kmerFileTestRight);
223 bool GoodFile = util.checkReleaseVersion(line, current->getVersion());
224 if (GoodFile) { needToGenerateRight = false; }
225 }
226
227 if(needToGenerateRight){
228
229 for (int i = 0; i < templateSeqs.size(); i++) {
230 if (m->getControl_pressed()) { return 0; }
231
232 string rightFrag = templateSeqs[i]->getUnaligned();
233 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
234
235 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
236 databaseRight->addSequence(rightTemp);
237 }
238 databaseRight->generateDB();
239
240 }else { databaseRight->readDB(kmerFileTestRight); }
241 kmerFileTestRight.close();
242
243 databaseRight->setNumSeqs(templateSeqs.size());
244
245 }else if (searchMethod == "blast") {
246
247 //generate blastdb
248 databaseLeft = new BlastDB(util.getRootName(util.getSimpleName(fastafile)), -1.0, -1.0, 1, -3, blastlocation, threadID);
249
250 if (m->getControl_pressed()) { return 0; }
251
252 for (int i = 0; i < templateSeqs.size(); i++) { databaseLeft->addSequence(*templateSeqs[i]); }
253 databaseLeft->generateDB();
254 databaseLeft->setNumSeqs(templateSeqs.size());
255 }
256
257 return 0;
258
259 }
260 catch(exception& e) {
261 m->errorOut(e, "ChimeraSlayer", "doprep");
262 exit(1);
263 }
264 }
265 //***************************************************************************************************************
getTemplate(Sequence q,vector<Sequence * > & userTemplateFiltered)266 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence q, vector<Sequence*>& userTemplateFiltered) {
267 try {
268
269 //when template=self, the query file is sorted from most abundance to least abundant
270 //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
271 vector<Sequence*> userTemplate;
272
273 int myAbund = priority[q.getName()];
274
275 for (int i = 0; i < templateSeqs.size(); i++) {
276
277 if (m->getControl_pressed()) { return userTemplate; }
278
279 //have I reached a sequence with the same abundance as myself?
280 if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
281
282 //if its am not chimeric add it
283 if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) {
284 userTemplate.push_back(templateSeqs[i]);
285 if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); }
286 }
287 }
288
289 //avoids nuisance error from formatdb for making blank blast database
290 if (userTemplate.size() == 0) {
291 return userTemplate;
292 }
293
294 string kmerDBNameLeft;
295 string kmerDBNameRight;
296
297 //generate the kmerdb to pass to maligner
298 if (searchMethod == "kmer") {
299 Utils util;
300 string templatePath = util.hasPath(templateFileName);
301 string rightTemplateFileName = templatePath + "right." + util.getRootName(util.getSimpleName(templateFileName));
302 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
303
304 string leftTemplateFileName = templatePath + "left." + util.getRootName(util.getSimpleName(templateFileName));
305 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
306
307
308 for (int i = 0; i < userTemplate.size(); i++) {
309
310 if (m->getControl_pressed()) { return userTemplate; }
311
312 string leftFrag = userTemplate[i]->getUnaligned();
313 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
314
315 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
316 databaseLeft->addSequence(leftTemp);
317 }
318 databaseLeft->generateDB();
319 databaseLeft->setNumSeqs(userTemplate.size());
320
321 for (int i = 0; i < userTemplate.size(); i++) {
322 if (m->getControl_pressed()) { return userTemplate; }
323
324 string rightFrag = userTemplate[i]->getUnaligned();
325 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
326
327 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
328 databaseRight->addSequence(rightTemp);
329 }
330 databaseRight->generateDB();
331 databaseRight->setNumSeqs(userTemplate.size());
332
333 }else if (searchMethod == "blast") {
334
335 //generate blastdb
336 Utils util;
337 databaseLeft = new BlastDB(util.getRootName(util.getSimpleName(templateFileName)), -1.0, -1.0, 1, -3, blastlocation, threadID);
338
339 if (m->getControl_pressed()) { return userTemplate; }
340
341 for (int i = 0; i < userTemplate.size(); i++) { if (m->getControl_pressed()) { return userTemplate; } databaseLeft->addSequence(*userTemplate[i]); }
342 databaseLeft->generateDB();
343 databaseLeft->setNumSeqs(userTemplate.size());
344 }
345
346 return userTemplate;
347
348 }
349 catch(exception& e) {
350 m->errorOut(e, "ChimeraSlayer", "getTemplate");
351 exit(1);
352 }
353 }
354
355 //***************************************************************************************************************
~ChimeraSlayer()356 ChimeraSlayer::~ChimeraSlayer() {
357 if (templateFileName != "self") {
358 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
359 else if (searchMethod == "blast") { delete databaseLeft; }
360 }
361 }
362 //***************************************************************************************************************
printHeader(ostream & out)363 void ChimeraSlayer::printHeader(ostream& out) {
364
365 m->mothurOut("\nOnly reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.\n");
366
367 out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
368 }
369 //***************************************************************************************************************
print(ostream & out,ostream & outAcc)370 Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc) {
371 try {
372 Sequence trim;
373 if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); }
374
375 if (chimeraFlags == "yes") {
376 string chimeraFlag = "no";
377 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
378 ||
379 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
380
381
382 if (chimeraFlag == "yes") {
383 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
384 m->mothurOutJustToScreen(querySeq.getName() + "\tyes\n");
385 outAcc << querySeq.getName() << endl;
386
387 if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
388
389 if (trimChimera) {
390 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
391 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
392
393 string newAligned = trim.getAligned();
394
395 if (lengthLeft > lengthRight) { //trim right
396 for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
397 }else { //trim left
398 for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; }
399 }
400 trim.setAligned(newAligned);
401 }
402 }
403 }
404
405 printBlock(chimeraResults[0], chimeraFlag, out);
406 }else {
407 out << querySeq.getName() << "\tno" << endl;
408 }
409
410 return trim;
411
412 }
413 catch(exception& e) {
414 m->errorOut(e, "ChimeraSlayer", "print");
415 exit(1);
416 }
417 }
418 //***************************************************************************************************************
print(ostream & out,ostream & outAcc,data_results leftPiece,data_results rightPiece)419 Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
420 try {
421 Sequence trim;
422
423 if (trimChimera) {
424 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
425 trim.setName(leftPiece.trimQuery.getName()); trim.setAligned(aligned);
426 }
427
428 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
429
430 string chimeraFlag = "no";
431 if (leftPiece.flag == "yes") {
432
433 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
434 ||
435 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
436 }
437
438 if (rightPiece.flag == "yes") {
439 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
440 ||
441 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
442 }
443
444 bool rightChimeric = false;
445 bool leftChimeric = false;
446
447 if (chimeraFlag == "yes") {
448 //which peice is chimeric or are both
449 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
450 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
451
452 if (rightChimeric || leftChimeric) {
453 m->mothurOutJustToScreen(querySeq.getName() + "\tyes\n");
454 outAcc << querySeq.getName() << endl;
455
456 if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
457
458 if (trimChimera) {
459 string newAligned = trim.getAligned();
460
461 //right side is fine so keep that
462 if ((leftChimeric) && (!rightChimeric)) {
463 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
464 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
465 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
466 }else { //both sides are chimeric, keep longest piece
467
468 int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
469 int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
470
471 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
472 int length = lengthLeftLeft;
473 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
474
475 int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
476 int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
477
478 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
479 if (lengthRightRight > length) { longest = 4; }
480
481 if (longest == 1) { //leftleft
482 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
483 }else if (longest == 2) { //leftright
484 //get rid of leftleft
485 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
486 //get rid of right
487 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
488 }else if (longest == 3) { //rightleft
489 //get rid of left
490 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
491 //get rid of rightright
492 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
493 }else { //rightright
494 //get rid of left
495 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
496 //get rid of rightleft
497 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
498 }
499 }
500
501 trim.setAligned(newAligned);
502 }
503
504 }
505 }
506
507 printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
508 }else {
509 out << querySeq.getName() << "\tno" << endl;
510 }
511
512 return trim;
513
514 }
515 catch(exception& e) {
516 m->errorOut(e, "ChimeraSlayer", "print");
517 exit(1);
518 }
519 }
520
521 //***************************************************************************************************************
getChimeras(Sequence * query)522 int ChimeraSlayer::getChimeras(Sequence* query) {
523 try {
524
525 trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
526 printResults.trimQuery = trimQuery;
527
528 chimeraFlags = "no";
529 printResults.flag = "no";
530
531 querySeq = *query;
532
533 //you must create a template
534 vector<Sequence*> thisTemplate;
535 vector<Sequence*> thisFilteredTemplate;
536 if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; }
537 else { thisTemplate = getTemplate(*query, thisFilteredTemplate); } //fills this template and creates the databases
538
539 if (m->getControl_pressed()) { return 0; }
540 if (thisTemplate.size() == 0) { return 0; } //not chimeric
541
542 //moved this out of maligner - 4/29/11
543 vector<Sequence> refSeqs = getRefSeqs(*query, thisTemplate, thisFilteredTemplate);
544
545 Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov);
546 Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS);
547
548 if (templateFileName == "self") {
549 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
550 else if (searchMethod == "blast") { delete databaseLeft; }
551 }
552
553 if (m->getControl_pressed()) { return 0; }
554
555 string chimeraFlag = maligner.getResults(*query, decalc);
556
557 if (m->getControl_pressed()) { return 0; }
558
559 vector<results> Results = maligner.getOutput();
560
561 //for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; }
562
563 if (chimeraFlag == "yes") {
564
565 if (realign) {
566 vector<string> parents;
567 for (int i = 0; i < Results.size(); i++) {
568 parents.push_back(Results[i].parentAligned);
569 }
570
571 ChimeraReAligner realigner;
572 realigner.reAlign(query, parents);
573
574 }
575
576 //get sequence that were given from maligner results
577 vector<SeqCompare> seqs;
578 map<string, float> removeDups;
579 map<string, float>::iterator itDup;
580 map<string, string> parentNameSeq;
581 map<string, string>::iterator itSeq;
582 for (int j = 0; j < Results.size(); j++) {
583
584 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
585 //only add if you are not a duplicate
586
587 if(Results[j].queryToParentLocal >= 90){ //local match has to be over 90% similarity
588
589 itDup = removeDups.find(Results[j].parent);
590 if (itDup == removeDups.end()) { //this is not duplicate
591 removeDups[Results[j].parent] = dist;
592 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
593 }else if (dist > itDup->second) { //is this a stronger number for this parent
594 removeDups[Results[j].parent] = dist;
595 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
596 }
597
598 }
599
600 }
601
602 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
603 itSeq = parentNameSeq.find(itDup->first);
604 Sequence seq(itDup->first, itSeq->second);
605
606 SeqCompare member;
607 member.seq = seq;
608 member.dist = itDup->second;
609 seqs.push_back(member);
610 }
611
612 //limit number of parents to explore - default 3
613 if (Results.size() > parents) {
614 //sort by distance
615 sort(seqs.begin(), seqs.end(), compareSeqCompare);
616 //prioritize larger more similiar sequence fragments
617 reverse(seqs.begin(), seqs.end());
618
619 //for (int k = seqs.size()-1; k > (parents-1); k--) {
620 // delete seqs[k].seq;
621 //seqs.pop_back();
622 //}
623 }
624
625 //put seqs into vector to send to slayer
626
627
628 vector<Sequence> seqsForSlayer;
629 for (int k = 0; k < seqs.size(); k++) { seqsForSlayer.push_back(seqs[k].seq); }
630
631 if (m->getControl_pressed()) { return 0; }
632
633 //send to slayer
634 chimeraFlags = slayer.getResults(*query, seqsForSlayer);
635 if (m->getControl_pressed()) { return 0; }
636 chimeraResults = slayer.getOutput();
637
638 printResults.flag = chimeraFlags;
639 printResults.results = chimeraResults;
640
641 //free memory
642 //for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
643 }
644
645 return 0;
646 }
647 catch(exception& e) {
648 m->errorOut(e, "ChimeraSlayer", "getChimeras");
649 exit(1);
650 }
651 }
652 //***************************************************************************************************************
printBlock(data_struct data,string flag,ostream & out)653 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
654 try {
655 out << querySeq.getName();
656 out << '\t' << data.parentA.getName() << "\t" << data.parentB.getName();
657
658 out << '\t' << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa;
659 out << '\t' << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb ;
660
661 out << '\t' << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\n';
662
663 }
664 catch(exception& e) {
665 m->errorOut(e, "ChimeraSlayer", "printBlock");
666 exit(1);
667 }
668 }
669 //***************************************************************************************************************
printBlock(data_results leftdata,data_results rightdata,bool leftChimeric,bool rightChimeric,string flag,ostream & out)670 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
671 try {
672
673 if ((leftChimeric) && (!rightChimeric)) { //print left
674 out << querySeq.getName();
675 out << '\t' << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName();
676 out << '\t' << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa;
677 out << '\t' << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb;
678 out << '\t' << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << endl;
679
680 }else if ((!leftChimeric) && (rightChimeric)) { //print right
681 out << querySeq.getName();
682 out << '\t' << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName();
683 out << '\t' << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa;
684 out << '\t' << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb;
685 out << '\t' << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << endl;
686
687 }else { //print both results
688 if (leftdata.flag == "yes") {
689 out << querySeq.getName() + "_LEFT";
690 out << '\t' << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName();
691 out << '\t' << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa;
692 out << '\t' << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb;
693 out << '\t' << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << endl;
694 }
695
696 if (rightdata.flag == "yes") {
697 out << querySeq.getName() + "_RIGHT";
698 out << '\t' << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName();
699 out << '\t' << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa;
700 out << '\t' << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb;
701 out << '\t' << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\n';
702
703 }
704 }
705 }
706 catch(exception& e) {
707 m->errorOut(e, "ChimeraSlayer", "printBlock");
708 exit(1);
709 }
710 }
711 //***************************************************************************************************************
getBlock(data_results leftdata,data_results rightdata,bool leftChimeric,bool rightChimeric,string flag)712 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
713 try {
714
715 string out = "";
716
717 if ((leftChimeric) && (!rightChimeric)) { //get left
718 out += querySeq.getName();
719 out += "\t" + leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName();
720 out += "\t" + toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa);
721 out += "\t" + toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb);
722 out += "\t" + flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\n";
723
724 }else if ((!leftChimeric) && (rightChimeric)) { //print right
725 out += querySeq.getName();
726 out += "\t" + rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName();
727 out += "\t" + toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa);
728 out += "\t" + toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb);
729 out += "\t" + flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\n";
730
731 }else { //print both results
732 if (leftdata.flag == "yes") {
733 out += querySeq.getName() + "_LEFT";
734 out += "\t" + leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName();
735 out += "\t" + toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa);
736 out += "\t" + toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb);
737 out += "\t" + flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\n";
738 }
739
740 if (rightdata.flag == "yes") {
741 out += querySeq.getName() + "_RIGHT";
742 out += "\t" + rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName();
743 out += "\t" + toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa);
744 out += "\t" + toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb);
745 out += "\t" + flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\n";
746 }
747 }
748
749 return out;
750
751 }
752 catch(exception& e) {
753 m->errorOut(e, "ChimeraSlayer", "getBlock");
754 exit(1);
755 }
756 }
757 //***************************************************************************************************************
getBlock(data_struct data,string flag)758 string ChimeraSlayer::getBlock(data_struct data, string flag){
759 try {
760
761 string outputString = "";
762 outputString += querySeq.getName();
763 outputString += "\t" + data.parentA.getName() + "\t" + data.parentB.getName();
764 outputString += "\t" + toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa);
765 outputString += "\t" + toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb);
766 outputString += "\t" + flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\n";
767
768 return outputString;
769 }
770 catch(exception& e) {
771 m->errorOut(e, "ChimeraSlayer", "getBlock");
772 exit(1);
773 }
774 }
775 //***************************************************************************************************************
getRefSeqs(Sequence q,vector<Sequence * > & thisTemplate,vector<Sequence * > & thisFilteredTemplate)776 vector<Sequence> ChimeraSlayer::getRefSeqs(Sequence q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
777 try {
778
779 vector<Sequence> refSeqs;
780
781 if (searchMethod == "distance") {
782 //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
783 Sequence* newSeq = new Sequence(q.getName(), q.getAligned());
784 runFilter(newSeq);
785 refSeqs = decalc.findClosest(*newSeq, thisTemplate, thisFilteredTemplate, numWanted, minSim);
786 delete newSeq;
787 }else if (searchMethod == "blast") {
788 refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes
789 }else if (searchMethod == "kmer") {
790 refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes
791 }else { m->mothurOut("not valid search."); exit(1); } //should never get here
792
793 return refSeqs;
794 }
795 catch(exception& e) {
796 m->errorOut(e, "ChimeraSlayer", "getRefSeqs");
797 exit(1);
798 }
799 }
800 //***************************************************************************************************************/
getBlastSeqs(Sequence q,vector<Sequence * > & db,int num)801 vector<Sequence> ChimeraSlayer::getBlastSeqs(Sequence q, vector<Sequence*>& db, int num) {
802 try {
803
804 vector<Sequence> refResults;
805
806 //get parts of query
807 string queryUnAligned = q.getUnaligned();
808 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
809 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
810
811 Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
812 Sequence* queryRight = new Sequence(q.getName(), rightQuery);
813
814
815 vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim);
816 vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim);
817
818 //merge results
819 map<int, int> seen;
820 map<int, int>::iterator it;
821 vector<int> mergedResults;
822
823 int index = 0;
824 // for (int i = 0; i < smaller.size(); i++) {
825 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
826
827 if (m->getControl_pressed()) { delete queryRight; delete queryLeft; return refResults; }
828
829 //add left if you havent already
830 it = seen.find(tempIndexesLeft[index]);
831 if (it == seen.end()) {
832 mergedResults.push_back(tempIndexesLeft[index]);
833 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
834 }
835
836 //add right if you havent already
837 it = seen.find(tempIndexesRight[index]);
838 if (it == seen.end()) {
839 mergedResults.push_back(tempIndexesRight[index]);
840 seen[tempIndexesRight[index]] = tempIndexesRight[index];
841 }
842 index++;
843 }
844
845
846 for (int i = index; i < tempIndexesLeft.size(); i++) {
847 if (m->getControl_pressed()) { delete queryRight; delete queryLeft; return refResults; }
848
849 //add right if you havent already
850 it = seen.find(tempIndexesLeft[i]);
851 if (it == seen.end()) {
852 mergedResults.push_back(tempIndexesLeft[i]);
853 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
854 }
855 }
856
857 for (int i = index; i < tempIndexesRight.size(); i++) {
858 if (m->getControl_pressed()) { delete queryRight; delete queryLeft; return refResults; }
859
860 //add right if you havent already
861 it = seen.find(tempIndexesRight[i]);
862 if (it == seen.end()) {
863 mergedResults.push_back(tempIndexesRight[i]);
864 seen[tempIndexesRight[i]] = tempIndexesRight[i];
865 }
866 }
867 //string qname = q->getName().substr(0, q->getName().find_last_of('_'));
868
869
870 if (mergedResults.size() == 0) { numNoParents++; }
871
872 for (int i = 0; i < mergedResults.size(); i++) {
873
874 if (db[mergedResults[i]]->getName() != q.getName()) {
875 Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
876 refResults.push_back(temp);
877 }
878 }
879
880 delete queryRight;
881 delete queryLeft;
882
883 return refResults;
884 }
885 catch(exception& e) {
886 m->errorOut(e, "ChimeraSlayer", "getBlastSeqs");
887 exit(1);
888 }
889 }
890 //***************************************************************************************************************
getKmerSeqs(Sequence q,vector<Sequence * > & db,int num)891 vector<Sequence> ChimeraSlayer::getKmerSeqs(Sequence q, vector<Sequence*>& db, int num) {
892 try {
893 vector<Sequence> refResults;
894
895 //get parts of query
896 string queryUnAligned = q.getUnaligned();
897 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
898 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
899
900 Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
901 Sequence* queryRight = new Sequence(q.getName(), rightQuery);
902
903 vector<float> Scores;
904 vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num, Scores);
905 vector<int> tempIndexesRight = databaseRight->findClosestSequences(queryRight, num, Scores);
906
907 //merge results
908 map<int, int> seen;
909 map<int, int>::iterator it;
910 vector<int> mergedResults;
911
912 int index = 0;
913 // for (int i = 0; i < smaller.size(); i++) {
914 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
915
916 if (m->getControl_pressed()) { delete queryRight; delete queryLeft; return refResults; }
917
918 //add left if you havent already
919 it = seen.find(tempIndexesLeft[index]);
920 if (it == seen.end()) {
921 mergedResults.push_back(tempIndexesLeft[index]);
922 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
923 }
924
925 //add right if you havent already
926 it = seen.find(tempIndexesRight[index]);
927 if (it == seen.end()) {
928 mergedResults.push_back(tempIndexesRight[index]);
929 seen[tempIndexesRight[index]] = tempIndexesRight[index];
930 }
931 index++;
932 }
933
934
935 for (int i = index; i < tempIndexesLeft.size(); i++) {
936 if (m->getControl_pressed()) { delete queryRight; delete queryLeft; return refResults; }
937
938 //add right if you havent already
939 it = seen.find(tempIndexesLeft[i]);
940 if (it == seen.end()) {
941 mergedResults.push_back(tempIndexesLeft[i]);
942 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
943 }
944 }
945
946 for (int i = index; i < tempIndexesRight.size(); i++) {
947 if (m->getControl_pressed()) { delete queryRight; delete queryLeft; return refResults; }
948
949 //add right if you havent already
950 it = seen.find(tempIndexesRight[i]);
951 if (it == seen.end()) {
952 mergedResults.push_back(tempIndexesRight[i]);
953 seen[tempIndexesRight[i]] = tempIndexesRight[i];
954 }
955 }
956
957 for (int i = 0; i < mergedResults.size(); i++) {
958
959 if (db[mergedResults[i]]->getName() != q.getName()) {
960 Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
961 refResults.push_back(temp);
962
963 }
964 }
965
966 delete queryRight;
967 delete queryLeft;
968
969 return refResults;
970 }
971 catch(exception& e) {
972 m->errorOut(e, "ChimeraSlayer", "getKmerSeqs");
973 exit(1);
974 }
975 }
976 //***************************************************************************************************************
977
978
979