1 #include "Util.h"
2 #include "Parameters.h"
3 #include "Matcher.h"
4 #include "Debug.h"
5 #include "DBReader.h"
6 #include "DBWriter.h"
7 #include "IndexReader.h"
8 #include "FileUtil.h"
9 #include "TranslateNucl.h"
10 #include "Sequence.h"
11 #include "Orf.h"
12 #include "MemoryMapped.h"
13 #include "NcbiTaxonomy.h"
14 
15 #define ZSTD_STATIC_LINKING_ONLY
16 #include <zstd.h>
17 #include "result_viz_prelude.html.zst.h"
18 
19 #include <map>
20 
21 #ifdef OPENMP
22 #include <omp.h>
23 #endif
24 
25 
printSeqBasedOnAln(std::string & out,const char * seq,unsigned int offset,const std::string & bt,bool reverse,bool isReverseStrand,bool translateSequence,const TranslateNucl & translateNucl)26 void printSeqBasedOnAln(std::string &out, const char *seq, unsigned int offset,
27                         const std::string &bt, bool reverse, bool isReverseStrand,
28                         bool translateSequence, const TranslateNucl &translateNucl) {
29     unsigned int seqPos = 0;
30     char codon[3];
31     for (uint32_t i = 0; i < bt.size(); ++i) {
32         char seqChar = (isReverseStrand == true) ? Orf::complement(seq[offset - seqPos]) : seq[offset + seqPos];
33         if (translateSequence) {
34             codon[0] = (isReverseStrand == true) ? Orf::complement(seq[offset - seqPos])     : seq[offset + seqPos];
35             codon[1] = (isReverseStrand == true) ? Orf::complement(seq[offset - (seqPos+1)]) : seq[offset + (seqPos+1)];
36             codon[2] = (isReverseStrand == true) ? Orf::complement(seq[offset - (seqPos+2)]) : seq[offset + (seqPos+2)];
37             seqChar = translateNucl.translateSingleCodon(codon);
38         }
39         switch (bt[i]) {
40             case 'M':
41                 out.append(1, seqChar);
42                 seqPos += (translateSequence) ?  3 : 1;
43                 break;
44             case 'I':
45                 if (reverse == true) {
46                     out.append(1, '-');
47                 } else {
48                     out.append(1, seqChar);
49                     seqPos += (translateSequence) ?  3 : 1;
50                 }
51                 break;
52             case 'D':
53                 if (reverse == true) {
54                     out.append(1, seqChar);
55                     seqPos += (translateSequence) ?  3 : 1;
56                 } else {
57                     out.append(1, '-');
58                 }
59                 break;
60         }
61     }
62 }
63 
64 /*
65 query       Query sequence label
66 target      Target sequenc label
67 evalue      E-value
68 gapopen     Number of gap opens
69 pident      Percentage of identical matches
70 nident      Number of identical matches
71 qstart      1-based start position of alignment in query sequence
72 qend        1-based end position of alignment in query sequence
73 qlen        Query sequence length
74 tstart      1-based start position of alignment in target sequence
75 tend        1-based end position of alignment in target sequence
76 tlen        Target sequence length
77 alnlen      Number of alignment columns
78 raw         Raw alignment score
79 bits        Bit score
80 cigar       Alignment as string M=letter pair, D=delete (gap in query), I=insert (gap in target)
81 qseq        Full-length query sequence
82 tseq        Full-length target sequence
83 qheader     Header of Query sequence
84 theader     Header of Target sequence
85 qaln        Aligned query sequence with gaps
86 taln        Aligned target sequence with gaps
87 qframe      Query frame (-3 to +3)
88 tframe      Target frame (-3 to +3)
89 mismatch    Number of mismatches
90 qcov        Fraction of query sequence covered by alignment
91 tcov        Fraction of target sequence covered by alignment
92 qset        Query set
93 tset        Target set
94  */
95 
readKeyToSet(const std::string & file)96 std::map<unsigned int, unsigned int> readKeyToSet(const std::string& file) {
97     std::map<unsigned int, unsigned int> mapping;
98     if (file.length() == 0) {
99         return mapping;
100     }
101 
102     MemoryMapped lookup(file, MemoryMapped::WholeFile, MemoryMapped::SequentialScan);
103     char* data = (char *) lookup.getData();
104     const char* entry[255];
105     while (*data != '\0') {
106         const size_t columns = Util::getWordsOfLine(data, entry, 255);
107         if (columns < 3) {
108             Debug(Debug::WARNING) << "Not enough columns in lookup file " << file << "\n";
109             continue;
110         }
111         mapping.emplace(Util::fast_atoi<unsigned int>(entry[0]), Util::fast_atoi<unsigned int>(entry[2]));
112         data = Util::skipLine(data);
113     }
114     lookup.close();
115     return mapping;
116 }
117 
118 
readSetToSource(const std::string & file)119 std::map<unsigned int, std::string> readSetToSource(const std::string& file) {
120     std::map<unsigned int, std::string> mapping;
121     if (file.length() == 0) {
122         return mapping;
123     }
124 
125     MemoryMapped source(file, MemoryMapped::WholeFile, MemoryMapped::SequentialScan);
126     char* data = (char *) source.getData();
127     const char* entry[255];
128     while (*data != '\0') {
129         const size_t columns = Util::getWordsOfLine(data, entry, 255);
130         if (columns < 2) {
131             Debug(Debug::WARNING) << "Not enough columns in lookup file " << file << "\n";
132             continue;
133         }
134         data = Util::skipLine(data);
135         std::string source(entry[1], data - entry[1] - 1);
136         mapping.emplace(Util::fast_atoi<unsigned int>(entry[0]), source);
137     }
138     source.close();
139     return mapping;
140 }
141 
compareToFirstInt(const std::pair<unsigned int,unsigned int> & lhs,const std::pair<unsigned int,unsigned int> & rhs)142 static bool compareToFirstInt(const std::pair<unsigned int, unsigned int>& lhs, const std::pair<unsigned int, unsigned int>&  rhs){
143     return (lhs.first <= rhs.first);
144 }
145 
convertalignments(int argc,const char ** argv,const Command & command)146 int convertalignments(int argc, const char **argv, const Command &command) {
147     Parameters &par = Parameters::getInstance();
148     par.parseParameters(argc, argv, command, true, 0, 0);
149 
150     const bool sameDB = par.db1.compare(par.db2) == 0 ? true : false;
151     const int format = par.formatAlignmentMode;
152     const bool touch = (par.preloadMode != Parameters::PRELOAD_MODE_MMAP);
153 
154     bool needSequenceDB = false;
155     bool needBacktrace = false;
156     bool needFullHeaders = false;
157     bool needLookup = false;
158     bool needSource = false;
159     bool needTaxonomy = false;
160     bool needTaxonomyMapping = false;
161     const std::vector<int> outcodes = Parameters::getOutputFormat(format, par.outfmt, needSequenceDB, needBacktrace, needFullHeaders,
162                                                                   needLookup, needSource, needTaxonomyMapping, needTaxonomy);
163 
164     NcbiTaxonomy * t = NULL;
165     std::vector<std::pair<unsigned int, unsigned int>> mapping;
166     if(needTaxonomy){
167         std::string db2NoIndexName = PrefilteringIndexReader::dbPathWithoutIndex(par.db2);
168         t = NcbiTaxonomy::openTaxonomy(db2NoIndexName);
169     }
170     if(needTaxonomy || needTaxonomyMapping){
171         std::string db2NoIndexName = PrefilteringIndexReader::dbPathWithoutIndex(par.db2);
172         if(FileUtil::fileExists(std::string(db2NoIndexName + "_mapping").c_str()) == false){
173             Debug(Debug::ERROR) << db2NoIndexName + "_mapping" << " does not exist. Please create the taxonomy mapping!\n";
174             EXIT(EXIT_FAILURE);
175         }
176         bool isSorted = Util::readMapping( db2NoIndexName + "_mapping", mapping);
177         if(isSorted == false){
178             std::stable_sort(mapping.begin(), mapping.end(), compareToFirstInt);
179         }
180     }
181 
182     bool isTranslatedSearch = false;
183 
184     int dbaccessMode = needSequenceDB ? (DBReader<unsigned int>::USE_INDEX | DBReader<unsigned int>::USE_DATA) : (DBReader<unsigned int>::USE_INDEX);
185 
186     std::map<unsigned int, unsigned int> qKeyToSet;
187     std::map<unsigned int, unsigned int> tKeyToSet;
188     if (needLookup) {
189         std::string file1 = par.db1 + ".lookup";
190         std::string file2 = par.db2 + ".lookup";
191         qKeyToSet = readKeyToSet(file1);
192         tKeyToSet = readKeyToSet(file2);
193     }
194 
195     std::map<unsigned int, std::string> qSetToSource;
196     std::map<unsigned int, std::string> tSetToSource;
197     if (needSource) {
198         std::string file1 = par.db1 + ".source";
199         std::string file2 = par.db2 + ".source";
200         qSetToSource = readSetToSource(file1);
201         tSetToSource = readSetToSource(file2);
202     }
203 
204     IndexReader qDbr(par.db1, par.threads,  IndexReader::SRC_SEQUENCES, (touch) ? (IndexReader::PRELOAD_INDEX | IndexReader::PRELOAD_DATA) : 0, dbaccessMode);
205     IndexReader qDbrHeader(par.db1, par.threads, IndexReader::SRC_HEADERS , (touch) ? (IndexReader::PRELOAD_INDEX | IndexReader::PRELOAD_DATA) : 0);
206 
207     IndexReader *tDbr;
208     IndexReader *tDbrHeader;
209     if (sameDB) {
210         tDbr = &qDbr;
211         tDbrHeader= &qDbrHeader;
212     } else {
213         tDbr = new IndexReader(par.db2, par.threads, IndexReader::SRC_SEQUENCES, (touch) ? (IndexReader::PRELOAD_INDEX | IndexReader::PRELOAD_DATA) : 0, dbaccessMode);
214         tDbrHeader = new IndexReader(par.db2, par.threads, IndexReader::SRC_HEADERS, (touch) ? (IndexReader::PRELOAD_INDEX | IndexReader::PRELOAD_DATA) : 0);
215     }
216 
217     bool queryNucs = Parameters::isEqualDbtype(qDbr.sequenceReader->getDbtype(), Parameters::DBTYPE_NUCLEOTIDES);
218     bool targetNucs = Parameters::isEqualDbtype(tDbr->sequenceReader->getDbtype(), Parameters::DBTYPE_NUCLEOTIDES);
219     if (needSequenceDB) {
220         // try to figure out if search was translated. This is can not be solved perfectly.
221         bool seqtargetAA = false;
222         if(Parameters::isEqualDbtype(tDbr->getDbtype(), Parameters::DBTYPE_INDEX_DB)){
223             IndexReader tseqDbr(par.db2, par.threads, IndexReader::SEQUENCES, 0, IndexReader::PRELOAD_INDEX);
224             seqtargetAA = Parameters::isEqualDbtype(tseqDbr.sequenceReader->getDbtype(), Parameters::DBTYPE_AMINO_ACIDS);
225         } else if(targetNucs == true && queryNucs == true && par.searchType == Parameters::SEARCH_TYPE_AUTO){
226             Debug(Debug::WARNING) << "It is unclear from the input if a translated or nucleotide search was performed\n "
227                                      "Please provide the parameter --search-type 2 (translated) or 3 (nucleotide)\n";
228             EXIT(EXIT_FAILURE);
229         } else if(par.searchType == Parameters::SEARCH_TYPE_TRANSLATED){
230             seqtargetAA = true;
231         }
232 
233         if((targetNucs == true && queryNucs == false )  || (targetNucs == false && queryNucs == true ) || (targetNucs == true && seqtargetAA == true && queryNucs == true )  ){
234             isTranslatedSearch = true;
235         }
236     }
237 
238     int gapOpen, gapExtend;
239     SubstitutionMatrix * subMat= NULL;
240     if (targetNucs == true && queryNucs == true && isTranslatedSearch == false) {
241         subMat = new NucleotideMatrix(par.scoringMatrixFile.nucleotides, 1.0, 0.0);
242         gapOpen = par.gapOpen.nucleotides;
243         gapExtend = par.gapExtend.nucleotides;
244     }else{
245         subMat = new SubstitutionMatrix(par.scoringMatrixFile.aminoacids, 2.0, 0.0);
246         gapOpen = par.gapOpen.aminoacids;
247         gapExtend = par.gapExtend.aminoacids;
248     }
249     EvalueComputation *evaluer = NULL;
250     bool queryProfile = false;
251     bool targetProfile = false;
252     if (needSequenceDB) {
253         queryProfile = Parameters::isEqualDbtype(qDbr.sequenceReader->getDbtype(), Parameters::DBTYPE_HMM_PROFILE);
254         targetProfile = Parameters::isEqualDbtype(tDbr->sequenceReader->getDbtype(), Parameters::DBTYPE_HMM_PROFILE);
255         evaluer = new EvalueComputation(tDbr->sequenceReader->getAminoAcidDBSize(), subMat, gapOpen, gapExtend);
256     }
257 
258     DBReader<unsigned int> alnDbr(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader<unsigned int>::USE_INDEX|DBReader<unsigned int>::USE_DATA);
259     alnDbr.open(DBReader<unsigned int>::LINEAR_ACCCESS);
260 
261     unsigned int localThreads = 1;
262 #ifdef OPENMP
263     localThreads = std::min((unsigned int)par.threads, (unsigned int)alnDbr.getSize());
264 #endif
265 
266     const bool shouldCompress = par.dbOut == true && par.compressed == true;
267     const int dbType = par.dbOut == true ? Parameters::DBTYPE_GENERIC_DB : Parameters::DBTYPE_OMIT_FILE;
268     DBWriter resultWriter(par.db4.c_str(), par.db4Index.c_str(), localThreads, shouldCompress, dbType);
269     resultWriter.open();
270 
271     const bool isDb = par.dbOut;
272     TranslateNucl translateNucl(static_cast<TranslateNucl::GenCode>(par.translationTable));
273 
274     if (format == Parameters::FORMAT_ALIGNMENT_SAM) {
275         char buffer[1024];
276         unsigned int lastKey = tDbr->sequenceReader->getLastKey();
277         bool *headerWritten = new bool[lastKey + 1];
278         memset(headerWritten, 0, sizeof(bool) * (lastKey + 1));
279         resultWriter.writeStart(0);
280         std::string header = "@HD\tVN:1.4\tSO:queryname\n";
281         resultWriter.writeAdd(header.c_str(), header.size(), 0);
282 
283         for (size_t i = 0; i < alnDbr.getSize(); i++) {
284             char *data = alnDbr.getData(i, 0);
285             while (*data != '\0') {
286                 char dbKeyBuffer[255 + 1];
287                 Util::parseKey(data, dbKeyBuffer);
288                 const unsigned int dbKey = (unsigned int) strtoul(dbKeyBuffer, NULL, 10);
289                 if (headerWritten[dbKey] == false) {
290                     headerWritten[dbKey] = true;
291                     unsigned int tId = tDbr->sequenceReader->getId(dbKey);
292                     unsigned int seqLen = tDbr->sequenceReader->getSeqLen(tId);
293                     unsigned int tHeaderId = tDbrHeader->sequenceReader->getId(dbKey);
294                     const char *tHeader = tDbrHeader->sequenceReader->getData(tHeaderId, 0);
295                     std::string targetId = Util::parseFastaHeader(tHeader);
296                     int count = snprintf(buffer, sizeof(buffer), "@SQ\tSN:%s\tLN:%d\n", targetId.c_str(),
297                                          (int32_t) seqLen);
298                     if (count < 0 || static_cast<size_t>(count) >= sizeof(buffer)) {
299                         Debug(Debug::WARNING) << "Truncated line in header " << i << "!\n";
300                         continue;
301                     }
302                     resultWriter.writeAdd(buffer, count, 0);
303                 }
304                 resultWriter.writeEnd(0, 0, false, 0);
305                 data = Util::skipLine(data);
306             }
307         }
308         delete[] headerWritten;
309     } else if (format == Parameters::FORMAT_ALIGNMENT_HTML) {
310         size_t dstSize = ZSTD_findDecompressedSize(result_viz_prelude_html_zst, result_viz_prelude_html_zst_len);
311         char* dst = (char*)malloc(sizeof(char) * dstSize);
312         size_t realSize = ZSTD_decompress(dst, dstSize, result_viz_prelude_html_zst, result_viz_prelude_html_zst_len);
313         resultWriter.writeData(dst, realSize, 0, 0, false, false);
314         const char* scriptBlock = "<script>render([";
315         resultWriter.writeData(scriptBlock, strlen(scriptBlock), 0, 0, false, false);
316         free(dst);
317     }
318 
319     Debug::Progress progress(alnDbr.getSize());
320 #pragma omp parallel num_threads(localThreads)
321     {
322         unsigned int thread_idx = 0;
323 #ifdef OPENMP
324         thread_idx = static_cast<unsigned int>(omp_get_thread_num());
325 #endif
326         char buffer[1024];
327 
328         std::string result;
329         result.reserve(1024*1024);
330 
331         std::string queryProfData;
332         queryProfData.reserve(1024);
333 
334         std::string queryBuffer;
335         queryBuffer.reserve(1024);
336 
337         std::string queryHeaderBuffer;
338         queryHeaderBuffer.reserve(1024);
339 
340         std::string targetProfData;
341         targetProfData.reserve(1024);
342 
343         std::string newBacktrace;
344         newBacktrace.reserve(1024);
345 
346         const TaxonNode * taxonNode = NULL;
347 
348 #pragma omp  for schedule(dynamic, 10)
349         for (size_t i = 0; i < alnDbr.getSize(); i++) {
350             progress.updateProgress();
351 
352             const unsigned int queryKey = alnDbr.getDbKey(i);
353             char *querySeqData = NULL;
354             size_t querySeqLen = 0;
355             queryProfData.clear();
356             if (needSequenceDB) {
357                 size_t qId = qDbr.sequenceReader->getId(queryKey);
358                 querySeqData = qDbr.sequenceReader->getData(qId, thread_idx);
359                 querySeqLen = qDbr.sequenceReader->getSeqLen(qId);
360                 if(sameDB && qDbr.sequenceReader->isCompressed()){
361                     queryBuffer.assign(querySeqData, querySeqLen);
362                     querySeqData = (char*) queryBuffer.c_str();
363                 }
364                 if (queryProfile) {
365                     Sequence::extractProfileConsensus(querySeqData, *subMat, queryProfData);
366                 }
367             }
368 
369             size_t qHeaderId = qDbrHeader.sequenceReader->getId(queryKey);
370             const char *qHeader = qDbrHeader.sequenceReader->getData(qHeaderId, thread_idx);
371             size_t qHeaderLen = qDbrHeader.sequenceReader->getSeqLen(qHeaderId);
372             std::string queryId = Util::parseFastaHeader(qHeader);
373             if (sameDB && needFullHeaders) {
374                 queryHeaderBuffer.assign(qHeader, qHeaderLen);
375                 qHeader = (char*) queryHeaderBuffer.c_str();
376             }
377 
378             if (format == Parameters::FORMAT_ALIGNMENT_HTML) {
379                 const char* jsStart = "{\"query\": {\"accession\": \"%s\",\"sequence\": \"";
380                 int count = snprintf(buffer, sizeof(buffer), jsStart, queryId.c_str(), querySeqData);
381                 if (count < 0 || static_cast<size_t>(count) >= sizeof(buffer)) {
382                     Debug(Debug::WARNING) << "Truncated line in entry" << i << "!\n";
383                     continue;
384                 }
385                 result.append(buffer, count);
386                 if (queryProfile) {
387                     result.append(queryProfData);
388                 } else {
389                     result.append(querySeqData, querySeqLen);
390                 }
391                 result.append("\"}, \"alignments\": [\n");
392             }
393 
394             char *data = alnDbr.getData(i, thread_idx);
395             while (*data != '\0') {
396                 Matcher::result_t res = Matcher::parseAlignmentRecord(data, true);
397                 data = Util::skipLine(data);
398 
399                 if (res.backtrace.empty() && needBacktrace == true) {
400                     Debug(Debug::ERROR) << "Backtrace cigar is missing in the alignment result. Please recompute the alignment with the -a flag.\n"
401                                            "Command: mmseqs align " << par.db1 << " " << par.db2 << " " << par.db3 << " " << "alnNew -a\n";
402                     EXIT(EXIT_FAILURE);
403                 }
404 
405                 size_t tHeaderId = tDbrHeader->sequenceReader->getId(res.dbKey);
406                 const char *tHeader = tDbrHeader->sequenceReader->getData(tHeaderId, thread_idx);
407                 size_t tHeaderLen = tDbrHeader->sequenceReader->getSeqLen(tHeaderId);
408                 std::string targetId = Util::parseFastaHeader(tHeader);
409 
410                 unsigned int gapOpenCount = 0;
411                 unsigned int alnLen = res.alnLength;
412                 unsigned int missMatchCount = 0;
413                 unsigned int identical = 0;
414                 if (res.backtrace.empty() == false) {
415                     size_t matchCount = 0;
416                     alnLen = 0;
417                     for (size_t pos = 0; pos < res.backtrace.size(); pos++) {
418                         int cnt = 0;
419                         if (isdigit(res.backtrace[pos])) {
420                             cnt += Util::fast_atoi<int>(res.backtrace.c_str() + pos);
421                             while (isdigit(res.backtrace[pos])) {
422                                 pos++;
423                             }
424                         }
425                         alnLen += cnt;
426 
427                         switch (res.backtrace[pos]) {
428                             case 'M':
429                                 matchCount += cnt;
430                                 break;
431                             case 'D':
432                             case 'I':
433                                 gapOpenCount += 1;
434                                 break;
435                         }
436                     }
437 //                res.seqId = X / alnLen;
438                     identical = static_cast<unsigned int>(res.seqId * static_cast<float>(alnLen) + 0.5);
439                     //res.alnLength = alnLen;
440                     missMatchCount = static_cast<unsigned int>( matchCount - identical);
441                 } else {
442                     const int adjustQstart = (res.qStartPos == -1) ? 0 : res.qStartPos;
443                     const int adjustDBstart = (res.dbStartPos == -1) ? 0 : res.dbStartPos;
444                     const float bestMatchEstimate = static_cast<float>(std::min(abs(res.qEndPos - adjustQstart), abs(res.dbEndPos - adjustDBstart)));
445                     missMatchCount = static_cast<unsigned int>(bestMatchEstimate * (1.0f - res.seqId) + 0.5);
446                 }
447 
448                 switch (format) {
449                     case Parameters::FORMAT_ALIGNMENT_BLAST_TAB: {
450                         if (outcodes.empty()) {
451                             int count = snprintf(buffer, sizeof(buffer),
452                                                  "%s\t%s\t%1.3f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.2E\t%d\n",
453                                                  queryId.c_str(), targetId.c_str(), res.seqId, alnLen,
454                                                  missMatchCount, gapOpenCount,
455                                                  res.qStartPos + 1, res.qEndPos + 1,
456                                                  res.dbStartPos + 1, res.dbEndPos + 1,
457                                                  res.eval, res.score);
458                             if (count < 0 || static_cast<size_t>(count) >= sizeof(buffer)) {
459                                 Debug(Debug::WARNING) << "Truncated line in entry" << i << "!\n";
460                                 continue;
461                             }
462                             result.append(buffer, count);
463                         } else {
464                             char *targetSeqData = NULL;
465                             targetProfData.clear();
466                             unsigned int taxon = 0;
467 
468                             if(needTaxonomy || needTaxonomyMapping) {
469                                 std::pair<unsigned int, unsigned int> val;
470                                 val.first = res.dbKey;
471                                 std::vector<std::pair<unsigned int, unsigned int>>::iterator mappingIt;
472                                 mappingIt = std::upper_bound(mapping.begin(), mapping.end(), val, compareToFirstInt);
473                                 if (mappingIt == mapping.end() || mappingIt->first != val.first) {
474                                     taxon = 0;
475                                     taxonNode = NULL;
476                                 }else{
477                                     taxon = mappingIt->second;
478                                     if(needTaxonomy){
479                                         taxonNode = t->taxonNode(taxon, false);
480                                     }
481                                 }
482 
483                             }
484 
485                             if (needSequenceDB) {
486                                 size_t tId = tDbr->sequenceReader->getId(res.dbKey);
487                                 targetSeqData = tDbr->sequenceReader->getData(tId, thread_idx);
488                                 if (targetProfile) {
489                                     Sequence::extractProfileConsensus(targetSeqData, *subMat, targetProfData);
490                                 }
491                             }
492                             for(size_t i = 0; i < outcodes.size(); i++) {
493                                 switch (outcodes[i]) {
494                                     case Parameters::OUTFMT_QUERY:
495                                         result.append(queryId);
496                                         break;
497                                     case Parameters::OUTFMT_TARGET:
498                                         result.append(targetId);
499                                         break;
500                                     case Parameters::OUTFMT_EVALUE:
501                                         result.append(SSTR(res.eval));
502                                         break;
503                                     case Parameters::OUTFMT_GAPOPEN:
504                                         result.append(SSTR(gapOpenCount));
505                                         break;
506                                     case Parameters::OUTFMT_FIDENT:
507                                         result.append(SSTR(res.seqId));
508                                         break;
509                                     case Parameters::OUTFMT_PIDENT:
510                                         result.append(SSTR(res.seqId*100));
511                                         break;
512                                     case Parameters::OUTFMT_NIDENT:
513                                         result.append(SSTR(identical));
514                                         break;
515                                     case Parameters::OUTFMT_QSTART:
516                                         result.append(SSTR(res.qStartPos + 1));
517                                         break;
518                                     case Parameters::OUTFMT_QEND:
519                                         result.append(SSTR(res.qEndPos + 1));
520                                         break;
521                                     case Parameters::OUTFMT_QLEN:
522                                         result.append(SSTR(res.qLen));
523                                         break;
524                                     case Parameters::OUTFMT_TSTART:
525                                         result.append(SSTR(res.dbStartPos + 1));
526                                         break;
527                                     case Parameters::OUTFMT_TEND:
528                                         result.append(SSTR(res.dbEndPos + 1));
529                                         break;
530                                     case Parameters::OUTFMT_TLEN:
531                                         result.append(SSTR(res.dbLen));
532                                         break;
533                                     case Parameters::OUTFMT_ALNLEN:
534                                         result.append(SSTR(alnLen));
535                                         break;
536                                     case Parameters::OUTFMT_RAW:
537                                         result.append(SSTR(static_cast<int>(evaluer->computeRawScoreFromBitScore(res.score) + 0.5)));
538                                         break;
539                                     case Parameters::OUTFMT_BITS:
540                                         result.append(SSTR(res.score));
541                                         break;
542                                     case Parameters::OUTFMT_CIGAR:
543                                         if(isTranslatedSearch == true && targetNucs == true && queryNucs == true ){
544                                             Matcher::result_t::protein2nucl(res.backtrace, newBacktrace);
545                                             res.backtrace = newBacktrace;
546                                         }
547                                         result.append(SSTR(res.backtrace));
548                                         newBacktrace.clear();
549                                         break;
550                                     case Parameters::OUTFMT_QSEQ:
551                                         if (queryProfile) {
552                                             result.append(queryProfData.c_str(), res.qLen);
553                                         } else {
554                                             result.append(querySeqData, res.qLen);
555                                         }
556                                         break;
557                                     case Parameters::OUTFMT_TSEQ:
558                                         if (targetProfile) {
559                                             result.append(targetProfData.c_str(), res.dbLen);
560                                         } else {
561                                             result.append(targetSeqData, res.dbLen);
562                                         }
563                                         break;
564                                     case Parameters::OUTFMT_QHEADER:
565                                         result.append(qHeader, qHeaderLen);
566                                         break;
567                                     case Parameters::OUTFMT_THEADER:
568                                         result.append(tHeader, tHeaderLen);
569                                         break;
570                                     case Parameters::OUTFMT_QALN:
571                                         if (queryProfile) {
572                                             printSeqBasedOnAln(result, queryProfData.c_str(), res.qStartPos,
573                                                                Matcher::uncompressAlignment(res.backtrace), false, (res.qStartPos > res.qEndPos),
574                                                                (isTranslatedSearch == true && queryNucs == true), translateNucl);
575                                         } else {
576                                             printSeqBasedOnAln(result, querySeqData, res.qStartPos,
577                                                                Matcher::uncompressAlignment(res.backtrace), false, (res.qStartPos > res.qEndPos),
578                                                                (isTranslatedSearch == true && queryNucs == true), translateNucl);
579                                         }
580                                         break;
581                                     case Parameters::OUTFMT_TALN: {
582                                         if (targetProfile) {
583                                             printSeqBasedOnAln(result, targetProfData.c_str(), res.dbStartPos,
584                                                                Matcher::uncompressAlignment(res.backtrace), true,
585                                                                (res.dbStartPos > res.dbEndPos),
586                                                                (isTranslatedSearch == true && targetNucs == true), translateNucl);
587                                         } else {
588                                             printSeqBasedOnAln(result, targetSeqData, res.dbStartPos,
589                                                                Matcher::uncompressAlignment(res.backtrace), true,
590                                                                (res.dbStartPos > res.dbEndPos),
591                                                                (isTranslatedSearch == true && targetNucs == true), translateNucl);
592                                         }
593                                         break;
594                                     }
595                                     case Parameters::OUTFMT_MISMATCH:
596                                         result.append(SSTR(missMatchCount));
597                                         break;
598                                     case Parameters::OUTFMT_QCOV:
599                                         result.append(SSTR(res.qcov));
600                                         break;
601                                     case Parameters::OUTFMT_TCOV:
602                                         result.append(SSTR(res.dbcov));
603                                         break;
604                                     case Parameters::OUTFMT_QSET:
605                                         result.append(SSTR(qSetToSource[qKeyToSet[queryKey]]));
606                                         break;
607                                     case Parameters::OUTFMT_QSETID:
608                                         result.append(SSTR(qKeyToSet[queryKey]));
609                                         break;
610                                     case Parameters::OUTFMT_TSET:
611                                         result.append(SSTR(tSetToSource[tKeyToSet[res.dbKey]]));
612                                         break;
613                                     case Parameters::OUTFMT_TSETID:
614                                         result.append(SSTR(tKeyToSet[res.dbKey]));
615                                         break;
616                                     case Parameters::OUTFMT_TAXID:
617                                         result.append(SSTR(taxon));
618                                         break;
619                                     case Parameters::OUTFMT_TAXNAME:
620                                         result.append((taxonNode != NULL) ? t->getString(taxonNode->nameIdx) : "unclassified");
621                                         break;
622                                     case Parameters::OUTFMT_TAXLIN:
623                                         result.append((taxonNode != NULL) ? t->taxLineage(taxonNode, true) : "unclassified");
624                                         break;
625                                     case Parameters::OUTFMT_EMPTY:
626                                         result.push_back('-');
627                                         break;
628                                     case Parameters::OUTFMT_QORFSTART:
629                                         result.append(SSTR(res.queryOrfStartPos));
630                                         break;
631                                     case Parameters::OUTFMT_QORFEND:
632                                         result.append(SSTR(res.queryOrfEndPos));
633                                         break;
634                                     case Parameters::OUTFMT_TORFSTART:
635                                         result.append(SSTR(res.dbOrfStartPos));
636                                         break;
637                                     case Parameters::OUTFMT_TORFEND:
638                                         result.append(SSTR(res.dbOrfEndPos));
639                                         break;
640                                 }
641                                 if (i < outcodes.size() - 1) {
642                                     result.push_back('\t');
643                                 }
644                             }
645                             result.push_back('\n');
646                         }
647                         break;
648                     }
649                     case Parameters::FORMAT_ALIGNMENT_BLAST_WITH_LEN: {
650                         int count = snprintf(buffer, sizeof(buffer),
651                                              "%s\t%s\t%1.3f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.2E\t%d\t%d\t%d\n",
652                                              queryId.c_str(), targetId.c_str(), res.seqId, alnLen,
653                                              missMatchCount, gapOpenCount,
654                                              res.qStartPos + 1, res.qEndPos + 1,
655                                              res.dbStartPos + 1, res.dbEndPos + 1,
656                                              res.eval, res.score,
657                                              res.qLen, res.dbLen);
658 
659                         if (count < 0 || static_cast<size_t>(count) >= sizeof(buffer)) {
660                             Debug(Debug::WARNING) << "Truncated line in entry" << i << "!\n";
661                             continue;
662                         }
663 
664                         result.append(buffer, count);
665                         break;
666                     }
667                     case Parameters::FORMAT_ALIGNMENT_SAM: {
668                         bool strand = res.qEndPos > res.qStartPos;
669                         int rawScore = static_cast<int>(evaluer->computeRawScoreFromBitScore(res.score) + 0.5);
670                         uint32_t mapq = -4.343 * log(exp(static_cast<double>(-rawScore)));
671                         mapq = (uint32_t) (mapq + 4.99);
672                         mapq = mapq < 254 ? mapq : 254;
673                         int count = snprintf(buffer, sizeof(buffer), "%s\t%d\t%s\t%d\t%d\t",  queryId.c_str(), (strand) ? 16: 0, targetId.c_str(), res.dbStartPos + 1, mapq);
674                         if (count < 0 || static_cast<size_t>(count) >= sizeof(buffer)) {
675                             Debug(Debug::WARNING) << "Truncated line in entry" << i << "!\n";
676                             continue;
677                         }
678                         result.append(buffer, count);
679                         if (isTranslatedSearch == true && targetNucs == true && queryNucs == true) {
680                             Matcher::result_t::protein2nucl(res.backtrace, newBacktrace);
681                             result.append(newBacktrace);
682                             newBacktrace.clear();
683 
684                         } else {
685                             result.append(res.backtrace);
686                         }
687                         result.append("\t*\t0\t0\t");
688                         int start = std::min(res.qStartPos, res.qEndPos);
689                         int end   = std::max(res.qStartPos, res.qEndPos);
690                         if (queryProfile) {
691                             result.append(queryProfData.c_str() + start, (end + 1) - start);
692                         } else {
693                             result.append(querySeqData + start, (end + 1) - start);
694                         }
695                         count = snprintf(buffer, sizeof(buffer), "\t*\tAS:i:%d\tNM:i:%d\n", rawScore, missMatchCount);
696                         if (count < 0 || static_cast<size_t>(count) >= sizeof(buffer)) {
697                             Debug(Debug::WARNING) << "Truncated line in entry" << i << "!\n";
698                             continue;
699                         }
700                         result.append(buffer, count);
701                         break;
702                     }
703                     case Parameters::FORMAT_ALIGNMENT_HTML: {
704                         const char* jsAln = "{\"target\": \"%s\", \"seqId\": %1.3f, \"alnLen\": %d, \"mismatch\": %d, \"gapopen\": %d, \"qStartPos\": %d, \"qEndPos\": %d, \"dbStartPos\": %d, \"dbEndPos\": %d, \"eval\": %.2E, \"score\": %d, \"qLen\": %d, \"dbLen\": %d, \"qAln\": \"";
705                         int count = snprintf(buffer, sizeof(buffer), jsAln,
706                                              targetId.c_str(), res.seqId, alnLen,
707                                              missMatchCount, gapOpenCount,
708                                              res.qStartPos + 1, res.qEndPos + 1,
709                                              res.dbStartPos + 1, res.dbEndPos + 1,
710                                              res.eval, res.score,
711                                              res.qLen, res.dbLen);
712 
713                         if (count < 0 || static_cast<size_t>(count) >= sizeof(buffer)) {
714                             Debug(Debug::WARNING) << "Truncated line in entry" << i << "!\n";
715                             continue;
716                         }
717                         result.append(buffer, count);
718                         if (queryProfile) {
719                             printSeqBasedOnAln(result, queryProfData.c_str(), res.qStartPos,
720                                                Matcher::uncompressAlignment(res.backtrace), false, (res.qStartPos > res.qEndPos),
721                                                (isTranslatedSearch == true && queryNucs == true), translateNucl);
722                         } else {
723                             printSeqBasedOnAln(result, querySeqData, res.qStartPos,
724                                                Matcher::uncompressAlignment(res.backtrace), false, (res.qStartPos > res.qEndPos),
725                                                (isTranslatedSearch == true && queryNucs == true), translateNucl);
726                         }
727                         result.append("\", \"dbAln\": \"");
728                         size_t tId = tDbr->sequenceReader->getId(res.dbKey);
729                         char* targetSeqData = tDbr->sequenceReader->getData(tId, thread_idx);
730                         if (targetProfile) {
731                             Sequence::extractProfileConsensus(targetSeqData, *subMat, targetProfData);
732                             printSeqBasedOnAln(result, targetProfData.c_str(), res.dbStartPos,
733                                                Matcher::uncompressAlignment(res.backtrace), true,
734                                                (res.dbStartPos > res.dbEndPos),
735                                                (isTranslatedSearch == true && targetNucs == true), translateNucl);
736                         } else {
737                             printSeqBasedOnAln(result, targetSeqData, res.dbStartPos,
738                                                Matcher::uncompressAlignment(res.backtrace), true,
739                                                (res.dbStartPos > res.dbEndPos),
740                                                (isTranslatedSearch == true && targetNucs == true), translateNucl);
741                         }
742                         result.append("\" },\n");
743                         break;
744                     }
745 
746 //                    case Parameters::FORMAT_ALIGNMENT_GFF:{
747 //                        // for TBLASTX
748 //                        bool strand = res.qEndPos > res.qStartPos;
749 //                        int currStart = std::min(res.qStartPos, res.qEndPos);
750 //                        int currEnd = std::max(res.qStartPos, res.qEndPos);
751 //                        int currLen = currEnd - currStart;
752 //                        result.append(queryId);
753 //                        result.append("\tconserve\tprotein_match\t");
754 //                        result.append(SSTR(currStart+1));
755 //                        result.push_back('\t');
756 //                        result.append(SSTR(currEnd+1));
757 //                        result.push_back('\t');
758 //                        result.append(SSTR(currLen));
759 //                        result.push_back('\t');
760 //                        result.push_back((strand) ? '-' : '+');
761 //                        result.append("\t.\t");
762 //                        result.append("ID=");
763 //                        result.append(queryId);
764 //                        result.append(":hsp:");
765 //                        result.append(SSTR(counter));
766 //                        result.append(";");
767 //                        break;
768 //                    }
769                     default:
770                         Debug(Debug::ERROR) << "Not implemented yet";
771                         EXIT(EXIT_FAILURE);
772                 }
773             }
774 
775             if (format == Parameters::FORMAT_ALIGNMENT_HTML) {
776                 result.append("]},\n");
777             }
778             resultWriter.writeData(result.c_str(), result.size(), queryKey, thread_idx, isDb);
779             result.clear();
780         }
781     }
782     if (format == Parameters::FORMAT_ALIGNMENT_HTML) {
783         const char* endBlock = "]);</script>";
784         resultWriter.writeData(endBlock, strlen(endBlock), 0, localThreads - 1, false, false);
785     }
786     // tsv output
787     resultWriter.close(true);
788     if (isDb == false) {
789         FileUtil::remove(par.db4Index.c_str());
790     }
791     if(needTaxonomy){
792         delete t;
793     }
794     alnDbr.close();
795     if (sameDB == false) {
796         delete tDbr;
797         delete tDbrHeader;
798     }
799     if (needSequenceDB) {
800         delete evaluer;
801     }
802     delete subMat;
803 
804     return EXIT_SUCCESS;
805 }
806 
807