1 /*
2  * HHblits.h
3  *
4  *  Created on: Apr 1, 2014
5  *      Author: meiermark
6  */
7 
8 #ifndef HHBLITS_H_
9 #define HHBLITS_H_
10 
11 #include <fstream>
12 #include <cstdio>
13 #include <algorithm>
14 #include <cstdlib>
15 #include <cstring>
16 #include <sstream>
17 #include <vector>
18 #include <cmath>
19 #include <climits>
20 #include <float.h>
21 #include <ctype.h>
22 #include <time.h>
23 #include <errno.h>
24 #include <cassert>
25 #include <map>
26 
27 #ifdef OPENMP
28 #include <omp.h>
29 #endif
30 
31 #include <sys/time.h>
32 
33 extern "C" {
34 #include <ffindex.h>
35 }
36 
37 #include "cs.h"
38 #include "context_library.h"
39 #include "library_pseudocounts-inl.h"
40 #include "crf_pseudocounts-inl.h"
41 
42 #include "hhdecl.h"
43 #include "list.h"
44 #include "hash.h"
45 #include "util.h"
46 #include "hhutil.h"
47 
48 #include "hhhmm.h"
49 #include "hhhit.h"
50 #include "hhalignment.h"
51 #include "hhhalfalignment.h"
52 #include "hhfullalignment.h"
53 #include "hhhitlist.h"
54 
55 #include "hhmatrices.h"
56 #include "hhfunc.h"
57 
58 #include "hhdatabase.h"
59 
60 #include "hhprefilter.h"
61 
62 #include "log.h"
63 #include "simd.h"
64 #include "hhhmmsimd.h"
65 #include "hhviterbimatrix.h"
66 #include "hhviterbirunner.h"
67 
68 #include "hhposteriormatrix.h"
69 #include "hhposteriordecoderrunner.h"
70 
71 class HHblits {
72 public:
73   HHblits(Parameters& parameters, std::vector<HHblitsDatabase*>& databases);
74   virtual ~HHblits();
75 
76   void Reset();
77 
78   static void ProcessAllArguments(Parameters& par);
79 
80   //print methods for hhalign and hhblits
81   void printHitList();
82   void printHHRFile();
83 
84   //writer for non-mpi version
85   void writeHHRFile(char* hhrFile);
86   void writeAlisFile(char* basename);
87   void writeScoresFile(char* scoresFile);
88   void writeM8(char* m8File);
89   void writePairwiseAlisFile(char* pairwieseAlisFile, char outformat);
90   void writeAlitabFile(char* alitabFile);
91   void writePsiFile(char* psiFile);
92   void writeHMMFile(char* HMMFile);
93   void writeA3MFile(char* A3MFile);
94   void writeMatricesFile(char* matricesOutputFileName);
95 
96   //output writer for mpi version
97   static void writeHHRFile(HHblits& hhblits, std::stringstream& out);
98   static void writeScoresFile(HHblits& hhblits, std::stringstream& out);
99   static void writeM8(HHblits& hhblits, std::stringstream& out);
100   static void writePairwiseAlisFile(HHblits& hhblits, std::stringstream& out);
101   static void writeAlitabFile(HHblits& hhblits, std::stringstream& out);
102   static void writePsiFile(HHblits& hhblits, std::stringstream& out);
103   static void writeHMMFile(HHblits& hhblits, std::stringstream& out);
104   static void writeA3MFile(HHblits& hhblits, std::stringstream& out);
105   static void writeMatricesFile(HHblits& hhblits, std::stringstream& out);
106 
107   static void prepareDatabases(Parameters& par, std::vector<HHblitsDatabase*>& databases);
108 
109   virtual void run(FILE* query_fh, char* query_path);
110   void run(ffindex_entry_t* entry, char* data,
111       ffindex_index_t* sequence_index, char* seq,
112       ffindex_index_t* header_index, char* header);
113 
114 protected:
115 	// substitution matrix flavours
116 	float __attribute__((aligned(32))) P[20][20];
117 	float __attribute__((aligned(32))) R[20][20];
118 	float __attribute__((aligned(32))) Sim[20][20];
119 	float __attribute__((aligned(32))) S[20][20];
120 	float __attribute__((aligned(32))) pb[21];
121 
122 	// secondary structure matrices
123 	float S73[NDSSP][NSSPRED][MAXCF];
124 	float S37[NSSPRED][MAXCF][NDSSP];
125 	float S33[NSSPRED][MAXCF][NSSPRED][MAXCF];
126 
127 	Parameters& par;
128 
129 	cs::ContextLibrary<cs::AA>* context_lib;
130 	cs::Crf<cs::AA>* crf;
131 	cs::Pseudocounts<cs::AA>* pc_hhm_context_engine;
132 	cs::Admix* pc_hhm_context_mode;
133 	cs::Pseudocounts<cs::AA>* pc_prefilter_context_engine;
134 	cs::Admix* pc_prefilter_context_mode;
135 
136 	//database filenames
137 	std::vector<HHblitsDatabase*> dbs;
138 
139 	// Create query HMM with maximum of par.maxres match states
140 	HMM* q;
141 	// Create query HMM with maximum of par.maxres match states (needed for prefiltering)
142 	HMM* q_tmp;
143 
144 	// output A3M generated by merging A3M alignments for significant hits to the query alignment
145 	Alignment* Qali;
146 	// output A3M alignment with no sequence filtered out (only active with -all option)
147 	Alignment* Qali_allseqs;
148 
149 	ViterbiMatrix** viterbiMatrices;
150 	PosteriorMatrix** posteriorMatrices;
151 
152 	HitList hitlist; // list of hits with one Hit object for each pairwise comparison done
153 	std::map<int, Alignment*> alis;
154     void mergeHitsToQuery(HitList &hitlist, Hash<Hit>* previous_hits, Hash<Hit>* premerged_hits, int& seqs_found, int& cluster_found, int min_col_realign);
155     void perform_realign(HMMSimd& q_vec, const char input_format, std::vector<HHEntry*>& hits_to_realign, int min_col_realign);
156 
157 
158 	void add_hits_to_hitlist(std::vector<Hit>& hits, HitList& hitlist);
159 
160 
161 private:
162 	static void help(Parameters& par, char all = 0);
163 	static void ProcessArguments(Parameters& par);
164 	void RescoreWithViterbiKeepAlignment(HMMSimd& q_vec, Hash<Hit>* previous_hits);
165 
166     void premerge(Hash<Hit>* previous_hits, Hash<Hit>* premerged_hits,
167                   int& seqs_found, int& cluster_found, int min_col_realign);
168 };
169 
170 #endif /* HHBLITS_H_ */
171