1 /*
2  * HHDatabase.cpp
3  *
4  *  Created on: Apr 7, 2014
5  *      Author: meiermark
6  */
7 
8 #include "hhdatabase.h"
9 
10 #include <stddef.h>
11 #include <sys/mman.h>
12 #include <cstdio>
13 #include <cstdlib>
14 #include <cstring>
15 #include <string>
16 #include <utility>
17 #include <vector>
18 
19 #include "hhalignment.h"
20 #include "hhprefilter.h"
21 #include "hhdecl.h"
22 #include "hhhmm.h"
23 #include "util-inl.h"
24 
25 
HHDatabase()26 HHDatabase::HHDatabase() {
27 
28 }
29 
~HHDatabase()30 HHDatabase::~HHDatabase() {
31 }
32 
buildDatabaseName(const char * base,const char * extension,const char * suffix,char * databaseName)33 void HHDatabase::buildDatabaseName(const char* base, const char* extension,
34                                    const char* suffix, char* databaseName) {
35   strcpy(databaseName, base);
36   if (strlen(extension) != 0)
37     strcat(databaseName, "_");
38   strcat(databaseName, extension);
39   strcat(databaseName, suffix);
40 }
41 
checkDatabaseConflicts(const char * base)42 bool HHDatabase::checkDatabaseConflicts(const char* base) {
43 
44   char a3m_index_filename[NAMELEN];
45   char a3m_data_filename[NAMELEN];
46 
47   char ca3m_index_filename[NAMELEN];
48   char ca3m_data_filename[NAMELEN];
49 
50   buildDatabaseName(base, "a3m", ".ffdata", a3m_data_filename);
51   buildDatabaseName(base, "a3m", ".ffindex", a3m_index_filename);
52 
53   buildDatabaseName(base, "ca3m", ".ffdata", ca3m_data_filename);
54   buildDatabaseName(base, "ca3m", ".ffindex", ca3m_index_filename);
55 
56   if (file_exists(ca3m_index_filename) && file_exists(ca3m_data_filename)
57       && file_exists(a3m_index_filename) && file_exists(a3m_data_filename)) {
58     return true;
59   }
60   return false;
61 }
62 
HHblitsDatabase(const char * base,bool initCs219)63 HHblitsDatabase::HHblitsDatabase(const char* base, bool initCs219) {
64   cs219_database = NULL;
65 
66   a3m_database = NULL;
67   hhm_database = NULL;
68 
69   ca3m_database = NULL;
70   sequence_database = NULL;
71   header_database = NULL;
72 
73 
74   use_compressed = false;
75   basename = new char[strlen(base) + 1];
76   strcpy(basename, base);
77 
78   if (initCs219) {
79       char cs219_index_filename[NAMELEN];
80       char cs219_data_filename[NAMELEN];
81 
82       buildDatabaseName(base, "cs219", ".ffdata", cs219_data_filename);
83       buildDatabaseName(base, "cs219", ".ffindex", cs219_index_filename);
84 
85       cs219_database = new FFindexDatabase(cs219_data_filename, cs219_index_filename, use_compressed);
86   }
87 
88   if (!checkAndBuildCompressedDatabase(base)) {
89     char a3m_index_filename[NAMELEN];
90     char a3m_data_filename[NAMELEN];
91 
92     buildDatabaseName(base, "a3m", ".ffdata", a3m_data_filename);
93     buildDatabaseName(base, "a3m", ".ffindex", a3m_index_filename);
94 
95     if (file_exists(a3m_data_filename) && file_exists(a3m_index_filename)) {
96       a3m_database = new FFindexDatabase(a3m_data_filename, a3m_index_filename, use_compressed);
97     }
98 
99     char hhm_index_filename[NAMELEN];
100     char hhm_data_filename[NAMELEN];
101 
102     buildDatabaseName(base, "hhm", ".ffdata", hhm_data_filename);
103     buildDatabaseName(base, "hhm", ".ffindex", hhm_index_filename);
104 
105     if (file_exists(hhm_data_filename) && file_exists(hhm_index_filename)) {
106       hhm_database = new FFindexDatabase(hhm_data_filename, hhm_index_filename, use_compressed);
107     }
108 
109     if (a3m_database == NULL && hhm_database == NULL) {
110       HH_LOG(ERROR) << "Could find neither hhm_db nor a3m_db!" << std::endl;
111       exit(1);
112     } else if (a3m_database != NULL && hhm_database == NULL) {
113       query_database = a3m_database;
114     } else if (a3m_database == NULL && hhm_database != NULL) {
115       query_database = hhm_database;
116     } else {
117       // both exist, use the a3m
118       query_database = a3m_database;
119     }
120   } else {
121     query_database = cs219_database;
122   }
123 
124   prefilter = NULL;
125 }
126 
~HHblitsDatabase()127 HHblitsDatabase::~HHblitsDatabase() {
128   delete[] basename;
129   delete cs219_database;
130 
131   if (use_compressed) {
132     delete ca3m_database;
133     delete sequence_database;
134     delete header_database;
135   } else {
136     delete a3m_database;
137     delete hhm_database;
138   }
139 
140   if (prefilter) {
141     delete prefilter;
142   }
143 }
144 
initPrefilter(const std::string & cs_library)145 void HHblitsDatabase::initPrefilter(const std::string& cs_library) {
146   prefilter = new Prefilter(cs_library, cs219_database);
147 }
148 
initNoPrefilter(std::vector<HHEntry * > & new_entries)149 void HHblitsDatabase::initNoPrefilter(std::vector<HHEntry*>& new_entries) {
150   std::vector<std::pair<int, std::string> > new_entry_names;
151   Prefilter::init_no_prefiltering(cs219_database, new_entry_names);
152 
153   getEntriesFromNames(new_entry_names, new_entries);
154 }
155 
initSelected(std::vector<std::string> & selected_templates,std::vector<HHEntry * > & new_entries)156 void HHblitsDatabase::initSelected(std::vector<std::string>& selected_templates,
157                                    std::vector<HHEntry*>& new_entries) {
158 
159   std::vector<std::pair<int, std::string> > new_entry_names;
160   Prefilter::init_selected(cs219_database, selected_templates,
161                                new_entry_names);
162 
163   getEntriesFromNames(new_entry_names, new_entries);
164 }
165 
prefilter_db(HMM * q_tmp,Hash<Hit> * previous_hits,const int threads,const int prefilter_gap_open,const int prefilter_gap_extend,const int prefilter_score_offset,const int prefilter_bit_factor,const double prefilter_evalue_thresh,const double prefilter_evalue_coarse_thresh,const int preprefilter_smax_thresh,const int min_prefilter_hits,const int maxnumbdb,const float R[20][20],std::vector<HHEntry * > & new_entries,std::vector<HHEntry * > & old_entries)166 void HHblitsDatabase::prefilter_db(HMM* q_tmp, Hash<Hit>* previous_hits,
167                                    const int threads,
168                                    const int prefilter_gap_open,
169                                    const int prefilter_gap_extend,
170                                    const int prefilter_score_offset,
171                                    const int prefilter_bit_factor,
172                                    const double prefilter_evalue_thresh,
173                                    const double prefilter_evalue_coarse_thresh,
174                                    const int preprefilter_smax_thresh,
175                                    const int min_prefilter_hits,
176                                    const int maxnumbdb, const float R[20][20],
177                                    std::vector<HHEntry*>& new_entries,
178                                    std::vector<HHEntry*>& old_entries) {
179 
180   std::vector<std::pair<int, std::string> > prefiltered_new_entry_names;
181   std::vector<std::pair<int, std::string> > prefiltered_old_entry_names;
182 
183   prefilter->prefilter_db(q_tmp, previous_hits, threads, prefilter_gap_open,
184                           prefilter_gap_extend, prefilter_score_offset,
185                           prefilter_bit_factor, prefilter_evalue_thresh,
186                           prefilter_evalue_coarse_thresh,
187                           preprefilter_smax_thresh, min_prefilter_hits,
188                           maxnumbdb, R, prefiltered_new_entry_names,
189                           prefiltered_old_entry_names);
190 
191   getEntriesFromNames(prefiltered_new_entry_names, new_entries);
192   getEntriesFromNames(prefiltered_old_entry_names, old_entries);
193 }
194 
getEntriesFromNames(std::vector<std::pair<int,std::string>> & hits,std::vector<HHEntry * > & entries)195 void HHblitsDatabase::getEntriesFromNames(std::vector<std::pair<int, std::string>>& hits, std::vector<HHEntry*>& entries) {
196   for (size_t i = 0; i < hits.size(); i++) {
197     ffindex_entry_t* entry;
198 
199     if (hhm_database != NULL) {
200       entry = ffindex_get_entry_by_name(hhm_database->db_index, const_cast<char*>(hits[i].second.c_str()));
201 
202       if (entry != NULL) {
203         HHEntry* hhentry = new HHDatabaseEntry(hits[i].first, this, hhm_database, entry);
204         entries.push_back(hhentry);
205         continue;
206       }
207     }
208 
209     if (use_compressed) {
210       entry = ffindex_get_entry_by_name(ca3m_database->db_index, const_cast<char *>(hits[i].second.c_str()));
211       if (entry == NULL) {
212         //TODO: error
213         HH_LOG(WARNING) << "Could not fetch entry from compressed a3m!" << std::endl;
214         HH_LOG(WARNING) << "\tentry: " << hits[i].second << std::endl;
215         HH_LOG(WARNING) << "\tdb: " << ca3m_database->data_filename << std::endl;
216         continue;
217       }
218 
219       HHEntry *hhentry = new HHDatabaseEntry(hits[i].first, this, ca3m_database, entry);
220       entries.push_back(hhentry);
221     } else {
222       entry = ffindex_get_entry_by_name(a3m_database->db_index, const_cast<char*>(hits[i].second.c_str()));
223       if (entry == NULL) {
224         //TODO: error
225         HH_LOG(WARNING) << "Could not fetch entry from a3m or hhm!" << std::endl;
226         HH_LOG(WARNING) << "\tentry: " << hits[i].second << std::endl;
227         HH_LOG(WARNING) << "\ta3m_db: " << a3m_database->data_filename << std::endl;
228         HH_LOG(WARNING) << "\thhm_db: " << hhm_database->data_filename << std::endl;
229         continue;
230       }
231       HHEntry* hhentry = new HHDatabaseEntry(hits[i].first, this, a3m_database, entry);
232       entries.push_back(hhentry);
233     }
234   }
235 }
236 
checkAndBuildCompressedDatabase(const char * base)237 bool HHblitsDatabase::checkAndBuildCompressedDatabase(const char* base) {
238   char ca3m_index_filename[NAMELEN];
239   char ca3m_data_filename[NAMELEN];
240 
241   char sequence_index_filename[NAMELEN];
242   char sequence_data_filename[NAMELEN];
243 
244   char header_index_filename[NAMELEN];
245   char header_data_filename[NAMELEN];
246 
247   buildDatabaseName(base, "ca3m", ".ffdata", ca3m_data_filename);
248   buildDatabaseName(base, "ca3m", ".ffindex", ca3m_index_filename);
249 
250   buildDatabaseName(base, "sequence", ".ffdata", sequence_data_filename);
251   buildDatabaseName(base, "sequence", ".ffindex", sequence_index_filename);
252 
253   buildDatabaseName(base, "header", ".ffdata", header_data_filename);
254   buildDatabaseName(base, "header", ".ffindex", header_index_filename);
255 
256   if (file_exists(ca3m_index_filename) && file_exists(ca3m_data_filename)
257       && file_exists(header_index_filename) && file_exists(header_data_filename)
258       && file_exists(sequence_index_filename) && file_exists(sequence_data_filename)) {
259     use_compressed = true;
260 
261     ca3m_database = new FFindexDatabase(ca3m_data_filename, ca3m_index_filename, true);
262     sequence_database = new FFindexDatabase(sequence_data_filename, sequence_index_filename, true);
263     header_database = new FFindexDatabase(header_data_filename, header_index_filename, true);
264 
265     char hhm_index_filename[NAMELEN];
266     char hhm_data_filename[NAMELEN];
267 
268     buildDatabaseName(base, "hhm", ".ffdata", hhm_data_filename);
269     buildDatabaseName(base, "hhm", ".ffindex", hhm_index_filename);
270 
271     if (file_exists(hhm_data_filename) && file_exists(hhm_index_filename)) {
272       hhm_database = new FFindexDatabase(hhm_data_filename, hhm_index_filename, false);
273     }
274     return true;
275   }
276   use_compressed = false;
277   return false;
278 }
279 
HHEntry(int sequence_length)280 HHEntry::HHEntry(int sequence_length) : sequence_length(sequence_length) {
281 }
282 
~HHEntry()283 HHEntry::~HHEntry() {
284 }
285 
HHDatabaseEntry(int sequence_length,HHblitsDatabase * hhdatabase,FFindexDatabase * ffdatabase,ffindex_entry_t * entry)286 HHDatabaseEntry::HHDatabaseEntry(int sequence_length,
287                                  HHblitsDatabase* hhdatabase,
288                                  FFindexDatabase* ffdatabase,
289                                  ffindex_entry_t* entry)
290     : HHEntry(sequence_length) {
291   this->hhdatabase = hhdatabase;
292   this->ffdatabase = ffdatabase;
293   this->entry = entry;
294 }
295 
~HHDatabaseEntry()296 HHDatabaseEntry::~HHDatabaseEntry() {
297 }
298 
getTemplateHMM(Parameters & par,char use_global_weights,const float qsc,int & format,float * pb,const float S[20][20],const float Sim[20][20],HMM * t)299 void HHDatabaseEntry::getTemplateHMM(Parameters& par, char use_global_weights,
300                                      const float qsc, int& format, float* pb,
301                                      const float S[20][20],
302                                      const float Sim[20][20], HMM* t) {
303   if (ffdatabase->isCompressed) {
304     Alignment tali(par.maxseq, par.maxres);
305 
306     char* data = ffindex_get_data_by_entry(ffdatabase->db_data, entry);
307 
308     if (data == NULL) {
309       HH_LOG(ERROR) << "Could not fetch data for a3m " << entry->name << "!" << std::endl;
310       exit(4);
311     }
312 
313     tali.ReadCompressed(entry, data, hhdatabase->sequence_database->db_index,
314                         hhdatabase->sequence_database->db_data,
315                         hhdatabase->header_database->db_index,
316                         hhdatabase->header_database->db_data, par.mark,
317                         par.maxcol);
318 
319     tali.Compress(entry->name, par.cons, par.maxcol, par.M_template, par.Mgaps);
320 
321     tali.N_filtered = tali.Filter(par.max_seqid_db, S, par.coverage_db,
322                                   par.qid_db, qsc, par.Ndiff_db);
323     t->name[0] = t->longname[0] = t->fam[0] = '\0';
324     tali.FrequenciesAndTransitions(t, use_global_weights, par.mark, par.cons, par.showcons, pb, Sim);
325 
326     format = 0;
327   } else {
328     FILE* dbf = ffindex_fopen_by_entry(ffdatabase->db_data, entry);
329     char* name = new char[strlen(entry->name) + 1];
330     strcpy(name, entry->name);
331     HHEntry::getTemplateHMM(dbf, name, par, use_global_weights, qsc, format, pb, S, Sim, t);
332     fclose(dbf);
333     delete[] name;
334   }
335 }
336 
getTemplateA3M(Parameters & par,float * pb,const float S[20][20],const float Sim[20][20],Alignment & tali)337 void HHDatabaseEntry::getTemplateA3M(Parameters& par, float* pb,
338                                      const float S[20][20],
339                                      const float Sim[20][20], Alignment& tali) {
340   if (hhdatabase->use_compressed) {
341     ffindex_entry_t* entry = ffindex_get_entry_by_name(
342         hhdatabase->ca3m_database->db_index, this->entry->name);
343 
344     if (entry == NULL) {
345       HH_LOG(ERROR) << "Could not fetch entry " << this->entry->name
346                               << " from compressed hhblits database!"
347                               << std::endl;
348       exit(1);
349     }
350 
351     char* data = ffindex_get_data_by_entry(hhdatabase->ca3m_database->db_data,
352                                            entry);
353 
354     if (data == NULL) {
355       HH_LOG(ERROR) << "Could not fetch data for a3m " << entry->name << "!" << std::endl;
356       exit(4);
357     }
358 
359     tali.ReadCompressed(entry, data, hhdatabase->sequence_database->db_index,
360                         hhdatabase->sequence_database->db_data,
361                         hhdatabase->header_database->db_index,
362                         hhdatabase->header_database->db_data, par.mark,
363                         par.maxcol);
364   } else {
365     FILE* dbf = ffindex_fopen_by_name(hhdatabase->a3m_database->db_data,
366                                       hhdatabase->a3m_database->db_index,
367                                       entry->name);
368 
369     if (dbf == NULL) {
370       HH_LOG(ERROR) << "Opening A3M " << entry->name << " failed!" << std::endl;
371       exit(4);
372     }
373 
374     char line[LINELEN];
375     if (!fgetline(line, LINELEN, dbf)) {
376       //TODO: throw error
377       HH_LOG(ERROR) << "In " << __FILE__ << ":" << __LINE__ << ": " << __func__ << ":" << std::endl;
378       HH_LOG(ERROR) << "\tThis should not happen!" << std::endl;
379     }
380 
381     while (strscn(line) == NULL)
382       fgetline(line, LINELEN, dbf);  // skip lines that contain only white space
383 
384     tali.Read(dbf, entry->name, par.mark, par.maxcol, par.nseqdis, line);
385     fclose(dbf);
386   }
387 
388   tali.Compress(entry->name, par.cons, par.maxcol, par.M_template, par.Mgaps);
389 
390   if(tali.L > sequence_length) {
391     HH_LOG(ERROR) << "sequence length (" << sequence_length << ") does not fit to read MSA (match states: "<< tali.L << ") of file " << getName() << "!" << std::endl;
392     HH_LOG(ERROR) << "\tYour cs219 states might not fit your multiple sequence alignments." << std::endl;
393   }
394 }
395 
getTemplateHMM(FILE * dbf,char * name,Parameters & par,char use_global_weights,const float qsc,int & format,float * pb,const float S[20][20],const float Sim[20][20],HMM * t)396 void HHEntry::getTemplateHMM(FILE* dbf, char* name, Parameters& par,
397                              char use_global_weights, const float qsc,
398                              int& format, float* pb, const float S[20][20],
399                              const float Sim[20][20], HMM* t) {
400   if (dbf != NULL) {
401     char line[LINELEN];
402     if (!fgetline(line, LINELEN, dbf)) {
403       //TODO: throw error
404       HH_LOG(ERROR) << "In " << __FILE__ << ":" << __LINE__ << ": " << __func__ << ":" << std::endl;
405       HH_LOG(ERROR) << "\tThis should not happen!" << std::endl;
406     }
407     while (strscn(line) == NULL) {
408 
409 	if (!fgetline(line, LINELEN, dbf)) break;  // skip lines that contain only white space
410     }
411     // read HMMER3 format
412     if (!strncmp(line, "HMMER3", 6)) {
413       format = 1;
414       t->ReadHMMer3(dbf, par.showcons, pb, name);
415       par.hmmer_used = true;
416     }
417     // read HMMER format
418     else if (!strncmp(line, "HMMER", 5)) {
419       format = 1;
420       t->ReadHMMer(dbf, par.showcons, pb, name);
421       par.hmmer_used = true;
422     }
423     // read HHM format
424     else if (!strncmp(line, "HH", 2)) {
425       char path[NAMELEN];
426       Pathname(path, name);
427 
428       format = 0;
429       t->Read(dbf, par.maxcol, par.nseqdis, pb, path);
430       RemoveExtension(t->file, name);
431     }
432     // read a3m alignment
433     else if (line[0] == '#' || line[0] == '>') {
434       Alignment tali(par.maxseq, par.maxres);
435       tali.Read(dbf, name, par.mark, par.maxcol, par.nseqdis, line);
436       tali.Compress(name, par.cons, par.maxcol, par.M_template, par.Mgaps);
437       //              qali.FilterForDisplay(par.max_seqid,par.coverage,par.qid,par.qsc,par.nseqdis);
438       tali.N_filtered = tali.Filter(par.max_seqid_db, S, par.coverage_db, par.qid_db, qsc, par.Ndiff_db);
439       t->name[0] = t->longname[0] = t->fam[0] = '\0';
440       tali.FrequenciesAndTransitions(t, use_global_weights, par.mark, par.cons, par.showcons, pb, Sim);
441       format = 0;
442     } else {
443       HH_LOG(ERROR) << "In " << __FILE__ << ":" << __LINE__ << ": " << __func__ << ":" << std::endl;
444       HH_LOG(ERROR) << "\tUnrecognized HMM file format in \'" << name << "\'." << std::endl;
445       HH_LOG(ERROR) << "\tContext:\n'" << line << "\n";
446       fgetline(line, LINELEN, dbf);
447       HH_LOG(ERROR) << line << std::endl;
448       fgetline(line, LINELEN, dbf);
449       HH_LOG(ERROR) << line << "'\n";
450       exit(1);
451     }
452   }
453 
454   if(t->L > sequence_length) {
455     HH_LOG(ERROR) << "sequence length (" << sequence_length << ") does not fit to read MSA (match states: "<< t->L << ") of file " << getName() << "!" << std::endl;
456     HH_LOG(ERROR) << "\tYour cs219 states might not fit your multiple sequence alignments." << std::endl;
457   }
458 }
459 
getName()460 char* HHDatabaseEntry::getName() {
461   return entry->name;
462 }
463 
HHFileEntry(const char * file,int sequence_length)464 HHFileEntry::HHFileEntry(const char* file, int sequence_length)
465     : HHEntry(sequence_length), file(strdup(file)) {
466 }
467 
~HHFileEntry()468 HHFileEntry::~HHFileEntry() {
469   free(file);
470 }
471 
getTemplateHMM(Parameters & par,char use_global_weights,const float qsc,int & format,float * pb,const float S[20][20],const float Sim[20][20],HMM * t)472 void HHFileEntry::getTemplateHMM(Parameters& par, char use_global_weights,
473                                  const float qsc, int& format, float* pb,
474                                  const float S[20][20], const float Sim[20][20],
475                                  HMM* t) {
476 
477   FILE * dbf = fopen(file, "r");
478   if(dbf == NULL) {
479     //TODO: throw error
480     HH_LOG(ERROR) << "Template File does not exist: " << file << std::endl;
481     exit(1);
482   }
483 
484   HHEntry::getTemplateHMM(dbf, file, par, use_global_weights, qsc, format, pb,
485                           S, Sim, t);
486   fclose(dbf);
487 }
488 
getTemplateA3M(Parameters & par,float * pb,const float S[20][20],const float Sim[20][20],Alignment & tali)489 void HHFileEntry::getTemplateA3M(Parameters& par, float* pb,
490                                  const float S[20][20], const float Sim[20][20],
491                                  Alignment& tali) {
492 
493   char line[LINELEN];
494   HMM* t = new HMM(MAXSEQDIS, par.maxres);
495 
496   FILE* inf = fopen(file, "r");
497 
498   if (!fgetline(line, LINELEN, inf)) {
499     HH_LOG(ERROR) << "Error in " << __FILE__ << ":" << __LINE__
500                             << ": " << __func__ << ":" << std::endl;
501     HH_LOG(ERROR) << "\t" << file << " is empty!\n";
502     exit(4);
503   }
504 
505   while (strscn(line) == NULL)
506     fgetline(line, LINELEN, inf);  // skip lines that contain only white space
507 
508   // Is infile a HMMER file?
509   if (!strncmp(line, "HMMER", 5)) {
510     // Uncomment this line to allow HMMER2/HMMER3 models as queries:
511     HH_LOG(ERROR)
512         << "Use of HMMER format as input will result in severe loss of sensitivity!\n";
513   }
514   // ... or is it an hhm file?
515   else if (!strncmp(line, "NAME", 4) || !strncmp(line, "HH", 2)) {
516     char path[NAMELEN];
517     Pathname(path, file);
518 
519     HH_LOG(INFO) << "Query file is in HHM format\n";
520 
521     // Rewind to beginning of line and read query hhm file
522     rewind(inf);
523     t->Read(inf, par.maxcol, par.nseqdis, pb, path);
524 
525     Alignment ali_tmp(par.maxseq, par.maxres);
526     ali_tmp.GetSeqsFromHMM(t);
527     ali_tmp.Compress(file, par.cons, par.maxcol, par.M_template, par.Mgaps);
528     tali = ali_tmp;
529   }
530   // ... or is it an alignment file
531   else if (line[0] == '#' || line[0] == '>') {
532     Alignment ali_tmp(par.maxseq, par.maxres);
533 
534     // Read alignment from infile into matrix X[k][l] as ASCII (and supply first line as extra argument)
535     ali_tmp.Read(inf, file, par.mark, par.maxcol, par.nseqdis, line);
536 
537     // Convert ASCII to int (0-20),throw out all insert states, record their number in I[k][i]
538     // and store marked sequences in name[k] and seq[k]
539     ali_tmp.Compress(file, par.cons, par.maxcol, par.M_template, par.Mgaps);
540 
541     tali = ali_tmp;
542   } else {
543     HH_LOG(ERROR) << "Error in " << __FILE__ << ":" << __LINE__
544                             << ": " << __func__ << ":" << std::endl;
545     HH_LOG(ERROR) << "\t unrecognized input file format in \'" << file
546                             << "\'\n";
547     HH_LOG(ERROR) << "\t line = " << line << "\n";
548     exit(1);
549   }
550 
551   fclose(inf);
552 
553   delete t;
554 }
555 
getName()556 char* HHFileEntry::getName() {
557   return file;
558 }
559 
getMaxTemplateLength(std::vector<HHEntry * > & entries)560 int getMaxTemplateLength(std::vector<HHEntry*>& entries) {
561   int max_template_length = 0;
562 
563   for (size_t i = 0; i < entries.size(); i++) {
564     max_template_length = std::max(max_template_length,entries[i]->sequence_length);
565   }
566 
567   return max_template_length;
568 }
569 
570