1 /*
2   Copyright 2009-2012 Andreas Biegert, Christof Angermueller
3 
4   This file is part of the CS-BLAST package.
5 
6   The CS-BLAST package is free software: you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation, either version 3 of the License, or
9   (at your option) any later version.
10 
11   The CS-BLAST package is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15 
16   You should have received a copy of the GNU General Public License
17   along with this program.  If not, see <http://www.gnu.org/licenses/>.
18 */
19 
20 #include "cs.h"
21 #include "alignment-inl.h"
22 #include "application.h"
23 #include "blosum_matrix.h"
24 #include "context_library.h"
25 #include "crf_pseudocounts-inl.h"
26 #include "crf-inl.h"
27 #include "count_profile-inl.h"
28 #include "getopt_pp.h"
29 #include "library_pseudocounts-inl.h"
30 #include "pssm.h"
31 #include "sequence-inl.h"
32 #include "a3m_compress.h"
33 #include "ffindexdatabase.h"
34 
35 #include "ext/fmemopen.h"
36 #include "context_data.lib.h"
37 //include "context_data.crf.h"
38 #include "cs219.lib.h"
39 
40 #ifdef OPENMP
41 #include <omp.h>
42 #endif
43 
44 #include <sstream>
45 
46 using namespace GetOpt;
47 using std::string;
48 using std::vector;
49 
50 namespace cs {
51   struct CSTranslateAppOptions {
52     static const int kAssignMatchColsByQuery = -1;
53 
CSTranslateAppOptionsCSTranslateAppOptions54     CSTranslateAppOptions() { Init(); }
55 
~CSTranslateAppOptionsCSTranslateAppOptions56     virtual ~CSTranslateAppOptions() { }
57 
58     // Set csbuild default parameters
InitCSTranslateAppOptions59     void Init() {
60       informat = "auto";
61       outformat = "seq";
62       modelfile = "internal";
63       alphabetfile = "internal";
64       pc_admix = 0.90;
65       pc_ali = 12.0;
66       match_assign = kAssignMatchColsByQuery;
67       weight_center = 1.6;
68       weight_decay = 0.85;
69       weight_as = 1000.0;
70       ffindex = false;
71       verbose = true;
72     }
73 
74     // Validates the parameter settings and throws exception if needed.
ValidateCSTranslateAppOptions75     void Validate() {
76       if (infile.empty()) throw Exception("No input file provided!");
77       if (alphabetfile.empty()) throw Exception("No abstract states provided!");
78     }
79 
80     // The input alignment file with training data.
81     string infile;
82     // The output file.
83     string outfile;
84     // The file to which the output should be appended.
85     string appendfile;
86     // Input file with context profile library or HMM for generating pseudocounts
87     string modelfile;
88     // Input file with profile library to be used as abstract state alphabet
89     string alphabetfile;
90     // Input file format
91     string informat;
92     // Output file format (abstract state sequence or abstract state profile)
93     string outformat;
94     // Overall pseudocount admixture
95     double pc_admix;
96     // Constant in pseudocount calculation for alignments
97     double pc_ali;
98     // Match column assignment for FASTA alignments
99     int match_assign;
100     // Weight of central column in multinomial emission
101     double weight_center;
102     // Exponential decay of window weights
103     double weight_decay;
104     // Weight in emission calculation of abstract states
105     double weight_as;
106     // verbose output
107     bool verbose;
108     // ffindex
109     bool ffindex;
110   };  // CSTranslateAppOptions
111 
GetMatchSymbol(double pp)112   char GetMatchSymbol(double pp) {
113     char rv = '=';
114     if (pp > 0.8) rv = '|';
115     else if (pp > 0.6) rv = '+';
116     else if (pp > 0.4) rv = '.';
117     else if (pp > 0.2) rv = '-';
118     return rv;
119   }
120 
GetConfidence(double pp)121   inline int GetConfidence(double pp) {
122     return static_cast<int>(floor((pp - DBL_EPSILON) * 10));
123   }
124 
125 
126   template<class Abc>
127   class CSTranslateApp : public Application {
128   public:
129     // Runs the csbuild application.
Run()130     virtual int Run() {
131       SetupEmissions();
132       SetupPseudocountEngine();
133       SetupAbstractStateEngine();
134 
135       if (!opts_.ffindex) {
136         FILE *fin;
137         if (strcmp(opts_.infile.c_str(), "stdin") == 0)
138           fin = stdin;
139         else
140           fin = fopen(opts_.infile.c_str(), "r");
141 
142         if (!fin)
143           throw Exception("Unable to read input file '%s'!", opts_.infile.c_str());
144 
145         string header;
146         CountProfile<Abc> profile;  // input profile we want to translate
147         ReadProfile(fin, header, profile);
148 
149         size_t profile_counts_lenght = profile.counts.length();
150 
151         // Prepare abstract sequence in AS219 format
152         Sequence<AS219> as_seq(profile_counts_lenght);
153         as_seq.set_header(header);
154 
155         // Translate count profile into abstract state count profile (Neff is one)
156         if (opts_.verbose)
157           fputs("Translating count profile to abstract state alphabet AS219 ...\n", out_);
158 
159         CountProfile<AS219> as_profile(profile_counts_lenght);  // output profile
160         Translate(profile, as_profile);
161 
162         BuildSequence(as_profile, profile_counts_lenght, as_seq);
163 
164         // Build pseudo-alignment for output
165         const size_t nseqs = 5;
166         const size_t header_width = 4;
167         const size_t width = 100;
168         vector<string> ali(nseqs, "");
169         vector<string> labels(nseqs, "");
170         labels[0] = "Pos";
171         labels[1] = "Cons";
172         labels[3] = "AS219";
173         labels[4] = "Conf";
174 
175         BlosumMatrix sm;
176         ali[1] = ConservationSequence(profile, sm);
177 
178         for (size_t i = 0; i < profile.counts.length(); ++i) {
179           ali[0].append(strprintf("%d", (i + 1) % 10));
180           ali[2].push_back(GetMatchSymbol(as_profile.counts[i][as_seq[i]]));
181           ali[3].push_back(as_seq.chr(i));
182           ali[4].append(strprintf("%d", GetConfidence(as_profile.counts[i][as_seq[i]])));
183         }
184 
185         // Print pseudo alignment in blocks if verbose
186         if (opts_.verbose) {
187           fputc('\n', out_);  // blank line before alignment
188           while (!ali.front().empty()) {
189             for (size_t k = 0; k < nseqs; ++k) {
190               string label = labels[k];
191               label += string(header_width - label.length() + 1, ' ');
192               fputs(label.c_str(), out_);
193               fputc(' ', out_);  // separator between header and sequence
194 
195               size_t len = MIN(width, ali[k].length());
196               fputs(ali[k].substr(0, len).c_str(), out_);
197               fputc('\n', out_);
198               ali[k].erase(0, len);
199             }
200             fputc('\n', out_);  // blank line after each block
201           }
202         }
203 
204         // Write abstract-state sequence or profile to outfile
205         if (opts_.outformat == "seq") {
206           if (!opts_.outfile.empty()) {
207             WriteStateSequence(as_seq, opts_.outfile, false);
208           }
209           if (!opts_.appendfile.empty()) {
210             WriteStateSequence(as_seq, opts_.appendfile, true);
211           }
212         } else {
213           if (!opts_.outfile.empty()) {
214             WriteStateProfile(as_profile, opts_.outfile, false);
215           }
216           if (!opts_.appendfile.empty()) {
217             WriteStateProfile(as_profile, opts_.appendfile, true);
218           }
219         }
220       }
221       else {
222         const bool isCa3m = this->opts_.informat == "ca3m";
223 
224         std::string input_data_file = opts_.infile + ".ffdata";
225         std::string input_index_file = opts_.infile + ".ffindex";
226 
227         FFindexDatabase *header_db = NULL;
228         FFindexDatabase *sequence_db = NULL;
229 
230         if (isCa3m) {
231           // infile has to be the ffindex basepath with no suffices
232           input_data_file = this->opts_.infile + "_ca3m.ffdata";
233           input_index_file = this->opts_.infile + "_ca3m.ffindex";
234 
235           std::string input_header_data_file = this->opts_.infile + "_header.ffdata";
236           std::string input_header_index_file = this->opts_.infile + "_header.ffindex";
237           header_db = new FFindexDatabase(const_cast<char *>(input_header_data_file.c_str()),
238                                           const_cast<char *>(input_header_index_file.c_str()), false);
239 
240 
241           std::string input_sequence_data_file = this->opts_.infile + "_sequence.ffdata";
242           std::string input_sequence_index_file = this->opts_.infile + "_sequence.ffindex";
243           sequence_db = new FFindexDatabase(const_cast<char *>(input_sequence_data_file.c_str()),
244                                             const_cast<char *>(input_sequence_index_file.c_str()), false);
245 
246         }
247 
248         FFindexDatabase input(const_cast<char *>(input_data_file.c_str()),
249                               const_cast<char *>(input_index_file.c_str()), isCa3m);
250         input.ensureLinearAccess();
251 
252         //prepare output ffindex cs219 database
253         std::string output_data_file = opts_.outfile + ".ffdata";
254         std::string output_index_file = opts_.outfile + ".ffindex";
255 
256         FILE *output_data_fh = fopen(output_data_file.c_str(), "w");
257         FILE *output_index_fh = fopen(output_index_file.c_str(), "w");
258 
259         if (output_data_fh == NULL) {
260           LOG(ERROR) << "Could not open ffindex output data file! (" << output_data_file << ")!" << std::endl;
261           exit(1);
262         }
263 
264         if (output_index_fh == NULL) {
265           LOG(ERROR) << "Could not open ffindex output index file! (" << output_index_file << ")!" << std::endl;
266           exit(1);
267         }
268 
269         size_t output_offset = 0;
270 
271         size_t input_range_start = 0;
272         size_t input_range_end = input.db_index->n_entries;
273 
274         // Foreach entry
275         #pragma omp parallel for shared(input, sequence_db, header_db)
276         for (size_t entry_index = input_range_start; entry_index < input_range_end; entry_index++) {
277           ffindex_entry_t *entry = ffindex_get_entry_by_index(input.db_index, entry_index);
278 
279           if (entry == NULL) {
280             LOG(WARNING) << "Could not open entry " << entry_index << " from input ffindex!" << std::endl;
281             continue;
282           }
283 
284           if (opts_.verbose) {
285             fprintf(out_, "Processing entry: %s\n", entry->name);
286           }
287 
288           std::ostringstream a3m_buffer;
289           std::string a3m_string;
290           FILE *inf;
291           if (isCa3m) {
292             char *data = ffindex_get_data_by_entry(input.db_data, entry);
293 
294             compressed_a3m::extract_a3m(data, entry->length,
295                                         sequence_db->db_index, sequence_db->db_data,
296                                         header_db->db_index, header_db->db_data,
297                                         &a3m_buffer);
298 
299             a3m_string = a3m_buffer.str();
300 
301             inf = fmemopen(static_cast<void *>(const_cast<char *>(a3m_string.c_str())), a3m_string.length(), "r");
302           } else {
303             inf = ffindex_fopen_by_entry(input.db_data, entry);
304           }
305 
306           if (inf == NULL) {
307             LOG(WARNING) << "Could not open input entry (" << entry->name << ")!" << std::endl;
308             continue;
309           }
310 
311           string header;
312           CountProfile<Abc> profile;  // input profile we want to translate
313           try {
314             ReadProfile(inf, header, profile);
315           } catch (const Exception &e) {
316             fprintf(out_, "Could not read entry: %s, Message: %s\n", entry->name, e.what());
317             continue;
318           }
319 
320           size_t profile_counts_length = profile.counts.length();
321 
322           CountProfile<AS219> as_profile(profile_counts_length);  // output profile
323           Translate(profile, as_profile);
324 
325           // Prepare abstract sequence in AS219 format
326           Sequence<AS219> as_seq(profile_counts_length);
327           as_seq.set_header(header);
328           BuildSequence(as_profile, profile_counts_length, as_seq);
329 
330           std::stringstream out_buffer;
331 
332           if (opts_.outformat == "seq") {
333             WriteStateSequence(as_seq, out_buffer);
334           } else {
335             WriteStateProfile(as_profile, out_buffer);
336           }
337 
338           std::string out_string = out_buffer.str();
339           #pragma omp critical
340           {
341             ffindex_insert_memory(output_data_fh, output_index_fh, &output_offset,
342                                   const_cast<char *>(out_string.c_str()),
343                                   out_string.size(), entry->name);
344           }
345 
346           // FIXME: we are leaking inf, but if we fclose we get weird crashes
347           //fclose(inf);
348 
349         }
350 
351         fclose(output_index_fh);
352         fclose(output_data_fh);
353 
354 
355         ffsort_index(output_index_file.c_str());
356 
357         if (isCa3m) {
358           delete sequence_db;
359           delete header_db;
360         }
361       }
362 
363       return 0;
364     };
365 
366     // Parses command line options.
ParseOptions(GetOpt_pp & ops)367     virtual void ParseOptions(GetOpt_pp &ops) {
368       ops >> Option('i', "infile", opts_.infile, opts_.infile);
369       ops >> Option('o', "outfile", opts_.outfile, opts_.outfile);
370       ops >> Option('a', "appendfile", opts_.appendfile, opts_.appendfile);
371       ops >> Option('I', "informat", opts_.informat, opts_.informat);
372       ops >> Option('O', "outformat", opts_.outformat, opts_.outformat);
373       ops >> Option('M', "match-assign", opts_.match_assign, opts_.match_assign);
374       ops >> Option('x', "pc-admix", opts_.pc_admix, opts_.pc_admix);
375       ops >> Option('c', "pc-ali", opts_.pc_ali, opts_.pc_ali);
376       ops >> Option('A', "alphabet", opts_.alphabetfile, opts_.alphabetfile);
377       ops >> Option('D', "context-data", opts_.modelfile, opts_.modelfile);
378       ops >> Option('w', "weight", opts_.weight_as, opts_.weight_as);
379       ops >> OptionPresent('f', "ffindex", opts_.ffindex);
380       ops >> Option('v', "verbose", opts_.verbose, opts_.verbose);
381 
382       opts_.Validate();
383 
384       if (opts_.outfile.compare("stdout") == 0)
385         opts_.verbose = false;
386 
387       if (opts_.outfile.empty() && opts_.appendfile.empty())
388         opts_.outfile = GetBasename(opts_.infile, false) + ".as";
389       if (opts_.informat == "auto")
390         opts_.informat = GetFileExt(opts_.infile);
391     };
392 
393     // Prints options summary to stream.
PrintOptions()394     virtual void PrintOptions() const {
395       fprintf(out_, "  %-30s %s\n", "-i, --infile <file>",
396               "Input file with alignment or sequence");
397       fprintf(out_, "  %-30s %s\n", "-o, --outfile <file>",
398               "Output file for generated abstract state sequence (def: <infile>.as)");
399       fprintf(out_, "  %-30s %s\n", "-a, --append <file>", "Append generated abstract state sequence to this file");
400       fprintf(out_, "  %-30s %s (def=%s)\n", "-I, --informat prf|seq|fas|...",
401               "Input format: prf, seq, fas, a2m, a3m or ca3m", opts_.informat.c_str());
402       fprintf(out_, "  %-30s %s (def=%s)\n", "-O, --outformat seq|prf", "Outformat: abstract state sequence or profile",
403               opts_.outformat.c_str());
404       fprintf(out_, "  %-30s %s\n", "-M, --match-assign [0:100]",
405               "Make all FASTA columns with less than X% gaps match columns");
406       fprintf(out_, "  %-30s %s\n", "", "(def: make columns with residue in first sequence match columns)");
407       fprintf(out_, "  %-30s %s (def=internal)\n", "-A, --alphabet <file>",
408               "Abstract state alphabet consisting of exactly 219 states");
409       fprintf(out_, "  %-30s %s (def=internal)\n", "-D, --context-data <file>",
410               "Add context-specific pseudocounts using given context-data");
411       fprintf(out_, "  %-30s %s (def=%-.2f)\n", "-x, --pc-admix [0,1]",
412               "Pseudocount admix for context-specific pseudocounts", opts_.pc_admix);
413       fprintf(out_, "  %-30s %s (def=%-.1f)\n", "-c, --pc-ali [0,inf[",
414               "Constant in pseudocount calculation for alignments", opts_.pc_ali);
415       fprintf(out_, "  %-30s %s (def=%-.2f)\n", "-w, --weight [0,inf[",
416               "Weight of abstract state column in emission calculation", opts_.weight_as);
417       fprintf(out_, "  %-30s %s (def=off)\n", "-f, --ffindex",
418               "Read from -i <ffindex>, write to -o <ffindex> (do not include _ca3m suffix for ca3m informat); enables openmp if possible");
419     };
420 
421     // Prints short application description.
PrintBanner()422     virtual void PrintBanner() const {
423       fputs("Translate a sequence/alignment into an abstract state alphabet.\n", out_);
424     };
425 
426     // Prints usage banner to stream.
PrintUsage()427     virtual void PrintUsage() const {
428       fputs("Usage: cstranslate -i <infile> [options]\n", out_);
429     };
430 
431     // Setup pseudocount engine
SetupPseudocountEngine()432     void SetupPseudocountEngine() {
433       if (opts_.modelfile.empty())
434         return;
435 
436       std::string engine;
437       FILE *fin;
438       if (opts_.modelfile == "internal") {
439 //        fin = fmemopen((void*)context_data_crf, context_data_crf_len, "r");
440 //        engine = "crf";
441           fin = fmemopen((void*)context_data_lib, context_data_lib_len, "r");
442           engine = "lib";
443       } else {
444         fin = fopen(opts_.modelfile.c_str(), "r");
445         engine = GetFileExt(opts_.modelfile);
446       }
447       if (!fin) {
448         throw Exception("Unable to read file '" + opts_.modelfile  + "'!\n");
449       }
450 
451       if (engine == "lib") {
452         // Older generative approach by Biegert & Soeding PNAS 2009
453         if (opts_.verbose) {
454           fprintf(out_, "Reading context library for pseudocounts from %s ...\n", GetBasename(opts_.modelfile).c_str());
455         }
456         pc_lib_.reset(new ContextLibrary<Abc>(fin));
457         TransformToLog(*pc_lib_);
458         pc_.reset(new LibraryPseudocounts<Abc>(*pc_lib_, opts_.weight_center,opts_.weight_decay));
459       } else if (engine == "crf") {
460         // Newer discriminative approach by Angermueller & Soeding Bioinformatics 2012
461         if (opts_.verbose) {
462           fprintf(out_, "Reading CRF for pseudocounts from %s ...\n", GetBasename(opts_.modelfile).c_str());
463         }
464         pc_crf_.reset(new Crf<Abc>(fin));
465         pc_.reset(new CrfPseudocounts<Abc>(*pc_crf_));
466       } else {
467         throw Exception("Invalid pseudocount engine: " + engine);
468       }
469       fclose(fin);
470     };
471 
472     // Setup abstract state engine
SetupAbstractStateEngine()473     void SetupAbstractStateEngine() {
474       FILE *fin;
475       if (opts_.alphabetfile == "internal") {
476         fin = fmemopen((void*)cs219_lib, cs219_lib_len, "r");
477       } else {
478         fin = fopen(opts_.alphabetfile.c_str(), "r");
479       }
480       if (!fin) {
481         throw Exception("Unable to read file '" + opts_.alphabetfile  + "'!\n");
482       }
483       if (opts_.verbose) {
484         fprintf(out_, "Reading abstract state alphabet from %s ...\n", GetBasename(opts_.alphabetfile).c_str());
485       }
486 
487       as_lib_.reset(new ContextLibrary<Abc>(fin));
488 
489       if (as_lib_->size() != AS219::kSize) {
490         fprintf(out_, "Abstract state alphabet should have %zu states but actually has %zu states!\n", AS219::kSize, as_lib_->size());
491         exit(1);
492       }
493 
494       if (static_cast<int>(as_lib_->wlen()) != 1) {
495         fprintf(out_, "Abstract state alphabet should have a window length of %d but actually has %zu!\n", 1, as_lib_->wlen());
496         exit(1);
497       }
498 
499       TransformToLog(*as_lib_);
500       fclose(fin);
501     };
502 
SetupEmissions()503     void SetupEmissions() {
504       emission_.reset(new Emission<Abc>(1, this->opts_.weight_as, 1.0));
505     }
506 
507     // Writes abstract state sequence to outfile
508     void WriteStateSequence(const Sequence<AS219> &seq, string outfile, bool append = false) const {
509       FILE *fout;
510       if (outfile.compare("stdout") == 0)
511         fout = stdout;
512       else
513         fout = fopen(outfile.c_str(), append ? "a" : "w");
514       if (!fout) throw Exception("Can't %s to file '%s'!", append ? "append" : "write", outfile.c_str());
515       for (size_t i = 0; i < seq.length(); ++i) {
516         fputc((char) seq[i], fout);
517       }
518       fclose(fout);
519       if (opts_.verbose)
520         fprintf(out_, "%s abstract state sequence to %s\n", append ? "Appended" : "Wrote", outfile.c_str());
521     };
522 
WriteStateSequence(const Sequence<AS219> & seq,std::stringstream & ss)523     void WriteStateSequence(const Sequence<AS219> &seq, std::stringstream &ss) {
524       for (size_t i = 0; i < seq.length(); ++i) {
525         ss.put((char) seq[i]);
526       }
527     };
528 
529     // Writes abstract state profile to outfile
530     void WriteStateProfile(const CountProfile<AS219> &prof, string outfile, bool append = false) const {
531       FILE *fout;
532       if (outfile.compare("stdout") == 0)
533         fout = stdout;
534       else
535         fout = fopen(outfile.c_str(), append ? "a" : "w");
536       if (!fout) throw Exception("Can't %s to output file '%s'!", append ? "append" : "write", outfile.c_str());
537       prof.Write(fout);
538       fclose(fout);
539       if (opts_.verbose)
540         fprintf(out_, "%s abstract state count profile to %s\n", append ? "Appended" : "Wrote", outfile.c_str());
541     };
542 
WriteStateProfile(const CountProfile<AS219> & prof,std::stringstream & ss)543     void WriteStateProfile(const CountProfile<AS219> &prof, std::stringstream &ss) {
544       prof.Write(ss);
545     };
546 
ReadProfile(FILE * fin,string & header,CountProfile<Abc> & profile)547     void ReadProfile(FILE *fin, string &header, CountProfile<Abc> &profile) {
548       if (opts_.informat == "prf") {  // read count profile from infile
549         profile = CountProfile<Abc>(fin);;
550         if (profile.name.empty()) header = GetBasename(opts_.infile, false);
551         else header = profile.name;
552 
553         if (pc_) {
554           if (opts_.verbose)
555             fprintf(out_, "Adding cs-pseudocounts (admix=%.2f) ...\n", opts_.pc_admix);
556           CSBlastAdmix admix(opts_.pc_admix, opts_.pc_ali);
557           profile.counts = pc_->AddTo(profile, admix);
558           Normalize(profile.counts, profile.neff);
559         }
560 
561       } else if (opts_.informat == "seq") {  // build profile from sequence
562         Sequence<Abc> seq(fin);
563         header = seq.header();
564         profile = CountProfile<Abc>(seq);
565 
566         if (pc_) {
567           if (opts_.verbose)
568             fprintf(out_, "Adding cs-pseudocounts (admix=%.2f) ...\n", opts_.pc_admix);
569           ConstantAdmix admix(opts_.pc_admix);
570           profile.counts = pc_->AddTo(seq, admix);
571         }
572 
573       } else {  // build profile from alignment
574         AlignmentFormat f = AlignmentFormatFromString(opts_.informat);
575         Alignment<Abc> ali(fin, f);
576         header = ali.name();
577 
578         if (f == FASTA_ALIGNMENT) {
579           if (opts_.match_assign == CSTranslateAppOptions::kAssignMatchColsByQuery)
580             ali.AssignMatchColumnsBySequence(0);
581           else
582             ali.AssignMatchColumnsByGapRule(opts_.match_assign);
583         }
584         profile = CountProfile<Abc>(ali);
585 
586         if (pc_) {
587           if (opts_.verbose)
588             fprintf(out_, "Adding cs-pseudocounts (admix=%.2f) ...\n", opts_.pc_admix);
589           CSBlastAdmix admix(opts_.pc_admix, opts_.pc_ali);
590           profile.counts = pc_->AddTo(profile, admix);
591           Normalize(profile.counts, profile.neff);
592         }
593       }
594       fclose(fin);  // close input file
595     };
596 
Translate(CountProfile<Abc> & profile,CountProfile<cs::AS219> & as_profile)597     void Translate(CountProfile<Abc> &profile, CountProfile<cs::AS219> &as_profile) {
598       for (size_t i = 0; i < as_profile.length(); ++i) {
599         CalculatePosteriorProbs(*as_lib_, *emission_, profile, i, as_profile.counts[i]);
600       }
601       as_profile.name = GetBasename(opts_.infile, false);
602       as_profile.name = as_profile.name.substr(0, as_profile.name.length() - 1);
603     };
604 
BuildSequence(CountProfile<AS219> & as_profile,size_t profile_counts_length,Sequence<cs::AS219> & as_seq)605     void BuildSequence(CountProfile<AS219> &as_profile, size_t profile_counts_length, Sequence<cs::AS219> &as_seq) {
606       // We also build an abstract state sequence, either just for pretty printing or
607       // even because this is the actual output that the user wants
608       for (size_t i = 0; i < profile_counts_length; ++i) {
609         // Find state with maximal posterior prob and assign it to as_seq[i]
610         size_t k_max = 0;
611         double p_max = as_profile.counts[i][0];
612         for (size_t k = 1; k < AS219::kSize; ++k) {
613           if (as_profile.counts[i][k] > p_max) {
614             k_max = k;
615             p_max = as_profile.counts[i][k];
616           }
617         }
618         as_seq[i] = k_max;
619       }
620     };
621 
622     // Parameter wrapper
623     CSTranslateAppOptions opts_;
624     // Profile library with abstract states
625     scoped_ptr<ContextLibrary<Abc> > as_lib_;
626     // Profile library for context pseudocounts
627     scoped_ptr<ContextLibrary<Abc> > pc_lib_;
628     // CRF for CRF context pseudocounts
629     scoped_ptr<Crf<Abc> > pc_crf_;
630     // Pseudocount engine
631     scoped_ptr<Pseudocounts<Abc> > pc_;
632     // Emissions
633     scoped_ptr<Emission<Abc> > emission_;
634   };  // class CSTranslateApp
635 
636 }  // namespace cs
637