1 /****************************************************************\
2 *                                                                *
3 *  esd2esi - generate an exonerate index file from a dataset     *
4 *                                                                *
5 *  Guy St.C. Slater..   mailto:guy@ebi.ac.uk                     *
6 *  Copyright (C) 2000-2009.  All Rights Reserved.                *
7 *                                                                *
8 *  This source code is distributed under the terms of the        *
9 *  GNU General Public License, version 3. See the file COPYING   *
10 *  or http://www.gnu.org/licenses/gpl.txt for details            *
11 *                                                                *
12 *  If you use this code, please keep this notice intact.         *
13 *                                                                *
14 \****************************************************************/
15 
16 #include "argument.h"
17 #include "dataset.h"
18 #include "index.h"
19 
/* Argument_main: entry point for the esd2esi tool.
 *
 * Registers the command line options, lets the Argument module
 * process them, reads the dataset named by --dataset and builds
 * an index for it with Index_create(), passing --output as the
 * index path.
 *
 * arg : the Argument object supplied by the caller; the option set
 *       created here is absorbed into it before processing.
 *
 * Returns 0 after the index and dataset have been destroyed.
 * Invalid word ambiguity settings are reported with g_error().
 */
int Argument_main(Argument *arg){
    register ArgumentSet *as
           = ArgumentSet_create("Input and Output Options");
    /* Initialised so that the pointers hold a defined value even if
     * the argument parser does not write to them.
     * NOTE(review): a NULL default presumably marks the option as
     * mandatory in the Argument module - confirm.
     */
    gchar *dataset_path = NULL, *index_path = NULL;
    register Dataset *dataset;
    register Index *index;
    gboolean is_translated = FALSE;
    register gint word_length;
    gint dna_word_length, protein_word_length,
         word_jump, word_ambiguity,
         saturate_threshold, memory_limit;
    /**/
    ArgumentSet_add_option(as, 'd', "dataset", "path",
        "Exonerate dataset file", NULL,
        Argument_parse_string, &dataset_path);
    ArgumentSet_add_option(as, 'o', "output", "path",
        "Output path for .esi file", NULL,
        Argument_parse_string, &index_path);
    ArgumentSet_add_option(as, '\0', "translate", NULL,
        "Translate the dataset for comparison with protein", "FALSE",
        Argument_parse_boolean, &is_translated);
    /**/
    /* FIXME: tidy duplicated arguments also in libraries */
    ArgumentSet_add_option(as, 0, "dnawordlen", "bp",
        "Wordlength for DNA words", "12",
        Argument_parse_int, &dna_word_length);
    ArgumentSet_add_option(as, 0, "proteinwordlen", "aa",
        "Wordlength for protein words", "5",
        Argument_parse_int, &protein_word_length);
    ArgumentSet_add_option(as, 0, "wordjump", NULL,
        "Jump between database words", "1",
        Argument_parse_int, &word_jump);
    ArgumentSet_add_option(as, 0, "wordambiguity", NULL,
        "Number of ambiguous words to index", "1",
        Argument_parse_int, &word_ambiguity);
    ArgumentSet_add_option(as, 0, "saturatethreshold", NULL,
        "Word saturation threshold", "10",
        Argument_parse_int, &saturate_threshold);
    /**/
    ArgumentSet_add_option(as, 0, "memorylimit", NULL,
        "Memory limit for database indexing", "1024",
        Argument_parse_int, &memory_limit);
    /**/
    Argument_absorb_ArgumentSet(arg, as);
    Argument_process(arg, "esd2esi",
        "generate an exonerate sequence index file\n"
        "Guy St.C. Slater. guy@ebi.ac.uk. 2007.\n", NULL);
    dataset = Dataset_read(dataset_path);
    /* The protein word length applies when the dataset is to be
     * translated or already has a protein alphabet;
     * otherwise the DNA word length is used.
     */
    word_length = (is_translated
                  || (dataset->alphabet->type == Alphabet_Type_PROTEIN))
                ? protein_word_length
                : dna_word_length;
    if(word_ambiguity < 1)
        g_error("Word ambiguity cannot be less than one.");
    /* Ambiguity above one is rejected for protein datasets */
    if((word_ambiguity > 1)
    && (dataset->alphabet->type == Alphabet_Type_PROTEIN))
        g_error("Protein ambiguity symbols not implemented");
    g_message("Building index");
    index = Index_create(dataset, is_translated, word_length,
                         word_jump, word_ambiguity,
                         saturate_threshold, index_path, dataset_path,
                         memory_limit);
    /* The Index object is not used further here, so it is released
     * straight after creation, followed by the dataset.
     */
    Index_destroy(index);
    Dataset_destroy(dataset);
    g_message("-- completed");
    return 0;
    }
87 
88 /**/
89 
90