1 /****************************************************************\
2 * *
3 * esd2esi - generate an exonerate index file from a dataset *
4 * *
5 * Guy St.C. Slater.. mailto:guy@ebi.ac.uk *
6 * Copyright (C) 2000-2009. All Rights Reserved. *
7 * *
8 * This source code is distributed under the terms of the *
9 * GNU General Public License, version 3. See the file COPYING *
10 * or http://www.gnu.org/licenses/gpl.txt for details *
11 * *
12 * If you use this code, please keep this notice intact. *
13 * *
14 \****************************************************************/
15
16 #include "argument.h"
17 #include "dataset.h"
18 #include "index.h"
19
/* Argument_main : entry point for the esd2esi tool.
 *
 * Registers the command line options, has them parsed via
 * Argument_process(), reads the dataset named by --dataset,
 * and passes it to Index_create() together with the word
 * parameters and the --output path.
 *
 * arg : the Argument object supplied by the caller; the options
 *       declared here are absorbed into it before processing.
 *
 * Returns 0 on completion.  Invalid option combinations are
 * reported with g_error(), which does not return.
 */
int Argument_main(Argument *arg){
    register ArgumentSet *as
        = ArgumentSet_create("Input and Output Options");
    gchar *dataset_path, *index_path;
    register Dataset *dataset;
    register Index *index;
    gboolean is_translated = FALSE;
    register gint word_length;
    gint dna_word_length, protein_word_length,
         word_jump, word_ambiguity,
         saturate_threshold, memory_limit;
    /**/
    /* NOTE(review): the path options are given no default here;
     * presumably the argument library then treats them as
     * mandatory, so both paths are set once Argument_process()
     * returns -- confirm against argument.c.
     */
    ArgumentSet_add_option(as, 'd', "dataset", "path",
        "Exonerate dataset file", NULL,
        Argument_parse_string, &dataset_path);
    ArgumentSet_add_option(as, 'o', "output", "path",
        "Output path for .esi file", NULL,
        Argument_parse_string, &index_path);
    ArgumentSet_add_option(as, '\0', "translate", NULL,
        "Translate the dataset for comparison with protein", "FALSE",
        Argument_parse_boolean, &is_translated);
    /**/
    /* FIXME: tidy duplicated arguments also in libraries */
    ArgumentSet_add_option(as, '\0', "dnawordlen", "bp",
        "Wordlength for DNA words", "12",
        Argument_parse_int, &dna_word_length);
    ArgumentSet_add_option(as, '\0', "proteinwordlen", "aa",
        "Wordlength for protein words", "5",
        Argument_parse_int, &protein_word_length);
    ArgumentSet_add_option(as, '\0', "wordjump", NULL,
        "Jump between database words", "1",
        Argument_parse_int, &word_jump);
    ArgumentSet_add_option(as, '\0', "wordambiguity", NULL,
        "Number of ambiguous words to index", "1",
        Argument_parse_int, &word_ambiguity);
    ArgumentSet_add_option(as, '\0', "saturatethreshold", NULL,
        "Word saturation threshold", "10",
        Argument_parse_int, &saturate_threshold);
    /**/
    ArgumentSet_add_option(as, '\0', "memorylimit", NULL,
        "Memory limit for database indexing", "1024",
        Argument_parse_int, &memory_limit);
    /**/
    Argument_absorb_ArgumentSet(arg, as);
    Argument_process(arg, "esd2esi",
        "generate an exonerate sequence index file\n"
        "Guy St.C. Slater. guy@ebi.ac.uk. 2007.\n", NULL);
    /* This check does not depend on the dataset,
     * so reject a bad value before loading it.
     */
    if(word_ambiguity < 1)
        g_error("Word ambiguity cannot be less than one.");
    dataset = Dataset_read(dataset_path);
    /* Protein word length applies both to protein datasets
     * and to datasets which are to be translated.
     */
    word_length = (is_translated
                || (dataset->alphabet->type == Alphabet_Type_PROTEIN))
                ? protein_word_length
                : dna_word_length;
    /* NOTE(review): only the dataset alphabet is tested here;
     * whether --translate with wordambiguity > 1 is supported
     * is not visible from this file -- confirm against index.c.
     */
    if((word_ambiguity > 1)
    && (dataset->alphabet->type == Alphabet_Type_PROTEIN))
        g_error("Protein ambiguity symbols not implemented");
    g_message("Building index");
    index = Index_create(dataset, is_translated, word_length,
                         word_jump, word_ambiguity,
                         saturate_threshold, index_path, dataset_path,
                         memory_limit);
    Index_destroy(index);
    Dataset_destroy(dataset);
    g_message("-- completed");
    return 0;
    }
87
88 /**/
89
90