1 /****************************************************************\
2 *                                                                *
3 *  Library for manipulation of exonerate index files             *
4 *                                                                *
5 *  Guy St.C. Slater..   mailto:guy@ebi.ac.uk                     *
6 *  Copyright (C) 2000-2009.  All Rights Reserved.                *
7 *                                                                *
8 *  This source code is distributed under the terms of the        *
9 *  GNU General Public License, version 3. See the file COPYING   *
10 *  or http://www.gnu.org/licenses/gpl.txt for details            *
11 *                                                                *
12 *  If you use this code, please keep this notice intact.         *
13 *                                                                *
14 \****************************************************************/
15 
16 #ifndef INCLUDED_INDEX_H
17 #define INCLUDED_INDEX_H
18 
19 #ifdef __cplusplus
20 extern "C" {
21 #endif /* __cplusplus */
22 
23 #include <stdio.h>
24 #include <glib.h>
25 #include <sys/types.h>
26 #include <unistd.h>
27 
28 #ifdef USE_PTHREADS
29 #include <pthread.h>
30 #endif /* USE_PTHREADS */
31 
32 #include "dataset.h"
33 #include "vfsm.h"
34 #include "hspset.h"
35 #include "bitarray.h"
36 
37 /* File format:
38    Header
39    dataset path\n
40    FW Strand
41    RV Strand (if translated)
42 
43    Strand:
44        Strand header
45        WordList:
46            For each word
47                word_id <MW>
48                freq_count <MI>
49                index_offset <TI>
50        Index:
51            sequence <NS>
52            pos <MS> (from dataset)
53 */
54 
55 typedef struct {
56     guint64  magic;
57     guint64  version;
58     guint64  type;                /* plain | trans */
59     guint64  dataset_path_len;
60     /**/
61     guint64  word_length;
62     guint64  word_jump;
63     guint64  word_ambiguity;
64     guint64  saturate_threshold;
65 } Index_Header;
66 
67 typedef struct {
68     gint  max_word_width;       /* From vfsm->lrw : MW */
69     gint  number_of_seqs_width; /* From dataset->header->number_of_seqs : NS */
70 } Index_Width;
71 
72 typedef struct {
73     gint sequence_id;
74     gint position;
75 } Index_Address;
76 
77 typedef struct {
78       gint freq_count;
79     gint64 index_offset;
80 } Index_Word;
81 
82 typedef struct {
83     guint64 max_index_length;    /* Filled by Index_survey_word_list() */
84     guint64 word_list_length;    /* Filled by Index_survey_word_list() */
85     guint64 total_index_length;  /* Filled by Index_find_offsets()     */
86 } Index_Strand_Header;
87 
88 typedef struct {
89     gint  max_index_len_width;    /* From   max_index_length : MI */
90     gint  total_index_len_width;  /* From total_index_length : TI */
91 } Index_Strand_Width;
92 
93 typedef struct {
94      Index_Strand_Header  header;
95       Index_Strand_Width  width;
96                     /**/
97                     gint *word_table; /* VFSM array */
98               Index_Word *word_list;
99                    off_t  strand_offset; /* Offset to strand header */
100                 BitArray *index_cache;
101 } Index_Strand;
102 
103 typedef struct {
104               guint  ref_count;
105                FILE *fp;
106               gchar *dataset_path;
107             Dataset *dataset;
108        Index_Header *header;
109                VFSM *vfsm;
110         Index_Width *width;
111               /**/
112        Index_Strand *forward;
113        Index_Strand *revcomp; /* Only used when index is translated */
114 #ifdef USE_PTHREADS
115      pthread_mutex_t index_mutex;
116 #endif /* USE_PTHREADS */
117 } Index;
118 
119 /**/
120 
121    Index *Index_create(Dataset *dataset, gboolean is_translated, gint word_length,
122                        gint word_jump, gint word_ambiguity, gint saturate_threshold,
123                        gchar *index_path, gchar *dataset_path, gint memory_limit);
124    Index *Index_share(Index *index);
125     void  Index_destroy(Index *index);
126     void  Index_info(Index *index);
127    Index *Index_open(gchar *path);
128  guint64  Index_memory_usage(Index *index);
129     void  Index_preload_index(Index *index);
130 gboolean  Index_check_filetype(gchar *path);
131 /* Returns TRUE when magic number is correct for this filetype */
132 
133 typedef struct {
134     HSPset *hsp_set;
135       gint  target_id;
136 } Index_HSPset;
137 
138 void Index_HSPset_destroy(Index_HSPset *index_hsp_set);
139 
140 GPtrArray *Index_get_HSPsets(Index *index, HSP_Param *hsp_param,
141                              Sequence *query, gboolean revcomp_target);
142 /* Returns a GPtrArray containing Index_HSPset structs */
143 
144 GPtrArray *Index_get_HSPsets_geneseed(Index *index, HSP_Param *hsp_param,
145                                   Sequence *query, gboolean revcomp_target,
146                                   gint geneseed_threshold, gint geneseed_repeat,
147                                   gint max_query_span, gint max_target_span);
148 
149 /**/
150 
151 #ifdef __cplusplus
152 }
153 #endif /* __cplusplus */
154 
155 #endif /* INCLUDED_INDEX_H */
156 
157