1#!/usr/bin/perl
2
3
4=head1 NAME
5
6bp_bioflat_index.pl - index sequence files using Bio::DB::Flat
7
8=head1 DESCRIPTION
9
10Create or update a biological sequence database indexed with the
11Bio::DB::Flat indexing scheme. The arguments are a list of flat files
12containing the sequence information to be indexed.
13
14=head1 USAGE
15
16bp_bioflat_index.pl <options> file1 file2 file3...
17
18Options:
19
20--create Create or reinitialize the index. If not specified,
21the index must already exist.
22
23--format <format> The format of the sequence files. Must be one
24of "genbank", "swissprot", "embl" or "fasta".
25
26--location <path> Path to the directory in which the index files
27are stored.
28
29--dbname <name> The symbolic name of the database to be created.
30
31--indextype <type> Type of index to create. Either "bdb" or "flat".
32"binarysearch" is the same as "flat".
33
34Options can be abbreviated. For example, use -i for --indextype.
35
36The following environment variables will be used as defaults if the
37corresponding options are not provided:
38
39OBDA_FORMAT format of sequence file
40OBDA_LOCATION path to directory in which index files are stored
41OBDA_DBNAME name of database
OBDA_INDEXTYPE type of index to create
43
44=cut
45
46use strict;
47use warnings;
48use Bio::Root::Root;
49use Bio::Root::IO;
50use Bio::DB::Flat;
51use Getopt::Long;
52use File::Path qw(mkpath rmtree);
53
# Return the first defined value among the arguments, or undef (empty list
# in list context) when none of them is defined. An empty string counts as
# defined, so an explicitly given empty option value is not overridden.
sub _first_defined {
    for my $candidate (@_) {
        return $candidate if defined $candidate;
    }
    return;
}

# Command-line settings; each one stays undef when its option is not given.
my ($CREATE,$FORMAT,$LOCATION,$DBNAME,$INDEXTYPE);

# GetOptions returns false after reporting a malformed or unknown option.
# Stop in that case rather than indexing with a half-parsed command line.
GetOptions( 'create'      => \$CREATE,
            'format:s'    => \$FORMAT,
            'location:s'  => \$LOCATION,
            'dbname:s'    => \$DBNAME,
            'indextype:s' => \$INDEXTYPE )
  or Bio::Root::Root->throw("error in command-line arguments; run 'perldoc $0' for usage");

# Fall back to the OBDA_* environment variables for anything not set on the
# command line. OBDA_INDEX is accepted as an alias of OBDA_INDEXTYPE because
# that name has appeared in this script's usage documentation.
$FORMAT    = _first_defined($FORMAT,    $ENV{OBDA_FORMAT});
$LOCATION  = _first_defined($LOCATION,  $ENV{OBDA_LOCATION});
$DBNAME    = _first_defined($DBNAME,    $ENV{OBDA_DBNAME});
$INDEXTYPE = _first_defined($INDEXTYPE, $ENV{OBDA_INDEXTYPE}, $ENV{OBDA_INDEX});
66
# throw/warn and catfile are invoked as class methods, so the package names
# themselves serve as the invocants.
my $root = 'Bio::Root::Root';
my $io = 'Bio::Root::IO';

# confirm that database directory is there
defined $LOCATION or
  $root->throw("please provide a base directory with the --location option");

# The error message tells the user to pass --create, so honour that advice:
# with --create a missing base directory is created instead of rejected.
unless (-d $LOCATION) {
  $CREATE or
    $root->throw("$LOCATION is not a valid directory; use --create to create a new index");
  # mkpath croaks on failure; report that through the script's usual exception.
  eval { mkpath($LOCATION); 1 } or
    $root->throw("cannot create directory $LOCATION: $@");
}

defined $DBNAME or
  $root->throw("please provide a database name with the --dbname option");

defined $FORMAT or
  $root->throw("please specify the format for the input files with the --format option");

# Default the index type, telling the user which default was chosen.
unless (defined $INDEXTYPE) {
  $INDEXTYPE = 'flat';
  $root->warn('setting index type to "flat", use the --indextype option to override');
}
87
# Reconcile the --create flag with what is on disk. The presence of
# <location>/<dbname>/config.dat is what marks an existing index.
my $path = $io->catfile($LOCATION, $DBNAME, 'config.dat');
my $index_exists = -e $path;

if (!$index_exists) {
  # Nothing to update, so the user must have asked for a new index.
  $root->throw("Cannot find database config file at location $path; use --create to create a new index")
    unless $CREATE;
}
elsif ($CREATE) {
  # Reinitialise: remove the old database directory (verbose, "safe" mode).
  $root->warn("existing index detected; deleting.");
  my $db_dir = $io->catfile($LOCATION, $DBNAME);
  rmtree($db_dir, 1, 1);
}
else {
  # Updating an existing index: clear the index type so it is not passed on.
  $root->warn("existing index detected; ignoring --indextype and --format options.");
  undef $INDEXTYPE;
}
102
# open for writing/updating
# -index is passed only while an index type is still set; it is cleared
# earlier in the script when an existing index is being updated.
my @index_args = $INDEXTYPE ? (-index => $INDEXTYPE) : ();

my $db = Bio::DB::Flat->new(-directory  => $LOCATION,
                            -dbname     => $DBNAME,
                            @index_args,
                            -write_flag => 1,
                            -format     => $FORMAT) or
  $root->throw("can't create Bio::DB::Flat object");

# Index every file named on the command line and report the entry count.
my $entries = $db->build_index(@ARGV);

# The message ends at the newline; a trailing space would be printed at the
# start of the next terminal line.
print STDERR "(Re)indexed $entries entries.\n";
117
118__END__
119