#!/usr/bin/perl

=head1 NAME

bp_bioflat_index.pl - index sequence files using Bio::DB::Flat

=head1 DESCRIPTION

Create or update a biological sequence database indexed with the
Bio::DB::Flat indexing scheme.  The arguments are a list of flat files
containing the sequence information to be indexed.

=head1 USAGE

 bp_bioflat_index.pl <options> file1 file2 file3...

Options:

 --create              Create or reinitialize the index.  If not specified,
                       the index must already exist.  When specified, a
                       missing --location directory is created as well.

 --format   <format>   The format of the sequence files.  Must be one
                       of "genbank", "swissprot", "embl" or "fasta".

 --location <path>     Path to the directory in which the index files
                       are stored.

 --dbname   <name>     The symbolic name of the database to be created.

 --indextype <type>    Type of index to create.  Either "bdb" or "flat".
                       "binarysearch" is the same as "flat".

Options can be abbreviated.  For example, use -i for --indextype.
An option given without a value is treated as if it had not been given.

The following environment variables will be used as defaults if the
corresponding options are not provided:

 OBDA_FORMAT      format of sequence file
 OBDA_LOCATION    path to directory in which index files are stored
 OBDA_DBNAME      name of database
 OBDA_INDEXTYPE   type of index to create (OBDA_INDEX is accepted as a
                  fallback when OBDA_INDEXTYPE is not set)

=cut

use strict;
use warnings;
use Bio::Root::Root;
use Bio::Root::IO;
use Bio::DB::Flat;
use Getopt::Long;
use File::Path qw(mkpath rmtree);

# Class names used as invocants: $root for throw()/warn(), $io for catfile().
my $root = 'Bio::Root::Root';
my $io   = 'Bio::Root::IO';

# Return the first argument that is defined and non-empty, or undef if
# there is none.  Used to layer command-line values over environment
# defaults while treating "--option" given without a value as unset.
sub _first_set {
    for my $candidate (@_) {
        return $candidate if defined $candidate && length $candidate;
    }
    return;
}

my ($CREATE, $FORMAT, $LOCATION, $DBNAME, $INDEXTYPE);

# Abort on unknown or malformed options instead of silently carrying on
# with a partially parsed command line.
GetOptions(
    'create'      => \$CREATE,
    'format:s'    => \$FORMAT,
    'location:s'  => \$LOCATION,
    'dbname:s'    => \$DBNAME,
    'indextype:s' => \$INDEXTYPE,
) or $root->throw("invalid command-line options; see 'perldoc $0' for usage");

# Command-line values win; otherwise fall back to the environment.
# An empty --dbname must never reach the code below: the config path
# would then resolve to the base directory itself and --create would
# remove that whole directory.
$FORMAT   = _first_set($FORMAT,   $ENV{OBDA_FORMAT});
$LOCATION = _first_set($LOCATION, $ENV{OBDA_LOCATION});
$DBNAME   = _first_set($DBNAME,   $ENV{OBDA_DBNAME});

# The code historically read OBDA_INDEXTYPE while the documentation
# advertised OBDA_INDEX; honour both, preferring the historical name.
$INDEXTYPE = _first_set($INDEXTYPE, $ENV{OBDA_INDEXTYPE}, $ENV{OBDA_INDEX});

# confirm that database directory is there
defined $LOCATION
    or $root->throw("please provide a base directory with the --location option");

unless (-d $LOCATION) {
    # With --create a missing base directory is created, which is what
    # the error message below has always told the user to expect.
    if ($CREATE && !-e $LOCATION) {
        eval { mkpath($LOCATION); 1 }
            or $root->throw("cannot create directory $LOCATION: $@");
    }
    -d $LOCATION
        or $root->throw("$LOCATION is not a valid directory; use --create to create a new index");
}

defined $DBNAME
    or $root->throw("please provide a database name with the --dbname option");

defined $FORMAT
    or $root->throw("please specify the format for the input files with the --format option");

unless (defined $INDEXTYPE) {
    $INDEXTYPE = 'flat';
    $root->warn('setting index type to "flat", use the --indextype option to override');
}

# Confirm that database is there and that --create flag is sensible.
my $path = $io->catfile($LOCATION, $DBNAME, 'config.dat');
if (-e $path) {
    if ($CREATE) {
        $root->warn("existing index detected; deleting.");
        rmtree($io->catfile($LOCATION, $DBNAME), 1, 1);
    }
    else {
        $root->warn("existing index detected; ignoring --indextype and --format options.");
        # Only the index type is dropped here; $FORMAT is still passed
        # to the constructor below.
        # NOTE(review): confirm Bio::DB::Flat keeps the stored format
        # when updating an existing index.
        undef $INDEXTYPE;
    }
}
elsif (!$CREATE) {
    $root->throw("Cannot find database config file at location $path; use --create to create a new index");
}

# open for writing/updating
my $db = Bio::DB::Flat->new(
    -directory => $LOCATION,
    -dbname    => $DBNAME,
    ( $INDEXTYPE ? ( -index => $INDEXTYPE ) : () ),
    -write_flag => 1,
    -format     => $FORMAT,
) or $root->throw("can't create Bio::DB::Flat object");

# Index every file named on the command line and report the count.
my $entries = $db->build_index(@ARGV);

print STDERR "(Re)indexed $entries entries.\n";

__END__