#!/usr/local/bin/perl -w
# $Id: convert_scoremat.pl 26434 2003-08-21 19:48:21Z ucko $
#
# Convert plain-text protein score matrices into C source files.
#
# Usage: convert_scoremat.pl MATRIX_FILE...
#
# For every MATRIX_FILE named on the command line, a file sm_<name>.c is
# written to the current directory, where <name> is the lower-cased base
# name of the input.  Input handling, as implemented below:
#   * '#' starts a comment running to the end of the line; non-empty
#     comment text is copied to the output as a C comment;
#   * the first non-blank line lists the N matrix symbols;
#   * each following non-blank line is one row: a symbol followed by
#     its scores, of which the first N are used.
# Problems with one file are reported with warn() and do not stop the
# processing of the remaining files.

use strict;
use warnings;

use IO::File;
use POSIX;

# Boilerplate placed at the top of every generated file.  $0 (the name of
# this script) is interpolated into the Author line.
# NOTE(review): whitespace inside this here-doc was restored to the usual
# NCBI header layout -- confirm against a previously generated file.
my $HEADER = <<EOF;
/*  \$Id\$
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author\'s official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Author:  Aaron Ucko (via $0)
 *
 * File Description:
 *   Protein alignment score matrices; shared between the two toolkits.
 *
 * ===========================================================================
 */

#include <util/tables/raw_scoremat.h>

EOF

foreach my $filename (@ARGV) {
    # An explicit mode makes IO::File treat the name as a plain path
    # rather than parsing it for mode characters.
    my $in = IO::File->new($filename, 'r');
    if ( !$in ) {
        warn "Unable to open $filename: $!";
        next;
    }

    # Derive the C identifier stem from the base name: every run of
    # capitals keeps its first letter and has the rest lower-cased
    # (e.g. BLOSUM62 -> Blosum62).
    my $varbase = $filename;
    $varbase =~ s:.*/::;
    $varbase =~ s/([A-Z])([A-Z]+)/$1\L$2/g;
    my $outfn = "sm_\L$varbase.c";
    my $out   = IO::File->new($outfn, 'w');
    if ( !$out ) {
        warn "Unable to open $outfn: $!";
        next;
    }
    print $out $HEADER;

    my @symbols;    # column symbols, taken from the first non-blank line
    my $i;          # index of the next expected matrix row
    my $n;          # number of symbols, i.e. the matrix dimension
    my $width;      # score entries per line
    my $min;        # smallest score seen so far
    # NOTE(review): leading spaces in the output literals below were
    # reconstructed so that rows line up with the 11-column continuation
    # indent -- confirm against a previously generated file.
    while (<$in>) {
        # Strip the comment; length() keeps a comment whose text is "0".
        if (s/\# *(.*)// && length($1)) {
            print $out '/* ', $1, " */\n";
        }
        my @elts = split;
        next unless @elts;

        if (@symbols) {
            # Matrix row: a symbol followed by its scores.
            if ($elts[0] ne $symbols[$i]) {
                warn "$filename:$.: Expected $symbols[$i] but got $elts[0]";
            }
            print $out "    /*$elts[0]*/ {";
            for my $j (0 .. $n - 1) {
                if ($j > 0 && !($j % $width)) {
                    print $out "\n", ' ' x 11;
                }
                my $score = $elts[$j + 1];
                printf $out '%3d', $score;
                if ( !defined($min) || $min > $score ) {
                    $min = $score;
                }
                if ($j == $n - 1) {
                    print $out ' }';
                    # No comma after the final row of the initializer.
                    print $out ',' unless $i == $n - 1;
                    print $out "\n";
                } else {
                    print $out ',';
                }
            }
            ++$i;
        } else {
            # First non-blank line: the list of symbols.
            @symbols = @elts;
            $n       = @symbols;
            $i       = 0;
            print $out
                "\nstatic const TNCBIScore s_${varbase}PSM[$n][$n] = {\n";
            my $rows = POSIX::ceil($n / 16);
            # Find the minimum width that yields the necessary number of rows.
            $width = POSIX::ceil($n / $rows);
            print $out '    /*     ';
            for my $j (0 .. $n - 1) {
                if ($j > 0 && !($j % $width)) {
                    print $out "\n", ' ' x 11;
                }
                print $out '  ', $symbols[$j];
                if ($j == $n - 1) {
                    print $out " */\n";
                } else {
                    print $out ',';
                }
            }
        }
    }
    $in->close;

    if ( !@symbols ) {
        # Nothing was opened, so there is nothing to terminate.
        warn "$filename: No score matrix found";
    } else {
        if ($i != $n) {
            warn "$filename: Expected $n matrix rows but got $i";
        }
        my $symstr = join '', @symbols;
        print $out <<EOF;
};
const SNCBIPackedScoreMatrix NCBISM_$varbase = {
    "$symstr",
    s_${varbase}PSM[0],
    $min
};
EOF
    }

    # Buffered write errors only surface when the handle is closed.
    if ( !$out->close ) {
        warn "Error writing $outfn: $!";
    }
}