1#!/usr/local/bin/perl -w
2# $Id: convert_scoremat.pl 26434 2003-08-21 19:48:21Z ucko $
3
4use strict;
5
6use IO::File;
7use POSIX;
8
# Boilerplate written at the top of every generated C file: the NCBI public
# domain notice, an attribution line that records how this script was invoked
# ($0 is interpolated), and the #include the generated tables rely on.
my $HEADER = <<EOF;
/*  \$Id\$
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author\'s official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Author:  Aaron Ucko (via $0)
*
* File Description:
*   Protein alignment score matrices; shared between the two toolkits.
*
* ===========================================================================
*/

#include <util/tables/raw_scoremat.h>

EOF

# Join already-formatted cells with commas, starting a new line (indented by
# 11 columns so it lines up under the first cell) before every $width-th cell.
#
#   $width - number of cells per output line
#   @cells - the formatted cells, in order
#
# Returns the joined text without any leading prefix or trailing newline.
sub _join_wrapped {
    my ($width, @cells) = @_;
    my $text = '';
    foreach my $j (0 .. $#cells) {
        if ($j > 0  &&  !($j % $width)) {
            $text .= "\n" . ' ' x 11;
        }
        $text .= $cells[$j];
        $text .= ',' if $j < $#cells;
    }
    return $text;
}

# Convert one score matrix file into a C source file in the current directory.
#
# The output is named sm_<lowercased basename>.c and defines a static
# s_<Name>PSM table plus an NCBISM_<Name> descriptor, where <Name> is the
# input's basename with every run of capitals reduced to an initial capital.
#
# Input format, as parsed here: '#' starts a comment (copied to the output as
# a C comment); the first non-blank line lists the symbols; each later
# non-blank line is a symbol followed by one score per listed symbol.
#
#   $filename - path of the matrix file to read
#
# Returns nothing.  Problems are reported with warn() and the file is skipped
# or processed on a best-effort basis; nothing here dies.
sub convert_matrix {
    my ($filename) = @_;

    # Explicit modes keep a name such as ">x" or "cmd|" from being treated as
    # anything other than a plain file name.
    my $in = IO::File->new($filename, '<');
    if ( !$in ) {
        warn "Unable to open $filename: $!";
        return;
    }
    my $varbase = $filename;
    $varbase =~ s:.*/::;
    $varbase =~ s/([A-Z])([A-Z]+)/$1\L$2/g;
    my $outfn = "sm_\L$varbase.c";
    my $out = IO::File->new($outfn, '>');
    if ( !$out ) {
        warn "Unable to open $outfn: $!";
        return;
    }
    print $out $HEADER;

    my @symbols;     # symbols from the header row, in column order
    my $n     = 0;   # number of symbols, i.e. the matrix dimension
    my $row   = 0;   # index of the next data row we expect
    my $width;       # score entries per output line
    my $min;         # smallest score seen so far

    while (my $line = <$in>) {
        # Strip the comment (if any) and carry non-empty ones over to the C
        # file.  Testing length rather than truth keeps a comment of "0".
        if ($line =~ s/\# *(.*)//  &&  length $1) {
            print $out '/* ', $1, " */\n";
        }
        my @elts = split ' ', $line;
        next unless @elts;

        if ( !@symbols ) {
            # First non-blank line: the column symbols.
            @symbols = @elts;
            $n       = @symbols;
            print $out
                "\nstatic const TNCBIScore s_${varbase}PSM[$n][$n] = {\n";
            # Allow at most 16 entries per line, then find the minimum width
            # that yields the necessary number of lines.
            my $rows = POSIX::ceil($n / 16);
            $width = POSIX::ceil($n / $rows);
            print $out '    /*     ',
                _join_wrapped($width, map { "  $_" } @symbols), " */\n";
            next;
        }

        if ($row >= $n) {
            # More rows than symbols would overflow the declared array.
            warn "$filename:$.: Ignoring unexpected extra row $elts[0]";
            next;
        }
        if ($elts[0] ne $symbols[$row]) {
            warn "$filename:$.: Expected $symbols[$row] but got $elts[0]";
        }
        if (@elts < $n + 1) {
            # Report the short row once and pad with zeros explicitly.
            my $got = @elts - 1;
            warn "$filename:$.: Expected $n scores but got $got";
            push @elts, (0) x ($n + 1 - @elts);
        }

        my @scores = @elts[1 .. $n];
        foreach my $score (@scores) {
            if ( !defined($min)  ||  $min > $score) {
                $min = $score;
            }
        }
        print $out "    /*$elts[0]*/ {",
            _join_wrapped($width, map { sprintf '%3d', $_ } @scores), ' }';
        print $out ',' unless $row == $n - 1;
        print $out "\n";
        ++$row;
    }
    $in->close;

    if ( !defined $min ) {
        # No data rows at all: whatever was written is not valid C, so do not
        # leave it behind.
        warn "$filename: No score matrix data found; removing $outfn";
        $out->close;
        unlink $outfn  or  warn "Unable to remove $outfn: $!";
        return;
    }
    if ($row < $n) {
        warn "$filename: Expected $n rows but got only $row";
    }

    my $symstr = join '', @symbols;
    print $out <<EOF;
};
const SNCBIPackedScoreMatrix NCBISM_$varbase = {
    "$symstr",
    s_${varbase}PSM[0],
    $min
};
EOF
    # Buffered write errors only surface at close time.
    $out->close  or  warn "Error writing $outfn: $!";
    return;
}

foreach my $filename (@ARGV) {
    convert_matrix($filename);
}
129