1#!/bin/sh -
2
# This script will send a file to netscape with database IDs linked to SRS.
# To customize this script edit the $PROTEIN_DATABASES, $DNA_DATABASES and
# $SRS_SERVER variables in the perl code below and the NETSCAPE variable at
# the top of the script.
6
7# To enable this in Artemis, sanger_options must be set to true in the options
8# file.
9
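# The default browser is the first one found in a list of common locations
# (x-www-browser, netscape, firefox, iceweasel, mozilla).
# If you are under KDE, you may prefer to use Konqueror instead.
# If you are under Mac OS X, /usr/bin/open is used, which starts the
# default browser (normally Safari).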
13
# Pick the browser used to display the generated page and store its path in
# NETSCAPE.  On Mac OS X the system "open" launcher is used; elsewhere the
# first existing file from a list of well-known locations wins.  When that
# yields nothing, fall back to the firefox found on the PATH.  NETSCAPE is
# left empty when no browser can be located.
if [ "$(uname)" = Darwin ]; then
  NETSCAPE=/usr/bin/open
else
  for f in /usr/bin/x-www-browser \
           /usr/bin/X11/real-netscape \
           /usr/bin/firefox \
           /usr/bin/iceweasel \
           /usr/bin/mozilla; do
    if [ -f "$f" ]; then
      NETSCAPE=$f
      break
    fi
  done
fi

if [ ! -f "$NETSCAPE" ]; then
  # command -v is specified by POSIX and prints nothing when the lookup
  # fails; the output and exit status of "which" vary between platforms.
  NETSCAPE=$(command -v firefox) || NETSCAPE=
fi
32
# Load site specific settings from the file named by DIANA_ENVIRONMENT_FILE,
# when that variable names an existing file.
if [ -f "$DIANA_ENVIRONMENT_FILE" ]; then
  case $DIANA_ENVIRONMENT_FILE in
    # "." looks a name without a slash up on the PATH, so anchor such a
    # name to the current directory, which is where -f just found it
    */*) . "$DIANA_ENVIRONMENT_FILE" ;;
    *)   . "./$DIANA_ENVIRONMENT_FILE" ;;
  esac
fi
37
# The file to convert is mandatory: give up with an error on stderr when the
# script was started without any argument.
[ "$#" -ne 0 ] || {
  echo no argument given >&2
  exit 1
}
43
# normalise_nfs_path PATH
# Print PATH with the site specific automounter prefixes rewritten to their
# canonical /nfs/... form.  The rules are applied in order to the start of
# the path only; a path matching none of them is printed unchanged.
# printf is used instead of an unquoted echo so that blanks, glob characters
# and backslashes in the name survive untouched.
normalise_nfs_path() {
  # rules 1-2: sanger hack, rule 3: fix for pcs3, rules 4-5: fix for pcs4
  printf '%s\n' "$1" | sed \
    -e 's@^/tmp_mnt/nfs/@/nfs/@' \
    -e 's@^/tmp_mnt/tmp_nfs/@/nfs/@' \
    -e 's@^/yeastpub4/@/nfs/disk222/yeastpub4/@' \
    -e 's@^/\.automount/pcs3/root/nfs/@/nfs/@' \
    -e 's@^/\.automount/evs-users2/root/@/nfs/@'
}

# the file to convert, as seen from this machine
file_arg=$(normalise_nfs_path "$1")

# process id and host name, used to build the name of the generated page
unique_bit=$$.$(hostname)
55
# html_path_for FILE UNIQUE
# Print the name of the HTML page to generate for FILE: FILE followed by
# ".UNIQUE.html".  When FILE exists relative to the current directory the
# name is prefixed with $PWD - we need the full path so netscape can find
# the file.
html_path_for() {
  if [ -f "./$1" ]; then
    printf '%s\n' "$PWD/$1.$2.html"
  else
    printf '%s\n' "$1.$2.html"
  fi
}

new_file=$(html_path_for "$file_arg" "$unique_bit")
64
###
# sanger fix for /nfs/repository which isn't mounted on workstations:
# a page that would land at least one directory below /nfs/repository/ is
# written to $HOME/artemis_tmp instead, keeping only its file name.

# repository_basename PATH
# Print the last component of PATH when PATH has the form
# /nfs/repository/<anything>/<name>; print nothing otherwise.
repository_basename() {
  case $1 in
    /nfs/repository/*/*) printf '%s\n' "${1##*/}" ;;
  esac
}

REPOSITORY=$(repository_basename "$new_file")

if [ -n "$REPOSITORY" ]; then
  # -p: an already existing directory is not an error
  if ! mkdir -p "$HOME/artemis_tmp"; then
    echo "cannot create $HOME/artemis_tmp" 1>&2
    exit 1
  fi
  new_file="$HOME/artemis_tmp/$REPOSITORY"
fi
#
###
79
# Start the page: everything up to and including the opening <PRE> tag.
# This creates (or truncates) $new_file.  The here-document delimiter is
# left unquoted on purpose so that $file_arg is expanded into the title;
# the redirect target is quoted so that a name containing blanks works.
cat <<EOF > "$new_file"
<HTML>
 <HEAD>
  <TITLE>
  Results for $file_arg
 </TITLE>
 </HEAD>
 <BODY>
<PRE>
EOF
90
91perl -e '
BEGIN {
  # Site configuration and the global state used by the rest of the program.

  # change these variable to list the databases to search for the IDs - the
  # database names should be separated by spaces
  $PROTEIN_DATABASES = "uniprot";
  $DNA_DATABASES = "embl";

  # change this to point to the wgetz script of your SRS server
  #$SRS_SERVER = "www.sanger.ac.uk/srs6bin/cgi-bin/wgetz?-e+";
  #$SRS_SERVER = "srs.ebi.ac.uk/srsbin/cgi-bin/wgetz?-e+";
  $SRS_SERVER = "www.bioinformatics.nl/srsbin/cgi-bin/wgetz?-e+";

  # the database lists are pasted into URLs, so encode the separating spaces
  $PROTEIN_DATABASES =~ s/ /%20/g;
  $DNA_DATABASES =~ s/ /%20/g;

  # ID patterns per organism; the hash key is the organism name.
  # Every dot is escaped so that it matches a literal dot only.
  %GENEDB_PATTERNS = (tryp => q!Tb\d+\.\d+\.\d+|TRYP_\S+!,
                      leish => q!LmjF\d+\.\d+!);

  # all of the patterns above as a single alternation
  $GENEDB_PATTERN = join ("|", values %GENEDB_PATTERNS);

  # the lines that introduce the hit summary of a BLAST or FASTA report
  $BLAST_START_LINE = "Sequences producing High-scoring Segment Pairs|" .
    "Sequences producing significant alignments:";
  $FASTA_START_LINE = "The best scores are";

  # the list of IDs we have seen so far
  @ids = ();

  # the list of IDs we have made anchors for so far
  @anchored_ids = ();

  # "dna" or "protein" once the first line of the report has been read
  $db_type = "unknown";
}
123
# Return an HTML link that jumps to the in-page anchor named after the
# given ID.
sub hyperlink_to_anchor
{
  # lexical, so that the global $id of the caller is left untouched
  my $id = shift;

  return qq(<a href="#$id">$id</a>);
}
129
# Return an HTML link to an external database entry for the given ID.
#
# When the global $db_type is "dna" the link queries the ID and AccNumber
# fields of $DNA_DATABASES on $SRS_SERVER.  Otherwise the link goes to
# GeneDB when the ID matches one of %GENEDB_PATTERNS (the hash key is passed
# as the organism) and to UniProt when it matches none of them.
sub hyperlink_id
{
  # lexical, so that the global $id of the caller is left untouched
  my $id = shift;

  if ($db_type eq "dna") {
    return qq#<a href="http://$SRS_SERVER\[\{$DNA_DATABASES\}-ID:$id*]|[\{$DNA_DATABASES\}-AccNumber:$id*]">$id</a>#;
  }

  for my $org (keys %GENEDB_PATTERNS) {
    my $pattern = $GENEDB_PATTERNS{$org};

    if ($id =~ /$pattern/) {
      return qq#<a href="http://www.genedb.org/genedb/Search?organism=$org&name=$id">$id</a>#;
    }
  }

  # alternative: link protein IDs through SRS instead of UniProt
  #$r = qq#<a href="http://$SRS_SERVER-id+1+\[\{$PROTEIN_DATABASES\}-ID:$id*]|[\{$PROTEIN_DATABASES\}-AccNumber:$id*]+-vn+2">$id</a>#;
  #$r = qq#<a href="http://$SRS_SERVER-id+1+\[\{$PROTEIN_DATABASES\}-ID:$id*]|[\{$PROTEIN_DATABASES\}-AccNumber:$id*]">$id</a>#;

  # Text Entry
  return qq#<a href="http://www.uniprot.org/uniprot/$id">$id</a>#;
}
154
# Main program: copy the report named by the first argument to standard
# output, turning the hit IDs into HTML links on the way.

$file_name = $ARGV[0];

# Open the report with explicit open modes, so that no character of the
# file name is taken as a mode.  A .gz file is attached to STDIN and
# uncompressed by a gzip that is started directly, without a shell, so the
# name is never parsed as a shell command either.
if ($file_name =~ /\.gz$/) {
  open STDIN, "<", $file_name or die "failed to open $file_name\n";
  open IN_FILE, "-|", "gzip", "-d" or die "failed to open $file_name\n";
} else {
  open IN_FILE, "<", $file_name or die "failed to open $file_name\n";
}

while (<IN_FILE>) {
  # the first word of the first line decides between DNA and protein
  # databases; a warning is written into the page when it is not recognised
  if ($. == 1) {
    if (/^\s*([^\s]+)/) {
      if (lc $1 eq "blastn" or lc $1 eq "tblastn" or lc $1 eq "tblastx") {
        $db_type = "dna";
      } else {
        if (lc $1 eq "fasta" or lc $1 eq "blastp" or lc $1 eq "blastx") {
          $db_type = "protein";
        } else {
          print "\nWARNING: could not identify file type: $1\n";
        }
      }
    } else {
      print "\nWARNING: could not identify file type\n";
    }
  }


  # ignore header lines: everything from line 1 up to and including the
  # first BLAST or FASTA start line is copied unchanged
  if (1..m/$BLAST_START_LINE|$FASTA_START_LINE/) {
    print;
    next;
  }

  # the first blank line after at least one ID has been seen ends the
  # summary part of the report
  if (@ids && /^\s*$/) {
    $summary_finished = 1;
  }

  # NOTE(review): only capture groups 1 and 2 are read below; the groups for
  # $GENEDB_PATTERN and for the last alternative are never used - confirm
  # whether that is intended before changing this expression.
  if (/^>?\w+\|\w+\|(\w+)|^(?:(?:>?>?(?:[A-Z]+:)?)(\w+)|($GENEDB_PATTERN))|^(?:(?:>?>?(?:[A-Z]+:)?)(\w+)\\.\d+\s+) /) {

    $id = $1;

    if (!defined $id) {
      $id = $2;
    }

    if ($summary_finished) {
      # after the summary: the first line showing an ID from the summary
      # becomes the target of the summary links
      if ((grep {$_ eq $id} @ids) && (!grep {$_ eq $id} @anchored_ids)) {
        # not anchored yet so make it an anchor
        if (s/\b$id\b/"<a name=\"$id\">" . (hyperlink_id($id)) . "<\/a>"/e) {
          push @anchored_ids, $id;
        }
      }
    } else {
      # inside the summary: remember the ID and link it to its anchor
      if (!grep {$_ eq $id} @ids) {
        push @ids, $id;
      }

      s/$id/hyperlink_to_anchor($id)/ei;

#     if (!s/ $id/" " . hyperlink_id($id)/ei) {
        # if the id occurs once in the line put a link at end of line
        s/$/"  LINK:" . hyperlink_id($id)/e;
#     }
    }
  }
  print;
}

close IN_FILE;
221
' "$file_arg" >> "$new_file"   # both quoted, so names with blanks stay one word
223
# Finish the page: append the tags that close the <PRE> section, the body
# and the document.  The redirect target is quoted so that a name containing
# blanks works.
cat <<EOF >> "$new_file"
</PRE>
  </BODY>
</HTML>
EOF
229
# shell_quote STRING
# Print STRING wrapped in single quotes, with every embedded single quote
# written as '\'' so that a shell reading the result gets STRING back as
# exactly one word.
shell_quote() {
  printf "'%s'\n" "$(printf '%s\n' "$1" | sed "s/'/'\\\\''/g")"
}

# delete it at some point
# The command line is parsed again by the shell that at starts, hence the
# file name is quoted for that shell; -- protects a name starting with "-".
printf '%s\n' "rm -f -- $(shell_quote "$new_file") > /dev/null 2>&1" |
  at now + 12 hours
232
# Show the generated page in the browser.

# MAC OS X => Safari browser
# The command then is the following
if [ -f /usr/bin/open ] && [ "$(uname)" = Darwin ]; then
  /usr/bin/open "$new_file"
  exit 0
fi

# Every command below starts $NETSCAPE; with an empty value the shell would
# try to run the first argument instead, so stop here.
if [ -z "$NETSCAPE" ]; then
  echo "no web browser found" 1>&2
  exit 1
fi

if [ -f "/Applications/Safari.app/Contents/MacOS/Safari" ]; then
  # when the browser fails here the script simply ends after this block
  if "$NETSCAPE" "$new_file"; then
    exit 0
  fi
# For Netscape or mozilla
# Use openURL($new_file, new-tab) to get new-tab
elif "$NETSCAPE" -remote "openURL($new_file)"; then
  # leave the browser a minute to read the page before removing it
  # (plain seconds: POSIX sleep has no "m" suffix)
  sleep 60
  rm -f -- "$new_file"
  exit 0
else
  echo starting new netscape 1>&2
  # netscape isn't running - so start it
  ("$NETSCAPE" &) &

  # now send the URL.  we do things this way so that the script doesn't exit
  # until netscape has successfully shown the URL

  sleep 1

  # don't exit the script until the file is successfully displayed or until
  # 40 seconds is up (20 attempts, 2 seconds apart)
  for attempt in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20; do
    if "$NETSCAPE" -remote "openURL($new_file)" 2> /dev/null; then
      sleep 60
      rm -f -- "$new_file"
      exit 0
    else
      sleep 2
    fi
  done

  exit 1
fi
278