1 /* ===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *            National Center for Biotechnology Information (NCBI)
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government do not place any restriction on its use or reproduction.
12 *  We would, however, appreciate having the NCBI and the author cited in
13 *  any work or product based on this material.
14 *
15 *  Although all reasonable efforts have been taken to ensure the accuracy
16 *  and reliability of the software and data, the NLM and the U.S.
17 *  Government do not and cannot warrant the performance or results that
18 *  may be obtained by using this software or data. The NLM and the U.S.
19 *  Government disclaim all warranties, express or implied, including
20 *  warranties of performance, merchantability or fitness for any particular
21 *  purpose.
22 *
23 * ===========================================================================
24 *
25 * File Name:  spidey.h
26 *
27 * Author:  Sarah Wheelan
28 *
29 * Version Creation Date:   5/01
30 *
31 * $Revision: 6.19 $
32 *
33 * File Description: mrna-to-genomic alignment algorithms and functions
34 *
35 * Modifications:
36 * --------------------------------------------------------------------------
37 * $Log: spidey.h,v $
38 * Revision 6.19  2004/03/25 21:20:03  kskatz
39 * All SPI_is_acceptor_* functions have been corrected: 'N' no longer contributes to nor subtracts from the score, log odds are calculated and the scores added; they are however all antilogged because there are too many places in the code where the score is expected to be between 0 and 1.  Also, corrected sequence frequency determination in SPI_is_acceptor_user and SPI_is_donor_user, as well as correcting for 'N'. Finally, and this all began with, I added matrices for Dictyostelium - command line -r -m
40 *
41 * Revision 6.18  2003/04/04 19:42:56  kskatz
42 * Added a new command line option (-R) to allow external users to point spidey to a repeat database that it can pass on to blast for filtering repeats
43 *
44 * Revision 6.17  2002/10/02 16:12:54  kskatz
45 * Added a new option to SPI_Options (bigintron_size) that holds a user-supplied maximum size (default = 220000) for introns and requires the option (bool) bigintron to be set to 'TRUE'; The functions affected are SPI_mRNAPtr SPI_AdjustForSplice(), SPI_is_consistent(), and SPI_FindPiece(); note that the default for bigintron_size is not set in SPI_OptionsNew() (yet)
46 *
47 * Revision 6.16  2002/06/27 11:53:33  wheelan
48 * additions to structures to support off-by-one bug fixes and printing of the cds length
49 *
50 * Revision 6.15  2002/05/07 18:42:53  wheelan
51 * changes to support user-defined splice matrices
52 *
53 * Revision 6.14  2002/04/16 17:54:03  wheelan
54 * increased XLINTRON sizes
55 *
56 * Revision 6.13  2002/04/04 17:17:24  wheelan
57 * changed endfuzz to 8
58 *
59 * Revision 6.12  2002/01/30 19:08:51  wheelan
60 * better support for revcomp
61 *
62 * Revision 6.11  2001/12/18 18:00:18  wheelan
63 * add strand
64 *
65 * Revision 6.10  2001/11/20 12:13:28  wheelan
66 * made SPI_GetProteinFrommRNA EXTERN
67 *
68 * Revision 6.9  2001/11/05 16:14:53  wheelan
69 * added option to print multiple alignment to a file
70 *
71 * Revision 6.8  2001/10/04 12:34:07  wheelan
72 * added bigintron option
73 *
74 * Revision 6.7  2001/10/03 14:19:29  wheelan
75 * include new alignment manager
76 *
77 * Revision 6.6  2001/09/04 13:46:37  wheelan
78 * made SPI_RemoveInconsistentAlnsFromSet and SPI_flip_sa_list extern
79 *
80 * Revision 6.5  2001/08/24 13:44:35  wheelan
81 * changed printaln to Int4
82 *
83 * Revision 6.4  2001/08/06 16:49:25  wheelan
84 * changed revcompthresh parameter to 55 from 65
85 *
86 * Revision 6.3  2001/07/11 17:57:07  wheelan
87 * added typedefs for multiple alignments
88 *
89 * Revision 6.2  2001/07/10 16:44:42  wheelan
90 * added functions to make a multiple alignment
91 *
92 * Revision 6.1  2001/05/24 16:27:58  wheelan
93 * initial checkin
94 *
95 *
96 * ==========================================================================
97 */
98 
99 #ifndef _SPIDEY_
100 #define _SPIDEY_
101 
102 #include <ncbi.h>
103 #include <alignmgr2.h>
104 #include <actutils.h>
105 #include <dotseq.h>
106 
107 #undef NLM_EXTERN
108 #ifdef NLM_IMPORT
109 #define NLM_EXTERN NLM_IMPORT
110 #else
111 #define NLM_EXTERN extern
112 #endif
113 
114 #ifdef __cplusplus
115 extern "C" {
116 #endif
117 
/* Alignment scoring parameters.
 * NOTE(review): the consumers of these values are not visible in this header
 * (presumably the alignment routines in spidey.c) -- confirm usage there. */
#define SPI_DROPOFF    50
#define SPI_GAPOPEN    10
#define SPI_GAPEXTEND  3
/* The negative replacement list is parenthesized so the macro always expands
 * as a single operand, whatever expression it is pasted into. */
#define SPI_PENALTY    (-5)
122 
/* Bad-exon limits and threshold.
 * NOTE(review): exact semantics are defined by the code that uses them -- confirm in spidey.c. */
#define SPI_MINBADEXON     6
#define SPI_MAXBADEXON     11
#define SPI_BADEXONTHRESH  40

#define SPI_MAXSEQPORT  20000

#define SPI_SPLICETHRESH  0.0001

/* Maximum gap allowed in SPI_ExtendAlnAlg. */
#define SPI_MAXGAP  4

/* Minimum allowed % of splice sites present.  If a model is below this
 * minimum, the reverse complement will be checked. */
#define SPI_REVCOMPTHRESH  55

/* Amount the %coverage is allowed to drop in the reverse complement models. */
#define SPI_COVERDIFF  15

/* Amount the %mismatch is allowed to rise in the reverse complement models. */
#define SPI_MISMTCHDIFF  10

/* Smallest exon to look for. */
#define SPI_TEENYEXON  6

/* If the overall alignment misses less than or equal to this amount on the
 * ends of the mRNA, the alignment will be extended. */
#define SPI_ENDFUZZ  8

/* Smallest bit that can go into BlastTwoSequencesByLoc. */
#define SPI_MINBLASTSIZE  7

/* Minimum #A's to call a poly(A)+ tail. */
#define SPI_MINPOLYASIZE  5
/* Maximum number of nucleotides to bother scanning for a tail. */
#define SPI_MAXPOLYASIZE  200
/* Maximum number of non-A's to allow on end of tail. */
#define SPI_LINKERSIZE  8

/* Used only to decide whether an mRNA may have fallen off a contig. */
#define SPI_INTRONSIZE  35000
/* If spot->bigintron is TRUE, use this instead of SPI_INTRONSIZE. */
#define SPI_INTRONSIZEXL  220000

/* Max size of 1st and last introns, if 1st and last exons have to be found
 * by SPI_FindPiece. */
#define SPI_BIGINTRON  100000
/* If spot->bigintron is TRUE, use this instead of SPI_BIGINTRON. */
#define SPI_BIGINTRONXL  500000

/* How much each region is padded on each side. */
#define SPI_PADDING  0

/* Number of alternative splice sites to consider per exon. */
#define SPI_NUMSITES  4

/* Above this cutoff, the overlap won't be doubled to get the window in which
 * to search for splice sites. */
#define SPI_BIGOVERLAP  12

/* Exons closer than this to each other will get merged. */
#define SPI_EXONMERGESIZE  15

/* Amount to search on either side of splice site for interspecies comp. */
#define SPI_FLUFF  16
172 
/* Status codes.
 * NOTE(review): the variable these are stored in is not visible in this
 * header -- confirm against the consistency-checking code. */
#define SPI_UNKNOWN     0
#define SPI_CONSISTENT  1
#define SPI_IMPOSSIBLE  2
#define SPI_DONE1       3
#define SPI_DONE2       4

/* Amount of overlap/underlap allowed to consider hits consistent. */
#define SPI_FUZZ  20

/* Side selectors; the 'missingends' fields of SPI_mRNA and SPI_mRNAToHerd are
 * documented as taking one of these four values. */
#define SPI_LEFT     0
#define SPI_RIGHT    1
#define SPI_BOTH     2
#define SPI_NEITHER  3

/* Reversal states. */
#define SPI_REVERSEUNKNOWN  0
#define SPI_REVERSE         1
#define SPI_NOTREVERSED     2

#define SPI_MULT     1
#define SPI_NOTMULT  2
192 
/* Text-output layout constants. */

/* Line length for text alignment output -- must be more than SPI_PSPLICE. */
#define SPI_LINE 60
/* Length of genomic sequence to print before and after each exon. */
#define SPI_PSPLICE 10
/* Space at the beginning of each printed alignment line. */
#define SPI_SPACER 12

/* Enforce the documented constraint at compile time so that a future edit of
 * either value cannot silently violate it. */
#if SPI_LINE <= SPI_PSPLICE
#error "SPI_LINE must be greater than SPI_PSPLICE"
#endif

/* Number of columns in the tab-delimited file of position info for draft. */
#define SPI_NUMCOLS  8
198 
/* Organism codes; the organism determines which splice matrices to use. */
#define SPI_VERTEBRATE  1
#define SPI_FLY         2
#define SPI_PLANT       3
#define SPI_CELEGANS    4
#define SPI_DICTY       5

/* Return codes for the progress callback. */
#define SPI_START     1
#define SPI_PROGRESS  2
#define SPI_FINISHED  3
210 
211 typedef struct spi_bsinfo {
212    BioseqPtr  bsp;
213    SeqLocPtr  lcaseloc;
214    struct spi_bsinfo PNTR next;
215 } SPI_bsinfo, PNTR SPI_bsinfoPtr;
216 
217 typedef struct spi_alninfo {
218    FloatHi      bit_score;
219    SeqAlignPtr  sap;
220    Int2         used;
221 } SPI_AlnInfo, PNTR SPI_AlnInfoPtr;
222 
223 typedef struct spi_ival {
224    Int4         n;
225    SeqAlignPtr  sap;
226    Int4         gstart;
227    Int4         gstop;
228    Int4         mstart;
229    Int4         mstop;
230    Uint2        strand;
231    Int2         used;
232    Int4         score;
233    struct spi_ival PNTR next;
234 } SPI_Ival, PNTR SPI_IvalPtr;
235 
236 typedef struct spiexonprof {
237    Int4     exonnum;
238    Int4Ptr  mismatches;
239    Int4     nummismatches;
240    struct spiexonprof PNTR next;
241 } SPI_ExonProf, PNTR SPI_ExonProfPtr;
242 
243 typedef struct spi_mrna {
244    Uint1            strand;
245    Boolean          revcomp;
246    Int4             numexons;
247    FloatHiPtr       exonid; /* percent identity per exon */
248    Int4Ptr          exongaps;  /* number of gaps in each exon alignment */
249    Uint1Ptr         splicedon;  /* for each exon, is splice donor site (right) present? */
250    Uint1Ptr         spliceacc;  /* for each exon, is splice acceptor site (left) present? */
251    Uint1            missingends;  /* SPI_LEFT, SPI_RIGHT, SPI_BOTH, or SPI_NEITHER */
252    Int4Ptr          mstarts; /* exon starts, in mRNA coordinates */
253    Int4Ptr          mstops;  /* exon stops, in mRNA coordinates */
254    Int4Ptr          gstarts; /* exon starts, in genomic coordinates */
255    Int4Ptr          gstops;  /* exon stops, in genomic coordinates */
256    SeqAlignPtr      PNTR saps; /*indexed alignments for exons */
257    Int4             mRNAcoverage; /* percentage of the mRNA contained in this alignment */
258    FloatHi          mismatch; /* percent mismatches in entire alignment */
259    Int4             polyAtail; /* if +, length of polyA tail that doesn't align */
260                                /* if negative, length of polyAtail that does align */
261    Boolean          fallsoff; /* does this mRNA fall of the end of the contig? */
262    SeqAlignPtr      parent; /* parent of exon alignment set */
263    SeqAlignPtr      continuous; /* continuous alignment over whole gene */
264    SPI_ExonProfPtr  epp; /* positions of mismatches (for printing) */
265    CharPtr          protein; /* sequence of the protein translated from the mRNA */
266    Int4             transstart; /* translation start position */
267    Boolean          holes; /* are there holes in the mRNA alignment? */
268    struct spi_mrna PNTR next;
269 } SPI_mRNA, PNTR SPI_mRNAPtr;
270 
271 typedef struct spi_utrinfo {
272    FloatHi  left;
273    FloatHi  right;
274 } SPI_UTRInfo, PNTR SPI_UTRInfoPtr;
275 
276 typedef struct spi_seq {
277    CharPtr    seq;
278    Int4       start;
279 } SPI_Seq, PNTR SPI_SeqPtr;
280 
281 typedef struct spi_mult {
282    SeqAlignPtr  PNTR exons;
283    Int4         numexons;
284 } SPI_Mult, PNTR SPI_MultPtr;
285 
286 typedef struct spi_reginfo {
287    Boolean      revcomp;
288    Int4         gstart;
289    Int4         gstop;
290    Int4         mstart;
291    Int4         mstop;
292    Int4         mlen;
293    Uint1        strand;
294    SPI_mRNAPtr  smp;
295    Int4         coverage;
296    Int4         score;
297    Int4         polyAtail; /* length of polyA(+) tail that doesn't align */
298    Boolean      fallsoff; /* this mRNA may fall off the end of the genomic sequence */
299    SPI_UTRInfo  utr; /* if this is a CDS, UTR %ids are here */
300    SPI_MultPtr  smu;
301    Boolean      revcmp_try;
302    struct spi_reginfo PNTR next;
303 } SPI_RegionInfo, PNTR SPI_RegionInfoPtr;
304 
305 typedef struct spi_tinyinfo {
306    Int4  start;
307    Int4  n;
308    struct spi_tinyinfo PNTR next;
309 } SPI_TinyInfo, PNTR SPI_TinyInfoPtr;
310 
311 typedef struct spi_pos {
312    Int4  lgroup;
313    Int4  group;
314    Int4  order;
315 } SPI_Pos, PNTR SPI_PosPtr;
316 
317 typedef struct spi_fragmentptr {
318    Int4         start;
319    Int4         stop;
320    Int4         fragnum; /* original fragment number, for reference */
321    SeqLocPtr    slp;
322    SPI_PosPtr   position_orig;  /* the position info (if any) given for this fragment */
323    Int4         position_mrna;  /* after alignment to mRNA, where is this fragment */
324    Int4         reverse; /* is this fragment reversed with respect to the mRNA */
325    SeqAlignPtr  sap;  /* indexed parent of the alignments for this fragment to the mRNA */
326    SPI_mRNAPtr  smp; /* info for the alignment to this fragment */
327    Uint1        donor;  /* does this (set of) alignment(s) have a donor site to the next frag? */
328    Uint1        acceptor;  /* acceptor site ? */
329    struct spi_fragmentptr PNTR next;
330 } SPI_Frag, PNTR SPI_FragPtr;
331 
332 typedef struct spi_fragherd {
333    Int4         polyAtail; /* length of polyAtail on the mRNA */
334    Int4         numfrags;
335    SPI_FragPtr  PNTR sfparray;
336 } SPI_FragHerd, PNTR SPI_FragHerdPtr;
337 
338 typedef struct spi_fraginfo {
339    Int4        mrnastart;
340    Int4        mrnastop;
341    Uint1       strand;
342    Int4        sfpnum;
343    Int4        fragnum;
344    SPI_PosPtr  position_orig;
345 } SPI_FragInfo, PNTR SPI_FragInfoPtr;
346 
347 typedef struct spi_mRNAtoherd {
348    BioseqPtr        bsp_mrna; /* mrna sequence */
349    Int4             numpieces;
350    Int4             numexons;
351    Int4Ptr          exons; /* array which specifies which pieces go with which exons */
352    Int4Ptr          fragments; /* array of fragment numbers, one per piece */
353    Int4Ptr          sfpnum; /* array of sfp numbers, one for each piece */
354    Uint1Ptr         fallsoff; /* for each piece, does it fall off fragment? */
355    Uint1Ptr         strands; /* genomic strand, one per piece */
356    Int4Ptr          mstarts; /* mrna starts, one per piece */
357    Int4Ptr          mstops;  /* mrna stops, one per piece */
358    Int4Ptr          gstarts; /* genomic starts, one per piece */
359    Int4Ptr          gstops; /* genomic stops, one per piece */
360    Int4Ptr          lens; /* length of alignment for each piece */
361    Int4Ptr          pmismatch; /* number of mismatches per piece */
362    Int4Ptr          pgaps; /* number of gaps in alignment for each piece */
363    FloatHiPtr       exonid; /* percent identity per exon */
364    Int4Ptr          exongaps; /* number of gaps per exon */
365    Uint1Ptr         splicedon;  /* for each piece, is splice donor site (right) present? */
366    Uint1Ptr         spliceacc;  /* for each piece, is splice acceptor site (left) present? */
367    Uint1            missingends;  /* SPI_LEFT, SPI_RIGHT, SPI_BOTH, or SPI_NEITHER */
368    SeqAlignPtr      PNTR saps; /* indexed alignments, one for each piece (not exon) */
369    FloatHi          mRNAcoverage; /* percentage of the mRNA contained in this alignment */
370    Boolean          gaps; /* are there internal pieces missing from the mRNA alignment? */
371    FloatHi          mismatch; /* percent mismatches in entire alignment */
372    SPI_ExonProfPtr  epp;
373    struct spi_mRNAtoherd PNTR next;
374 } SPI_mRNAToHerd, PNTR SPI_mRNAToHerdPtr;
375 
376 typedef struct spi_exonherdinfo {
377    Int4         fragmentnum;
378    SeqAlignPtr  sap;
379    Uint1        donor;
380    Uint1        acceptor;
381    Int4         sfpnum;
382    struct spi_exonherdinfo PNTR next;
383 } SPI_ExonHerdInfo, PNTR SPI_ExonHerdInfoPtr;
384 
385 typedef struct spi_splice {
386    Int4     i;
387    FloatHi  score;
388    FloatHi  score2;
389    Int4     diff;
390 } SPI_Splice, PNTR SPI_SplicePtr;
391 
392 typedef struct spi_fragsplice {
393    SPI_SplicePtr  splarray;
394    Int4           spllen;
395    Int4           boundary;
396 } SPI_FragSpl, PNTR SPI_FragSplPtr;
397 
398 typedef struct spi_progress {
399    Int4  percentdone;
400    Int4  returncode;
401 } SPI_Progress, PNTR SPI_ProgressPtr;
402 
403 typedef Boolean (LIBCALLBACK *SPI_ProgressCallback)(SPI_ProgressPtr progress);
404 
405 typedef struct spi_spliceinfo {
406    FloatHi  a;
407    FloatHi  c;
408    FloatHi  g;
409    FloatHi  t;
410    struct spi_spliceinfo PNTR next;
411 } SPI_SpliceInfo, PNTR SPI_SpliceInfoPtr;
412 
413 typedef struct spi_options {
414     FloatHi               firstpasseval;
415     FloatHi               secpasseval;
416     FloatHi               thirdpasseval;
417     Int4                  organism;
418     Int4                  numreturns;
419     Int4                  idcutoff;
420     Int4                  lencutoff;
421     Int4                  printaln;
422     Boolean               interspecies;
423     Boolean               printasn;
424     SeqAlignPtr           PNTR sap_head;
425     CharPtr               draftfile;
426     CharPtr               repeat_db_file; /* path to repeat db to use */
427     SeqLocPtr             lcaseloc;
428     Boolean               fetchcds;
429     Boolean               printheader;
430     Boolean               ace;
431     SPI_ProgressCallback  callback;
432     Int4                  from; /* to restrict genomic interval */
433     Int4                  to;   /* " */
434     Boolean               makemult; /* make a multiple alignment from numerous returns? */
435     Boolean               bigintron;
436     Int4                  bigintron_size; /*added by KSK */
437     Uint1                 strand; /* to restrict the search to one genomic strand */
438     Boolean               revcomp;
439     Int4                  dsplicejunc;
440     SPI_SpliceInfoPtr     dssp_head;
441     Int4                  asplicejunc;
442     SPI_SpliceInfoPtr     assp_head;
443 } SPI_Options, PNTR SPI_OptionsPtr;
444 
445 typedef struct spi_n {
446    Int4  n1;
447    Int4  n2;
448    Int4  n3;
449    Int4  n4;
450    Int4  n5;
451    Int4  n6;
452 } SPI_n, PNTR SPI_nPtr;
453 
454 typedef struct spi_block {
455    SeqAlignPtr  sap;
456    Int4         from_g;
457    Int4         to_g;
458    struct spi_block PNTR next;
459 } SPI_Block, PNTR SPI_BlockPtr;
460 
461 NLM_EXTERN SPI_RegionInfoPtr SPI_AlnSinglemRNAToGen(SPI_bsinfoPtr spig, SPI_bsinfoPtr spim, FILE *ofp, FILE *ofp2, SPI_OptionsPtr spot);
462 NLM_EXTERN SPI_mRNAToHerdPtr SPI_AlnSinglemRNAToPieces(SPI_bsinfoPtr spig_head, SPI_bsinfoPtr spim, FILE *ofp, FILE *ofp2, SPI_OptionsPtr spot);
463 NLM_EXTERN void SPI_MakeMultipleAlignment(SPI_RegionInfoPtr srip_head);
464 NLM_EXTERN void SPI_PrintMultipleAlignment(SPI_RegionInfoPtr srip, Boolean html, BioseqPtr bsp, FILE * ofp);
465 NLM_EXTERN void SPI_RegionListFree (SPI_RegionInfoPtr srip);
466 
467 /*************************************************************************************
468 *
469 *  SPI_AlnmRNAToGenomic is available to outside programs; just pass in the two
470 *  bioseqs and options (to use default options, just pass in NULL, and to use
471 *  other options, call SPI_OptionsNew to get an initialized options pointer and
472 *  make the desired changes).  If options are passed in, they should be freed
473 *  using SPI_OptionsFree.  SPI_AlignmRNAToGenomic returns a linked list of
474 *  SPI_mRNAPtrs, one per gene model (default is to only return one gene model).
475 *  Each SPI_mRNAPtr (see above) has arrays specifying the exon boundaries in
476 *  genomic and mRNA coordinates as well as information about splice sites,
477 *  percent identity, number of gaps, etc.  The SPI_mRNAPtr also has one alignment
478 *  per exon as well as a single alignment (smp->continuous) that covers the entire
479 *  gene, with big gaps in the mRNA for the genomic introns.  The SPI_mRNAPtr should
480 *  be freed by the calling function, using SPI_mRNAFree.
481 *
482 *  SPI_AlnmRNAToGenomic should only be used on finished sequence; it can handle
483 *  interspecies comparisons but doesn't work on draft sequence.
484 *
485 *************************************************************************************/
486 NLM_EXTERN SPI_mRNAPtr SPI_AlignmRNAToGenomic(BioseqPtr bsp_genomic, BioseqPtr bsp_mrna, SPI_OptionsPtr spot);
487 
488 /***************************************************************************
489 *
490 *  SPI_flip_sa_list takes the head of a list of seqaligns and switches
491 *  the first and second row of every alignment (alignments should all have
492 *  two rows). Then, the indexes are freed and the alignments are reindexed.
493 *
494 ***************************************************************************/
495 NLM_EXTERN void SPI_flip_sa_list (SeqAlignPtr sap);
496 
497 /***************************************************************************
498 *
499 *  SPI_RemoveInconsistentAlnsFromSet is a greedy algorithm that first
500 *  sorts the alignments by score, then takes the highest-scoring
501 *  alignment and compares it to the next-highest-scoring alignment, which
502 *  is deleted if it is contained; on subsequent loops each next-highest-
503 *  scoring alignment is compared to the set of alignments that have
504 *  been kept. The alignments can be sorted along the first or
505 *  second sequence; the alignments will be reversed so that they are
506 *  all on the plus strand of the sequence to be examined.
507 *  The input alignment must be indexed at least at the LITE level;
508 *  conflicting child alignments will be deleted, not hidden, by this
509 *  function.  This function assumes that all children have the same two
510 *  rows. The 'compact' parameter tells the function whether to try to
511 *  keep alignments that are more to the left in genomic coordinates, or
512 *  more to the right.
513 *
514 ***************************************************************************/
515 NLM_EXTERN void SPI_RemoveInconsistentAlnsFromSet(SeqAlignPtr sap, Int4 fuzz, Int4 n, Int4 compact);
516 
517 NLM_EXTERN void SPI_bsinfoFreeList (SPI_bsinfoPtr spi);
518 NLM_EXTERN void SPI_mRNAFree (SPI_mRNAPtr smp);
519 NLM_EXTERN SPI_OptionsPtr SPI_OptionsNew(void);
520 NLM_EXTERN void SPI_OptionsFree (SPI_OptionsPtr spot);
521 NLM_EXTERN void SPI_is_donor (Uint1Ptr sequence, Int4 seqlen, FloatHiPtr score, Int4 org);
522 NLM_EXTERN void SPI_is_acceptor (Uint1Ptr sequence, Int4 seqlen, FloatHiPtr score, Int4 org);
523 
524 /***************************************************************************
525 *
526 *  SPI_GetProteinFrommRNA takes an mRNA bioseq and returns a string
527 *  which is the best protein translation of the mRNA. First, the function
528 *  looks to see whether there are any annotated CDSs, and if so, it uses
529 *  the translation of the annotated CDS. If not, the function translates
530 *  the mRNA in all 3 reading frames and looks for the frame with the
531 *  longest protein, then returns that protein.
532 *
533 ***************************************************************************/
534 NLM_EXTERN CharPtr SPI_GetProteinFrommRNA(BioseqPtr bsp_mrna, Int4Ptr start);
535 
536 #ifdef __cplusplus
537 }
538 #endif
539 
540 #undef NLM_EXTERN
541 #ifdef NLM_EXPORT
542 #define NLM_EXTERN NLM_EXPORT
543 #else
544 #define NLM_EXTERN
545 #endif
546 
547 #endif
548