1 /* =========================================================================== 2 * 3 * PUBLIC DOMAIN NOTICE 4 * National Center for Biotechnology Information (NCBI) 5 * 6 * This software/database is a "United States Government Work" under the 7 * terms of the United States Copyright Act. It was written as part of 8 * the author's official duties as a United States Government employee and 9 * thus cannot be copyrighted. This software/database is freely available 10 * to the public for use. The National Library of Medicine and the U.S. 11 * Government do not place any restriction on its use or reproduction. 12 * We would, however, appreciate having the NCBI and the author cited in 13 * any work or product based on this material. 14 * 15 * Although all reasonable efforts have been taken to ensure the accuracy 16 * and reliability of the software and data, the NLM and the U.S. 17 * Government do not and cannot warrant the performance or results that 18 * may be obtained by using this software or data. The NLM and the U.S. 19 * Government disclaim all warranties, express or implied, including 20 * warranties of performance, merchantability or fitness for any particular 21 * purpose. 22 * 23 * =========================================================================== 24 * 25 * File Name: spidey.h 26 * 27 * Author: Sarah Wheelan 28 * 29 * Version Creation Date: 5/01 30 * 31 * $Revision: 6.19 $ 32 * 33 * File Description: mrna-to-genomic alignment algorithms and functions 34 * 35 * Modifications: 36 * -------------------------------------------------------------------------- 37 * $Log: spidey.h,v $ 38 * Revision 6.19 2004/03/25 21:20:03 kskatz 39 * All SPI_is_acceptor_* functions have been corrected: 'N' no longer contributes to nor subtracts from the score, log odds are calculated and the scores added; they are however all antilogged because there are too many places in the code where the score is expected to be between 0 and 1. Also, corrected sequence frequency determination in SPI_is_acceptor_user and SPI_is_donor_user, as well as correcting for 'N'. Finally, and this all began with, I added matrices for Dictyostelium - command line -r -m 40 * 41 * Revision 6.18 2003/04/04 19:42:56 kskatz 42 * Added a new command line option (-R) to allow external users to point spidey to a repeat database that it can pass on to blast for filtering repeats 43 * 44 * Revision 6.17 2002/10/02 16:12:54 kskatz 45 * Added a new option to SPI_Options (bigintron_size) that holds a user-supplied maximum size (default = 220000) for introns and requires the option (bool) bigintron to be set to 'TRUE'; The functions affected are SPI_mRNAPtr SPI_AdjustForSplice(), SPI_is_consistent(), and SPI_FindPiece(); note that the default for bigintron_size is not set in SPI_OptionsNew() (yet) 46 * 47 * Revision 6.16 2002/06/27 11:53:33 wheelan 48 * additions to structures to support off-by-one bug fixes and printing of the cds length 49 * 50 * Revision 6.15 2002/05/07 18:42:53 wheelan 51 * changes to support user-defined splice matrices 52 * 53 * Revision 6.14 2002/04/16 17:54:03 wheelan 54 * increased XLINTRON sizes 55 * 56 * Revision 6.13 2002/04/04 17:17:24 wheelan 57 * changed endfuzz to 8 58 * 59 * Revision 6.12 2002/01/30 19:08:51 wheelan 60 * better support for revcomp 61 * 62 * Revision 6.11 2001/12/18 18:00:18 wheelan 63 * add strand 64 * 65 * Revision 6.10 2001/11/20 12:13:28 wheelan 66 * made SPI_GetProteinFrommRNA EXTERN 67 * 68 * Revision 6.9 2001/11/05 16:14:53 wheelan 69 * added option to print multiple alignment to a file 70 * 71 * Revision 6.8 2001/10/04 12:34:07 wheelan 72 * added bigintron option 73 * 74 * Revision 6.7 2001/10/03 14:19:29 wheelan 75 * include new alignment manager 76 * 77 * Revision 6.6 2001/09/04 13:46:37 wheelan 78 * made SPI_RemoveInconsistentAlnsFromSet and SPI_flip_sa_list extern 79 * 80 * Revision 6.5 2001/08/24 13:44:35 wheelan 81 * changed printaln to Int4 82 * 83 * Revision 6.4 2001/08/06 16:49:25 wheelan 84 * changed revcompthresh parameter to 55 from 65 85 * 86 * Revision 6.3 2001/07/11 17:57:07 wheelan 87 * added typedefs for multiple alignments 88 * 89 * Revision 6.2 2001/07/10 16:44:42 wheelan 90 * added functions to make a multiple alignment 91 * 92 * Revision 6.1 2001/05/24 16:27:58 wheelan 93 * initial checkin 94 * 95 * 96 * ========================================================================== 97 */ 98 99 #ifndef _SPIDEY_ 100 #define _SPIDEY_ 101 102 #include <ncbi.h> 103 #include <alignmgr2.h> 104 #include <actutils.h> 105 #include <dotseq.h> 106 107 #undef NLM_EXTERN 108 #ifdef NLM_IMPORT 109 #define NLM_EXTERN NLM_IMPORT 110 #else 111 #define NLM_EXTERN extern 112 #endif 113 114 #ifdef __cplusplus 115 extern "C" { 116 #endif 117 118 #define SPI_DROPOFF 50 119 #define SPI_GAPOPEN 10 120 #define SPI_GAPEXTEND 3 121 #define SPI_PENALTY -5 122 123 #define SPI_MINBADEXON 6 124 #define SPI_MAXBADEXON 11 125 #define SPI_BADEXONTHRESH 40 126 127 #define SPI_MAXSEQPORT 20000 128 129 #define SPI_SPLICETHRESH 0.0001 130 131 #define SPI_MAXGAP 4 /* maximum gap allowed in SPI_ExtendAlnAlg */ 132 133 #define SPI_REVCOMPTHRESH 55 /* minimum allowed % of splice sites present */ 134 /* If model is < minimum, then the reverse */ 135 /* complement will be checked. */ 136 #define SPI_COVERDIFF 15 /* amount the %coverage is allowed to drop in the */ 137 /* reverse complement models */ 138 #define SPI_MISMTCHDIFF 10 /* amount the %mismatch is allowed to rise in */ 139 /* the reverse complement models */ 140 141 #define SPI_TEENYEXON 6 /* smallest exon to look for */ 142 143 #define SPI_ENDFUZZ 8 /* if the overall alignment misses less than or equal */ 144 /* to this amount on the ends of the mRNA, the */ 145 /* alignment will be extended. */ 146 147 #define SPI_MINBLASTSIZE 7 /* smallest bit that can go into BlastTwoSequencesByLoc */ 148 149 #define SPI_MINPOLYASIZE 5 /* minimum #A's to call a poly(A)+ tail */ 150 #define SPI_MAXPOLYASIZE 200 /* maximum number of nucleotides to bother */ 151 /* scanning for a tail */ 152 #define SPI_LINKERSIZE 8 /* maximum number of non-A's to allow on end of tail */ 153 154 #define SPI_INTRONSIZE 35000 /* used only to decide whether an mRNA may have fallen */ 155 /* off a contig */ 156 #define SPI_INTRONSIZEXL 220000 /* if spot->bigintron TRUE, use this */ 157 158 #define SPI_BIGINTRON 100000 /* max size of 1st and last introns, if 1st and last exons */ 159 /* have to be found by SPI_FindPiece. */ 160 #define SPI_BIGINTRONXL 500000 /* if spot->bigintron TRUE, use this */ 161 162 #define SPI_PADDING 0 /* how much each region is padded on each side */ 163 164 #define SPI_NUMSITES 4 /* number of alternative splice sites to consider per exon */ 165 166 #define SPI_BIGOVERLAP 12 /* above this cutoff, the overlap won't be doubled */ 167 /* to get the window in which to search for splice sites */ 168 169 #define SPI_EXONMERGESIZE 15 /* exons closer than this to each other will get merged */ 170 171 #define SPI_FLUFF 16 /* amount to search on either side of splice site for interspecies comp. */ 172 173 #define SPI_UNKNOWN 0 174 #define SPI_CONSISTENT 1 175 #define SPI_IMPOSSIBLE 2 176 #define SPI_DONE1 3 177 #define SPI_DONE2 4 178 179 #define SPI_FUZZ 20 /* amount of overlap/underlap allowed to consider hits consistent*/ 180 181 #define SPI_LEFT 0 182 #define SPI_RIGHT 1 183 #define SPI_BOTH 2 184 #define SPI_NEITHER 3 185 186 #define SPI_REVERSEUNKNOWN 0 187 #define SPI_REVERSE 1 188 #define SPI_NOTREVERSED 2 189 190 #define SPI_MULT 1 191 #define SPI_NOTMULT 2 192 193 #define SPI_LINE 60 /* line length for text alignment output -- must be more than SPI_PSPLICE */ 194 #define SPI_PSPLICE 10 /* length of genomic sequence to print before and after each exon */ 195 #define SPI_SPACER 12 /* space at the beginning of each printed alignment line */ 196 197 #define SPI_NUMCOLS 8 /* number of columns in the tab-delimited file of position info for draft */ 198 199 /* defines for organisms (determines which splice matrices to use) */ 200 #define SPI_VERTEBRATE 1 201 #define SPI_FLY 2 202 #define SPI_PLANT 3 203 #define SPI_CELEGANS 4 204 #define SPI_DICTY 5 205 206 /* return codes for progress callback */ 207 #define SPI_START 1 208 #define SPI_PROGRESS 2 209 #define SPI_FINISHED 3 210 211 typedef struct spi_bsinfo { 212 BioseqPtr bsp; 213 SeqLocPtr lcaseloc; 214 struct spi_bsinfo PNTR next; 215 } SPI_bsinfo, PNTR SPI_bsinfoPtr; 216 217 typedef struct spi_alninfo { 218 FloatHi bit_score; 219 SeqAlignPtr sap; 220 Int2 used; 221 } SPI_AlnInfo, PNTR SPI_AlnInfoPtr; 222 223 typedef struct spi_ival { 224 Int4 n; 225 SeqAlignPtr sap; 226 Int4 gstart; 227 Int4 gstop; 228 Int4 mstart; 229 Int4 mstop; 230 Uint2 strand; 231 Int2 used; 232 Int4 score; 233 struct spi_ival PNTR next; 234 } SPI_Ival, PNTR SPI_IvalPtr; 235 236 typedef struct spiexonprof { 237 Int4 exonnum; 238 Int4Ptr mismatches; 239 Int4 nummismatches; 240 struct spiexonprof PNTR next; 241 } SPI_ExonProf, PNTR SPI_ExonProfPtr; 242 243 typedef struct spi_mrna { 244 Uint1 strand; 245 Boolean revcomp; 246 Int4 numexons; 247 FloatHiPtr exonid; /* percent identity per exon */ 248 Int4Ptr exongaps; /* number of gaps in each exon alignment */ 249 Uint1Ptr splicedon; /* for each exon, is splice donor site (right) present? */ 250 Uint1Ptr spliceacc; /* for each exon, is splice acceptor site (left) present? */ 251 Uint1 missingends; /* SPI_LEFT, SPI_RIGHT, SPI_BOTH, or SPI_NEITHER */ 252 Int4Ptr mstarts; /* exon starts, in mRNA coordinates */ 253 Int4Ptr mstops; /* exon stops, in mRNA coordinates */ 254 Int4Ptr gstarts; /* exon starts, in genomic coordinates */ 255 Int4Ptr gstops; /* exon stops, in genomic coordinates */ 256 SeqAlignPtr PNTR saps; /*indexed alignments for exons */ 257 Int4 mRNAcoverage; /* percentage of the mRNA contained in this alignment */ 258 FloatHi mismatch; /* percent mismatches in entire alignment */ 259 Int4 polyAtail; /* if +, length of polyA tail that doesn't align */ 260 /* if negative, length of polyAtail that does align */ 261 Boolean fallsoff; /* does this mRNA fall of the end of the contig? */ 262 SeqAlignPtr parent; /* parent of exon alignment set */ 263 SeqAlignPtr continuous; /* continuous alignment over whole gene */ 264 SPI_ExonProfPtr epp; /* positions of mismatches (for printing) */ 265 CharPtr protein; /* sequence of the protein translated from the mRNA */ 266 Int4 transstart; /* translation start position */ 267 Boolean holes; /* are there holes in the mRNA alignment? */ 268 struct spi_mrna PNTR next; 269 } SPI_mRNA, PNTR SPI_mRNAPtr; 270 271 typedef struct spi_utrinfo { 272 FloatHi left; 273 FloatHi right; 274 } SPI_UTRInfo, PNTR SPI_UTRInfoPtr; 275 276 typedef struct spi_seq { 277 CharPtr seq; 278 Int4 start; 279 } SPI_Seq, PNTR SPI_SeqPtr; 280 281 typedef struct spi_mult { 282 SeqAlignPtr PNTR exons; 283 Int4 numexons; 284 } SPI_Mult, PNTR SPI_MultPtr; 285 286 typedef struct spi_reginfo { 287 Boolean revcomp; 288 Int4 gstart; 289 Int4 gstop; 290 Int4 mstart; 291 Int4 mstop; 292 Int4 mlen; 293 Uint1 strand; 294 SPI_mRNAPtr smp; 295 Int4 coverage; 296 Int4 score; 297 Int4 polyAtail; /* length of polyA(+) tail that doesn't align */ 298 Boolean fallsoff; /* this mRNA may fall off the end of the genomic sequence */ 299 SPI_UTRInfo utr; /* if this is a CDS, UTR %ids are here */ 300 SPI_MultPtr smu; 301 Boolean revcmp_try; 302 struct spi_reginfo PNTR next; 303 } SPI_RegionInfo, PNTR SPI_RegionInfoPtr; 304 305 typedef struct spi_tinyinfo { 306 Int4 start; 307 Int4 n; 308 struct spi_tinyinfo PNTR next; 309 } SPI_TinyInfo, PNTR SPI_TinyInfoPtr; 310 311 typedef struct spi_pos { 312 Int4 lgroup; 313 Int4 group; 314 Int4 order; 315 } SPI_Pos, PNTR SPI_PosPtr; 316 317 typedef struct spi_fragmentptr { 318 Int4 start; 319 Int4 stop; 320 Int4 fragnum; /* original fragment number, for reference */ 321 SeqLocPtr slp; 322 SPI_PosPtr position_orig; /* the position info (if any) given for this fragment */ 323 Int4 position_mrna; /* after alignment to mRNA, where is this fragment */ 324 Int4 reverse; /* is this fragment reversed with respect to the mRNA */ 325 SeqAlignPtr sap; /* indexed parent of the alignments for this fragment to the mRNA */ 326 SPI_mRNAPtr smp; /* info for the alignment to this fragment */ 327 Uint1 donor; /* does this (set of) alignment(s) have a donor site to the next frag? */ 328 Uint1 acceptor; /* acceptor site ? */ 329 struct spi_fragmentptr PNTR next; 330 } SPI_Frag, PNTR SPI_FragPtr; 331 332 typedef struct spi_fragherd { 333 Int4 polyAtail; /* length of polyAtail on the mRNA */ 334 Int4 numfrags; 335 SPI_FragPtr PNTR sfparray; 336 } SPI_FragHerd, PNTR SPI_FragHerdPtr; 337 338 typedef struct spi_fraginfo { 339 Int4 mrnastart; 340 Int4 mrnastop; 341 Uint1 strand; 342 Int4 sfpnum; 343 Int4 fragnum; 344 SPI_PosPtr position_orig; 345 } SPI_FragInfo, PNTR SPI_FragInfoPtr; 346 347 typedef struct spi_mRNAtoherd { 348 BioseqPtr bsp_mrna; /* mrna sequence */ 349 Int4 numpieces; 350 Int4 numexons; 351 Int4Ptr exons; /* array which specifies which pieces go with which exons */ 352 Int4Ptr fragments; /* array of fragment numbers, one per piece */ 353 Int4Ptr sfpnum; /* array of sfp numbers, one for each piece */ 354 Uint1Ptr fallsoff; /* for each piece, does it fall off fragment? */ 355 Uint1Ptr strands; /* genomic strand, one per piece */ 356 Int4Ptr mstarts; /* mrna starts, one per piece */ 357 Int4Ptr mstops; /* mrna stops, one per piece */ 358 Int4Ptr gstarts; /* genomic starts, one per piece */ 359 Int4Ptr gstops; /* genomic stops, one per piece */ 360 Int4Ptr lens; /* length of alignment for each piece */ 361 Int4Ptr pmismatch; /* number of mismatches per piece */ 362 Int4Ptr pgaps; /* number of gaps in alignment for each piece */ 363 FloatHiPtr exonid; /* percent identity per exon */ 364 Int4Ptr exongaps; /* number of gaps per exon */ 365 Uint1Ptr splicedon; /* for each piece, is splice donor site (right) present? */ 366 Uint1Ptr spliceacc; /* for each piece, is splice acceptor site (left) present? */ 367 Uint1 missingends; /* SPI_LEFT, SPI_RIGHT, SPI_BOTH, or SPI_NEITHER */ 368 SeqAlignPtr PNTR saps; /* indexed alignments, one for each piece (not exon) */ 369 FloatHi mRNAcoverage; /* percentage of the mRNA contained in this alignment */ 370 Boolean gaps; /* are there internal pieces missing from the mRNA alignment? */ 371 FloatHi mismatch; /* percent mismatches in entire alignment */ 372 SPI_ExonProfPtr epp; 373 struct spi_mRNAtoherd PNTR next; 374 } SPI_mRNAToHerd, PNTR SPI_mRNAToHerdPtr; 375 376 typedef struct spi_exonherdinfo { 377 Int4 fragmentnum; 378 SeqAlignPtr sap; 379 Uint1 donor; 380 Uint1 acceptor; 381 Int4 sfpnum; 382 struct spi_exonherdinfo PNTR next; 383 } SPI_ExonHerdInfo, PNTR SPI_ExonHerdInfoPtr; 384 385 typedef struct spi_splice { 386 Int4 i; 387 FloatHi score; 388 FloatHi score2; 389 Int4 diff; 390 } SPI_Splice, PNTR SPI_SplicePtr; 391 392 typedef struct spi_fragsplice { 393 SPI_SplicePtr splarray; 394 Int4 spllen; 395 Int4 boundary; 396 } SPI_FragSpl, PNTR SPI_FragSplPtr; 397 398 typedef struct spi_progress { 399 Int4 percentdone; 400 Int4 returncode; 401 } SPI_Progress, PNTR SPI_ProgressPtr; 402 403 typedef Boolean (LIBCALLBACK *SPI_ProgressCallback)(SPI_ProgressPtr progress); 404 405 typedef struct spi_spliceinfo { 406 FloatHi a; 407 FloatHi c; 408 FloatHi g; 409 FloatHi t; 410 struct spi_spliceinfo PNTR next; 411 } SPI_SpliceInfo, PNTR SPI_SpliceInfoPtr; 412 413 typedef struct spi_options { 414 FloatHi firstpasseval; 415 FloatHi secpasseval; 416 FloatHi thirdpasseval; 417 Int4 organism; 418 Int4 numreturns; 419 Int4 idcutoff; 420 Int4 lencutoff; 421 Int4 printaln; 422 Boolean interspecies; 423 Boolean printasn; 424 SeqAlignPtr PNTR sap_head; 425 CharPtr draftfile; 426 CharPtr repeat_db_file; /* path to repeat db to use */ 427 SeqLocPtr lcaseloc; 428 Boolean fetchcds; 429 Boolean printheader; 430 Boolean ace; 431 SPI_ProgressCallback callback; 432 Int4 from; /* to restrict genomic interval */ 433 Int4 to; /* " */ 434 Boolean makemult; /* make a multiple alignment from numerous returns? */ 435 Boolean bigintron; 436 Int4 bigintron_size; /*added by KSK */ 437 Uint1 strand; /* to restrict the search to one genomic strand */ 438 Boolean revcomp; 439 Int4 dsplicejunc; 440 SPI_SpliceInfoPtr dssp_head; 441 Int4 asplicejunc; 442 SPI_SpliceInfoPtr assp_head; 443 } SPI_Options, PNTR SPI_OptionsPtr; 444 445 typedef struct spi_n { 446 Int4 n1; 447 Int4 n2; 448 Int4 n3; 449 Int4 n4; 450 Int4 n5; 451 Int4 n6; 452 } SPI_n, PNTR SPI_nPtr; 453 454 typedef struct spi_block { 455 SeqAlignPtr sap; 456 Int4 from_g; 457 Int4 to_g; 458 struct spi_block PNTR next; 459 } SPI_Block, PNTR SPI_BlockPtr; 460 461 NLM_EXTERN SPI_RegionInfoPtr SPI_AlnSinglemRNAToGen(SPI_bsinfoPtr spig, SPI_bsinfoPtr spim, FILE *ofp, FILE *ofp2, SPI_OptionsPtr spot); 462 NLM_EXTERN SPI_mRNAToHerdPtr SPI_AlnSinglemRNAToPieces(SPI_bsinfoPtr spig_head, SPI_bsinfoPtr spim, FILE *ofp, FILE *ofp2, SPI_OptionsPtr spot); 463 NLM_EXTERN void SPI_MakeMultipleAlignment(SPI_RegionInfoPtr srip_head); 464 NLM_EXTERN void SPI_PrintMultipleAlignment(SPI_RegionInfoPtr srip, Boolean html, BioseqPtr bsp, FILE * ofp); 465 NLM_EXTERN void SPI_RegionListFree (SPI_RegionInfoPtr srip); 466 467 /************************************************************************************* 468 * 469 * SPI_AlnmRNAToGenomic is available to outside programs; just pass in the two 470 * bioseqs and options (to use default options, just pass in NULL, and to use 471 * other options, call SPI_OptionsNew to get an initialized options pointer and 472 * make the desired changes). If options are passed in, they should be freed 473 * using SPI_OptionsFree. SPI_AlignmRNAToGenomic returns a linked list of 474 * SPI_mRNAPtrs, one per gene model (default is to only return one gene model). 475 * Each SPI_mRNAPtr (see above) has arrays specifying the exon boundaries in 476 * genomic and mRNA coordinates as well as information about splice sites, 477 * percent identity, number of gaps, etc. The SPI_mRNAPtr also has one alignment 478 * per exon as well as a single alignment (smp->continuous) that covers the entire 479 * gene, with big gaps in the mRNA for the genomic introns. The SPI_mRNAPtr should 480 * be freed by the calling function, using SPI_mRNAFree. 481 * 482 * SPI_AlnmRNAToGenomic should only be used on finished sequence; it can handle 483 * interspecies comparisons but doesn't work on draft sequence. 484 * 485 *************************************************************************************/ 486 NLM_EXTERN SPI_mRNAPtr SPI_AlignmRNAToGenomic(BioseqPtr bsp_genomic, BioseqPtr bsp_mrna, SPI_OptionsPtr spot); 487 488 /*************************************************************************** 489 * 490 * SPI_flip_sa_list takes the head of a list of seqaligns and switches 491 * the first and second row of every alignment (alignments should all have 492 * two rows). Then, the indexes are freed and the alignments are reindexed. 493 * 494 ***************************************************************************/ 495 NLM_EXTERN void SPI_flip_sa_list (SeqAlignPtr sap); 496 497 /*************************************************************************** 498 * 499 * SPI_RemoveInconsistentAlnsFromSet is a greedy algorithm that first 500 * sorts the alignments by score, then takes the highest-scoring 501 * alignment and compares it to the next-highest-scoring alignment, which 502 * is deleted if it is contained; on subsequent loops each next-highest- 503 * scoring alignment is compared to the set of alignments that have 504 * been kept. The alignments can be sorted along the first or 505 * second sequence; the alignments will be reversed so that they are 506 * all on the plus strand of the sequence to be examined. 507 * The input alignment must be indexed at least at the LITE level; 508 * conflicting child alignments will be deleted, not hidden, by this 509 * function. This function assumes that all children have the same two 510 * rows. The 'compact' parameter tells the function whether to try to 511 * keep alignments that are more to the left in genomic coordinates, or 512 * more to the right. 513 * 514 ***************************************************************************/ 515 NLM_EXTERN void SPI_RemoveInconsistentAlnsFromSet(SeqAlignPtr sap, Int4 fuzz, Int4 n, Int4 compact); 516 517 NLM_EXTERN void SPI_bsinfoFreeList (SPI_bsinfoPtr spi); 518 NLM_EXTERN void SPI_mRNAFree (SPI_mRNAPtr smp); 519 NLM_EXTERN SPI_OptionsPtr SPI_OptionsNew(void); 520 NLM_EXTERN void SPI_OptionsFree (SPI_OptionsPtr spot); 521 NLM_EXTERN void SPI_is_donor (Uint1Ptr sequence, Int4 seqlen, FloatHiPtr score, Int4 org); 522 NLM_EXTERN void SPI_is_acceptor (Uint1Ptr sequence, Int4 seqlen, FloatHiPtr score, Int4 org); 523 524 /*************************************************************************** 525 * 526 * SPI_GetProteinFrommRNA takes an mRNA bioseq and returns a string 527 * which is the best protein translation of the mRNA. First, the function 528 * looks to see whether there are any annotated CDSs, and if so, it uses 529 * the translation of the annotated CDS. If not, the function translates 530 * the mRNA in all 3 reading frames and looks for the frame with the 531 * longest protein, then returns that protein. 532 * 533 ***************************************************************************/ 534 NLM_EXTERN CharPtr SPI_GetProteinFrommRNA(BioseqPtr bsp_mrna, Int4Ptr start); 535 536 #ifdef __cplusplus 537 } 538 #endif 539 540 #undef NLM_EXTERN 541 #ifdef NLM_EXPORT 542 #define NLM_EXTERN NLM_EXPORT 543 #else 544 #define NLM_EXTERN 545 #endif 546 547 #endif 548