1 /* $Id: cddutil.h,v 1.52 2003/12/09 22:21:35 bauer Exp $ 2 *=========================================================================== 3 * 4 * PUBLIC DOMAIN NOTICE 5 * National Center for Biotechnology Information 6 * 7 * This software/database is a "United States Government Work" under the 8 * terms of the United States Copyright Act. It was written as part of 9 * the author's official duties as a United States Government employee and 10 * thus cannot be copyrighted. This software/database is freely available 11 * to the public for use. The National Library of Medicine and the U.S. 12 * Government have not placed any restriction on its use or reproduction. 13 * 14 * Although all reasonable efforts have been taken to ensure the accuracy 15 * and reliability of the software and data, the NLM and the U.S. 16 * Government do not and cannot warrant the performance or results that 17 * may be obtained by using this software or data. The NLM and the U.S. 18 * Government disclaim all warranties, express or implied, including 19 * warranties of performance, merchantability or fitness for any particular 20 * purpose. 21 * 22 * Please cite the author in any work or product based on this material. 23 * 24 * =========================================================================== 25 * 26 * File Name: cddutil.h 27 * 28 * Author: Aron Marchler-Bauer 29 * 30 * Initial Version Creation Date: 12/15/1999 31 * 32 * $Revision: 1.52 $ 33 * 34 * File Description: Header file for cdd api utility functions 35 * 36 * Modifications: 37 * -------------------------------------------------------------------------- 38 * $Log: cddutil.h,v $ 39 * Revision 1.52 2003/12/09 22:21:35 bauer 40 * added CddCountResTypes 41 * 42 * Revision 1.51 2003/08/25 19:09:47 bauer 43 * added SeqAlignReadFromFile 44 * 45 * Revision 1.50 2003/05/21 17:25:21 bauer 46 * optional ObjMgr in CddReadDBGetBioseq 47 * 48 * Revision 1.49 2003/04/25 14:36:20 bauer 49 * impalaScaling now returns boolean value 50 * 51 * Revision 1.48 2003/02/06 21:04:27 bauer 52 * fixed bug in reindexing to consensus 53 * 54 * Revision 1.47 2002/12/03 14:36:31 bauer 55 * added CddMSLMixedToMSLDenDiag 56 * 57 * Revision 1.46 2002/11/22 21:35:23 bauer 58 * added SeqAnnotReadFromFile and preservation of scores in DenseSeg to DenseDiag conversion 59 * 60 * Revision 1.45 2002/10/10 20:38:19 bauer 61 * changes to accomodate new spec items 62 * - old-root node 63 * - curation-status 64 * 65 * Revision 1.44 2002/10/02 17:32:21 bauer 66 * avoid merging blocks when reindexing alignments 67 * 68 * Revision 1.43 2002/08/17 11:55:08 bauer 69 * backed out changes 70 * 71 * Revision 1.42 2002/08/16 19:51:46 bauer 72 * added Ben's CddSrvGetStyle2 73 * 74 * Revision 1.41 2002/07/31 14:58:58 bauer 75 * BLAST DB Sequence Retrieval 76 * 77 * Revision 1.40 2002/07/17 18:54:11 bauer 78 * CddFeaturesAreConsistent now returns explicit error messages 79 * 80 * Revision 1.39 2002/07/10 15:34:31 bauer 81 * made SipIsConsensus public 82 * 83 * Revision 1.38 2002/07/09 21:12:40 bauer 84 * added CddDenDiagCposComp2KBP to return Karlin-Altschul parameters together with PSSM 85 * 86 * Revision 1.37 2002/07/05 21:09:26 bauer 87 * added GetAlignmentSize 88 * 89 * Revision 1.36 2002/05/06 16:59:51 bauer 90 * remove blanks in inferred CD short names 91 * 92 * Revision 1.35 2002/04/22 16:37:31 bauer 93 * added check for missing structure alignments 94 * 95 * Revision 1.34 2002/04/18 21:00:27 bauer 96 * added check CddFeaturesAreConsistent 97 * 98 * Revision 1.33 2002/04/12 14:02:43 bauer 99 * added update_date case to CddAssignDescr 100 * 101 * Revision 1.32 2002/04/11 14:34:26 bauer 102 * added CddRemoveConsensus 103 * 104 * Revision 1.31 2002/02/20 22:27:28 bauer 105 * utility functions for the CD-Server 106 * 107 * Revision 1.30 2002/02/12 23:00:47 bauer 108 * added missing break in CddRelocateSeqLoc 109 * 110 * Revision 1.29 2002/02/06 19:35:37 bauer 111 * some more CddGet.. functionality 112 * 113 * Revision 1.28 2002/02/05 23:15:41 bauer 114 * added a few CddGet.. utility functions, changes to CddAddDescr allow to extend scrapbook line by line 115 * 116 * Revision 1.27 2002/01/05 14:49:44 bauer 117 * made SeqAlignDup a local function 118 * 119 * Revision 1.26 2002/01/04 19:46:56 bauer 120 * added functions to interconvert PSSM-Ids and accessions 121 * 122 * Revision 1.25 2001/11/13 19:51:52 bauer 123 * support for annotation transfer in alignment reindexing 124 * 125 * Revision 1.24 2001/05/23 21:18:06 bauer 126 * added functions for alignment block structure control 127 * 128 * Revision 1.23 2001/04/10 20:25:57 bauer 129 * cddutil.c 130 * 131 * Revision 1.22 2001/04/10 20:18:09 bauer 132 * write out ascii-formatted mtx-Files for copymat 133 * 134 * Revision 1.21 2001/03/07 16:30:33 bauer 135 * fixed alignment reindexing bug 136 * 137 * Revision 1.20 2001/02/14 17:11:03 bauer 138 * relaced calls to BioseqCopy with CddBioseqCopy 139 * 140 * Revision 1.19 2001/02/13 20:55:10 bauer 141 * fixed bug when comparing local ids 142 * 143 * Revision 1.18 2001/02/06 22:55:35 bauer 144 * Scoring Matrix now a function parameter in CddDenDiagCposComp2 145 * 146 * Revision 1.17 2001/02/05 22:58:47 bauer 147 * added alignment reindexing function 148 * 149 * Revision 1.16 2001/01/17 21:32:02 bauer 150 * changes to PSSM calculation 151 * 152 * Revision 1.15 2001/01/12 01:31:30 bauer 153 * added data structures for alignment reindexing 154 * 155 * Revision 1.14 2001/01/12 00:17:01 bauer 156 * added routines for information content calculation 157 * 158 * Revision 1.13 2001/01/11 23:48:25 bauer 159 * added check for consensus-Cd 160 * 161 * Revision 1.12 2000/12/01 19:36:57 hurwitz 162 * added scale factor to PSSM calcs 163 * 164 * Revision 1.11 2000/11/14 22:08:44 hurwitz 165 * added weighting for pssm calculation 166 * 167 * Revision 1.10 2000/09/08 21:43:51 hurwitz 168 * adding PSSM calculation to DDE 169 * 170 * Revision 1.9 2000/09/01 21:59:12 hurwitz 171 * re-order columns from PSSM of CDs to column order expected in threading 172 * 173 * Revision 1.8 2000/08/30 21:33:55 hurwitz 174 * added new and free functions for Seq_Mtf and Qry_Seq 175 * 176 * Revision 1.7 2000/08/17 19:00:24 hurwitz 177 * fixed some codewarrior compiler warnings 178 * 179 * Revision 1.6 2000/08/14 21:52:05 hurwitz 180 * added CddCalcPSSM 181 * 182 * Revision 1.5 2000/08/14 19:36:26 hurwitz 183 * got CddCposComp working and tested 184 * 185 * Revision 1.4 2000/08/11 19:54:01 hurwitz 186 * restored CddDenDiagCposComputation and CddCposComputation to original, added CddCposComp which combines the 2 187 * 188 * Revision 1.3 2000/08/09 21:29:08 hurwitz 189 * adding cddutil.c to VC++ build 190 * 191 * Revision 1.2 2000/07/19 19:42:08 bauer 192 * added modification logging 193 * 194 * 195 * ========================================================================== 196 */ 197 198 #if !defined(CDDUTIL_H) 199 #define CDDUTIL_H 200 201 #ifdef __cplusplus 202 extern "C" { 203 #endif 204 205 #include <objcdd.h> 206 /*#include <objcn3d.h> */ 207 #include <blastdef.h> 208 #include <thrdatd.h> 209 #include <posit.h> 210 211 #define CKPTEXT ".chk" 212 #define CSEQEXT ".csq" 213 #define MTRXEXT ".mtx" 214 215 #define POSFREQ_SCALE 1000000 216 217 #define Xscore (-1) 218 219 /* column order of residue-types after PSSM is calculated from CD's */ 220 #define InputOrder "-ABCDEFGHIKLMNPQRSTVWXYZU*" 221 222 /* column order of residue-types needed for threading */ 223 /* this should match the column order in the contact potential */ 224 #define OutputOrder "ARNDCQEGHILKMFPSTWYV" 225 226 /*---------------------------------------------------------------------------*/ 227 /* Structures used in Calculation of a 50/50 Extent/Content consensus seq. */ 228 /*---------------------------------------------------------------------------*/ 229 typedef struct _cdd_ext_alignment_cell { 230 Uint1 aatype; 231 Int4 seqpos; 232 } CddExtAlignCell, PNTR CddExtAlignCellPtr; 233 234 typedef struct _cdd_ext_alignment_column { 235 Int4 conpos; 236 Int4 occpos; 237 Int4 ntypes; 238 Int4 *typecount; 239 FloatHi *wtypefreq; 240 FloatHi w_occpos; 241 Uint4 aatype; 242 } CddExtAlignCol, PNTR CddExtAlignColPtr; 243 244 typedef struct _cdd_alignment_weight { 245 FloatHi weight; 246 Int4 nposaligned; 247 BioseqPtr bsp; 248 } CddAlignWeight, PNTR CddAlignWeightPtr; 249 250 /*---------------------------------------------------------------------------*/ 251 /* Cdd Explicit Alignment, data structure used for alignment reindexing and */ 252 /* calculations on pairwise alignments */ 253 /*---------------------------------------------------------------------------*/ 254 typedef struct _cdd_explicit_alignment { 255 SeqIdPtr ids; 256 Boolean bIdAlloc; 257 Int4 length; 258 Int4 *adata; 259 Int4 *starts; 260 } CddExpAlign, PNTR CddExpAlignPtr; 261 262 typedef struct _cdd_idx_data { 263 Int4 iPssmId; 264 CharPtr cCDDid; 265 struct _cdd_idx_data PNTR next; 266 } CddIdxData, PNTR CddIdxDataPtr; 267 268 /*---------------------------------------------------------------------------*/ 269 /* removed PHI-Blast specific variables from this data structure */ 270 /*---------------------------------------------------------------------------*/ 271 typedef struct _pgp_blast_options { 272 BLAST_OptionsBlkPtr options; 273 CharPtr blast_database; 274 BioseqPtr query_bsp, fake_bsp; 275 Int4 number_of_descriptions, number_of_alignments; 276 FILE *infp, *outfp; 277 AsnIoPtr aip_out; 278 Boolean html; 279 Boolean believe_query; 280 Uint4 align_options, print_options; 281 } PGPBlastOptions, PNTR PGPBlastOptionsPtr; 282 283 284 /*---------------------------------------------------------------------------*/ 285 /* drawing style conventions for Cn3D v4.x */ 286 /*---------------------------------------------------------------------------*/ 287 /* 288 Int4 cdd_def_style[] = 289 { 290 Cn3d_backbone_type_trace, 291 Cn3d_drawing_style_tubes, 292 Cn3d_color_scheme_weighted_variety, 293 10000,5000,5000,5000,10000, 294 Cn3d_backbone_type_trace, 295 Cn3d_drawing_style_tubes, 296 Cn3d_color_scheme_molecule, 297 10000,5000,5000,5000,10000, 298 TRUE, 299 Cn3d_drawing_style_wire, 300 Cn3d_color_scheme_weighted_variety, 301 10000,5000,5000,5000,10000, 302 TRUE, 303 Cn3d_drawing_style_wire, 304 Cn3d_color_scheme_molecule, 305 10000,5000,5000,5000,10000, 306 TRUE, 307 Cn3d_drawing_style_ball_and_stick, 308 Cn3d_color_scheme_element, 309 10000,5000,5000,5000,10000, 310 FALSE, 311 Cn3d_drawing_style_ball_and_stick, 312 Cn3d_color_scheme_element, 313 10000,5000,5000,5000,10000, 314 TRUE, 315 Cn3d_drawing_style_tubes, 316 Cn3d_color_scheme_user_select, 317 10000,9000,9000,10000,10000, 318 FALSE, 319 Cn3d_drawing_style_with_arrows, 320 Cn3d_color_scheme_object, 321 10000,5000,5000,5000,10000, 322 FALSE, 323 Cn3d_drawing_style_with_arrows, 324 Cn3d_color_scheme_object, 325 10000,5000,5000,5000,10000, 326 TRUE, 327 10000,9300,5500,500,10000, 328 FALSE, 329 10000,0,0,0,10000, 330 10000, 331 10000, 332 4000, 333 2000, 334 3000, 335 3000, 336 18000, 337 20000, 338 5000, 339 0, 340 Cn3d_backbone_label_style_type_three_letter, 341 Cn3d_backbone_label_style_number_sequential, 342 FALSE, 343 TRUE, 344 0, 345 Cn3d_backbone_label_style_type_three_letter, 346 Cn3d_backbone_label_style_number_sequential, 347 FALSE, 348 TRUE, 349 TRUE 350 }; 351 Int4 cdd_evidence_style[] = 352 { 353 Cn3d_backbone_type_trace, 354 Cn3d_drawing_style_tubes, 355 Cn3d_color_scheme_user_select, 356 10000,0,10000,0,10000, 357 Cn3d_backbone_type_complete, 358 Cn3d_drawing_style_ball_and_stick, 359 Cn3d_color_scheme_user_select, 360 10000,0,5019,10000,10000, 361 TRUE, 362 Cn3d_drawing_style_tubes, 363 Cn3d_color_scheme_user_select, 364 10000,0,10000,0,10000, 365 TRUE, 366 Cn3d_drawing_style_ball_and_stick, 367 Cn3d_color_scheme_user_select, 368 10000,10000,5019,0,10000, 369 TRUE, 370 Cn3d_drawing_style_space_fill, 371 Cn3d_color_scheme_user_select, 372 10000,10000,5019,0,10000, 373 TRUE, 374 Cn3d_drawing_style_ball_and_stick, 375 Cn3d_color_scheme_user_select, 376 10000,10000,5019,0,10000, 377 TRUE, 378 Cn3d_drawing_style_tubes, 379 Cn3d_color_scheme_user_select, 380 10000,9000,9000,10000,10000, 381 FALSE, 382 Cn3d_drawing_style_with_arrows, 383 Cn3d_color_scheme_object, 384 10000,5000,5000,5000,10000, 385 FALSE, 386 Cn3d_drawing_style_with_arrows, 387 Cn3d_color_scheme_object, 388 10000,5000,5000,5000,10000, 389 TRUE, 390 10000,9300,5500,500,10000, 391 FALSE, 392 10000,0,0,0,10000, 393 10000, 394 10000, 395 4000, 396 2000, 397 3000, 398 3000, 399 18000, 400 20000, 401 5000, 402 0, 403 Cn3d_backbone_label_style_type_three_letter, 404 Cn3d_backbone_label_style_number_sequential, 405 FALSE, 406 TRUE, 407 0, 408 Cn3d_backbone_label_style_type_three_letter, 409 Cn3d_backbone_label_style_number_sequential, 410 FALSE, 411 TRUE, 412 TRUE 413 }; 414 */ 415 416 /*---------------------------------------------------------------------------*/ 417 /*---------------------------------------------------------------------------*/ 418 /* Function prototypes */ 419 /*---------------------------------------------------------------------------*/ 420 /*---------------------------------------------------------------------------*/ 421 422 /*---------------------------------------------------------------------------*/ 423 /* Cdd asn1 reader and writer wrappers */ 424 /*---------------------------------------------------------------------------*/ 425 Boolean LIBCALL CddWriteToFile(CddPtr pcdd, CharPtr cFile, Boolean bBin); 426 CddPtr LIBCALL CddReadFromFile(CharPtr cFile, Boolean bBin); 427 SeqAnnotPtr LIBCALL SeqAnnotReadFromFile(CharPtr cFile, Boolean bBin); 428 SeqAlignPtr LIBCALL SeqAlignReadFromFile(CharPtr cFile, Boolean bBin); 429 430 Boolean LIBCALL CddTreeWriteToFile(CddTreePtr pcddt, CharPtr cFile, Boolean bBin); 431 CddTreePtr LIBCALL CddTreeReadFromFile(CharPtr cFile, Boolean bBin); 432 433 /*---------------------------------------------------------------------------*/ 434 /* Cdd Data manipulations and queries */ 435 /*---------------------------------------------------------------------------*/ 436 void LIBCALL CddAssignDescr(CddPtr pcdd, Pointer pThis, Int4 iWhat, Int4 iIval); 437 Boolean LIBCALL CddKillDescr(CddPtr pcdd, Pointer pThis, Int4 iWhat, Int4 iIval); 438 CharPtr LIBCALL CddGetAccession(CddPtr pcdd); 439 Int4 LIBCALL CddGetVersion(CddPtr pcdd); 440 OrgRefPtr LIBCALL CddGetOrgRef(CddPtr pcdd); 441 Int4 LIBCALL CddGetPssmId(CddPtr pcdd); 442 Int4 LIBCALL CddGetPmIds(CddPtr pcdd, Int4Ptr iPMids); 443 CharPtr LIBCALL CddGetDescr(CddPtr pcdd); 444 DatePtr LIBCALL CddGetCreateDate(CddPtr pcdd); 445 DatePtr LIBCALL CddGetUpdateDate(CddPtr pcdd); 446 CharPtr LIBCALL CddGetSource(CddPtr pcdd); 447 CharPtr LIBCALL CddGetSourceId(CddPtr pcdd); 448 Int4 LIBCALL CddGetStatus(CddPtr pcdd); 449 Int4 LIBCALL CddGetAlignmentLength(CddPtr pcdd); 450 Int4Ptr LIBCALL GetAlignmentSize(SeqAlignPtr salp); 451 ValNodePtr LIBCALL CddGetAnnotNames(CddPtr pcdd); 452 Boolean LIBCALL CddHasDescription(CddPtr pcdd, CharPtr pc); 453 Boolean LIBCALL CddHasAnnotation(CddPtr pcdd); 454 Boolean LIBCALL CddMasterIs3D(CddPtr pcdd); 455 Int4 LIBCALL CddCount3DAlignments(CddPtr pcdd); 456 Boolean LIBCALL SeqAlignHasConsensus(SeqAlignPtr salp); 457 Boolean LIBCALL SipIsConsensus(SeqIdPtr sip); 458 Boolean LIBCALL CddHasConsensus(CddPtr pcdd); 459 void LIBCALL CddRegularizeFileName(CharPtr cIn, CharPtr cAcc, CharPtr cFn, CharPtr cEx); 460 Boolean LIBCALL CddCheckForRepeats(CddPtr pcdd, Int4 width, Int4 GapI, Int4 GapE, 461 Nlm_FloatHi rthresh, Boolean bOutput); 462 void LIBCALL CddTruncStringAtFirstPunct(CharPtr pChar); 463 void LIBCALL CddFillBlanksInString(CharPtr pChar); 464 Boolean LIBCALL CddFeaturesAreConsistent(CddPtr pcdd, CharPtr errmsg); 465 Boolean LIBCALL CddHas3DSuperpos(CddPtr pcdd); 466 Boolean LIBCALL CddHasPendingAlignments(CddPtr pcdd); 467 CddPtr LIBCALL CddFreeCarefully(CddPtr pcdd); 468 469 /*---------------------------------------------------------------------------*/ 470 /* report Errors in processing and exit immediately */ 471 /*---------------------------------------------------------------------------*/ 472 void LIBCALL CddSimpleHtmlError(CharPtr cErrTxt); 473 void LIBCALL CddSevError(CharPtr cErrTxt); 474 475 /*---------------------------------------------------------------------------*/ 476 /* extract BioSeqs from the list stored in the CD */ 477 /*---------------------------------------------------------------------------*/ 478 BioseqPtr LIBCALL CddFindSip(SeqIdPtr sip, SeqEntryPtr sep); 479 BioseqPtr LIBCALL CddBioseqCopy(SeqIdPtr newid, BioseqPtr oldbsp, Int4 from, 480 Int4 to, Uint1 strand, Boolean do_feat); 481 BioseqPtr LIBCALL CddExtractBioseq(SeqEntryPtr sep, SeqIdPtr sip); 482 void LIBCALL CddShrinkBioseq(BioseqPtr bsp); 483 SeqAnnotPtr LIBCALL CddFindMMDBIdInBioseq(BioseqPtr bsp, Int4 *iMMDBid); 484 485 /*---------------------------------------------------------------------------*/ 486 /* Cdd specific sequence alignment format converters */ 487 /*---------------------------------------------------------------------------*/ 488 SeqAlignPtr LIBCALL CddMSLMixedToMSLDenDiag(SeqAlignPtr salp); 489 SeqAlignPtr LIBCALL CddMSLDenDiagToMSLDenSeg(SeqAlignPtr salp); 490 SeqAlignPtr LIBCALL CddMSLDenSegToMSLDenDiag(SeqAlignPtr salp); 491 SeqAlignPtr LIBCALL CddMSLDenDiagToMULDenDiag(SeqAlignPtr salp); 492 Int2 LIBCALL CddTrimSeqAligns(CddPtr pcdd); 493 494 /*---------------------------------------------------------------------------*/ 495 /* various routines for calculating PSSM/Alignment information content */ 496 /*---------------------------------------------------------------------------*/ 497 Nlm_FloatHi LIBCALL SeqAlignConservation(SeqAlignPtr salp, Nlm_FloatHi fract,BioseqPtr bsp_master, Boolean bHasConsensus, Int4 offset); 498 Nlm_FloatHiPtr LIBCALL SeqAlignInform(SeqAlignPtr salp, BioseqPtr bsp_master,Boolean bHasConsensus,Int4 offset); 499 Int4 ** LIBCALL CddCountResTypes(CddPtr pcdd, Int4 *ncols); 500 Nlm_FloatHiPtr LIBCALL CddAlignInform(CddPtr pcdd, Nlm_FloatHi * Niobs); 501 Nlm_FloatHiPtr LIBCALL CddPssmInform(CddPtr pcdd); 502 Nlm_FloatHiPtr LIBCALL CddPosFreqInform(Nlm_FloatHi **posFreq, Int4 ncol, Int4 nrow); 503 504 /*---------------------------------------------------------------------------*/ 505 /* Create a BlastOptionsPtr for the PSSM Computations */ 506 /*---------------------------------------------------------------------------*/ 507 static PGPBlastOptionsPtr CddReadBlastOptions(BioseqPtr bsp, Int4 iPseudo, CharPtr matrix_name); 508 509 /*---------------------------------------------------------------------------*/ 510 /* Calculate PSSMs for DenseDiag (or DenseSeg) Master_Slave alignment sets */ 511 /*---------------------------------------------------------------------------*/ 512 static void BlastKarlinBlkCopy(BLAST_KarlinBlkPtr kbp_in,BLAST_KarlinBlkPtr kbp_out); 513 void LIBCALL CddDenDiagCposComputation(SeqAlignPtr listOfSeqAligns, BioseqPtr bsp, 514 BioseqPtr bspF, CddPtr pcdd, Int4 pseudoCnt); 515 void LIBCALL CddCposComputation(SeqAlignPtr listOfSeqAligns, BioseqPtr bsp, CddPtr pcdd); 516 static void CddputMatrixKbp(FILE *checkFile, BLAST_KarlinBlkPtr kbp, 517 Boolean scaling, Nlm_FloatHi scalingDown); 518 static void CddputMatrixMatrix(FILE *checkFile, 519 compactSearchItems *compactSearch, 520 posSearchItems *posSearch, 521 Boolean scaleScores); 522 static Boolean CddtakeMatrixCheckpoint(compactSearchItems *compactSearch, 523 posSearchItems *posSearch, 524 BLAST_ScoreBlkPtr sbp, 525 Char *fileName,ValNodePtr *error_return, 526 Boolean scaleScores, 527 Nlm_FloatHi scalingFactor); 528 /*---------------------------------------------------------------------------*/ 529 /* calculate a PSSM */ 530 /*---------------------------------------------------------------------------*/ 531 Seq_Mtf * LIBCALL CddDenDiagCposComp2(BioseqPtr bspFake, Int4 iPseudo, 532 SeqAlignPtr salp, CddPtr pcdd, 533 BioseqPtr bspOut, double Weight, 534 double ScaleFactor, CharPtr matrix_name); 535 /*---------------------------------------------------------------------------*/ 536 /* calculate a PSSM and also fill the kbp data structure. Must be initialized*/ 537 /* with BlastKarlinBlkCreate(); */ 538 /*---------------------------------------------------------------------------*/ 539 Seq_Mtf * LIBCALL CddDenDiagCposComp2KBP(BioseqPtr bspFake, Int4 iPseudo, 540 SeqAlignPtr salp, CddPtr pcdd, 541 BioseqPtr bspOut, double Weight, 542 double ScaleFactor, CharPtr matrix_name, 543 BLAST_KarlinBlkPtr kbp); 544 545 /*---------------------------------------------------------------------------*/ 546 /* this function combines CddCposComputation and CddDenDiagCposComputation */ 547 /* into 1 function. no need to calculate fake bsp beforehand. */ 548 /*---------------------------------------------------------------------------*/ 549 void LIBCALL CddCposComp(SeqAlignPtr listOfSeqAligns, BioseqPtr bsp, CddPtr pcdd); 550 551 /*---------------------------------------------------------------------------*/ 552 /* same as CddCposComp, except pssm is put in a Seq_Mtf instead of a Cdd */ 553 /*---------------------------------------------------------------------------*/ 554 Seq_Mtf* LIBCALL CddCalcPSSM(SeqAlignPtr listOfSeqAligns, BioseqPtr bsp, 555 double Weight, double ScaleFactor); 556 Int4 LIBCALL CddGetNewIndexForThreading(char InChar, char* Output); 557 558 /*---------------------------------------------------------------------------*/ 559 /* Utility Functions for PSSM calculation */ 560 /*---------------------------------------------------------------------------*/ 561 Int4 LIBCALL CddCountSeqAligns(SeqAlignPtr listOfSeqAligns, Int4 * numSequences); 562 Int4 LIBCALL CddCountDenDiagSeqAligns(SeqAlignPtr listOfSeqAligns, Int4 * numSequences); 563 564 /*---------------------------------------------------------------------------*/ 565 /* assign the range of the master sequence involved in alignments */ 566 /*---------------------------------------------------------------------------*/ 567 void LIBCALL CddAssignProfileRange(CddPtr pcdd, SeqIdPtr sip); 568 569 /*---------------------------------------------------------------------------*/ 570 /* return a pointer to a specific bioseq from a seq-entry set */ 571 /*---------------------------------------------------------------------------*/ 572 BioseqPtr LIBCALL CddFindBioseqInSeqEntry(SeqEntryPtr sep, SeqIdPtr sip); 573 574 /*---------------------------------------------------------------------------*/ 575 /* Are two Sequence-Id's the same? */ 576 /*---------------------------------------------------------------------------*/ 577 Boolean LIBCALL CddSameSip(SeqIdPtr sip1, SeqIdPtr sip2); 578 579 /*---------------------------------------------------------------------------*/ 580 /* add an offset to a Seqalign */ 581 /*---------------------------------------------------------------------------*/ 582 void LIBCALL CddReindexMSLDenSegMaster(SeqAlignPtr salp, Int4 offset); 583 void LIBCALL CddReindexMSLDenDiagMaster(SeqAlignPtr salp, Int4 offset); 584 585 /*---------------------------------------------------------------------------*/ 586 /* Transfer alignment annotation between sequences in the alignment */ 587 /* and return a list detailing a sequence location in residue numbers */ 588 /*---------------------------------------------------------------------------*/ 589 static SeqIdPtr CddFindSeqIdInSeqLoc(SeqLocPtr location); 590 Int4Ptr LIBCALL CddGetFeatLocList(SeqLocPtr location, Int4 *nres); 591 static void CddRelocateSeqLoc(SeqLocPtr location, SeqIdPtr sip, Int4 *ali); 592 static Int4 CddSeqLocInExpAlign(SeqLocPtr location, CddExpAlignPtr eap); 593 static SeqIdPtr CddFindSeqIdInAlignAnnot(AlignAnnotPtr oldannot); 594 void LIBCALL CddTransferAlignAnnot(AlignAnnotPtr oldannot, 595 SeqIdPtr newMaster, 596 SeqAlignPtr salp, 597 BioseqSetPtr bssp); 598 599 /*---------------------------------------------------------------------------*/ 600 /* reindex a Seqalign to a new "Master" */ 601 /*---------------------------------------------------------------------------*/ 602 SeqAlignPtr LIBCALL CddReindexSeqAlign(SeqAlignPtr salp, SeqIdPtr sipMaster, 603 BioseqSetPtr bssp); 604 605 /*---------------------------------------------------------------------------*/ 606 /* Make a copy of a DenDiag Master-Slave list alignment */ 607 /*---------------------------------------------------------------------------*/ 608 SeqAlignPtr LIBCALL CddCopyMSLDenDiag(SeqAlignPtr salp); 609 610 /*---------------------------------------------------------------------------*/ 611 /* Utility functions for Alignment Reindexing and pairwise comparisons */ 612 /*---------------------------------------------------------------------------*/ 613 CddExpAlignPtr CddExpAlignNew(); 614 CddExpAlignPtr CddExpAlignFree(CddExpAlignPtr pCDea); 615 void CddExpAlignAlloc(CddExpAlignPtr pCDea, Int4 iLength); 616 CddExpAlignPtr CddExpAlignRevert(CddExpAlignPtr pCDea, Int4 iLength); 617 CddExpAlignPtr CddReindexExpAlign(CddExpAlignPtr pCDea1, Int4 newlength, CddExpAlignPtr pCDea2, Int4 iOuter, Int4 iInner); 618 CddExpAlignPtr LIBCALL SeqAlignToCddExpAlign(SeqAlignPtr salp, SeqEntryPtr sep); 619 CddExpAlignPtr InvertCddExpAlign(CddExpAlignPtr pCDea, SeqEntryPtr sep); 620 SeqAlignPtr CddExpAlignToSeqAlign(CddExpAlignPtr pCDea, Int4Ptr iBreakAfter); 621 Int2 LIBCALL CddGetProperBlocks(CddPtr pcdd, Boolean modify, Int4Ptr *iBreakAfter); 622 FloatHi CddGetPairId(TrianglePtr pTri, Int4 idx1, Int4 idx2); 623 static Boolean HitYet(Int4Ptr retlist, Int4 index, Int4 i); 624 Int4Ptr CddMostDiverse(TrianglePtr pTri, Int4 length, Int4 maxdiv); 625 Int4Ptr CddMostSimilarToQuery(ScorePtr psc, Int4 length); 626 BioseqPtr CddRetrieveBioseqById(SeqIdPtr sip, SeqEntryPtr sep); 627 TrianglePtr CddCalculateTriangle(CddPtr pcdd); 628 ScorePtr CddCalculateQuerySim(CddPtr pcdd, SeqAlignPtr salp); 629 630 /*---------------------------------------------------------------------------*/ 631 /* rips out and returns a PDBSeqId from a SeqId */ 632 /*---------------------------------------------------------------------------*/ 633 PDBSeqIdPtr LIBCALL CddGetPdbSeqId(SeqIdPtr sip); 634 635 /*---------------------------------------------------------------------------*/ 636 /* Make a Consensus Sequence and reindex alignment */ 637 /*---------------------------------------------------------------------------*/ 638 SeqAnnotPtr LIBCALL CddSeqAnnotForSeqAlign (SeqAlignPtr salp); 639 SeqAlignPtr LIBCALL CddSeqAlignDup (SeqAlignPtr salp); 640 SeqAlignPtr LIBCALL SeqAlignSetDup(SeqAlignPtr salp); 641 void LIBCALL CddDegapSeqAlign(SeqAlignPtr salp); 642 SeqIdPtr LIBCALL CddSeqIdDupPDBGI(SeqIdPtr sipold); 643 SeqAlignPtr LIBCALL CddConsensus(SeqAlignPtr salp, SeqEntryPtr sep, BioseqPtr bsp, SeqIntPtr range, BioseqPtr *bspCons, SeqAlignPtr *salpCons); 644 Boolean LIBCALL CddRemoveConsensus(CddPtr pcdd); 645 646 /*---------------------------------------------------------------------------*/ 647 /* dump out CD contents used for Entrez indexing in pseudo XML */ 648 /*---------------------------------------------------------------------------*/ 649 static CharPtr CddFixForXML(CharPtr pc); 650 void LIBCALL CddDumpPMLinks(CddPtr pcdd, FILE *FP); 651 void LIBCALL CddDumpTaxLinks(CddPtr pcdd, FILE *FP); 652 void LIBCALL CddDumpXML(CddPtr pcdd, FILE *FP); 653 654 /*---------------------------------------------------------------------------*/ 655 /* conversions between PSSM-Ids and CDD accessions */ 656 /*---------------------------------------------------------------------------*/ 657 CddIdxDataPtr LIBCALL CddIdxDataNew(CharPtr acc, Int4 uid); 658 CddIdxDataPtr LIBCALL CddIdxDataLink(CddIdxDataPtr PNTR head, CddIdxDataPtr cidp); 659 CddIdxDataPtr LIBCALL CddReadIdx(CharPtr CDDidx); 660 void LIBCALL CddAccFromPssmId(Int4 iPssmId, CharPtr cAcc, CharPtr CDDidx); 661 void LIBCALL CddPssmIdFromAcc(Int4 *iPssmId, CharPtr cAcc, CharPtr CDDidx); 662 663 /*---------------------------------------------------------------------------*/ 664 /* Bioseq retrieval from BLAST db - contributed by Ben */ 665 /*---------------------------------------------------------------------------*/ 666 Boolean LIBCALL SeqHasTax(BioseqPtr bsp); 667 BioseqPtr LIBCALL CddReadDBGetBioseq(SeqIdPtr query, Int4 index, ReadDBFILEPtr rdfp); 668 BioseqPtr LIBCALL CddReadDBGetBioseqEx(SeqIdPtr query, Int4 index, ReadDBFILEPtr rdfp, Boolean bUseObjMgr); 669 670 /*---------------------------------------------------------------------------*/ 671 /* setting styles for Cn3D v4.x */ 672 /*---------------------------------------------------------------------------*/ 673 /* 674 Cn3dStyleDictionaryPtr LIBCALL CddSrvGetStyle2(Int4 *styles[], Int4 nstyles); 675 static Cn3dStyleSettingsPtr CddSrvGetStyle2_Ex(Int4 style[]); 676 static Cn3dColorPtr MyCn3dColorInit(Int4 scale_factor, Int4 red, Int4 green, Int4 blue,Int4 alpha); 677 */ 678 679 #ifdef __cplusplus 680 } 681 #endif 682 683 #endif /* CDDUTIL_H */ 684