1 /* $Id: cddutil.h,v 1.52 2003/12/09 22:21:35 bauer Exp $
2 *===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * File Name:  cddutil.h
27 *
28 * Author:  Aron Marchler-Bauer
29 *
30 * Initial Version Creation Date: 12/15/1999
31 *
32 * $Revision: 1.52 $
33 *
34 * File Description: Header file for cdd api utility functions
35 *
36 * Modifications:
37 * --------------------------------------------------------------------------
38 * $Log: cddutil.h,v $
39 * Revision 1.52  2003/12/09 22:21:35  bauer
40 * added CddCountResTypes
41 *
42 * Revision 1.51  2003/08/25 19:09:47  bauer
43 * added SeqAlignReadFromFile
44 *
45 * Revision 1.50  2003/05/21 17:25:21  bauer
46 * optional ObjMgr in CddReadDBGetBioseq
47 *
48 * Revision 1.49  2003/04/25 14:36:20  bauer
49 * impalaScaling now returns boolean value
50 *
51 * Revision 1.48  2003/02/06 21:04:27  bauer
52 * fixed bug in reindexing to consensus
53 *
54 * Revision 1.47  2002/12/03 14:36:31  bauer
55 * added CddMSLMixedToMSLDenDiag
56 *
57 * Revision 1.46  2002/11/22 21:35:23  bauer
58 * added SeqAnnotReadFromFile and preservation of scores in DenseSeg to DenseDiag conversion
59 *
60 * Revision 1.45  2002/10/10 20:38:19  bauer
* changes to accommodate new spec items
62 * - old-root node
63 * - curation-status
64 *
65 * Revision 1.44  2002/10/02 17:32:21  bauer
66 * avoid merging blocks when reindexing alignments
67 *
68 * Revision 1.43  2002/08/17 11:55:08  bauer
69 * backed out changes
70 *
71 * Revision 1.42  2002/08/16 19:51:46  bauer
72 * added Ben's CddSrvGetStyle2
73 *
74 * Revision 1.41  2002/07/31 14:58:58  bauer
75 * BLAST DB Sequence Retrieval
76 *
77 * Revision 1.40  2002/07/17 18:54:11  bauer
78 * CddFeaturesAreConsistent now returns explicit error messages
79 *
80 * Revision 1.39  2002/07/10 15:34:31  bauer
81 * made SipIsConsensus public
82 *
83 * Revision 1.38  2002/07/09 21:12:40  bauer
84 * added CddDenDiagCposComp2KBP to return Karlin-Altschul parameters together with PSSM
85 *
86 * Revision 1.37  2002/07/05 21:09:26  bauer
87 * added GetAlignmentSize
88 *
89 * Revision 1.36  2002/05/06 16:59:51  bauer
90 * remove blanks in inferred CD short names
91 *
92 * Revision 1.35  2002/04/22 16:37:31  bauer
93 * added check for missing structure alignments
94 *
95 * Revision 1.34  2002/04/18 21:00:27  bauer
96 * added check CddFeaturesAreConsistent
97 *
98 * Revision 1.33  2002/04/12 14:02:43  bauer
99 * added update_date case to CddAssignDescr
100 *
101 * Revision 1.32  2002/04/11 14:34:26  bauer
102 * added CddRemoveConsensus
103 *
104 * Revision 1.31  2002/02/20 22:27:28  bauer
105 * utility functions for the CD-Server
106 *
107 * Revision 1.30  2002/02/12 23:00:47  bauer
108 * added missing break in CddRelocateSeqLoc
109 *
110 * Revision 1.29  2002/02/06 19:35:37  bauer
111 * some more CddGet.. functionality
112 *
113 * Revision 1.28  2002/02/05 23:15:41  bauer
114 * added a few CddGet.. utility functions, changes to CddAddDescr allow to extend scrapbook line by line
115 *
116 * Revision 1.27  2002/01/05 14:49:44  bauer
117 * made SeqAlignDup a local function
118 *
119 * Revision 1.26  2002/01/04 19:46:56  bauer
120 * added functions to interconvert PSSM-Ids and accessions
121 *
122 * Revision 1.25  2001/11/13 19:51:52  bauer
123 * support for annotation transfer in alignment reindexing
124 *
125 * Revision 1.24  2001/05/23 21:18:06  bauer
126 * added functions for alignment block structure control
127 *
128 * Revision 1.23  2001/04/10 20:25:57  bauer
129 * cddutil.c
130 *
131 * Revision 1.22  2001/04/10 20:18:09  bauer
132 * write out ascii-formatted mtx-Files for copymat
133 *
134 * Revision 1.21  2001/03/07 16:30:33  bauer
135 * fixed alignment reindexing bug
136 *
137 * Revision 1.20  2001/02/14 17:11:03  bauer
* replaced calls to BioseqCopy with CddBioseqCopy
139 *
140 * Revision 1.19  2001/02/13 20:55:10  bauer
141 * fixed bug when comparing local ids
142 *
143 * Revision 1.18  2001/02/06 22:55:35  bauer
144 * Scoring Matrix now a function parameter in CddDenDiagCposComp2
145 *
146 * Revision 1.17  2001/02/05 22:58:47  bauer
147 * added alignment reindexing function
148 *
149 * Revision 1.16  2001/01/17 21:32:02  bauer
150 * changes to PSSM calculation
151 *
152 * Revision 1.15  2001/01/12 01:31:30  bauer
153 * added data structures for alignment reindexing
154 *
155 * Revision 1.14  2001/01/12 00:17:01  bauer
156 * added routines for information content calculation
157 *
158 * Revision 1.13  2001/01/11 23:48:25  bauer
159 * added check for consensus-Cd
160 *
161 * Revision 1.12  2000/12/01 19:36:57  hurwitz
162 * added scale factor to PSSM calcs
163 *
164 * Revision 1.11  2000/11/14 22:08:44  hurwitz
165 * added weighting for pssm calculation
166 *
167 * Revision 1.10  2000/09/08 21:43:51  hurwitz
168 * adding PSSM calculation to DDE
169 *
170 * Revision 1.9  2000/09/01 21:59:12  hurwitz
171 * re-order columns from PSSM of CDs to column order expected in threading
172 *
173 * Revision 1.8  2000/08/30 21:33:55  hurwitz
174 * added new and free functions for Seq_Mtf and Qry_Seq
175 *
176 * Revision 1.7  2000/08/17 19:00:24  hurwitz
177 * fixed some codewarrior compiler warnings
178 *
179 * Revision 1.6  2000/08/14 21:52:05  hurwitz
180 * added CddCalcPSSM
181 *
182 * Revision 1.5  2000/08/14 19:36:26  hurwitz
183 * got CddCposComp working and tested
184 *
185 * Revision 1.4  2000/08/11 19:54:01  hurwitz
186 * restored CddDenDiagCposComputation and CddCposComputation to original, added CddCposComp which combines the 2
187 *
188 * Revision 1.3  2000/08/09 21:29:08  hurwitz
189 * adding cddutil.c to VC++ build
190 *
191 * Revision 1.2  2000/07/19 19:42:08  bauer
192 * added modification logging
193 *
194 *
195 * ==========================================================================
196 */
197 
198 #if !defined(CDDUTIL_H)
199 #define CDDUTIL_H
200 
201 #ifdef __cplusplus
202 extern "C" {
203 #endif
204 
205 #include <objcdd.h>
206 /*#include <objcn3d.h> */
207 #include <blastdef.h>
208 #include <thrdatd.h>
209 #include <posit.h>
210 
/* File-name extensions used by the CDD utilities.  Going by the names they   */
/* are for checkpoint, consensus-sequence(?) and matrix files -- TODO confirm */
/* the exact use of each against cddutil.c.                                   */
#define CKPTEXT        ".chk"
#define CSEQEXT        ".csq"
#define MTRXEXT        ".mtx"

/* integer scale constant for position frequencies (its use is not visible    */
/* in this header)                                                            */
#define POSFREQ_SCALE  1000000

/* score constant; presumably the score assigned to residue 'X' -- verify     */
#define Xscore         (-1)

/* residue-type column order right after a PSSM has been calculated from CDs  */
#define InputOrder     "-ABCDEFGHIKLMNPQRSTVWXYZU*"

/* residue-type column order required for threading; this has to stay in      */
/* step with the column order used by the contact potential                   */
#define OutputOrder    "ARNDCQEGHILKMFPSTWYV"
225 
226 /*---------------------------------------------------------------------------*/
227 /* Structures used in Calculation of a 50/50 Extent/Content consensus seq.   */
228 /*---------------------------------------------------------------------------*/
229 typedef struct _cdd_ext_alignment_cell {
230   Uint1     aatype;
231   Int4      seqpos;
232 } CddExtAlignCell, PNTR CddExtAlignCellPtr;
233 
234 typedef struct _cdd_ext_alignment_column {
235   Int4      conpos;
236   Int4      occpos;
237   Int4      ntypes;
238   Int4      *typecount;
239   FloatHi   *wtypefreq;
240   FloatHi   w_occpos;
241   Uint4     aatype;
242 } CddExtAlignCol, PNTR CddExtAlignColPtr;
243 
244 typedef struct _cdd_alignment_weight {
245   FloatHi   weight;
246   Int4      nposaligned;
247   BioseqPtr bsp;
248 } CddAlignWeight, PNTR CddAlignWeightPtr;
249 
250 /*---------------------------------------------------------------------------*/
251 /* Cdd Explicit Alignment, data structure used for alignment reindexing and  */
252 /* calculations on pairwise alignments                                       */
253 /*---------------------------------------------------------------------------*/
254 typedef struct _cdd_explicit_alignment {
255   SeqIdPtr     ids;
256   Boolean      bIdAlloc;
257   Int4         length;
258   Int4         *adata;
259   Int4         *starts;
260 } CddExpAlign, PNTR CddExpAlignPtr;
261 
262 typedef struct _cdd_idx_data {
263   Int4     iPssmId;
264   CharPtr  cCDDid;
265   struct _cdd_idx_data PNTR next;
266 } CddIdxData, PNTR CddIdxDataPtr;
267 
268 /*---------------------------------------------------------------------------*/
269 /* removed PHI-Blast specific variables from this data structure             */
270 /*---------------------------------------------------------------------------*/
271 typedef struct _pgp_blast_options {
272     BLAST_OptionsBlkPtr options;
273     CharPtr blast_database;
274     BioseqPtr query_bsp, fake_bsp;
275     Int4 number_of_descriptions, number_of_alignments;
276     FILE *infp, *outfp;
277     AsnIoPtr aip_out;
278     Boolean html;
279     Boolean believe_query;
280     Uint4 align_options, print_options;
281 } PGPBlastOptions, PNTR PGPBlastOptionsPtr;
282 
283 
284 /*---------------------------------------------------------------------------*/
285 /* drawing style conventions for Cn3D v4.x                                   */
286 /*---------------------------------------------------------------------------*/
287 /*
288 Int4 cdd_def_style[] =
289 {
290     Cn3d_backbone_type_trace,
291     Cn3d_drawing_style_tubes,
292      Cn3d_color_scheme_weighted_variety,
293     10000,5000,5000,5000,10000,
294     Cn3d_backbone_type_trace,
295     Cn3d_drawing_style_tubes,
296     Cn3d_color_scheme_molecule,
297     10000,5000,5000,5000,10000,
298     TRUE,
299     Cn3d_drawing_style_wire,
300     Cn3d_color_scheme_weighted_variety,
301     10000,5000,5000,5000,10000,
302     TRUE,
303     Cn3d_drawing_style_wire,
304     Cn3d_color_scheme_molecule,
305     10000,5000,5000,5000,10000,
306     TRUE,
307     Cn3d_drawing_style_ball_and_stick,
308     Cn3d_color_scheme_element,
309     10000,5000,5000,5000,10000,
310     FALSE,
311     Cn3d_drawing_style_ball_and_stick,
312     Cn3d_color_scheme_element,
313     10000,5000,5000,5000,10000,
314     TRUE,
315     Cn3d_drawing_style_tubes,
316     Cn3d_color_scheme_user_select,
317     10000,9000,9000,10000,10000,
318     FALSE,
319     Cn3d_drawing_style_with_arrows,
320     Cn3d_color_scheme_object,
321     10000,5000,5000,5000,10000,
322     FALSE,
323     Cn3d_drawing_style_with_arrows,
324     Cn3d_color_scheme_object,
325     10000,5000,5000,5000,10000,
326     TRUE,
327     10000,9300,5500,500,10000,
328     FALSE,
329     10000,0,0,0,10000,
330     10000,
331     10000,
332     4000,
333     2000,
334     3000,
335     3000,
336     18000,
337     20000,
338     5000,
339     0,
340     Cn3d_backbone_label_style_type_three_letter,
341     Cn3d_backbone_label_style_number_sequential,
342     FALSE,
343     TRUE,
344     0,
345     Cn3d_backbone_label_style_type_three_letter,
346     Cn3d_backbone_label_style_number_sequential,
347     FALSE,
348     TRUE,
349     TRUE
350 };
351 Int4 cdd_evidence_style[] =
352 {
353     Cn3d_backbone_type_trace,
354     Cn3d_drawing_style_tubes,
355     Cn3d_color_scheme_user_select,
356     10000,0,10000,0,10000,
357     Cn3d_backbone_type_complete,
358     Cn3d_drawing_style_ball_and_stick,
359     Cn3d_color_scheme_user_select,
360     10000,0,5019,10000,10000,
361     TRUE,
362     Cn3d_drawing_style_tubes,
363     Cn3d_color_scheme_user_select,
364     10000,0,10000,0,10000,
365     TRUE,
366     Cn3d_drawing_style_ball_and_stick,
367     Cn3d_color_scheme_user_select,
368     10000,10000,5019,0,10000,
369     TRUE,
370     Cn3d_drawing_style_space_fill,
371     Cn3d_color_scheme_user_select,
372     10000,10000,5019,0,10000,
373     TRUE,
374     Cn3d_drawing_style_ball_and_stick,
375     Cn3d_color_scheme_user_select,
376     10000,10000,5019,0,10000,
377     TRUE,
378     Cn3d_drawing_style_tubes,
379     Cn3d_color_scheme_user_select,
380     10000,9000,9000,10000,10000,
381     FALSE,
382     Cn3d_drawing_style_with_arrows,
383     Cn3d_color_scheme_object,
384     10000,5000,5000,5000,10000,
385     FALSE,
386     Cn3d_drawing_style_with_arrows,
387     Cn3d_color_scheme_object,
388     10000,5000,5000,5000,10000,
389     TRUE,
390     10000,9300,5500,500,10000,
391     FALSE,
392     10000,0,0,0,10000,
393     10000,
394     10000,
395     4000,
396     2000,
397     3000,
398     3000,
399     18000,
400     20000,
401     5000,
402     0,
403     Cn3d_backbone_label_style_type_three_letter,
404     Cn3d_backbone_label_style_number_sequential,
405     FALSE,
406     TRUE,
407     0,
408     Cn3d_backbone_label_style_type_three_letter,
409     Cn3d_backbone_label_style_number_sequential,
410     FALSE,
411     TRUE,
412     TRUE
413 };
414 */
415 
416 /*---------------------------------------------------------------------------*/
417 /*---------------------------------------------------------------------------*/
418 /* Function prototypes                                                       */
419 /*---------------------------------------------------------------------------*/
420 /*---------------------------------------------------------------------------*/
421 
422 /*---------------------------------------------------------------------------*/
423 /* Cdd asn1 reader and writer wrappers                                       */
424 /*---------------------------------------------------------------------------*/
425 Boolean     LIBCALL CddWriteToFile(CddPtr pcdd, CharPtr cFile, Boolean bBin);
426 CddPtr      LIBCALL CddReadFromFile(CharPtr cFile, Boolean bBin);
427 SeqAnnotPtr LIBCALL SeqAnnotReadFromFile(CharPtr cFile, Boolean bBin);
428 SeqAlignPtr LIBCALL SeqAlignReadFromFile(CharPtr cFile, Boolean bBin);
429 
430 Boolean     LIBCALL CddTreeWriteToFile(CddTreePtr pcddt, CharPtr cFile, Boolean bBin);
431 CddTreePtr  LIBCALL CddTreeReadFromFile(CharPtr cFile, Boolean bBin);
432 
433 /*---------------------------------------------------------------------------*/
434 /* Cdd Data manipulations and queries                                        */
435 /*---------------------------------------------------------------------------*/
436 void       LIBCALL CddAssignDescr(CddPtr pcdd, Pointer pThis, Int4 iWhat, Int4 iIval);
437 Boolean    LIBCALL CddKillDescr(CddPtr pcdd, Pointer pThis, Int4 iWhat, Int4 iIval);
438 CharPtr    LIBCALL CddGetAccession(CddPtr pcdd);
439 Int4       LIBCALL CddGetVersion(CddPtr pcdd);
440 OrgRefPtr  LIBCALL CddGetOrgRef(CddPtr pcdd);
441 Int4       LIBCALL CddGetPssmId(CddPtr pcdd);
442 Int4       LIBCALL CddGetPmIds(CddPtr pcdd, Int4Ptr iPMids);
443 CharPtr    LIBCALL CddGetDescr(CddPtr pcdd);
444 DatePtr    LIBCALL CddGetCreateDate(CddPtr pcdd);
445 DatePtr    LIBCALL CddGetUpdateDate(CddPtr pcdd);
446 CharPtr    LIBCALL CddGetSource(CddPtr pcdd);
447 CharPtr    LIBCALL CddGetSourceId(CddPtr pcdd);
448 Int4       LIBCALL CddGetStatus(CddPtr pcdd);
449 Int4       LIBCALL CddGetAlignmentLength(CddPtr pcdd);
450 Int4Ptr    LIBCALL GetAlignmentSize(SeqAlignPtr salp);
451 ValNodePtr LIBCALL CddGetAnnotNames(CddPtr pcdd);
452 Boolean    LIBCALL CddHasDescription(CddPtr pcdd, CharPtr pc);
453 Boolean    LIBCALL CddHasAnnotation(CddPtr pcdd);
454 Boolean    LIBCALL CddMasterIs3D(CddPtr pcdd);
455 Int4       LIBCALL CddCount3DAlignments(CddPtr pcdd);
456 Boolean    LIBCALL SeqAlignHasConsensus(SeqAlignPtr salp);
457 Boolean    LIBCALL SipIsConsensus(SeqIdPtr sip);
458 Boolean    LIBCALL CddHasConsensus(CddPtr pcdd);
459 void       LIBCALL CddRegularizeFileName(CharPtr cIn, CharPtr cAcc, CharPtr cFn, CharPtr cEx);
460 Boolean    LIBCALL CddCheckForRepeats(CddPtr pcdd, Int4 width, Int4 GapI, Int4 GapE,
461                                       Nlm_FloatHi rthresh, Boolean bOutput);
462 void       LIBCALL CddTruncStringAtFirstPunct(CharPtr pChar);
463 void       LIBCALL CddFillBlanksInString(CharPtr pChar);
464 Boolean    LIBCALL CddFeaturesAreConsistent(CddPtr pcdd, CharPtr errmsg);
465 Boolean    LIBCALL CddHas3DSuperpos(CddPtr pcdd);
466 Boolean    LIBCALL CddHasPendingAlignments(CddPtr pcdd);
467 CddPtr     LIBCALL CddFreeCarefully(CddPtr pcdd);
468 
469 /*---------------------------------------------------------------------------*/
470 /* report Errors in processing and exit immediately                          */
471 /*---------------------------------------------------------------------------*/
472 void LIBCALL CddSimpleHtmlError(CharPtr cErrTxt);
473 void LIBCALL CddSevError(CharPtr cErrTxt);
474 
475 /*---------------------------------------------------------------------------*/
476 /* extract BioSeqs from the list stored in the CD                            */
477 /*---------------------------------------------------------------------------*/
478 BioseqPtr   LIBCALL CddFindSip(SeqIdPtr sip, SeqEntryPtr sep);
479 BioseqPtr   LIBCALL CddBioseqCopy(SeqIdPtr newid, BioseqPtr oldbsp, Int4 from,
480                                   Int4 to, Uint1 strand, Boolean do_feat);
481 BioseqPtr   LIBCALL CddExtractBioseq(SeqEntryPtr sep, SeqIdPtr sip);
482 void        LIBCALL CddShrinkBioseq(BioseqPtr bsp);
483 SeqAnnotPtr LIBCALL CddFindMMDBIdInBioseq(BioseqPtr bsp, Int4 *iMMDBid);
484 
485 /*---------------------------------------------------------------------------*/
486 /* Cdd specific sequence alignment format converters                         */
487 /*---------------------------------------------------------------------------*/
488 SeqAlignPtr LIBCALL CddMSLMixedToMSLDenDiag(SeqAlignPtr salp);
489 SeqAlignPtr LIBCALL CddMSLDenDiagToMSLDenSeg(SeqAlignPtr salp);
490 SeqAlignPtr LIBCALL CddMSLDenSegToMSLDenDiag(SeqAlignPtr salp);
491 SeqAlignPtr LIBCALL CddMSLDenDiagToMULDenDiag(SeqAlignPtr salp);
492 Int2        LIBCALL CddTrimSeqAligns(CddPtr pcdd);
493 
494 /*---------------------------------------------------------------------------*/
495 /* various routines for calculating PSSM/Alignment information content       */
496 /*---------------------------------------------------------------------------*/
497 Nlm_FloatHi    LIBCALL SeqAlignConservation(SeqAlignPtr salp, Nlm_FloatHi fract,BioseqPtr bsp_master, Boolean bHasConsensus, Int4 offset);
498 Nlm_FloatHiPtr LIBCALL SeqAlignInform(SeqAlignPtr salp, BioseqPtr bsp_master,Boolean bHasConsensus,Int4 offset);
499 Int4 **        LIBCALL CddCountResTypes(CddPtr pcdd, Int4 *ncols);
500 Nlm_FloatHiPtr LIBCALL CddAlignInform(CddPtr pcdd, Nlm_FloatHi * Niobs);
501 Nlm_FloatHiPtr LIBCALL CddPssmInform(CddPtr pcdd);
502 Nlm_FloatHiPtr LIBCALL CddPosFreqInform(Nlm_FloatHi **posFreq, Int4 ncol, Int4 nrow);
503 
504 /*---------------------------------------------------------------------------*/
505 /* Create a BlastOptionsPtr for the PSSM Computations                        */
506 /*---------------------------------------------------------------------------*/
507 static PGPBlastOptionsPtr CddReadBlastOptions(BioseqPtr bsp, Int4 iPseudo, CharPtr matrix_name);
508 
509 /*---------------------------------------------------------------------------*/
510 /* Calculate PSSMs for DenseDiag (or DenseSeg) Master_Slave alignment sets   */
511 /*---------------------------------------------------------------------------*/
512 static void BlastKarlinBlkCopy(BLAST_KarlinBlkPtr kbp_in,BLAST_KarlinBlkPtr kbp_out);
513 void LIBCALL CddDenDiagCposComputation(SeqAlignPtr listOfSeqAligns, BioseqPtr bsp,
514                                        BioseqPtr bspF, CddPtr pcdd, Int4 pseudoCnt);
515 void LIBCALL CddCposComputation(SeqAlignPtr listOfSeqAligns, BioseqPtr bsp, CddPtr pcdd);
516 static void CddputMatrixKbp(FILE *checkFile, BLAST_KarlinBlkPtr kbp,
517                             Boolean scaling, Nlm_FloatHi scalingDown);
518 static void CddputMatrixMatrix(FILE *checkFile,
519                                compactSearchItems *compactSearch,
520 			       posSearchItems *posSearch,
521 			       Boolean scaleScores);
522 static Boolean CddtakeMatrixCheckpoint(compactSearchItems *compactSearch,
523                                        posSearchItems *posSearch,
524 				       BLAST_ScoreBlkPtr sbp,
525                                        Char *fileName,ValNodePtr *error_return,
526 				       Boolean scaleScores,
527 				       Nlm_FloatHi scalingFactor);
528 /*---------------------------------------------------------------------------*/
529 /* calculate a PSSM                                                          */
530 /*---------------------------------------------------------------------------*/
531 Seq_Mtf * LIBCALL CddDenDiagCposComp2(BioseqPtr bspFake, Int4 iPseudo,
532                                       SeqAlignPtr salp, CddPtr pcdd,
533 				      BioseqPtr bspOut, double Weight,
534 				      double ScaleFactor, CharPtr matrix_name);
535 /*---------------------------------------------------------------------------*/
536 /* calculate a PSSM and also fill the kbp data structure. Must be initialized*/
537 /* with BlastKarlinBlkCreate();                                              */
538 /*---------------------------------------------------------------------------*/
539 Seq_Mtf * LIBCALL CddDenDiagCposComp2KBP(BioseqPtr bspFake, Int4 iPseudo,
540                                          SeqAlignPtr salp, CddPtr pcdd,
541 				         BioseqPtr bspOut, double Weight,
542 				         double ScaleFactor, CharPtr matrix_name,
543 				         BLAST_KarlinBlkPtr kbp);
544 
545 /*---------------------------------------------------------------------------*/
546 /* this function combines CddCposComputation and CddDenDiagCposComputation   */
547 /* into 1 function.  no need to calculate fake bsp beforehand.               */
548 /*---------------------------------------------------------------------------*/
549 void LIBCALL CddCposComp(SeqAlignPtr listOfSeqAligns, BioseqPtr bsp, CddPtr pcdd);
550 
551 /*---------------------------------------------------------------------------*/
552 /* same as CddCposComp, except pssm is put in a Seq_Mtf instead of a Cdd     */
553 /*---------------------------------------------------------------------------*/
554 Seq_Mtf* LIBCALL CddCalcPSSM(SeqAlignPtr listOfSeqAligns, BioseqPtr bsp,
555                              double Weight, double ScaleFactor);
556 Int4     LIBCALL CddGetNewIndexForThreading(char InChar, char* Output);
557 
558 /*---------------------------------------------------------------------------*/
559 /* Utility Functions for PSSM calculation                                    */
560 /*---------------------------------------------------------------------------*/
561 Int4 LIBCALL CddCountSeqAligns(SeqAlignPtr listOfSeqAligns, Int4 * numSequences);
562 Int4 LIBCALL CddCountDenDiagSeqAligns(SeqAlignPtr listOfSeqAligns, Int4 * numSequences);
563 
564 /*---------------------------------------------------------------------------*/
565 /* assign the range of the master sequence involved in alignments            */
566 /*---------------------------------------------------------------------------*/
567 void LIBCALL CddAssignProfileRange(CddPtr pcdd, SeqIdPtr sip);
568 
569 /*---------------------------------------------------------------------------*/
570 /* return a pointer to a specific bioseq from a seq-entry set                */
571 /*---------------------------------------------------------------------------*/
572 BioseqPtr LIBCALL CddFindBioseqInSeqEntry(SeqEntryPtr sep, SeqIdPtr sip);
573 
574 /*---------------------------------------------------------------------------*/
575 /* Are two Sequence-Id's the same?                                           */
576 /*---------------------------------------------------------------------------*/
577 Boolean LIBCALL CddSameSip(SeqIdPtr sip1, SeqIdPtr sip2);
578 
579 /*---------------------------------------------------------------------------*/
580 /* add an offset to a Seqalign                                               */
581 /*---------------------------------------------------------------------------*/
582 void LIBCALL CddReindexMSLDenSegMaster(SeqAlignPtr salp, Int4 offset);
583 void LIBCALL CddReindexMSLDenDiagMaster(SeqAlignPtr salp, Int4 offset);
584 
585 /*---------------------------------------------------------------------------*/
586 /* Transfer alignment annotation between sequences in the alignment          */
587 /* and return a list detailing a sequence location in residue numbers        */
588 /*---------------------------------------------------------------------------*/
589 static  SeqIdPtr        CddFindSeqIdInSeqLoc(SeqLocPtr location);
590         Int4Ptr LIBCALL CddGetFeatLocList(SeqLocPtr location, Int4 *nres);
591 static  void            CddRelocateSeqLoc(SeqLocPtr location, SeqIdPtr sip, Int4 *ali);
592 static  Int4            CddSeqLocInExpAlign(SeqLocPtr location, CddExpAlignPtr eap);
593 static  SeqIdPtr        CddFindSeqIdInAlignAnnot(AlignAnnotPtr oldannot);
594         void    LIBCALL CddTransferAlignAnnot(AlignAnnotPtr oldannot,
595                                               SeqIdPtr newMaster,
596 				              SeqAlignPtr salp,
597 				              BioseqSetPtr bssp);
598 
599 /*---------------------------------------------------------------------------*/
600 /* reindex a Seqalign to a new "Master"                                      */
601 /*---------------------------------------------------------------------------*/
602 SeqAlignPtr LIBCALL CddReindexSeqAlign(SeqAlignPtr salp, SeqIdPtr sipMaster,
603                                        BioseqSetPtr bssp);
604 
605 /*---------------------------------------------------------------------------*/
606 /* Make a copy of a DenDiag Master-Slave list alignment                      */
607 /*---------------------------------------------------------------------------*/
608 SeqAlignPtr LIBCALL CddCopyMSLDenDiag(SeqAlignPtr salp);
609 
610 /*---------------------------------------------------------------------------*/
611 /* Utility functions for Alignment Reindexing and pairwise comparisons       */
612 /*---------------------------------------------------------------------------*/
613 CddExpAlignPtr         CddExpAlignNew();
614 CddExpAlignPtr         CddExpAlignFree(CddExpAlignPtr pCDea);
615 void                   CddExpAlignAlloc(CddExpAlignPtr pCDea, Int4 iLength);
616 CddExpAlignPtr         CddExpAlignRevert(CddExpAlignPtr pCDea, Int4 iLength);
617 CddExpAlignPtr         CddReindexExpAlign(CddExpAlignPtr pCDea1, Int4 newlength, CddExpAlignPtr pCDea2, Int4 iOuter, Int4 iInner);
618 CddExpAlignPtr LIBCALL SeqAlignToCddExpAlign(SeqAlignPtr salp, SeqEntryPtr sep);
619 CddExpAlignPtr         InvertCddExpAlign(CddExpAlignPtr pCDea, SeqEntryPtr sep);
620 SeqAlignPtr            CddExpAlignToSeqAlign(CddExpAlignPtr pCDea, Int4Ptr iBreakAfter);
621 Int2           LIBCALL CddGetProperBlocks(CddPtr pcdd, Boolean modify, Int4Ptr *iBreakAfter);
622 FloatHi                CddGetPairId(TrianglePtr pTri, Int4 idx1, Int4 idx2);
623 static Boolean         HitYet(Int4Ptr retlist, Int4 index, Int4 i);
624 Int4Ptr                CddMostDiverse(TrianglePtr pTri, Int4 length, Int4 maxdiv);
625 Int4Ptr                CddMostSimilarToQuery(ScorePtr psc, Int4 length);
626 BioseqPtr              CddRetrieveBioseqById(SeqIdPtr sip, SeqEntryPtr sep);
627 TrianglePtr            CddCalculateTriangle(CddPtr pcdd);
628 ScorePtr               CddCalculateQuerySim(CddPtr pcdd, SeqAlignPtr salp);
629 
630 /*---------------------------------------------------------------------------*/
631 /* rips out and returns a PDBSeqId from a SeqId                              */
632 /*---------------------------------------------------------------------------*/
633 PDBSeqIdPtr LIBCALL CddGetPdbSeqId(SeqIdPtr sip);
634 
635 /*---------------------------------------------------------------------------*/
636 /* Make a Consensus Sequence and reindex alignment                           */
637 /*---------------------------------------------------------------------------*/
638 SeqAnnotPtr LIBCALL CddSeqAnnotForSeqAlign (SeqAlignPtr salp);
639 SeqAlignPtr LIBCALL CddSeqAlignDup (SeqAlignPtr salp);
640 SeqAlignPtr LIBCALL SeqAlignSetDup(SeqAlignPtr salp);
641 void        LIBCALL CddDegapSeqAlign(SeqAlignPtr salp);
642 SeqIdPtr    LIBCALL CddSeqIdDupPDBGI(SeqIdPtr sipold);
643 SeqAlignPtr LIBCALL CddConsensus(SeqAlignPtr salp, SeqEntryPtr sep, BioseqPtr bsp, SeqIntPtr range, BioseqPtr *bspCons, SeqAlignPtr *salpCons);
644 Boolean     LIBCALL CddRemoveConsensus(CddPtr pcdd);
645 
646 /*---------------------------------------------------------------------------*/
647 /* dump out CD contents used for Entrez indexing in pseudo XML               */
648 /*---------------------------------------------------------------------------*/
649 static CharPtr CddFixForXML(CharPtr pc);
650 void LIBCALL CddDumpPMLinks(CddPtr pcdd, FILE *FP);
651 void LIBCALL CddDumpTaxLinks(CddPtr pcdd, FILE *FP);
652 void LIBCALL CddDumpXML(CddPtr pcdd, FILE *FP);
653 
654 /*---------------------------------------------------------------------------*/
655 /* conversions between PSSM-Ids and CDD accessions                           */
656 /*---------------------------------------------------------------------------*/
657 CddIdxDataPtr LIBCALL CddIdxDataNew(CharPtr acc, Int4 uid);
658 CddIdxDataPtr LIBCALL CddIdxDataLink(CddIdxDataPtr PNTR head, CddIdxDataPtr cidp);
659 CddIdxDataPtr LIBCALL CddReadIdx(CharPtr CDDidx);
660 void LIBCALL CddAccFromPssmId(Int4 iPssmId, CharPtr cAcc, CharPtr CDDidx);
661 void LIBCALL CddPssmIdFromAcc(Int4 *iPssmId, CharPtr cAcc, CharPtr CDDidx);
662 
663 /*---------------------------------------------------------------------------*/
664 /* Bioseq retrieval from BLAST db - contributed by Ben                       */
665 /*---------------------------------------------------------------------------*/
666 Boolean   LIBCALL SeqHasTax(BioseqPtr bsp);
667 BioseqPtr LIBCALL CddReadDBGetBioseq(SeqIdPtr query, Int4 index, ReadDBFILEPtr rdfp);
668 BioseqPtr LIBCALL CddReadDBGetBioseqEx(SeqIdPtr query, Int4 index, ReadDBFILEPtr rdfp, Boolean bUseObjMgr);
669 
670 /*---------------------------------------------------------------------------*/
671 /* setting styles for Cn3D v4.x                                              */
672 /*---------------------------------------------------------------------------*/
673 /*
674 Cn3dStyleDictionaryPtr LIBCALL CddSrvGetStyle2(Int4 *styles[], Int4 nstyles);
675 static Cn3dStyleSettingsPtr CddSrvGetStyle2_Ex(Int4 style[]);
676 static Cn3dColorPtr MyCn3dColorInit(Int4 scale_factor, Int4 red, Int4 green, Int4 blue,Int4 alpha);
677 */
678 
679 #ifdef __cplusplus
680 }
681 #endif
682 
683 #endif /* CDDUTIL_H */
684