1 /* ===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information (NCBI)
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government do not place any restriction on its use or reproduction.
12 * We would, however, appreciate having the NCBI and the author cited in
13 * any work or product based on this material.
14 *
15 * Although all reasonable efforts have been taken to ensure the accuracy
16 * and reliability of the software and data, the NLM and the U.S.
17 * Government do not and cannot warrant the performance or results that
18 * may be obtained by using this software or data. The NLM and the U.S.
19 * Government disclaim all warranties, express or implied, including
20 * warranties of performance, merchantability or fitness for any particular
21 * purpose.
22 *
23 * ===========================================================================
24 *
25 * File Name: alignmgr2.c
26 *
27 * Author: Sarah Wheelan
28 *
29 * Version Creation Date: 10/01
30 *
31 * $Revision: 6.66 $
32 *
33 * File Description: SeqAlign indexing, access, and manipulation functions
34 *
35 * Modifications:
36 * --------------------------------------------------------------------------
37 * $Log: alignmgr2.c,v $
38 * Revision 6.66 2016/09/02 14:57:38 ucko
39 * Formally clean up calls to printf-family functions that are at least
40 * nominally unsafe, as already done in Debian/Ubuntu packages.
41 *
42 * Revision 6.65 2013/11/26 01:23:42 kans
43 * JIRA:GP-6623 AlnMgr2ConvertAllToDenseSeg bails specifically for Spliced-seg
44 *
45 * Revision 6.64 2013/11/26 00:15:42 kans
46 * JIRA:GP-5360 AlnMgr2ConvertAllToDenseSeg returns Boolean if not Dense-diag or Dense-seg to avoid crash on Spliced-seg
47 *
48 * Revision 6.63 2008/12/01 19:35:39 bollin
49 * prevent crash when mapping positions and row of alignment is entirely in the gapl.
50 *
51 * Revision 6.62 2007/03/09 20:37:06 bollin
52 * Fixed insidious double-increment bug in AlnMgr2MergeTwoAlignments - if the
53 * second alignment to be merged had more than one segment, the seg index was
54 * incremented past the number of segments (and some segments were not initialized).
55 *
56 * Revision 6.61 2007/01/09 14:13:52 bollin
57 * Fixed bug in AlnMgr2ExtendToCoords - prior version was not extending on 5' end.
58 *
59 * Revision 6.60 2006/09/06 15:48:33 bollin
60 * removed compiler warnings
61 *
62 * Revision 6.59 2006/09/06 15:14:54 bollin
63 * fixed bug that was generating segments of length zero at the end of an
64 * alignment
65 *
66 * Revision 6.58 2005/03/01 13:56:03 bollin
67 * if the alignment we want to index is a DenseSeg and not a list of alignments,
68 * just give it a simple index - don't decompose to pairwise and reconstruct it.
69 *
70 * Revision 6.57 2005/02/23 14:40:55 bollin
71 * when condensing columns in AlnMgr2CondenseColumns, make sure we do not
72 * disturb the ascending order of starts for each row
73 *
74 * Revision 6.56 2004/09/15 14:59:19 bollin
75 * make sure we do not read outside the alignment index arrays
76 *
77 * Revision 6.55 2004/05/20 19:46:25 bollin
78 * removed unused variables
79 *
80 * Revision 6.54 2004/05/11 13:19:49 bollin
81 * update the dimension of the shared alignment after adding a sequence.
82 *
83 * Revision 6.53 2004/04/13 14:43:07 kskatz
84 * Final resolution of revisions 6.51 and 6.52: reverted 6.52; then cleaned up readability of AlnMgr2SeqPortRead() and ensured that it will never call SeqPortRead for a length > AM_SEQPORTSIZE
85 *
86 * Revision 6.52 2004/04/12 19:52:15 kskatz
87 * Revision 6.51 was right neighborhood,wrong off-by-one: It was in AlnMgr2ComputeFreqMatrix() call to AlnMgr2SeqPortRead() when using l+AM_SEQPORTSIZE instead of l+AM_SEQPORTSIZE-1
88 *
89 * Revision 6.51 2004/04/12 17:00:44 kskatz
90 * Fixed off-by-one error in AlnMgr2SeqPortRead() length passed to SeqPortRead(); stop-start+1 changed to stop-start
91 *
92 * Revision 6.50 2004/03/11 14:15:41 bollin
93 * added extra check in AlnMgr2GetNthSeqIdPtr to avoid core dump if there are
94 * fewer than N SeqIDs in the alignment.
95 *
96 * Revision 6.49 2003/10/20 17:54:34 kans
97 * AlnMgr2ComputeFreqMatrix protect against dereferencing NULL bsp
98 *
99 * Revision 6.48 2003/10/09 13:46:52 rsmith
100 * Add AlnMgr2GetFirstNForSipList.
101 *
102 * Revision 6.47 2003/05/15 18:53:10 rsmith
103 * in AlnMgr2GetSeqRangeForSipInStdSeg always return start & stop in coordinate order. Do not assume what minus strand will do or not.
104 *
105 * Revision 6.46 2003/04/24 20:28:48 rsmith
106 * made AlnMgr2GetNthStdSeg use 1 based numbering like the other Nth functions.
107 *
108 * Revision 6.45 2003/04/23 20:36:13 rsmith
109 * Added four functions in Section 11 to get information about Std-Seg alignments.
110 *
111 * Revision 6.44 2003/03/31 20:17:11 todorov
112 * Added AlnMgr2IndexSeqAlignEx
113 *
114 * Revision 6.43 2003/02/03 12:36:22 kans
115 * AlnMgr2ComputeScoreForSeqAlign checks return value of AlnMgr2ComputeFreqMatrix, returns -1 if NULL to avoid dereference crash
116 *
117 * Revision 6.42 2002/10/23 16:32:19 todorov
118 * CondenseColumns fixed: needed to move the lens too.
119 *
120 * Revision 6.40 2002/10/16 15:54:28 todorov
121 * use the default dim value if not set
122 *
123 * Revision 6.39 2002/08/07 21:57:33 kans
124 * added AlignMgr2GetFirstNForStdSeg
125 *
126 * Revision 6.38 2002/07/11 14:35:51 kans
127 * fixed Mac complaints about prototypes
128 *
129 * Revision 6.37 2002/07/11 12:55:38 wheelan
130 * added support for std-seg alignments
131 *
132 * Revision 6.36 2002/06/04 17:43:07 todorov
133 * 1) Substituted AddInNewSA with a new and optimized AddInNewPairwiseSA function.
134 * 2) Fixed a few bugs in other functions.
135 *
136 * Revision 6.35 2002/05/17 15:04:42 wheelan
137 * bug fix in ExtendToCoords
138 *
139 * Revision 6.34 2002/05/17 11:02:36 wheelan
140 * bug fixes in Merge func
141 *
142 * Revision 6.32 2002/03/04 17:19:18 wheelan
143 * added AlnMgr2FuseSet, changed behavior of RemoveInconsistent, fixed GetNextAlnBitBugs
144 *
145 * Revision 6.31 2002/01/31 17:41:47 wheelan
146 * various bug fixes -- no more 0 len segments, better handling of rows that are one big insert, etc.
147 *
148 * Revision 6.30 2002/01/30 19:12:53 wheelan
149 * added RemoveInconsistentAlnsFromSet, ExtractPairwiseSeqAlign, changed behavior of GetSubAlign, changed structures and behavior of GetNextAlnBit, added GetInterruptInfo, added AlnMgr2IndexAsRows, bug fixes in indexing routines
150 *
151 * Revision 6.29 2002/01/02 15:05:07 wheelan
152 * changes to force more efficient ordering in CompareAsp callbacks, plus more stringent checks in AlnMgr2AddInNewSA
153 *
154 * Revision 6.28 2001/12/28 22:53:20 wheelan
155 * bug fixes; added AlnMgr2DupAlnAndIndexes, changed some New and Free funcs
156 *
157 * Revision 6.27 2001/12/27 16:07:22 wheelan
158 * bug fix in ExtendToEnd
159 *
160 * Revision 6.26 2001/12/20 19:43:20 wheelan
161 * bug fix in GetNextAlnBit -- no more incorrect inserts
162 *
163 * Revision 6.25 2001/12/18 16:36:57 wheelan
164 * scattered fixes to unaligned region code
165 *
166 * Revision 6.24 2001/12/17 19:36:39 wheelan
167 * various fixes in AlnMgr2AddInNewSA
168 *
169 * Revision 6.23 2001/12/14 12:38:50 wheelan
170 * added functions for ddv
171 *
172 * Revision 6.22 2001/12/05 12:25:49 wheelan
173 * bug fix in SortByNthRow
174 *
175 * Revision 6.21 2001/12/04 19:28:55 wheelan
176 * bug fixes in AddInNewSA and in IndexSingleDenseSegSA
177 *
178 * Revision 6.20 2001/12/04 14:31:27 wheelan
179 * fixes to avoid mistakenly processing AM2_LITE as real indexed alignments
180 *
181 * Revision 6.19 2001/11/30 16:55:21 wheelan
182 * added AlnMgr2PadConservatively
183 *
184 * Revision 6.18 2001/11/29 18:38:47 wheelan
185 * cleanup as recommended by Mac compiler
186 *
187 * Revision 6.17 2001/11/29 17:37:16 wheelan
188 * added ExtendToCoords and MergeTwoAlignments
189 *
190 * Revision 6.16 2001/11/27 15:47:40 wheelan
191 * bug fixes in AnchorSeqAlign, DoCondense, and AddInNewSA
192 *
193 * Revision 6.15 2001/11/15 18:23:06 wheelan
194 * small change in AlnMgr2GetNthRowSpan
195 *
196 * Revision 6.14 2001/11/15 18:09:38 wheelan
197 * another bug fix in AddInNewSA
198 *
199 * Revision 6.13 2001/11/15 15:30:54 wheelan
200 * many bugs fixed, leaks plugged, plus reworked AddInNewSA to use new AMSmall field
201 *
202 * Revision 6.12 2001/11/13 14:36:13 wheelan
203 * many bug fixes in AddInNewSA and MapBioseqToSeqAlign
204 *
205 * Revision 6.11 2001/11/08 19:56:07 wheelan
206 * added AlnMgr2GetNthRowSpanInSA, fixed various memory errors
207 *
208 * Revision 6.10 2001/11/08 01:39:15 wheelan
209 * many bug fixes in and around AddInNewSA
210 *
211 * Revision 6.9 2001/11/02 14:01:30 wheelan
212 * bug fixes in AlnMgr2AddInNewSA
213 *
214 * Revision 6.8 2001/10/31 12:00:46 wheelan
215 * commented out the mistakenly uncommented comment
216 *
217 * Revision 6.7 2001/10/30 20:14:38 wheelan
218 * bug fixes for minus strands in AddInNewSA, bug fix in GetSubAlign
219 *
220 * Revision 6.6 2001/10/23 12:14:27 wheelan
221 * changes in AlnMgr2GetNextAlnBit as well as tree-based multiple alignment algorithm
222 *
223 * Revision 6.5 2001/10/18 15:10:53 wheelan
224 * fixed AlnMgr2ComputeScoreForSeqAlign
225 *
226 * Revision 6.4 2001/10/16 12:00:17 wheelan
227 * added GetParent and FreeEitherIndex
228 *
229 * Revision 6.3 2001/10/08 18:43:29 wheelan
230 * added comments
231 *
232 * Revision 6.2 2001/10/03 18:13:01 wheelan
233 * changed some colliding defines
234 *
235 * Revision 6.1 2001/10/03 14:20:11 wheelan
236 * initial checkin
237 *
238 * ==========================================================================
239 *
240 */
241
242 #include <alignmgr2.h>
243
244 /***************************************************************************
245 *
246 * static functions
247 *
248 ***************************************************************************/
249 /* SECTION 1 */
250 static SARowDat2Ptr SARowDat2New(void);
251 static void SARowDat2Free(SARowDat2Ptr srdp);
252 static SARowDat2Ptr SARowDat2Copy(SARowDat2Ptr srdp);
253 static SAIndex2Ptr SAIndex2New(void);
254 static SAIndex2Ptr SAIndex2Copy(VoidPtr index);
255 static AMAlignIndex2Ptr AMAlignIndex2Copy(VoidPtr index);
256 static void AMIntervalSetFree(AMIntervalSetPtr amint);
257 /* SECTION 2 */
258 static void AlnMgr2ConvertDendiagToDensegChain(SeqAlignPtr sap);
259 static void AlnMgr2IndexSingleDenseSegSA(SeqAlignPtr sap);
260 static Boolean AlnMgr2UnpackSeqAlign(SeqAlignPtr sap);
261 static Boolean AlnMgr2ConvertAllToDenseSeg(SeqAlignPtr sap);
262 static void AlnMgr2DecomposeToPairwise(SeqAlignPtr sap);
263 static void AlnMgr2HidePairwiseConflicts(SeqAlignPtr sap);
264 static void AlnMgr2SortBySeqId(SeqAlignPtr sap);
265 static int LIBCALLBACK AlnMgr2CompareIds(VoidPtr ptr1, VoidPtr ptr2);
266 static void AlnMgr2TossWorse(SeqAlignPtr sap, Int4 i, Int4 j);
267 static AMIntervalSetPtr AlnMgr2MakeIntervals(SeqAlignPtr sap);
268 static void AlnMgr2SortIntervals(AMIntervalSetPtr amint);
269 static int LIBCALLBACK AlnMgr2CompareIntervals(VoidPtr ptr1, VoidPtr ptr2);
270 static AMVertexPtr PNTR AlnMgr2MakeVerticesFromIntervals(SeqAlignPtr sap, AMIntervalSetPtr amint_head, AMVertexPtr PNTR vertexhead, AMEdgePtr PNTR edgehead, Int4Ptr numvertices);
271 static void AlnMgr2SortVerticesByNumEdges(AMVertexPtr PNTR vertexarray, Int4 numvertices);
272 static int LIBCALLBACK AlnMgr2CompareVertices(VoidPtr ptr1, VoidPtr ptr2);
273 static void AlnMgr2SortEdgesByWeight(AMEdgePtr PNTR edge_head);
274 static int LIBCALLBACK AlnMgr2CompareEdges(VoidPtr ptr1, VoidPtr ptr2);
275 static Int4 AlnMgr2MatchToVertex(SeqIdPtr sip, Int4 start, Int4 stop, AMVertexPtr PNTR vertexarray, Int4 numvertices);
276 static void AlnMgr2UsePrimsAlgorithm(AMVertexPtr PNTR vertexarray, Int4 numvertices, AMEdgePtr edge_head);
277 static void AlnMgr2RecursePrims(AMVertexPtr PNTR vertexarray, AMEdgePtr edge_head);
278 static AMVertexPtr AlnMgr2GetBetterVertex(AMVertexPtr PNTR vertexarray, AMEdgePtr edge);
279 static AMEdgePtr AlnMgr2GetEdgeList(Int4 vertexnum, AMEdgePtr edge_head, AMEdgePtr already_used);
280 static void AlnMgr2CleanUpLeftovers(AMVertexPtr PNTR vertexarray, Int4 numvertices, AMEdgePtr edge_head);
281 static Boolean AlnMgr2SameSeq(AMVertexPtr vertex1, AMVertexPtr vertex2);
282 static void AlnMgr2BuildAlignmentFromTree(AMVertexPtr PNTR vertexarray, Int4 numvertices, AMEdgePtr edge_head, SeqAlignPtr sap);
283 static AMVertexPtr AlnMgr2GetAdjacentVertices(AMVertexPtr vertex, AMVertexPtr PNTR vertexarray, AMEdgePtr edge_head);
284 static void AlnMgr2AddInNewSA(SeqAlignPtr parent, SeqAlignPtr sap);
285 static void AlnMgr2AddInNewPairwiseSA(SeqAlignPtr parent, SeqAlignPtr sap);
286 static Int4 AlnMgr2MapSegStartToSegStart(SeqAlignPtr sap, Int4 pos, Int4 row1, Int4 row2, Int4 len);
287 static Int4 AlnMgr2GetSegForStartPos(SeqAlignPtr sap, Int4 pos, Int4 row);
288 static void AlnMgr2CondenseColumns(DenseSegPtr dsp);
289 static void AlnMgr2CondenseRows(DenseSegPtr dsp, Int4 whichrow);
290 static Boolean AlnMgr2DoCondense(DenseSegPtr dsp, Int4 rownum1, Int4 rownum2);
291 static int LIBCALLBACK AlnMgr2CompareCdRows(VoidPtr ptr1, VoidPtr ptr2);
292 static int LIBCALLBACK AlnMgr2CompareAsps(VoidPtr ptr1, VoidPtr ptr2);
293 static int LIBCALLBACK AlnMgr2CompareAspsMinus(VoidPtr ptr1, VoidPtr ptr2);
294 static void AlnMgr2GetFirstSharedRow(SeqAlignPtr sap1, SeqAlignPtr sap2, Int4Ptr n1, Int4Ptr n2);
295 static SeqIdPtr AlnMgr2SeqIdListsOverlap(SeqIdPtr sip1, SeqIdPtr sip2);
296 static Int4 AlnMgr2OrderSeqIds(SeqIdPtr sip1, SeqIdPtr sip2);
297 static void AlnMgr2SetUnaln(SeqAlignPtr sap);
298 static int LIBCALLBACK AlnMgr2CompareUnalnAMS(VoidPtr ptr1, VoidPtr ptr2);
299 /* SECTION 4 */
300 static Int4 binary_search_on_uint4_list(Uint4Ptr list, Uint4 pos, Uint4 listlen);
301 static Int4 binary_search_on_uint2_list(Uint2Ptr list, Int4 ele, Uint2 listlen);
302 static void AlnMgr2GetUnalignedInfo(SeqAlignPtr sap, Int4 segment, Int4 row, Int4Ptr from, Int4Ptr to);
303 static void AlnMgr2GetNthSeqRangeInSASet(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop);
304 static Int4 AlnMgr2GetMaxUnalignedLength(SeqAlignPtr sap, Int4 seg);
305 /* SECTION 5 */
306 static void AlnMgr2AnchorChild(SeqAlignPtr sap, Int4 which_row);
307 /* SECTION 8 */
308 static Int4 AlnMgr2GetScoreForPair(Int4 res1, Int4 res2, Boolean is_prot);
309 /* SECTION 9 */
310 static int LIBCALLBACK AMCompareStarts(VoidPtr ptr1, VoidPtr ptr2);
311
312
313 typedef struct am_seqpieceset AMSeqPieceSet, PNTR AMSeqPieceSetPtr;
314 typedef struct am_seqpiece AMSeqPiece, PNTR AMSeqPiecePtr;
315
316 struct am_seqpiece {
317 Int4 beg;
318 Int4 end;
319 Int4 left;
320 Int4 right;
321 Int4 orig_left;
322 Int4 orig_right;
323 Boolean aligned;
324 Int4 seg;
325 Int4 pos;
326 DenseSegPtr alt_dsp;
327 Int4 alt_seg;
328 Int4 alt_pos;
329 AMSeqPiecePtr next;
330 AMSeqPiecePtr prev;
331 AMSeqPieceSetPtr set;
332 };
333
334 struct am_seqpieceset {
335 AMSeqPiecePtr head;
336 AMSeqPiecePtr tail;
337 DenseSegPtr dsp;
338 DenseSegPtr alt_dsp;
339 Int4 row;
340 Int4 row2;
341 Int4 alt_row;
342 Int4 alt_row2;
343 Uint1 strand;
344 Boolean plus;
345 Int4 max_pos;
346 Boolean sign;
347 AMSeqPieceSetPtr next;
348 };
349
350
351
352 /***************************************************************************
353 *
354 * SECTION 1: Functions for allocating and freeing data structures used
355 * by the alignment manager; copying functions are also here.
356 *
357 ***************************************************************************/
358
359 /* SECTION 1 */
SARowDat2New(void)360 static SARowDat2Ptr SARowDat2New(void)
361 {
362 return (SARowDat2Ptr)MemNew(sizeof(SARowDat2));
363 }
364
365 /* SECTION 1 */
/* Releases a SARowDat2 together with its four index arrays (sect, unsect,
   insect, unaligned).  A NULL argument is ignored. */
static void SARowDat2Free(SARowDat2Ptr srdp)
{
   if (srdp != NULL)
   {
      if (srdp->sect != NULL)
      {
         MemFree(srdp->sect);
      }
      if (srdp->unsect != NULL)
      {
         MemFree(srdp->unsect);
      }
      /* insect and unaligned are handed to MemFree unconditionally */
      MemFree(srdp->insect);
      MemFree(srdp->unaligned);
      MemFree(srdp);
   }
}
378
379 /* SECTION 1 */
/* Returns a deep copy of srdp: the four counts are copied and each of the
   four Uint2 arrays is duplicated into newly allocated storage.  Returns
   NULL when srdp is NULL.  The caller owns the result (SARowDat2Free). */
static SARowDat2Ptr SARowDat2Copy(SARowDat2Ptr srdp)
{
   SARowDat2Ptr  dup;
   Int4          k;

   if (srdp == NULL)
      return NULL;
   dup = SARowDat2New();

   /* counts first: they size the arrays allocated below */
   dup->numsect = srdp->numsect;
   dup->numunsect = srdp->numunsect;
   dup->numinsect = srdp->numinsect;
   dup->numunaln = srdp->numunaln;

   dup->sect = (Uint2Ptr)MemNew(dup->numsect*sizeof(Uint2));
   dup->unsect = (Uint2Ptr)MemNew(dup->numunsect*sizeof(Uint2));
   dup->insect = (Uint2Ptr)MemNew(dup->numinsect*sizeof(Uint2));
   dup->unaligned = (Uint2Ptr)MemNew(dup->numunaln*sizeof(Uint2));

   for (k = 0; k < dup->numsect; k++)
      dup->sect[k] = srdp->sect[k];
   for (k = 0; k < dup->numunsect; k++)
      dup->unsect[k] = srdp->unsect[k];
   for (k = 0; k < dup->numinsect; k++)
      dup->insect[k] = srdp->insect[k];
   for (k = 0; k < dup->numunaln; k++)
      dup->unaligned[k] = srdp->unaligned[k];

   return dup;
}
414
415 /* SECTION 1 */
SAIndex2New(void)416 static SAIndex2Ptr SAIndex2New(void)
417 {
418 SAIndex2Ptr saip;
419
420 saip = (SAIndex2Ptr)MemNew(sizeof(SAIndex2));
421 saip->indextype = INDEX_CHILD;
422 saip->freefunc = (SeqAlignIndexFreeFunc)(SAIndex2Free2);
423 saip->anchor = -1;
424 return saip;
425 }
426
427 /* SECTION 1 */
SAIndex2Free2(VoidPtr index)428 NLM_EXTERN Boolean LIBCALLBACK SAIndex2Free2(VoidPtr index)
429 {
430 Int4 i;
431 SAIndex2Ptr saip;
432
433 if (index == NULL)
434 return TRUE;
435 saip = (SAIndex2Ptr)(index);
436 MemFree(saip->aligncoords);
437 for (i=0; i<saip->numrows; i++)
438 {
439 SARowDat2Free(saip->srdp[i]);
440 }
441 MemFree(saip->srdp);
442 MemFree(saip);
443 return TRUE;
444 }
445
446 /* SECTION 1 */
AlnMgr2FreeInterruptInfo(AMInterrInfoPtr interr)447 NLM_EXTERN void AlnMgr2FreeInterruptInfo(AMInterrInfoPtr interr)
448 {
449 if (interr == NULL)
450 return;
451 MemFree(interr->starts);
452 MemFree(interr->lens);
453 MemFree(interr->types);
454 MemFree(interr);
455 }
456
457 /* SECTION 1*/
SAIndex2Copy(VoidPtr index)458 static SAIndex2Ptr SAIndex2Copy(VoidPtr index)
459 {
460 Int4 i;
461 SAIndex2Ptr saip;
462 SAIndex2Ptr saip2;
463
464 saip2 = SAIndex2New();
465 saip = (SAIndex2Ptr)(index);
466 saip2->numseg = saip->numseg;
467 saip2->aligncoords = (Uint4Ptr)MemNew(saip2->numseg*sizeof(Uint4));
468 for (i=0; i<saip2->numseg; i++)
469 {
470 saip2->aligncoords[i] = saip->aligncoords[i];
471 }
472 saip2->anchor = saip->anchor;
473 saip2->numrows = saip->numrows;
474 saip2->numseg = saip->numseg;
475 saip2->srdp = (SARowDat2Ptr PNTR)MemNew(saip2->numrows*sizeof(SARowDat2));
476 for (i=0; i<saip2->numrows; i++)
477 {
478 saip2->srdp[i] = SARowDat2Copy(saip->srdp[i]);
479 }
480 saip2->numunaln = saip->numunaln;
481 saip2->unaln = (Uint4Ptr)MemNew(saip2->numunaln*sizeof(Uint4));
482 for (i=0; i<saip2->numunaln; i++)
483 {
484 saip2->unaln[i] = saip->unaln[i];
485 }
486 saip2->numinchain = saip->numinchain;
487 saip2->numsplitaln = saip->numsplitaln;
488 saip2->score = saip->score;
489 saip2->aligned = saip->aligned;
490 return saip2;
491 }
492
493 /* SECTION 1 */
AMAlignIndex2New(void)494 static AMAlignIndex2Ptr AMAlignIndex2New(void)
495 {
496 AMAlignIndex2Ptr amaip;
497
498 amaip = (AMAlignIndex2Ptr)MemNew(sizeof(AMAlignIndex2));
499 amaip->indextype = INDEX_PARENT;
500 amaip->freefunc = (SeqAlignIndexFreeFunc)(AMAlignIndex2Free2);
501 return amaip;
502 }
503
504 /* SECTION 1 */
AMAlignIndex2Free2(VoidPtr index)505 NLM_EXTERN Boolean LIBCALLBACK AMAlignIndex2Free2(VoidPtr index)
506 {
507 AMAlignIndex2Ptr amaip;
508 Int4 i;
509
510 if (index == NULL)
511 return FALSE;
512 amaip = (AMAlignIndex2Ptr)(index);
513 for (i=0; i<amaip->numrows; i++)
514 {
515 SeqIdFree(amaip->ids[i]);
516 }
517 MemFree(amaip->ids);
518 MemFree(amaip->saps);
519 MemFree(amaip->aligned);
520 SeqAlignFree(amaip->sharedaln);
521 MemFree(amaip);
522 return TRUE;
523 }
524
525 /* SECTION 1 */
AMAlignIndex2Copy(VoidPtr index)526 static AMAlignIndex2Ptr AMAlignIndex2Copy(VoidPtr index)
527 {
528 AMAlignIndex2Ptr amaip;
529 AMAlignIndex2Ptr amaip2;
530 Int4 i;
531
532 if (index == NULL)
533 return NULL;
534 amaip = (AMAlignIndex2Ptr)(index);
535 amaip2 = AMAlignIndex2New();
536 amaip2->alnstyle = amaip->alnstyle;
537 amaip2->anchor = amaip->anchor;
538 amaip2->numrows = amaip->numrows;
539 amaip2->ids = (SeqIdPtr PNTR)MemNew(amaip2->numrows*sizeof(SeqIdPtr));
540 for (i=0; i<amaip2->numrows; i++)
541 {
542 amaip2->ids[i] = SeqIdDup(amaip->ids[i]);
543 }
544 amaip2->numsaps = amaip->numsaps;
545 amaip2->saps = (SeqAlignPtr PNTR)MemNew(amaip2->numsaps*sizeof(SeqAlignPtr));
546 amaip2->aligned = (Boolean PNTR)MemNew(amaip2->numsaps*sizeof(Boolean));
547 for (i=0; i<amaip2->numsaps; i++)
548 {
549 amaip2->saps[i] = SeqAlignDup(amaip->saps[i]);
550 amaip2->aligned[i] = amaip->aligned[i];
551 if (i>0)
552 amaip2->saps[i-1]->next = amaip2->saps[i];
553 }
554 amaip2->sharedaln = AlnMgr2DupAlnAndIndexes(amaip->sharedaln);
555 return amaip2;
556 }
557
558 /* SECTION 1 */
AMAlignIndexFreeEitherIndex(SeqAlignPtr sap)559 NLM_EXTERN void AMAlignIndexFreeEitherIndex(SeqAlignPtr sap)
560 {
561 if (sap == NULL || sap->saip == NULL)
562 return;
563 if (sap->saip->indextype == INDEX_PARENT)
564 AMAlignIndex2Free2(sap->saip);
565 else
566 SAIndex2Free2(sap->saip);
567 sap->saip = NULL;
568 }
569
570 /* SECTION 1 */
AlnMgr2DupAlnAndIndexes(SeqAlignPtr sap)571 NLM_EXTERN SeqAlignPtr AlnMgr2DupAlnAndIndexes(SeqAlignPtr sap)
572 {
573 AMAlignIndex2Ptr amaip;
574 SAIndex2Ptr saip;
575 SeqAlignPtr sap_new;
576
577 if (sap == NULL)
578 return NULL;
579 if (sap->saip == NULL)
580 return (SeqAlignDup(sap));
581 sap_new = NULL;
582 if (sap->saip->indextype == INDEX_CHILD)
583 {
584 sap_new = SeqAlignDup(sap);
585 sap_new->saip = (Pointer)SAIndex2Copy(sap->saip);
586 saip = (SAIndex2Ptr)(sap_new->saip);
587 saip->top = AlnMgr2GetParent(sap);
588 } else if (sap->saip->indextype == INDEX_PARENT)
589 {
590 sap_new = SeqAlignNew();
591 sap_new->type = sap->type;
592 sap_new->segtype = sap->segtype;
593 sap_new->saip = (Pointer)(AMAlignIndex2Copy(sap->saip));
594 amaip = (AMAlignIndex2Ptr)(sap_new->saip);
595 sap_new->segs = amaip->saps[0];
596 }
597 return sap_new;
598 }
599
600 /* SECTION 1 */
AlnMsgNew2(void)601 NLM_EXTERN AlnMsg2Ptr AlnMsgNew2(void)
602 {
603 AlnMsg2Ptr amp;
604
605 amp = (AlnMsg2Ptr)MemNew(sizeof(AlnMsg2));
606 amp->real_from = -2;
607 return amp;
608 }
609
610 /* SECTION 1 */
AlnMsgFree2(AlnMsg2Ptr amp)611 NLM_EXTERN AlnMsg2Ptr AlnMsgFree2(AlnMsg2Ptr amp)
612 {
613 if (amp->left_interrupt != NULL)
614 {
615 MemFree(amp->left_interrupt);
616 amp->left_interrupt = NULL;
617 }
618 if (amp->right_interrupt != NULL)
619 {
620 MemFree(amp->right_interrupt);
621 amp->right_interrupt = NULL;
622 }
623 MemFree(amp);
624 return NULL;
625 }
626
627 /* SECTION 1 */
AlnMsgReNew2(AlnMsg2Ptr amp)628 NLM_EXTERN void AlnMsgReNew2(AlnMsg2Ptr amp)
629 {
630 if (amp == NULL)
631 return;
632 if (amp->left_interrupt != NULL)
633 {
634 MemFree(amp->left_interrupt);
635 amp->left_interrupt = NULL;
636 }
637 if (amp->right_interrupt != NULL)
638 {
639 MemFree(amp->right_interrupt);
640 amp->right_interrupt = NULL;
641 }
642 amp->real_from = -2;
643 amp->len = -2;
644 return;
645 }
646
647 /* SECTION 1 */
/* Frees one AMIntervalSet: every interval on its int_head list, its SeqId
   (sip) and the set structure itself.  Only the set passed in is released.

   amint -- the set to free; NULL is accepted and ignored, consistent with
            the other free functions in this section. */
static void AMIntervalSetFree(AMIntervalSetPtr amint)
{
   AMIntervalPtr  intv;
   AMIntervalPtr  intv_next;

   if (amint == NULL)
      return;
   intv = amint->int_head;
   while (intv != NULL)
   {
      /* remember the successor before the node is released */
      intv_next = intv->next;
      MemFree(intv);
      intv = intv_next;
   }
   SeqIdFree(amint->sip);
   MemFree(amint);
}
663
664 /* SECTION 1 */
AMFreqFree(AMFreqPtr afp)665 NLM_EXTERN void AMFreqFree(AMFreqPtr afp)
666 {
667 Int4 i;
668
669 if (afp == NULL)
670 return;
671 for (i=0; i<afp->size; i++)
672 {
673 MemFree(afp->freq[i]);
674 }
675 MemFree(afp->freq);
676 MemFree(afp);
677 }
678
679 /* SECTION 1 */
/* Frees a whole chain of AMSeqPieceSets (following ->next) and, for each
   set, every piece on its head list.  The dsp / alt_dsp pointers held by the
   sets and pieces are not freed.  A NULL argument is a no-op. */
static void AMSeqPieceSetFree(AMSeqPieceSetPtr s_set)
{
   AMSeqPieceSetPtr  following_set;
   AMSeqPiecePtr     piece;
   AMSeqPiecePtr     following_piece;

   for ( ; s_set != NULL; s_set = following_set)
   {
      for (piece = s_set->head; piece != NULL; piece = following_piece)
      {
         following_piece = piece->next;
         MemFree(piece);
      }
      following_set = s_set->next;
      MemFree(s_set);
   }
}
697
698 /***************************************************************************
699 *
700 * SECTION 2: Functions used to create the indexes for parent and child
701 * seqaligns.
702 * SECTION 2a: Functions to create indexes for child seqaligns, and
703 * to convert seqaligns to dense-seg type
704 * SECTION 2b: Functions to unpack and rearrange complicated seqaligns
705 * into simple chains of dense-seg and dense-diag types
706 * SECTION 2c: Functions to create indexes for parent seqaligns
707 * SECTION 2d: Accessory functions for parent indexing
708 *
709 ***************************************************************************/
710
711 /***************************************************************************
712 *
713 * AlnMgr2ConvertDendiagToDensegChain takes a dense-diag style alignment
714 * and makes each diag into its own denseg seqalign, then links the new
715 * alignments together.
716 *
717 ***************************************************************************/
718 /* SECTION 2a */
AlnMgr2ConvertDendiagToDensegChain(SeqAlignPtr sap)719 static void AlnMgr2ConvertDendiagToDensegChain(SeqAlignPtr sap)
720 {
721 DenseDiagPtr ddp;
722 DenseDiagPtr ddp_next;
723 DenseSegPtr dsp;
724 Int4 i;
725 SeqAlignPtr sap_new;
726 SeqAlignPtr sap_next;
727 SeqAlignPtr sap_prev;
728
729 if (sap == NULL || sap->segtype != SAS_DENDIAG)
730 return;
731 sap_next = sap->next;
732 ddp = (DenseDiagPtr)(sap->segs);
733 /* convert the first diag to dense-seg and put it in the original alignment */
734 dsp = DenseSegNew();
735 dsp->ids = ddp->id;
736 ddp->id = NULL;
737 dsp->dim = ddp->dim;
738 dsp->numseg = 1;
739 dsp->starts = (Int4Ptr)MemNew((dsp->dim)*(dsp->numseg)*sizeof(Int4));
740 dsp->lens = (Int4Ptr)MemNew((dsp->numseg)*sizeof(Int4));
741 dsp->strands = (Uint1Ptr)MemNew((dsp->dim)*(dsp->numseg)*sizeof(Uint1));
742 for (i=0; i<dsp->dim; i++)
743 {
744 dsp->starts[i] = ddp->starts[i];
745 if (ddp->strands != NULL)
746 dsp->strands[i] = ddp->strands[i];
747 else
748 dsp->strands[i] = Seq_strand_plus;
749 }
750 dsp->lens[0] = ddp->len;
751 sap->segs = (Pointer)(dsp);
752 sap->segtype = SAS_DENSEG;
753 ddp_next = ddp->next;
754 ddp->next = NULL;
755 DenseDiagFree(ddp);
756 ddp = ddp_next;
757 if (ddp == NULL)
758 return;
759 sap_prev = sap;
760 while (ddp)
761 {
762 sap_new = SeqAlignNew();
763 sap_new->type = SAT_PARTIAL;
764 sap_new->segtype = SAS_DENSEG;
765 sap_new->dim = ddp->dim;
766 dsp = DenseSegNew();
767 dsp->ids = ddp->id;
768 ddp->id = NULL;
769 dsp->dim = ddp->dim;
770 dsp->numseg = 1;
771 dsp->starts = (Int4Ptr)MemNew((dsp->dim)*(dsp->numseg)*sizeof(Int4));
772 dsp->lens = (Int4Ptr)MemNew((dsp->numseg)*sizeof(Int4));
773 dsp->strands = (Uint1Ptr)MemNew((dsp->dim)*(dsp->numseg)*sizeof(Uint1));
774 for (i=0; i<dsp->dim; i++)
775 {
776 dsp->starts[i] = ddp->starts[i];
777 if (ddp->strands != NULL)
778 dsp->strands[i] = ddp->strands[i];
779 else
780 dsp->strands[i] = Seq_strand_plus;
781 }
782 dsp->lens[0] = ddp->len;
783 sap_new->segs = (Pointer)(dsp);
784 ddp_next = ddp->next;
785 ddp->next = NULL;
786 DenseDiagFree(ddp);
787 ddp = ddp_next;
788 sap_prev->next = sap_new;
789 sap_prev = sap_new;
790 }
791 sap_new->next = sap_next;
792 }
793
794 /* SECTION 2a */
795 /***************************************************************************
796 *
797 * AlnMgr2IndexSingleDenseSegSA creates the SAIndex2 structure for a given
798 * dense-seg seqalign. This structure has binary-searchable indexes into
799 * the segs. If the strands are not allocated, this function allocates
800 * them and sets them to Seq_strand_plus.
801 *
802 ***************************************************************************/
AlnMgr2IndexSingleDenseSegSA(SeqAlignPtr sap)803 static void AlnMgr2IndexSingleDenseSegSA(SeqAlignPtr sap)
804 {
805 DenseSegPtr dsp;
806 Int4 i;
807 Int4 j;
808 Int4 last;
809 Int4 next;
810 Int4 row;
811 SAIndex2Ptr saip;
812 Boolean unal;
813
814 if (sap->segtype != SAS_DENSEG)
815 return;
816 dsp = (DenseSegPtr)(sap->segs);
817 if (dsp->strands == NULL)
818 {
819 dsp->strands = (Uint1Ptr)MemNew(dsp->dim*dsp->numseg*sizeof(Uint1));
820 for (i=0; i<dsp->dim*dsp->numseg; i++)
821 {
822 dsp->strands[i] = Seq_strand_plus;
823 }
824 }
825 saip = SAIndex2New();
826 saip->aligncoords = (Uint4Ptr)MemNew((dsp->numseg)*sizeof(Uint4));
827 saip->srdp = (SARowDat2Ptr PNTR)MemNew((dsp->dim)*sizeof(SARowDat2Ptr));
828 saip->numrows = dsp->dim;
829 saip->numseg = dsp->numseg;
830 for (i=0; i<dsp->dim; i++)
831 {
832 saip->srdp[i] = SARowDat2New();
833 }
834 for (i=0; i<dsp->numseg; i++)
835 {
836 if (i != 0)
837 saip->aligncoords[i] = saip->aligncoords[i-1] + dsp->lens[i-1];
838 for (row=0; row<dsp->dim; row++)
839 {
840 if (dsp->starts[dsp->dim*i + row] != -1)
841 saip->srdp[row]->numsect++;
842 }
843 }
844 for (row=0; row<dsp->dim; row++)
845 {
846 saip->srdp[row]->sect = (Uint2Ptr)MemNew((saip->srdp[row]->numsect)*sizeof(Uint2));
847 saip->srdp[row]->unsect = (Uint2Ptr)MemNew((dsp->numseg - saip->srdp[row]->numsect)*sizeof(Uint2));
848 saip->srdp[row]->numsect = 0;
849 saip->srdp[row]->unaligned = (Uint2Ptr)MemNew(dsp->numseg*sizeof(Uint2));
850 }
851 for (i=0; i<dsp->numseg; i++)
852 {
853 for (row=0; row<dsp->dim; row++)
854 {
855 if (dsp->starts[dsp->dim*i + row] != -1)
856 {
857 saip->srdp[row]->sect[saip->srdp[row]->numsect] = i;
858 saip->srdp[row]->numsect++;
859 } else
860 {
861 saip->srdp[row]->unsect[saip->srdp[row]->numunsect] = i;
862 saip->srdp[row]->numunsect++;
863 }
864 }
865 }
866 for (row=0; row<dsp->dim; row++)
867 {
868 for (i=0; i<dsp->numseg; i++)
869 {
870 unal = FALSE;
871 last = -1;
872 j = i; /* only blocks with sequence can have flanking unal. regions */
873 if (j >= 0 && dsp->starts[dsp->dim*j+row] != -1)
874 {
875 if (dsp->strands[row] == Seq_strand_minus)
876 last = dsp->starts[dsp->dim*j+row];
877 else
878 last = dsp->starts[dsp->dim*j+row] + dsp->lens[j];
879 }
880 if (last > -1)
881 {
882 next = -1;
883 j++;
884 /* find next block of aligned sequence in this row */
885 for (j; j<dsp->numseg && next == -1; j++)
886 {
887 if (dsp->starts[dsp->dim*j+row] != -1)
888 {
889 if (dsp->strands[row] == Seq_strand_minus)
890 next = dsp->starts[dsp->dim*j+row] + dsp->lens[j];
891 else
892 next = dsp->starts[dsp->dim*j+row];
893 }
894 }
895 if (next > -1) /* look for unaligned seq on right side of this seg */
896 {
897 if (next != last)
898 unal = TRUE;
899 }
900 }
901 if (unal == TRUE)
902 {
903 saip->srdp[row]->unaligned[saip->srdp[row]->numunaln] = i;
904 saip->srdp[row]->numunaln++;
905 }
906 }
907 }
908 sap->saip = (SeqAlignIndexPtr)(saip);
909 }
910
911 /* SECTION 2a */
912 /***************************************************************************
913 *
914 * AlnMgr2IndexSingleChildSeqAlign takes a simple dense-seg or dense-diag
915 * seqalign, converts it to dense-seg, and then calls
916 * AlnMgr2IndexSingleDenseSegSA to create the indexes. If the alignment has
917 * already been indexed, this erases that index and reindexes the alignment.
918 * (SINGCHILD)
919 *
920 ***************************************************************************/
AlnMgr2IndexSingleChildSeqAlign(SeqAlignPtr sap)921 NLM_EXTERN Boolean AlnMgr2IndexSingleChildSeqAlign(SeqAlignPtr sap)
922 {
923 SeqAlignPtr salp;
924 SeqAlignPtr salp_prev;
925 SeqAlignPtr sap_next;
926
927 if (sap == NULL)
928 return FALSE;
929 if (sap->saip != NULL)
930 {
931 if (sap->saip->indextype != INDEX_CHILD)
932 return FALSE;
933 SAIndex2Free2(sap->saip);
934 sap->saip = NULL;
935 }
936 sap_next = sap->next;
937 sap->next = NULL;
938 if (sap->segtype == SAS_DISC)
939 return FALSE;
940 if (sap->segtype == SAS_DENDIAG)
941 AlnMgr2ConvertDendiagToDensegChain(sap);
942 salp = sap;
943 salp_prev = sap;
944 while (salp != NULL)
945 {
946 AlnMgr2IndexSingleDenseSegSA(salp);
947 salp_prev = salp;
948 salp = salp->next;
949 }
950 salp_prev->next = sap_next;
951 return TRUE;
952 }
953
954 /***************************************************************************
955 *
956 * AlnMgr2UnpackSeqAlign rearranges any seqalign (except alignments with
957 * more than two levels of nested discontinuous alignments) to a simple
958 * discontinuous alignment or a linked list of alignments.
959 *
960 ***************************************************************************/
961 /* SECTION 2b */
AlnMgr2UnpackSeqAlign(SeqAlignPtr sap)962 static Boolean AlnMgr2UnpackSeqAlign(SeqAlignPtr sap)
963 {
964 SeqAlignPtr sap_new;
965 SeqAlignPtr sap_next;
966 SeqAlignPtr sap_segs;
967 SeqAlignPtr sap_segs_head;
968 SeqAlignPtr sap_segs_prev;
969
970 if (sap == NULL)
971 return FALSE;
972 sap_segs = NULL;
973 if (sap->segtype == SAS_DISC)
974 {
975 sap_segs_head = (SeqAlignPtr)(sap->segs);
976 if (sap_segs_head->segtype == SAS_DISC)
977 {
978 sap_segs_prev = (SeqAlignPtr)(sap_segs_head->segs);
979 sap_segs_head->segs = NULL;
980 sap_next = sap_segs_head->next;
981 sap_segs_head->next = NULL;
982 SeqAlignFree(sap_segs_head);
983 sap_segs_head = sap_segs_prev;
984 sap->segs = (Pointer)(sap_segs_head);
985 while (sap_segs_prev->next)
986 {
987 sap_segs_prev = sap_segs_prev->next;
988 if (sap_segs_prev->segtype == SAS_DISC)
989 return FALSE;
990 }
991 sap_segs_prev->next = sap_next;
992 sap_segs = sap_next;
993 } else
994 sap_segs = sap_segs_head->next;
995 while (sap_segs)
996 {
997 if (sap_segs->segtype == SAS_DISC)
998 {
999 sap_next = sap_segs->next;
1000 sap_segs->next = NULL;
1001 sap_segs_prev->next = (SeqAlignPtr)(sap_segs->segs);
1002 sap_segs->segs = NULL;
1003 SeqAlignFree(sap_segs);
1004 while (sap_segs_prev->next)
1005 {
1006 sap_segs_prev = sap_segs_prev->next;
1007 if (sap_segs_prev->segtype == SAS_DISC)
1008 return FALSE;
1009 }
1010 sap_segs_prev->next = sap_next;
1011 sap_segs = sap_next;
1012 } else
1013 sap_segs = sap_segs->next;
1014 }
1015 } else
1016 {
1017 sap_new = SeqAlignNew();
1018 sap_new->type = SAT_GLOBAL;
1019 sap_new->segtype = sap->segtype;
1020 sap_new->dim = sap->dim;
1021 sap_new->segs = sap->segs;
1022 sap_new->master = sap->master;
1023 sap_new->bounds = sap->bounds;
1024 sap_new->next = sap->next;
1025 sap_new->score = sap->score;
1026 sap->next = NULL;
1027 sap->segtype = SAS_DISC;
1028 sap->type = 0;
1029 sap->dim = 0;
1030 sap->master = NULL;
1031 sap->bounds = NULL;
1032 sap->score = NULL;
1033 sap->segs = (Pointer)sap_new;
1034 sap_segs_prev = sap_new;
1035 sap_segs = sap_new->next;
1036 while (sap_segs)
1037 {
1038 if (sap_segs->segtype == SAS_DISC)
1039 {
1040 sap_next = sap_segs->next;
1041 sap_segs->next = NULL;
1042 sap_segs_prev->next = (SeqAlignPtr)(sap_segs->segs);
1043 sap_segs->segs = NULL;
1044 SeqAlignFree(sap_segs);
1045 while (sap_segs_prev->next)
1046 {
1047 sap_segs_prev = sap_segs_prev->next;
1048 if (sap_segs_prev->segtype == SAS_DISC)
1049 return FALSE;
1050 }
1051 sap_segs_prev->next = sap_next;
1052 sap_segs = sap_next;
1053 } else
1054 sap_segs = sap_segs->next;
1055 }
1056 }
1057 return TRUE;
1058 }
1059
1060 /* SECTION 2b */
AlnMgr2UnpackSeqAlignChain(SeqAlignPtr sap)1061 static void AlnMgr2UnpackSeqAlignChain(SeqAlignPtr sap)
1062 {
1063 Int4 i;
1064 SeqAlignPtr salp_head;
1065 SeqAlignPtr salp_prev;
1066 SeqAlignPtr sap_next;
1067 SeqAlignPtr sap_orig;
1068 SeqAlignPtr sap_prev;
1069
1070 salp_head = salp_prev = NULL;
1071 i = 0;
1072 while (sap != NULL)
1073 {
1074 sap_next = sap->next;
1075 sap->next = NULL;
1076 AlnMgr2UnpackSeqAlign(sap);
1077 while (sap != NULL)
1078 {
1079 if (salp_prev != NULL)
1080 {
1081 salp_prev->next = (SeqAlignPtr)(sap->segs);
1082 sap->segs = NULL;
1083 while (salp_prev->next != NULL)
1084 {
1085 salp_prev = salp_prev->next;
1086 }
1087 } else
1088 {
1089 salp_head = salp_prev = (SeqAlignPtr)(sap->segs);
1090 sap->segs = NULL;
1091 while (salp_prev->next != NULL)
1092 {
1093 salp_prev = salp_prev->next;
1094 }
1095 }
1096 sap_prev = sap;
1097 sap = sap->next;
1098 sap_prev->next = NULL;
1099 if (i>0)
1100 SeqAlignFree(sap_prev);
1101 else
1102 sap_orig = sap_prev; /* this is the pointer that was passed in */
1103 i++;
1104 }
1105 sap = sap_next;
1106 }
1107 sap_orig->segs = (Pointer)(salp_head);
1108 }
1109
1110 /* SECTION 2b */
1111 /***************************************************************************
1112 *
1113 * AlnMgr2ConvertAllToDenseSeg goes through a chain of simple child
1114 * seqaligns and makes sure that each is a dense-seg seqalign with the
1115 * strands explicitly allocated; dense-diag alignments are converted and
1116 * non-allocated strands are allocated and all set to Seq_strand_plus.
1117 *
1118 ***************************************************************************/
AlnMgr2ConvertAllToDenseSeg(SeqAlignPtr sap)1119 static Boolean AlnMgr2ConvertAllToDenseSeg(SeqAlignPtr sap)
1120 {
1121 DenseSegPtr dsp;
1122 Int4 i;
1123 SeqAlignPtr sap_next;
1124
1125 while (sap != NULL)
1126 {
1127 sap_next = sap->next;
1128 if (sap->segtype == SAS_DENDIAG) {
1129 AlnMgr2ConvertDendiagToDensegChain(sap);
1130 }
1131 else if (sap->segtype == SAS_DENSEG)
1132 {
1133 dsp = (DenseSegPtr)(sap->segs);
1134 if (dsp->strands == NULL)
1135 {
1136 dsp->strands = (Uint1Ptr)MemNew((dsp->dim)*(dsp->numseg)*sizeof(Uint1));
1137 for (i=0; i<(dsp->dim)*(dsp->numseg); i++)
1138 {
1139 dsp->strands[i] = Seq_strand_plus;
1140 }
1141 }
1142 }
1143 else if (sap->segtype == SAS_SPLICED)
1144 {
1145 return FALSE;
1146 }
1147 sap = sap_next;
1148 }
1149 return TRUE;
1150 }
1151
1152 /* SECTION 2c */
1153 /***************************************************************************
1154 *
1155 * AlnMgr2IndexLite takes a seqalign or a list of seqaligns, converts
1156 * each alignment to a dense-seg structure and indexes it, and then
1157 * allocates an AMAlignIndex2 structure and fills in the saps array.
1158 *
1159 ***************************************************************************/
AlnMgr2IndexLite(SeqAlignPtr sap)1160 NLM_EXTERN Boolean AlnMgr2IndexLite(SeqAlignPtr sap)
1161 {
1162 AMAlignIndex2Ptr amaip;
1163 Int4 i;
1164 SAIndex2Ptr saip;
1165 SeqAlignPtr salp;
1166
1167 if (sap == NULL)
1168 return FALSE;
1169 if (!AlnMgr2UnpackSeqAlign(sap))
1170 return FALSE;
1171 if (!AlnMgr2ConvertAllToDenseSeg((SeqAlignPtr)sap->segs))
1172 return FALSE;
1173 amaip = AMAlignIndex2New();
1174 amaip->alnstyle = AM2_LITE;
1175 salp = (SeqAlignPtr)(sap->segs);
1176 while (salp != NULL)
1177 {
1178 amaip->numsaps++;
1179 AlnMgr2IndexSingleChildSeqAlign(salp);
1180 salp = salp->next;
1181 }
1182 amaip->saps = (SeqAlignPtr PNTR)MemNew((amaip->numsaps)*sizeof(SeqAlignPtr));
1183 salp = (SeqAlignPtr)(sap->segs);
1184 i = 0;
1185 while (salp != NULL)
1186 {
1187 amaip->saps[i] = salp;
1188 i++;
1189 saip = (SAIndex2Ptr)(salp->saip);
1190 saip->numinchain = i;
1191 saip->top = sap;
1192 salp = salp->next;
1193 }
1194 sap->saip = (SeqAlignIndexPtr)amaip;
1195 amaip->aligned = (Boolean PNTR)MemNew((amaip->numsaps)*sizeof(Boolean));
1196 for (i=0; i<amaip->numsaps; i++)
1197 {
1198 amaip->aligned[i] = TRUE;
1199 }
1200 return TRUE;
1201 }
1202
1203 /* SECTION 2c */
1204 /***************************************************************************
1205 *
1206 * AlnMgr2IndexSeqAlign takes a seqalign of any type except std-seg and
1207 * creates indexes on it for easy retrieval of useful information by other
1208 * AlnMgr2 functions. If the seqalign is a single alignment, that alignment
1209 * gets a simple index and is left alone otherwise. If the seqalign is
1210 * a set of alignments or a dense-diag set, the subalignments get
1211 * individually indexed and then are combined into a (fake) multiple
1212 * alignment which also gets indexed. The subalignments can now be accessed
1213 * as a multiple alignment by AlnMgr2 functions.
1214 *
1215 ***************************************************************************/
1216
AlnMgr2IndexSeqAlign(SeqAlignPtr sap)1217 NLM_EXTERN void AlnMgr2IndexSeqAlign(SeqAlignPtr sap)
1218 {
1219 AlnMgr2IndexSeqAlignEx(sap, TRUE);
1220 }
1221
AlnMgr2IndexSeqAlignEx(SeqAlignPtr sap,Boolean replace_gi)1222 NLM_EXTERN void AlnMgr2IndexSeqAlignEx(SeqAlignPtr sap, Boolean replace_gi)
1223 {
1224 AMAlignIndex2Ptr amaip;
1225 AMIntervalSetPtr amint;
1226 AMIntervalSetPtr amint_head;
1227 AMEdgePtr edge;
1228 AMEdgePtr edge_head;
1229 Int4 i;
1230 Int4 numvertices;
1231 AMVertexPtr vertex_head;
1232 AMVertexPtr PNTR vertexarray;
1233
1234 if (sap == NULL || sap->saip != NULL)
1235 return;
1236 if (replace_gi) {
1237 SAM_ReplaceGI(sap);
1238 }
1239
1240 if (sap->next == NULL && sap->segtype == SAS_DENSEG)
1241 {
1242 AlnMgr2IndexSingleChildSeqAlign(sap);
1243 return;
1244 }
1245
1246 AlnMgr2IndexLite(sap);
1247 AlnMgr2DecomposeToPairwise(sap);
1248 amaip = (AMAlignIndex2Ptr)(sap->saip);
1249 amaip->alnstyle = AM2_FULLINDEX;
1250 AlnMgr2HidePairwiseConflicts(sap);
1251 amint_head = AlnMgr2MakeIntervals(sap);
1252 vertex_head = NULL;
1253 edge_head = NULL;
1254 vertexarray = AlnMgr2MakeVerticesFromIntervals(sap, amint_head, &vertex_head, &edge_head, &numvertices);
1255 while (amint_head != NULL)
1256 {
1257 amint = amint_head->next;
1258 AMIntervalSetFree(amint_head);
1259 amint_head = amint;
1260 }
1261 AlnMgr2UsePrimsAlgorithm(vertexarray, numvertices, edge_head);
1262 AlnMgr2BuildAlignmentFromTree(vertexarray, numvertices, edge_head, sap);
1263 for (i=0; i<numvertices; i++)
1264 {
1265 SeqIdFree(vertexarray[i]->sip);
1266 MemFree(vertexarray[i]);
1267 }
1268 MemFree(vertexarray);
1269 while (edge_head != NULL)
1270 {
1271 edge = edge_head->next;
1272 MemFree(edge_head);
1273 edge_head = edge;
1274 }
1275 amaip = (AMAlignIndex2Ptr)(sap->saip);
1276 amaip->alnstyle = AM2_FULLINDEX;
1277 }
1278
1279 /* SECTION 2c */
1280 /***************************************************************************
1281 *
1282 * AlnMgr2ReIndexSeqAlign takes an indexed alignment (that has, presumably,
1283 * been changed), makes sure all child seqaligns are indexed (if they are
1284 * already indexed they are not reindexed), and reindexes all the child
1285 * seqaligns as a set.
1286 *
1287 ***************************************************************************/
AlnMgr2ReIndexSeqAlign(SeqAlignPtr sap)1288 NLM_EXTERN void AlnMgr2ReIndexSeqAlign(SeqAlignPtr sap)
1289 {
1290 AMAlignIndex2Ptr amaip;
1291 AMIntervalSetPtr amint;
1292 AMIntervalSetPtr amint_head;
1293 AMEdgePtr edge_head;
1294 Int4 i;
1295 Int4 numvertices;
1296 AMVertexPtr vertex_head;
1297 AMVertexPtr PNTR vertexarray;
1298
1299 if (sap == NULL)
1300 return;
1301 if (sap->saip == NULL)
1302 {
1303 AlnMgr2IndexSeqAlign(sap);
1304 return;
1305 }
1306 if (sap->saip->indextype == INDEX_CHILD)
1307 return;
1308 amaip = (AMAlignIndex2Ptr)(sap->saip);
1309 for (i=0; i<amaip->numsaps; i++)
1310 {
1311 if (amaip->saps[i]->saip == NULL)
1312 AlnMgr2IndexSingleChildSeqAlign(amaip->saps[i]);
1313 }
1314 if (amaip->alnstyle != AM2_LITE)
1315 return;
1316 AlnMgr2DecomposeToPairwise(sap);
1317 AlnMgr2HidePairwiseConflicts(sap);
1318 amint_head = AlnMgr2MakeIntervals(sap);
1319 vertex_head = NULL;
1320 edge_head = NULL;
1321 vertexarray = AlnMgr2MakeVerticesFromIntervals(sap, amint_head, &vertex_head, &edge_head, &numvertices);
1322 while (amint_head != NULL)
1323 {
1324 amint = amint_head->next;
1325 AMIntervalSetFree(amint_head);
1326 amint_head = amint;
1327 }
1328 AlnMgr2UsePrimsAlgorithm(vertexarray, numvertices, edge_head);
1329 AlnMgr2BuildAlignmentFromTree(vertexarray, numvertices, edge_head, sap);
1330 MemFree(vertexarray);
1331 }
1332
AlnMgr2CompareByAnchor(VoidPtr ptr1,VoidPtr ptr2)1333 static int LIBCALLBACK AlnMgr2CompareByAnchor(VoidPtr ptr1, VoidPtr ptr2)
1334 {
1335 DenseSegPtr dsp;
1336 int ret;
1337 SAIndex2Ptr saip1;
1338 SAIndex2Ptr saip2;
1339 SeqAlignPtr sap1;
1340 SeqAlignPtr sap2;
1341 SeqIdPtr sip1;
1342 SeqIdPtr sip2;
1343 Int4 start1;
1344 Int4 start2;
1345 Int4 stop1;
1346 Int4 stop2;
1347
1348 sap1 = *((SeqAlignPtr PNTR)ptr1);
1349 sap2 = *((SeqAlignPtr PNTR)ptr2);
1350 saip1 = (SAIndex2Ptr)(sap1->saip);
1351 saip2 = (SAIndex2Ptr)(sap2->saip);
1352 dsp = (DenseSegPtr)(sap1->segs);
1353 if (saip1->tmp == 1)
1354 sip1 = dsp->ids->next;
1355 else
1356 sip1 = dsp->ids;
1357 dsp = (DenseSegPtr)(sap2->segs);
1358 if (saip2->tmp == 1)
1359 sip2 = dsp->ids->next;
1360 else
1361 sip2 = dsp->ids;
1362 ret = AlnMgr2OrderSeqIds(sip1, sip2);
1363 if (ret != 0)
1364 return ret;
1365 /* these share both ids -- put best first */
1366 if (saip1->score == 0)
1367 saip1->score = AlnMgr2ComputeScoreForSeqAlign(sap1);
1368 if (saip2->score == 0)
1369 saip2->score = AlnMgr2ComputeScoreForSeqAlign(sap2);
1370 if (saip1->score > saip2->score)
1371 return -1;
1372 else if (saip1->score < saip2->score)
1373 return 1;
1374 AlnMgr2GetNthSeqRangeInSA(sap1, saip1->tmp, &start1, &stop1);
1375 AlnMgr2GetNthSeqRangeInSA(sap2, saip2->tmp, &start2, &stop2);
1376 if (start1 < start2)
1377 return -1;
1378 else if (start1 > start2)
1379 return 1;
1380 else if (stop1 > stop2)
1381 return -1;
1382 else if (stop1 < stop2)
1383 return 1;
1384 return 0;
1385 }
1386
1387 /* SECTION 2c */
AlnMgr2IndexAsRows(SeqAlignPtr sap,Uint1 strand,Boolean truncate)1388 NLM_EXTERN Boolean AlnMgr2IndexAsRows(SeqAlignPtr sap, Uint1 strand, Boolean truncate)
1389 {
1390 AMAlignIndex2Ptr amaip;
1391 DenseSegPtr dsp;
1392 DenseSegPtr dsp_tmp;
1393 Boolean found;
1394 Int4 i;
1395 Boolean impossible;
1396 Int4 numsaps;
1397 SAIndex2Ptr saip;
1398 SeqAlignPtr salp;
1399 SeqAlignPtr sap_head;
1400 SeqAlignPtr sap_prev;
1401 SeqAlignPtr sap_tmp;
1402 SeqAlignPtr PNTR saparray;
1403 SeqAlignPtr set_head;
1404 SeqAlignPtr set_prev;
1405 SeqIdPtr sharedsip;
1406 SeqIdPtr sip;
1407 SeqIdPtr sip_next;
1408 SeqIdPtr sip_tmp;
1409 Int4 tmp;
1410
1411 if (sap == NULL)
1412 return FALSE;
1413 if (sap->saip != NULL)
1414 AMAlignIndexFreeEitherIndex(sap);
1415 AlnMgr2IndexLite(sap);
1416 AlnMgr2DecomposeToPairwise(sap);
1417 /* need to figure out which row is shared by all saps */
1418 sap_tmp = (SeqAlignPtr)(sap->segs);
1419 dsp = (DenseSegPtr)(sap_tmp->segs);
1420 sip = dsp->ids;
1421 found = FALSE;
1422 while (!found && sip != NULL)
1423 {
1424 sap_tmp = (SeqAlignPtr)(sap->segs);
1425 sip_next = sip->next;
1426 sip->next = NULL;
1427 impossible = FALSE;
1428 while (!impossible && sap_tmp != NULL)
1429 {
1430 dsp_tmp = (DenseSegPtr)(sap_tmp->segs);
1431 if (AlnMgr2SeqIdListsOverlap(sip, dsp_tmp->ids) == NULL)
1432 impossible = TRUE;
1433 sap_tmp = sap_tmp->next;
1434 }
1435 sip->next = sip_next;
1436 if (!impossible) /* found one that matched a row in every alignment */
1437 found = TRUE;
1438 else
1439 sip = sip_next;
1440 }
1441 if (!found) /* didn't find a seqid that was contained in all alignments */
1442 return FALSE;
1443 /* mark the shared row to make things easier */
1444 sharedsip = SeqIdDup(sip);
1445 sap_tmp = (SeqAlignPtr)(sap->segs);
1446 i = 0;
1447 while (sap_tmp != NULL)
1448 {
1449 saip = (SAIndex2Ptr)(sap_tmp->saip);
1450 dsp_tmp = (DenseSegPtr)(sap_tmp->segs);
1451 if (SeqIdComp(sharedsip, dsp_tmp->ids) == SIC_YES)
1452 saip->tmp = 1;
1453 else
1454 saip->tmp = 2;
1455 sap_tmp = sap_tmp->next;
1456 i++;
1457 }
1458 saparray = (SeqAlignPtr PNTR)MemNew(i*sizeof(SeqAlignPtr));
1459 sap_tmp = (SeqAlignPtr)(sap->segs);
1460 i = 0;
1461 while (sap_tmp != NULL)
1462 {
1463 saparray[i] = sap_tmp;
1464 i++;
1465 sap_tmp = sap_tmp->next;
1466 }
1467 numsaps = i;
1468 HeapSort(saparray, i, sizeof(SeqAlignPtr), AlnMgr2CompareByAnchor);
1469 /* now each clump of alignments is a row -- need to eliminate overlaps next */
1470 sip = NULL;
1471 i = 0;
1472 sap_head = sap_prev = NULL;
1473 while (i<numsaps)
1474 {
1475 saparray[i]->next = NULL;
1476 set_head = set_prev = saparray[i];
1477 saip = (SAIndex2Ptr)(saparray[i]->saip);
1478 sip = AlnMgr2GetNthSeqIdPtr(saparray[i], 3-saip->tmp); /* get other seqid */
1479 i++;
1480 if (i<numsaps)
1481 sip_tmp = AlnMgr2GetNthSeqIdPtr(saparray[i], 3-saip->tmp);
1482 while (i<numsaps && SeqIdComp(sip, sip_tmp) == SIC_YES)
1483 {
1484 set_prev->next = saparray[i];
1485 set_prev = saparray[i];
1486 saparray[i]->next = NULL;
1487 i++;
1488 SeqIdFree(sip_tmp);
1489 if (i<numsaps)
1490 sip_tmp = AlnMgr2GetNthSeqIdPtr(saparray[i], 3-saip->tmp);
1491 }
1492 AlnMgr2IndexLite(set_head);
1493 if (!truncate)
1494 AlnMgr2RemoveInconsistentAlnsFromSet(set_head, 0);
1495 else
1496 AlnMgr2RemoveInconsistentAlnsFromSet(set_head, -1);
1497 sap_tmp = (SeqAlignPtr)(set_head->segs);
1498 while (sap_tmp != NULL)
1499 {
1500 saip = (SAIndex2Ptr)(sap_tmp->saip);
1501 dsp_tmp = (DenseSegPtr)(sap_tmp->segs);
1502 if (SeqIdComp(sharedsip, dsp_tmp->ids) == SIC_YES)
1503 saip->tmp = 1;
1504 else
1505 saip->tmp = 2;
1506 sap_tmp = sap_tmp->next;
1507 }
1508 if (sap_head != NULL)
1509 sap_prev->next = set_head;
1510 else
1511 sap_head = sap_prev = set_head;
1512 while (sap_prev->next != NULL)
1513 {
1514 sap_prev = sap_prev->next;
1515 }
1516 sap_prev->next = NULL;
1517 }
1518 /* now we have lots of freed pointers sitting in the array */
1519 MemFree(saparray);
1520 saparray = NULL;
1521 /* sap_head is the head of a chain of LITE-indexed alignments, each of which is one row */
1522 /* first make sure that the shared row is on the requested strand */
1523 sap_tmp = sap_head;
1524 if (strand == Seq_strand_both || strand == Seq_strand_unknown || strand == 0)
1525 strand = Seq_strand_plus;
1526 while (sap_tmp != NULL)
1527 {
1528 salp = (SeqAlignPtr)(sap_tmp->segs);
1529 saip = (SAIndex2Ptr)(salp->saip);
1530 /* strand is same for all children */
1531 if (AlnMgr2GetNthStrand(salp, saip->tmp) != strand)
1532 {
1533 SeqAlignListReverseStrand(salp);
1534 while (salp != NULL)
1535 {
1536 saip = (SAIndex2Ptr)salp->saip;
1537 tmp = saip->tmp;
1538 SAIndex2Free2(salp->saip);
1539 salp->saip = NULL;
1540 AlnMgr2IndexSingleChildSeqAlign(salp);
1541 saip = (SAIndex2Ptr)salp->saip;
1542 saip->tmp = tmp;
1543 salp = salp->next;
1544 }
1545 }
1546 sap_tmp = sap_tmp->next;
1547 }
1548 sap_tmp = sap_head;
1549 sap->segs = NULL;
1550 AMAlignIndex2Free2(sap->saip);
1551 sap->saip = (SeqAlignIndexPtr)AMAlignIndex2New();
1552 amaip = (AMAlignIndex2Ptr)(sap->saip);
1553 amaip->alnstyle = AM2_FULLINDEX;
1554 set_head = set_prev = NULL;
1555 while (sap_tmp != NULL)
1556 {
1557 salp = (SeqAlignPtr)(sap_tmp->segs);
1558 while (salp != NULL)
1559 {
1560 AlnMgr2AddInNewPairwiseSA(sap, salp);
1561 if (set_head != NULL)
1562 {
1563 set_prev->next = salp;
1564 set_prev = salp;
1565 } else
1566 set_head = set_prev = salp;
1567 salp = salp->next;
1568 }
1569 sap_tmp->segs = NULL;
1570 sap_tmp = sap_tmp->next;
1571 }
1572 AlnMgr2CondenseColumns((DenseSegPtr)(amaip->sharedaln->segs));
1573 AlnMgr2IndexSingleChildSeqAlign(amaip->sharedaln);
1574 set_prev->next = NULL;
1575 sap->segs = (Pointer)(set_head);
1576 SeqAlignListFree(sap_head);
1577 SeqIdFree(sharedsip);
1578 return TRUE;
1579 }
1580
1581 /* SECTION 2c */
1582 /***************************************************************************
1583 *
1584 * AlnMgr2IndexIndexedChain takes a linked list of indexed seqaligns
1585 * and does an in-place transformation to an indexed parent-child
1586 * seqalign set.
1587 *
1588 ***************************************************************************/
AlnMgr2IndexIndexedChain(SeqAlignPtr sap)1589 NLM_EXTERN void AlnMgr2IndexIndexedChain(SeqAlignPtr sap)
1590 {
1591 AMAlignIndex2Ptr amaip;
1592 AMIntervalSetPtr amint;
1593 AMIntervalSetPtr amint_head;
1594 AMEdgePtr edge_head;
1595 Int4 numvertices;
1596 AMVertexPtr vertex_head;
1597 AMVertexPtr PNTR vertexarray;
1598
1599 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_CHILD)
1600 return;
1601 AlnMgr2IndexLite(sap);
1602 AlnMgr2DecomposeToPairwise(sap);
1603 amaip = (AMAlignIndex2Ptr)(sap->saip);
1604 amaip->alnstyle = AM2_FULLINDEX;
1605 AlnMgr2HidePairwiseConflicts(sap);
1606 amint_head = AlnMgr2MakeIntervals(sap);
1607 vertex_head = NULL;
1608 edge_head = NULL;
1609 vertexarray = AlnMgr2MakeVerticesFromIntervals(sap, amint_head, &vertex_head, &edge_head, &numvertices);
1610 while (amint_head != NULL)
1611 {
1612 amint = amint_head->next;
1613 AMIntervalSetFree(amint_head);
1614 amint_head = amint;
1615 }
1616 AlnMgr2UsePrimsAlgorithm(vertexarray, numvertices, edge_head);
1617 AlnMgr2BuildAlignmentFromTree(vertexarray, numvertices, edge_head, sap);
1618 MemFree(vertexarray);
1619 }
1620
1621 /* SECTION 2c */
1622 /***************************************************************************
1623 *
1624 * AlnMgr2DecomposeToPairwise takes a parent seqalign and goes through all
1625 * its children, checking their dimensions. If a child seqalign is found
1626 * with dimension greater than 2, that alignment is copied into a set of
1627 * two-row alignments, each new alignment containing the first row of the
1628 * original alignment and a different row. This function does NOT take out
1629 * segs with only gaps (is this a problem????). The resulting seqaligns
1630 * are all individually indexed and then the whole set is indexed lite.
1631 *
1632 ***************************************************************************/
AlnMgr2DecomposeToPairwise(SeqAlignPtr sap)1633 static void AlnMgr2DecomposeToPairwise(SeqAlignPtr sap)
1634 {
1635 DenseSegPtr dsp;
1636 DenseSegPtr dsp_orig;
1637 Int4 i;
1638 Int4 j;
1639 Int4 n;
1640 SAIndex2Ptr saip;
1641 SAIndex2Ptr saip_orig;
1642 SeqAlignPtr salp;
1643 SeqAlignPtr salp_new;
1644 SeqAlignPtr salp_next;
1645 SeqAlignPtr salp_prev;
1646
1647 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
1648 return;
1649 salp = (SeqAlignPtr)(sap->segs);
1650 salp_prev = NULL;
1651 while (salp)
1652 {
1653 n = AlnMgr2GetNumRows(salp);
1654 if (n > 2)
1655 {
1656 salp_next = salp->next;
1657 saip_orig = (SAIndex2Ptr)(salp->saip);
1658 for (i=2; i<=n; i++)
1659 {
1660 salp_new = SeqAlignNew();
1661 dsp_orig = (DenseSegPtr)(salp->segs);
1662 dsp = DenseSegNew();
1663 dsp->dim = 2;
1664 dsp->numseg = dsp_orig->numseg;
1665 dsp->ids = AlnMgr2GetNthSeqIdPtr(salp, 1);
1666 dsp->ids->next = AlnMgr2GetNthSeqIdPtr(salp, i);
1667 dsp->starts = (Int4Ptr)MemNew(dsp->numseg*2*sizeof(Int4));
1668 dsp->lens = (Int4Ptr)MemNew(dsp->numseg*sizeof(Int4));
1669 dsp->strands = (Uint1Ptr)MemNew(dsp->numseg*2*sizeof(Uint1));
1670 for (j=0; j<dsp->numseg; j++)
1671 {
1672 dsp->lens[j] = dsp_orig->lens[j];
1673 dsp->starts[2*j] = dsp_orig->starts[dsp_orig->dim*j];
1674 dsp->starts[2*j+1] = dsp_orig->starts[dsp_orig->dim*j+i-1];
1675 dsp->strands[2*j] = dsp_orig->strands[dsp_orig->dim*j];
1676 dsp->strands[2*j+1] = dsp_orig->strands[dsp_orig->dim*j+i-1];
1677 }
1678 salp_new = SeqAlignNew();
1679 salp_new->dim = 2;
1680 salp_new->segs = (Pointer)dsp;
1681 salp_new->segtype = SAS_DENSEG;
1682 AlnMgr2IndexSingleChildSeqAlign(salp_new);
1683 saip = (SAIndex2Ptr)(salp_new->saip);
1684 saip->numinchain = saip_orig->numinchain;
1685 saip->numsplitaln = i-1;
1686 if (salp_prev == NULL)
1687 {
1688 salp_prev = salp_new;
1689 sap->segs = (Pointer)salp_new;
1690 } else
1691 {
1692 salp_prev->next = salp_new;
1693 salp_prev = salp_new;
1694 }
1695 }
1696 salp_prev->next = salp_next;
1697 salp->next = NULL;
1698 SeqAlignFree(salp);
1699 salp = salp_next;
1700 } else
1701 {
1702 salp_prev = salp;
1703 salp = salp->next;
1704 }
1705 }
1706 AMAlignIndex2Free2(sap->saip);
1707 sap->saip = NULL;
1708 AlnMgr2IndexLite(sap);
1709 }
1710
1711 /* SECTION 2c */
1712 /***************************************************************************
1713 *
1714 * AlnMgr2HidePairwiseConflicts looks through a set of indexed seqaligns
1715 * to find pairs of alignments that share the same seqids and that provide
1716 * conflicting information. These pairs are then sent to AlnMgr2TossWorse,
1717 * which hides the worse alignment by unaligning it. Note that the hidden
1718 * alignments are not destroyed and are not taken out of the set.
1719 *
1720 ***************************************************************************/
AlnMgr2HidePairwiseConflicts(SeqAlignPtr sap)1721 static void AlnMgr2HidePairwiseConflicts(SeqAlignPtr sap)
1722 {
1723 AMAlignIndex2Ptr amaip;
1724 Int4 i;
1725 Boolean inset;
1726 Int4 j;
1727 Boolean match;
1728 SeqIdPtr sip11;
1729 SeqIdPtr sip12;
1730 SeqIdPtr sip21;
1731 SeqIdPtr sip22;
1732 Int4 start11;
1733 Int4 start12;
1734 Int4 start21;
1735 Int4 start22;
1736 Int4 stop11;
1737 Int4 stop12;
1738 Int4 stop21;
1739 Int4 stop22;
1740 Int4Ptr tossed;
1741
1742 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
1743 return;
1744 amaip = (AMAlignIndex2Ptr)(sap->saip);
1745 AlnMgr2SortBySeqId(sap);
1746 tossed = (Int4Ptr)MemNew(amaip->numsaps*sizeof(Int4));
1747 for (i=0; i<amaip->numsaps-1; i++)
1748 {
1749 for (j=0; j<amaip->numsaps; j++)
1750 {
1751 tossed[j] = 0;
1752 }
1753 inset = TRUE;
1754 for (j=i+1; amaip->aligned[i] && j<amaip->numsaps && inset == TRUE; j++)
1755 {
1756 if (tossed[j] == 0 && amaip->aligned[i] && amaip->aligned[j])
1757 {
1758 sip11 = AlnMgr2GetNthSeqIdPtr(amaip->saps[i], 1);
1759 sip12 = AlnMgr2GetNthSeqIdPtr(amaip->saps[i], 2);
1760 sip21 = AlnMgr2GetNthSeqIdPtr(amaip->saps[j], 1);
1761 sip22 = AlnMgr2GetNthSeqIdPtr(amaip->saps[j], 2);
1762 match = FALSE;
1763 if (SeqIdComp(sip11, sip21) == SIC_YES && SeqIdComp(sip12, sip22) == SIC_YES)
1764 {
1765 match = TRUE;
1766 AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], 1, &start11, &stop11);
1767 AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], 2, &start12, &stop12);
1768 AlnMgr2GetNthSeqRangeInSA(amaip->saps[j], 1, &start21, &stop21);
1769 AlnMgr2GetNthSeqRangeInSA(amaip->saps[j], 2, &start22, &stop22);
1770 } else if (SeqIdComp(sip11, sip22) == SIC_YES && SeqIdComp(sip12, sip21) == SIC_YES)
1771 {
1772 match = TRUE;
1773 AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], 1, &start11, &stop11);
1774 AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], 2, &start12, &stop12);
1775 AlnMgr2GetNthSeqRangeInSA(amaip->saps[j], 2, &start21, &stop21);
1776 AlnMgr2GetNthSeqRangeInSA(amaip->saps[j], 1, &start22, &stop22);
1777 } else if (SeqIdComp(sip11, sip21) != SIC_YES && SeqIdComp(sip11, sip22) != SIC_YES)
1778 inset = FALSE;
1779 if (match == TRUE)
1780 {
1781 if ((start11 < start21 && stop11 > stop21) || (start11 < stop21 && stop11 > stop21) || (start11 > start21 && stop11 < stop21))
1782 {
1783 AlnMgr2TossWorse(sap, i, j);
1784 if (amaip->aligned[j] == 0) /* j just got tossed -- put it in the list */
1785 tossed[j] = 1;
1786 }
1787 }
1788 SeqIdFree(sip11);
1789 SeqIdFree(sip12);
1790 SeqIdFree(sip21);
1791 SeqIdFree(sip22);
1792 }
1793 }
1794 if (amaip->aligned[i] == 0) /* the query alignment got tossed -- restore */
1795 { /* all the ones that it tossed out */
1796 for (j=0; j<amaip->numsaps; j++)
1797 {
1798 if (tossed[j] == 1)
1799 amaip->aligned[j] = 1;
1800 }
1801 }
1802 }
1803 MemFree(tossed);
1804 }
1805
1806 /* SECTION 2c */
AlnMgr2SortBySeqId(SeqAlignPtr sap)1807 static void AlnMgr2SortBySeqId(SeqAlignPtr sap)
1808 {
1809 AMAlignIndex2Ptr amaip;
1810 Int4 i;
1811 SAIndex2Ptr saip;
1812
1813 amaip = (AMAlignIndex2Ptr)(sap->saip);
1814 for (i=0; i<amaip->numsaps; i++)
1815 {
1816 saip = (SAIndex2Ptr)(amaip->saps[i]->saip);
1817 saip->aligned = amaip->aligned[i];
1818 }
1819 HeapSort(amaip->saps, amaip->numsaps, sizeof(amaip->saps), AlnMgr2CompareIds);
1820 for (i=0; i<amaip->numsaps; i++)
1821 {
1822 saip = (SAIndex2Ptr)(amaip->saps[i]->saip);
1823 amaip->aligned[i] = saip->aligned;
1824 }
1825 }
1826
1827 /* SECTION 2c */
AlnMgr2CompareIds(VoidPtr ptr1,VoidPtr ptr2)1828 static int LIBCALLBACK AlnMgr2CompareIds(VoidPtr ptr1, VoidPtr ptr2)
1829 {
1830 Int4 ret;
1831 SAIndex2Ptr saip1;
1832 SAIndex2Ptr saip2;
1833 SeqAlignPtr sap1;
1834 SeqAlignPtr sap2;
1835 SeqIdPtr sip1;
1836 SeqIdPtr sip2;
1837
1838 if (ptr1 == NULL || ptr2 == NULL)
1839 return 0;
1840 sap1 = *((SeqAlignPtr PNTR) ptr1);
1841 sap2 = *((SeqAlignPtr PNTR) ptr2);
1842 sip1 = AlnMgr2GetNthSeqIdPtr(sap1, 1);
1843 sip2 = AlnMgr2GetNthSeqIdPtr(sap2, 1);
1844 ret = (AlnMgr2OrderSeqIds(sip1, sip2));
1845 SeqIdFree(sip1);
1846 SeqIdFree(sip2);
1847 if (ret != 0)
1848 return ret;
1849 saip1 = (SAIndex2Ptr)(sap1->saip);
1850 saip2 = (SAIndex2Ptr)(sap2->saip);
1851 if (saip1->score == 0)
1852 saip1->score = AlnMgr2ComputeScoreForSeqAlign(sap1);
1853 if (saip2->score == 0)
1854 saip2->score = AlnMgr2ComputeScoreForSeqAlign(sap2);
1855 if (saip1->score > saip2->score)
1856 return -1;
1857 if (saip1->score < saip2->score)
1858 return 1;
1859 return 0;
1860 }
1861
1862 /* SECTION 2c */
1863 /***************************************************************************
1864 *
1865 * Given an indexed seqalign set, AlnMgr2TossWorse looks at the indicated
1866 * pair of seqaligns, gets their scores, and sets the unaligned bit of the
1867 * seqalign with the worse score.
1868 *
1869 ***************************************************************************/
AlnMgr2TossWorse(SeqAlignPtr sap,Int4 i,Int4 j)1870 static void AlnMgr2TossWorse(SeqAlignPtr sap, Int4 i, Int4 j)
1871 {
1872 AMAlignIndex2Ptr amaip;
1873 SAIndex2Ptr saip1;
1874 SAIndex2Ptr saip2;
1875 Int4 score1;
1876 Int4 score2;
1877
1878 amaip = (AMAlignIndex2Ptr)(sap->saip);
1879 saip1 = (SAIndex2Ptr)(amaip->saps[i]->saip);
1880 saip2 = (SAIndex2Ptr)(amaip->saps[j]->saip);
1881 if (saip1->score == 0)
1882 saip1->score = score1 = AlnMgr2ComputeScoreForSeqAlign(amaip->saps[i]);
1883 else
1884 score1 = saip1->score;
1885 if (saip1->score == 0)
1886 saip2->score = score2 = AlnMgr2ComputeScoreForSeqAlign(amaip->saps[j]);
1887 else
1888 score2 = saip2->score;
1889 if (score1 >= score2)
1890 amaip->aligned[j] = FALSE;
1891 else if (score2 > score1)
1892 amaip->aligned[i] = FALSE;
1893 }
1894
1895 /* SECTION 2c */
1896 /***************************************************************************
1897 *
1898 * AlnMgr2MakeIntervals takes every row from every seqalign and bins it
1899 * with other sequences with the same seqid and the same strand.
1900 *
1901 ***************************************************************************/
AlnMgr2MakeIntervals(SeqAlignPtr sap)1902 static AMIntervalSetPtr AlnMgr2MakeIntervals(SeqAlignPtr sap)
1903 {
1904 AMAlignIndex2Ptr amaip;
1905 AMIntervalSetPtr amint;
1906 AMIntervalSetPtr amint_head;
1907 AMIntervalSetPtr amint_prev;
1908 Boolean found;
1909 Int4 i;
1910 AMIntervalPtr intv;
1911 AMIntervalPtr int_prev;
1912 Int4 j;
1913 Int4 k;
1914 SeqIdPtr sip;
1915 Uint1 strand;
1916
1917 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
1918 return NULL;
1919 amaip = (AMAlignIndex2Ptr)(sap->saip);
1920 amint_head = amint_prev = NULL;
1921 for (i=0; i<amaip->numsaps; i++)
1922 {
1923 if (amaip->aligned[i])
1924 {
1925 j = AlnMgr2GetNumRows(amaip->saps[i]);
1926 for (k=0; k<j; k++)
1927 {
1928 intv = (AMIntervalPtr)MemNew(sizeof(AMInterval));
1929 AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], k+1, &(intv->from), &(intv->to));
1930 sip = AlnMgr2GetNthSeqIdPtr(amaip->saps[i], k+1);
1931 strand = AlnMgr2GetNthStrand(amaip->saps[i], k+1);
1932 if (strand != Seq_strand_minus)
1933 strand = Seq_strand_plus; /* to avoid dealing with Seq_strand_unknown */
1934 intv->strand = strand;
1935 if (amint_head != NULL) /* figure out which interval set this goes in */
1936 {
1937 amint = amint_head;
1938 found = FALSE;
1939 while (amint != NULL && !found)
1940 {
1941 if (SeqIdComp(sip, amint->sip) == SIC_YES && strand == amint->strand)
1942 found = TRUE;
1943 else
1944 amint = amint->next;
1945 }
1946 if (found) /* add this to the interval set matched */
1947 {
1948 int_prev = amint->int_head;
1949 while (int_prev->next != NULL)
1950 {
1951 int_prev = int_prev->next;
1952 }
1953 int_prev->next = intv;
1954 } else /* make a new interval set */
1955 {
1956 amint = (AMIntervalSetPtr)MemNew(sizeof(AMIntervalSet));
1957 amint->sip = SeqIdDup(sip);
1958 amint->strand = strand;
1959 amint->int_head = intv;
1960 amint_prev = amint_head;
1961 while (amint_prev->next != NULL)
1962 {
1963 amint_prev = amint_prev->next;
1964 }
1965 amint_prev->next = amint;
1966 }
1967 } else /* make a new interval set */
1968 {
1969 amint = (AMIntervalSetPtr)MemNew(sizeof(AMIntervalSet));
1970 amint->sip = SeqIdDup(sip);
1971 amint->strand = strand;
1972 amint->int_head = intv;
1973 amint_head = amint;
1974 }
1975 SeqIdFree(sip);
1976 }
1977 }
1978 }
1979 return amint_head;
1980 }
1981
1982 /* SECTION 2c */
1983 /***************************************************************************
1984 *
1985 * AlnMgr2MakeVerticesFromIntervals takes the set of intervals created from
1986 * the alignments, and makes nonoverlapping vertices. Each vertex is a
1987 * single seqid plus a start and stop (so one seqid may have more than one
1988 * vertex). Each vertex is also associated with edges, or alignments, which
1989 * link the vertices together. An edge is simply two vertices plus a weight,
1990 * which is the alignment quality score. This function creates the vertices,
1991 * then creates the edges, and sorts the edges and vertices by quality and
1992 * by number of edges per vertex.
1993 *
1994 ***************************************************************************/
AlnMgr2MakeVerticesFromIntervals(SeqAlignPtr sap,AMIntervalSetPtr amint_head,AMVertexPtr PNTR vertexhead,AMEdgePtr PNTR edgehead,Int4Ptr numvertices)1995 static AMVertexPtr PNTR AlnMgr2MakeVerticesFromIntervals(SeqAlignPtr sap, AMIntervalSetPtr amint_head, AMVertexPtr PNTR vertexhead, AMEdgePtr PNTR edgehead, Int4Ptr numvertices)
1996 {
1997 AMAlignIndex2Ptr amaip;
1998 AMIntervalSetPtr amint;
1999 AMEdgePtr edge;
2000 AMEdgePtr edge_head;
2001 AMEdgePtr edge_prev;
2002 Int4 i;
2003 AMIntervalPtr intv;
2004 Int4 j;
2005 Int4 k;
2006 Int4 n;
2007 SAIndex2Ptr saip;
2008 SeqIdPtr sip1;
2009 SeqIdPtr sip2;
2010 Int4 start;
2011 Int4 stop;
2012 Int4 v1;
2013 Int4 v2;
2014 AMVertexPtr vertex;
2015 AMVertexPtr vertex_head;
2016 AMVertexPtr vertex_prev;
2017 AMVertexPtr PNTR vertexarray;
2018
2019 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
2020 return NULL;
2021 amint = amint_head;
2022 vertex_head = vertex_prev = NULL;
2023 while (amint != NULL)
2024 {
2025 AlnMgr2SortIntervals(amint);
2026 vertex = (AMVertexPtr)MemNew(sizeof(AMVertex));
2027 intv = amint->int_head;
2028 vertex->sip = SeqIdDup(amint->sip);
2029 vertex->strand = amint->strand;
2030 vertex->from = intv->from;
2031 vertex->to = intv->to;
2032 intv = intv->next;
2033 while (intv != NULL)
2034 {
2035 if ((intv->from <= vertex->to && intv->from >= vertex->from) || (intv->to <= vertex->to && intv->to >= vertex->from))
2036 {
2037 if (intv->from < vertex->from)
2038 vertex->from = intv->from;
2039 if (intv->to > vertex->to)
2040 vertex->to = intv->to;
2041 } else
2042 {
2043 if (vertex_head != NULL)
2044 {
2045 vertex_prev->next = vertex;
2046 vertex_prev = vertex;
2047 } else
2048 vertex_head = vertex_prev = vertex;
2049 vertex = (AMVertexPtr)MemNew(sizeof(AMVertex));
2050 vertex->from = intv->from;
2051 vertex->to = intv->to;
2052 vertex->sip = SeqIdDup(amint->sip);
2053 vertex->strand = amint->strand;
2054 }
2055 intv = intv->next;
2056 }
2057 if (vertex_head != NULL)
2058 {
2059 vertex_prev->next = vertex;
2060 vertex_prev = vertex;
2061 } else
2062 vertex_head = vertex_prev = vertex;
2063 amint = amint->next;
2064 }
2065 vertex = vertex_head;
2066 i = 0;
2067 while (vertex != NULL)
2068 {
2069 i++;
2070 vertex = vertex->next;
2071 }
2072 vertexarray = (AMVertexPtr PNTR)MemNew(i*sizeof(AMVertexPtr));
2073 *numvertices = i;
2074 vertex = vertex_head;
2075 i = 0;
2076 while (vertex != NULL)
2077 {
2078 vertexarray[i] = vertex;
2079 vertex = vertex->next;
2080 i++;
2081 }
2082 amaip = (AMAlignIndex2Ptr)(sap->saip);
2083 /* now make the edges from the alignments */
2084 edge_head = NULL;
2085 for (i=0; i<amaip->numsaps; i++)
2086 {
2087 if (amaip->aligned[i])
2088 {
2089 j = AlnMgr2GetNumRows(amaip->saps[i]);
2090 for (k=0; k<j; k++)
2091 {
2092 sip1 = AlnMgr2GetNthSeqIdPtr(amaip->saps[i], k+1);
2093 AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], k+1, &start, &stop);
2094 v1 = AlnMgr2MatchToVertex(sip1, start, stop, vertexarray, *numvertices);
2095 for (n=k+1; n<j; n++)
2096 {
2097 vertexarray[v1]->numedges++;
2098 sip2 = AlnMgr2GetNthSeqIdPtr(amaip->saps[i], n+1);
2099 AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], n+1, &start, &stop);
2100 v2 = AlnMgr2MatchToVertex(sip2, start, stop, vertexarray, *numvertices);
2101 vertexarray[v2]->numedges++;
2102 edge = (AMEdgePtr)MemNew(sizeof(AMEdge));
2103 edge->vertex1 = v1;
2104 edge->vertex2 = v2;
2105 saip = NULL;
2106 if (amaip->saps[i]->saip != NULL)
2107 saip = (SAIndex2Ptr)(amaip->saps[i]->saip);
2108 if (saip != NULL && saip->score != 0)
2109 edge->weight = saip->score;
2110 else
2111 edge->weight = AlnMgr2ComputeScoreForSeqAlign(amaip->saps[i]);
2112 edge->sap = amaip->saps[i];
2113 edge->used = 0;
2114 if (edge_head != NULL)
2115 {
2116 edge_prev->next = edge;
2117 edge_prev = edge;
2118 } else
2119 edge_head = edge_prev = edge;
2120 SeqIdFree(sip2);
2121 }
2122 SeqIdFree(sip1);
2123 }
2124 }
2125 }
2126 AlnMgr2SortEdgesByWeight(&edge_head);
2127 *vertexhead = vertexarray[0];
2128 *edgehead = edge_head;
2129 return vertexarray;
2130 }
2131
2132 /* SECTION 2C */
2133 /***************************************************************************
2134 *
2135 * AlnMgr2SortVerticesByNumEdges -- the name says it all -- each vertex is
2136 * associated with one or more edges and the most populated vertices get
2137 * put first.
2138 *
2139 ***************************************************************************/
AlnMgr2SortVerticesByNumEdges(AMVertexPtr PNTR vertexarray,Int4 numvertices)2140 static void AlnMgr2SortVerticesByNumEdges(AMVertexPtr PNTR vertexarray, Int4 numvertices)
2141 {
2142 Int4 i;
2143
2144 HeapSort(vertexarray, numvertices, sizeof(vertexarray), AlnMgr2CompareVertices);
2145 for (i=0; i<numvertices-1; i++)
2146 {
2147 vertexarray[i]->next = vertexarray[i+1];
2148 }
2149 vertexarray[numvertices-1]->next = NULL;
2150 }
2151
2152 /* SECTION 2c */
2153 /***************************************************************************
2154 *
2155 * AlnMgr2CompareVertices is the HeapSort callback for
2156 * AlnMgr2SortVerticesByNumEdges.
2157 *
2158 ***************************************************************************/
AlnMgr2CompareVertices(VoidPtr ptr1,VoidPtr ptr2)2159 static int LIBCALLBACK AlnMgr2CompareVertices(VoidPtr ptr1, VoidPtr ptr2)
2160 {
2161 AMVertexPtr vertex1;
2162 AMVertexPtr vertex2;
2163
2164 if (ptr1 != NULL && ptr2 != NULL)
2165 {
2166 vertex1 = *((AMVertexPtr PNTR)ptr1);
2167 vertex2 = *((AMVertexPtr PNTR)ptr2);
2168 if (vertex1->numedges > vertex2->numedges)
2169 return -1;
2170 else if (vertex1->numedges < vertex2->numedges)
2171 return 1;
2172 else
2173 return 0;
2174 }
2175 return 0;
2176 }
2177
2178 /* SECTION 2C */
2179 /***************************************************************************
2180 *
2181 * AlnMgr2SortEdgesByWeight takes a set of edges (alignments) and sorts
2182 * them by their preset weights (alignment scores), using AlnMgr2CompareEdges
2183 * as its HeapSort callback.
2184 *
2185 ***************************************************************************/
AlnMgr2SortEdgesByWeight(AMEdgePtr PNTR edge_head)2186 static void AlnMgr2SortEdgesByWeight(AMEdgePtr PNTR edge_head)
2187 {
2188 AMEdgePtr edge;
2189 AMEdgePtr PNTR edgearray;
2190 Int4 i;
2191 Int4 j;
2192
2193 if (edge_head == NULL || *edge_head == NULL)
2194 return;
2195 edge = *edge_head;
2196 i = 0;
2197 while (edge != NULL)
2198 {
2199 i++;
2200 edge = edge->next;
2201 }
2202 edgearray = (AMEdgePtr PNTR)MemNew(i*sizeof(AMEdgePtr));
2203 edge = *edge_head;
2204 i = 0;
2205 while (edge != NULL)
2206 {
2207 edgearray[i] = edge;
2208 edge = edge->next;
2209 i++;
2210 }
2211 HeapSort(edgearray, i, sizeof(edgearray), AlnMgr2CompareEdges);
2212 for (j=0; j<i-1; j++)
2213 {
2214 edgearray[j]->next = edgearray[j+1];
2215 }
2216 edgearray[i-1]->next = NULL;
2217 *edge_head = edgearray[0];
2218 MemFree(edgearray);
2219 }
2220
2221 /* SECTION 2c */
2222 /***************************************************************************
2223 *
2224 * AlnMgr2CompareEdges is the HeapSort callback for AlnMgr2SortEdgesByWeight.
2225 * It simply compares the preset edge weights.
2226 *
2227 ***************************************************************************/
AlnMgr2CompareEdges(VoidPtr ptr1,VoidPtr ptr2)2228 static int LIBCALLBACK AlnMgr2CompareEdges(VoidPtr ptr1, VoidPtr ptr2)
2229 {
2230 AMEdgePtr edge1;
2231 AMEdgePtr edge2;
2232
2233 if (ptr1 != NULL && ptr2 != NULL)
2234 {
2235 edge1 = *((AMEdgePtr PNTR)ptr1);
2236 edge2 = *((AMEdgePtr PNTR)ptr2);
2237 if (edge1->weight > edge2->weight)
2238 return -1;
2239 else if (edge1->weight < edge2->weight)
2240 return 1;
2241 else
2242 return 0;
2243 }
2244 return 0;
2245 }
2246
2247 /* SECTION 2c */
2248 /***************************************************************************
2249 *
2250 * AlnMgr2MatchToVertex is called by AlnMgr2MakeVerticesFromIntervals to
2251 * figure out which vertex in the array the seqid, start, and stop match to.
2252 *
2253 ***************************************************************************/
AlnMgr2MatchToVertex(SeqIdPtr sip,Int4 start,Int4 stop,AMVertexPtr PNTR vertexarray,Int4 numvertices)2254 static Int4 AlnMgr2MatchToVertex(SeqIdPtr sip, Int4 start, Int4 stop, AMVertexPtr PNTR vertexarray, Int4 numvertices)
2255 {
2256 Int4 i;
2257
2258 if (sip == NULL || vertexarray == NULL)
2259 return -1;
2260 i = 0;
2261 while (i<numvertices)
2262 {
2263 if (SeqIdComp(sip, vertexarray[i]->sip) == SIC_YES)
2264 {
2265 if (start >= vertexarray[i]->from && start <= vertexarray[i]->to && stop >= vertexarray[i]->from && stop <= vertexarray[i]->to)
2266 return i;
2267 }
2268 i++;
2269 }
2270 return -1;
2271 }
2272
2273 /* SECTION 2c */
2274 /***************************************************************************
2275 *
2276 * AlnMgr2SortIntervals sorts the AMIntervals by start position within the
2277 * set, calling AlnMgr2CompareIntervals in a HeapSort.
2278 *
2279 ***************************************************************************/
AlnMgr2SortIntervals(AMIntervalSetPtr amint)2280 static void AlnMgr2SortIntervals(AMIntervalSetPtr amint)
2281 {
2282 Int4 i;
2283 AMIntervalPtr PNTR intarray;
2284 AMIntervalPtr intv;
2285 AMIntervalPtr intv_head;
2286 Int4 j;
2287
2288 i = 0;
2289 intv = amint->int_head;
2290 while (intv != NULL)
2291 {
2292 i++;
2293 intv = intv->next;
2294 }
2295 intarray = (AMIntervalPtr PNTR)MemNew(i*sizeof(AMIntervalPtr));
2296 intv = amint->int_head;
2297 i = 0;
2298 while (intv != NULL)
2299 {
2300 intarray[i] = intv;
2301 intv = intv->next;
2302 i++;
2303 }
2304 HeapSort(intarray, i, sizeof(intarray), AlnMgr2CompareIntervals);
2305 intv_head = intv = intarray[0];
2306 for (j=1; j<i; j++)
2307 {
2308 intv->next = intarray[j];
2309 intarray[j]->next = NULL;
2310 intv = intv->next;
2311 }
2312 amint->int_head = intv_head;
2313 MemFree(intarray);
2314 }
2315
2316 /* SECTION 2c */
2317 /***************************************************************************
2318 *
2319 * AlnMgr2CompareIntervals is the HeapSort callback for
2320 * AlnMgr2SortIntervals, which sorts a set of AMIntervals by start position.
2321 *
2322 ***************************************************************************/
AlnMgr2CompareIntervals(VoidPtr ptr1,VoidPtr ptr2)2323 static int LIBCALLBACK AlnMgr2CompareIntervals(VoidPtr ptr1, VoidPtr ptr2)
2324 {
2325 AMIntervalPtr intv1;
2326 AMIntervalPtr intv2;
2327
2328 if (ptr1 != NULL && ptr2 != NULL)
2329 {
2330 intv1 = *((AMIntervalPtr PNTR)ptr1);
2331 intv2 = *((AMIntervalPtr PNTR)ptr2);
2332 if (intv1->from > intv2->from)
2333 return 1;
2334 else if (intv1->from < intv2->from)
2335 return -1;
2336 else
2337 {
2338 if (intv1->to > intv2->to)
2339 return 1;
2340 else
2341 return -1;
2342 }
2343 }
2344 return 0;
2345 }
2346
2347 /* SECTION 2c */
2348 /***************************************************************************
2349 *
2350 * AlnMgr2UsePrimsAlgorithm takes the set of edges and vertices produced by
2351 * earlier functions and creates a subset of edges that can be made into
2352 * a multiple alignment.
2353 *
2354 ***************************************************************************/
AlnMgr2UsePrimsAlgorithm(AMVertexPtr PNTR vertexarray,Int4 numvertices,AMEdgePtr edge_head)2355 static void AlnMgr2UsePrimsAlgorithm(AMVertexPtr PNTR vertexarray, Int4 numvertices, AMEdgePtr edge_head)
2356 {
2357 if (vertexarray == NULL || edge_head == NULL)
2358 return;
2359 edge_head->used = AM_USED;
2360 vertexarray[edge_head->vertex1]->used = TRUE;
2361 vertexarray[edge_head->vertex2]->used = TRUE;
2362 AlnMgr2RecursePrims(vertexarray, edge_head);
2363 AlnMgr2CleanUpLeftovers(vertexarray, numvertices, edge_head);
2364 return;
2365 }
2366
2367 /* SECTION 2C */
AlnMgr2GetEdgeList(Int4 vertexnum,AMEdgePtr edge_head,AMEdgePtr already_used)2368 static AMEdgePtr AlnMgr2GetEdgeList(Int4 vertexnum, AMEdgePtr edge_head, AMEdgePtr already_used)
2369 {
2370 AMEdgePtr edge;
2371 AMEdgePtr list;
2372 AMEdgePtr list_head;
2373 AMEdgePtr list_prev;
2374
2375 edge = edge_head;
2376 list_head = NULL;
2377 while (edge != NULL)
2378 {
2379 if ((edge->vertex1 == vertexnum || edge->vertex2 == vertexnum) && edge != already_used)
2380 {
2381 list = (AMEdgePtr)MemNew(sizeof(AMEdge));
2382 list->vertex1 = edge->vertex1;
2383 list->vertex2 = edge->vertex2;
2384 list->weight = edge->weight;
2385 list->used = edge->used;
2386 if (list_head != NULL)
2387 {
2388 list_prev->next = list;
2389 list_prev = list;
2390 } else
2391 list_head = list_prev = list;
2392 }
2393 edge = edge->next;
2394 }
2395 return list_head;
2396 }
2397
2398 /* SECTION 2C */
2399 /***************************************************************************
2400 *
2401 * AlnMgr2GetBetterVertex returns the vertex of the edge indicated that
2402 * is shared by the largest number of other edges.
2403 *
2404 ***************************************************************************/
AlnMgr2GetBetterVertex(AMVertexPtr PNTR vertexarray,AMEdgePtr edge)2405 static AMVertexPtr AlnMgr2GetBetterVertex(AMVertexPtr PNTR vertexarray, AMEdgePtr edge)
2406 {
2407 if (vertexarray[edge->vertex1]->numedges >= vertexarray[edge->vertex2]->numedges)
2408 return vertexarray[edge->vertex1];
2409 else
2410 return vertexarray[edge->vertex2];
2411 }
2412
2413 /* SECTION 2C */
2414 /***************************************************************************
2415 *
2416 * AlnMgr2RecursePrims is a simple yet powerful algorithm that builds a
2417 * minimal spanning tree of the edges and vertexes by starting with a set
2418 * of edges and vertices, picking the best/shortest edge, then picking
2419 * other edges one by one that join a vertex in the set with a vertex not
2420 * in the set, until all edges are used (or deemed impossible).
2421 *
2422 ***************************************************************************/
AlnMgr2RecursePrims(AMVertexPtr PNTR vertexarray,AMEdgePtr edge_head)2423 static void AlnMgr2RecursePrims(AMVertexPtr PNTR vertexarray, AMEdgePtr edge_head)
2424 {
2425 AMEdgePtr edge;
2426 Boolean found;
2427
2428 edge = edge_head;
2429 found = FALSE;
2430 /* find an edge that isn't used, that joins a vertex in the set */
2431 /* with a vertex outside the set, and add it and the new vertex */
2432 while (edge != NULL && !found)
2433 {
2434 if (edge->used == AM_NOTUSED)
2435 {
2436 if (vertexarray[edge->vertex1]->used != vertexarray[edge->vertex2]->used)
2437 {
2438 found = TRUE;
2439 vertexarray[edge->vertex1]->used = TRUE;
2440 vertexarray[edge->vertex2]->used = TRUE;
2441 edge->used = AM_USED;
2442 AlnMgr2RecursePrims(vertexarray, edge_head);
2443 }
2444 }
2445 edge = edge->next;
2446 }
2447 }
2448
2449 /* SECTION 2C */
2450 /***************************************************************************
2451 *
2452 * AlnMgr2CleanUpLeftovers takes the edges that are unused after
2453 * AlnMgr2RecursePrims and looks for edges that duplicate another edge in
2454 * the set or edges that share a seqid (but not a vertex) with another edge
2455 * already in the set. It adds these edges to the set; they don't belong
2456 * there in tree-based terms but as alignments they are related.
2457 *
2458 ***************************************************************************/
AlnMgr2CleanUpLeftovers(AMVertexPtr PNTR vertexarray,Int4 numvertices,AMEdgePtr edge_head)2459 static void AlnMgr2CleanUpLeftovers(AMVertexPtr PNTR vertexarray, Int4 numvertices, AMEdgePtr edge_head)
2460 {
2461 AMEdgePtr edge;
2462 AMEdgePtr edge_tmp;
2463 Boolean found;
2464 Int4 i;
2465 BoolPtr tmpverts;
2466
2467 tmpverts = (BoolPtr)MemNew(numvertices*sizeof(Boolean));
2468 for (i=0; i<numvertices; i++)
2469 {
2470 tmpverts[i] = vertexarray[i]->used;
2471 }
2472 edge = edge_head;
2473 while (edge != NULL)
2474 {
2475 if (edge->used == AM_NOTUSED)
2476 {
2477 if (tmpverts[edge->vertex1] == TRUE && tmpverts[edge->vertex2] == TRUE)
2478 {
2479 /* see if this edge duplicates another edge; if so, add it */
2480 edge_tmp = edge_head;
2481 found = FALSE;
2482 while (edge_tmp != NULL && !found)
2483 {
2484 if ((edge->vertex1 == edge_tmp->vertex1 && edge->vertex2 == edge_tmp->vertex2) || (edge->vertex1 == edge_tmp->vertex2 && edge->vertex2 == edge_tmp->vertex1))
2485 {
2486 found = TRUE;
2487 edge->used = AM_USED;
2488 }
2489 edge_tmp = edge_tmp->next;
2490 }
2491 if (!found)
2492 edge->used = AM_CONFLICT;
2493 } else if (tmpverts[edge->vertex1] == FALSE && tmpverts[edge->vertex2] == FALSE)
2494 {
2495 /* if one of the vertices shares a seqid with a vertex in the set, put both vertices */
2496 /* and the edge in the set. */
2497 found = FALSE;
2498 for (i=0; i<numvertices && !found; i++)
2499 {
2500 if (tmpverts[i] == TRUE && (SeqIdComp(vertexarray[i]->sip, vertexarray[edge->vertex1]->sip) == SIC_YES || SeqIdComp(vertexarray[i]->sip, vertexarray[edge->vertex2]->sip) == SIC_YES))
2501 {
2502 found = TRUE;
2503 vertexarray[edge->vertex1]->used = TRUE;
2504 vertexarray[edge->vertex2]->used = TRUE;
2505 edge->used = AM_USED;
2506 }
2507 }
2508 if (!found)
2509 edge->used = AM_CONFLICT;
2510 }
2511 }
2512 edge = edge->next;
2513 }
2514 MemFree(tmpverts);
2515 }
2516
2517 /* SECTION 2C */
2518 /***************************************************************************
2519 *
2520 * AlnMgr2SameSeq decides whether two vertices come from the same
2521 * sequence (simple seqid compare).
2522 *
2523 ***************************************************************************/
AlnMgr2SameSeq(AMVertexPtr vertex1,AMVertexPtr vertex2)2524 static Boolean AlnMgr2SameSeq(AMVertexPtr vertex1, AMVertexPtr vertex2)
2525 {
2526 if (vertex1 == NULL || vertex2 == NULL)
2527 return FALSE;
2528 if (SeqIdComp(vertex1->sip, vertex2->sip) == SIC_YES)
2529 return TRUE;
2530 else
2531 return FALSE;
2532 }
2533
2534
2535 /* SECTION 2C */
2536 /***************************************************************************
2537 *
2538 * AlnMgr2BuildAlignmentFromTree performs a breadth-first traversal of
2539 * the tree, adding edges to the growing alignment as it goes.
2540 *
2541 ***************************************************************************/
AlnMgr2BuildAlignmentFromTree(AMVertexPtr PNTR vertexarray,Int4 numvertices,AMEdgePtr edge_head,SeqAlignPtr sap)2542 static void AlnMgr2BuildAlignmentFromTree(AMVertexPtr PNTR vertexarray, Int4 numvertices, AMEdgePtr edge_head, SeqAlignPtr sap)
2543 {
2544 AMAlignIndex2Ptr amaip;
2545 AMVertexPtr adj;
2546 AMVertexPtr adj_head;
2547 AMEdgePtr edge;
2548 Int4 i;
2549 Int4 j;
2550 AMQueuePtr q;
2551 AMQueuePtr q_head;
2552 AMQueuePtr q_prev;
2553
2554 amaip = (AMAlignIndex2Ptr)(sap->saip);
2555 AlnMgr2AddInNewPairwiseSA(sap, edge_head->sap);
2556 edge_head->aligned = TRUE;
2557 q_head = (AMQueuePtr)MemNew(sizeof(AMQueue));
2558 q_head->vertex = AlnMgr2GetBetterVertex(vertexarray, edge_head);
2559 q_head->vertex->visited = TRUE;
2560 /* unlink the vertices */
2561 for (i=0; i<numvertices; i++)
2562 {
2563 vertexarray[i]->next = NULL;
2564 }
2565 while (q_head != NULL)
2566 {
2567 q_prev = q_head;
2568 while (q_prev->next != NULL)
2569 {
2570 q_prev = q_prev->next;
2571 }
2572 adj_head = AlnMgr2GetAdjacentVertices(q_head->vertex, vertexarray, edge_head);
2573 adj = adj_head;
2574 while (adj != NULL)
2575 {
2576 if (adj->visited == FALSE)
2577 {
2578 edge = edge_head;
2579 while (edge != NULL)
2580 {
2581 /* if the edge is used in the tree but not yet aligned, and it's adjacent, align it */
2582 if (edge->aligned == FALSE && edge->used == AM_USED && ((AlnMgr2SameSeq(vertexarray[edge->vertex1], q_head->vertex) && AlnMgr2SameSeq(vertexarray[edge->vertex2], adj)) || (AlnMgr2SameSeq(vertexarray[edge->vertex1], adj) && AlnMgr2SameSeq(vertexarray[edge->vertex2], q_head->vertex))))
2583 {
2584 AlnMgr2AddInNewPairwiseSA(sap, edge->sap);
2585 edge->aligned = TRUE;
2586 }
2587 edge = edge->next;
2588 }
2589 q = (AMQueuePtr)MemNew(sizeof(AMQueue));
2590 q->vertex = adj;
2591 q_prev->next = q;
2592 q_prev = q;
2593 adj->visited = TRUE;
2594 }
2595 adj = adj->next;
2596 }
2597 q = q_head->next;
2598 MemFree(q_head);
2599 q_head = q;
2600 if (q_head == NULL) /* look for discontinuous sets -- those will be left over */
2601 {
2602 edge = edge_head;
2603 while (edge != NULL && q_head == NULL)
2604 {
2605 if (edge->aligned == FALSE && (vertexarray[edge->vertex1]->visited == FALSE || vertexarray[edge->vertex2]->visited == FALSE))
2606 {
2607 q_head = (AMQueuePtr)MemNew(sizeof(AMQueue));
2608 q_head->vertex = AlnMgr2GetBetterVertex(vertexarray, edge);
2609 vertexarray[edge->vertex1]->visited = vertexarray[edge->vertex2]->visited = TRUE;
2610 }
2611 edge = edge->next;
2612 }
2613 }
2614 }
2615 /* now the vertices are no longer in a linked list -> put them back together */
2616 for (j=0; j<i-1; j++)
2617 {
2618 vertexarray[j]->next = vertexarray[j+1];
2619 vertexarray[j+1]->next = NULL;
2620 }
2621 AlnMgr2CondenseColumns((DenseSegPtr)(amaip->sharedaln->segs));
2622 AlnMgr2IndexSingleChildSeqAlign(amaip->sharedaln);
2623 }
2624
2625 /* SECTION 2c */
2626 /***************************************************************************
2627 *
2628 * AlnMgr2GetAdjacentVertices returns a linked list of all vertices which
2629 * are adjacent to the given edge; that is, it returns a list of all
2630 * vertices which are linked by an edge to either vertex of the given edge.
2631 *
2632 ***************************************************************************/
AlnMgr2GetAdjacentVertices(AMVertexPtr vertex,AMVertexPtr PNTR vertexarray,AMEdgePtr edge_head)2633 static AMVertexPtr AlnMgr2GetAdjacentVertices(AMVertexPtr vertex, AMVertexPtr PNTR vertexarray, AMEdgePtr edge_head)
2634 {
2635 AMVertexPtr adj_head;
2636 AMVertexPtr adj_prev;
2637 AMEdgePtr edge;
2638
2639 edge = edge_head;
2640 adj_head = adj_prev = NULL;
2641 while (edge != NULL)
2642 {
2643 if (AlnMgr2SameSeq(vertexarray[edge->vertex1], vertex))
2644 {
2645 if (adj_head == NULL)
2646 adj_head = adj_prev = vertexarray[edge->vertex2];
2647 else
2648 {
2649 adj_prev->next = vertexarray[edge->vertex2];
2650 adj_prev = adj_prev->next;
2651 }
2652 } else if (AlnMgr2SameSeq(vertexarray[edge->vertex2], vertex))
2653 {
2654 if (adj_head == NULL)
2655 adj_head = adj_prev = vertexarray[edge->vertex1];
2656 else
2657 {
2658 adj_prev->next = vertexarray[edge->vertex1];
2659 adj_prev = adj_prev->next;
2660 }
2661 }
2662 if (adj_prev != NULL)
2663 adj_prev->next = NULL;
2664 edge = edge->next;
2665 }
2666 return adj_head;
2667 }
2668
2669 /* SECTION 2c */
2670
AlnMgr2GetFirstRowForSeqId(DenseSegPtr dsp,SeqIdPtr sip,Uint1 strand,Int4Ptr row_curr,SeqIdPtr PNTR sip_curr)2671 static Boolean AlnMgr2GetFirstRowForSeqId(
2672 DenseSegPtr dsp,
2673 SeqIdPtr sip,
2674 Uint1 strand,
2675 Int4Ptr row_curr,
2676 SeqIdPtr PNTR sip_curr)
2677 {
2678 Boolean found = FALSE;
2679
2680 while (*sip_curr) {
2681 (*row_curr)++;
2682 if (SeqIdComp(sip, *sip_curr) == SIC_YES &&
2683 strand == dsp->strands[*row_curr]) {
2684 found = TRUE;
2685 }
2686 *sip_curr = (*sip_curr)->next;
2687 if (found) return TRUE;
2688 }
2689 return FALSE;
2690 }
2691
2692
AlnMgr2CreateSeqPieceSet(DenseSegPtr dsp,Int4 row)2693 static AMSeqPieceSetPtr AlnMgr2CreateSeqPieceSet(DenseSegPtr dsp, Int4 row)
2694 {
2695 AMSeqPieceSetPtr s_set = (AMSeqPieceSetPtr)MemNew(sizeof(AMSeqPieceSet));
2696 AMSeqPiecePtr s = (AMSeqPiecePtr)MemNew(sizeof(AMSeqPiece));
2697 s->beg = -1;
2698 s->end = -1;
2699 s->seg = -1;
2700 s->pos = row - dsp->dim;
2701 s->set = s_set;
2702 s->prev = NULL;
2703 s->next = NULL;
2704 s->left = -1;
2705 s->right = -1;
2706 s->orig_left = -2;
2707 s->orig_right = -2;
2708 s->aligned = FALSE;
2709 s->alt_dsp = NULL;
2710 s->alt_seg = -1;
2711 s->alt_pos = -1;
2712
2713 s->next = NULL;
2714
2715 s_set->dsp = dsp;
2716 s_set->row = row;
2717 s_set->row2 = -1;
2718 s_set->alt_row = -1;
2719 s_set->alt_row2 = -1;
2720 s_set->head = s;
2721 s_set->tail = s;
2722 s_set->max_pos = dsp->dim * dsp->numseg;
2723 s_set->strand = dsp->strands[row];
2724 s_set->plus = s_set->strand != Seq_strand_minus;
2725 s_set->next = NULL;
2726
2727 return s_set;
2728 }
2729
/***************************************************************************
*
*  AlnMgr2GetNextSeqPiece appends the piece that follows s in its set.
*  Returns NULL, allocating nothing, when s->pos is already at or beyond
*  the set's max_pos. Otherwise a new piece is linked after s, made the
*  set's tail, and advanced one segment at a time (pos moves by dsp->dim)
*  past segments in which the row is a gap (starts == -1). For the first
*  non-gap segment, beg and end are set from the segment's start and
*  length: beg = start, end = start+len-1 on the plus strand, the other
*  way around on the minus strand. If no such segment remains, the piece
*  is still returned, with beg = end = -1 and pos at or beyond max_pos.
*
***************************************************************************/
static AMSeqPiecePtr AlnMgr2GetNextSeqPiece(AMSeqPiecePtr s)
{
   DenseSegPtr    dsp;
   Int4           last_offset;
   Int4           max_pos;
   AMSeqPiecePtr  piece;

   dsp = s->set->dsp;
   max_pos = s->set->max_pos;

   if (s->pos >= max_pos) {
      return NULL;
   }

   piece = (AMSeqPiecePtr)MemNew(sizeof(AMSeqPiece));
   piece->pos = s->pos + dsp->dim;
   piece->seg = s->seg + 1;
   piece->set = s->set;
   piece->prev = s;
   piece->next = NULL;
   s->next = piece;
   piece->set->tail = piece;

   /* initialize the following */
   piece->left = -1;
   piece->right = -1;
   piece->aligned = FALSE;
   piece->alt_dsp = NULL;
   piece->alt_seg = -1;
   piece->alt_pos = -1;
   piece->orig_left = -2;
   piece->orig_right = -2;

   /* find the beg and end: skip forward over gap segments */
   for (; piece->pos < max_pos; piece->seg++, piece->pos += dsp->dim) {
      if (dsp->starts[piece->pos] == -1) {
         continue;
      }
      last_offset = dsp->lens[piece->seg] - 1;
      piece->beg = piece->end = dsp->starts[piece->pos];
      if (piece->set->plus) {
         piece->end += last_offset;
      } else {
         piece->beg += last_offset;
      }
      return piece;
   }

   /* ran off the end of the dense-seg without finding sequence */
   piece->beg = -1;
   piece->end = -1;
   return piece;
}
2781
/***************************************************************************
*
*  AlnMgr2GetNextLimitedSeqPiece looks for the next non-gap segment of s's
*  row after s, going no further than segment right->seg (and never past
*  the set's max_pos). If one is found, a new piece is linked after s, made
*  the set's tail, and returned; nothing is allocated otherwise and NULL is
*  returned.
*
*  beg/end are set from the segment's start and length (beg = start,
*  end = start+len-1 on the plus strand, the other way around on minus).
*  If the piece lies in segment right->seg it is marked aligned and its
*  left/right are right->beg/right->end. Otherwise it is marked unaligned
*  and its left/right are the end of right->prev and the beg of right.
*
***************************************************************************/
static AMSeqPiecePtr AlnMgr2GetNextLimitedSeqPiece(
   AMSeqPiecePtr s,
   AMSeqPiecePtr right)
{
   DenseSegPtr    dsp;
   AMSeqPiecePtr  left;
   Int4           max_pos;
   Int4           max_seg;
   AMSeqPiecePtr  piece;
   Int4           try_pos;
   Int4           try_seg;

   left = right->prev;
   dsp = s->set->dsp;
   max_pos = s->set->max_pos;
   max_seg = right->seg;

   for (try_pos = s->pos + dsp->dim, try_seg = s->seg + 1;
        try_pos < max_pos && try_seg <= max_seg;
        try_pos += dsp->dim, try_seg++) {
      if (dsp->starts[try_pos] == -1) {
         continue; /* the row is a gap in this segment */
      }
      piece = (AMSeqPiecePtr)MemNew(sizeof(AMSeqPiece));
      piece->pos = try_pos;
      piece->seg = try_seg;
      piece->set = s->set;
      piece->next = NULL;
      piece->prev = s;
      s->next = piece;
      piece->set->tail = piece;

      piece->beg = piece->end = dsp->starts[piece->pos];
      if (piece->set->plus) {
         piece->end += dsp->lens[piece->seg] - 1;
      } else {
         piece->beg += dsp->lens[piece->seg] - 1;
      }

      /* aligned to a sequence in anchor or not */
      if (piece->seg == right->seg) {
         piece->aligned = TRUE;
         piece->left = right->beg;
         piece->right = right->end;
      } else {
         piece->aligned = FALSE;
         piece->left = left->end;
         piece->right = right->beg;
      }

      /* these are not yet used */
      piece->orig_left = -2;
      piece->orig_right = -2;
      piece->alt_dsp = NULL;
      piece->alt_seg = -1;
      piece->alt_pos = -1;
      return piece;
   }
   return NULL;
}
2837
/***************************************************************************
*
*  AlnMgr2AddSeqPiece appends a copy of the piece 'what' to the end of
*  'set'. beg, end, left, right, orig_left, orig_right and aligned are
*  copied as they are. If 'what' belongs to a set over the same dense-seg
*  as 'set', its seg/pos are copied and the alt_* fields are cleared;
*  otherwise seg/pos are set to -1 and the source dense-seg, seg and pos
*  are recorded in alt_dsp/alt_seg/alt_pos.
*
*  The copy becomes the set's tail. If the set has neither a tail nor a
*  head, the copy also becomes its head, so that the first piece appended
*  to an empty set is reachable from set->head.
*
***************************************************************************/
static void AlnMgr2AddSeqPiece(
   AMSeqPieceSetPtr set,
   AMSeqPiecePtr what)
{
   AMSeqPiecePtr  s;
   DenseSegPtr    dsp = set->dsp;
   DenseSegPtr    alt_dsp = what->set->dsp;

   s = (AMSeqPiecePtr)MemNew(sizeof(AMSeqPiece));
   s->beg = what->beg;
   s->end = what->end;

   if (alt_dsp == dsp) {
      /* same dense-seg: the piece's own coordinates apply directly */
      s->seg = what->seg;
      s->pos = what->pos;
      s->alt_dsp = NULL;
      s->alt_seg = -1;
      s->alt_pos = -1;
   } else {
      /* foreign dense-seg: remember where the piece came from */
      s->seg = -1;
      s->pos = -1;
      s->alt_dsp = alt_dsp;
      s->alt_seg = what->seg;
      s->alt_pos = what->pos;
   }
   s->left = what->left;
   s->right = what->right;
   s->orig_left = what->orig_left;
   s->orig_right = what->orig_right;
   s->aligned = what->aligned;
   s->set = set;
   s->next = NULL;
   s->prev = set->tail;
   if (s->prev != NULL) {
      s->prev->next = s;
   } else if (set->head == NULL) {
      set->head = s;
   }
   set->tail = s;
}
2875
/***************************************************************************
*
*  AlnMgr2InsertSeqPiece inserts a new piece immediately before 'where' in
*  where's set. The new piece runs from what->beg to 'end' and takes its
*  left, right, orig_left, orig_right and aligned values from 'what'.
*
*  seg/pos of the new piece:
*   - if 'where' begins at the same position as 'what', they are taken from
*     'where', and where->beg is moved to just past 'end' (end+1 on the
*     plus strand, end-1 on the minus strand);
*   - otherwise they are taken from 'what' when 'what' lives on the same
*     dense-seg as 'where', and are -1 when it does not.
*  The alt_* fields record what's dense-seg, seg and pos when 'what' lives
*  on a different dense-seg, and are cleared otherwise.
*
*  If 'where' has no predecessor and is the head of its set, the new piece
*  becomes the head. The old linking code guarded everything with
*  "where->prev != NULL", so in that case the new piece was allocated but
*  never linked in.
*
***************************************************************************/
static void AlnMgr2InsertSeqPiece(
   AMSeqPiecePtr where,
   AMSeqPiecePtr what,
   Int4 end)
{
   AMSeqPiecePtr  s;
   DenseSegPtr    dsp = where->set->dsp;
   DenseSegPtr    alt_dsp = what->set->dsp;

   s = (AMSeqPiecePtr)MemNew(sizeof(AMSeqPiece));
   s->beg = what->beg;
   s->end = end;

   /* where the new piece sits in the set's own dense-seg */
   if (where->beg == what->beg) {
      s->seg = where->seg;
      s->pos = where->pos;
      /* the new piece takes over the front of 'where' */
      where->beg = end + (where->set->plus? 1 : -1);
   } else if (alt_dsp == dsp) {
      s->seg = what->seg;
      s->pos = what->pos;
   } else {
      s->seg = -1;
      s->pos = -1;
   }

   /* where the new piece came from, if that is a different dense-seg */
   if (alt_dsp == dsp) {
      s->alt_dsp = NULL;
      s->alt_seg = -1;
      s->alt_pos = -1;
   } else {
      s->alt_dsp = alt_dsp;
      s->alt_seg = what->seg;
      s->alt_pos = what->pos;
   }

   s->left = what->left;
   s->right = what->right;
   s->orig_left = what->orig_left;
   s->orig_right = what->orig_right;
   s->aligned = what->aligned;
   s->set = where->set;

   /* splice s in between where->prev and where */
   s->next = where;
   s->prev = where->prev;
   if (s->prev != NULL) {
      s->prev->next = s;
   } else if (s->set->head == where) {
      s->set->head = s;
   }
   where->prev = s;
}
2936
/***************************************************************************
*
*  AlnMgr2CopySegAltRow is a private helper of AlnMgr2CopySeg. It writes
*  the single output cell that is taken from piece->alt_dsp: the cell's
*  strand is copied from alt_dsp and its start is the alt_dsp start
*  shifted by how far the piece was trimmed relative to the alt_dsp
*  segment it came from (left trim on non-minus strands, right trim on
*  minus strands). A start of -1 in alt_dsp is copied through as -1.
*
*  When piece->set->row2 is not -1 the cell at column row2 of the row
*  that has just been written is overwritten and *out_pos_ptr is left
*  alone; when row2 is -1 the cell at *out_pos_ptr is used and
*  *out_pos_ptr is advanced by one.
*
***************************************************************************/
static void AlnMgr2CopySegAltRow(
    DenseSegPtr   DSP,
    AMSeqPiecePtr piece,
    Int4 PNTR     out_pos_ptr)
{
    DenseSegPtr alt = piece->alt_dsp;
    Int4        src_cell;
    Int4        dst_cell;
    Int4        first;
    Int4        last;
    Int4        trim_left;
    Int4        trim_right;
    Boolean     forward;

    /* cell in alt_dsp holding the second row of the same segment */
    src_cell = piece->alt_pos + piece->set->alt_row2 - piece->set->alt_row;

    /* extent of the original alt_dsp segment on the piece's own row */
    first = alt->starts[piece->alt_pos];
    last = first;
    if (alt->strands[piece->alt_pos] == Seq_strand_minus) {
        first += alt->lens[piece->alt_seg] - 1;
    } else {
        last += alt->lens[piece->alt_seg] - 1;
    }
    trim_left = ABS(piece->beg - first);
    trim_right = ABS(last - piece->end);

    if (piece->set->row2 != -1) {
        /* merged: overwrite column row2 of the row just written */
        dst_cell = *out_pos_ptr - DSP->dim + piece->set->row2;
    } else {
        /* extra row: consume one more output cell */
        dst_cell = *out_pos_ptr;
        (*out_pos_ptr)++;
    }

    DSP->strands[dst_cell] = alt->strands[src_cell];
    forward = DSP->strands[dst_cell] != Seq_strand_minus;
    if (alt->starts[src_cell] == -1) {
        DSP->starts[dst_cell] = -1;
    } else {
        DSP->starts[dst_cell] =
            alt->starts[src_cell] + (forward ? trim_left : trim_right);
    }
}

/***************************************************************************
*
*  AlnMgr2CopySeg writes one segment (one lens entry plus one row of
*  starts/strands cells) of the output dense-seg DSP from the sequence
*  piece *s_ptr, and then advances the caller's cursors.
*
*    DSP      output dense-seg being filled
*    SEG_ptr  in/out: index of the output segment; incremented by one
*    POS_ptr  in/out: index of the next free cell in DSP->starts/strands
*    Dsp      the dense-seg the pieces are compared against
*             (piece->set->dsp == Dsp selects the "own row" paths)
*    Seg_ptr  in/out: current segment index in Dsp
*    Pos_ptr  in/out: current cell index in Dsp->starts/strands
*    s_ptr    in/out: the piece to copy; replaced by its successor
*
*  If the piece's set is not marked (row != row2) and the piece has no
*  successor, nothing is written: *s_ptr is set to NULL and all other
*  cursors are left untouched.
*
***************************************************************************/
static void AlnMgr2CopySeg(
    DenseSegPtr        DSP,
    Int4 PNTR          SEG_ptr,
    Int4 PNTR          POS_ptr,
    DenseSegPtr        Dsp,
    Int4 PNTR          Seg_ptr,
    Int4 PNTR          Pos_ptr,
    AMSeqPiecePtr PNTR s_ptr)
{
    AMSeqPiecePtr piece = *s_ptr;
    Int4          out_pos = *POS_ptr;
    Int4          src_pos = *Pos_ptr;
    Int4          src_seg = *Seg_ptr;
    Int4          row_end;
    Int4          col;
    Int4          first;
    Int4          last;
    Int4          trim_left;
    Int4          trim_right;
    Boolean       is_B;
    Boolean       forward;

    /* a set whose row equals row2 is a "B" set */
    is_B = piece->set->row == piece->set->row2;

    if (!is_B && piece->next == NULL) {
        *s_ptr = NULL;
        return; /* skip the last A */
    }

    row_end = out_pos + Dsp->dim;

    DSP->lens[*SEG_ptr] = ABS(piece->end - piece->beg) + 1;

    if (piece->set->dsp != Dsp) {
        /* the extra row for the non-anchor seq: blank out every Dsp
           column, then append the piece itself in one more cell */
        col = 0;
        while (out_pos < row_end) {
            DSP->starts[out_pos] = -1;
            DSP->strands[out_pos] = Dsp->strands[col];
            out_pos++;
            col++;
        }
        DSP->starts[out_pos] = MIN(piece->beg, piece->end);
        DSP->strands[out_pos] = piece->set->strand;
        out_pos++;

    } else if (piece->pos >= 0 && !is_B) {
        /* Dsp involved: the piece maps onto an existing Dsp segment */
        first = piece->set->dsp->starts[piece->pos];
        last = first;
        if (piece->set->plus) {
            last += piece->set->dsp->lens[piece->seg] - 1;
        } else {
            first += piece->set->dsp->lens[piece->seg] - 1;
        }
        trim_left = ABS(piece->beg - first);
        trim_right = ABS(last - piece->end);

        if (trim_left != 0) {
            /* need to "continue" from the orig seg */
            src_pos = piece->pos - piece->set->row;
            src_seg = piece->seg;
        }

        while (out_pos < row_end) {
            DSP->strands[out_pos] = Dsp->strands[src_pos];
            forward = DSP->strands[out_pos] != Seq_strand_minus;
            if (Dsp->starts[src_pos] == -1) {
                DSP->starts[out_pos] = -1;
            } else {
                DSP->starts[out_pos] =
                    Dsp->starts[src_pos] + (forward ? trim_left : trim_right);
            }
            out_pos++;
            src_pos++;
        }

        if (trim_left != 0) {
            /* restore the caller's Dsp cursors */
            src_pos = *Pos_ptr;
            src_seg = *Seg_ptr;
        } else {
            src_seg++;
        }

        if (piece->alt_dsp != NULL) {
            /* dsp involved too */
            AlnMgr2CopySegAltRow(DSP, piece, &out_pos);
        } else if (piece->set->row2 == -1) {
            /* dsp not involved and 2nd row not merged: gap in the extra row */
            DSP->starts[out_pos] = -1;
            DSP->strands[out_pos] =
                piece->set->alt_dsp->strands[piece->set->alt_row2];
            out_pos++;
        }

    } else {
        /* Dsp not involved: blank out every Dsp column first */
        col = 0;
        while (out_pos < row_end) {
            DSP->starts[out_pos] = -1;
            DSP->strands[out_pos] = Dsp->strands[col];
            out_pos++;
            col++;
        }

        if (is_B) {
            if (piece->alt_dsp == NULL) {
                src_pos += piece->set->dsp->dim; /* move to next seg */
                src_seg++;
            }
        } else {
            AlnMgr2CopySegAltRow(DSP, piece, &out_pos);
        }

        /* finally put the piece itself into its own column */
        DSP->starts[out_pos + piece->set->row - DSP->dim] =
            MIN(piece->beg, piece->end);
    }

    (*SEG_ptr)++;
    *Seg_ptr = src_seg;
    *s_ptr = piece->next;
    *POS_ptr = out_pos;
    *Pos_ptr = src_pos;
}
3089
AlnMgr2AddInNewPairwiseSA(SeqAlignPtr parent,SeqAlignPtr sap)3090 NLM_EXTERN void AlnMgr2AddInNewPairwiseSA(SeqAlignPtr parent, SeqAlignPtr sap)
3091 {
3092 AMAlignIndex2Ptr amaip;
3093 DenseSegPtr dsp, Dsp, DSP;
3094 Int4 Seg, SEG;
3095 Int4 Pos, POS, max_POS;
3096 Int4 A_end, B_beg;
3097 Int4 anchor, Anchor;
3098 Int4 row;
3099 SeqIdPtr sip, extra_sip;
3100 AMSeqPieceSetPtr a_set, A_set, b_set, B_set_head, B_set;
3101 AMSeqPiecePtr a, A, b, B;
3102 Boolean conflict;
3103 Boolean a_plus, b_plus;
3104 Int4 upper_limit;
3105 Int4 extra_segs;
3106
3107 dsp = (DenseSegPtr)(sap->segs);
3108 if (dsp->dim != 2) {
3109 if (dsp->dim == 0) {
3110 dsp->dim = 2; /* set to default */
3111 } else {
3112 ErrPostEx(SEV_ERROR, 0,0,
3113 "AlnMgr2AddInNewPairwiseSA: dsp->dim (=%d) should be 2.",
3114 dsp->dim);
3115 return;
3116 }
3117 }
3118 if (dsp->numseg < 1) {
3119 ErrPostEx(SEV_ERROR, 0,0,
3120 "AlnMgr2AddInNewPairwiseSA: dsp->numseg (=%d) should be > 0.",
3121 dsp->numseg);
3122 return;
3123 }
3124
3125 amaip = (AMAlignIndex2Ptr)(parent->saip);
3126 if (amaip->sharedaln == NULL) {/* first alignment to be added */
3127 SeqAlignPtr salp;
3128 Int4 i;
3129
3130 salp = SeqAlignDup(sap);
3131 AlnMgr2IndexSingleChildSeqAlign(salp);
3132 amaip->sharedaln = salp;
3133 amaip->numrows = dsp->dim;
3134 sip = dsp->ids;
3135 amaip->ids = (SeqIdPtr PNTR)MemNew((dsp->dim)*sizeof(SeqIdPtr));
3136 i = 0;
3137 while (sip != NULL) {
3138 amaip->ids[i] = SeqIdDup(sip);
3139 sip = sip->next;
3140 i++;
3141 }
3142 MemFree(amaip->saps);
3143 amaip->saps = (SeqAlignPtr PNTR)MemNew(sizeof(SeqAlignPtr));
3144 amaip->saps[0] = sap;
3145 amaip->numsaps = 1;
3146 MemFree(amaip->aligned);
3147 amaip->aligned = (Boolean PNTR) MemNew(sizeof(Boolean));
3148 amaip->aligned[0] = TRUE;
3149
3150 return;
3151 }
3152
3153 /* add the new sap */
3154 amaip->numsaps++;
3155 amaip->saps = (SeqAlignPtr PNTR) MemMore
3156 (amaip->saps, amaip->numsaps*sizeof(SeqAlignPtr));
3157 amaip->saps[amaip->numsaps-1] = sap;
3158 amaip->aligned = (Boolean PNTR) MemMore
3159 (amaip->aligned, (amaip->numsaps)*sizeof(Boolean));
3160 amaip->aligned[amaip->numsaps-1] = TRUE;
3161
3162 Dsp = (DenseSegPtr)(amaip->sharedaln->segs);
3163
3164 AlnMgr2GetFirstSharedRow(amaip->sharedaln, sap, &Anchor, &anchor);
3165
3166 {{ /* make sure the shared rows are on the same strand */
3167 Uint1 Strand, strand;
3168
3169 Strand = AlnMgr2GetNthStrand(amaip->sharedaln, Anchor);
3170 if (Strand == Seq_strand_unknown)
3171 Strand = Seq_strand_plus;
3172 strand = AlnMgr2GetNthStrand(sap, anchor);
3173 if (strand == Seq_strand_unknown)
3174 strand = Seq_strand_plus;
3175 if (Strand != strand) {
3176 SeqAlignListReverseStrand(sap);
3177 SAIndex2Free2(sap->saip);
3178 sap->saip = NULL;
3179 AlnMgr2IndexSingleChildSeqAlign(sap);
3180 dsp = (DenseSegPtr)(sap->segs);
3181 strand = AlnMgr2GetNthStrand(sap, anchor);
3182 if (strand == Seq_strand_unknown)
3183 strand = Seq_strand_plus;
3184 }
3185 a_plus = strand != Seq_strand_minus;
3186 }}
3187 anchor--; Anchor--; /* make them 0-based */
3188
3189 /* create new dsp */
3190 DSP = DenseSegNew();
3191 DSP->numseg = Dsp->numseg;
3192 DSP->dim = Dsp->dim;
3193 /* DSP->ids = SeqIdDupList(Dsp->ids); */
3194
3195 /* collect other shared seqids */
3196 b_set = B_set = B_set_head = NULL;
3197 row = -1; sip = Dsp->ids;
3198 extra_sip = dsp->ids;
3199 if (anchor == 0) {
3200 extra_sip = extra_sip->next;
3201 }
3202 while (AlnMgr2GetFirstRowForSeqId
3203 (Dsp, extra_sip, dsp->strands[1-anchor], &row, &sip)) {
3204 if (B_set) {
3205 B_set->next = AlnMgr2CreateSeqPieceSet(Dsp, row);
3206 B_set = B_set->next;
3207 } else {
3208 B_set = B_set_head = AlnMgr2CreateSeqPieceSet(Dsp, row);
3209 }
3210 }
3211 b_plus = dsp->strands[1-anchor] != Seq_strand_minus;
3212
3213 /* ids */
3214 DSP->ids = Dsp->ids;
3215 Dsp->ids = NULL;
3216
3217 /* collect a, b */
3218 a_set = AlnMgr2CreateSeqPieceSet(dsp, anchor);
3219 a = a_set->head;
3220 b_set = AlnMgr2CreateSeqPieceSet(dsp, 1-anchor);
3221 while (a = AlnMgr2GetNextSeqPiece(a)) {
3222 b = b_set->tail;
3223 while (b = AlnMgr2GetNextLimitedSeqPiece(b, a)) {
3224 if (!b->aligned) {
3225 DSP->numseg++;
3226 }
3227 }
3228 }
3229
3230 /* collect A, B */
3231 A_set = AlnMgr2CreateSeqPieceSet(Dsp, Anchor);
3232 A = A_set->head;
3233 while (A = AlnMgr2GetNextSeqPiece(A)) {
3234 B_set = B_set_head;
3235 while (B_set) {
3236 B = B_set->tail;
3237 while (B = AlnMgr2GetNextLimitedSeqPiece(B, A)) {};
3238 B_set=B_set->next;
3239 }
3240 }
3241
3242 /* resolve a, A */
3243 A_set->alt_row = a_set->row;
3244 a = a_set->head->next;
3245 A = A_set->head->next;
3246 while (a && A && a->next && A->next) {
3247 if (a_plus ? a->beg < A->beg : a->beg > A->beg) {
3248 AlnMgr2InsertSeqPiece
3249 (A, a, a_plus ? MIN(a->end, A->beg-1) : MAX(a->end, A->beg+1));
3250 DSP->numseg++;
3251 if (a_plus ? a->end < A->beg : a->end > A->beg) {
3252 a = a->next;
3253 } else {
3254 a->beg = A->beg;
3255 }
3256 } else if (a_plus ? A->beg < a->beg : A->beg > a->beg) {
3257 if (a_plus ? A->end < a->beg : A->end > a->beg) {
3258 A = A->next;
3259 } else {
3260 AlnMgr2InsertSeqPiece(A, A, a_plus ? a->beg - 1 : a->beg + 1);
3261 DSP->numseg++;
3262 }
3263 } else { /* a->beg == A->beg */
3264 if (a_plus ? a->end < A->end : a->end > A->end) {
3265 AlnMgr2InsertSeqPiece(A, a, a->end);
3266 DSP->numseg++;
3267 a = a->next;
3268 } else if (a_plus ? a->end > A->end : a->end < A->end) {
3269 a->beg = A->end + (a_plus ? 1 : -1);
3270 A->alt_dsp = a->set->dsp;
3271 A->alt_seg = a->seg;
3272 A->alt_pos = a->pos;
3273 A = A->next;
3274 } else { /* a->end == A->end */
3275 A->alt_dsp = a->set->dsp;
3276 A->alt_seg = a->seg;
3277 A->alt_pos = a->pos;
3278 a = a->next;
3279 A = A->next;
3280 }
3281 }
3282 }
3283 while (a && a->next) {
3284 AlnMgr2InsertSeqPiece(A, a, a->end);
3285 DSP->numseg++;
3286 a = a->next;
3287 }
3288
3289 /* set the upper limits */
3290 if (B_set_head) {
3291 if (a_plus) {
3292 upper_limit =
3293 A_set->tail->end = A_set->tail->beg = A_set->tail->prev->end + 1;
3294
3295 b = b_set->tail;
3296 while (b && b->right == -1) {
3297 b->right = upper_limit;
3298 b = b->prev;
3299 }
3300
3301 B_set = B_set_head;
3302 while (B_set) {
3303 B = B_set->tail;
3304 while (B && B->right == -1) {
3305 B->right = upper_limit;
3306 B = B->prev;
3307 }
3308 B_set = B_set->next;
3309 }
3310
3311 } else {
3312 upper_limit =
3313 A_set->head->beg = A_set->head->end = A_set->head->next->beg + 1;
3314
3315 b = b_set->head;
3316 while (b && b->left == -1) {
3317 b->left = upper_limit;
3318 b = b->next;
3319 }
3320
3321 B_set = B_set_head;
3322 while (B_set) {
3323 B = B_set->head;
3324 while (B && B->left == -1) {
3325 B->left = upper_limit;
3326 B = B->next;
3327 }
3328 B_set = B_set->next;
3329 }
3330
3331 }
3332 }
3333
3334 /* try to resolve b, B */
3335 if (B_set_head) {
3336 b = b_set->head->next;
3337 B_set = B_set_head;
3338 while (B_set) {
3339 B = B_set->head->next;
3340 conflict = FALSE;
3341 extra_segs = 0;
3342 while (b && B) {
3343 if (b_plus ? b->beg < B->beg : b->beg > B->beg) {
3344 if (b_plus ? b->end < B->beg : b->end > B->beg) {
3345 /* trim the limits */
3346 if (a_plus ? B->left <= b->left : B->left >= b->left) {
3347 if (a_plus ? B->right < b->left : B->right > b->left) {
3348 conflict = TRUE; break;
3349 } else {
3350 if (B->aligned) {
3351 conflict = TRUE; break; /* no trimming allowed */
3352 } else {
3353 B->left = b->left;
3354 }
3355 }
3356 if (a_plus ? b->right > B->right : b->right < B->right) {
3357 if (b->aligned) {
3358 conflict = TRUE; break; /* no trimming allowed */
3359 } else {
3360 b->orig_right = b->right; /* for recovering */
3361 b->right = B->right;
3362 }
3363 }
3364 }
3365 AlnMgr2InsertSeqPiece(B, b, b->end);
3366 if (!(b->aligned)) extra_segs++;
3367 b = b->next;
3368 } else {
3369 conflict = TRUE; break;
3370 }
3371
3372 } else if (b_plus ? B->beg < b->beg : B->beg > b->beg) {
3373 if (b_plus ? B->end < b->beg : B->end > b->beg) {
3374 /* trim the limits */
3375 if (a_plus ? b->left < B->left : b->left > B->left) {
3376 if (a_plus ? b->right < B->left : b->right > B->left) {
3377 conflict = TRUE; break;
3378 } else {
3379 if (b->aligned) {
3380 conflict = TRUE; break; /* no trimming allowed */
3381 } else {
3382 b->orig_left = b->left; /* for recovering */
3383 b->left = B->left;
3384 }
3385 }
3386 if (a_plus ? B->right > b->right : B->right < b->right) {
3387 if (B->aligned) {
3388 conflict = TRUE; break; /* no trimming allowed */
3389 } else {
3390 B->right = b->right;
3391 }
3392 }
3393 }
3394
3395 B = B->next;
3396
3397 } else {
3398 conflict = TRUE; break;
3399 }
3400 } else { /* B->beg == b->beg */
3401 conflict = TRUE; break;
3402 }
3403 }
3404 if (!conflict) {
3405 while (b) {
3406 AlnMgr2AddSeqPiece(B_set, b);
3407 if (!(b->aligned)) extra_segs++;
3408 b = b->next;
3409 }
3410 /* DSP->numseg += extra_segs; */
3411 break;
3412 }
3413 /* conflict, roll back b, recovering limits, try next B */
3414 if (!b) {
3415 b = b_set->tail;
3416 }
3417 while (b) {
3418 if (b->orig_left != -2) {
3419 b->left = b->orig_left;
3420 }
3421 if (b->orig_right != -2) {
3422 b->right = b->orig_right;
3423 }
3424 b = b->prev;
3425 }
3426 b = b_set->head->next;
3427 B_set = B_set->next;
3428 }
3429 }
3430 if (B_set) { /* B_set has no conflict with b_set */
3431 B = B_set->head->next;
3432 B_set->row2 = B_set->row; /* mark the set */
3433 A_set->row2 = B_set->row;
3434 A_set->alt_row2 = b_set->row;
3435 } else { /* this mean extra row */
3436 A_set->row2 = -1;
3437 A_set->alt_row2 = b_set->row;
3438 A_set->alt_dsp = b_set->dsp;
3439 DSP->dim++;
3440 sip = DSP->ids;
3441 while (sip->next) {
3442 sip = sip->next;
3443 }
3444 AddSeqId(&sip, extra_sip);
3445
3446 /* fix the index too */
3447 amaip->numrows = DSP->dim;
3448 amaip->ids = (SeqIdPtr PNTR)MemMore
3449 (amaip->ids,amaip->numrows*sizeof(SeqIdPtr));
3450 amaip->ids[amaip->numrows-1] = SeqIdDup(extra_sip);
3451
3452 b_set->row2 = b_set->row; /* mark the set */
3453 B = b_set->head->next;
3454 B_beg = -1; /* nothing to comp Bs to */
3455 }
3456
3457 /* allocate memory for the new sharedaln matrix */
3458 DSP->starts = (Int4Ptr)MemNew(DSP->numseg * DSP->dim * sizeof(Int4));
3459 DSP->strands = (Uint1Ptr)MemNew(DSP->numseg * DSP->dim * sizeof(Uint1));
3460 DSP->lens = (Int4Ptr)MemNew(DSP->numseg * sizeof(Int4));
3461
3462 /* loop through segments */
3463 POS = 0; Pos = 0; Seg = 0; SEG = 0;
3464 A = A_set->head->next;
3465 while (Seg < Dsp->numseg) {
3466
3467 A_end = Dsp->starts[Pos+A_set->row];
3468 if (a_plus && A_end >= 0) {
3469 A_end += Dsp->lens[Seg] - 1;
3470 }
3471 if (B_set) {
3472 B_beg = Dsp->starts[Pos+B_set->row];
3473 }
3474
3475 if (A_end >= 0) {
3476 while (A && (a_plus ? A->end <= A_end : A->end >= A_end)) {
3477 while (B && (a_plus ? B->left < A->beg : B->left > A->beg)) {
3478 if (B->aligned) {
3479 B = B->next;
3480 break; /* the aligned piece should be last */
3481 } else {
3482 AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &B);
3483 }
3484 }
3485 if (B && B->aligned && B->left == A->beg) {
3486 B = B->next;
3487 }
3488 AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &A);
3489 }
3490 } else if (B && B_beg >= 0) {
3491 while (B && (b_plus ? B->beg <= B_beg : B->beg >= B_beg)) {
3492 while (A && (a_plus ? A->beg <= B->left : A->beg >= B->left)) {
3493 AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &A);
3494 }
3495 if (B->aligned) {
3496 B = B->next;
3497 } else {
3498 AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &B);
3499 }
3500 }
3501 } else {
3502 /* just copy the Dsp segment */
3503 DSP->lens[SEG] = Dsp->lens[Seg];
3504 max_POS = POS + Dsp->dim;
3505 for (; POS < max_POS; POS++, Pos++) {
3506 DSP->starts[POS] = Dsp->starts[Pos];
3507 DSP->strands[POS] = Dsp->strands[Pos];
3508 }
3509 if (DSP->dim > Dsp->dim) {
3510 DSP->starts[POS] = -1;
3511 DSP->strands[POS] = dsp->strands[1-anchor];
3512 POS++;
3513 }
3514 SEG++;
3515 Seg++;
3516 }
3517 }
3518 while (A) {
3519 while (B && (a_plus ? B->right <= A->beg : B->right >= A->beg)) {
3520 if (B->aligned) {
3521 B = B->next;
3522 } else {
3523 AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &B);
3524 }
3525 }
3526 AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &A);
3527 }
3528 while (B) {
3529 if (B->aligned) {
3530 B = B->next;
3531 } else {
3532 AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &B);
3533 }
3534 }
3535
3536 /* Done */
3537 AMSeqPieceSetFree(A_set);
3538 AMSeqPieceSetFree(a_set);
3539 AMSeqPieceSetFree(B_set_head);
3540 AMSeqPieceSetFree(b_set);
3541
3542 amaip->sharedaln->segs = DSP;
3543 /* update the dim for the shared_aln to match the new DensegPtr */
3544 amaip->sharedaln->dim = DSP->dim;
3545
3546 DenseSegFree(Dsp);
3547 }
3548
3549 /***************************************************************************
3550 *
3551 * AlnMgr2AddInNewSA adds a seqalign to an existing seqalign. The new
3552 * seqalign must share at least one row with the existing seqalign. The
3553 * new, combined dense-seg structure is computed, and then it is condensed
3554 * using AlnMgr2CondenseRows to make sure that there are no superfluous rows.
3555 *
3556 ***************************************************************************/
AlnMgr2AddInNewSA(SeqAlignPtr parent,SeqAlignPtr sap)3557 static void AlnMgr2AddInNewSA(SeqAlignPtr parent, SeqAlignPtr sap)
3558 {
3559 AMAlignIndex2Ptr amaip;
3560 AM_Small2Ptr asp;
3561 AM_Small2Ptr asp_head;
3562 AM_Small2Ptr asp_prev;
3563 AM_Small2Ptr asp_tmp;
3564 AM_Small2Ptr asp_tmp2;
3565 AM_Small2Ptr PNTR asparray;
3566 Int4 currstop;
3567 DenseSegPtr dsp;
3568 DenseSegPtr dsp_new;
3569 DenseSegPtr dsp_shared;
3570 Boolean found;
3571 Int4 i;
3572 Int4 j;
3573 Int4 k;
3574 Int4 n1;
3575 Int4 n2;
3576 Int4 numrows;
3577 Int4 offset;
3578 SeqAlignPtr salp;
3579 SeqAlignPtr sap_new;
3580 SeqAlignPtr PNTR saptmp;
3581 SeqIdPtr sip;
3582 SeqIdPtr sip_head;
3583 SeqIdPtr sip_tmp;
3584 Int4 state;
3585 Int4 stop1;
3586 Int4 stop2;
3587 Uint1 strand1;
3588 Uint1 strand2;
3589
3590 amaip = (AMAlignIndex2Ptr)(parent->saip);
3591 if (amaip->sharedaln == NULL) /* this is the first alignment to be added */
3592 {
3593 salp = SeqAlignDup(sap);
3594 AlnMgr2IndexSingleChildSeqAlign(salp);
3595 dsp = (DenseSegPtr)(salp->segs);
3596 amaip->sharedaln = salp;
3597 amaip->numrows = dsp->dim;
3598 sip = dsp->ids;
3599 amaip->ids = (SeqIdPtr PNTR)MemNew((dsp->dim)*sizeof(SeqIdPtr));
3600 i = 0;
3601 while (sip != NULL)
3602 {
3603 amaip->ids[i] = SeqIdDup(sip);
3604 sip = sip->next;
3605 i++;
3606 }
3607 MemFree(amaip->saps);
3608 amaip->saps = (SeqAlignPtr PNTR)MemNew(sizeof(SeqAlignPtr));
3609 amaip->saps[0] = sap;
3610 amaip->numsaps = 1;
3611 } else
3612 {
3613 /* free ids */
3614 for (i=0; i<amaip->numrows; i++)
3615 {
3616 SeqIdFree(amaip->ids[i]);
3617 }
3618 MemFree(amaip->ids);
3619
3620 /* add the new sap */
3621 saptmp = amaip->saps;
3622 amaip->saps = (SeqAlignPtr PNTR)MemNew((amaip->numsaps+1)*sizeof(SeqAlignPtr));
3623 for (i=0; i<amaip->numsaps; i++)
3624 {
3625 amaip->saps[i] = saptmp[i];
3626 }
3627 amaip->saps[amaip->numsaps] = sap;
3628 MemFree(saptmp);
3629 amaip->numsaps++;
3630
3631 /* dsp, dsp_shared, n1, n2 */
3632 dsp = (DenseSegPtr)(sap->segs);
3633 dsp_shared = (DenseSegPtr)(amaip->sharedaln->segs);
3634 AlnMgr2GetFirstSharedRow(amaip->sharedaln, sap, &n1, &n2);
3635 if (n1 == n2 && n1 == 0)
3636 return;
3637
3638 /* make sure the shared rows are on the same strand */
3639 strand1 = AlnMgr2GetNthStrand(amaip->sharedaln, n1);
3640 if (strand1 == Seq_strand_unknown)
3641 strand1 = Seq_strand_plus;
3642 strand2 = AlnMgr2GetNthStrand(sap, n2);
3643 if (strand2 == Seq_strand_unknown)
3644 strand2 = Seq_strand_plus;
3645 if (strand1 != strand2)
3646 {
3647 SeqAlignListReverseStrand(sap);
3648 SAIndex2Free2(sap->saip);
3649 sap->saip = NULL;
3650 AlnMgr2IndexSingleChildSeqAlign(sap);
3651 dsp = (DenseSegPtr)(sap->segs);
3652 strand2 = AlnMgr2GetNthStrand(sap, n2);
3653 if (strand2 == Seq_strand_unknown)
3654 strand2 = Seq_strand_plus;
3655 }
3656
3657 /* numrows */
3658 numrows = dsp->dim + dsp_shared->dim - 1; /* for now this works; compress at the end */
3659 asp_head = NULL;
3660
3661 /* currstop */
3662 if (strand1 == Seq_strand_minus)
3663 AlnMgr2GetNthSeqRangeInSA(amaip->sharedaln, n1, NULL, &currstop);
3664 else
3665 currstop = -1;
3666
3667 /* add asp for each dsp_shared seg */
3668 for (i=0; i<dsp_shared->numseg; i++)
3669 {
3670 asp = (AM_Small2Ptr)MemNew(sizeof(AM_Small2));
3671 if (dsp_shared->starts[(dsp_shared->dim)*i + n1 - 1] < 0)
3672 {
3673 asp->n1 = currstop;
3674 asp->n2 = i+1;
3675 asp->n3 = AM_GAP;
3676 asp->n4 = dsp_shared->lens[i];
3677 if (asp_head != NULL)
3678 {
3679 asp_prev->next = asp;
3680 /*if (asp_prev->n1 == asp->n1)
3681 asp->n5 = asp_prev->n5+1;*/
3682 asp_prev = asp;
3683 } else
3684 asp_head = asp_prev = asp;
3685 } else
3686 {
3687 asp->n1 = dsp_shared->starts[(dsp_shared->dim)*i + n1 - 1];
3688 asp->n2 = 1;
3689 asp->n3 = AM_START;
3690 asp->n4 = dsp_shared->lens[i];
3691 if (asp_head != NULL)
3692 {
3693 asp_prev->next = asp;
3694 /*if (asp_prev->n1 == asp->n1)
3695 asp->n5 = asp_prev->n5+1;*/
3696 asp_prev = asp;
3697 } else
3698 asp_head = asp_prev = asp;
3699 asp = (AM_Small2Ptr)MemNew(sizeof(AM_Small2));
3700 asp->n1 = dsp_shared->starts[(dsp_shared->dim)*i + n1 - 1] + dsp_shared->lens[i] - 1;
3701 asp->n2 = 1;
3702 j = i+1;
3703 while (j<dsp_shared->numseg && dsp_shared->starts[(dsp_shared->dim)*j + n1 - 1] == -1)
3704 {
3705 j++;
3706 }
3707 if (j<dsp_shared->numseg)
3708 {
3709 if (dsp_shared->starts[(dsp_shared->dim)*j + n1 - 1] > asp->n1 + 1)
3710 asp->n3 = AM_HARDSTOP;
3711 else
3712 asp->n3 = AM_STOP;
3713 } else
3714 asp->n3 = AM_HARDSTOP;
3715 if (asp->n3 == AM_HARDSTOP)
3716 {
3717 if (strand1 != Seq_strand_minus)
3718 asp->n4 = -(dsp_shared->starts[(dsp_shared->dim)*i+n1-1] + dsp_shared->lens[i]-1);
3719 else
3720 asp->n4 = -dsp_shared->starts[(dsp_shared->dim)*i+n1-1];
3721 } else
3722 asp->n4 = -dsp_shared->lens[i];
3723 if (strand1 != Seq_strand_minus)
3724 currstop = asp->n1;
3725 else
3726 currstop = asp_prev->n1-1;
3727 asp_prev->next = asp;
3728 /*if (asp_prev->n1 == asp->n1)
3729 asp->n5 = asp_prev->n5+1;*/
3730 asp_prev = asp;
3731 }
3732 } /* asp for each dsp_shared seg */
3733
3734 /* currstop = start of sap's n2-th seq */
3735 if (strand1 == Seq_strand_minus)
3736 AlnMgr2GetNthSeqRangeInSA(sap, n2, NULL, &currstop);
3737 else
3738 AlnMgr2GetNthSeqRangeInSA(sap, n2, &currstop, NULL);
3739
3740 /* add asp for each dsp seg */
3741 for (i=0; i<dsp->numseg; i++)
3742 {
3743 asp = (AM_Small2Ptr)MemNew(sizeof(AM_Small2));
3744 if (dsp->starts[(dsp->dim)*i + n2 - 1] < 0)
3745 {
3746 asp->n1 = currstop;
3747 asp->n2 = dsp_shared->numseg+i+1;
3748 asp->n3 = AM_GAP;
3749 asp->n4 = dsp->lens[i];
3750 asp_prev->next = asp;
3751 /*if (asp_prev->n1 == asp->n1)
3752 asp->n5 = asp_prev->n5 + 1;*/
3753 asp_prev = asp;
3754 } else
3755 {
3756 asp->n1 = dsp->starts[(dsp->dim)*i + n2 - 1];
3757 asp->n2 = 1;
3758 asp->n3 = AM_START;
3759 asp->n4 = dsp->lens[i];
3760 asp_prev->next = asp;
3761 /*if (asp_prev->n1 == asp->n1)
3762 asp->n5 = asp_prev->n5+1;*/
3763 asp_prev = asp;
3764 asp = (AM_Small2Ptr)MemNew(sizeof(AM_Small2));
3765 asp->n1 = dsp->starts[(dsp->dim)*i + n2 - 1] + dsp->lens[i] - 1;
3766 asp->n2 = 1;
3767 j = i+1;
3768 while (j<dsp->numseg && dsp->starts[(dsp->dim)* j + n2 - 1] == -1)
3769 {
3770 j++;
3771 }
3772 if (j<dsp->numseg)
3773 {
3774 if (dsp->starts[(dsp->dim)*j + n2 - 1] > asp->n1 + 1)
3775 asp->n3 = AM_HARDSTOP;
3776 else
3777 asp->n3 = AM_STOP;
3778 } else
3779 asp->n3 = AM_HARDSTOP;
3780 if (asp->n3 == AM_HARDSTOP)
3781 {
3782 if (strand1 != Seq_strand_minus)
3783 asp->n4 = -(dsp->starts[(dsp->dim)*i+n1-1] + dsp->lens[i]-1);
3784 else
3785 asp->n4 = -dsp->starts[(dsp->dim)*i+n1-1];
3786 /* so if n4 is negative, this is the highest-numbered residue in the interval */
3787 } else
3788 asp->n4 = dsp->lens[i];
3789 if (strand1 != Seq_strand_minus)
3790 currstop = asp->n1;
3791 else
3792 currstop = asp_prev->n1-1;
3793 asp_prev->next = asp;
3794 /*if (asp_prev->n1 == asp->n1)
3795 asp->n5 = asp_prev->n5 + 1;*/
3796 asp_prev = asp;
3797 }
3798 }
3799
3800 /* create asparray and heapsort it */
3801 asp = asp_head;
3802 i = 0;
3803 while (asp != NULL)
3804 {
3805 i++;
3806 asp = asp->next;
3807 }
3808 asparray = (AM_Small2Ptr PNTR)MemNew(i*sizeof(AM_Small2Ptr));
3809 asp = asp_head;
3810 i = 0;
3811 while (asp != NULL)
3812 {
3813 asparray[i] = asp;
3814 i++;
3815 asp = asp->next;
3816 }
3817 if (strand1 != Seq_strand_minus)
3818 HeapSort(asparray, i, sizeof(asparray), AlnMgr2CompareAsps);
3819 else
3820 HeapSort(asparray, i, sizeof(asparray), AlnMgr2CompareAspsMinus);
3821 /* now need to remove redundant (identical) points */
3822 /* but still need to count those points toward the states */
3823 asp = asparray[0];
3824 asp->next = NULL;
3825 for (j=0; j<i-1; j++)
3826 {
3827 if (asparray[j+1]->n1 != asp->n1 || asparray[j+1]->n3 != asp->n3 || asp->n3 == AM_GAP)
3828 {
3829 asp->next = asparray[j+1];
3830 asp->next->next = NULL;
3831 asp = asp->next;
3832 } else
3833 {
3834 k = j;
3835 while (asparray[k] == NULL && k >= 0)
3836 {
3837 k--;
3838 }
3839 if (k>=0 && asparray[k]->n3 != AM_GAP)
3840 asparray[k]->n2++;
3841 MemFree(asparray[j+1]);
3842 asparray[j+1] = NULL;
3843 }
3844 }
3845 asp_head = asparray[0];
3846 MemFree(asparray);
3847 j=0;
3848 asp = asp_head;
3849 asp_prev = NULL;
3850 /* count up the segments; two consecutive stops make a segment */
3851 state = 0;
3852 if (strand1 != Seq_strand_minus)
3853 {
3854 while (asp != NULL)
3855 {
3856 if (asp->n3 == AM_START)
3857 {
3858 state += asp->n2;
3859 j++;
3860 } else if (asp->n3 == AM_STOP)
3861 {
3862 state -= asp->n2;
3863 asp_tmp = asp->next;
3864 while (asp_tmp != NULL && asp_tmp->n3 == AM_GAP)
3865 {
3866 asp_tmp = asp_tmp->next;
3867 }
3868 if (state != 0 && asp_tmp != NULL && asp_tmp->n1 != asp->n1+1 && (asp_tmp->n3 != AM_HARDSTOP || asp_tmp->n1 != asp->n1))
3869 j++;
3870 else if (state != 0 && asp->next != NULL && asp_tmp != NULL && (asp_tmp->n3 != AM_HARDSTOP || asp_tmp->n1 != asp->n1))
3871 {
3872 asp_tmp2 = asp_tmp;
3873 while (asp_tmp2 != NULL && asp->n1+1 == asp_tmp2->n1 && asp_tmp2->n3 != AM_START)
3874 {
3875 asp_tmp2 = asp_tmp2->next;
3876 }
3877 if (asp_tmp2 != NULL && ((asp_tmp2->n1 == asp->n1+1 && asp_tmp2->n3 != AM_START) || asp_tmp2->n1 != asp->n1+1) && (asp_tmp->n3 != AM_HARDSTOP || asp_tmp->n1 != asp->n1))
3878 j++;
3879 }
3880 } else if (asp->n3 == AM_GAP)
3881 j++;
3882 else if (asp->n3 == AM_HARDSTOP)
3883 {
3884 state -= asp->n2;
3885 asp_tmp = asp->next;
3886 while (asp_tmp != NULL && asp_tmp->n3 == AM_GAP)
3887 {
3888 asp_tmp = asp_tmp->next;
3889 }
3890 if (state != 0 && asp_tmp != NULL && asp_tmp->n1 != asp->n1+1)
3891 j++;
3892 else if (state != 0 && asp->next != NULL && asp_tmp != NULL)
3893 {
3894 asp_tmp2 = asp_tmp;
3895 while (asp_tmp2 != NULL && asp->n1+1 == asp_tmp2->n1 && asp_tmp2->n3 != AM_START)
3896 {
3897 asp_tmp2 = asp_tmp2->next;
3898 }
3899 if (asp_tmp2 != NULL && ((asp_tmp2->n1 == asp->n1+1 && asp_tmp2->n3 != AM_START) || asp_tmp2->n1 != asp->n1+1))
3900 j++;
3901 else if (asp_tmp2 == NULL)
3902 j++;
3903 }
3904 }
3905 asp = asp->next;
3906 }
3907 } else
3908 {
3909 currstop = -1;
3910 while (asp != NULL)
3911 {
3912 if (asp->n3 == AM_STOP || asp->n3 == AM_HARDSTOP)
3913 {
3914 if (currstop != asp->n1 && state > 0)
3915 j++;
3916 currstop = asp->n1;
3917 state += asp->n2;
3918 } else if (asp->n3 == AM_START)
3919 {
3920 state -= asp->n2;
3921 j++;
3922 currstop = asp->n1 - 1;
3923 } else if (asp->n3 == AM_GAP)
3924 j++;
3925 asp = asp->next;
3926 }
3927 }
3928
3929 /* dsp_new */
3930 dsp_new = DenseSegNew();
3931 dsp_new->dim = numrows;
3932 dsp_new->numseg = j;
3933 dsp_new->ids = SeqIdDupList(dsp_shared->ids);
3934 dsp_new->starts = (Int4Ptr)MemNew((dsp_new->numseg)*(dsp_new->dim)*sizeof(Int4));
3935 dsp_new->strands = (Uint1Ptr)MemNew((dsp_new->numseg)*(dsp_new->dim)*sizeof(Uint1));
3936 dsp_new->lens = (Int4Ptr)MemNew((dsp_new->numseg)*sizeof(Int4));
3937
3938 /* get all the ids except for the duplicated one */
3939 sip_head = NULL;
3940 sip_tmp = NULL;
3941 sip = dsp->ids;
3942 i=0;
3943 /* get all the ids except for the duplicated one */
3944 while (sip != NULL)
3945 {
3946 if (i+1 != n2)
3947 {
3948 if (sip_tmp != NULL)
3949 {
3950 sip_tmp->next = SeqIdDup(sip);
3951 sip_tmp = sip;
3952 } else
3953 sip_head = sip_tmp = SeqIdDup(sip);
3954 }
3955 i++;
3956 sip = sip->next;
3957 }
3958 sip = dsp_new->ids;
3959 while (sip->next != NULL)
3960 {
3961 sip = sip->next;
3962 }
3963 sip->next = sip_head;
3964
3965 /* construct starts and lens from asps */
3966 asp = asp_head;
3967 i=0;
3968 state = 0;
3969 currstop = -1;
3970 if (strand1 != Seq_strand_minus)
3971 {
3972 while (asp != NULL)
3973 {
3974 if (asp->n3 == AM_START)
3975 {
3976 state += asp->n2;
3977 dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1;
3978 dsp_new->lens[i] = asp->n4;
3979 i++;
3980 } else if (asp->n3 == AM_STOP)
3981 {
3982 state -= asp->n2;
3983 asp_tmp = asp->next;
3984 while (asp_tmp != NULL && asp_tmp->n3 == AM_GAP)
3985 {
3986 asp_tmp = asp_tmp->next;
3987 }
3988 if (state != 0 && asp_tmp != NULL && asp_tmp->n1 != asp->n1+1 && (asp_tmp->n3 != AM_HARDSTOP || asp_tmp->n1 != asp->n1))
3989 {
3990 dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1 + 1;
3991 dsp_new->lens[i] = asp->n4;
3992 i++;
3993 } else if (state != 0 && asp->next != NULL && asp_tmp != NULL && i < dsp_new->numseg && (asp_tmp->n3 != AM_HARDSTOP || asp_tmp->n1 != asp->n1))
3994 {
3995 asp_tmp2 = asp_tmp;
3996 while (asp_tmp2 != NULL && asp->n1+1 == asp_tmp2->n1 && asp_tmp2->n3 != AM_START)
3997 {
3998 asp_tmp2 = asp_tmp2->next;
3999 }
4000 if (asp_tmp2 != NULL && ((asp_tmp2->n1 == asp->n1+1 && asp_tmp2->n3 != AM_START) || asp_tmp2->n1 != asp->n1+1) && (asp_tmp->n3 != AM_HARDSTOP || asp_tmp->n1 != asp->n1))
4001 {
4002 dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1 + 1;
4003 dsp_new->lens[i] = asp->n4;
4004 i++;
4005 }
4006 }
4007 } else if (asp->n3 == AM_GAP)
4008 {
4009 dsp_new->starts[dsp_new->dim*i+n1-1] = -asp->n2;
4010 if (asp->n2 > dsp_shared->numseg)
4011 dsp_new->lens[i] = dsp->lens[(asp->n2-1)-(dsp_shared->numseg)];
4012 else
4013 dsp_new->lens[i] = dsp_shared->lens[asp->n2-1];
4014 i++;
4015 } else if (asp->n3 == AM_HARDSTOP)
4016 {
4017 state -= asp->n2;
4018 asp_tmp = asp->next;
4019 while (asp_tmp != NULL && asp_tmp->n3 == AM_GAP)
4020 {
4021 asp_tmp = asp_tmp->next;
4022 }
4023 if (state != 0 && asp->next != NULL && asp_tmp != NULL && asp_tmp->n1 != asp->n1+1 && i < dsp_new->numseg)
4024 {
4025 dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1 + 1;
4026 if (asp->n1 > -asp->n4)
4027 dsp_new->lens[i] = asp->n4;
4028 i++;
4029 } else if (state != 0 && asp->next != NULL && asp_tmp != NULL && i < dsp_new->numseg)
4030 {
4031 asp_tmp2 = asp_tmp;
4032 while (asp_tmp2 != NULL && asp->n1+1 == asp_tmp2->n1 && asp_tmp2->n3 != AM_START)
4033 {
4034 asp_tmp2 = asp_tmp2->next;
4035 }
4036 if (asp_tmp2 != NULL && ((asp_tmp2->n1 == asp->n1+1 && asp_tmp2->n3 != AM_START) || asp_tmp2->n1 != asp->n1+1))
4037 {
4038 dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1 + 1;
4039 if (asp->n1 > -asp->n4)
4040 dsp_new->lens[i] = asp->n4;
4041 i++;
4042 } else if (asp_tmp2 == NULL)
4043 {
4044 dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1 + 1;
4045 if (asp->n1 > -asp->n4)
4046 dsp_new->lens[i] = asp->n4;
4047 i++;
4048 }
4049 }
4050 }
4051 asp = asp->next;
4052 }
4053 for (i=0; i<dsp_new->numseg; i++)
4054 {
4055 found = FALSE;
4056 for (j=i+1; j<dsp_new->numseg && !found; j++)
4057 {
4058 if (dsp_new->starts[dsp_new->dim*j+n1-1] > -1)
4059 {
4060 if (dsp_new->lens[i] == 0)
4061 dsp_new->lens[i] = dsp_new->starts[dsp_new->dim*j+n1-1] - dsp_new->starts[dsp_new->dim*i+n1-1];
4062 else if (dsp_new->lens[i] > 0)
4063 dsp_new->lens[i] = MIN(dsp_new->lens[i], dsp_new->starts[dsp_new->dim*j+n1-1] - dsp_new->starts[dsp_new->dim*i+n1-1]);
4064 else if (dsp_new->lens[i] < 0)
4065 dsp_new->lens[i] = -dsp_new->lens[i]-dsp_new->starts[dsp_new->dim*i+n1-1]+1;
4066 found = TRUE;
4067 }
4068 }
4069 if (!found) /* last segment */
4070 {
4071 if (dsp_new->starts[dsp_new->dim*i+n1-1] >= 0)
4072 {
4073 AlnMgr2GetNthSeqRangeInSA(amaip->sharedaln, n1, NULL, &stop1);
4074 AlnMgr2GetNthSeqRangeInSA(sap, n2, NULL, &stop2);
4075 dsp_new->lens[i] = (MAX(stop1, stop2) + 1) - dsp_new->starts[dsp_new->dim*i+n1-1];
4076 }
4077 }
4078 }
4079 } else
4080 {
4081 while (asp != NULL)
4082 {
4083 if (asp->n3 == AM_STOP)
4084 {
4085 if (currstop != asp->n1 && state > 0)
4086 {
4087 dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1+1;
4088 dsp_new->lens[i] = currstop - asp->n1;
4089 i++;
4090 }
4091 currstop = asp->n1;
4092 state += asp->n2;
4093 } else if (asp->n3 == AM_START)
4094 {
4095 state -= asp->n2;
4096 dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1;
4097 dsp_new->lens[i] = currstop - asp->n1 + 1;
4098 i++;
4099 currstop = asp->n1 - 1;
4100 } else if (asp->n3 == AM_GAP)
4101 {
4102 dsp_new->starts[dsp_new->dim*i+n1-1] = -asp->n2;
4103 if (asp->n2 > dsp_shared->numseg)
4104 dsp_new->lens[i] = dsp->lens[(asp->n2-1)-(dsp_shared->numseg)];
4105 else
4106 dsp_new->lens[i] = dsp_shared->lens[asp->n2-1];
4107 i++;
4108 } else if (asp->n3 == AM_HARDSTOP)
4109 {
4110 if (currstop != asp->n1 && state > 0 && asp->next != NULL)
4111 {
4112 dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1+1;
4113 dsp_new->lens[i] = currstop - asp->n1;
4114 i++;
4115 }
4116 currstop = asp->n1;
4117 state += asp->n2;
4118 }
4119 asp = asp->next;
4120 }
4121 }
4122 /* now add in the other rows, starting with rows from the sharedaln */
4123 for (i=0; i<dsp_shared->dim; i++)
4124 {
4125 if (i+1 != n1)
4126 {
4127 for (j=0; j<dsp_new->numseg; j++)
4128 {
4129 if (dsp_new->starts[dsp_new->dim*j+n1-1] >= 0)
4130 dsp_new->starts[dsp_new->dim*j+i] = AlnMgr2MapSegStartToSegStart(amaip->sharedaln, dsp_new->starts[dsp_new->dim*j+n1-1], n2, i+1, dsp_new->lens[j]);
4131 else
4132 {
4133 if (-(dsp_new->starts[dsp_new->dim*j+n1-1]) > dsp_shared->numseg)
4134 /* this gap came from the new sap */
4135 dsp_new->starts[dsp_new->dim*j+i] = -1;
4136 else /* this gap came from the sharedaln */
4137 dsp_new->starts[dsp_new->dim*j+i] = dsp_shared->starts[dsp_shared->dim*(-dsp_new->starts[dsp_new->dim*j+n1-1]-1)+i];
4138 }
4139 dsp_new->strands[dsp_new->dim*j+i] = AlnMgr2GetNthStrand(amaip->sharedaln, i+1);
4140 }
4141 }
4142 }
4143 for (i=0; i<dsp->dim; i++)
4144 {
4145 if (i+1 != n2)
4146 {
4147 if (i+1 > n2)
4148 offset = 1;
4149 else
4150 offset = 0;
4151 for (j=0; j<dsp_new->numseg; j++)
4152 {
4153 if (dsp_new->starts[dsp_new->dim*j+n1-1] >= 0)
4154 dsp_new->starts[dsp_new->dim*j+i+dsp_shared->dim-offset] = AlnMgr2MapSegStartToSegStart(sap, dsp_new->starts[dsp_new->dim*j+n1-1], n1, i+1, dsp_new->lens[j]);
4155 else
4156 {
4157 if (-(dsp_new->starts[dsp_new->dim*j+n1-1]) > dsp_shared->numseg)
4158 /* this gap is from the new sap */
4159 dsp_new->starts[dsp_new->dim*j+i+dsp_shared->dim-offset] = dsp->starts[dsp->dim*((-dsp_new->starts[dsp_new->dim*j+n1-1])-dsp_shared->numseg-1)+i];
4160 else /* this gap is from the shared alignment */
4161 dsp_new->starts[dsp_new->dim*j+i+dsp_shared->dim-offset] = -1;
4162 }
4163 dsp_new->strands[dsp_new->dim*j+i+dsp_shared->dim-offset] = AlnMgr2GetNthStrand(sap, i+1);
4164 }
4165 }
4166 }
4167 /* fill in strand info for shared row, and get rid of segment keys (neg numbers) */
4168 for (j=0; j<dsp_new->numseg; j++)
4169 {
4170 dsp_new->strands[dsp_new->dim*j+n1-1] = AlnMgr2GetNthStrand(amaip->sharedaln, n1);
4171 if (dsp_new->starts[dsp_new->dim*j+n1-1] < 0)
4172 dsp_new->starts[dsp_new->dim*j+n1-1] = -1;
4173 }
4174 if (dsp_new->dim > 10)
4175 dsp_new->dim = dsp_new->dim;
4176 AlnMgr2CondenseRows(dsp_new, dsp_new->dim);
4177 sap_new = SeqAlignNew();
4178 sap_new->segtype = SAS_DENSEG;
4179 sap_new->segs = (Pointer)(dsp_new);
4180 AlnMgr2IndexSingleChildSeqAlign(sap_new);
4181 SeqAlignFree(amaip->sharedaln);
4182 amaip->sharedaln = sap_new;
4183 amaip->numrows = dsp_new->dim;
4184 amaip->ids = (SeqIdPtr PNTR)MemNew(amaip->numrows*sizeof(SeqIdPtr));
4185 sip = dsp_new->ids;
4186 for (i=0; i<amaip->numrows; i++)
4187 {
4188 amaip->ids[i] = SeqIdDup(sip);
4189 sip = sip->next;
4190 }
4191 while (asp_head != NULL)
4192 {
4193 asp = asp_head->next;
4194 MemFree(asp_head);
4195 asp_head = asp;
4196 }
4197 }
4198 }
4199
4200 /* SECTION 2c */
/***************************************************************************
*
*  AlnMgr2MapSegStartToSegStart translates a coordinate (pos) on row1 of
*  the dense-seg held by sap into the matching coordinate on row2 of the
*  same segment.  The segment is located with AlnMgr2GetSegForStartPos.
*  When the two rows are on different strands, pos is first advanced by
*  len-1.  The function returns -1 if sap is NULL, if no segment could be
*  located, if row2 is a gap in that segment, or if the computed offset is
*  larger than the segment length.
*
***************************************************************************/
static Int4 AlnMgr2MapSegStartToSegStart(SeqAlignPtr sap, Int4 pos, Int4 row1, Int4 row2, Int4 len)
{
   Int4         base;
   Int4         delta;
   DenseSegPtr  dsp;
   Int4         from_start;
   Uint1        from_strand;
   Int4         segidx;
   Int4         seglen;
   Int4         to_start;
   Uint1        to_strand;

   if (sap == NULL)
      return -1;
   segidx = AlnMgr2GetSegForStartPos(sap, pos, row1);
   if (segidx < 0)
      return -1;
   dsp = (DenseSegPtr)(sap->segs);
   base = dsp->dim*segidx;   /* index of row 1 of this segment in starts/strands */
   to_start = dsp->starts[base+row2-1];
   if (to_start == -1)   /* row2 is gapped here, nothing to map to */
      return -1;
   from_strand = dsp->strands[base+row1-1];
   to_strand = dsp->strands[base+row2-1];
   from_start = dsp->starts[base+row1-1];
   seglen = dsp->lens[segidx];
   if (from_strand != to_strand)
      pos = pos + len - 1;
   /* delta is the distance of pos from the alignment-order beginning of the segment */
   delta = pos - from_start;
   if (from_strand == Seq_strand_minus)
      delta = seglen - delta - 1;
   if (delta > seglen) /* unaligned here */
      return -1;
   if (to_strand == Seq_strand_minus)
      return to_start + seglen - delta - 1;
   return to_start + delta;
}
4234
4235 /* SECTION 2c */
/***************************************************************************
*
*  AlnMgr2GetSegForStartPos looks up the segment of an indexed dense-seg
*  alignment that contains sequence position pos on the given row (rows
*  are numbered from 1).  The row's list of sections (srdp->sect) is
*  binary-searched, ascending for non-minus rows and descending for minus
*  rows.  Return values:
*     -1  sap is NULL or unindexed, row is out of range, or pos lies
*         outside the range reported by AlnMgr2GetNthSeqRangeInSA
*     -2  pos lies past the end of the section that was found ("insert")
*     otherwise an entry of the row's sect array; when the index has an
*     anchor row, the subscript is first replaced by the result of
*     searching the anchor row's sect array for the section found.
*
***************************************************************************/
static Int4 AlnMgr2GetSegForStartPos(SeqAlignPtr sap, Int4 pos, Int4 row)
{
   SARowDat2Ptr  anchordat;
   DenseSegPtr   dsp;
   Int4          hi;
   Int4          lo;
   Int4          probe;
   SARowDat2Ptr  rowdat;
   Int4          row_start;
   Int4          row_stop;
   SAIndex2Ptr   saip;
   Uint1         strand;

   if (sap == NULL || sap->saip == NULL || row < 1)
      return -1;
   AlnMgr2GetNthSeqRangeInSA(sap, row, &row_start, &row_stop);
   if (pos < row_start || pos > row_stop)
      return -1;
   saip = (SAIndex2Ptr)(sap->saip);
   if (row > saip->numrows)
      return -1;
   rowdat = saip->srdp[row-1];
   strand = AlnMgr2GetNthStrand(sap, row);
   dsp = (DenseSegPtr)(sap->segs);
   lo = 0;
   hi = rowdat->numsect - 1;
   if (strand == Seq_strand_minus)
   {
      /* starts decrease along the section list: move right while the start is still above pos */
      while (lo < hi)
      {
         probe = (lo + hi)/2;
         if (dsp->starts[(rowdat->sect[probe])*(dsp->dim)+row-1] > pos)
            lo = probe + 1;
         else
            hi = probe;
      }
   } else
   {
      /* starts increase along the section list: move right while the NEXT section starts at or before pos */
      while (lo < hi)
      {
         probe = MIN((lo + hi)/2, rowdat->numsect-2);
         if (dsp->starts[(rowdat->sect[probe + 1])*(dsp->dim)+row-1] <= pos)
            lo = probe + 1;
         else
            hi = probe;
      }
   }
   if (pos - dsp->starts[(rowdat->sect[lo])*(dsp->dim)+row-1] >= dsp->lens[rowdat->sect[lo]])
      return -2; /* this is an insert */
   if (saip->anchor > 0)
   {
      anchordat = saip->srdp[saip->anchor-1];
      lo = binary_search_on_uint2_list(anchordat->sect, rowdat->sect[lo], anchordat->numsect);
   }
   return rowdat->sect[lo];
}
4295
/***************************************************************************
*
*  GetNextStart scans the given row (0-based) of a dense-seg, beginning
*  with the column after col, for the first column whose start is not -1.
*  It returns that start and, when pnext_start_col is not NULL, stores the
*  column index there.  It returns -1 (leaving *pnext_start_col untouched)
*  when dsp is NULL, row or col is out of range, or every later column is
*  a gap on this row.
*
***************************************************************************/
static Int4 GetNextStart (DenseSegPtr dsp, Int4 row, Int4 col, Int4Ptr pnext_start_col)
{
   Int4  scan;

   if (dsp == NULL)
      return -1;
   if (row < 0 || row >= dsp->dim)
      return -1;
   if (col < 0 || col >= dsp->numseg)
      return -1;

   scan = col + 1;
   while (scan < dsp->numseg && dsp->starts[(scan * dsp->dim) + row] == -1)
      scan++;

   if (scan >= dsp->numseg)   /* ran off the end: only gaps remain */
      return -1;

   if (pnext_start_col != NULL)
      *pnext_start_col = scan;
   return dsp->starts[(scan * dsp->dim) + row];
}
4324
static void AlnMgr2CondenseColumns(DenseSegPtr dsp)
/***************************************************************************
*
*  AlnMgr2CondenseColumns finds adjacent columns which appear to align but
*  were not put in one column by the mixing mechanism because the input was
*  a set of pairwise alignments with a gap on the common sequence in this
*  segment.  Or graphically:
*
*  ----- ----- ----- -----            -----
*  AACCG ----- ----- -----   becomes  AACCG
*  ----- AACCG ----- -----            AACCG
*  ----- ----- AACCG -----            AACCG
*  ----- ----- ----- AACCG            AACCG
*
*  A run of columns is a candidate when the first row (row index 0) is a
*  gap in each of them.  Two columns of such a run are merged when they
*  have the same length, no row has sequence in both, and moving a start
*  into the earlier column would not put it in front of a smaller start
*  that the same row has in between.  The merged-away column is removed
*  from dsp->starts, dsp->strands and dsp->lens and dsp->numseg shrinks.
*  A NULL dsp is ignored.
*
***************************************************************************/
{
   int      gap_start_seg = -1;
   int      gap_end_seg = -1;
   int      row, seg, base_col, col, next_start, next_start_col;
   Boolean  can_fit;

   if (dsp == NULL)
      return;

   for (seg = 0; seg < dsp->numseg; ++seg) {
      /* track the current run of columns in which the first row is gapped */
      if (dsp->starts[dsp->dim * seg] == -1) {
         if (gap_start_seg == -1) {
            gap_start_seg = seg;
         }
         else {
            if (seg == dsp->numseg - 1) {
               gap_end_seg = seg + 1;   /* run extends to the last column */
            }
         }
      }
      else {
         if (gap_start_seg != -1) {
            gap_end_seg = seg;          /* run ended just before this column */
         }
      }

      if (gap_end_seg != -1) {
         for (base_col = gap_start_seg; base_col < gap_end_seg; ++base_col) {
            int len = dsp->lens[base_col];
            for (col = base_col + 1; col < gap_end_seg; ++col) {
               if (dsp->lens[col] != len) {
                  continue;
               }

               can_fit = TRUE;
               for (row = 0; row < dsp->dim; ++row) {
                  if (dsp->starts[dsp->dim * col + row] != -1 &&
                      dsp->starts[dsp->dim * base_col + row] != -1) {
                     can_fit = FALSE;
                     break;
                  }
                  else if (dsp->starts[dsp->dim * col + row] != -1)
                  {
                     /* make sure we aren't going to disturb the order of
                      * the starts */
                     next_start = GetNextStart (dsp, row, base_col, &next_start_col);
                     if (next_start > -1
                         && next_start < dsp->starts[dsp->dim * col + row]
                         && next_start_col < col)
                     {
                        can_fit = FALSE;
                        break;   /* verdict is final; no need to look at more rows */
                     }
                  }
               }

               if (can_fit) {
                  for (row = 0; row < dsp->dim; ++row) {
                     if (dsp->starts[dsp->dim * col + row] != -1) {
                        dsp->starts[dsp->dim * base_col + row] =
                           dsp->starts[dsp->dim * col + row];
                     }
                  }

                  /* remove column col */
                  {{
                     Int4Ptr   starts, lens;
                     Uint1Ptr  strands;
                     Int4      pos, new_pos;

                     starts = (Int4Ptr)MemNew(dsp->dim*(dsp->numseg-1)*sizeof(Int4));
                     strands = (Uint1Ptr)MemNew(dsp->dim*(dsp->numseg-1)*sizeof(Uint1));
                     lens = (Int4Ptr)MemNew((dsp->numseg-1)*sizeof(Int4));

                     /* everything before column col is copied unchanged ... */
                     for (pos = 0; pos < dsp->dim*col; pos++) {
                        starts[pos] = dsp->starts[pos];
                        strands[pos] = dsp->strands[pos];
                     }
                     /* ... and everything after it moves up by one column */
                     for (new_pos = pos, pos += dsp->dim; pos < dsp->dim*dsp->numseg;
                          pos++, new_pos++) {
                        starts[new_pos] = dsp->starts[pos];
                        strands[new_pos] = dsp->strands[pos];
                     }

                     for (pos = 0; pos < col; pos++) {
                        lens[pos] = dsp->lens[pos];
                     }
                     for (new_pos = pos, pos++; pos < dsp->numseg; pos++, new_pos++) {
                        lens[new_pos] = dsp->lens[pos];
                     }

                     MemFree(dsp->starts);
                     MemFree(dsp->strands);
                     MemFree(dsp->lens);   /* the old lens array used to be leaked here */
                     dsp->starts = starts;
                     dsp->strands = strands;
                     dsp->lens = lens;

                     dsp->numseg--;

                  }}

                  /* one column fewer: pull every index that pointed at or
                   * past the removed column back by one */
                  --gap_end_seg;
                  --seg;
                  --col;
               }
            }
         }

         gap_start_seg = -1;
         gap_end_seg = -1;
      }
   }
}
4449
4450 /* SECTION 2c */
4451 /***************************************************************************
4452 *
4453 * AlnMgr2CondenseRows finds rows of a dense-seg structure that are related
4454 * and that could be condensed into a single row (or fewer rows). It then
4455 * calls AlnMgr2DoCondense to condense those rows into continuous or
4456 * discontinuous rows. whichrow designates which row to merge, if
4457 * less than 1, the function tries to merge the last row.
4458 *
4459 ***************************************************************************/
AlnMgr2CondenseRows(DenseSegPtr dsp,Int4 whichrow)4460 static void AlnMgr2CondenseRows(DenseSegPtr dsp, Int4 whichrow)
4461 {
4462 Boolean done;
4463 Int4 i;
4464 Int4 j;
4465 Int4 k;
4466 Int4 numrows;
4467 AMCdRowPtr row;
4468 AMCdRowPtr PNTR rowarray;
4469 SeqIdPtr sip;
4470 SeqIdPtr targetsip;
4471
4472 sip = dsp->ids;
4473 rowarray = (AMCdRowPtr PNTR)MemNew((dsp->dim)*sizeof(AMCdRowPtr));
4474 if (whichrow < 1 || whichrow > dsp->dim)
4475 whichrow = dsp->dim;
4476 for (i=0; i<dsp->dim; i++)
4477 {
4478 row = (AMCdRowPtr)MemNew(sizeof(AMCdRow));
4479 row->sip = SeqIdDup(sip);
4480 sip = sip->next;
4481 row->strand = dsp->strands[i];
4482 row->rownum = i+1;
4483 rowarray[i] = row;
4484 if (i+1 == whichrow)
4485 targetsip = row->sip;
4486 }
4487 HeapSort(rowarray, i, sizeof(rowarray), AlnMgr2CompareCdRows);
4488 numrows = dsp->dim;
4489 j = -1; /* j marks the first occurrence of each sip */
4490 for (i=0; j==-1 && i<numrows; i++)
4491 {
4492 if (SeqIdComp(rowarray[i]->sip, targetsip) == SIC_YES)
4493 {
4494 j = i;
4495 if (rowarray[i]->rownum == whichrow) /* no other rows w/sip */
4496 {
4497 for (i=0; i<numrows; i++)
4498 {
4499 SeqIdFree(rowarray[i]->sip);
4500 MemFree(rowarray[i]);
4501 }
4502 MemFree(rowarray);
4503 return;
4504 }
4505 }
4506 }
4507 sip = SeqIdDup(rowarray[j]->sip);
4508 done = FALSE;
4509 for (i=j; !done && rowarray[i]->rownum < whichrow; i++)
4510 {
4511 if (SeqIdComp(rowarray[i]->sip, sip) == SIC_YES)
4512 {
4513 if (rowarray[i]->strand == rowarray[j]->strand)
4514 {
4515 if (AlnMgr2DoCondense(dsp, rowarray[i]->rownum, whichrow))
4516 {
4517 for (k=0; k<numrows; k++)
4518 {
4519 if (rowarray[k]->rownum > rowarray[i]->rownum)
4520 {
4521 rowarray[k]->rownum--;
4522 whichrow--;
4523 }
4524 }
4525 }
4526 }
4527 } else
4528 {
4529 done = TRUE;
4530 SeqIdFree(sip);
4531 sip = SeqIdDup(rowarray[i]->sip);
4532 j = i;
4533 }
4534 }
4535 SeqIdFree(sip);
4536 for (i=0; i<numrows; i++)
4537 {
4538 SeqIdFree(rowarray[i]->sip);
4539 MemFree(rowarray[i]);
4540 }
4541 MemFree(rowarray);
4542 }
4543
4544 /* SECTION 2c */
4545 /***************************************************************************
4546 *
4547 * AlnMgr2DoCondense arithmetically condenses two related rows of a dense-seg
4548 * structure into a single continuous row, a single discontinuous row, or
4549 * two rows with different information than before.
4550 *
4551 ***************************************************************************/
AlnMgr2DoCondense(DenseSegPtr dsp,Int4 rownum1,Int4 rownum2)4552 static Boolean AlnMgr2DoCondense(DenseSegPtr dsp, Int4 rownum1, Int4 rownum2)
4553 {
4554 Int4 aln;
4555 SeqAlignPtr fake_sap;
4556 Boolean fits;
4557 Boolean found;
4558 Int4 i;
4559 SeqIdPtr id;
4560 SeqIdPtr id_head;
4561 SeqIdPtr id_prev;
4562 Int4 j;
4563 Int4 k;
4564 Int4 max1;
4565 Int4 max2;
4566 Boolean merged;
4567 Int4 min1;
4568 Int4 min2;
4569 SAIndex2Ptr saip;
4570 Boolean someseq1;
4571 Boolean someseq2;
4572 Int4Ptr starts;
4573 Uint1 strand1;
4574 Uint1 strand2;
4575 Uint1Ptr strands;
4576 AM_Small2Ptr window;
4577 AM_Small2Ptr window_head;
4578 AM_Small2Ptr window_prev;
4579
4580 /* always merge up to rownum1 (better rows are first) */
4581 if (rownum1 > rownum2)
4582 {
4583 i = rownum2;
4584 rownum2 = rownum1;
4585 rownum1 = i;
4586 }
4587 strand1 = dsp->strands[rownum1-1];
4588 strand2 = dsp->strands[rownum2-1];
4589 if (strand1 != strand2)
4590 return FALSE;
4591 i = 0;
4592 window_head = window_prev = NULL;
4593 while (i < dsp->numseg)
4594 {
4595 j = i;
4596 someseq1 = someseq2 = FALSE;
4597 if (dsp->starts[dsp->dim*j+rownum1-1] >= 0)
4598 {
4599 someseq1 = TRUE;
4600 while (j<dsp->numseg && dsp->starts[dsp->dim*j+rownum2-1] < 0)
4601 {
4602 j++;
4603 }
4604 } else if (dsp->starts[dsp->dim*j+rownum2-1] >= 0)
4605 {
4606 someseq2 = TRUE;
4607 while (j<dsp->numseg && dsp->starts[dsp->dim*j+rownum1-1] < 0)
4608 {
4609 j++;
4610 }
4611 }
4612 fits = FALSE;
4613 if (j > i)
4614 {
4615 if (strand1 == Seq_strand_minus)
4616 {
4617 if (someseq1 == FALSE)
4618 {
4619 min1 = -1;
4620 for (k=j; min1 == -1 && k<dsp->numseg; k++)
4621 {
4622 if (dsp->starts[dsp->dim*k+rownum1-1] > -1)
4623 min1 = dsp->starts[dsp->dim*k+rownum1-1]+dsp->lens[k]-1;
4624 }
4625 max1 = -1;
4626 for (k=(i-1); max1 == -1 && k>=0; k--)
4627 {
4628 max1 = dsp->starts[dsp->dim*k+rownum1-1];
4629 }
4630 } else
4631 {
4632 min1 = -1;
4633 for (k=j-1; min1 == -1 && k>=i; k--)
4634 {
4635 min1 = dsp->starts[dsp->dim*(k)+rownum1-1];
4636 }
4637 max1 = -1;
4638 for (k=i; min1 == -1 && k<j; k++)
4639 {
4640 if (dsp->starts[dsp->dim*k+rownum1-1] >= 0)
4641 max1 = dsp->starts[dsp->dim*k+rownum1-1] + dsp->lens[k] -1;
4642 }
4643 }
4644 } else
4645 {
4646 if (someseq1 == FALSE)
4647 {
4648 min1 = -1;
4649 for (k=i-1; min1 == -1 && k >= 0; k--)
4650 {
4651 if (dsp->starts[dsp->dim*k+rownum1-1] > -1)
4652 min1 = dsp->starts[dsp->dim*k+rownum1-1]+dsp->lens[k]-1;
4653 }
4654 max1 = -1;
4655 for (k=j; max1 == -1 && k<dsp->numseg; k++)
4656 {
4657 max1 = dsp->starts[dsp->dim*k+rownum1-1];
4658 }
4659 } else
4660 {
4661 min1 = -1;
4662 for (k=i; min1 == -1 && k<j; k++)
4663 {
4664 min1 = dsp->starts[dsp->dim*k+rownum1-1];
4665 }
4666 max1 = -1;
4667 for (k=j-1; max1 == -1 && k>i; k--)
4668 {
4669 if (dsp->starts[dsp->dim*k+rownum1-1] >= 0)
4670 max1 = dsp->starts[dsp->dim*(k)+rownum1-1] + dsp->lens[k] - 1;
4671 }
4672 }
4673 }
4674 if (strand2 == Seq_strand_minus)
4675 {
4676 if (someseq2 == FALSE)
4677 {
4678 min2 = -1;
4679 for (k=j; min2 == -1 && k<dsp->numseg; k++)
4680 {
4681 if (dsp->starts[dsp->dim*k+rownum2-1] > -1)
4682 min2 = dsp->starts[dsp->dim*k+rownum2-1]+dsp->lens[k]-1;
4683 }
4684 max2 = -1;
4685 for (k=(i-1); max2 == -1 && k>=0; k--)
4686 {
4687 max2 = dsp->starts[dsp->dim*k+rownum2-1];
4688 }
4689 } else
4690 {
4691 min2 = -1;
4692 for (k=j-1; min2 == -1 && k>=i; k--)
4693 {
4694 min2 = dsp->starts[dsp->dim*(k)+rownum2-1];
4695 }
4696 max2 = -1;
4697 for (k=i; max2 == -1 && k<j; k++)
4698 {
4699 if (dsp->starts[dsp->dim*k+rownum2-1] >= 0)
4700 max2 = dsp->starts[dsp->dim*k+rownum2-1] + dsp->lens[k]-1;
4701 }
4702 }
4703 } else
4704 {
4705 if (someseq2 == FALSE)
4706 {
4707 min2 = -1;
4708 for (k=i-1; min2 == -1 && k >= 0; k--)
4709 {
4710 if (dsp->starts[dsp->dim*k+rownum2-1] > -1)
4711 min2 = dsp->starts[dsp->dim*k+rownum2-1]+dsp->lens[k]-1;
4712 }
4713 max2 = -1;
4714 for (k=j; max2 == -1 && k<dsp->numseg; k++)
4715 {
4716 max2 = dsp->starts[dsp->dim*k+rownum2-1];
4717 }
4718 } else
4719 {
4720 min2 = -1;
4721 for (k=i; min2 == -1 && k<j; k++)
4722 {
4723 min2 = dsp->starts[dsp->dim*k+rownum2-1];
4724 }
4725 max2 = -1;
4726 for (k=j-1; max2 == -1 && k>=i; k--)
4727 {
4728 if (dsp->starts[dsp->dim*(k)+rownum2-1] >= 0)
4729 max2 = dsp->starts[dsp->dim*(k)+rownum2-1] + dsp->lens[k] - 1;
4730 }
4731 }
4732 }
4733 if (someseq1 == FALSE)
4734 {
4735 if ((min1 < min2 || min2 == -1) && (max1 > max2 || max1 == -1))
4736 fits = TRUE;
4737 } else
4738 {
4739 if ((min2 < min1 || min1 == -1) && (max2 > max1 || max2 == -1))
4740 fits = TRUE;
4741 }
4742 window = (AM_Small2Ptr)MemNew(sizeof(AM_Small2));
4743 window->n1 = i;
4744 window->n2 = j-1;
4745 if (!fits)
4746 window->n4 = -1;
4747 if (window_head != NULL)
4748 {
4749 window_prev->next = window;
4750 window_prev = window;
4751 } else
4752 window_head = window_prev = window;
4753 }
4754 if (i == j)
4755 i++;
4756 else
4757 i = j;
4758 }
4759 if (window_head == NULL)
4760 return FALSE;
4761 fake_sap = SeqAlignNew();
4762 fake_sap->segtype = SAS_DENSEG;
4763 fake_sap->segs = (Pointer)dsp;
4764 AlnMgr2IndexSingleChildSeqAlign(fake_sap);
4765 aln = AlnMgr2GetNumAlnBlocks(fake_sap);
4766 if (aln == 1) /* only merge if there is a single fitted window flanked by gaps */
4767 /*or if there are several contiguous fitted windows flanked by gaps */
4768 {
4769 if (window_head->next != NULL && window_head->n4 == 0)
4770 {
4771 window = window_head->next;
4772 while (window_head->n2+1 < dsp->numseg && dsp->starts[dsp->dim*(window_head->n2+1)+rownum1-1] == -1 && dsp->starts[dsp->dim*(window_head->n2+1)+rownum2-1] == -1)
4773 {
4774 window_head->n2++;
4775 }
4776 while (window != NULL && window->n4 == 0 && window->n1 == window_head->n2+1)
4777 {
4778 window_head->n2 = window->n2;
4779 window = window->next;
4780 while (window_head->n2+1 < dsp->numseg && dsp->starts[dsp->dim*(window_head->n2+1)+rownum1-1] == -1 && dsp->starts[dsp->dim*(window_head->n2+1)+rownum2-1] == -1)
4781 {
4782 window_head->n2++;
4783 }
4784 }
4785 if (window != NULL)
4786 {
4787 while (window_head != NULL)
4788 {
4789 window = window_head->next;
4790 MemFree(window_head);
4791 window_head = window;
4792 }
4793 fake_sap->segs = NULL;
4794 SeqAlignFree(fake_sap);
4795 return FALSE;
4796 }
4797 }
4798 if (window_head->n4 == -1)
4799 {
4800 while (window_head != NULL)
4801 {
4802 window = window_head->next;
4803 MemFree(window_head);
4804 window_head = window;
4805 }
4806 fake_sap->segs = NULL;
4807 SeqAlignFree(fake_sap);
4808 return FALSE;
4809 }
4810 found = FALSE;
4811 for (i=0; !found && i<window_head->n1; i++)
4812 {
4813 if (dsp->starts[dsp->dim*i+rownum1-1] != -1 && dsp->starts[dsp->dim*i+rownum2-1] != -1)
4814 found = TRUE;
4815 }
4816 for (i=window_head->n2+1; !found && i<dsp->numseg; i++)
4817 {
4818 if (dsp->starts[dsp->dim*i+rownum1-1] != -1 && dsp->starts[dsp->dim*i+rownum2-1] != -1)
4819 found = TRUE;
4820 }
4821 if (found)
4822 {
4823 while (window_head != NULL)
4824 {
4825 window = window_head->next;
4826 MemFree(window_head);
4827 window_head = window;
4828 }
4829 fake_sap->segs = NULL;
4830 SeqAlignFree(fake_sap);
4831 return FALSE;
4832 }
4833 /* merge whole row up to rownum1 */
4834 for (i=0; i<dsp->numseg; i++)
4835 {
4836 dsp->starts[dsp->dim*i+rownum1-1] = MAX(dsp->starts[dsp->dim*i+rownum1-1], dsp->starts[dsp->dim*i+rownum2-1]);
4837 }
4838 starts = (Int4Ptr)MemNew((dsp->dim-1)*(dsp->numseg)*sizeof(Int4));
4839 strands = (Uint1Ptr)MemNew((dsp->dim-1)*(dsp->numseg)*sizeof(Uint1));
4840 k = 0;
4841 for (i=0; i<dsp->dim; i++)
4842 {
4843 if (i != rownum2-1)
4844 {
4845 for (j=0; j<dsp->numseg; j++)
4846 {
4847 starts[(dsp->dim-1)*j+k] = dsp->starts[dsp->dim*j+i];
4848 strands[(dsp->dim-1)*j+k] = dsp->strands[dsp->dim*j+i];
4849 }
4850 k++;
4851 }
4852 }
4853 MemFree(dsp->starts);
4854 MemFree(dsp->strands);
4855 dsp->starts = starts;
4856 dsp->strands = strands;
4857 dsp->dim--;
4858 id_head = id_prev = NULL;
4859 id = dsp->ids;
4860 j = 0;
4861 while (id != NULL)
4862 {
4863 if (j+1 != rownum2)
4864 {
4865 if (id_head != NULL)
4866 {
4867 id_prev->next = SeqIdDup(id);
4868 id_prev = id_prev->next;
4869 } else
4870 id_head = id_prev = SeqIdDup(id);
4871 }
4872 j++;
4873 id = id->next;
4874 }
4875 SeqIdSetFree(dsp->ids);
4876 dsp->ids = id_head;
4877 while (window_head != NULL)
4878 {
4879 window = window_head->next;
4880 MemFree(window_head);
4881 window_head = window;
4882 }
4883 fake_sap->segs = NULL;
4884 SeqAlignFree(fake_sap);
4885 return TRUE;
4886 }
4887 /* now go through and find the largest piece of every window that can be merged */
4888 /* (can't split up an aligned region with the merge, though) */
4889 window = window_head;
4890 saip = (SAIndex2Ptr)(fake_sap->saip);
4891 while (window != NULL)
4892 {
4893 j = k = -1;
4894 found = FALSE;
4895 for (i=0; !found && i<window->n1; i++)
4896 {
4897 if (dsp->starts[dsp->dim*i+rownum1-1] != -1 && dsp->starts[dsp->dim*i+rownum2-1] != -1)
4898 found = TRUE;
4899 }
4900 if (!found)
4901 j = window->n1;
4902 found = FALSE;
4903 for (i=window->n2+1; !found && i<dsp->numseg; i++)
4904 {
4905 if (dsp->starts[dsp->dim*i+rownum1-1] != -1 && dsp->starts[dsp->dim*i+rownum2-1] != -1)
4906 found = TRUE;
4907 }
4908 if (!found)
4909 k = window->n2;
4910 if (j == -1)
4911 {
4912 found = FALSE;
4913 for (i = window->n1-1; !found && i<window->n2; i++)
4914 {
4915 j = binary_search_on_uint4_list(saip->unaln, i, saip->numunaln);
4916 if (j == i)
4917 found = TRUE;
4918 else
4919 j = -1;
4920 }
4921 }
4922 if (k == -1)
4923 {
4924 found = FALSE;
4925 for (i = window->n2; !found && i>=window->n1; i++)
4926 {
4927 k = binary_search_on_uint4_list(saip->unaln, i, saip->numunaln);
4928 if (k == i)
4929 found = TRUE;
4930 else
4931 k = -1;
4932 }
4933 }
4934 if (j > -1 && k > -1 && k > j)
4935 {
4936 window->n1 = j+1;
4937 window->n2 = k;
4938 } else
4939 window->n1 = -1;
4940 window = window->next;
4941 }
4942 window = window_head;
4943 while (window != NULL)
4944 {
4945 if (window->n4 == -1 && i >= 0) /* see if it fits now */
4946 {
4947 i = window->n1;
4948 j = window->n2+1;
4949 if (strand1 == Seq_strand_minus)
4950 {
4951 if (dsp->starts[dsp->dim*(j-1)+rownum1-1] == -1)
4952 {
4953 min1 = -1;
4954 for (k=j; min1 == -1 && k<dsp->numseg; k++)
4955 {
4956 min1 = dsp->starts[dsp->dim*k+rownum1-1];
4957 }
4958 max1 = -1;
4959 for (k=(i-1); max1 == -1 && k>=0; k--)
4960 {
4961 max1 = dsp->starts[dsp->dim*k+rownum1-1];
4962 }
4963 } else
4964 {
4965 min1 = dsp->starts[dsp->dim*(j-1)+rownum1-1];
4966 max1 = dsp->starts[dsp->dim*i+rownum1-1] + dsp->lens[i];
4967 }
4968 } else
4969 {
4970 if (dsp->starts[dsp->dim*(j-1)+rownum1-1] == -1)
4971 {
4972 min1 = -1;
4973 for (k=i-1; min1 == -1 && k >= 0; k--)
4974 {
4975 min1 = dsp->starts[dsp->dim*k+rownum1-1];
4976 }
4977 max1 = -1;
4978 for (k=j; max1 == -1 && k<dsp->numseg; k++)
4979 {
4980 max1 = dsp->starts[dsp->dim*k+rownum1-1];
4981 }
4982 } else
4983 {
4984 min1 = dsp->starts[dsp->dim*i+rownum1-1];
4985 max1 = dsp->starts[dsp->dim*(j-1)+rownum1-1] + dsp->lens[j-1];
4986 }
4987 }
4988 if (strand2 == Seq_strand_minus)
4989 {
4990 if (dsp->starts[dsp->dim*(j-1)+rownum2-1] == -1)
4991 {
4992 min2 = -1;
4993 for (k=j; min2 == -1 && k<dsp->numseg; k++)
4994 {
4995 min2 = dsp->starts[dsp->dim*k+rownum2-1];
4996 }
4997 max2 = -1;
4998 for (k=(i-1); max2 == -1 && k>=0; k--)
4999 {
5000 max2 = dsp->starts[dsp->dim*k+rownum2-1];
5001 }
5002 } else
5003 {
5004 min2 = dsp->starts[dsp->dim*(j-1)+rownum2-1];
5005 max2 = dsp->starts[dsp->dim*i+rownum2-1] + dsp->lens[i];
5006 }
5007 } else
5008 {
5009 if (dsp->starts[dsp->dim*(j-1)+rownum2-1] == -1)
5010 {
5011 min2 = -1;
5012 for (k=i-1; min2 == -1 && k >= 0; k--)
5013 {
5014 min2 = dsp->starts[dsp->dim*k+rownum2-1];
5015 }
5016 max2 = -1;
5017 for (k=j; max2 == -1 && k<dsp->numseg; k++)
5018 {
5019 max2 = dsp->starts[dsp->dim*k+rownum2-1];
5020 }
5021 } else
5022 {
5023 min2 = dsp->starts[dsp->dim*i+rownum2-1];
5024 max2 = dsp->starts[dsp->dim*(j-1)+rownum2-1] + dsp->lens[j-1];
5025 }
5026 }
5027 if (dsp->starts[dsp->dim*j+rownum1-1] == -1)
5028 {
5029 if (min1 < min2 && (max1 > max2 || max1 == -1))
5030 window->n4 = 0;
5031 } else
5032 {
5033 if (min2 < min1 && (max2 > max1 || max2 == -1))
5034 window->n4 = 0;
5035 }
5036 }
5037 if (window->n1 >= 0 && window->n4 >= 0)
5038 {
5039 for (i=window->n1; i<=window->n2; i++)
5040 {
5041 dsp->starts[dsp->dim*i+rownum1-1] = MAX(dsp->starts[dsp->dim*i+rownum1-1], dsp->starts[dsp->dim+i+rownum2-1]);
5042 }
5043 }
5044 window = window->next;
5045 }
5046 found = FALSE;
5047 /* check to see if rownum2 is all gaps now */
5048 for (i=0; !found && i<dsp->numseg; i++)
5049 {
5050 if (dsp->starts[dsp->dim*i+rownum2-1] != -1)
5051 found = TRUE;
5052 }
5053 merged = FALSE;
5054 if (!found) /* just gaps */
5055 {
5056 /* merge whole row up to rownum1 */
5057 for (i=0; i<dsp->numseg; i++)
5058 {
5059 dsp->starts[dsp->dim*i+rownum1-1] = MAX(dsp->starts[dsp->dim*i+rownum1-1], dsp->starts[dsp->dim*i+rownum2-1]);
5060 }
5061 starts = (Int4Ptr)MemNew((dsp->dim-1)*(dsp->numseg)*sizeof(Int4));
5062 strands = (Uint1Ptr)MemNew((dsp->dim-1)*(dsp->numseg)*sizeof(Uint1));
5063 k = 0;
5064 for (i=0; i<dsp->dim; i++)
5065 {
5066 if (i != rownum2-1)
5067 {
5068 for (j=0; j<dsp->numseg; j++)
5069 {
5070 starts[dsp->dim*j+k] = dsp->starts[dsp->dim*j+i];
5071 strands[dsp->dim*j+k] = dsp->strands[dsp->dim*j+i];
5072 }
5073 k++;
5074 }
5075 }
5076 MemFree(dsp->starts);
5077 MemFree(dsp->strands);
5078 dsp->starts = starts;
5079 dsp->strands = strands;
5080 dsp->dim--;
5081 id_head = id_prev = NULL;
5082 id = dsp->ids;
5083 j = 0;
5084 while (id != NULL)
5085 {
5086 if (j+1 != rownum2)
5087 {
5088 if (id_head != NULL)
5089 {
5090 id_prev->next = SeqIdDup(id);
5091 id_prev = id_prev->next;
5092 } else
5093 id_head = id_prev = SeqIdDup(id);
5094 }
5095 j++;
5096 id = id->next;
5097 }
5098 SeqIdSetFree(dsp->ids);
5099 dsp->ids = id_head;
5100 merged = TRUE;
5101 }
5102 while (window_head != NULL)
5103 {
5104 window = window_head->next;
5105 MemFree(window_head);
5106 window_head = window;
5107 }
5108 fake_sap->segs = NULL;
5109 SeqAlignFree(fake_sap);
5110 return merged;
5111 }
5112
5113 /* SECTION 2c */
5114 /***************************************************************************
5115 *
5116 * AlnMgr2CompareCdRows is the HeapSort callback for AlnMgr2CondenseRows.
5117 * It puts the CDRows in order first by seqid and secondarily by row number.
5118 *
5119 ***************************************************************************/
AlnMgr2CompareCdRows(VoidPtr ptr1,VoidPtr ptr2)5120 static int LIBCALLBACK AlnMgr2CompareCdRows(VoidPtr ptr1, VoidPtr ptr2)
5121 {
5122 Int4 i;
5123 AMCdRowPtr row1;
5124 AMCdRowPtr row2;
5125
5126 if (ptr1 == NULL || ptr2 == NULL)
5127 return 0;
5128 row1 = *((AMCdRowPtr PNTR)ptr1);
5129 row2 = *((AMCdRowPtr PNTR)ptr2);
5130 i = AlnMgr2OrderSeqIds(row1->sip, row2->sip);
5131 if (i == 0) /* sort from least rownum to greatest within each seqid */
5132 {
5133 if (row1->rownum < row2->rownum)
5134 return -1;
5135 else
5136 return 1;
5137 } else
5138 return i;
5139 }
5140
5141 /* SECTION 2c */
5142 /***************************************************************************
5143 *
5144 * AlnMgr2CompareAsps is a HeapSort callback for AlnMgr2AddInNewSA. It
5145 * compares the starts (n1) of the two AM_Small2Ptrs; if those are the same
5146 * it compares the types.
5147 *
5148 ***************************************************************************/
AlnMgr2CompareAsps(VoidPtr ptr1,VoidPtr ptr2)5149 static int LIBCALLBACK AlnMgr2CompareAsps(VoidPtr ptr1, VoidPtr ptr2)
5150 {
5151 AM_Small2Ptr asp1;
5152 AM_Small2Ptr asp2;
5153
5154 if (ptr1 != NULL && ptr2 != NULL)
5155 {
5156 asp1 = *((AM_Small2Ptr PNTR)ptr1);
5157 asp2 = *((AM_Small2Ptr PNTR)ptr2);
5158 if (asp1->n1 < asp2->n1)
5159 return -1;
5160 else if (asp1->n1 > asp2->n1)
5161 return 1;
5162 else if (asp1->n5 < asp2->n5)
5163 return -1;
5164 else if (asp1->n5 > asp2->n5)
5165 return 1;
5166 else
5167 {
5168 if (asp1->n3 == AM_GAP && asp2->n3 == AM_GAP)
5169 {
5170 if (asp1->n2 < asp2->n2)
5171 return -1;
5172 if (asp1->n2 > asp2->n2)
5173 return 1;
5174 }
5175 if (asp1->n3 == AM_START)
5176 {
5177 if (asp2->n3 == AM_STOP)
5178 return -1;
5179 else if (asp2->n3 == AM_GAP)
5180 return -1;
5181 else if (asp2->n3 == AM_HARDSTOP)
5182 return -1;
5183 else
5184 return 0;
5185 } else if (asp1->n3 == AM_STOP)
5186 {
5187 if (asp2->n3 == AM_START)
5188 return 1;
5189 else if (asp2->n3 == AM_GAP)
5190 return 1;
5191 else if (asp2->n3 == AM_HARDSTOP)
5192 return -1;
5193 else
5194 return 0;
5195 } else if (asp1->n3 == AM_GAP)
5196 {
5197 if (asp2->n3 == AM_START)
5198 return 1;
5199 else if (asp2->n3 == AM_STOP)
5200 return -1;
5201 else if (asp2->n3 == AM_HARDSTOP)
5202 return -1;
5203 else
5204 return 0;
5205 } else if (asp1->n3 == AM_HARDSTOP)
5206 {
5207 if (asp2->n3 == AM_START)
5208 return 1;
5209 else if (asp2->n3 == AM_STOP)
5210 return 1;
5211 else if (asp2->n3 == AM_GAP)
5212 return 1;
5213 else
5214 return 0;
5215 }
5216 }
5217 }
5218 return 0;
5219 }
5220
5221 /* SECTION 2c */
5222 /***************************************************************************
5223 *
5224 * AlnMgr2CompareAspsMinus is a HeapSort callback for AlnMgr2AddInNewSA. It
5225 * compares the starts (n1) of the two AM_Small2Ptrs; if those are the same
5226 * it compares the types. The only difference from AlnMgr2CompareAsps is
5227 * that it sorts the structures in the opposite order.
5228 *
5229 ***************************************************************************/
AlnMgr2CompareAspsMinus(VoidPtr ptr1,VoidPtr ptr2)5230 static int LIBCALLBACK AlnMgr2CompareAspsMinus(VoidPtr ptr1, VoidPtr ptr2)
5231 {
5232 AM_Small2Ptr asp1;
5233 AM_Small2Ptr asp2;
5234
5235 if (ptr1 != NULL && ptr2 != NULL)
5236 {
5237 asp1 = *((AM_Small2Ptr PNTR)ptr1);
5238 asp2 = *((AM_Small2Ptr PNTR)ptr2);
5239 if (asp1->n1 > asp2->n1)
5240 return -1;
5241 else if (asp1->n1 < asp2->n1)
5242 return 1;
5243 else if (asp1->n5 < asp2->n5)
5244 return -1;
5245 else if (asp1->n5 > asp2->n5)
5246 return 1;
5247 else
5248 {
5249 if (asp1->n3 == AM_GAP && asp2->n3 == AM_GAP)
5250 {
5251 if (asp1->n2 < asp2->n2)
5252 return -1;
5253 if (asp1->n2 > asp2->n2)
5254 return 1;
5255 }
5256 if (asp1->n3 == AM_START)
5257 {
5258 if (asp2->n3 == AM_STOP)
5259 return 1;
5260 else if (asp2->n3 == AM_GAP)
5261 return -1;
5262 else if (asp2->n3 == AM_HARDSTOP)
5263 return 1;
5264 else
5265 return 0;
5266 } else if (asp1->n3 == AM_STOP)
5267 {
5268 if (asp2->n3 == AM_START)
5269 return -1;
5270 else if (asp2->n3 == AM_GAP)
5271 return -1;
5272 else if (asp2->n3 == AM_HARDSTOP)
5273 return 1;
5274 else
5275 return 0;
5276 } else if (asp1->n3 == AM_GAP)
5277 {
5278 if (asp2->n3 == AM_START)
5279 return 1;
5280 else if (asp2->n3 == AM_STOP)
5281 return 1;
5282 else if (asp2->n3 == AM_HARDSTOP)
5283 return 1;
5284 else
5285 return 0;
5286 } else if (asp1->n3 == AM_HARDSTOP)
5287 {
5288 if (asp2->n3 == AM_START)
5289 return -1;
5290 else if (asp2->n3 == AM_STOP)
5291 return -1;
5292 else if (asp2->n3 == AM_GAP)
5293 return -1;
5294 else
5295 return 0;
5296 }
5297 }
5298 }
5299 return 0;
5300 }
5301
5302
5303 /* SECTION 2c */
5304 /***************************************************************************
5305 *
5306 * AlnMgr2GetFirstSharedRow takes two indexed or unindexed dense-seg
5307 * seqaligns and returns the row numbers of the first sequence that is
5308 * shared between the two alignments. If the alignments do not share any
5309 * sequences, both n1 and n2 are set to 0.
5310 *
5311 ***************************************************************************/
AlnMgr2GetFirstSharedRow(SeqAlignPtr sap1,SeqAlignPtr sap2,Int4Ptr n1,Int4Ptr n2)5312 static void AlnMgr2GetFirstSharedRow(SeqAlignPtr sap1, SeqAlignPtr sap2, Int4Ptr n1, Int4Ptr n2)
5313 {
5314 DenseSegPtr dsp1;
5315 DenseSegPtr dsp2;
5316 Int4 i;
5317 Int4 j;
5318 SeqIdPtr sip1;
5319 SeqIdPtr sip2;
5320
5321 dsp1 = (DenseSegPtr)(sap1->segs);
5322 dsp2 = (DenseSegPtr)(sap2->segs);
5323 sip1 = dsp1->ids;
5324 i = 1;
5325 while (sip1 != NULL)
5326 {
5327 j = 1;
5328 sip2 = dsp2->ids;
5329 while (sip2 != NULL)
5330 {
5331 if (SeqIdComp(sip1, sip2) == SIC_YES)
5332 {
5333 *n1 = i;
5334 *n2 = j;
5335 return;
5336 }
5337 sip2 = sip2->next;
5338 j++;
5339 }
5340 sip1 = sip1->next;
5341 i++;
5342 }
5343 /* nothing found */
5344 *n1 = 0;
5345 *n2 = 0;
5346 }
5347
5348 /* SECTION 2d */
/***************************************************************************
*
*  AlnMgr2SeqIdListsOverlap walks the list starting at sip1 and returns
*  the first of its ids for which SeqIdComp against some id in the list
*  starting at sip2 yields SIC_YES. The returned pointer is a member of
*  the sip1 list, not a copy. NULL is returned if either list is empty or
*  the lists have no id in common.
*
***************************************************************************/
static SeqIdPtr AlnMgr2SeqIdListsOverlap(SeqIdPtr sip1, SeqIdPtr sip2)
{
   SeqIdPtr  candidate;
   SeqIdPtr  other;

   if (sip1 == NULL || sip2 == NULL)
      return NULL;
   for (candidate = sip1; candidate != NULL; candidate = candidate->next)
   {
      for (other = sip2; other != NULL; other = other->next)
      {
         if (SeqIdComp(candidate, other) == SIC_YES)
            return candidate;
      }
   }
   return NULL;
}
5370
5371 /***************************************************************************
5372 *
5373 * AlnMgr2OrderSeqIds simply alphabetizes printed seqids in order to sort
5374 * them in order to group identical ones in a set.
5375 *
5376 ***************************************************************************/
AlnMgr2OrderSeqIds(SeqIdPtr sip1,SeqIdPtr sip2)5377 static Int4 AlnMgr2OrderSeqIds(SeqIdPtr sip1, SeqIdPtr sip2)
5378 {
5379 Char txt1[42];
5380 Char txt2[42];
5381
5382 if (sip1 == NULL && sip2 == NULL)
5383 return 0;
5384 if (sip1 == NULL && sip2 != NULL)
5385 return 1;
5386 if (sip1 != NULL && sip2 == NULL)
5387 return -1;
5388 SeqIdWrite(sip1, txt1, PRINTID_TEXTID_ACC_VER, 41);
5389 SeqIdWrite(sip2, txt2, PRINTID_TEXTID_ACC_VER, 41);
5390 txt1[41] = txt2[41] = '\0';
5391 return StringICmp(txt1, txt2);
5392 }
5393
5394 /* SECTION 2d */
5395 /***************************************************************************
5396 *
5397 * AlnMgr2SetUnaln takes an indexed alignment and sets the numunaln and
5398 * unaln array fields. The unaligned regions are numbered the same
5399 * regardless of whether the alignment is anchored, although they will
5400 * most likely be accessed and displayed differently.
5401 *
5402 ***************************************************************************/
AlnMgr2SetUnaln(SeqAlignPtr sap)5403 static void AlnMgr2SetUnaln(SeqAlignPtr sap)
5404 {
5405 AMAlignIndex2Ptr amaip;
5406 AM_Small2Ptr ams;
5407 AM_Small2Ptr ams_head;
5408 AM_Small2Ptr ams_prev;
5409 AM_Small2Ptr PNTR amsarray;
5410 DenseSegPtr dsp;
5411 Int4 i;
5412 Int4 j;
5413 SAIndex2Ptr saip;
5414
5415 if (sap == NULL || sap->saip == NULL)
5416 return;
5417 if (sap->saip->indextype == INDEX_CHILD)
5418 {
5419 saip = (SAIndex2Ptr)(sap->saip);
5420 dsp = (DenseSegPtr)(sap->segs);
5421 } else if (sap->saip->indextype == INDEX_PARENT)
5422 {
5423 amaip = (AMAlignIndex2Ptr)(sap->saip);
5424 if (amaip->alnstyle == AM2_LITE)
5425 return;
5426 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
5427 dsp = (DenseSegPtr)(amaip->sharedaln->segs);
5428 } else
5429 return;
5430 MemFree(saip->unaln);
5431 saip->unaln = NULL;
5432 ams_head = ams_prev = NULL;
5433 for (i=0; i<saip->numrows; i++)
5434 {
5435 for (j=0; j<saip->srdp[i]->numunaln; j++)
5436 {
5437 ams = (AM_Small2Ptr)MemNew(sizeof(AM_Small2));
5438 ams->n1 = saip->srdp[i]->unaligned[j];
5439 if (ams_head != NULL)
5440 {
5441 ams_prev->next = ams;
5442 ams_prev = ams;
5443 } else
5444 ams_head = ams_prev = ams;
5445 }
5446 }
5447 if (ams_head == NULL)
5448 {
5449 saip->numunaln = -1;
5450 return;
5451 }
5452 j = 0;
5453 ams = ams_head;
5454 while (ams != NULL)
5455 {
5456 j++;
5457 ams = ams->next;
5458 }
5459 amsarray = (AM_Small2Ptr PNTR)MemNew(j*sizeof(AM_Small2Ptr));
5460 j = 0;
5461 ams = ams_head;
5462 while (ams != NULL)
5463 {
5464 amsarray[j] = ams;
5465 j++;
5466 ams = ams->next;
5467 }
5468 HeapSort(amsarray, j, sizeof(AM_Small2Ptr), AlnMgr2CompareUnalnAMS);
5469 saip->numunaln = 1;
5470 for (i=1; i<j; i++)
5471 {
5472 if (amsarray[i]->n1 != amsarray[i-1]->n1)
5473 saip->numunaln++;
5474 }
5475 saip->unaln = (Uint4Ptr)MemNew(saip->numunaln*sizeof(Uint4));
5476 saip->unaln[0] = amsarray[0]->n1;
5477 saip->numunaln = 1;
5478 for (i=1; i<j; i++)
5479 {
5480 if (amsarray[i]->n1 != amsarray[i-1]->n1)
5481 {
5482 saip->unaln[saip->numunaln] = amsarray[i]->n1;
5483 saip->numunaln++;
5484 }
5485 }
5486 for (i=0; i<j; i++)
5487 {
5488 MemFree(amsarray[i]);
5489 }
5490 MemFree(amsarray);
5491 }
5492
5493 /* SECTION 2d */
5494 /***************************************************************************
5495 *
5496 * AlnMgr2CompareUnalnAMS is the HeapSort callback for AlnMgr2SetUnaln;
5497 * it simply compares two AM_Small2 structures and orders them by their
5498 * n1 fields.
5499 *
5500 ***************************************************************************/
AlnMgr2CompareUnalnAMS(VoidPtr ptr1,VoidPtr ptr2)5501 static int LIBCALLBACK AlnMgr2CompareUnalnAMS(VoidPtr ptr1, VoidPtr ptr2)
5502 {
5503 AM_Small2Ptr ams1;
5504 AM_Small2Ptr ams2;
5505
5506 if (ptr1 == NULL || ptr2 == NULL)
5507 return 0;
5508 ams1 = *((AM_Small2Ptr PNTR)ptr1);
5509 ams2 = *((AM_Small2Ptr PNTR)ptr2);
5510 if (ams1->n1 < ams2->n1)
5511 return -1;
5512 else if (ams1->n1 > ams2->n1)
5513 return 1;
5514 else
5515 return 0;
5516 }
5517
5518 /***************************************************************************
5519 *
5520 * SECTION 3: Functions for debugging
5521 *
5522 ***************************************************************************/
5523
5524 /* SECTION 3 */
am_print_sa_index(SeqAlignPtr sap,FILE * ofp)5525 NLM_EXTERN void am_print_sa_index(SeqAlignPtr sap, FILE *ofp)
5526 {
5527 Int4 i;
5528 Int4 j;
5529 SAIndex2Ptr saip;
5530
5531 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_CHILD)
5532 return;
5533 saip = (SAIndex2Ptr)(sap->saip);
5534 fprintf(ofp, "Rows: %d\n", saip->numrows);
5535 fprintf(ofp, "Segments: %d\n", saip->numseg);
5536 fprintf(ofp, "Anchor: %d\n", saip->anchor);
5537 fprintf(ofp, "Alignment coordinates: ");
5538 for (i=0; i<saip->numseg; i++)
5539 {
5540 fprintf(ofp, "%d ", saip->aligncoords[i]);
5541 }
5542 fprintf(ofp, "\n\n");
5543 for (i=0; i<saip->numrows; i++)
5544 {
5545 fprintf(ofp, "row %d\n", i+1);
5546 fprintf(ofp, "numsect: %d\n", saip->srdp[i]->numsect);
5547 for (j=0; j<saip->srdp[i]->numsect; j++)
5548 {
5549 fprintf(ofp, "%d ", saip->srdp[i]->sect[j]);
5550 }
5551 fprintf(ofp, "\n");
5552 fprintf(ofp, "numunsect: %d\n", saip->srdp[i]->numunsect);
5553 for (j=0; j<saip->srdp[i]->numunsect; j++)
5554 {
5555 fprintf(ofp, "%d ", saip->srdp[i]->unsect[j]);
5556 }
5557 fprintf(ofp, "\n");
5558 fprintf(ofp, "numinsect: %d\n", saip->srdp[i]->numinsect);
5559 for (j=0; j<saip->srdp[i]->numinsect; j++)
5560 {
5561 fprintf(ofp, "%d ", saip->srdp[i]->insect[j]);
5562 }
5563 fprintf(ofp, "\n");
5564 }
5565 }
5566
5567 /* SECTION 3 */
5568 /***************************************************************************
5569 *
5570 * AlnMgr2PrintSeqAlign prints an interleaved output of the entire
5571 * indexed alignment, with 'linesize' characters on each line (max 200).
5572 *
5573 ***************************************************************************/
AlnMgr2PrintSeqAlign(SeqAlignPtr sap,Int4 linesize,Boolean isnuc,FILE * ofp)5574 NLM_EXTERN void AlnMgr2PrintSeqAlign(SeqAlignPtr sap, Int4 linesize, Boolean isnuc, FILE *ofp)
5575 {
5576 AlnMsg2Ptr amp;
5577 BioseqPtr bsp;
5578 Char buf[201];
5579 Int4 ctr;
5580 Boolean done;
5581 Int4 i;
5582 Int4 j;
5583 Int4 len;
5584 Boolean more;
5585 Int4 numrows;
5586 Int4 row;
5587 Uint1 seqcode;
5588 SeqIdPtr sip;
5589 SeqPortPtr spp;
5590 Char text[42];
5591
5592 if (sap == NULL || sap->saip == NULL || linesize > 200)
5593 return;
5594 if (isnuc)
5595 seqcode = Seq_code_iupacna;
5596 else
5597 seqcode = Seq_code_iupacaa;
5598 amp = AlnMsgNew2();
5599 numrows = AlnMgr2GetNumRows(sap);
5600 len = AlnMgr2GetAlnLength(sap, FALSE);
5601 for (i=0; i<len; i+=linesize)
5602 {
5603 fprintf(ofp, "%d - %d\n", i, MIN(i+linesize-1, len-1));
5604 for (row=0; row<numrows; row++)
5605 {
5606 sip = AlnMgr2GetNthSeqIdPtr(sap, row+1);
5607 SeqIdWrite(sip, text, PRINTID_FASTA_SHORT, 41);
5608 done = FALSE;
5609 for (j=0; j<12; j++)
5610 {
5611 if (text[j] == '\0')
5612 done = TRUE;
5613 if (done == TRUE)
5614 fprintf(ofp, " ");
5615 else
5616 fprintf(ofp, "%c", text[j]);
5617 }
5618 bsp = BioseqLockById(sip);
5619 AlnMsgReNew2(amp);
5620 amp->row_num = row+1;
5621 amp->from_aln = i;
5622 amp->to_aln = MIN(i+linesize-1, len-1);
5623 while (more = AlnMgr2GetNextAlnBit(sap, amp))
5624 {
5625 if (amp->type == AM_GAP)
5626 {
5627 for (j=amp->from_row; j<=amp->to_row; j++)
5628 {
5629 fprintf(ofp, "-");
5630 }
5631 } else
5632 {
5633 spp = SeqPortNew(bsp, amp->from_row, amp->to_row, amp->strand, seqcode);
5634 ctr = SeqPortRead(spp, (Uint1Ptr)buf, amp->to_row-amp->from_row+1);
5635 buf[ctr] = '\0';
5636 fwrite(buf, 1, ctr, ofp);
5637 SeqPortFree(spp);
5638 }
5639 }
5640 BioseqUnlock(bsp);
5641 fprintf(ofp, "\n");
5642 }
5643 fprintf(ofp, "\n\n");
5644 }
5645 AlnMsgFree2(amp);
5646 }
5647
5648 /* SECTION 3 */
AlnMgr2DumpIndexedAlnToFile(SeqAlignPtr sap,CharPtr filename)5649 NLM_EXTERN void AlnMgr2DumpIndexedAlnToFile(SeqAlignPtr sap, CharPtr filename)
5650 {
5651 AsnIoPtr aip;
5652 AMAlignIndex2Ptr amaip;
5653 SeqAlignPtr sap_tmp;
5654
5655 if (sap == NULL || sap->saip == NULL)
5656 return;
5657 if (sap->saip->indextype == INDEX_CHILD)
5658 {
5659 if (sap->dim == 0)
5660 sap->dim = AlnMgr2GetNumRows(sap);
5661 aip = AsnIoOpen(filename, "w");
5662 SeqAlignAsnWrite(sap, aip, NULL);
5663 AsnIoClose(aip);
5664 return;
5665 }
5666 amaip = (AMAlignIndex2Ptr)(sap->saip);
5667 aip = AsnIoOpen(filename, "w");
5668 if (amaip->alnstyle != AM2_LITE)
5669 {
5670 amaip->sharedaln->dim = 0; /* mark it as the sharedaln */
5671 SeqAlignAsnWrite(amaip->sharedaln, aip, NULL);
5672 }
5673 sap_tmp = sap;
5674 if (sap->dim == 0)
5675 sap->dim = AlnMgr2GetNumRows(sap);
5676 while (sap_tmp != NULL)
5677 {
5678 SeqAlignAsnWrite(sap_tmp, aip, NULL);
5679 sap_tmp = sap_tmp->next;
5680 }
5681 AsnIoClose(aip);
5682 }
5683
5684 /***************************************************************************
5685 *
5686 * SECTION 4: API-level functions (and their helper functions) used to
5687 * access an indexed alignment.
5688 * SECTION 4a: AlnMgr2GetNextAlnBit and associated functions
5689 * SECTION 4b: "GetNth" functions
5690 * SECTION 4c: other functions for accessing the alignment
5691 *
5692 ***************************************************************************/
5693
5694 /* SECTION 4a */
5695 /***************************************************************************
5696 *
5697 * AlnMgr2GetNextAlnBit takes an indexed seqalign and returns it, piece
5698 * by piece, in the row and across the range specified in the AlnMsg
5699 * structure. amp->from_aln and amp->to_aln must be filled in; these are
5700 * in alignment coordinates. AlnMgr2GetNextAlnBit will return the AlnMsg
5701 * structure with amp->from_row and amp->to_row filled in. If amp->type is
5702 * AM_SEQ, these numbers are sequence coordinates; if amp->type is AM_GAP
5703 * the numbers are alignment coordinates and there is a gap in that row.
5704 * AlnMgr2GetNextAlnBit returns one continuous piece of sequence or gap
5705 * at each call, and keeps returning TRUE until it has returned all the
5706 * information for the piece of the alignment requested.
5707 *
5708 ***************************************************************************/
AlnMgr2GetNextAlnBit(SeqAlignPtr sap,AlnMsg2Ptr amp)5709 NLM_EXTERN Boolean AlnMgr2GetNextAlnBit(SeqAlignPtr sap, AlnMsg2Ptr amp) /* NEXT */
5710 {
5711 AMAlignIndex2Ptr amaip;
5712 Uint2Ptr array;
5713 Int4 arraylen;
5714 Int4 ctr;
5715 Int4 disc;
5716 Int4 disc1;
5717 DenseSegPtr dsp;
5718 Int4 endoffset;
5719 Boolean found;
5720 Int4 i;
5721 Int4 index;
5722 Int4 intfrom;
5723 Int4 intto;
5724 Int4 j;
5725 Int4 len;
5726 Int4 offset;
5727 SAIndex2Ptr saip;
5728 SARowDat2Ptr srdp;
5729 Int4 start_sect;
5730 Int4 stop_sect;
5731 Uint2Ptr trans;
5732 Int4 translen;
5733
5734 if (sap == NULL || sap->saip == NULL || amp == NULL)
5735 return FALSE;
5736 if (amp->left_interrupt != NULL)
5737 {
5738 MemFree(amp->left_interrupt);
5739 amp->left_interrupt = NULL;
5740 }
5741 if (amp->right_interrupt != NULL)
5742 {
5743 MemFree(amp->right_interrupt);
5744 amp->right_interrupt = NULL;
5745 }
5746 if (sap->saip->indextype == INDEX_CHILD)
5747 {
5748 dsp = (DenseSegPtr)(sap->segs);
5749 saip = (SAIndex2Ptr)(sap->saip);
5750 } else if (sap->saip->indextype == INDEX_PARENT)
5751 {
5752 amaip = (AMAlignIndex2Ptr)(sap->saip);
5753 if (amaip->alnstyle == AM2_LITE)
5754 return FALSE;
5755 dsp = (DenseSegPtr)(amaip->sharedaln->segs);
5756 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
5757 }
5758 /* reality checks */
5759 if (amp->row_num > saip->numrows)
5760 return FALSE;
5761 if (amp->len <= 0)
5762 amp->len = AlnMgr2GetAlnLength(sap, FALSE);
5763 if (amp->from_aln < 0 || amp->from_aln > amp->len-1 || amp->real_from > amp->to_aln)
5764 return FALSE;
5765 if (amp->to_aln == -1)
5766 amp->to_aln = amp->len - 1;
5767 if (amp->to_aln < amp->from_aln || amp->to_aln > amp->len-1)
5768 return FALSE;
5769 if (amp->real_from == -2)
5770 amp->real_from = amp->from_aln;
5771 amp->strand = AlnMgr2GetNthStrand(sap, amp->row_num);
5772 srdp = saip->srdp[amp->row_num-1];
5773 len = 0;
5774 start_sect = binary_search_on_uint4_list(saip->aligncoords, amp->real_from, saip->numseg);
5775 offset = amp->real_from - saip->aligncoords[start_sect];
5776 endoffset = 0;
5777 stop_sect = binary_search_on_uint4_list(saip->aligncoords, amp->to_aln, saip->numseg);
5778 /* now figure out whether it starts in sequence or a gap, and figure out how */
5779 /* long it continues in the same mode without interruption by inserts or unaligned */
5780 /* regions; the whole contiguous stretch will be reported */
5781 if (saip->anchor > 0)
5782 {
5783 trans = saip->srdp[saip->anchor-1]->sect;
5784 translen = saip->srdp[saip->anchor-1]->numsect;
5785 } else
5786 {
5787 trans = (Uint2Ptr)MemNew(dsp->numseg*sizeof(Uint2));
5788 for (i=0; i<dsp->numseg; i++)
5789 {
5790 trans[i] = i;
5791 }
5792 translen = dsp->numseg;
5793 }
5794 arraylen = -1;
5795 if ((index = binary_search_on_uint2_list(srdp->sect, trans[start_sect], srdp->numsect)) != -1)
5796 {
5797 amp->type = AM_SEQ;
5798 array = srdp->sect;
5799 arraylen = srdp->numsect;
5800 } else if ((index = binary_search_on_uint2_list(srdp->unsect, trans[start_sect], srdp->numunsect)) != -1)
5801 {
5802 amp->type = AM_GAP;
5803 array = srdp->unsect;
5804 arraylen = srdp->numunsect;
5805 }
5806 if (arraylen == -1) /* error */
5807 return FALSE;
5808 if (amp->row_num == saip->anchor)
5809 {
5810 amp->type = AM_SEQ;
5811 /* find limits of aligned region */
5812 i = start_sect;
5813 j = srdp->sect[start_sect];
5814 disc = binary_search_on_uint2_list(srdp->unaligned, j, srdp->numunaln);
5815 while (j<srdp->sect[stop_sect] && disc == -1)
5816 {
5817 j++;
5818 disc = binary_search_on_uint2_list(srdp->unaligned, j, srdp->numunaln);
5819 }
5820 i = binary_search_on_uint2_list(srdp->sect, j, srdp->numsect);
5821 if (i == -1)
5822 {
5823 i = binary_search_on_uint2_list(srdp->unsect, j, srdp->numunsect);
5824 }
5825 endoffset = dsp->lens[trans[i]] - (amp->to_aln - saip->aligncoords[i]) - 1;
5826 if (endoffset < 0)
5827 endoffset = 0;
5828 if (i<stop_sect && endoffset == 0) /* there's an unaligned region here, and we go to the end of the segment */
5829 {
5830 AlnMgr2GetUnalignedInfo(sap, trans[i], amp->row_num, &intfrom, &intto);
5831 amp->right_interrupt = (AMInterruptPtr)MemNew(sizeof(AMInterrupt));
5832 amp->right_interrupt->row = amp->row_num;
5833 amp->right_interrupt->unalnlen = intto - intfrom + 1;
5834 amp->right_interrupt->segnum = trans[i];
5835 amp->right_interrupt->which_side = AM2_RIGHT;
5836 }
5837 stop_sect = i;
5838 if (start_sect > 0 && offset == 0)
5839 {
5840 disc = binary_search_on_uint2_list(srdp->unaligned, trans[start_sect]-1, srdp->numunaln);
5841 if (disc != -1) /* there is a left unaligned region */
5842 {
5843 AlnMgr2GetUnalignedInfo(sap, trans[start_sect]-1, amp->row_num, &intfrom, &intto);
5844 amp->left_interrupt = (AMInterruptPtr)MemNew(sizeof(AMInterrupt));
5845 amp->left_interrupt->row = amp->row_num;
5846 amp->left_interrupt->unalnlen = intto - intfrom + 1;
5847 amp->left_interrupt->segnum = trans[start_sect];
5848 amp->left_interrupt->which_side = AM2_LEFT;
5849 }
5850 }
5851 len = 0;
5852 for (i=start_sect; i<= stop_sect; i++)
5853 {
5854 len += dsp->lens[trans[i]];
5855 }
5856 len = len - offset - endoffset;
5857 if (amp->strand == Seq_strand_minus)
5858 amp->from_row = dsp->starts[trans[stop_sect]*dsp->dim+amp->row_num-1] + endoffset;
5859 else
5860 amp->from_row = dsp->starts[trans[start_sect]*dsp->dim+amp->row_num-1] + offset;
5861 amp->to_row = amp->from_row + len - 1;
5862 amp->real_from += amp->to_row - amp->from_row + 1;
5863 if (saip->anchor <= 0)
5864 MemFree(trans);
5865 return TRUE;
5866 }
5867 /* look for limits of aligned/gapped region */
5868 i = index;
5869 j = start_sect+1;
5870 disc = -1;
5871 found = FALSE;
5872 while (i+1<arraylen && disc == -1 && array[i] <= trans[stop_sect] && array[i+1]-1 == array[i])
5873 {
5874 disc = binary_search_on_uint2_list(srdp->unaligned, array[i], srdp->numunaln);
5875 if (disc == -1)
5876 i++;
5877 }
5878 disc = binary_search_on_uint2_list(srdp->unaligned, array[i], srdp->numunaln);
5879 j = binary_search_on_uint2_list(trans, array[i], translen);
5880 if (amp->type == AM_SEQ && j <= stop_sect) /* there is an interrupting region, either seq/gap, insert, or unaligned, plus just check last piece */
5881 {
5882 i = binary_search_on_uint2_list(srdp->insect, trans[j]+1, srdp->numinsect);
5883 if (i != -1) /* there's an insert */
5884 {
5885 amp->right_interrupt = (AMInterruptPtr)MemNew(sizeof(AMInterrupt));
5886 amp->right_interrupt->row = amp->row_num;
5887 amp->right_interrupt->segnum = trans[j];
5888 amp->right_interrupt->insertlen = dsp->lens[srdp->insect[i]];
5889 amp->right_interrupt->which_side = AM2_RIGHT;
5890 /* look for unaligned regions off insert */
5891 disc1 = -1;
5892 if (j > 0)
5893 disc1 = binary_search_on_uint2_list(srdp->unaligned, trans[j]+1, srdp->numunaln);
5894 if (disc1 != -1)
5895 {
5896 AlnMgr2GetUnalignedInfo(sap, srdp->unaligned[disc1], amp->row_num, &intfrom, &intto);
5897 amp->right_interrupt->unalnlen = intto - intfrom + 1;
5898 }
5899 i++;
5900 ctr = 1;
5901 while (i<srdp->numinsect && srdp->insect[i] == srdp->insect[i-1]+1)
5902 {
5903 amp->right_interrupt->insertlen += dsp->lens[srdp->insect[i]];
5904 /* look for unaligned regions off insert */
5905 disc1 = -1;
5906 if (j > 0) {
5907 disc1 = binary_search_on_uint2_list(srdp->unaligned, trans[j]+1+ctr, srdp->numunaln);
5908 }
5909 if (disc1 != -1)
5910 {
5911 AlnMgr2GetUnalignedInfo(sap, srdp->unaligned[disc1], amp->row_num, &intfrom, &intto);
5912 amp->right_interrupt->unalnlen += intto - intfrom + 1;
5913 }
5914 i++;
5915 ctr++;
5916 }
5917 }
5918 if (disc != -1) /* there's an unaligned region */
5919 {
5920 if (amp->right_interrupt == NULL)
5921 amp->right_interrupt = (AMInterruptPtr)MemNew(sizeof(AMInterrupt));
5922 amp->right_interrupt->row = amp->row_num;
5923 amp->right_interrupt->segnum = trans[j];
5924 amp->right_interrupt->which_side = AM2_RIGHT;
5925 AlnMgr2GetUnalignedInfo(sap, srdp->unaligned[disc], amp->row_num, &intfrom, &intto);
5926 amp->right_interrupt->unalnlen += intto - intfrom + 1;
5927 }
5928 }
5929 stop_sect = j;
5930 /* now look for left-side unaligned or inserted regions if offset == 0 */
5931 if (amp->type == AM_SEQ && offset == 0)
5932 {
5933 disc = -1;
5934 j = 1;
5935 i = -1;
5936 if ((Int2)trans[start_sect]-j > 0)
5937 i = binary_search_on_uint2_list(srdp->sect, trans[start_sect]-j, srdp->numsect);
5938 while (i == -1 && (Int2)(trans[start_sect])-j-1 >= 0)
5939 {
5940 i = binary_search_on_uint2_list(srdp->sect, trans[start_sect]-j-1, srdp->numsect);
5941 j++;
5942 }
5943 disc = binary_search_on_uint2_list(srdp->unaligned, trans[start_sect]-j, srdp->numunaln);;
5944 if (disc > -1)
5945 {
5946 AlnMgr2GetUnalignedInfo(sap, trans[start_sect]-j, amp->row_num, &intfrom, &intto);
5947 amp->left_interrupt = (AMInterruptPtr)MemNew(sizeof(AMInterrupt));
5948 amp->left_interrupt->row = amp->row_num;
5949 amp->left_interrupt->segnum = trans[start_sect];
5950 amp->left_interrupt->which_side = AM2_LEFT;
5951 amp->left_interrupt->unalnlen = intto - intfrom + 1;
5952 }
5953 i = binary_search_on_uint2_list(srdp->insect, trans[start_sect]-j, srdp->numinsect);
5954 if (i != -1) /* there's an insert */
5955 {
5956 if (amp->left_interrupt == NULL)
5957 amp->left_interrupt = (AMInterruptPtr)MemNew(sizeof(AMInterrupt));
5958 amp->left_interrupt->row = amp->row_num;
5959 amp->left_interrupt->segnum = trans[start_sect];
5960 amp->left_interrupt->which_side = AM2_LEFT;
5961 amp->left_interrupt->insertlen = dsp->lens[srdp->insect[i]];
5962 /* look for unaligned regions off insert */
5963 j = trans[start_sect]-j;
5964 disc1 = binary_search_on_uint2_list(srdp->unaligned, j, srdp->numunaln);
5965 if (disc1 != -1)
5966 {
5967 AlnMgr2GetUnalignedInfo(sap, srdp->unaligned[disc1], amp->row_num, &intfrom, &intto);
5968 amp->left_interrupt->unalnlen += intto - intfrom + 1;
5969 }
5970 i--;
5971 j--;
5972 while (i-1>=0 && srdp->insect[i] == srdp->insect[i+1]-1)
5973 {
5974 amp->left_interrupt->insertlen += dsp->lens[srdp->insect[i]];
5975 disc1 = binary_search_on_uint2_list(srdp->unaligned, j, srdp->numunaln);
5976 if (disc1 != -1)
5977 {
5978 AlnMgr2GetUnalignedInfo(sap, srdp->unaligned[disc1], amp->row_num, &intfrom, &intto);
5979 amp->left_interrupt->unalnlen += intto - intfrom + 1;
5980 }
5981 i--;
5982 j--;
5983 }
5984 if (i>=0) /* look one more over for unaligned */
5985 {
5986 disc1 = binary_search_on_uint2_list(srdp->unaligned, j, srdp->numunaln);
5987 if (disc1 != -1)
5988 {
5989 AlnMgr2GetUnalignedInfo(sap, srdp->unaligned[disc1], amp->row_num, &intfrom, &intto);
5990 amp->left_interrupt->unalnlen += intto - intfrom + 1;
5991 }
5992 }
5993 }
5994 }
5995 endoffset = dsp->lens[trans[stop_sect]] - (amp->to_aln - saip->aligncoords[stop_sect]) - 1;
5996 if (endoffset < 0)
5997 endoffset = 0;
5998 if (amp->right_interrupt != NULL && endoffset > 0)
5999 {
6000 MemFree(amp->right_interrupt);
6001 amp->right_interrupt = NULL;
6002 }
6003 len = 0;
6004 for (i=start_sect; i<=stop_sect; i++)
6005 {
6006 len += dsp->lens[trans[i]];
6007 }
6008 len = len - offset - endoffset;
6009 if (amp->type == AM_GAP)
6010 {
6011 amp->from_row = amp->real_from;
6012 amp->to_row = amp->from_row + len - 1;
6013 } else
6014 {
6015 if (amp->strand == Seq_strand_minus)
6016 {
6017 amp->from_row = dsp->starts[trans[stop_sect]*dsp->dim+amp->row_num-1] + endoffset;
6018 amp->to_row = amp->from_row + len - 1;
6019 } else
6020 {
6021 amp->from_row = dsp->starts[trans[start_sect]*dsp->dim+amp->row_num-1] + offset;
6022 amp->to_row = amp->from_row + len - 1;
6023 }
6024 }
6025 if (saip->anchor <= 0)
6026 MemFree(trans);
6027 amp->real_from += amp->to_row - amp->from_row + 1;
6028 return TRUE;
6029 }
6030
6031 /* SECTION 4a */
binary_search_on_uint4_list(Uint4Ptr list,Uint4 pos,Uint4 listlen)6032 static Int4 binary_search_on_uint4_list(Uint4Ptr list, Uint4 pos, Uint4 listlen)
6033 {
6034 Uint4 L;
6035 Uint4 mid;
6036 Uint4 R;
6037
6038 if (list == NULL || listlen == 0)
6039 return 0;
6040 L = 0;
6041 R = listlen - 1;
6042 while (L < R)
6043 {
6044 mid = (L+R)/2;
6045 if (list[mid + 1] <= pos)
6046 L = mid + 1;
6047 else
6048 R = mid;
6049 }
6050 return R;
6051 }
6052
6053 /* SECTION 4a */
/***************************************************************************
*
*  binary_search_on_uint2_list looks for the value 'ele' in 'list', which
*  is assumed to be in ascending order, and returns the index at which it
*  was found. -1 is returned if the value is not present, if list is NULL
*  or empty, or if ele is negative.
*
***************************************************************************/
static Int4 binary_search_on_uint2_list(Uint2Ptr list, Int4 ele, Uint2 listlen)
{
   Uint2  hi;
   Uint2  lo;
   Uint2  probe;

   if (list == NULL || listlen == 0 || ele < 0)
      return -1;
   lo = 0;
   hi = listlen - 1;
   while (lo < hi)
   {
      probe = (lo + hi)/2;
      if (ele > list[probe])
         lo = probe + 1;
      else
         hi = probe;
   }
   /* lo and hi have met on the only position that can hold ele */
   return (ele == list[hi]) ? hi : -1;
}
6077
6078 /* SECTION 4a */
/***************************************************************************
*
*  AlnMgr2GetUnalignedInfo computes the sequence range of 'row' (1-based)
*  that lies between dense-seg segment 'segment' and the segments after
*  it. It scans backwards from 'segment' for the nearest segment in which
*  the row is present (start != -1) and forwards from segment+1 for the
*  next such segment, takes the coordinates just past one and just before
*  the other (which is which depends on the row's strand), and returns
*  them in *from and *to with *from <= *to. A side on which no segment is
*  found is left at -1. For a parent alignment the shared alignment is
*  examined.
*
*  *from and *to are set to -1 before anything else, so they are defined
*  even when the function has to give up (NULL or unindexed sap, unknown
*  index type, missing dense-seg). Nothing is done if from or to is NULL.
*
***************************************************************************/
static void AlnMgr2GetUnalignedInfo(SeqAlignPtr sap, Int4 segment, Int4 row, Int4Ptr from, Int4Ptr to)
{
   AMAlignIndex2Ptr  amaip;
   DenseSegPtr       dsp;
   Boolean           found;
   Int4              i;
   Int4              start;
   Uint1             strand;
   Int4              tmp;

   if (from == NULL || to == NULL)
      return;
   *from = *to = -1;
   if (sap == NULL || sap->saip == NULL)
      return;
   strand = AlnMgr2GetNthStrand(sap, row);
   if (sap->saip->indextype == INDEX_CHILD)
   {
      dsp = (DenseSegPtr)(sap->segs);
   } else if (sap->saip->indextype == INDEX_PARENT)
   {
      amaip = (AMAlignIndex2Ptr)(sap->saip);
      if (amaip->sharedaln == NULL)
         return;
      dsp = (DenseSegPtr)(amaip->sharedaln->segs);
   } else
      return; /* unknown index type: there is no dense-seg to look at */
   if (dsp == NULL)
      return;
   /* nearest segment at or before 'segment' in which the row is present */
   found = FALSE;
   for (i=segment; i>=0 && !found; i--)
   {
      start = dsp->starts[dsp->dim*i+row-1];
      if (start != -1)
      {
         found = TRUE;
         if (strand == Seq_strand_minus)
            *to = start-1;
         else
            *from = start+dsp->lens[i];
      }
   }
   /* nearest segment after 'segment' in which the row is present */
   found = FALSE;
   for (i=segment+1; i<dsp->numseg && !found; i++)
   {
      start = dsp->starts[dsp->dim*i+row-1];
      if (start != -1)
      {
         found = TRUE;
         if (strand == Seq_strand_minus)
            *from = start+dsp->lens[i];
         else
            *to = start-1;
      }
   }
   /* hand the range back in ascending order */
   if (*from > *to)
   {
      tmp = *from;
      *from = *to;
      *to = tmp;
   }
}
6134
6135 /* SECTION 4a */
6136 /***************************************************************************
6137 *
6138 * AlnMgr2GetInterruptInfo returns a structure describing the inserts and
6139 * unaligned regions in an interrupt. The structure is allocated by this
6140 * function and must be freed with AlnMgr2FreeInterruptInfo.
6141 *
6142 ***************************************************************************/
AlnMgr2GetInterruptInfo(SeqAlignPtr sap,AMInterruptPtr interrupt)6143 NLM_EXTERN AMInterrInfoPtr AlnMgr2GetInterruptInfo(SeqAlignPtr sap, AMInterruptPtr interrupt)
6144 {
6145 AMAlignIndex2Ptr amaip;
6146 Int4 disc;
6147 Boolean done;
6148 DenseSegPtr dsp;
6149 Int4 i;
6150 AMInterrInfoPtr iip;
6151 Int4 inserts;
6152 Int4 intfrom;
6153 Int4 intto;
6154 Int4 j;
6155 Int4 k;
6156 Int4 n;
6157 SAIndex2Ptr saip;
6158 SARowDat2Ptr srdp;
6159 Uint1 strand;
6160 Uint2Ptr trans;
6161 Int4 translen;
6162 Int4 u;
6163
6164 if (interrupt == NULL || sap == NULL || sap->saip == NULL)
6165 return NULL;
6166 if (sap->saip->indextype == INDEX_CHILD)
6167 {
6168 dsp = (DenseSegPtr)(sap->segs);
6169 saip = (SAIndex2Ptr)(sap->saip);
6170 } else if (sap->saip->indextype == INDEX_PARENT)
6171 {
6172 amaip = (AMAlignIndex2Ptr)(sap->saip);
6173 if (amaip->alnstyle == AM2_LITE)
6174 return FALSE;
6175 dsp = (DenseSegPtr)(amaip->sharedaln->segs);
6176 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6177 }
6178 if (dsp->numseg < interrupt->segnum)
6179 return NULL;
6180 if (saip->anchor > 0)
6181 {
6182 trans = saip->srdp[saip->anchor-1]->sect;
6183 translen = saip->srdp[saip->anchor-1]->numsect;
6184 } else
6185 {
6186 trans = (Uint2Ptr)MemNew(dsp->numseg*sizeof(Uint2));
6187 for (i=0; i<dsp->numseg; i++)
6188 {
6189 trans[i] = i;
6190 }
6191 translen = dsp->numseg;
6192 }
6193 strand = AlnMgr2GetNthStrand(sap, interrupt->row-1);
6194 srdp = saip->srdp[interrupt->row-1];
6195 /* now look for inserts and unaligned regions on the side indicated */
6196 if (interrupt->which_side == AM2_RIGHT)
6197 {
6198 /* check if this is unaligned */
6199 disc = binary_search_on_uint2_list(srdp->unaligned, interrupt->segnum, srdp->numunaln);
6200 /* then look for inserts */
6201 done = FALSE;
6202 iip = (AMInterrInfoPtr)MemNew(sizeof(AMInterrInfo));
6203 if (disc != -1)
6204 iip->num = 1;
6205 inserts = 0;
6206 for (i=interrupt->segnum+1; !done; i++)
6207 {
6208 n = binary_search_on_uint2_list(srdp->insect, i, srdp->numinsect);
6209 if (n == -1)
6210 n = binary_search_on_uint2_list(srdp->unsect, i, srdp->numunsect);
6211 if (n == -1)
6212 {
6213 done = TRUE;
6214 } else
6215 {
6216 inserts++; /* only increment if region gets interrupted */
6217 disc = binary_search_on_uint2_list(srdp->unaligned, i, srdp->numunaln);
6218 if (disc != -1) /* this insert has an unaligned region */
6219 {
6220 iip->num += inserts;
6221 iip->num++;
6222 inserts = 0;
6223 }
6224 }
6225 }
6226 if (inserts != 0)
6227 iip->num++;
6228 iip->starts = (Int4Ptr)MemNew(iip->num*sizeof(Int4));
6229 iip->lens = (Int4Ptr)MemNew(iip->num*sizeof(Int4));
6230 iip->types = (Int4Ptr)MemNew(iip->num*sizeof(Int4));
6231 k = 0;
6232 disc = binary_search_on_uint2_list(srdp->unaligned, interrupt->segnum, srdp->numunaln);
6233 if (disc != -1) /* starts with unaligned */
6234 {
6235 AlnMgr2GetUnalignedInfo(sap, interrupt->segnum, interrupt->row, &intfrom, &intto);
6236 iip->starts[k] = intfrom;
6237 iip->lens[k] = intto - intfrom + 1;
6238 iip->types[k] = AM_UNALIGNED;
6239 k++;
6240 }
6241 disc = 0;
6242 done = FALSE;
6243 for (i=interrupt->segnum+1; !done; i++)
6244 {
6245 n = binary_search_on_uint2_list(srdp->insect, i, srdp->numinsect);
6246 u = binary_search_on_uint2_list(srdp->unsect, i, srdp->numinsect);
6247 if (n == -1 && u == -1)
6248 {
6249 done = TRUE;
6250 } else
6251 {
6252 if (u == -1)
6253 {
6254 if (disc != -1 || strand == Seq_strand_minus) /* only record new start if region gets interrupted or if on minus strand */
6255 iip->starts[k] = dsp->starts[dsp->dim*i + interrupt->row-1];
6256 iip->lens[k] += dsp->lens[i];
6257 iip->types[k] = AM_INSERT;
6258 disc = binary_search_on_uint2_list(srdp->unaligned, i, srdp->numunaln);
6259 if (disc != -1) /* this insert has an unaligned region */
6260 {
6261 k++;
6262 AlnMgr2GetUnalignedInfo(sap, i, interrupt->row, &intfrom, &intto);
6263 iip->starts[k] = intfrom;
6264 iip->lens[k] = intto - intfrom + 1;
6265 iip->types[k] = AM_UNALIGNED;
6266 k++;
6267 }
6268 }
6269 }
6270 }
6271 } else if (interrupt->which_side == AM2_LEFT)
6272 {
6273 /* check if the next non-gap segment to the left has unaligned */
6274 j = 1;
6275 n = 0;
6276 while (n != -1 && interrupt->segnum-j >= 0)
6277 {
6278 n = binary_search_on_uint2_list(srdp->unsect, interrupt->segnum-j, srdp->numunsect);
6279 if (n == -1)
6280 n = binary_search_on_uint2_list(srdp->insect, interrupt->segnum-j, srdp->numinsect);
6281 if (n != -1)
6282 j++;
6283 }
6284 disc = binary_search_on_uint2_list(srdp->unaligned, interrupt->segnum-j, srdp->numunaln);
6285 /* then look for inserts */
6286 done = FALSE;
6287 iip = (AMInterrInfoPtr)MemNew(sizeof(AMInterrInfo));
6288 if (disc != -1)
6289 iip->num = 1;
6290 inserts = 0;
6291 for (i=interrupt->segnum-1; !done; i--)
6292 {
6293 n = binary_search_on_uint2_list(srdp->insect, i, srdp->numinsect);
6294 if (n == -1)
6295 n = binary_search_on_uint2_list(srdp->unsect, i, srdp->numunsect);
6296 if (n == -1)
6297 {
6298 done = TRUE;
6299 } else
6300 {
6301 inserts++; /* only increment if region gets interrupted */
6302 disc = binary_search_on_uint2_list(srdp->unaligned, i, srdp->numunaln);
6303 if (disc != -1) /* this insert has an unaligned region */
6304 {
6305 iip->num += inserts;
6306 iip->num++;
6307 inserts = 0;
6308 }
6309 }
6310 }
6311 i++;
6312 iip->starts = (Int4Ptr)MemNew(iip->num*sizeof(Int4));
6313 iip->lens = (Int4Ptr)MemNew(iip->num*sizeof(Int4));
6314 iip->types = (Int4Ptr)MemNew(iip->num*sizeof(Int4));
6315 k = 0;
6316 disc = 0;
6317 /* check first non-inserted segment for unaligned */
6318 if (i >= 0)
6319 {
6320 disc = binary_search_on_uint2_list(srdp->unaligned, i, srdp->numunaln);
6321 if (disc != -1) /* there's an unaligned region */
6322 {
6323 AlnMgr2GetUnalignedInfo(sap, i, interrupt->row, &intfrom, &intto);
6324 iip->starts[k] = intfrom;
6325 iip->lens[k] = intto - intfrom + 1;
6326 iip->types[k] = AM_UNALIGNED;
6327 k++;
6328 }
6329 }
6330 i++; /* start from leftmost end of inserts/unaligned */
6331 for (i; i<interrupt->segnum; i++)
6332 {
6333 u = binary_search_on_uint2_list(srdp->unsect, i, srdp->numunsect);
6334 if (u == -1)
6335 {
6336 if (disc != -1 || strand == Seq_strand_minus) /* only record new start if region gets interrupted or if on minus strand */
6337 iip->starts[k] = dsp->starts[dsp->dim*i + interrupt->row-1];
6338 iip->lens[k] += dsp->lens[i];
6339 iip->types[k] = AM_INSERT;
6340 disc = binary_search_on_uint2_list(srdp->unaligned, i, srdp->numunaln);
6341 if (disc != -1) /* this insert has an unaligned region */
6342 {
6343 k++;
6344 AlnMgr2GetUnalignedInfo(sap, binary_search_on_uint2_list(trans, i, translen), interrupt->row, &intfrom, &intto);
6345 iip->starts[k] = intfrom;
6346 iip->lens[k] = intto - intfrom + 1;
6347 iip->types[k] = AM_UNALIGNED;
6348 k++;
6349 }
6350 }
6351 }
6352 }
6353 iip->strand = strand;
6354 return iip;
6355 }
6356
6357 /* SECTION 4b */
6358 /***************************************************************************
6359 *
6360 * AlnMgr2GetNthStrand takes an indexed seqalign and a row number and
6361 * returns the strand of the row indicated. A return of 0 indicates
6362 * an error.
6363 *
6364 ***************************************************************************/
AlnMgr2GetNthStrand(SeqAlignPtr sap,Int4 n)6365 NLM_EXTERN Uint1 AlnMgr2GetNthStrand(SeqAlignPtr sap, Int4 n)
6366 {
6367 AMAlignIndex2Ptr amaip;
6368 DenseSegPtr dsp;
6369
6370 if (sap == NULL || sap->saip == NULL || n < 1)
6371 return 0;
6372 if (sap->saip->indextype == INDEX_CHILD)
6373 {
6374 dsp = (DenseSegPtr)(sap->segs);
6375 if (n > dsp->dim)
6376 return 0;
6377 if (dsp->strands == NULL)
6378 return Seq_strand_plus;
6379 return (dsp->strands[n-1]);
6380 } else if (sap->saip->indextype == INDEX_PARENT)
6381 {
6382 amaip = (AMAlignIndex2Ptr)(sap->saip);
6383 if (amaip->alnstyle == AM2_LITE) /* can't get Nth strand for this */
6384 return 0;
6385 dsp = (DenseSegPtr)(amaip->sharedaln->segs);
6386 if (n > dsp->dim)
6387 return 0;
6388 if (dsp->strands == NULL)
6389 return Seq_strand_plus;
6390 return (dsp->strands[n-1]);
6391 }
6392 return 0;
6393 }
6394
6395 /* SECTION 4b */
6396 /***************************************************************************
6397 *
6398 * AlnMgr2GetNthSeqIdPtr returns the seqid (this is a duplicated,
6399 * allocated seqid that must be freed) of the nth row (1-based) of an
6400 * indexed parent or child seqalign.
6401 *
6402 ***************************************************************************/
AlnMgr2GetNthSeqIdPtr(SeqAlignPtr sap,Int4 n)6403 NLM_EXTERN SeqIdPtr AlnMgr2GetNthSeqIdPtr(SeqAlignPtr sap, Int4 n)
6404 {
6405 AMAlignIndex2Ptr amaip;
6406 DenseSegPtr dsp;
6407 Int4 i;
6408 SeqIdPtr sip;
6409
6410 if (sap == NULL || sap->saip == NULL)
6411 return NULL;
6412 if (sap->saip->indextype == INDEX_CHILD)
6413 {
6414 dsp = (DenseSegPtr)(sap->segs);
6415 if (n > dsp->dim)
6416 return NULL;
6417 sip = dsp->ids;
6418 for (i=1; i<n && sip != NULL; i++)
6419 {
6420 sip = sip->next;
6421 }
6422 if (sip == NULL) return NULL;
6423 return (SeqIdDup(sip));
6424 } else if (sap->saip->indextype == INDEX_PARENT)
6425 {
6426 amaip = (AMAlignIndex2Ptr)(sap->saip);
6427 if (n > amaip->numrows)
6428 return NULL;
6429 sip = SeqIdDup(amaip->ids[n-1]);
6430 return sip;
6431 } else
6432 return NULL;
6433 }
6434
6435 /* SECTION 4b */
6436 /***************************************************************************
6437 *
6438 * AlnMgr2GetNthSeqRangeInSA returns the smallest and largest sequence
6439 * coordinates contained in the nth row of an indexed seqalign. Either
6440 * start or stop can be NULL to only retrieve one of the coordinates.
6441 * If start and stop are -1, there is an error; if they are both -2, the
6442 * row is just one big insert. RANGE
6443 *
6444 ***************************************************************************/
AlnMgr2GetNthSeqRangeInSA(SeqAlignPtr sap,Int4 n,Int4Ptr start,Int4Ptr stop)6445 NLM_EXTERN void AlnMgr2GetNthSeqRangeInSA(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop)
6446 {
6447 AMAlignIndex2Ptr amaip;
6448 Int4 beg;
6449 DenseSegPtr dsp;
6450 Int4 end;
6451 SAIndex2Ptr saip;
6452 SARowDat2Ptr srdp;
6453 Uint1 strand;
6454
6455 if (start != NULL)
6456 *start = -1;
6457 if (stop != NULL)
6458 *stop = -1;
6459 if (sap == NULL || sap->saip == NULL)
6460 return;
6461 if (sap->saip->indextype == INDEX_CHILD)
6462 {
6463 saip = (SAIndex2Ptr)(sap->saip);
6464 dsp = (DenseSegPtr)(sap->segs);
6465 } else if (sap->saip->indextype == INDEX_PARENT)
6466 {
6467 amaip = (AMAlignIndex2Ptr)(sap->saip);
6468 if (amaip->alnstyle == AM2_LITE)
6469 {
6470 AlnMgr2GetNthSeqRangeInSASet(sap, n, start, stop);
6471 return;
6472 }
6473 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6474 dsp = (DenseSegPtr)(amaip->sharedaln->segs);
6475 }
6476 if (n > saip->numrows || n <= 0)
6477 return;
6478 srdp = saip->srdp[n-1];
6479 beg = -1;
6480 if (srdp->numsect == 0) /* just one big insert */
6481 beg = end = -2;
6482 strand = AlnMgr2GetNthStrand(sap, n);
6483 if (beg != -2 && strand != Seq_strand_minus)
6484 {
6485 beg = dsp->starts[srdp->sect[0]*(dsp->dim) + n-1];
6486 end = dsp->starts[srdp->sect[srdp->numsect-1]*(dsp->dim) + n-1] + dsp->lens[srdp->sect[srdp->numsect-1]] - 1;
6487 } else if (beg != -2)
6488 {
6489 beg = dsp->starts[srdp->sect[srdp->numsect-1]*(dsp->dim) + n-1];
6490 end = dsp->starts[srdp->sect[0]*(dsp->dim) + n-1] + dsp->lens[srdp->sect[0]] - 1;
6491 }
6492 if (start != NULL)
6493 *start = beg;
6494 if (stop != NULL)
6495 *stop = end;
6496 return;
6497 }
6498
6499 /* SECTION 4b */
6500 /***************************************************************************
6501 *
6502 * AlnMgr2GetNthRowSpanInSA returns the least and greatest alignment
6503 * coordinates (inclusive) spanned by the indicated row. Either stop or
6504 * start can be NULL to retrieve just one of the coordinates.
6505 *
6506 ***************************************************************************/
AlnMgr2GetNthRowSpanInSA(SeqAlignPtr sap,Int4 n,Int4Ptr start,Int4Ptr stop)6507 NLM_EXTERN void AlnMgr2GetNthRowSpanInSA(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop)
6508 {
6509 AMAlignIndex2Ptr amaip;
6510 DenseSegPtr dsp;
6511 Int4 i;
6512 SAIndex2Ptr saip;
6513 SARowDat2Ptr srdp;
6514
6515 if (start != NULL)
6516 *start = -1;
6517 if (stop != NULL)
6518 *stop = -1;
6519 if (sap == NULL || sap->saip == NULL)
6520 return;
6521 if (sap->saip->indextype == INDEX_CHILD)
6522 {
6523 saip = (SAIndex2Ptr)(sap->saip);
6524 dsp = (DenseSegPtr)(sap->segs);
6525 } else if (sap->saip->indextype == INDEX_PARENT)
6526 {
6527 amaip = (AMAlignIndex2Ptr)(sap->saip);
6528 if (amaip->alnstyle == AM2_LITE)
6529 return;
6530 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6531 dsp = (DenseSegPtr)(amaip->sharedaln->segs);
6532 }
6533 if (n > saip->numrows || n <= 0)
6534 return;
6535 srdp = saip->srdp[n-1];
6536 if (srdp->numsect == 0)
6537 {
6538 if (start != NULL)
6539 *start = -1;
6540 if (stop != NULL)
6541 *stop = -1;
6542 return;
6543 }
6544 if (start != NULL)
6545 {
6546 if (saip->anchor > 0)
6547 i = binary_search_on_uint2_list(saip->srdp[saip->anchor-1]->sect, srdp->sect[0], saip->srdp[saip->anchor-1]->numsect);
6548 else
6549 i = srdp->sect[0];
6550 *start = saip->aligncoords[i];
6551 }
6552 if (stop != NULL)
6553 {
6554 if (saip->anchor > 0)
6555 i = binary_search_on_uint2_list(saip->srdp[saip->anchor-1]->sect, srdp->sect[srdp->numsect-1], saip->srdp[saip->anchor-1]->numsect);
6556 else
6557 i = srdp->sect[srdp->numsect-1];
6558 *stop = saip->aligncoords[i] + dsp->lens[srdp->sect[srdp->numsect-1]] - 1;
6559 }
6560 return;
6561 }
6562
6563 /* SECTION 4b */
/***************************************************************************
*
*  AlnMgr2GetNthSeqRangeInSASet computes the sequence range of row n over
*  every child alignment of a parent: the smallest start and the largest
*  stop reported by AlnMgr2GetNthSeqRangeInSA for any child.  A child
*  alignment is simply passed through to AlnMgr2GetNthSeqRangeInSA.
*  Either start or stop may be NULL.  Both outputs are -1 if no child
*  yields a usable range or on error.
*
*  Children that report a negative start (-1 for an error, -2 for a row
*  consisting only of inserts) do not contribute to the minimum.
*
***************************************************************************/
static void AlnMgr2GetNthSeqRangeInSASet(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop)
{
   AMAlignIndex2Ptr  amaip;
   Int4              from;
   Int4              i;
   Int4              max;
   Int4              min;
   Int4              to;

   if (start != NULL)
      *start = -1;
   if (stop != NULL)
      *stop = -1;
   if (sap == NULL || sap->saip == NULL || n < 0)
      return;
   if (sap->saip->indextype == INDEX_CHILD)
   {
      AlnMgr2GetNthSeqRangeInSA(sap, n, start, stop);
      return;
   }
   if (sap->saip->indextype != INDEX_PARENT) /* the cast below is only valid for a parent */
      return;
   amaip = (AMAlignIndex2Ptr)(sap->saip);
   min = max = -1;
   for (i=0; i<amaip->numsaps; i++)
   {
      from = to = -1;
      AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], n, &from, &to);
      if (from >= 0 && (min == -1 || from < min))
         min = from;
      if (to > max)
         max = to;
   }
   /* report the accumulated extremes, not the values of the last child */
   if (start != NULL)
      *start = min;
   if (stop != NULL)
      *stop = max;
}
6599
6600 /* SECTION 4b */
AlnMgr2GetMaxTailLength(SeqAlignPtr sap,Uint1 which_tail)6601 NLM_EXTERN Int4 AlnMgr2GetMaxTailLength(SeqAlignPtr sap, Uint1 which_tail)
6602 {
6603 Int4 i;
6604 Int4 maxlen;
6605 Int4 n;
6606 Int4 start;
6607 Int4 stop;
6608 Uint1 strand;
6609
6610 if (sap == NULL || sap->saip == NULL)
6611 return 0;
6612 n = AlnMgr2GetNumRows(sap);
6613 maxlen = -1;
6614 for (i=0; i<n; i++)
6615 {
6616 AlnMgr2GetNthRowTail(sap, i+1, which_tail, &start, &stop, &strand);
6617 if (stop - start + 1 > maxlen)
6618 maxlen = stop - start + 1;
6619 }
6620 return maxlen;
6621 }
6622
6623 /* SECTION 4b */
6624 /***************************************************************************
6625 *
6626 * AlnMgr2GetNthRowTail returns the sequence extremities that are not
6627 * contained in the alignment (if the alignment starts at 10 in row 2, the
6628 * tail in that row is 0-9). It takes an indexed seqalign, a 1-based row
6629 * number, and AM2_LEFT_TAIL or AM2_RIGHT_TAIL, and returns the start, stop,
6630 * and strand of the tail indicated in the row desired. AlnMgr2GetNthRowTail
6631 * returns TRUE if the calculations were successfully completed.
6632 *
6633 ***************************************************************************/
AlnMgr2GetNthRowTail(SeqAlignPtr sap,Int4 n,Uint1 which_tail,Int4Ptr start,Int4Ptr stop,Uint1Ptr strand)6634 NLM_EXTERN Boolean AlnMgr2GetNthRowTail(SeqAlignPtr sap, Int4 n, Uint1 which_tail, Int4Ptr start, Int4Ptr stop, Uint1Ptr strand)
6635 {
6636 BioseqPtr bsp;
6637 SeqIdPtr sip;
6638 Int4 tmp_start;
6639 Int4 tmp_stop;
6640 Uint1 tmp_strand;
6641
6642 if (sap == NULL || n < 1 || sap->saip == NULL)
6643 return FALSE;
6644 tmp_start = tmp_stop = -1;
6645 AlnMgr2GetNthSeqRangeInSA(sap, n, &tmp_start, &tmp_stop);
6646 if (tmp_start == -1 || tmp_stop == -1)
6647 return FALSE;
6648 tmp_strand = AlnMgr2GetNthStrand(sap, n);
6649 if (which_tail == AM2_LEFT_TAIL)
6650 {
6651 if (tmp_strand == Seq_strand_minus)
6652 {
6653 sip = AlnMgr2GetNthSeqIdPtr(sap, n);
6654 bsp = BioseqLockById(sip);
6655 SeqIdFree(sip);
6656 if (bsp == NULL)
6657 return FALSE;
6658 if (tmp_stop == bsp->length-1 || stop == NULL)
6659 {
6660 if (start)
6661 *start = -1;
6662 if (stop)
6663 *stop = -1;
6664 } else
6665 {
6666 if (start)
6667 *start = tmp_stop+1;
6668 if (stop)
6669 *stop = bsp->length-1;
6670 }
6671 BioseqUnlock(bsp);
6672 if (strand)
6673 *strand = tmp_strand;
6674 } else
6675 {
6676 if (tmp_start >= 1)
6677 {
6678 if (start)
6679 *start = 0;
6680 if (stop)
6681 *stop = tmp_start - 1;
6682 } else
6683 {
6684 if (start)
6685 *start = -1;
6686 if (stop)
6687 *stop = -1;
6688 }
6689 if (strand)
6690 *strand = tmp_strand;
6691 }
6692 } else if (which_tail == AM2_RIGHT_TAIL)
6693 {
6694 if (tmp_strand == Seq_strand_minus)
6695 {
6696 if (tmp_start >= 1)
6697 {
6698 if (start)
6699 *start = 0;
6700 if (stop)
6701 *stop = tmp_start - 1;
6702 } else
6703 {
6704 if (start)
6705 *start = -1;
6706 if (stop)
6707 *stop = -1;
6708 }
6709 if (strand)
6710 *strand = tmp_strand;
6711 } else
6712 {
6713 sip = AlnMgr2GetNthSeqIdPtr(sap, n);
6714 bsp = BioseqLockById(sip);
6715 SeqIdFree(sip);
6716 if (bsp == NULL)
6717 return FALSE;
6718 if (bsp->length-1 == tmp_stop)
6719 {
6720 if (start)
6721 *start = -1;
6722 if (stop)
6723 *stop = -1;
6724 } else
6725 {
6726 if (start)
6727 *start = tmp_stop + 1;
6728 if (stop)
6729 *stop = bsp->length-1;
6730 }
6731 if (strand)
6732 *strand = tmp_strand;
6733 BioseqUnlock(bsp);
6734 }
6735 }
6736 return TRUE;
6737 }
6738
6739 /* SECTION 4c */
6740 /***************************************************************************
6741 *
6742 * AlnMgr2GetAlnLength returns the total alignment length of an indexed
6743 * alignment. If fill_in is TRUE, the function computes the total length
6744 * of all the internal unaligned regions and adds that to the alignment
6745 * length; otherwise only the aligned portions are considered. (LENGTH)
6746 *
6747 ***************************************************************************/
AlnMgr2GetAlnLength(SeqAlignPtr sap,Boolean fill_in)6748 NLM_EXTERN Int4 AlnMgr2GetAlnLength(SeqAlignPtr sap, Boolean fill_in)
6749 {
6750 AMAlignIndex2Ptr amaip;
6751 DenseSegPtr dsp;
6752 Int4 i;
6753 Uint2 lastseg;
6754 Int4 len;
6755 SAIndex2Ptr saip;
6756 SeqAlignPtr salp;
6757
6758 if (sap == NULL || sap->saip == NULL)
6759 return -1;
6760 if (sap->saip->indextype == INDEX_CHILD)
6761 {
6762 dsp = (DenseSegPtr)(sap->segs);
6763 saip = (SAIndex2Ptr)(sap->saip);
6764 salp = sap;
6765 } else if (sap->saip->indextype == INDEX_PARENT)
6766 {
6767 amaip = (AMAlignIndex2Ptr)(sap->saip);
6768 if (amaip->alnstyle == AM2_LITE)
6769 return -1;
6770 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6771 dsp = (DenseSegPtr)(amaip->sharedaln->segs);
6772 salp = amaip->sharedaln;
6773 }
6774 if (saip->unaln == FALSE || fill_in == FALSE)
6775 {
6776 if (saip->anchor == -1)
6777 return (saip->aligncoords[saip->numseg-1]+dsp->lens[saip->numseg-1]);
6778 else
6779 {
6780 lastseg = saip->srdp[saip->anchor-1]->sect[saip->srdp[saip->anchor-1]->numsect-1];
6781 return (saip->aligncoords[saip->numseg-1]+dsp->lens[lastseg]);
6782 }
6783 } else
6784 {
6785 len = 0;
6786 for (i=0; i<dsp->numseg; i++)
6787 {
6788 len += dsp->lens[i];
6789 len += AlnMgr2GetMaxUnalignedLength(salp, i);
6790 }
6791 }
6792 return len;
6793 }
6794
6795 /* SECTION 4c */ /* FOR DDV */
AlnMgr2IsSAPDiscAli(SeqAlignPtr sap)6796 NLM_EXTERN Boolean AlnMgr2IsSAPDiscAli(SeqAlignPtr sap)
6797 {
6798 AMAlignIndex2Ptr amaip;
6799 SAIndex2Ptr saip;
6800
6801 if (sap == NULL || sap->saip == NULL)
6802 return FALSE;
6803 if (sap->saip->indextype == INDEX_CHILD)
6804 {
6805 saip = (SAIndex2Ptr)(sap->saip);
6806 } else if (sap->saip->indextype == INDEX_PARENT)
6807 {
6808 amaip = (AMAlignIndex2Ptr)(sap->saip);
6809 if (amaip->alnstyle == AM2_LITE)
6810 return FALSE;
6811 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6812 }
6813 if (saip->numunaln > 0)
6814 return TRUE;
6815 return FALSE;
6816 }
6817
6818 /* SECTION 4c */ /* FOR DDV */
AlnMgr2GetNumAlnBlocks(SeqAlignPtr sap)6819 NLM_EXTERN Int4 AlnMgr2GetNumAlnBlocks(SeqAlignPtr sap)
6820 {
6821 AMAlignIndex2Ptr amaip;
6822 SAIndex2Ptr saip;
6823
6824 if (sap == NULL || sap->saip == NULL)
6825 return -1;
6826 if (sap->saip->indextype == INDEX_CHILD)
6827 {
6828 saip = (SAIndex2Ptr)(sap->saip);
6829 } else if (sap->saip->indextype == INDEX_PARENT)
6830 {
6831 amaip = (AMAlignIndex2Ptr)(sap->saip);
6832 if (amaip->alnstyle == AM2_LITE)
6833 return -1;
6834 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6835 }
6836 if (saip->numunaln >= 0)
6837 return (saip->numunaln + 1);
6838 else if (saip->numunaln == -1)
6839 return 1;
6840 else
6841 return -1;
6842 }
6843
6844 /* SECTION 4c */ /* FOR DDV */
AlnMgr2GetNthBlockRange(SeqAlignPtr sap,Int4 n,Int4Ptr start,Int4Ptr stop)6845 NLM_EXTERN Boolean AlnMgr2GetNthBlockRange(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop)
6846 {
6847 AMAlignIndex2Ptr amaip;
6848 SAIndex2Ptr saip;
6849
6850 if (sap == NULL || sap->saip == NULL)
6851 return FALSE;
6852 if (sap->saip->indextype == INDEX_CHILD)
6853 {
6854 saip = (SAIndex2Ptr)(sap->saip);
6855 } else if (sap->saip->indextype == INDEX_PARENT)
6856 {
6857 amaip = (AMAlignIndex2Ptr)(sap->saip);
6858 if (amaip->alnstyle == AM2_LITE)
6859 return FALSE;
6860 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6861 }
6862 if (!start || !stop)
6863 return FALSE;
6864 *start = -1;
6865 *stop = -1;
6866 if (n >= saip->numunaln)
6867 return FALSE;
6868 if (n < saip->numunaln)
6869 {
6870 *start = saip->aligncoords[saip->unaln[n-1]+1];
6871 *stop = saip->aligncoords[saip->unaln[n]] - 1;
6872 }
6873 return TRUE;
6874 }
6875
6876 /* SECTION 4c */ /* FOR DDV */
6877 /***************************************************************************
6878 *
6879 * AlnMgr2GetNthUnalignedForNthRow returns the bioseq coordinates for the
6880 * requested row, in the requested unaligned region. Any error will result
6881 * in -1 returns for both start and stop.
6882 *
6883 ***************************************************************************/
AlnMgr2GetNthUnalignedForNthRow(SeqAlignPtr sap,Int4 unaligned,Int4 row,Int4Ptr start,Int4Ptr stop)6884 NLM_EXTERN Boolean AlnMgr2GetNthUnalignedForNthRow(SeqAlignPtr sap, Int4 unaligned, Int4 row, Int4Ptr start, Int4Ptr stop)
6885 {
6886 AMAlignIndex2Ptr amaip;
6887 DenseSegPtr dsp;
6888 Int4 i;
6889 SAIndex2Ptr saip;
6890 Int4 seg;
6891 Uint1 strand;
6892
6893 if (sap == NULL || sap->saip == NULL)
6894 return FALSE;
6895 if (sap->saip->indextype == INDEX_CHILD)
6896 {
6897 saip = (SAIndex2Ptr)(sap->saip);
6898 dsp = (DenseSegPtr)(sap->segs);
6899 } else if (sap->saip->indextype == INDEX_PARENT)
6900 {
6901 amaip = (AMAlignIndex2Ptr)(sap->saip);
6902 if (amaip->alnstyle == AM2_LITE)
6903 return FALSE;
6904 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6905 dsp = (DenseSegPtr)(amaip->sharedaln->segs);
6906 }
6907 if (row > saip->numrows)
6908 return FALSE;
6909 if (saip->numunaln == 0) /* not set yet */
6910 AlnMgr2SetUnaln(sap);
6911 if (saip->numunaln == -1 || unaligned > saip->numunaln)
6912 {
6913 if (start)
6914 *start = -1;
6915 if (stop)
6916 *stop = -1;
6917 return FALSE;
6918 }
6919 seg = -1;
6920 if (unaligned <= saip->numunaln && unaligned > 0)
6921 seg = saip->unaln[unaligned-1];
6922 if (start)
6923 *start = -1;
6924 if (stop)
6925 *stop = -1;
6926 i = binary_search_on_uint2_list(saip->srdp[row-1]->unaligned, seg, saip->srdp[row-1]->numunaln);
6927 if (i == -1 || saip->srdp[row-1]->unaligned[i] >= dsp->numseg-1)
6928 return FALSE;
6929 strand = AlnMgr2GetNthStrand(sap, row);
6930 if (strand == Seq_strand_minus)
6931 {
6932 *start = dsp->starts[(saip->srdp[row-1]->unaligned[i]+1)*dsp->dim+row-1] + dsp->lens[(saip->srdp[row-1]->unaligned[i])];
6933 *stop = dsp->starts[(saip->srdp[row-1]->unaligned[i])*dsp->dim+row-1] - 1;
6934 } else
6935 {
6936 *start = dsp->starts[(saip->srdp[row-1]->unaligned[i])*dsp->dim+row-1] + dsp->lens[(saip->srdp[row-1]->unaligned[i])];
6937 *stop = dsp->starts[(saip->srdp[row-1]->unaligned[i]+1)*dsp->dim+row-1] - 1;
6938 }
6939 return TRUE;
6940 }
6941
6942 /* SECTION 4c */ /* FOR DDV */
6943 /***************************************************************************
6944 *
6945 * AlnMgr2GetNextLengthBit is called in a loop on an indexed alignment, with
6946 * seg starting at 0, to return the lengths of the aligned and unaligned
6947 * regions. If the length returned is negative, it's an unaligned region;
6948 * otherwise it's aligned.
6949 *
6950 ***************************************************************************/
AlnMgr2GetNextLengthBit(SeqAlignPtr sap,Int4Ptr len,Int4Ptr seg)6951 NLM_EXTERN Boolean AlnMgr2GetNextLengthBit(SeqAlignPtr sap, Int4Ptr len, Int4Ptr seg)
6952 {
6953 AMAlignIndex2Ptr amaip;
6954 DenseSegPtr dsp;
6955 Int4 i;
6956 Int4 lastseg;
6957 Int4 maxseg;
6958 SAIndex2Ptr saip;
6959
6960 if (sap == NULL || sap->saip == NULL || seg == NULL)
6961 return FALSE;
6962 if (sap->saip->indextype == INDEX_CHILD)
6963 {
6964 saip = (SAIndex2Ptr)(sap->saip);
6965 dsp = (DenseSegPtr)(sap->segs);
6966 } else if (sap->saip->indextype == INDEX_PARENT)
6967 {
6968 amaip = (AMAlignIndex2Ptr)(sap->saip);
6969 if (amaip->alnstyle == AM2_LITE)
6970 return FALSE;
6971 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6972 dsp = (DenseSegPtr)(amaip->sharedaln->segs);
6973 }
6974 if (saip->numunaln == -1) /* the whole thing is just one big aligned segment */
6975 {
6976 if (*seg != 0)
6977 return FALSE;
6978 if (saip->anchor == -1)
6979 {
6980 *len = saip->aligncoords[saip->numseg-1]+dsp->lens[saip->numseg-1];
6981 *seg = 1;
6982 } else
6983 {
6984 lastseg = saip->srdp[saip->anchor-1]->sect[saip->srdp[saip->anchor-1]->numsect-1];
6985 *len = saip->aligncoords[saip->numseg-1]+dsp->lens[lastseg];
6986 *seg = 1;
6987 }
6988 return TRUE;
6989 } else
6990 {
6991 if (saip->unaln == 0) /* not set */
6992 {
6993 AlnMgr2SetUnaln(sap);
6994 if (saip->numunaln == -1) /* no unaligned regions */
6995 {
6996 if (*seg != 0)
6997 return FALSE;
6998 if (saip->anchor == -1)
6999 *len = saip->aligncoords[saip->numseg-1]+dsp->lens[saip->numseg-1];
7000 else
7001 {
7002 lastseg = saip->srdp[saip->anchor-1]->sect[saip->srdp[saip->anchor-1]->numsect-1];
7003 *len = saip->aligncoords[saip->numseg-1]+dsp->lens[lastseg];
7004 }
7005 *seg = 1;
7006 return TRUE;
7007 }
7008 }
7009 if (*seg > saip->numunaln || -(*seg) > saip->numunaln)
7010 return FALSE;
7011 if (*seg >= 0)
7012 {
7013 *len = 0;
7014 if (*seg == 0)
7015 i = 0;
7016 else
7017 i = saip->unaln[*seg-1]+1;
7018 if (*seg < saip->numunaln)
7019 maxseg = saip->unaln[*seg];
7020 else
7021 maxseg = dsp->numseg-1;
7022 while (i<=maxseg)
7023 {
7024 (*len) += dsp->lens[i];
7025 i++;
7026 }
7027 *seg = -(*seg+1);
7028 return TRUE;
7029 } else
7030 {
7031 *len = -AlnMgr2GetMaxUnalignedLength(sap, saip->unaln[-(*seg)-1]);
7032 *seg = -(*seg);
7033 return TRUE;
7034 }
7035 }
7036 }
7037
7038 /* SECTION 4c */
/***************************************************************************
*
*  AlnMgr2GetMaxUnalignedLength returns the largest value of (to - from)
*  reported by AlnMgr2GetUnalignedInfo over all rows for the unaligned
*  region that follows dense-seg segment seg.  It returns 0 when no row
*  lists seg among its unaligned segments, and -1 for a NULL or unindexed
*  alignment, an AM2_LITE parent, or an unknown index type.
*
*  NOTE(review): other code in this file measures such regions as
*  to - from + 1; the value here is one less -- confirm whether that is
*  intended before changing it, since rows without an unaligned region
*  presumably report from == to.
*
***************************************************************************/
static Int4 AlnMgr2GetMaxUnalignedLength(SeqAlignPtr sap, Int4 seg)
{
   AMAlignIndex2Ptr  amaip;
   DenseSegPtr       dsp;
   Boolean           found;
   Int4              from;
   Int4              i;
   Int4              max;
   Int4              row;
   SAIndex2Ptr       saip;
   Int4              to;

   if (sap == NULL || sap->saip == NULL)
      return -1;
   if (sap->saip->indextype == INDEX_CHILD)
   {
      saip = (SAIndex2Ptr)(sap->saip);
      dsp = (DenseSegPtr)(sap->segs);
   } else if (sap->saip->indextype == INDEX_PARENT)
   {
      amaip = (AMAlignIndex2Ptr)(sap->saip);
      if (amaip->alnstyle == AM2_LITE)
         return -1;
      saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
      dsp = (DenseSegPtr)(amaip->sharedaln->segs);
   } else /* unknown index type: saip and dsp would be uninitialized */
   {
      return -1;
   }
   /* is seg followed by an unaligned region in any row at all? */
   found = FALSE;
   for (row=0; row<dsp->dim && !found; row++)
   {
      for (i=0; i<saip->srdp[row]->numunaln && !found; i++)
      {
         if (saip->srdp[row]->unaligned[i] == seg)
            found = TRUE;
      }
   }
   if (!found)
      return 0;
   max = 0;
   for (i=0; i<dsp->dim; i++)
   {
      from = to = -1; /* defined values even if the callee leaves them alone */
      AlnMgr2GetUnalignedInfo(sap, seg, i+1, &from, &to);
      if (to - from > max)
         max = to - from;
   }
   return max;
}
7085
7086 /* SECTION 4c */
7087 /***************************************************************************
7088 *
7089 * AlnMgr2GetNumRows returns the number of rows in an indexed seqalign.
7090 *
7091 ***************************************************************************/
AlnMgr2GetNumRows(SeqAlignPtr sap)7092 NLM_EXTERN Int4 AlnMgr2GetNumRows(SeqAlignPtr sap)
7093 {
7094 AMAlignIndex2Ptr amaip;
7095 SAIndex2Ptr saip;
7096
7097 if (sap == NULL || sap->saip == NULL)
7098 return -1;
7099 if (sap->saip->indextype == INDEX_CHILD)
7100 {
7101 saip = (SAIndex2Ptr)(sap->saip);
7102 return (saip->numrows);
7103 } else if (sap->saip->indextype == INDEX_PARENT)
7104 {
7105 amaip = (AMAlignIndex2Ptr)(sap->saip);
7106 return (amaip->numrows);
7107 }
7108 return -1;
7109 }
7110
7111 /* SECTION 4c */
7112 /***************************************************************************
7113 *
7114 * AlnMgr2GetNumSegs returns the number of gap- or aligned- contiguous
7115 * segments in the alignment (continuous or not).
7116 *
7117 ***************************************************************************/
AlnMgr2GetNumSegs(SeqAlignPtr sap)7118 NLM_EXTERN Int4 AlnMgr2GetNumSegs(SeqAlignPtr sap)
7119 {
7120 AMAlignIndex2Ptr amaip;
7121 DenseSegPtr dsp;
7122
7123 if (sap == NULL || sap->saip == NULL)
7124 return -1;
7125 if (sap->saip->indextype == INDEX_CHILD)
7126 {
7127 dsp = (DenseSegPtr)(sap->segs);
7128 return dsp->numseg;
7129 } else if (sap->saip->indextype == INDEX_PARENT)
7130 {
7131 amaip = (AMAlignIndex2Ptr)(sap->saip);
7132 if (amaip->alnstyle == AM2_LITE)
7133 return -1;
7134 dsp = (DenseSegPtr)(amaip->sharedaln->segs);
7135 return dsp->numseg;
7136 }
7137 return -1;
7138 }
7139
7140 /* SECTION 4c */
7141 /***************************************************************************
7142 *
7143 * AlnMgr2GetNumSegsInRange returns the number of alignment segments
7144 * spanned by the given range (partially or fully). The range is
7145 * given in alignment coordinates.
7146 *
7147 ***************************************************************************/
AlnMgr2GetNumSegsInRange(SeqAlignPtr sap,Int4 from,Int4 to,Int4Ptr start_seg)7148 NLM_EXTERN Int4 AlnMgr2GetNumSegsInRange(SeqAlignPtr sap, Int4 from, Int4 to, Int4Ptr start_seg)
7149 {
7150 Uint4Ptr aligncoords;
7151 AMAlignIndex2Ptr amaip;
7152 DenseSegPtr dsp;
7153 Int4 len;
7154 SAIndex2Ptr saip;
7155 Int4 start;
7156 Int4 stop;
7157
7158 if (start_seg != NULL)
7159 *start_seg = -1;
7160 if (sap == NULL || sap->saip == NULL)
7161 return -1;
7162 len = AlnMgr2GetAlnLength(sap, FALSE);
7163 if (from < 0 || to > len-1)
7164 return -1;
7165 if (sap->saip->indextype == INDEX_CHILD)
7166 {
7167 dsp = (DenseSegPtr)(sap->segs);
7168 saip = (SAIndex2Ptr)(sap->saip);
7169 aligncoords = saip->aligncoords;
7170 } else if (sap->saip->indextype == INDEX_PARENT)
7171 {
7172 amaip = (AMAlignIndex2Ptr)(sap->saip);
7173 if (amaip->alnstyle == AM2_LITE)
7174 return -1;
7175 dsp = (DenseSegPtr)(amaip->sharedaln->segs);
7176 saip = (SAIndex2Ptr)(sap->saip);
7177 aligncoords = saip->aligncoords;
7178 }
7179 if (from == 0 && to == len-1) /* whole alignment */
7180 {
7181 if (start_seg)
7182 *start_seg = 0;
7183 return dsp->numseg;
7184 }
7185 start = binary_search_on_uint4_list(aligncoords, from, dsp->numseg);
7186 stop = binary_search_on_uint4_list(aligncoords, to, dsp->numseg);
7187 if (start_seg != NULL)
7188 *start_seg = start;
7189 return (stop-start+1);
7190 }
7191
7192 /* SECTION 4c */
7193 /***************************************************************************
7194 *
7195 * AlnMgr2GetNthSegmentRange returns the alignment coordinate range of the
7196 * Nth segment (count starts at 1) of the seqalign. start and stop are
7197 * optional arguments (in case only one end is desired).
7198 *
7199 ***************************************************************************/
AlnMgr2GetNthSegmentRange(SeqAlignPtr sap,Int4 n,Int4Ptr start,Int4Ptr stop)7200 NLM_EXTERN void AlnMgr2GetNthSegmentRange(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop)
7201 {
7202 AMAlignIndex2Ptr amaip;
7203 Int4 i;
7204 SAIndex2Ptr saip;
7205
7206 if (sap == NULL || sap->saip == NULL)
7207 return;
7208 i = AlnMgr2GetNumSegs(sap);
7209 if (n > i || n < 0)
7210 return;
7211 if (sap->saip->indextype == INDEX_CHILD)
7212 {
7213 saip = (SAIndex2Ptr)(sap->saip);
7214 if (start != NULL)
7215 *start = saip->aligncoords[n-1];
7216 if (stop != NULL)
7217 {
7218 if (i > n) /* not the last segment */
7219 *stop = saip->aligncoords[n] - 1;
7220 else
7221 *stop = AlnMgr2GetAlnLength(sap, FALSE) - 1;
7222 }
7223 return;
7224 } else if (sap->saip->indextype == INDEX_PARENT)
7225 {
7226 amaip = (AMAlignIndex2Ptr)(sap->saip);
7227 if (amaip->alnstyle == AM2_LITE)
7228 return;
7229 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
7230 if (start != NULL)
7231 *start = saip->aligncoords[n-1];
7232 if (stop != NULL)
7233 {
7234 if (i > n) /* not the last segment */
7235 *stop = saip->aligncoords[n] - 1;
7236 else
7237 *stop = AlnMgr2GetAlnLength(sap, FALSE) - 1;
7238 }
7239 return;
7240 }
7241 }
7242
7243 /* SECTION 4c */
7244 /***************************************************************************
7245 *
7246 * AlnMgr2GetFirstNForSip returns the first row that a seqid occurs on,
7247 * or -1 if the seqid is not in the alignment or if there is another
7248 * error.
7249 *
7250 ***************************************************************************/
AlnMgr2GetFirstNForSip(SeqAlignPtr sap,SeqIdPtr sip)7251 NLM_EXTERN Int4 AlnMgr2GetFirstNForSip(SeqAlignPtr sap, SeqIdPtr sip)
7252 {
7253 AMAlignIndex2Ptr amaip;
7254 DenseSegPtr dsp;
7255 Int4 i;
7256 SeqIdPtr sip_tmp;
7257
7258 if (sap == NULL || sip == NULL || sap->saip == NULL)
7259 return -1;
7260 if (sap->saip->indextype == INDEX_CHILD)
7261 {
7262 dsp = (DenseSegPtr)(sap->segs);
7263 sip_tmp = dsp->ids;
7264 i = 1;
7265 while (sip_tmp != NULL)
7266 {
7267 if (SeqIdComp(sip, sip_tmp) == SIC_YES)
7268 return i;
7269 sip_tmp = sip_tmp->next;
7270 i++;
7271 }
7272 } else if (sap->saip->indextype == INDEX_PARENT)
7273 {
7274 amaip = (AMAlignIndex2Ptr)(sap->saip);
7275 if (amaip->alnstyle == AM2_LITE)
7276 return -1;
7277 for (i=0; i<amaip->numrows; i++)
7278 {
7279 if (SeqIdComp(sip, amaip->ids[i]) == SIC_YES)
7280 return (i+1);
7281 }
7282 }
7283 return -1;
7284 }
7285
7286 /***************************************************************************
7287 *
7288 * AlnMgr2GetFirstNForSipList returns the first row that one of a list of seqids occur on,
7289 * or -1 if none of the seqids are in the alignment or if there is another
7290 * error.
7291 * Handy if sip comes from a BioSeq, where it can point to a linked list
7292 * of SeqIds.
7293 *
7294 ***************************************************************************/
AlnMgr2GetFirstNForSipList(SeqAlignPtr sap,SeqIdPtr sip)7295 NLM_EXTERN Int4 AlnMgr2GetFirstNForSipList(SeqAlignPtr sap, SeqIdPtr sip)
7296 {
7297 Int4 i;
7298 if (sap == NULL || sap->saip == NULL)
7299 return -1;
7300
7301 for (; sip; sip = sip->next) {
7302 i = AlnMgr2GetFirstNForSip(sap, sip);
7303 if (i != -1)
7304 return i;
7305 }
7306 return -1;
7307 }
7308
7309 /***************************************************************************
7310 *
7311 * AlnMgr2GetParent returns the top-level seqalign associated with a given
7312 * indexed alignment. It returns the actual pointer, not a copy.
7313 *
7314 ***************************************************************************/
AlnMgr2GetParent(SeqAlignPtr sap)7315 NLM_EXTERN SeqAlignPtr AlnMgr2GetParent(SeqAlignPtr sap)
7316 {
7317 SAIndex2Ptr saip;
7318
7319 if (sap == NULL || sap->saip == NULL)
7320 return NULL;
7321 if (sap->saip->indextype == INDEX_PARENT)
7322 return sap;
7323 saip = (SAIndex2Ptr)(sap->saip);
7324 return (saip->top);
7325 }
7326
7327 /***************************************************************************
7328 *
7329 * SECTION 5: Functions to change, assign or retrieve an anchor row.
7330 * SECTION 5a: functions for child seqaligns
7331 * SECTION 5b: functions for parent seqaligns
7332 * SECTION 5c: functions to retrieve anchor row information
7333 *
7334 ***************************************************************************/
7335
7336 /* SECTION 5a */
AlnMgr2AnchorChild(SeqAlignPtr sap,Int4 which_row)7337 static void AlnMgr2AnchorChild(SeqAlignPtr sap, Int4 which_row)
7338 {
7339 AMBitty2Ptr abp;
7340 AMBitty2Ptr abp_head;
7341 AMBitty2Ptr abp_head2;
7342 AMBitty2Ptr abp_prev;
7343 AMBitty2Ptr abp_prev2;
7344 AMBitty2Ptr abp_uhead;
7345 AMBitty2Ptr abp_uprev;
7346 Uint2Ptr anchor_unsect;
7347 Int4 curr;
7348 Int4 curr2;
7349 DenseSegPtr dsp;
7350 Int4 i;
7351 Int4 j;
7352 Uint2 numunsect;
7353 SAIndex2Ptr saip;
7354 SARowDat2Ptr srdp;
7355
7356 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_CHILD)
7357 return;
7358 saip = (SAIndex2Ptr)(sap->saip);
7359 if (which_row > saip->numrows)
7360 return;
7361 if (saip->anchor == which_row) /*already anchored to the right row */
7362 return;
7363 if (saip->anchor != -1 || which_row <= 0) /* already anchored -- must reindex as a flat alignment first */
7364 {
7365 SAIndex2Free2(sap->saip);
7366 sap->saip = NULL;
7367 AlnMgr2IndexSingleDenseSegSA(sap);
7368 if (which_row <= 0)
7369 return;
7370 saip = (SAIndex2Ptr)(sap->saip);
7371 }
7372 numunsect = saip->srdp[which_row-1]->numunsect;
7373 if (numunsect > 0)
7374 anchor_unsect = saip->srdp[which_row-1]->unsect;
7375 else
7376 anchor_unsect = NULL;
7377 for (i=0; i<saip->numrows; i++)
7378 {
7379 if (i+1 != which_row)
7380 {
7381 abp_head = NULL;
7382 abp_head2 = NULL;
7383 abp_uhead = NULL;
7384 curr = 0;
7385 curr2 = 0;
7386 srdp = saip->srdp[i];
7387 for (j=0; j<srdp->numsect; j++)
7388 {
7389 if (anchor_unsect != NULL && curr < numunsect && srdp->sect[j] > anchor_unsect[curr])
7390 {
7391 while (curr < numunsect && srdp->sect[j] > anchor_unsect[curr])
7392 {
7393 curr++;
7394 }
7395 }
7396 if (curr < numunsect && anchor_unsect != NULL && srdp->sect[j] == anchor_unsect[curr]) /* this one is an insert */
7397 {
7398 abp = (AMBitty2Ptr)MemNew(sizeof(AMBitty2));
7399 abp->n = srdp->sect[j];
7400 if (abp_head == NULL)
7401 abp_head = abp_prev = abp;
7402 else
7403 {
7404 abp_prev->next = abp;
7405 abp_prev = abp;
7406 }
7407 curr++;
7408 } else /* put it in the keeper pile */
7409 {
7410 abp = (AMBitty2Ptr)MemNew(sizeof(AMBitty2));
7411 abp->n = srdp->sect[j];
7412 if (abp_head2 == NULL)
7413 abp_head2 = abp_prev2 = abp;
7414 else
7415 {
7416 abp_prev2->next = abp;
7417 abp_prev2 = abp;
7418 }
7419 }
7420 }
7421 for (j=0; j<srdp->numunsect; j++)
7422 {
7423 if (anchor_unsect != NULL && curr2 < numunsect && srdp->unsect[j] > anchor_unsect[curr2])
7424 {
7425 while (curr2 < numunsect && srdp->unsect[j] > anchor_unsect[curr2])
7426 {
7427 curr2++;
7428 }
7429 }
7430 if (curr2 >= numunsect || (curr2 < numunsect && (anchor_unsect == NULL || srdp->unsect[j] != anchor_unsect[curr2]))) /* these get kept */
7431 {
7432 abp = (AMBitty2Ptr)MemNew(sizeof(AMBitty2));
7433 abp->n = srdp->unsect[j];
7434 if (abp_uhead == NULL)
7435 abp_uhead = abp_uprev = abp;
7436 else
7437 {
7438 abp_uprev->next = abp;
7439 abp_uprev = abp;
7440 }
7441 }
7442 }
7443 MemFree(srdp->sect);
7444 MemFree(srdp->unsect);
7445 srdp->numsect = srdp->numunsect = srdp->numinsect = 0;
7446 abp = abp_head; /* inserts */
7447 while (abp != NULL)
7448 {
7449 srdp->numinsect++;
7450 abp = abp->next;
7451 }
7452 srdp->insect = (Uint2Ptr)MemNew((srdp->numinsect)*sizeof(Uint2));
7453 abp = abp_head;
7454 j = 0;
7455 while (abp != NULL)
7456 {
7457 srdp->insect[j] = abp->n;
7458 j++;
7459 abp_prev = abp;
7460 abp = abp->next;
7461 MemFree(abp_prev);
7462 }
7463 abp = abp_head2; /* aligned sections */
7464 while (abp != NULL)
7465 {
7466 srdp->numsect++;
7467 abp = abp->next;
7468 }
7469 srdp->sect = (Uint2Ptr)MemNew((srdp->numsect)*sizeof(Uint2));
7470 abp = abp_head2;
7471 j = 0;
7472 while (abp != NULL)
7473 {
7474 srdp->sect[j] = abp->n;
7475 j++;
7476 abp_prev = abp;
7477 abp = abp->next;
7478 MemFree(abp_prev);
7479 }
7480 abp = abp_uhead; /* aligned gaps */
7481 while (abp != NULL)
7482 {
7483 srdp->numunsect++;
7484 abp = abp->next;
7485 }
7486 srdp->unsect = (Uint2Ptr)MemNew((srdp->numunsect)*sizeof(Uint2));
7487 abp = abp_uhead;
7488 j = 0;
7489 while (abp != NULL)
7490 {
7491 srdp->unsect[j] = abp->n;
7492 j++;
7493 abp_prev = abp;
7494 abp = abp->next;
7495 MemFree(abp_prev);
7496 }
7497 } else /* this is the anchor row -- fill in the alignment coords*/
7498 {
7499 srdp = saip->srdp[i];
7500 MemFree(saip->aligncoords);
7501 saip->numseg = srdp->numsect;
7502 saip->aligncoords = (Uint4Ptr)MemNew((saip->numseg)*sizeof(Uint4));
7503 dsp = (DenseSegPtr)(sap->segs);
7504 for (j=1; j<saip->numseg; j++)
7505 {
7506 saip->aligncoords[j] = saip->aligncoords[j-1] + dsp->lens[srdp->sect[j-1]];
7507 }
7508 saip->anchor = i+1;
7509 }
7510 }
7511 }
7512
/* SECTION 5b */
7514 /***************************************************************************
7515 *
7516 * AlnMgr2AnchorSeqAlign takes an indexed seqalign and a row (1-based) and
7517 * reindexes the alignment so that there are no gaps in the row indicated.
7518 * Other rows may contain inserts after this operation. After an alignment
7519 * is anchored, its length often shrinks. If which_row is less than 1, the
7520 * function reindexes the alignment as a flat alignment.
7521 *
7522 ***************************************************************************/
AlnMgr2AnchorSeqAlign(SeqAlignPtr sap,Int4 which_row)7523 NLM_EXTERN void AlnMgr2AnchorSeqAlign(SeqAlignPtr sap, Int4 which_row)
7524 {
7525 AMAlignIndex2Ptr amaip;
7526
7527 if (sap == NULL || sap->saip == NULL)
7528 return;
7529 if (sap->saip->indextype == INDEX_CHILD)
7530 AlnMgr2AnchorChild(sap, which_row);
7531 else if (sap->saip->indextype == INDEX_PARENT)
7532 {
7533 amaip = (AMAlignIndex2Ptr)(sap->saip);
7534 if (amaip->alnstyle == AM2_LITE)
7535 return;
7536 AlnMgr2AnchorChild(amaip->sharedaln, which_row);
7537 amaip->anchor = which_row;
7538 }
7539 }
7540
7541 /* SECTION 5c */
7542 /***************************************************************************
7543 *
7544 * AlnMgr2FindAnchor returns the row number (1-based) of the anchor row
7545 * for an indexed seqalign, or -1 if the alignment is unanchored or if
7546 * there is another type of error.
7547 *
7548 ***************************************************************************/
AlnMgr2FindAnchor(SeqAlignPtr sap)7549 NLM_EXTERN Int4 AlnMgr2FindAnchor(SeqAlignPtr sap)
7550 {
7551 AMAlignIndex2Ptr amaip;
7552 SAIndex2Ptr saip;
7553
7554 if (sap == NULL || sap->saip == NULL)
7555 return -1;
7556 if (sap->saip->indextype == INDEX_CHILD)
7557 {
7558 saip = (SAIndex2Ptr)(sap->saip);
7559 return (saip->anchor);
7560 } else if (sap->saip->indextype == INDEX_PARENT)
7561 {
7562 amaip = (AMAlignIndex2Ptr)(sap->saip);
7563 if (amaip->alnstyle == AM2_LITE)
7564 return -1;
7565 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
7566 return (saip->anchor);
7567 } else
7568 return -1;
7569 }
7570
7571 /***************************************************************************
7572 *
7573 * SECTION 6: Functions for coordinate conversion (bioseq to seqalign
7574 * coordinates and vice versa)
7575 *
7576 ***************************************************************************/
7577
7578 /* SECTION 6 */
7579 /***************************************************************************
7580 *
7581 * AlnMgr2MapBioseqToSeqAlign takes an indexed seqalign, a position in a
7582 * row of the alignment, and a 1-based row number, and maps the row position
7583 * to alignment coordinates.
7584 *
7585 ***************************************************************************/
AlnMgr2MapBioseqToSeqAlign(SeqAlignPtr sap,Int4 pos,Int4 row)7586 NLM_EXTERN Int4 AlnMgr2MapBioseqToSeqAlign(SeqAlignPtr sap, Int4 pos, Int4 row)
7587 {
7588 AMAlignIndex2Ptr amaip;
7589 Uint2Ptr array;
7590 DenseSegPtr dsp;
7591 Int4 L;
7592 Int4 mid;
7593 Int4 offset;
7594 Int4 R;
7595 Int4 retval;
7596 SAIndex2Ptr saip;
7597 SARowDat2Ptr srdp;
7598 Int4 start;
7599 Int4 stop;
7600 Uint1 strand;
7601
7602 if (sap == NULL || sap->saip == NULL || row < 1)
7603 return -1;
7604 AlnMgr2GetNthSeqRangeInSA(sap, row, &start, &stop);
7605 if (pos < start || pos > stop)
7606 return -1;
7607 if (sap->saip->indextype == INDEX_CHILD)
7608 {
7609 saip = (SAIndex2Ptr)(sap->saip);
7610 dsp = (DenseSegPtr)(sap->segs);
7611 } else if (sap->saip->indextype == INDEX_PARENT)
7612 {
7613 amaip = (AMAlignIndex2Ptr)(sap->saip);
7614 if (amaip->alnstyle == AM2_LITE)
7615 return -1;
7616 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
7617 dsp = (DenseSegPtr)(amaip->sharedaln->segs);
7618 }
7619 if (row > saip->numrows)
7620 return -1;
7621 srdp = saip->srdp[row-1];
7622 if (srdp->numsect < 1) {
7623 return -1;
7624 }
7625 strand = AlnMgr2GetNthStrand(sap, row);
7626 L = 0;
7627 R = srdp->numsect - 1;
7628 if (strand != Seq_strand_minus)
7629 {
7630 while (L < R)
7631 {
7632 mid = (L + R)/2;
7633 if (dsp->starts[(srdp->sect[mid+1])*(dsp->dim)+row-1] <= pos)
7634 L = mid + 1;
7635 else
7636 R = mid;
7637 }
7638 } else
7639 {
7640 while (L < R)
7641 {
7642 mid = ceil((L + R)/2);
7643 if (dsp->starts[(srdp->sect[mid])*(dsp->dim)+row-1] > pos)
7644 L = mid + 1;
7645 else
7646 R = mid;
7647 }
7648 }
7649 offset = pos - dsp->starts[(srdp->sect[L])*(dsp->dim)+row-1];
7650 if (offset > dsp->lens[srdp->sect[L]])
7651 return -2; /* this is an insert */
7652 if (saip->anchor > 0)
7653 {
7654 array = saip->srdp[saip->anchor-1]->sect;
7655 R = binary_search_on_uint2_list(array, srdp->sect[L], saip->srdp[saip->anchor-1]->numsect);
7656 L = R;
7657 srdp = saip->srdp[saip->anchor-1];
7658 if (strand != Seq_strand_minus)
7659 retval = (saip->aligncoords[L] + offset);
7660 else
7661 retval = (saip->aligncoords[L] + dsp->lens[srdp->sect[L]] - offset - 1);
7662 } else
7663 {
7664 if (strand != Seq_strand_minus)
7665 retval = saip->aligncoords[srdp->sect[L]] + offset;
7666 else
7667 retval = (saip->aligncoords[srdp->sect[L]] + dsp->lens[srdp->sect[L]] - offset - 1);
7668 }
7669 return retval;
7670 }
7671
7672 /* SECTION 6 */
7673 /***************************************************************************
7674 *
7675 * AlnMgr2MapSeqAlignToBioseq takes an indexed seqalign, an alignment
7676 * coordinate (pos), and the 1-based number of a row, and maps the alignment
7677 * coordinate to the corresponding bioseq coordinate of the row desired.
7678 * A return of -1 indicates an error; a return of -2 means that the bioseq
7679 * is gapped at this alignment position.
7680 *
7681 ***************************************************************************/
AlnMgr2MapSeqAlignToBioseq(SeqAlignPtr sap,Int4 pos,Int4 row)7682 NLM_EXTERN Int4 AlnMgr2MapSeqAlignToBioseq(SeqAlignPtr sap, Int4 pos, Int4 row)
7683 {
7684 AMAlignIndex2Ptr amaip;
7685 DenseSegPtr dsp;
7686 Int4 len;
7687 Int4 offset;
7688 SAIndex2Ptr saip;
7689 Int4 sect;
7690 SARowDat2Ptr srdp;
7691 Int4 start;
7692 Uint1 strand;
7693 Uint2Ptr trans;
7694
7695 if (sap == NULL || sap->saip == NULL)
7696 return -1;
7697 len = AlnMgr2GetAlnLength(sap, FALSE);
7698 if (pos < 0 || pos > len - 1)
7699 return -1;
7700 if (sap->saip->indextype == INDEX_CHILD)
7701 {
7702 saip = (SAIndex2Ptr)(sap->saip);
7703 dsp = (DenseSegPtr)(sap->segs);
7704 } else if (sap->saip->indextype == INDEX_PARENT)
7705 {
7706 amaip = (AMAlignIndex2Ptr)(sap->saip);
7707 if (amaip->alnstyle == AM2_LITE)
7708 return -1;
7709 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
7710 dsp = (DenseSegPtr)(amaip->sharedaln->segs);
7711 }
7712 if (row > saip->numrows)
7713 return -1;
7714
7715 sect = binary_search_on_uint4_list(saip->aligncoords, pos, saip->numseg);
7716 offset = pos - saip->aligncoords[sect];
7717 if (saip->anchor > 0)
7718 {
7719 trans = saip->srdp[saip->anchor-1]->sect;
7720 sect = trans[sect];
7721 }
7722 srdp = saip->srdp[row-1];
7723 start = binary_search_on_uint2_list(srdp->sect, sect, srdp->numsect);
7724 if (start == -1)
7725 return -2; /* this row has a gap or insert at this alignment position */
7726 strand = AlnMgr2GetNthStrand(sap, row);
7727 if (strand != Seq_strand_minus)
7728 return (dsp->starts[sect*(dsp->dim)+row-1] + offset);
7729 else
7730 return (dsp->starts[sect*(dsp->dim)+row-1] + dsp->lens[sect] - 1 - offset);
7731 }
7732
7733 /* SECTION 6 */
7734 /***************************************************************************
7735 *
7736 * AlnMgr2MapRowToRow takes an indexed seqalign, a position in row1, the
7737 * 1-based number of row1, and a target row (row2), and maps the bioseq
7738 * coordinate in row 1 to the corresponding (aligned) bioseq coordinate in
7739 * row2. A return of -1 indicates an error while a return of -2 means that
7740 * the bioseq in row2 is gapped at the desired position.
7741 *
7742 ***************************************************************************/
AlnMgr2MapRowToRow(SeqAlignPtr sap,Int4 pos,Int4 row1,Int4 row2)7743 NLM_EXTERN Int4 AlnMgr2MapRowToRow(SeqAlignPtr sap, Int4 pos, Int4 row1, Int4 row2)
7744 {
7745 Int4 alnpos;
7746
7747 if (sap == NULL)
7748 return -1;
7749 alnpos = AlnMgr2MapBioseqToSeqAlign(sap, pos, row1);
7750 return (AlnMgr2MapSeqAlignToBioseq(sap, alnpos, row2));
7751 }
7752
7753 /***************************************************************************
7754 *
7755 * SECTION 7: Functions to change an alignment and retrieve parts of an
7756 * alignment
7757 *
7758 ***************************************************************************/
7759
7760 /***************************************************************************
7761 *
7762 * AlnMgr2TruncateSeqAlign truncates a given seqalign to contain only the
7763 * bioseq coordinates from start to stop on the indicated row. Anything
7764 * before those coordinates is discarded; anything remaining afterwards
7765 * is made into another seqalign and put in sap->next (the original next,
7766 * if any, is now at sap->next->next). Doesn't work on parent seqaligns.
*  The function returns TRUE if the original alignment extended past stop.
7768 *
7769 ***************************************************************************/
7770 /* SECTION 7 */
AlnMgr2TruncateSeqAlign(SeqAlignPtr sap,Int4 start,Int4 stop,Int4 row)7771 NLM_EXTERN Boolean AlnMgr2TruncateSeqAlign(SeqAlignPtr sap, Int4 start, Int4 stop, Int4 row)
7772 {
7773 DenseDiagPtr ddp;
7774 DenseDiagPtr ddp2;
7775 DenseSegPtr dsp;
7776 Int4 from;
7777 Int4 i;
7778 Int4 mstart;
7779 Int4 mstop;
7780 SeqAlignPtr sap1;
7781 SeqAlignPtr sap2;
7782 Int4 tmp;
7783 Int4 to;
7784
7785 if (sap == NULL || stop<start || row < 1)
7786 return FALSE;
7787 if (sap->segtype == SAS_DENSEG)
7788 {
7789 if (sap->saip == NULL)
7790 AlnMgr2IndexSingleChildSeqAlign(sap);
7791 AlnMgr2GetNthSeqRangeInSA(sap, row, &mstart, &mstop);
7792 if (mstart > start || mstop < stop)
7793 return FALSE;
7794 if (mstart == start)
7795 {
7796 if (mstop == stop)
7797 return FALSE;
7798 else if (mstop > stop)
7799 {
7800 from = AlnMgr2MapBioseqToSeqAlign(sap, start, row);
7801 to = AlnMgr2MapBioseqToSeqAlign(sap, stop, row);
7802 if (to < from)
7803 {
7804 tmp = to;
7805 to = from;
7806 from = tmp;
7807 }
7808 sap1 = AlnMgr2GetSubAlign(sap, from, to, 0, TRUE);
7809 AlnMgr2IndexSingleChildSeqAlign(sap1);
7810 from = AlnMgr2MapBioseqToSeqAlign(sap, stop+1, row);
7811 if (from < 0)
7812 return FALSE;
7813 to = AlnMgr2MapBioseqToSeqAlign(sap, mstop, row);
7814 if (to < from)
7815 {
7816 tmp = to;
7817 to = from;
7818 from = tmp;
7819 }
7820 sap2 = AlnMgr2GetSubAlign(sap, from, to, 0, TRUE);
7821 sap2->next = sap->next;
7822 sap->next = sap2;
7823 dsp = (DenseSegPtr)(sap->segs);
7824 sap->segs = (Pointer)(sap1->segs);
7825 sap1->segs = NULL;
7826 DenseSegFree(dsp);
7827 SeqAlignFree(sap1);
7828 AlnMgr2IndexSingleChildSeqAlign(sap);
7829 AlnMgr2IndexSingleChildSeqAlign(sap2);
7830 return TRUE;
7831 }
7832 } else if (mstart < start) /* throw away the first part */
7833 {
7834 from = AlnMgr2MapBioseqToSeqAlign(sap, start, row);
7835 to = AlnMgr2MapBioseqToSeqAlign(sap, stop, row);
7836 if (to < from)
7837 {
7838 tmp = to;
7839 to = from;
7840 from = tmp;
7841 }
7842 sap1 = AlnMgr2GetSubAlign(sap, from, to, 0, TRUE);
7843 if (mstop == stop) /* done */
7844 {
7845 dsp = (DenseSegPtr)(sap->segs);
7846 sap->segs = (Pointer)(sap1->segs);
7847 sap1->segs = NULL;
7848 DenseSegFree(dsp);
7849 SeqAlignFree(sap1);
7850 AlnMgr2IndexSingleChildSeqAlign(sap);
7851 return FALSE;
7852 } else if (mstop > stop)
7853 {
7854 from = AlnMgr2MapBioseqToSeqAlign(sap, stop+1, row);
7855 if (from < 0)
7856 return FALSE;
7857 to = AlnMgr2MapBioseqToSeqAlign(sap, mstop, row);
7858 if (to < from)
7859 {
7860 tmp = to;
7861 to = from;
7862 from = tmp;
7863 }
7864 sap2 = AlnMgr2GetSubAlign(sap, from, to, 0, TRUE);
7865 sap2->next = sap->next;
7866 sap->next = sap2;
7867 AlnMgr2IndexSingleChildSeqAlign(sap2);
7868 dsp = (DenseSegPtr)(sap->segs);
7869 sap->segs = (Pointer)(sap1->segs);
7870 sap1->segs = NULL;
7871 DenseSegFree(dsp);
7872 SeqAlignFree(sap1);
7873 AlnMgr2IndexSingleChildSeqAlign(sap);
7874 return TRUE;
7875 }
7876 }
7877 } else if (sap->segtype == SAS_DENDIAG)
7878 {
7879 ddp = (DenseDiagPtr)(sap->segs);
7880 if (ddp->dim < row)
7881 return FALSE;
7882 mstart = ddp->starts[row-1];
7883 mstop = mstart + ddp->len - 1;
7884 if (mstart > start || mstop < stop)
7885 return FALSE;
7886 if (mstart == start)
7887 {
7888 if (mstop == stop)
7889 return FALSE;
7890 else if (mstop > stop)
7891 {
7892 ddp2 = DenseDiagNew();
7893 ddp2->dim = ddp->dim;
7894 ddp2->starts = (Int4Ptr)MemNew((ddp->dim)*sizeof(Int4));
7895 ddp2->id = SeqIdDupList(ddp->id);
7896 ddp2->strands = (Uint1Ptr)MemNew((ddp->dim)*sizeof(Uint1));
7897 ddp2->scores = ScoreDup(ddp->scores);
7898 for (i=0; i<ddp->dim; i++)
7899 {
7900 ddp2->starts[i] = ddp->starts[i] + ddp->len - (mstop - stop);
7901 ddp2->strands[i] = ddp->strands[i];
7902 }
7903 ddp2->len = mstop - stop;
7904 ddp->len = ddp->len - (mstop - stop);
7905 sap2 = SeqAlignNew();
7906 sap2->type = SAT_PARTIAL;
7907 sap2->segtype = SAS_DENSEG;
7908 sap2->segs = (Pointer)ddp2;
7909 sap2->next = sap->next;
7910 sap->next = sap2;
7911 AlnMgr2IndexSingleChildSeqAlign(sap2);
7912 return TRUE;
7913 }
7914 } else if (mstart < start)
7915 {
7916 for (i=0; i<ddp->dim; i++)
7917 {
7918 ddp->starts[i] = ddp->starts[i] + start - mstart;
7919 }
7920 ddp->len = ddp->len - (start - mstart);
7921 AlnMgr2IndexSingleChildSeqAlign(sap);
7922 if (mstop == stop)
7923 return FALSE;
7924 else if (mstop > stop)
7925 {
7926 ddp2 = DenseDiagNew();
7927 ddp2->dim = ddp->dim;
7928 ddp2->starts = (Int4Ptr)MemNew((ddp->dim)*sizeof(Int4));
7929 ddp2->id = SeqIdDupList(ddp->id);
7930 ddp2->strands = (Uint1Ptr)MemNew((ddp->dim)*sizeof(Uint1));
7931 ddp2->scores = ScoreDup(ddp->scores);
7932 for (i=0; i<ddp->dim; i++)
7933 {
7934 ddp2->starts[i] = ddp->starts[i] + ddp->len - (mstop - stop);
7935 ddp2->strands[i] = ddp->strands[i];
7936 }
7937 ddp2->len = mstop - stop;
7938 ddp->len = ddp->len - (mstop - stop);
7939 sap2 = SeqAlignNew();
7940 sap2->type = SAT_PARTIAL;
7941 sap2->segtype = SAS_DENSEG;
7942 sap2->segs = (Pointer)ddp2;
7943 sap2->next = sap->next;
7944 sap->next = sap2;
7945 AlnMgr2IndexSingleChildSeqAlign(sap2);
7946 return TRUE;
7947 }
7948 }
7949 } else
7950 return FALSE;
7951 return FALSE;
7952 }
7953
7954 /* SECTION 7 */
7955 /***************************************************************************
7956 *
7957 * AlnMgr2GetSubAlign retrieves a portion of an indexed alignment, from
7958 * 'from' to 'to' in the row coordinates specified, or if which_row is 0,
7959 * 'from' and 'to' are assumed to be alignment coordinates. If 'to' is -1,
7960 * the subalignment will go to the end of the specified row (or to the end
7961 * of the whole alignment). If the alignment is discontinuous and fill_in
7962 * is FALSE, the alignment will be returned as an SAS_DISC set, each piece
7963 * represented by a single alignment. If the alignment is discontinuous and
7964 * fill_in is TRUE, the unaligned regions will be added in to the alignment,
7965 * with all gaps in all other rows. If the alignment is continuous, it
7966 * doesn't matter whether fill_in is TRUE or FALSE. (SUBALIGN)
7967 *
7968 ***************************************************************************/
AlnMgr2GetSubAlign(SeqAlignPtr sap,Int4 from,Int4 to,Int4 which_row,Boolean fill_in)7969 NLM_EXTERN SeqAlignPtr AlnMgr2GetSubAlign(SeqAlignPtr sap, Int4 from, Int4 to, Int4 which_row, Boolean fill_in)
7970 {
7971 Int4 a;
7972 AMAlignIndex2Ptr amaip;
7973 AlnMsg2Ptr amp;
7974 Boolean anchored;
7975 Int4 currlen;
7976 DenseSegPtr dsp;
7977 DenseSegPtr dsp_new;
7978 Int4 from_aln;
7979 Int4 from_seq;
7980 Int4 i;
7981 SeqIdPtr id;
7982 Int4 j;
7983 Int4 k;
7984 Int4 len;
7985 Int4 lengthbit;
7986 Int4 minlen;
7987 Boolean more;
7988 Int4 n;
7989 Int4 numseg;
7990 Int4 numunaln;
7991 AMRowInfoPtr row;
7992 AMRowInfoPtr row_head;
7993 AMRowInfoPtr row_prev;
7994 AMRowInfoPtr PNTR rowheads;
7995 AMRowInfoPtr PNTR rows;
7996 SeqAlignPtr salp;
7997 SeqAlignPtr salp_head;
7998 SeqAlignPtr salp_prev;
7999 SeqAlignPtr sap_real;
8000 Int4 seg;
8001 Int4 start_seg;
8002 Uint1 strand;
8003 SeqAlignPtr subsalp;
8004 Int4 tmp;
8005 Int4 to_aln;
8006 Int4 to_seq;
8007 Int4 ustart;
8008 Int4 ustop;
8009
8010 if (sap == NULL || sap->saip == NULL)
8011 return NULL;
8012 len = AlnMgr2GetAlnLength(sap, FALSE);
8013 if (which_row == 0 && (to > len-1 || from < 0))
8014 return NULL;
8015 n = AlnMgr2GetNumRows(sap);
8016 if (which_row < 0 || which_row > n)
8017 return NULL;
8018 if (to == -1)
8019 {
8020 if (which_row == 0)
8021 to = len-1;
8022 else
8023 AlnMgr2GetNthSeqRangeInSA(sap, which_row, NULL, &to);
8024 }
8025 if (sap->saip->indextype == INDEX_CHILD)
8026 sap_real = sap;
8027 else if (sap->saip->indextype == INDEX_PARENT)
8028 {
8029 amaip = (AMAlignIndex2Ptr)(sap->saip);
8030 if (amaip->alnstyle == AM2_LITE)
8031 return NULL;
8032 sap_real = amaip->sharedaln;
8033 if (from == 0 && to == len-1 && !AlnMgr2IsSAPDiscAli(sap_real)) /* need whole aln -- take a shortcut! */
8034 return SeqAlignDup(sap_real);
8035 }
8036 if ((a = AlnMgr2FindAnchor(sap_real)) > 0)
8037 {
8038 anchored = TRUE;
8039 salp = SeqAlignDup(sap_real);
8040 AlnMgr2IndexSingleChildSeqAlign(salp);
8041 if (which_row == 0) /* anchor coordinates */
8042 {
8043 AlnMgr2GetNthSeqRangeInSA(salp, a, &from_seq, &to_seq);
8044 from_aln = AlnMgr2MapBioseqToSeqAlign(salp, from_seq, a);
8045 to_aln = AlnMgr2MapBioseqToSeqAlign(salp, to_seq, a);
8046 if (from_aln > to_aln)
8047 {
8048 tmp = from_aln;
8049 from_aln = to_aln;
8050 to_aln = tmp;
8051 }
8052 } else
8053 {
8054 from_aln = AlnMgr2MapBioseqToSeqAlign(salp, from, which_row);
8055 to_aln = AlnMgr2MapBioseqToSeqAlign(salp, to, which_row);
8056 if (from_aln > to_aln)
8057 {
8058 tmp = from_aln;
8059 from_aln = to_aln;
8060 to_aln = tmp;
8061 }
8062 }
8063 } else
8064 {
8065 anchored = FALSE;
8066 salp = sap_real;
8067 if (which_row == 0) /* alignment coordinates */
8068 {
8069 from_aln = from;
8070 to_aln = to;
8071 } else
8072 {
8073 from_aln = AlnMgr2MapBioseqToSeqAlign(salp, from, which_row);
8074 to_aln = AlnMgr2MapBioseqToSeqAlign(salp, to, which_row);
8075 if (from_aln > to_aln)
8076 {
8077 tmp = from_aln;
8078 from_aln = to_aln;
8079 to_aln = tmp;
8080 }
8081 }
8082 }
8083 rows = (AMRowInfoPtr PNTR)MemNew(n*sizeof(AMRowInfoPtr));
8084 amp = AlnMsgNew2();
8085 seg = lengthbit = 0;
8086 currlen = 0;
8087 numunaln = 0;
8088 salp_head = salp_prev = NULL;
8089 while (AlnMgr2GetNextLengthBit(sap, &lengthbit, &seg))
8090 {
8091 if (currlen <= to_aln && seg >= 0 && currlen+lengthbit-1 >= from_aln)
8092 {
8093 numseg = AlnMgr2GetNumSegsInRange(sap, currlen, currlen+lengthbit-1, &start_seg);
8094 numunaln = 0;
8095 for (i=0; i<n; i++)
8096 {
8097 row_head = NULL;
8098 for (j=start_seg; j<numseg+start_seg; j++)
8099 {
8100 AlnMsgReNew2(amp);
8101 AlnMgr2GetNthSegmentRange(sap, j+1, &->from_aln, &->to_aln);
8102 amp->from_aln = MAX(amp->from_aln, from_aln);
8103 amp->to_aln = MIN(amp->to_aln, to_aln);
8104 amp->row_num = i+1;
8105 while ((more = AlnMgr2GetNextAlnBit(salp, amp)) == TRUE)
8106 {
8107 if (amp->right_interrupt != NULL && amp->right_interrupt->unalnlen > 0)
8108 numunaln++;
8109 row = (AMRowInfoPtr)MemNew(sizeof(AMRowInfo));
8110 if (amp->type == AM_GAP)
8111 row->from = -1;
8112 else
8113 row->from = amp->from_row;
8114 row->len = amp->to_row - amp->from_row + 1;
8115 if (row_head != NULL)
8116 {
8117 row_prev->next = row;
8118 row_prev = row;
8119 } else
8120 row_head = row_prev = row;
8121 }
8122 }
8123 rows[i] = row_head;
8124 }
8125 }
8126 rowheads = (AMRowInfoPtr PNTR)MemNew(n*sizeof(AMRowInfoPtr));
8127 for (i=0; i<n; i++)
8128 {
8129 rowheads[i] = rows[i];
8130 }
8131 while (rows[0] != NULL)
8132 {
8133 minlen = -1;
8134 for (i=0; i<n; i++)
8135 {
8136 if (rows[i]->len < minlen || minlen == -1)
8137 minlen = rows[i]->len;
8138 }
8139 for (i=0; i<n; i++)
8140 {
8141 if (rows[i]->len > minlen)
8142 {
8143 row = (AMRowInfoPtr)MemNew(sizeof(AMRowInfo));
8144 row->next = rows[i]->next;
8145 rows[i]->next = row;
8146 if (rows[i]->from == -1)
8147 row->from = -1;
8148 else if (AlnMgr2GetNthStrand(salp, i) == Seq_strand_minus)
8149 {
8150 row->from = rows[i]->from;
8151 rows[i]->from = rows[i]->from + rows[i]->len - 1 - minlen;
8152 } else
8153 row->from = rows[i]->from + minlen;
8154 row->len = rows[i]->len - minlen;
8155 rows[i]->len = minlen;
8156 }
8157 rows[i] = rows[i]->next;
8158 }
8159 }
8160 for (i=0; i<n; i++)
8161 {
8162 rows[i] = rowheads[i];
8163 }
8164 MemFree(rowheads);
8165 dsp = DenseSegNew();
8166 row = rows[0];
8167 while (row != NULL)
8168 {
8169 dsp->numseg++;
8170 row = row->next;
8171 }
8172 if (fill_in)
8173 dsp->numseg += numunaln;
8174 dsp->dim = n;
8175 dsp->lens = (Int4Ptr)MemNew((dsp->numseg)*sizeof(Int4));
8176 dsp->starts = (Int4Ptr)MemNew((dsp->numseg)*(dsp->dim)*sizeof(Int4));
8177 dsp->strands = (Uint1Ptr)MemNew((dsp->numseg)*(dsp->dim)*sizeof(Int4));
8178 j = 0;
8179 row = rows[0];
8180 while (row != NULL)
8181 {
8182 dsp->lens[j] = row->len;
8183 j++;
8184 row = row->next;
8185 }
8186 id = AlnMgr2GetNthSeqIdPtr(salp, 0);
8187 dsp->ids = id;
8188 for (i=0; i<n; i++)
8189 {
8190 if (i > 0)
8191 {
8192 id->next = AlnMgr2GetNthSeqIdPtr(salp, i+1);
8193 id = id->next;
8194 }
8195 row = rows[i];
8196 j = 0;
8197 strand = AlnMgr2GetNthStrand(salp, i+1);
8198 while (row != NULL)
8199 {
8200 dsp->starts[n*j + i] = row->from;
8201 dsp->strands[n*j + i] = strand;
8202 j++;
8203 row = row->next;
8204 }
8205 }
8206 if (fill_in)
8207 {
8208 for (i=0; i<n; i++)
8209 {
8210 AlnMgr2GetNthUnalignedForNthRow(sap, seg+1, i+1, &ustart, &ustop);
8211 if (ustart >= 0 && ustop >= ustart)
8212 {
8213 for (k=0; k<n; k++)
8214 {
8215 dsp->starts[n*j + k] = -1;
8216 dsp->strands[n*j + k] = dsp->strands[i];
8217 }
8218 dsp->starts[n*j + i] = ustart;
8219 j++;
8220 }
8221 }
8222 }
8223 subsalp = SeqAlignNew();
8224 subsalp->type = SAT_PARTIAL;
8225 subsalp->segtype = SAS_DENSEG;
8226 subsalp->dim = n;
8227 subsalp->segs = (Pointer)(dsp);
8228 for (i=0; i<n; i++)
8229 {
8230 row = rows[i];
8231 while (row != NULL)
8232 {
8233 row_prev = row->next;
8234 MemFree(row);
8235 row = row_prev;
8236 }
8237 }
8238 if (seg < 0)
8239 seg = -seg;
8240 currlen += lengthbit;
8241 seg++;
8242 if (salp_head != NULL)
8243 {
8244 salp_prev->next = subsalp;
8245 salp_prev = subsalp;
8246 } else
8247 salp_head = salp_prev = subsalp;
8248 }
8249 MemFree(rows);
8250 AlnMsgFree2(amp);
8251 if (fill_in && salp_head->next != NULL) /* stick subsalps together into a big aln */
8252 {
8253 j = 0;
8254 subsalp = salp_head;
8255 while (subsalp != NULL)
8256 {
8257 dsp = (DenseSegPtr)(subsalp->segs);
8258 j += dsp->numseg;
8259 subsalp = subsalp->next;
8260 }
8261 dsp_new = DenseSegNew();
8262 dsp_new->dim = n;
8263 dsp_new->numseg = j;
8264 dsp_new->lens = (Int4Ptr)MemNew((dsp->numseg)*sizeof(Int4));
8265 dsp_new->starts = (Int4Ptr)MemNew((dsp->numseg)*(dsp->dim)*sizeof(Int4));
8266 dsp_new->strands = (Uint1Ptr)MemNew((dsp->numseg)*(dsp->dim)*sizeof(Int4));
8267 subsalp = salp_head;
8268 k = 0;
8269 while (subsalp != NULL)
8270 {
8271 dsp = (DenseSegPtr)(subsalp->segs);
8272 for (j=0; j<dsp->numseg; j++)
8273 {
8274 dsp_new->lens[k] = dsp->lens[j];
8275 for (i=0; i<n; i++)
8276 {
8277 dsp_new->starts[k*n+i] = dsp->starts[j*n+i];
8278 dsp_new->strands[k*n+i] = dsp->strands[j*n+i];
8279 }
8280 k++;
8281 }
8282 subsalp = subsalp->next;
8283 }
8284 subsalp = SeqAlignNew();
8285 subsalp->type = SAT_PARTIAL;
8286 subsalp->segtype = SAS_DENSEG;
8287 subsalp->dim = n;
8288 subsalp->segs = (Pointer)(dsp_new);
8289 SeqAlignSetFree(salp_head);
8290 } else if (!fill_in && salp_head->next != NULL)
8291 {
8292 subsalp = SeqAlignNew();
8293 subsalp->segtype = SAS_DISC;
8294 subsalp->type = SAT_PARTIAL;
8295 subsalp->segs = (SeqAlignPtr)(salp_head);
8296 salp_prev = salp_head;
8297 while (salp_prev != NULL)
8298 {
8299 AMAlignIndexFreeEitherIndex(salp_prev);
8300 salp_prev = salp_prev->next;
8301 }
8302 } else /* if !salp_head->next */
8303 {
8304 subsalp = salp_head;
8305 subsalp->dim = AlnMgr2GetNumRows(subsalp);
8306 subsalp->type = SAT_PARTIAL;
8307 AMAlignIndexFreeEitherIndex(subsalp);
8308 }
8309 if (anchored)
8310 SeqAlignFree(salp);
8311 return subsalp;
8312 }
8313
8314 /***************************************************************************
8315 *
8316 * SECTION 8: Miscellaneous functions to compute useful information
8317 * about an alignment
8318 *
8319 ***************************************************************************/
8320 /* SECTION 8 */
8321 /***************************************************************************
8322 *
8323 * AlnMgr2ComputeScoreForSeqAlign computes an ad hoc numerical score for
8324 * an indexed alignment by computing a similarity score for the whole
8325 * alignment (residue pair by residue pair score, from a matrix for proteins
8326 * and identity for nucleotides) and then subtracting gap open and gap
8327 * extension penalties.
8328 *
8329 ***************************************************************************/
AlnMgr2ComputeScoreForSeqAlign(SeqAlignPtr sap)8330 NLM_EXTERN Int4 AlnMgr2ComputeScoreForSeqAlign(SeqAlignPtr sap)
8331 {
8332 AMFreqPtr afp;
8333 DenseSegPtr dsp;
8334 Int4 gaplen;
8335 Int4 i;
8336 Boolean is_prot;
8337 Int4 j;
8338 Int4 len;
8339 Int4 mismatch;
8340 Int4 numgaps;
8341 Int4 numseqs;
8342 Boolean open;
8343 Int4 res1;
8344 Int4 res2;
8345 Int4 score;
8346 Int4 seqscore;
8347
8348 if (sap->segtype == SAS_DISC)
8349 return -1;
8350 if (sap->saip == NULL)
8351 AlnMgr2IndexSingleChildSeqAlign(sap);
8352 is_prot = AlnMgr2IsItProtein(sap);
8353 len = AlnMgr2GetAlnLength(sap, FALSE);
8354 dsp = (DenseSegPtr)(sap->segs);
8355 numseqs = dsp->dim;
8356 open = FALSE;
8357 gaplen = 0;
8358 numgaps = 0;
8359 for (i=0; i<dsp->dim; i++)
8360 {
8361 for (j=0; j<dsp->numseg; j++)
8362 {
8363 if (dsp->starts[(dsp->dim)*j+i] == -1)
8364 {
8365 if (!open)
8366 {
8367 gaplen += dsp->lens[j];
8368 numgaps++;
8369 open = TRUE;
8370 } else
8371 gaplen += dsp->lens[j];
8372 } else
8373 open = FALSE;
8374 }
8375 }
8376 mismatch = 0;
8377 seqscore = 0;
8378 afp = AlnMgr2ComputeFreqMatrix(sap, 0, -1, 0);
8379 if (afp == NULL)
8380 return -1;
8381 for (i=0; i<afp->len; i++)
8382 {
8383 res1 = -1;
8384 res2 = -1;
8385 for (j=0; j<afp->size; j++)
8386 {
8387 if (afp->freq[j][i] == 1)
8388 {
8389 if (res1 == -1)
8390 res1 = j;
8391 else
8392 res2 = j;
8393 } else if (afp->freq[j][i] == 2)
8394 res1 = res2 = j;
8395 }
8396 if (res1 > 0 && res2 > 0) /* don't penalize gaps */
8397 seqscore += AlnMgr2GetScoreForPair(res1, res2, is_prot);
8398 }
8399 AMFreqFree(afp);
8400 score = seqscore + numgaps*AM_GAPOPEN + gaplen*AM_GAPEXT;
8401 return score;
8402 }
8403
/* Reads residues for start..stop of bsp into buf through the SeqPort held
   in *spp.  A new port is opened when *spp is NULL, or (after freeing the
   old one) when start or stop lies outside *bufpos .. *bufpos+AM_SEQPORTSIZE;
   *bufpos is set to the lower bound of the newly opened window.  Returns
   whatever SeqPortRead returns. */
static Int4 AlnMgr2SeqPortRead(SeqPortPtr PNTR spp, Uint1Ptr buf, Int4Ptr bufpos, Int4 start, Int4 stop, Uint1 strand, Uint1 code, BioseqPtr bsp)
{
   Int4     last_wanted;
   Boolean  reopen;
   Int4     window_from;
   Int4     window_to;

   reopen = (Boolean)(*spp == NULL); /* first call */
   if (!reopen)
   {
      /* see if what we need is in current seqport or a new one is needed */
      if ((start < *bufpos) || (start > *bufpos+AM_SEQPORTSIZE)
          || (stop < *bufpos) || (stop > *bufpos+AM_SEQPORTSIZE))
      {
         SeqPortFree(*spp);
         reopen = TRUE;
      }
   }
   if (reopen)
   {
      if (strand == Seq_strand_minus)
      {
         /* minus strand: the window ends at stop */
         window_from = MAX(0, stop-AM_SEQPORTSIZE);
         window_to = stop;
      } else
      {
         /* otherwise the window begins at start */
         window_from = start;
         window_to = MIN(start+AM_SEQPORTSIZE, bsp->length-1);
      }
      *spp = SeqPortNew(bsp, window_from, window_to, strand, code);
      *bufpos = window_from;
   }
   last_wanted = MIN(start+AM_SEQPORTSIZE-1, stop);
   return (SeqPortRead(*spp, buf, last_wanted - start+1));
}
8431
8432 /* SECTION 8 */
8433 /***************************************************************************
8434 *
8435 * AlnMgr2ComputeFreqMatrix takes an indexed seqalign and returns a matrix
8436 * indicating nucleotide or amino acid frequency at each position of the
8437 * alignment. The matrix can be made over only a part of the alignment, if
8438 * from and to are nonzero, and if row is nonzero, from and to are taken
8439 * to be bioseq coordinates from that row (if row == 0 from and to are
8440 * assumed to be alignment coordinates).
8441 *
8442 ***************************************************************************/
AlnMgr2ComputeFreqMatrix(SeqAlignPtr sap,Int4 from,Int4 to,Int4 row)8443 NLM_EXTERN AMFreqPtr AlnMgr2ComputeFreqMatrix(SeqAlignPtr sap, Int4 from, Int4 to, Int4 row)
8444 {
8445 AMFreqPtr afp;
8446 AlnMsg2Ptr amp;
8447 BioseqPtr bsp;
8448 Uint1 buf[AM_SEQPORTSIZE];
8449 Int4 bufpos;
8450 Uint1 code;
8451 Int4 counter;
8452 Int4 ctr;
8453 Int4 from_a;
8454 Int4 i;
8455 Boolean isna;
8456 Int4 j;
8457 Int4 l;
8458 Int4 len;
8459 Boolean more;
8460 Int4 n;
8461 Int4 numrows;
8462 Uint1 res;
8463 SeqIdPtr sip;
8464 SeqPortPtr spp;
8465 Int4 tmp;
8466 Int4 to_a;
8467
8468 if (sap == NULL || sap->saip == NULL || (from > to && to != -1))
8469 return NULL;
8470 numrows = AlnMgr2GetNumRows(sap);
8471 bufpos = -1;
8472 if (row > numrows || row < 0)
8473 return NULL;
8474 len = AlnMgr2GetAlnLength(sap, FALSE);
8475 if (to >= len)
8476 return NULL;
8477 if (to == -1)
8478 to = len-1;
8479 sip = AlnMgr2GetNthSeqIdPtr(sap, 1);
8480 bsp = BioseqLockById(sip);
8481 if (bsp != NULL)
8482 isna = ISA_na(bsp->mol);
8483 else
8484 {
8485 SeqIdFree(sip);
8486 return NULL;
8487 }
8488 BioseqUnlock(bsp);
8489 SeqIdFree(sip);
8490 if (isna)
8491 code = Seq_code_ncbi4na;
8492 else
8493 code = Seq_code_ncbistdaa;
8494 afp = (AMFreqPtr)MemNew(sizeof(AMFreq));
8495 afp->len = len;
8496 if (isna)
8497 afp->size = AM_NUCSIZE;
8498 else
8499 afp->size = AM_PROTSIZE;
8500 afp->freq = (Int4Ptr PNTR)MemNew((afp->size)*sizeof(Int4Ptr));
8501 for (i=0; i<afp->size; i++)
8502 {
8503 afp->freq[i] = (Int4Ptr)MemNew((afp->len)*sizeof(Int4));
8504 }
8505 amp = AlnMsgNew2();
8506 if (row != 0)
8507 {
8508 from_a = AlnMgr2MapBioseqToSeqAlign(sap, from, row);
8509 to_a = AlnMgr2MapBioseqToSeqAlign(sap, to, row);
8510 if (from_a > to_a)
8511 {
8512 tmp = to_a;
8513 to_a = from_a;
8514 from_a = tmp;
8515 }
8516 } else
8517 {
8518 from_a = from;
8519 to_a = to;
8520 }
8521 for (i=0; i<numrows; i++)
8522 {
8523 spp = NULL;
8524 AlnMsgReNew2(amp);
8525 amp->from_aln = from_a;
8526 amp->to_aln = to_a;
8527 amp->row_num = i+1;
8528 j = 0;
8529 while ((more = AlnMgr2GetNextAlnBit(sap, amp)))
8530 {
8531 if (amp->type == AM_GAP)
8532 {
8533 for (n=0; n<(amp->to_row - amp->from_row+1); n++)
8534 {
8535 afp->freq[0][j] = afp->freq[0][j]+1;
8536 j++;
8537 }
8538 } else if (amp->type == AM_SEQ)
8539 {
8540 sip = AlnMgr2GetNthSeqIdPtr(sap, i+1);
8541 bsp = BioseqLockById(sip);
8542 if (bsp != NULL) {
8543 for (l=amp->from_row; l<=amp->to_row; l+=AM_SEQPORTSIZE)
8544 {
8545 counter = AlnMgr2SeqPortRead(&spp, buf, &bufpos, l, MIN(l+AM_SEQPORTSIZE, amp->to_row), amp->strand, code, bsp);
8546 ctr = 0;
8547 while (ctr < counter)
8548 {
8549 res = buf[ctr];
8550 if (isna)
8551 {
8552 if (res == 1 || res == 2)
8553 afp->freq[res][j]++;
8554 else if (res == 4)
8555 afp->freq[3][j]++;
8556 else if (res == 8)
8557 afp->freq[4][j]++;
8558 else
8559 afp->freq[5][j]++;
8560 } else
8561 afp->freq[res][j]++;
8562 j++;
8563 ctr++;
8564 }
8565 }
8566 BioseqUnlock(bsp);
8567 }
8568 SeqIdFree(sip);
8569 }
8570 }
8571 SeqPortFree(spp);
8572 }
8573 AlnMsgFree2(amp);
8574 return afp;
8575 }
8576
8577 /* SECTION 8 */
8578 /***************************************************************************
8579 *
8580 * AlnMgr2GetScoreForPair assigns scores to nucleotide and protein residue
8581 * pairs. Nucleotide pairs are scored according to a standard mismatch
8582 * penalty, and amino acid pairs are scored according to the BLOSUM62
8583 * matrix below. This matrix has been rearranged so that the rows and
8584 * columns appear in alphabetical order, so that it directly correlates
8585 * to the NCBIstdaa alphabet (with a minus-one difference).
8586 *
8587 ***************************************************************************/
AlnMgr2GetScoreForPair(Int4 res1,Int4 res2,Boolean is_prot)8588 static Int4 AlnMgr2GetScoreForPair(Int4 res1, Int4 res2, Boolean is_prot)
8589 {
8590 Int4 matrix[24][24] = {
8591 {4, -2, 0, -2, -1, -2, 0, -2, -1, -1, -1, -1, -2, -1, -1, -1, 1, 0, 0, -3, 0, -2, -1, -4},
8592 {-2, 4, -3, 4, 1, -3, -1, 0, -3, 0, -4, -3, 3, -2, 0, -1, 0, -1, -3, -4, -1, -3, 1, -4},
8593 {0, -3, 9, -3, -4, -2, -3, -3, -1, -3, -1, -1, -3, -3, -3, -3, -1, -1, -1, -2, -2, -2, -3, -4},
8594 {-2, 4, -3, 6, 2, -3, -1, -1, -3, -1, -4, -3, 1, -1, 0, -2, 0, -1, -3, -4, -1, -3, 1, -4},
8595 {-1, 1, -4, 2, 5, -3, -2, 0, -3, 1, -3, -2, 0, -1, 2, 0, 0, -1, -2, -3, -1, -2, 4, -4},
8596 {-2, -3, -2, -3, -3, 6, -3, -1, 0, -3, 0, 0, -3, -4, -3, -3, -2, -2, -1, 1, -1, 3, -3, -4},
8597 {0, -1, -3, -1, -2, -3, 6, -2, -4, -2, -4, -3, 0, -2, -2, -2, 0, -2, -3, -2, -1, -3, -2, -4},
8598 {-2, 0, -3, -1, 0, -1, -2, 8, -3, -1, -3, -2, 1, -2, 0, 0, -1, -2, -3, -2, -1, 2, 0, -4},
8599 {-1, -3, -1, -3, -3, 0, -4, -3, 4, -3, 2, 1, -3, -3, -3, -3, -2, -1, 3, -3, -1, -1, -3, -4},
8600 {-1, 0, -3, -1, 1, -3, -2, -1, -3, 5, -2, -1, 0, -1, 1, 2, 0, -1, -2, -3, -1, -2, 1, -4},
8601 {-1, -4, -1, -4, -3, 0, -4, -3, 2, -2, 4, 2, -3, -3, -2, -2, -2, -1, 1, -2, -1, -1, -3, -4},
8602 {-1, -3, -1, -3, -2, 0, -3, -2, 1, -1, 2, 5, -2, -2, 0, -1, -1, -1, 1, -1, -1, -1, -1, -4},
8603 {-2, 3, -3, 1, 0, -3, 0, 1, -3, 0, -3, -2, 6, -2, 0, 0, 1, 0, -3, -4, -1, -2, 0, -4},
8604 {-1, -2, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -2, 7, -1, -2, -1, -1, -2, -4, -2, -3, -1, -4},
8605 {-1, 0, -3, 0, 2, -3, -2, 0, -3, 1, -2, 0, 0, -1, 5, 1, 0, -1, -2, -2, -1, -1, 3, -4},
8606 {-1, -1, -3, -2, 0, -3, -2, 0, -3, 2, -2, -1, 0, -2, 1, 5, -1, -1, -3, -3, -1, -2, 0, -4},
8607 {1, 0, -1, 0, 0, -2, 0, -1, -2, 0, -2, -1, 1, -1, 0, -1, 4, 1, -2, -3, 0, -2, 0, -4},
8608 {0, -1, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 1, 5, 0, -2, 0, -2, -1, -4},
8609 {0, -3, -1, -3, -2, -1, -3, -3, 3, -2, 1, 1, -3, -2, -2, -3, -2, 0, 4, -3, -1, -1, -2, -4},
8610 {-3, -4, -2, -4, -3, 1, -2, -2, -3, -3, -2, -1, -4, -4, -2, -3, -3, -2, -3, 11, -2, 2, -3, -4},
8611 {0, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -1, -1, 0, 0, -1, -2, -1, -1, -1, -4},
8612 {-2, -3, -2, -3, -2, 3, -3, 2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1, 2, -1, 7, -2, -4},
8613 {-1, 1, -3, 1, 4, -3, -2, 0, -3, 1, -3, -1, 0, -1, 3, 0, 0, -1, -2, -3, -1, -2, 4, -4},
8614 {-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, 1}};
8615
8616 if (is_prot) /* protein->use BLOSUM62 matrix */
8617 return matrix[res1-1][res2-1];
8618 else /* nucleotide->use match score/mismatch penalty */
8619 {
8620 if (res1 == 0 || res2 == 0) /* don't count gaps */
8621 return 0;
8622 if (res1 == res2)
8623 return 1;
8624 else
8625 return -3;
8626 }
8627 }
8628
8629 /* SECTION 8 */
8630 /***************************************************************************
8631 *
8632 * AlnMgr2IsItProtein takes an indexed alignment and quickly decides if
8633 * it's a protein or nucleotide alignment, returning TRUE for protein.
8634 *
8635 ***************************************************************************/
AlnMgr2IsItProtein(SeqAlignPtr sap)8636 NLM_EXTERN Boolean AlnMgr2IsItProtein(SeqAlignPtr sap)
8637 {
8638 BioseqPtr bsp;
8639 Boolean is_na;
8640 SeqIdPtr sip;
8641
8642 if (sap == NULL || sap->saip == NULL)
8643 return FALSE;
8644 sip = AlnMgr2GetNthSeqIdPtr(sap, 1);
8645 bsp = BioseqLockById(sip);
8646 if (bsp == NULL)
8647 return FALSE;
8648 is_na = ISA_na(bsp->mol);
8649 SeqIdFree(sip);
8650 BioseqUnlock(bsp);
8651 return (!is_na);
8652 }
8653
8654 /***************************************************************************
8655 *
8656 * SECTION 9: Sorting functions and other algorithms to help order
8657 * alignments for various purposes
8658 *
8659 ***************************************************************************/
8660
8661 /* SECTION 9 */
AMCompareStarts(VoidPtr ptr1,VoidPtr ptr2)8662 static int LIBCALLBACK AMCompareStarts(VoidPtr ptr1, VoidPtr ptr2)
8663 {
8664 AMBitty2Ptr bit1;
8665 AMBitty2Ptr bit2;
8666
8667 if (ptr1 != NULL && ptr2 != NULL)
8668 {
8669 bit1 = (AMBitty2Ptr)ptr1;
8670 bit2 = (AMBitty2Ptr)ptr2;
8671 if (bit1->num2 < bit2->num2)
8672 return -1;
8673 else if (bit1->num2 > bit2->num2)
8674 return 1;
8675 else if (bit1->num3 > bit2->num3) /* compare aln lengths */
8676 return -1;
8677 else if (bit1->num3 < bit2->num3)
8678 return 1;
8679 else
8680 return 0;
8681 }
8682 return 0;
8683 }
8684
8685 /* SECTION 9 */
8686 /***************************************************************************
8687 *
8688 * AlnMgr2SortAlnSetByNthRowPos takes an indexed parent alignment and sorts
8689 * all the child alignments along the row indicated. If the indicated row
8690 * is aligned on the plus strand, the alignments are sorted from smaller
8691 * to larger coordinates along that row; otherwise they are sorted in
8692 * reverse order.
8693 *
8694 ***************************************************************************/
AlnMgr2SortAlnSetByNthRowPos(SeqAlignPtr sap,Int4 row)8695 NLM_EXTERN void AlnMgr2SortAlnSetByNthRowPos(SeqAlignPtr sap, Int4 row)
8696 {
8697 AMAlignIndex2Ptr amaip;
8698 AMBitty2Ptr bit;
8699 Int4 i;
8700 SeqAlignPtr PNTR saparray;
8701 Uint1 strand;
8702
8703 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
8704 return;
8705 amaip = (AMAlignIndex2Ptr)(sap->saip);
8706 bit = (AMBitty2Ptr)MemNew((amaip->numsaps)*sizeof(AMBitty2));
8707 saparray = (SeqAlignPtr PNTR)MemNew((amaip->numsaps)*sizeof(SeqAlignPtr));
8708 for (i=0; i<amaip->numsaps; i++)
8709 {
8710 bit[i].num1 = i;
8711 AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], row, &bit[i].num2, NULL);
8712 bit[i].num3 = AlnMgr2GetAlnLength(amaip->saps[i], FALSE);
8713 strand = AlnMgr2GetNthStrand(amaip->saps[i], row);
8714 if (strand == Seq_strand_minus)
8715 bit[i].num2 = -bit[i].num2;
8716 saparray[i] = amaip->saps[i];
8717 }
8718 HeapSort(bit, amaip->numsaps, sizeof(AMBitty2), AMCompareStarts);
8719 for (i=0; i<amaip->numsaps; i++)
8720 {
8721 amaip->saps[i] = saparray[bit[i].num1];
8722 }
8723 MemFree(saparray);
8724 MemFree(bit);
8725 if (amaip->alnstyle != AM2_LITE)
8726 AlnMgr2ReIndexSeqAlign(sap);
8727 }
8728
8729
8730 /***************************************************************************
8731 *
8732 * SECTION 10: Basic alignment operations
8733 *
8734 ***************************************************************************/
8735
8736 /***************************************************************************
8737 *
8738 * AlnMgr2MergeTwoAlignments takes two alignments, with identical rows in
8739 * the same order (otherwise it rejects the alignments), and merges them
8740 * into a single alignment. If there is unaligned space between the two
8741 * alignments and this space is the same length for every row, the function
8742 * aligns those sequences; it rejects alignments when the unaligned spaces
8743 * are different sizes. The function returns a newly allocated alignment.
8744 *
8745 ***************************************************************************/
AlnMgr2MergeTwoAlignments(SeqAlignPtr sap1_orig,SeqAlignPtr sap2_orig)8746 NLM_EXTERN SeqAlignPtr AlnMgr2MergeTwoAlignments(SeqAlignPtr sap1_orig, SeqAlignPtr sap2_orig)
8747 {
8748 Int4 c;
8749 DenseSegPtr dsp;
8750 DenseSegPtr dsp1;
8751 DenseSegPtr dsp2;
8752 DenseSegPtr dsp_new;
8753 Int4 i;
8754 Int4 j;
8755 Int4 n1;
8756 Int4 n2;
8757 SeqAlignPtr sap1;
8758 SeqAlignPtr sap2;
8759 SeqAlignPtr sap_new;
8760 SeqIdPtr sip1;
8761 SeqIdPtr sip2;
8762 Int4 start1;
8763 Int4 start2;
8764 Int4 stop1;
8765 Int4 stop2;
8766 Uint1 strand1;
8767 Uint1 strand2;
8768 SeqAlignPtr tmp;
8769
8770 if (sap1_orig == NULL || sap2_orig == NULL)
8771 return NULL;
8772 if (sap1_orig->next != NULL)
8773 {
8774 AlnMgr2IndexSeqAlign(sap1_orig);
8775 sap1 = AlnMgr2GetSubAlign(sap1_orig, 0, -1, 0, TRUE);
8776 } else
8777 sap1 = SeqAlignDup(sap1_orig);
8778 if (sap2_orig->next != NULL)
8779 {
8780 AlnMgr2IndexSeqAlign(sap2_orig);
8781 sap2 = AlnMgr2GetSubAlign(sap2_orig, 0, -1, 0, TRUE);
8782 } else
8783 sap2 = SeqAlignDup(sap2_orig);
8784 AlnMgr2IndexSingleChildSeqAlign(sap1);
8785 AlnMgr2IndexSingleChildSeqAlign(sap2);
8786 n1 = AlnMgr2GetNumRows(sap1);
8787 n2 = AlnMgr2GetNumRows(sap2);
8788 if (n1 != n2)
8789 {
8790 SeqAlignFree(sap1);
8791 SeqAlignFree(sap2);
8792 return NULL;
8793 }
8794 /* put the alignments in order by the first row */
8795 AlnMgr2GetNthSeqRangeInSA(sap1, 1, &start1, &stop1);
8796 AlnMgr2GetNthSeqRangeInSA(sap2, 1, &start2, &stop2);
8797 strand1 = AlnMgr2GetNthStrand(sap1, 1);
8798 if (strand1 == Seq_strand_minus)
8799 {
8800 if (stop2 > start1)
8801 {
8802 tmp = sap1;
8803 sap1 = sap2;
8804 sap2 = tmp;
8805 }
8806 } else
8807 {
8808 if (stop1 > start2)
8809 {
8810 tmp = sap1;
8811 sap1 = sap2;
8812 sap2 = tmp;
8813 }
8814 }
8815 dsp1 = (DenseSegPtr)(sap1->segs);
8816 dsp2 = (DenseSegPtr)(sap2->segs);
8817 sip1 = dsp1->ids;
8818 sip2 = dsp2->ids;
8819 while (sip1 != NULL && sip2 != NULL)
8820 {
8821 if (SeqIdComp(sip1, sip2) != SIC_YES)
8822 {
8823 SeqAlignFree(sap1);
8824 SeqAlignFree(sap2);
8825 return NULL;
8826 }
8827 sip1 = sip1->next;
8828 sip2 = sip2->next;
8829 }
8830 dsp = DenseSegNew();
8831 dsp->dim = n1;
8832 dsp->numseg = 1;
8833 dsp->starts = (Int4Ptr)MemNew(n1*sizeof(Int4));
8834 dsp->lens = (Int4Ptr)MemNew(sizeof(Int4));
8835 dsp->strands = (Uint1Ptr)MemNew(n1*sizeof(Int4));
8836 for (i=0; i<n1; i++)
8837 {
8838 strand1 = AlnMgr2GetNthStrand(sap1, i+1);
8839 strand2 = AlnMgr2GetNthStrand(sap2, i+1);
8840 if (strand1 != strand2)
8841 {
8842 DenseSegFree(dsp);
8843 SeqAlignFree(sap1);
8844 SeqAlignFree(sap2);
8845 return NULL;
8846 }
8847 AlnMgr2GetNthSeqRangeInSA(sap1, i+1, &start1, &stop1);
8848 AlnMgr2GetNthSeqRangeInSA(sap2, i+1, &start2, &stop2);
8849 if (strand1 == Seq_strand_minus)
8850 {
8851 dsp->starts[i] = stop2 + 1;
8852 if (i == 0)
8853 dsp->lens[0] = start2 - (stop2 + 1);
8854 else
8855 {
8856 if (start2 - (stop2 + 1) != dsp->lens[0])
8857 {
8858 DenseSegFree(dsp);
8859 SeqAlignFree(sap1);
8860 SeqAlignFree(sap2);
8861 return NULL;
8862 }
8863 }
8864 } else
8865 {
8866 dsp->starts[i] = stop1 + 1;
8867 if (i == 0)
8868 dsp->lens[0] = start2 - (stop1 + 1);
8869 else
8870 {
8871 if (start2 - (stop1 + 1) != dsp->lens[0])
8872 {
8873 DenseSegFree(dsp);
8874 SeqAlignFree(sap1);
8875 SeqAlignFree(sap2);
8876 return NULL;
8877 }
8878 }
8879 }
8880 dsp->strands[i] = strand1;
8881 }
8882 if (dsp->lens[0] == 0)
8883 {
8884 DenseSegFree(dsp);
8885 dsp = NULL;
8886 }
8887 dsp_new = DenseSegNew();
8888 dsp_new->numseg = dsp1->numseg + dsp2->numseg;
8889 if (dsp != NULL)
8890 dsp_new->numseg++;
8891 dsp_new->dim = n1;
8892 dsp_new->starts = (Int4Ptr)MemNew(dsp_new->dim*dsp_new->numseg*sizeof(Int4));
8893 dsp_new->lens = (Int4Ptr)MemNew(dsp_new->numseg*sizeof(Int4));
8894 dsp_new->strands = (Uint1Ptr)MemNew(dsp_new->dim*dsp_new->numseg*sizeof(Uint1));
8895 for (i=0; i<dsp1->numseg; i++)
8896 {
8897 for (j=0; j<n1; j++)
8898 {
8899 dsp_new->starts[i*n1 + j] = dsp1->starts[i*n1 + j];
8900 dsp_new->strands[i*n1 + j] = dsp1->strands[i*n1 + j];
8901 }
8902 dsp_new->lens[i] = dsp1->lens[i];
8903 }
8904 c = dsp1->numseg;
8905 if (dsp != NULL)
8906 {
8907 for (j=0; j<n1; j++)
8908 {
8909 dsp_new->starts[c*n1 + j] = dsp->starts[j];
8910 dsp_new->strands[c*n1 + j] = dsp->strands[j];
8911 }
8912 dsp_new->lens[c] = dsp->lens[0];
8913 c++;
8914 }
8915 for (i=0; i<dsp2->numseg; i++, c++)
8916 {
8917 for (j=0; j<n1; j++)
8918 {
8919 dsp_new->starts[c*n1 + j] = dsp2->starts[i*n1 + j];
8920 dsp_new->strands[c*n1 + j] = dsp2->strands[i*n1 + j];
8921 }
8922 dsp_new->lens[c] = dsp2->lens[i];
8923 }
8924 dsp_new->ids = SeqIdDupList(dsp1->ids);
8925 sap_new = SeqAlignNew();
8926 sap_new->segtype = SAS_DENSEG;
8927 sap_new->dim = n1;
8928 sap_new->segs = (Pointer)dsp_new;
8929 if (dsp != NULL)
8930 DenseSegFree(dsp);
8931 SeqAlignFree(sap1);
8932 SeqAlignFree(sap2);
8933 return sap_new;
8934 }
8935
8936 /* SECTION 10 */
8937 /***************************************************************************
8938 *
8939 * AlnMgr2ExtendToCoords takes an indexed child seqalign and blindly extends
8940 * it to the coordinates specified on the given row. If other rows are too
8941 * short to allow this extension, the alignment is extended as far as
8942 * possible. If to == -1 the extension goes to the end of the sequence
8943 * specified.
8944 *
8945 ***************************************************************************/
AlnMgr2ExtendToCoords(SeqAlignPtr sap,Int4 from,Int4 to,Int4 row)8946 NLM_EXTERN void AlnMgr2ExtendToCoords(SeqAlignPtr sap, Int4 from, Int4 to, Int4 row)
8947 {
8948 BioseqPtr bsp;
8949 Int4 diff1;
8950 Int4 diff2;
8951 DenseSegPtr dsp;
8952 DenseSegPtr dsp_new;
8953 Int4 i;
8954 Int4 j;
8955 Int4 numrows;
8956 Int4 numseg;
8957 Int4 prediff1;
8958 Int4 prediff2;
8959 Int4 seg;
8960 SeqIdPtr sip;
8961 Int4 start;
8962 Int4 stop;
8963
8964 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_CHILD)
8965 return;
8966 numrows = AlnMgr2GetNumRows(sap);
8967 if (row < 1 || row > numrows)
8968 return;
8969 AlnMgr2GetNthSeqRangeInSA(sap, row, &start, &stop);
8970 numseg = 0;
8971 dsp = (DenseSegPtr)(sap->segs);
8972 if (start <= from)
8973 from = start;
8974 else
8975 numseg++;
8976 diff1 = start - from;
8977 sip = AlnMgr2GetNthSeqIdPtr(sap, row);
8978 bsp = BioseqLockById(sip);
8979 if (to == -1)
8980 to = bsp->length - 1;
8981 BioseqUnlock(bsp);
8982 SeqIdFree(sip);
8983 if (stop >= to)
8984 to = stop;
8985 else
8986 numseg++;
8987 diff2 = to - stop;
8988 if (numseg == 0)
8989 return;
8990 sip = dsp->ids;
8991 prediff1 = diff1;
8992 prediff2 = diff2;
8993 for (i=0; i<numrows; i++)
8994 {
8995 bsp = BioseqLockById(sip);
8996 if (dsp->strands[i] == Seq_strand_minus)
8997 {
8998 if (dsp->starts[i]+dsp->lens[0]+diff1 > bsp->length)
8999 diff1 = bsp->length - (dsp->starts[i] + dsp->lens[0]);
9000 if (dsp->starts[(dsp->numseg-1)*dsp->dim+i] > diff2)
9001 diff2 = dsp->starts[(dsp->numseg-1)*dsp->dim+i];
9002 } else
9003 {
9004 if (dsp->starts[i] < diff1)
9005 diff1 = dsp->starts[i];
9006 if (dsp->starts[(dsp->numseg-1)*dsp->dim+i]+dsp->lens[dsp->numseg-1]+diff2 > bsp->length)
9007 diff2 = bsp->length - (dsp->starts[(dsp->numseg-1)*dsp->dim+i] + dsp->lens[dsp->numseg-1]);
9008 }
9009 sip = sip->next;
9010 BioseqUnlock(bsp);
9011 }
9012 if (diff1 == 0 && prediff1 != 0)
9013 numseg--;
9014 else if (diff1 < 0)
9015 numseg--;
9016 if (diff2 == 0 && prediff2 != 0)
9017 numseg--;
9018 else if (diff2 < 0)
9019 numseg--;
9020 if (numseg == 0)
9021 return;
9022 dsp_new = DenseSegNew();
9023 dsp_new->dim = dsp->dim;
9024 dsp_new->numseg = dsp->numseg+numseg;
9025 dsp_new->starts = (Int4Ptr)MemNew(dsp_new->dim*dsp_new->numseg*sizeof(Int4));
9026 dsp_new->lens = (Int4Ptr)MemNew(dsp_new->numseg*sizeof(Int4));
9027 dsp_new->strands = (Uint1Ptr)MemNew(dsp_new->dim*dsp_new->numseg*sizeof(Uint1));
9028 seg = 0;
9029 if (diff1 > 0)
9030 {
9031 for (j=0; j<dsp->dim; j++)
9032 {
9033 AlnMgr2GetNthSeqRangeInSA(sap, j+1, &start, &stop);
9034 if (dsp->strands[j] == Seq_strand_minus)
9035 dsp_new->starts[j] = stop+1;
9036 else
9037 dsp_new->starts[j] = start-diff1;
9038 dsp_new->strands[j] = dsp->strands[j];
9039 }
9040 dsp_new->lens[0] = diff1;
9041 seg++;
9042 }
9043 for (i=0; i<dsp->numseg; i++)
9044 {
9045 for (j=0; j<dsp->dim; j++)
9046 {
9047 dsp_new->starts[(seg)*dsp->dim+j] = dsp->starts[i*dsp->dim+j];
9048 dsp_new->strands[(seg)*dsp->dim+j] = dsp->strands[i*dsp->dim+j];
9049 }
9050 dsp_new->lens[seg] = dsp->lens[i];
9051 seg++;
9052 }
9053 if (diff2 > 0)
9054 {
9055 for (j=0; j<dsp->dim; j++)
9056 {
9057 AlnMgr2GetNthSeqRangeInSA(sap, j+1, &start, &stop);
9058 if (dsp->strands[j] == Seq_strand_minus)
9059 dsp_new->starts[seg*dsp->dim+j] = start-diff2;
9060 else
9061 dsp_new->starts[seg*dsp->dim+j] = stop+1;
9062 dsp_new->strands[seg*dsp->dim+j] = dsp->strands[j];
9063 }
9064 dsp_new->lens[seg] = diff2;
9065 }
9066 dsp_new->ids = dsp->ids;
9067 dsp->ids = NULL;
9068 DenseSegFree(dsp);
9069 sap->segs = (Pointer)dsp_new;
9070 SAIndex2Free2(sap->saip);
9071 sap->saip = NULL;
9072 AlnMgr2IndexSingleChildSeqAlign(sap);
9073 }
9074
9075 /* SECTION 10 */
9076 /***************************************************************************
9077 *
9078 * AlnMgr2PadConservatively extends an alignment so that the whole of
9079 * all sequences is included. If two sequences have tails on the same
9080 * side, they are each aligned with columns of all gaps:
9081 *
9082 * <-new aln region->
9083 * xxxxxxxx----------xxxxxxxxxxxxxxxxxxxx
9084 * --------xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
9085 *
9086 * This function returns a newly allocated alignment and doesn't change
9087 * the original (except for indexing). If the extension was not done for
9088 * some reason, the function returns NULL;
9089 *
9090 ***************************************************************************/
AlnMgr2PadConservatively(SeqAlignPtr sap)9091 NLM_EXTERN SeqAlignPtr AlnMgr2PadConservatively(SeqAlignPtr sap)
9092 {
9093 AMAlignIndex2Ptr amaip;
9094 BioseqPtr bsp;
9095 Int4 ctr1;
9096 Int4 ctr2;
9097 DenseSegPtr dsp;
9098 DenseSegPtr dsp_new;
9099 Int4 i;
9100 Int4 j;
9101 Int4Ptr lenarray;
9102 Int4 n1;
9103 Int4 n2;
9104 Int4 newseg;
9105 SeqAlignPtr sap_new;
9106 SeqIdPtr sip;
9107 Int4 start;
9108 Int4 stop;
9109 Uint1 strand;
9110
9111 if (sap == NULL || sap->next != NULL)
9112 return NULL;
9113 if (sap->saip == NULL)
9114 AlnMgr2IndexSeqAlign(sap);
9115 if (sap->saip->indextype == INDEX_PARENT)
9116 {
9117 amaip = (AMAlignIndex2Ptr)(sap->saip);
9118 if (amaip->alnstyle == AM2_LITE)
9119 return NULL;
9120 dsp = (DenseSegPtr)(amaip->sharedaln->segs);
9121 } else
9122 dsp = (DenseSegPtr)(sap->segs);
9123 newseg = 0;
9124 lenarray = (Int4Ptr)MemNew(dsp->dim*sizeof(Int4));
9125 n1 = n2 = 0;
9126 for (i=0; i<dsp->dim; i++)
9127 {
9128 sip = AlnMgr2GetNthSeqIdPtr(sap, i+1);
9129 bsp = BioseqLockById(sip);
9130 lenarray[i] = bsp->length;
9131 BioseqUnlock(bsp);
9132 SeqIdFree(sip);
9133 AlnMgr2GetNthSeqRangeInSA(sap, i+1, &start, &stop);
9134 if (start > 0)
9135 {
9136 n1++;
9137 newseg++;
9138 }
9139 if (stop < lenarray[i]-1)
9140 {
9141 newseg++;
9142 }
9143 }
9144 if (newseg == 0)
9145 {
9146 MemFree(lenarray);
9147 return NULL;
9148 }
9149 dsp_new = DenseSegNew();
9150 dsp_new->numseg = dsp->numseg + newseg;
9151 dsp_new->dim = dsp->dim;
9152 dsp_new->ids = SeqIdDupList(dsp->ids);
9153 dsp_new->starts = (Int4Ptr)MemNew(dsp_new->numseg*dsp_new->dim*sizeof(Int4));
9154 dsp_new->lens = (Int4Ptr)MemNew(dsp_new->numseg*sizeof(Int4));
9155 dsp_new->strands = (Uint1Ptr)MemNew(dsp_new->numseg*dsp_new->dim*sizeof(Uint1));
9156 n2 = n1+dsp->numseg;
9157 ctr1 = 0;
9158 ctr2 = 0;
9159 for (i=0; i<dsp->dim; i++)
9160 {
9161 AlnMgr2GetNthSeqRangeInSA(sap, i+1, &start, &stop);
9162 strand = AlnMgr2GetNthStrand(sap, i+1);
9163 if (strand == Seq_strand_minus && lenarray[i]-1-stop > 0)
9164 {
9165 for (j=0; j<ctr1; j++)
9166 {
9167 dsp_new->starts[dsp->dim*j+i] = -1;
9168 dsp_new->strands[dsp->dim*j+i] = strand;
9169 }
9170 dsp_new->starts[dsp->dim*ctr1+i] = stop+1;
9171 dsp_new->lens[ctr1] = lenarray[i]-1-stop;
9172 dsp_new->strands[dsp->dim*ctr1+i] = strand;
9173 for (j=ctr1+1; j<n1; j++)
9174 {
9175 dsp_new->starts[dsp->dim*j+i] = -1;
9176 dsp_new->strands[dsp->dim*j+i] = strand;
9177 }
9178 ctr1++;
9179 } else if (strand == Seq_strand_plus && start > 0)
9180 {
9181 for (j=0; j<ctr1; j++)
9182 {
9183 dsp_new->starts[dsp->dim*j+i] = -1;
9184 dsp_new->strands[dsp->dim*j+i] = strand;
9185 }
9186 dsp_new->starts[dsp->dim*ctr1+i] = 0;
9187 dsp_new->lens[ctr1] = start;
9188 dsp_new->strands[dsp->dim*ctr1+i] = strand;
9189 for (j=ctr1+1; j<n1; j++)
9190 {
9191 dsp_new->starts[dsp->dim*j+i] = -1;
9192 dsp_new->strands[dsp->dim*j+i] = strand;
9193 }
9194 ctr1++;
9195 } else /* nothing to add on this row, just fill in with -1s */
9196 {
9197 for (j=0; j<n1; j++)
9198 {
9199 dsp_new->starts[dsp->dim*j+i] = -1;
9200 dsp_new->strands[dsp->dim*j+i] = strand;
9201 }
9202 }
9203 /* now fill in the non-extended part of the alignment (copy from original) */
9204 for (j=0; j<dsp->numseg; j++)
9205 {
9206 dsp_new->starts[dsp->dim*(j+n1)+i] = dsp->starts[dsp->dim*j+i];
9207 dsp_new->lens[j+n1] = dsp->lens[j];
9208 dsp_new->strands[dsp->dim*(j+n1)+i] = dsp->strands[dsp->dim*j+i];
9209 }
9210 /* now the other ends */
9211 if (strand == Seq_strand_minus && start > 0)
9212 {
9213 for (j=n2; j<n2+ctr2; j++)
9214 {
9215 dsp_new->starts[dsp->dim*j+i] = -1;
9216 dsp_new->strands[dsp->dim*j+i] = strand;
9217 }
9218 dsp_new->starts[dsp->dim*(ctr2+n2)+i] = 0;
9219 dsp_new->lens[ctr2+n2] = start;
9220 dsp_new->strands[dsp->dim*(ctr2+n2)+i] = strand;
9221 for (j=n2+ctr2+1; j<dsp_new->numseg; j++)
9222 {
9223 dsp_new->starts[dsp->dim*j+i] = -1;
9224 dsp_new->strands[dsp->dim*j+i] = strand;
9225 }
9226 ctr2++;
9227 } else if (strand == Seq_strand_plus && lenarray[i]-1-stop > 0)
9228 {
9229 for (j=n2; j<ctr2+n2; j++)
9230 {
9231 dsp_new->starts[dsp->dim*j+i] = -1;
9232 dsp_new->strands[dsp->dim*j+i] = strand;
9233 }
9234 dsp_new->starts[dsp->dim*(ctr2+n2)+i] = stop+1;
9235 dsp_new->lens[ctr2+n2] = lenarray[i]-1-stop;
9236 dsp_new->strands[dsp->dim*(ctr2+n2)+i] = strand;
9237 for (j=ctr2+n2+1; j<dsp_new->numseg; j++)
9238 {
9239 dsp_new->starts[dsp->dim*j+i] = -1;
9240 dsp_new->strands[dsp->dim*j+i] = strand;
9241 }
9242 ctr2++;
9243 } else /* nothing to add on this row, just fill in with -1s */
9244 {
9245 for (j=n2; j<dsp_new->numseg; j++)
9246 {
9247 dsp_new->starts[dsp->dim*j+i] = -1;
9248 dsp_new->strands[dsp->dim*j+i] = strand;
9249 }
9250 }
9251 }
9252 sap_new = SeqAlignNew();
9253 sap_new->dim = dsp->dim;
9254 sap_new->segtype = SAS_DENSEG;
9255 sap_new->segs = (Pointer)(dsp_new);
9256 MemFree(lenarray);
9257 return sap_new;
9258 }
9259
9260 /* SECTION 10 */
9261 /***************************************************************************
9262 *
9263 * AlnMgr2ExtractPairwiseSeqAlign takes an indexed alignment (parent or
9264 * child, but must be fully indexed, not lite) and extracts a pairwise
9265 * subalignment containing the two requested rows. The subalignment is
9266 * unindexed and may have internal unaligned regions.
9267 *
9268 ***************************************************************************/
AlnMgr2ExtractPairwiseSeqAlign(SeqAlignPtr sap,Int4 n1,Int4 n2)9269 NLM_EXTERN SeqAlignPtr AlnMgr2ExtractPairwiseSeqAlign(SeqAlignPtr sap, Int4 n1, Int4 n2)
9270 {
9271 AMAlignIndex2Ptr amaip;
9272 DenseSegPtr dsp;
9273 DenseSegPtr dsp_new;
9274 Int4 i;
9275 Int4 j;
9276 Int4 n;
9277 SeqAlignPtr sap_new;
9278
9279 if (sap == NULL || sap->saip == NULL || n1 == n2 || n1 <= 0 || n2 <= 0)
9280 return NULL;
9281 if (sap->saip->indextype == INDEX_CHILD)
9282 dsp = (DenseSegPtr)(sap->segs);
9283 else
9284 {
9285 amaip = (AMAlignIndex2Ptr)(sap->saip);
9286 dsp = (DenseSegPtr)(amaip->sharedaln->segs);
9287 }
9288 if (n1 > dsp->dim || n2 > dsp->dim)
9289 return NULL;
9290 n = 0;
9291 for (i=0; i<dsp->numseg; i++)
9292 {
9293 if (dsp->starts[dsp->dim*i+n1-1] == -1 && dsp->starts[dsp->dim*i+n2-1] == -1)
9294 n++;
9295 }
9296 if (n == dsp->numseg) /* no overlap at all */
9297 return NULL;
9298 dsp_new = DenseSegNew();
9299 dsp_new->numseg = dsp->numseg - n;
9300 dsp_new->starts = (Int4Ptr)MemNew(2*dsp_new->numseg*sizeof(Int4));
9301 dsp_new->strands = (Uint1Ptr)MemNew(2*dsp_new->numseg*sizeof(Uint1));
9302 dsp_new->lens = (Int4Ptr)MemNew(dsp_new->numseg*sizeof(Int4));
9303 dsp_new->dim = 2;
9304 dsp_new->ids = AlnMgr2GetNthSeqIdPtr(sap, n1);
9305 dsp_new->ids->next = AlnMgr2GetNthSeqIdPtr(sap, n2);
9306 j = 0;
9307 for (i=0; i<dsp->numseg; i++)
9308 {
9309 if (dsp->starts[dsp->dim*i+n1-1] > -1 || dsp->starts[dsp->dim*i+n2-1] > -1)
9310 {
9311 dsp_new->starts[2*j] = dsp->starts[dsp->dim*i+n1-1];
9312 dsp_new->starts[2*j+1] = dsp->starts[dsp->dim*i+n2-1];
9313 dsp_new->strands[2*j] = dsp->strands[n1-1];
9314 dsp_new->strands[2*j+1] = dsp->strands[n2-1];
9315 dsp_new->lens[j] = dsp->lens[i];
9316 j++;
9317 }
9318 }
9319 sap_new = SeqAlignNew();
9320 sap_new->dim = 2;
9321 sap_new->type = SAT_PARTIAL;
9322 sap_new->segtype = SAS_DENSEG;
9323 sap_new->segs = (Pointer)dsp_new;
9324 return sap_new;
9325 }
9326
9327 /* SECTION 10 */
amconssetfree(AMConsSetPtr acp)9328 static void amconssetfree(AMConsSetPtr acp)
9329 {
9330 AMConsSetPtr acp_next;
9331
9332 while (acp != NULL)
9333 {
9334 acp_next = acp->next;
9335 MemFree(acp->starts);
9336 MemFree(acp->stops);
9337 MemFree(acp->strands);
9338 MemFree(acp);
9339 acp = acp_next;
9340 }
9341 }
9342
AlnMgr2SortForConsistent(VoidPtr ptr1,VoidPtr ptr2)9343 static int LIBCALLBACK AlnMgr2SortForConsistent(VoidPtr ptr1, VoidPtr ptr2)
9344 {
9345 AMConsSetPtr acp1;
9346 AMConsSetPtr acp2;
9347 SAIndex2Ptr saip1;
9348 SAIndex2Ptr saip2;
9349
9350 acp1 = *((AMConsSetPtr PNTR)ptr1);
9351 acp2 = *((AMConsSetPtr PNTR)ptr2);
9352 saip1 = (SAIndex2Ptr)(acp1->sap->saip);
9353 saip2 = (SAIndex2Ptr)(acp2->sap->saip);
9354 if (saip1->score == 0)
9355 saip1->score = AlnMgr2ComputeScoreForSeqAlign(acp1->sap);
9356 if (saip2->score == 0)
9357 saip2->score = AlnMgr2ComputeScoreForSeqAlign(acp2->sap);
9358 if (saip1->score > saip2->score)
9359 return -1;
9360 else if (saip1->score < saip2->score)
9361 return 1;
9362 else
9363 return 0;
9364 }
9365
9366 /* SECTION 10 */
9367 /***************************************************************************
9368 *
9369 * AlnMgr2RemoveInconsistentAlnsFromSet takes an alignment that is
9370 * indexed at least at the AM2_LITE level, and prunes the child
9371 * alignments so that the remaining alignments form a consistent,
9372 * nonoverlapping set. All alignments must have the same number of rows,
9373 * and they must be the same rows (although not necessarily in the same
9374 * order). The function uses a simple greedy algorithm to construct the
9375 * nonoverlapping set, starting with the highest-scoring alignment.
9376 * If fuzz is negative, the function creates the best nonoverlapping set
9377 * by actually truncating alignments.
9378 *
9379 ***************************************************************************/
AlnMgr2RemoveInconsistentAlnsFromSet(SeqAlignPtr sap_head,Int4 fuzz)9380 NLM_EXTERN void AlnMgr2RemoveInconsistentAlnsFromSet(SeqAlignPtr sap_head, Int4 fuzz)
9381 {
9382 AMConsSetPtr acp;
9383 AMConsSetPtr acp_head;
9384 AMConsSetPtr acp_prev;
9385 AMConsSetPtr PNTR acparray;
9386 DenseSegPtr dsp;
9387 Int4 i;
9388 Int4 j;
9389 Int4 k;
9390 Int4 lfuzz;
9391 SeqAlignPtr newsap;
9392 Int4 numrows;
9393 Int4 numsaps;
9394 Int4 orientation;
9395 Int4 row;
9396 SAIndex2Ptr saip;
9397 SeqAlignPtr salp_head;
9398 SeqAlignPtr salp_prev;
9399 SeqAlignPtr sap;
9400 SeqAlignPtr sapnext;
9401 Int4 score;
9402 SeqIdPtr sip;
9403 SeqIdPtr sip_head;
9404 Uint1 strand;
9405
9406 lfuzz = fuzz;
9407 if (fuzz < 0)
9408 fuzz = 1;
9409 sap = (SeqAlignPtr)(sap_head->segs);
9410 if (sap->next == NULL)
9411 return;
9412 dsp = (DenseSegPtr)(sap->segs);
9413 sip_head = dsp->ids;
9414 numrows = AlnMgr2GetNumRows(sap);
9415 acp_head = NULL;
9416 strand = AlnMgr2GetNthStrand(sap, 1);
9417 numsaps = 0;
9418 while (sap != NULL)
9419 {
9420 if (AlnMgr2GetNumRows(sap) != numrows)
9421 {
9422 amconssetfree(acp_head);
9423 return;
9424 }
9425 numsaps++;
9426 acp = (AMConsSetPtr)MemNew(sizeof(AMConsSet));
9427 acp->starts = (Int4Ptr)MemNew(numrows*sizeof(Int4));
9428 acp->stops = (Int4Ptr)MemNew(numrows*sizeof(Int4));
9429 acp->strands = (Uint1Ptr)MemNew(numrows*sizeof(Uint1));
9430 acp->which = (Int4Ptr)MemNew(numrows*sizeof(Int4));
9431 acp->sap = sap;
9432 if (acp_head != NULL)
9433 {
9434 acp_prev->next = acp;
9435 acp_prev = acp;
9436 } else
9437 acp_head = acp_prev = acp;
9438 sip = sip_head;
9439 row = AlnMgr2GetFirstNForSip(sap, sip);
9440 if (row <= 0)
9441 {
9442 amconssetfree(acp_head);
9443 return;
9444 }
9445 if (acp->strands[row] != strand)
9446 {
9447 sapnext = acp->sap->next;
9448 acp->sap->next = NULL;
9449 score = ((SAIndex2Ptr)(acp->sap->saip))->score;
9450 SeqAlignListReverseStrand(acp->sap);
9451 AMAlignIndexFreeEitherIndex(acp->sap);
9452 AlnMgr2IndexSingleChildSeqAlign(acp->sap);
9453 saip = (SAIndex2Ptr)(acp->sap->saip);
9454 saip->score = score;
9455 acp->strands[row] = strand;
9456 acp->sap->next = sapnext;
9457 }
9458 for (i=0; i<numrows; i++)
9459 {
9460 acp->which[i] = row;
9461 AlnMgr2GetNthSeqRangeInSA(sap, i+1, &acp->starts[i], &acp->stops[i]);
9462 acp->strands[i] = AlnMgr2GetNthStrand(sap, i+1);
9463 }
9464 sap = sap->next;
9465 }
9466 acparray = (AMConsSetPtr PNTR)MemNew(numsaps*sizeof(AMConsSetPtr));
9467 acp = acp_head;
9468 i = 0;
9469 while (acp != NULL)
9470 {
9471 acparray[i] = acp;
9472 acp = acp->next;
9473 i++;
9474 }
9475 HeapSort(acparray, numsaps, sizeof(AMConsSetPtr), AlnMgr2SortForConsistent);
9476 /* orientation -1 means that ith is before jth in ALL rows, 1 means ith is after jth in ALL rows */
9477 for (i=0; i<numsaps; i++)
9478 {
9479 if (acparray[i]->used != -1)
9480 {
9481 for (j=i+1; j<numsaps; j++)
9482 {
9483 orientation = 0;
9484 for (k=0; acparray[j]->used != -1 && k<numrows; k++)
9485 {
9486 if (acparray[i]->strands[k] != acparray[j]->strands[k])
9487 acparray[j]->used = -1;
9488 if (acparray[i]->starts[k] - fuzz < acparray[j]->starts[k])
9489 {
9490 if (acparray[i]->stops[k] - fuzz < acparray[j]->starts[k])
9491 {
9492 if ((acparray[i]->strands[k] == Seq_strand_plus && orientation == 1) || (acparray[i]->strands[k] == Seq_strand_minus && orientation == -1))
9493 acparray[j]->used = -1;
9494 else if (orientation == 0)
9495 {
9496 if (acparray[i]->strands[k] == Seq_strand_minus)
9497 orientation = 1;
9498 else
9499 orientation = -1;
9500 }
9501 } else
9502 {
9503 if (lfuzz >= 0) /* just mark it for deletion */
9504 acparray[j]->used = -1;
9505 else /* truncate it */
9506 {
9507 if (acparray[j]->stops[k] > acparray[i]->stops[k])
9508 {
9509 newsap = AlnMgr2GetSubAlign(acparray[j]->sap, acparray[i]->stops[k]+1, acparray[j]->stops[k], k+1, TRUE);
9510 SeqAlignFree(acparray[j]->sap);
9511 acparray[j]->sap = newsap;
9512 acparray[j]->starts[k] = acparray[i]->stops[k]+1;
9513 } else
9514 acparray[j]->used = -1;
9515 }
9516 }
9517 } else if (acparray[i]->starts[k] - fuzz > acparray[j]->starts[k])
9518 {
9519 if (acparray[i]->starts[k] + fuzz > acparray[j]->stops[k])
9520 {
9521 if ((acparray[i]->strands[k] == Seq_strand_plus && orientation == -1) || (acparray[i]->strands[k] == Seq_strand_minus && orientation == 1))
9522 acparray[j]->used = -1;
9523 else if (orientation == 0)
9524 {
9525 if (acparray[i]->strands[k] == Seq_strand_minus)
9526 orientation = -1;
9527 else
9528 orientation = 1;
9529 }
9530 } else
9531 {
9532 if (lfuzz >= 0) /* mark for deletion */
9533 acparray[j]->used = -1;
9534 else /* truncate */
9535 {
9536 if (acparray[j]->starts[k] < acparray[i]->starts[k])
9537 {
9538 newsap = AlnMgr2GetSubAlign(acparray[j]->sap, acparray[j]->starts[k], acparray[i]->starts[k]-1, k+1, TRUE);
9539 SeqAlignFree(acparray[j]->sap);
9540 acparray[j]->sap = newsap;
9541 AlnMgr2IndexSingleChildSeqAlign(newsap);
9542 acparray[j]->starts[k] = acparray[i]->stops[k]+1;
9543 } else
9544 acparray[j]->used = -1;
9545 }
9546 }
9547 } else
9548 acparray[j]->used = -1;
9549 }
9550 }
9551 }
9552 }
9553 /* now free all the unused ones, stick the rest back together, reindex, and return */
9554 salp_head = salp_prev = NULL;
9555 for (i=0; i<numsaps; i++)
9556 {
9557 if (acparray[i]->used == -1)
9558 {
9559 SeqAlignFree(acparray[i]->sap);
9560 acparray[i]->sap = NULL;
9561 } else
9562 {
9563 if (salp_head != NULL)
9564 {
9565 salp_prev->next = acparray[i]->sap;
9566 salp_prev = acparray[i]->sap;
9567 salp_prev->next = NULL;
9568 } else
9569 {
9570 salp_head = salp_prev = acparray[i]->sap;
9571 salp_prev->next = NULL;
9572 }
9573 }
9574 }
9575 amconssetfree(acp_head);
9576 MemFree(acparray);
9577 sap_head->segs = (Pointer)(salp_head);
9578 AMAlignIndex2Free2(sap_head->saip);
9579 AlnMgr2IndexLite(sap_head);
9580 }
9581
AlnMgr2CompareByScore(VoidPtr ptr1,VoidPtr ptr2)9582 static int LIBCALLBACK AlnMgr2CompareByScore(VoidPtr ptr1, VoidPtr ptr2)
9583 {
9584 SAIndex2Ptr saip1;
9585 SAIndex2Ptr saip2;
9586 SeqAlignPtr sap1;
9587 SeqAlignPtr sap2;
9588
9589 if (ptr1 == NULL || ptr2 == NULL)
9590 return 0;
9591 sap1 = *((SeqAlignPtr PNTR) ptr1);
9592 sap2 = *((SeqAlignPtr PNTR) ptr2);
9593 saip1 = (SAIndex2Ptr)(sap1->saip);
9594 saip2 = (SAIndex2Ptr)(sap2->saip);
9595 if (saip1->score == 0)
9596 saip1->score = AlnMgr2ComputeScoreForSeqAlign(sap1);
9597 if (saip2->score == 0)
9598 saip2->score = AlnMgr2ComputeScoreForSeqAlign(sap2);
9599 if (saip1->score > saip2->score)
9600 return -1;
9601 if (saip1->score < saip2->score)
9602 return 1;
9603 return 0;
9604 }
9605
9606 /***************************************************************************
9607 *
9608 * AlnMgr2FuseSet takes a set of alignments sharing all their rows and orders
9609 * the alignments, then fuses together any adjacent alignments. If returnall
9610 * is TRUE, all pieces are returned; if not, then only the largest piece is
9611 * returned. This function will work best when called after
9612 * AlnMgr2RemoveInconsistentAlnsFromSet(sap_head, -1).
9613 *
9614 ***************************************************************************/
AlnMgr2FuseSet(SeqAlignPtr sap_head,Boolean returnall)9615 NLM_EXTERN SeqAlignPtr AlnMgr2FuseSet(SeqAlignPtr sap_head, Boolean returnall)
9616 {
9617 AMAlignIndex2Ptr amaip;
9618 DenseSegPtr dsp_new;
9619 DenseSegPtr dsp1;
9620 DenseSegPtr dsp2;
9621 Boolean found;
9622 Int4 i;
9623 Int4 n;
9624 Int4 numrows;
9625 Int4 r;
9626 SeqAlignPtr sap_keep;
9627 SeqAlignPtr sap_keep_head;
9628 SeqAlignPtr sap_keep_prev;
9629 SAIndex2Ptr saip;
9630 SeqAlignPtr PNTR saparray;
9631 Int4 start1;
9632 Int4 start2;
9633 Int4 stop1;
9634 Int4 stop2;
9635 Uint1 strand;
9636
9637 if (sap_head == NULL || sap_head->saip == NULL)
9638 return NULL;
9639 AlnMgr2SortAlnSetByNthRowPos(sap_head, 1);
9640 amaip = (AMAlignIndex2Ptr)(sap_head->saip);
9641 sap_keep = amaip->saps[0];
9642 sap_keep_head = sap_keep_prev = NULL;
9643 numrows = AlnMgr2GetNumRows(sap_keep);
9644 for (i=1; i<amaip->numsaps; i++)
9645 {
9646 /* check for consistency with sap_keep; fuse if possible */
9647 found = FALSE;
9648 for (n=0; !found && n<numrows; n++)
9649 {
9650 strand = AlnMgr2GetNthStrand(sap_keep, n+1);
9651 AlnMgr2GetNthSeqRangeInSA(sap_keep, n+1, &start1, &stop1);
9652 AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], n+1, &start2, &stop2);
9653 if (strand == Seq_strand_minus)
9654 {
9655 if (stop2+1 != start1)
9656 found = TRUE;
9657 } else
9658 {
9659 if (start2 != stop1+1)
9660 found = TRUE;
9661 }
9662 }
9663 if (!found) /* fuse together */
9664 {
9665 dsp1 = (DenseSegPtr)(sap_keep->segs);
9666 dsp2 = (DenseSegPtr)(amaip->saps[i]->segs);
9667 dsp_new = DenseSegNew();
9668 dsp_new->dim = dsp1->dim;
9669 dsp_new->numseg = dsp1->numseg+dsp2->numseg;
9670 dsp_new->starts = (Int4Ptr)MemNew(dsp_new->numseg*dsp_new->dim*sizeof(Int4));
9671 dsp_new->lens = (Int4Ptr)MemNew(dsp_new->numseg*sizeof(Int4));
9672 dsp_new->strands = (Uint1Ptr)MemNew(dsp_new->numseg*dsp_new->dim*sizeof(Int4));
9673 for (n=0; n<dsp_new->numseg; n++)
9674 {
9675 for (r=0; r<dsp_new->dim; r++)
9676 {
9677 if (n >= dsp1->numseg)
9678 dsp_new->starts[r*n*r] = dsp2->starts[r*(n-dsp1->numseg)+r];
9679 else
9680 dsp_new->starts[r*n+r] = dsp1->starts[r*n+r];
9681 dsp_new->strands[r*n*r] = dsp1->strands[r];
9682 }
9683 if (n >= dsp1->numseg)
9684 dsp_new->lens[n] = dsp2->lens[n-dsp1->numseg];
9685 else
9686 dsp_new->lens[n] = dsp1->lens[n];
9687 }
9688 SeqAlignFree(amaip->saps[i]);
9689 amaip->saps[i] = NULL;
9690 } else /* add next alignment to keepers pile */
9691 {
9692 if (sap_keep_head == NULL)
9693 {
9694 if (sap_keep != NULL)
9695 {
9696 sap_keep_head = sap_keep;
9697 sap_keep->next = amaip->saps[i];
9698 sap_keep_prev = amaip->saps[i];
9699 } else
9700 sap_keep_head = sap_keep_prev = amaip->saps[i];
9701 } else
9702 {
9703 sap_keep_prev->next = amaip->saps[i];
9704 sap_keep_prev = amaip->saps[i];
9705 }
9706 }
9707 }
9708 if (sap_keep_head == NULL || sap_keep_head->next == NULL) /* everything was fused */
9709 sap_keep_head = sap_keep;
9710 if (returnall)
9711 {
9712 sap_head->segs = (Pointer)(sap_keep_head);
9713 return sap_keep_head;
9714 }
9715 i=0;
9716 sap_keep = sap_keep_head;
9717 while (sap_keep != NULL)
9718 {
9719 sap_keep = sap_keep->next;
9720 i++;
9721 }
9722 saparray = (SeqAlignPtr PNTR)MemNew(i*sizeof(SeqAlignPtr));
9723 i = 0;
9724 sap_keep = sap_keep_head;
9725 while (sap_keep != NULL)
9726 {
9727 saip = (SAIndex2Ptr)(sap_keep->saip);
9728 saip->score = 0;
9729 saparray[i] = sap_keep;
9730 i++;
9731 sap_keep = sap_keep->next;
9732 }
9733 HeapSort(saparray, i, sizeof(SeqAlignPtr), AlnMgr2CompareByScore);
9734 sap_keep = saparray[0];
9735 for (n=1; n<i; n++)
9736 {
9737 SeqAlignFree(saparray[n]);
9738 }
9739 MemFree(saparray);
9740 return sap_keep;
9741 }
9742
AlnMgr2FillInUnaligned(SeqAlignPtr sap)9743 NLM_EXTERN void AlnMgr2FillInUnaligned(SeqAlignPtr sap)
9744 {
9745 Int4 curr;
9746 DenseSegPtr dsp;
9747 DenseSegPtr dsp_new;
9748 Boolean found;
9749 Int4 i;
9750 Int4 j;
9751 Int4 k;
9752 Int4 last;
9753 Int4 n;
9754 Int4 offset;
9755 Int4 start;
9756 Int4 stop;
9757 Uint1 strand;
9758
9759 if (sap == NULL || (sap->saip != NULL && sap->saip->indextype != INDEX_CHILD))
9760 return;
9761 n = 0;
9762 dsp = (DenseSegPtr)(sap->segs);
9763 for (i=0; i<dsp->dim; i++)
9764 {
9765 j = 0;
9766 AlnMgr2GetNthSeqRangeInSA(sap, i, &start, &stop);
9767 strand = dsp->strands[i];
9768 last = -1;
9769 while (j<dsp->numseg-1)
9770 {
9771 if (strand == Seq_strand_minus)
9772 {
9773 if (last != -1)
9774 {
9775 found = FALSE;
9776 while (j<dsp->numseg && !found)
9777 {
9778 if (dsp->starts[j*dsp->dim+i] != -1)
9779 {
9780 if (dsp->starts[j*dsp->dim+i]+dsp->lens[j] != last)
9781 n++;
9782 found = TRUE;
9783 }
9784 if (!found)
9785 j++;
9786 }
9787 } else
9788 last = dsp->starts[j*dsp->dim+i];
9789 } else
9790 {
9791 if (last != -1)
9792 {
9793 found = FALSE;
9794 while (j<dsp->numseg && !found)
9795 {
9796 if (dsp->starts[j*dsp->dim+i] != -1)
9797 {
9798 if (dsp->starts[j*dsp->dim+i]+dsp->lens[j] != last)
9799 n++;
9800 found = TRUE;
9801 }
9802 if (!found)
9803 j++;
9804 }
9805 } else
9806 {
9807 last = dsp->starts[j*dsp->dim+i];
9808 if (last != -1)
9809 last += dsp->lens[j];
9810 }
9811 }
9812 }
9813 }
9814 if (n == 0) /* no unaligned regions */
9815 return;
9816 dsp_new = DenseSegNew();
9817 dsp_new->numseg = dsp->numseg + n;
9818 dsp_new->dim = dsp->dim;
9819 dsp_new->starts = (Int4Ptr)MemNew(dsp_new->dim*dsp_new->numseg*sizeof(Int4));
9820 dsp_new->strands = (Uint1Ptr)MemNew(dsp_new->dim*dsp_new->numseg*sizeof(Uint1));
9821 for (i=0; i<dsp_new->numseg; i++)
9822 {
9823 for (j=0; j<dsp_new->dim; j++)
9824 {
9825 dsp_new->strands[i*dsp_new->dim+j] = dsp->strands[j];
9826 }
9827 }
9828 dsp_new->ids = SeqIdDupList(dsp->ids);
9829 dsp_new->lens = (Int4Ptr)MemNew(dsp_new->numseg*sizeof(Int4));
9830 curr = 0;
9831 for (j=0; j<dsp->numseg; j++)
9832 {
9833 for (i=0; i<dsp->dim; i++)
9834 {
9835 offset = 0;
9836 strand = dsp->strands[i];
9837 if (dsp->starts[j*dsp->dim+i] == -1)
9838 dsp_new->starts[curr*dsp_new->dim+i] = -1;
9839 else
9840 {
9841 k = j+1;
9842 found = FALSE;
9843 while (k < dsp->numseg)
9844 {
9845 if (dsp->starts[k*dsp->dim+i] != -1)
9846 {
9847 found = TRUE;
9848 if (strand == Seq_strand_minus)
9849 {
9850 if (dsp->starts[k*dsp->dim+i] + dsp->lens[k] != dsp->starts[j*dsp->dim+i])
9851 {
9852 dsp_new->lens[curr+offset] = dsp->starts[j*dsp->dim+i] - dsp->starts[k*dsp->dim+i] - dsp->lens[k];
9853 dsp_new->starts[(curr+offset)*dsp->dim+i] = dsp->starts[k*dsp->dim+i] + dsp->lens[k];
9854 offset++;
9855 }
9856 } else
9857 {
9858 if (dsp->starts[j*dsp->dim+i] + dsp->lens[j] != dsp->starts[k*dsp->dim+i])
9859 {
9860 dsp_new->lens[curr+offset] = dsp->starts[k*dsp->dim+i] - dsp->starts[j*dsp->dim+i] - dsp->lens[j];
9861 dsp_new->starts[(curr+offset)*dsp->dim+i] = dsp->starts[j*dsp->dim+i] + dsp->lens[j];
9862 }
9863 }
9864 }
9865 k++;
9866 }
9867 }
9868 }
9869 curr = curr + 1 + offset;
9870 }
9871 DenseSegFree(dsp);
9872 sap->segs = (Pointer)(dsp_new);
9873 AMAlignIndexFreeEitherIndex(sap);
9874 }
9875
9876 /* SECTION 11 -- functions for std-segs */
AlnMgr2GetNthSeqIdPtrStdSeg(SeqAlignPtr sap,Int4 n)9877 NLM_EXTERN SeqIdPtr AlnMgr2GetNthSeqIdPtrStdSeg(SeqAlignPtr sap, Int4 n)
9878 {
9879 SeqLocPtr slp;
9880 StdSegPtr ssp;
9881
9882 if (sap == NULL || sap->segtype != SAS_STD)
9883 return NULL;
9884 ssp = (StdSegPtr)(sap->segs);
9885 slp = ssp->loc;
9886 n--;
9887 while (n > 0)
9888 {
9889 if (slp == NULL)
9890 return NULL;
9891 slp = slp->next;
9892 n--;
9893 }
9894 return (SeqIdDup(SeqLocId(slp)));
9895 }
9896
AlignMgr2GetFirstNForStdSeg(SeqAlignPtr sap,SeqIdPtr sip)9897 NLM_EXTERN Int4 AlignMgr2GetFirstNForStdSeg(SeqAlignPtr sap, SeqIdPtr sip)
9898 {
9899 Int4 i;
9900 SeqIdPtr sip_tmp;
9901 StdSegPtr ssp;
9902
9903 if (sap == NULL || sap->segtype != SAS_STD)
9904 return -1;
9905 ssp = (StdSegPtr)(sap->segs);
9906 sip_tmp = ssp->ids;
9907 i = 1;
9908 while (sip_tmp != NULL)
9909 {
9910 if (SeqIdComp(sip, sip_tmp) == SIC_YES)
9911 return i;
9912 sip_tmp = sip_tmp->next;
9913 i++;
9914 }
9915 return -1;
9916 }
9917
AlnMgr2GetNthSeqRangeInSAStdSeg(SeqAlignPtr sap,Int4 n,Int4Ptr start,Int4Ptr stop)9918 NLM_EXTERN void AlnMgr2GetNthSeqRangeInSAStdSeg(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop)
9919 {
9920 SeqLocPtr slp;
9921 StdSegPtr ssp;
9922
9923 if (start != NULL)
9924 *start = -1;
9925 if (stop != NULL)
9926 *stop = -1;
9927 if (sap == NULL || sap->segtype != SAS_STD)
9928 return;
9929 ssp = (StdSegPtr)(sap->segs);
9930 slp = ssp->loc;
9931 n--;
9932 while (n > 0)
9933 {
9934 if (slp == NULL)
9935 return;
9936 slp = slp->next;
9937 n--;
9938 }
9939 if (slp == NULL)
9940 return;
9941 if (start != NULL)
9942 *start = SeqLocStart(slp);
9943 if (stop != NULL)
9944 *stop = SeqLocStop(slp);
9945 }
9946
9947
9948 /***************************************************************************
9949 *
9950 * AlnMgr2GetSeqRangeForSipInSAStdSeg returns the smallest and largest sequence
9951 * coordinates in in a Std-Seg seqalign for a given Sequence Id. Also return the
9952 * strand type. Either start, stop or strand can be NULL to only retrieve some of them.
9953 * If start and stop are -1, there is an error (not a std-seg), the SeqID does not participate in this
9954 * alignment or the alignment is one big insert on that id. Returns true if the sip was found
9955 * in the alignment with real coordinates, i.e. *start would not be -1. RANGE
9956 *
9957 ***************************************************************************/
AlnMgr2GetSeqRangeForSipInSAStdSeg(SeqAlignPtr sap,SeqIdPtr sip,Int4Ptr start,Int4Ptr stop,Uint1Ptr strand)9958 NLM_EXTERN Boolean AlnMgr2GetSeqRangeForSipInSAStdSeg(SeqAlignPtr sap, SeqIdPtr sip, Int4Ptr start, Int4Ptr stop, Uint1Ptr strand)
9959 {
9960 Int4 c_start, c_stop;
9961 Uint1 c_strand;
9962 StdSegPtr ssp;
9963 Boolean range_found = FALSE;
9964 Boolean strands_inconsistent = FALSE;
9965
9966 if (start) *start = -1;
9967 if (stop) *stop = -1;
9968 if (strand) *strand = Seq_strand_unknown;
9969
9970 if (sap->segtype != SAS_STD)
9971 return FALSE;
9972
9973 ssp = (StdSegPtr)(sap->segs);
9974 while (ssp) {
9975 if (AlnMgr2GetSeqRangeForSipInStdSeg(ssp, sip, &c_start, &c_stop, &c_strand, NULL) &&
9976 c_start != -1) /* skip inserts on our bioseq */
9977 {
9978 range_found = TRUE;
9979
9980 if (start) {
9981 if (*start == -1) {
9982 *start = c_start;
9983 } else {
9984 *start = MIN(*start, c_start);
9985 }
9986 }
9987 if (stop) {
9988 *stop = MAX(*stop, c_stop);
9989 }
9990 if (strand && ! strands_inconsistent) {
9991 /* if strands are different each time, ignore them. */
9992 if (*strand != Seq_strand_unknown && *strand != c_strand) {
9993 *strand = Seq_strand_unknown;
9994 strands_inconsistent = TRUE;
9995 } else {
9996 *strand = c_strand;
9997 }
9998 }
9999 }
10000 ssp = ssp->next;
10001 }
10002 return range_found;
10003 }
10004
10005
10006 /***************************************************************************
10007 *
10008 * AlnMgr2GetSeqRangeForSipInStdSeg returns the start and stop sequence
10009 * coordinates in a Std-Segment for a given Sequence Id. Also return the
10010 * strand type. Either start, stop or strand can be NULL to only retrieve some of them.
10011 * If start and stop are -1, the SeqID was not found in this segment.
10012 * Returns true if the sip was found, even if it is a gap (start, stop = -1). RANGE
10013 *
10014 ***************************************************************************/
AlnMgr2GetSeqRangeForSipInStdSeg(StdSegPtr ssp,SeqIdPtr sip,Int4Ptr start,Int4Ptr stop,Uint1Ptr strand,Uint1Ptr segType)10015 NLM_EXTERN Boolean AlnMgr2GetSeqRangeForSipInStdSeg(
10016 StdSegPtr ssp,
10017 SeqIdPtr sip,
10018 Int4Ptr start,
10019 Int4Ptr stop,
10020 Uint1Ptr strand,
10021 Uint1Ptr segType) /* AM_SEQ, AM_GAP, AM_INSERT */
10022 {
10023 SeqLocPtr loc;
10024 Uint1 m_strand;
10025 Int4 m_start, m_stop, m_swap;
10026 Boolean s_present = FALSE;
10027 Boolean m_present = FALSE;
10028 Boolean found_id = FALSE;
10029
10030 for ( loc = ssp->loc;
10031 loc != NULL;
10032 loc = loc->next ) {
10033 /* One SeqLoc for each Sequence aligned by this segment. */
10034 /* find the one that matches the sip parameter. */
10035 if (SeqIdForSameBioseq(sip, SeqLocId(loc))) {
10036 m_strand = SeqLocStrand(loc);
10037 m_start = SeqLocStart(loc);
10038 m_stop = SeqLocStop(loc);
10039 /* Might have to reverse the order of start and stop on
10040 minus strands so that start is less than stop. */
10041 if (m_start > m_stop) {
10042 m_swap = m_start;
10043 m_start = m_stop;
10044 m_stop = m_swap;
10045 }
10046 if (start) *start = m_start;
10047 if (stop) *stop = m_stop;
10048 if (strand) *strand = m_strand;
10049 if (m_start != -1)
10050 m_present = TRUE;
10051
10052 /* found our sequence in this segment. */
10053 found_id = TRUE;
10054 } else { /* a different sequence */
10055 if (SeqLocStart(loc) != -1)
10056 s_present = TRUE;
10057 }
10058 }
10059
10060 if (segType) {
10061 if (m_present && s_present)
10062 *segType = AM_SEQ;
10063 else if (!m_present && s_present)
10064 *segType = AM_INSERT;
10065 else if (m_present && !s_present)
10066 *segType = AM_GAP;
10067 else
10068 *segType = AM_GAP; /* start will be -1 */
10069 }
10070 return found_id;
10071 }
10072
10073
10074 /***************************************************************************
10075 *
10076 * AlnMgr2GetNthStdSeg returns the a pointer to the Nth segment of
10077 * a standard segment alignment. Numbering starts with 1.
10078 * returns NULL if not n segments or is not a std-seg aligment.
10079 * Useful to pass its return value to AlnMgr2GetSeqRangeForSipInStdSeg()
10080 *
10081 ***************************************************************************/
AlnMgr2GetNthStdSeg(SeqAlignPtr sap,Int2 n)10082 NLM_EXTERN StdSegPtr AlnMgr2GetNthStdSeg(SeqAlignPtr sap, Int2 n)
10083 {
10084 StdSegPtr ssp;
10085 Int2 i;
10086
10087 if (sap == NULL || sap->segtype != SAS_STD || n < 1)
10088 return NULL;
10089
10090 i = 1;
10091 ssp = (StdSegPtr)(sap->segs);
10092 while(ssp)
10093 {
10094 if (i == n)
10095 return ssp;
10096 ++i;
10097 ssp = ssp->next;
10098 }
10099
10100 return NULL;
10101 }
10102
10103 /***************************************************************************
10104 *
10105 * AlnMgr2GetNumStdSegs returns the number of segments in a standar-seg alignment.
10106 * returns -1 if sap is null or not a standard-seg alignment.
10107 *
10108 ***************************************************************************/
AlnMgr2GetNumStdSegs(SeqAlignPtr sap)10109 NLM_EXTERN Int4 AlnMgr2GetNumStdSegs(SeqAlignPtr sap)
10110 {
10111 Int4 seg_count = 0;
10112 StdSegPtr ssp;
10113
10114 if (sap == NULL || sap->segtype != SAS_STD)
10115 return -1;
10116
10117 ssp = (StdSegPtr)(sap->segs);
10118 while(ssp)
10119 {
10120 ++seg_count;
10121 ssp = ssp->next;
10122 }
10123 return seg_count;
10124 }
10125
AlnMgr2GetLongestSeqLoc(SeqAlignPtr sap)10126 static SeqLocPtr AlnMgr2GetLongestSeqLoc(SeqAlignPtr sap)
10127 {
10128 Int4 longest;
10129 Int4 n;
10130 SeqLocPtr slp;
10131 SeqLocPtr slp_longest;
10132 StdSegPtr ssp;
10133
10134 if (sap == NULL || sap->segtype != SAS_STD)
10135 return NULL;
10136 longest = -1;
10137 ssp = (StdSegPtr)(sap->segs);
10138 slp = ssp->loc;
10139 while (slp != NULL)
10140 {
10141 n = SeqLocLen(slp);
10142 if (n > longest)
10143 {
10144 slp_longest = slp;
10145 longest = n;
10146 }
10147 slp = slp->next;
10148 }
10149 return slp_longest;
10150 }
10151
10152 /***************************************************************************
10153 *
10154 * The two mapping functions act a little differently for std-segs. The
10155 * alignment coordinates are 1:1 linearly correlated with the longest
10156 * seqloc in the set; the others may be significantly shorter.
10157 * The mapping functions deal with % lengths, and map those instead of
10158 * coordinates (which may not be linear);
10159 *
10160 ***************************************************************************/
AlnMgr2MapBioseqToSeqAlignStdSeg(SeqAlignPtr sap,Int4 n,Int4 pos)10161 NLM_EXTERN Int4 AlnMgr2MapBioseqToSeqAlignStdSeg(SeqAlignPtr sap, Int4 n, Int4 pos)
10162 {
10163 SeqLocPtr slp;
10164 SeqLocPtr slp_longest;
10165 StdSegPtr ssp;
10166 Int4 start1;
10167 Int4 start2;
10168 Int4 stop1;
10169 Int4 stop2;
10170
10171 if (sap == NULL || sap->segtype != SAS_STD)
10172 return -1;
10173 slp_longest = AlnMgr2GetLongestSeqLoc(sap);
10174 start1 = SeqLocStart(slp_longest);
10175 stop1 = SeqLocStop(slp_longest);
10176 ssp = (StdSegPtr)(sap->segs);
10177 slp = ssp->loc;
10178 n--;
10179 while (n > 0)
10180 {
10181 if (slp == NULL)
10182 return -1;
10183 n--;
10184 slp = slp->next;
10185 }
10186 if (slp == NULL)
10187 return -1;
10188 start2 = SeqLocStart(slp);
10189 stop2 = SeqLocStop(slp);
10190 if (start2 == -1) /* NULL */
10191 return -1;
10192 return (((stop1-start1)*(pos - start2))/(stop2-start2));
10193 }
10194
AlnMgr2MapSeqAlignToBioseqStdSeg(SeqAlignPtr sap,Int4 n,Int4 pos)10195 NLM_EXTERN Int4 AlnMgr2MapSeqAlignToBioseqStdSeg(SeqAlignPtr sap, Int4 n, Int4 pos)
10196 {
10197 SeqLocPtr slp;
10198 SeqLocPtr slp_longest;
10199 StdSegPtr ssp;
10200 Int4 start1;
10201 Int4 start2;
10202 Int4 stop1;
10203 Int4 stop2;
10204
10205 if (sap == NULL || sap->segtype != SAS_STD)
10206 return -1;
10207 slp_longest = AlnMgr2GetLongestSeqLoc(sap);
10208 start1 = SeqLocStart(slp_longest);
10209 stop1 = SeqLocStop(slp_longest);
10210 ssp = (StdSegPtr)(sap->segs);
10211 slp = ssp->loc;
10212 n--;
10213 while (n > 0)
10214 {
10215 if (slp == NULL)
10216 return -1;
10217 n--;
10218 slp = slp->next;
10219 }
10220 if (slp == NULL)
10221 return -1;
10222 start2 = SeqLocStart(slp);
10223 stop2 = SeqLocStop(slp);
10224 if (start2 == -1) /* NULL */
10225 return -1;
10226 return (start2 + ((stop2-start2)*(pos-start1))/(stop1-start1));
10227 }
10228
AlnMgr2GetAlnLengthStdSeg(SeqAlignPtr sap)10229 NLM_EXTERN Int4 AlnMgr2GetAlnLengthStdSeg(SeqAlignPtr sap)
10230 {
10231 SeqLocPtr slp_longest;
10232
10233 if (sap == NULL || sap->segtype != SAS_STD)
10234 return -1;
10235 slp_longest = AlnMgr2GetLongestSeqLoc(sap);
10236 return (SeqLocLen(slp_longest));
10237 }
10238