1 /* ===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *            National Center for Biotechnology Information (NCBI)
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government do not place any restriction on its use or reproduction.
12 *  We would, however, appreciate having the NCBI and the author cited in
13 *  any work or product based on this material.
14 *
15 *  Although all reasonable efforts have been taken to ensure the accuracy
16 *  and reliability of the software and data, the NLM and the U.S.
17 *  Government do not and cannot warrant the performance or results that
18 *  may be obtained by using this software or data. The NLM and the U.S.
19 *  Government disclaim all warranties, express or implied, including
20 *  warranties of performance, merchantability or fitness for any particular
21 *  purpose.
22 *
23 * ===========================================================================
24 *
25 * File Name:  alignmgr2.c
26 *
27 * Author:  Sarah Wheelan
28 *
29 * Version Creation Date:  10/01
30 *
31 * $Revision: 6.66 $
32 *
33 * File Description: SeqAlign indexing, access, and manipulation functions
34 *
35 * Modifications:
36 * --------------------------------------------------------------------------
37 * $Log: alignmgr2.c,v $
38 * Revision 6.66  2016/09/02 14:57:38  ucko
39 * Formally clean up calls to printf-family functions that are at least
40 * nominally unsafe, as already done in Debian/Ubuntu packages.
41 *
42 * Revision 6.65  2013/11/26 01:23:42  kans
43 * JIRA:GP-6623 AlnMgr2ConvertAllToDenseSeg bails specifically for Spliced-seg
44 *
45 * Revision 6.64  2013/11/26 00:15:42  kans
46 * JIRA:GP-5360 AlnMgr2ConvertAllToDenseSeg returns Boolean if not Dense-diag or Dense-seg to avoid crash on Spliced-seg
47 *
48 * Revision 6.63  2008/12/01 19:35:39  bollin
49 * prevent crash when mapping positions and row of alignment is entirely in the gap.
50 *
51 * Revision 6.62  2007/03/09 20:37:06  bollin
52 * Fixed insidious double-increment bug in AlnMgr2MergeTwoAlignments - if the
53 * second alignment to be merged had more than one segment, the seg index was
54 * incremented past the number of segments (and some segments were not initialized).
55 *
56 * Revision 6.61  2007/01/09 14:13:52  bollin
57 * Fixed bug in AlnMgr2ExtendToCoords - prior version was not extending on 5' end.
58 *
59 * Revision 6.60  2006/09/06 15:48:33  bollin
60 * removed compiler warnings
61 *
62 * Revision 6.59  2006/09/06 15:14:54  bollin
63 * fixed bug that was generating segments of length zero at the end of an
64 * alignment
65 *
66 * Revision 6.58  2005/03/01 13:56:03  bollin
67 * if the alignment we want to index is a DenseSeg and not a list of alignments,
68 * just give it a simple index - don't decompose to pairwise and reconstruct it.
69 *
70 * Revision 6.57  2005/02/23 14:40:55  bollin
71 * when condensing columns in AlnMgr2CondenseColumns, make sure we do not
72 * disturb the ascending order of starts for each row
73 *
74 * Revision 6.56  2004/09/15 14:59:19  bollin
75 * make sure we do not read outside the alignment index arrays
76 *
77 * Revision 6.55  2004/05/20 19:46:25  bollin
78 * removed unused variables
79 *
80 * Revision 6.54  2004/05/11 13:19:49  bollin
81 * update the dimension of the shared alignment after adding a sequence.
82 *
83 * Revision 6.53  2004/04/13 14:43:07  kskatz
84 * Final resolution of revisions 6.51 and 6.52: reverted 6.52; then  cleaned up readability of AlnMgr2SeqPortRead() and ensured that it will never call SeqPortRead for a length > AM_SEQPORTSIZE
85 *
86 * Revision 6.52  2004/04/12 19:52:15  kskatz
87 * Revision 6.51 was right neighborhood,wrong off-by-one: It was in AlnMgr2ComputeFreqMatrix() call to AlnMgr2SeqPortRead() when using l+AM_SEQPORTSIZE instead of l+AM_SEQPORTSIZE-1
88 *
89 * Revision 6.51  2004/04/12 17:00:44  kskatz
90 * Fixed off-by-one error in AlnMgr2SeqPortRead() length passed to SeqPortRead(); stop-start+1 changed to stop-start
91 *
92 * Revision 6.50  2004/03/11 14:15:41  bollin
93 * added extra check in AlnMgr2GetNthSeqIdPtr to avoid core dump if there are
94 * fewer than N SeqIDs in the alignment.
95 *
96 * Revision 6.49  2003/10/20 17:54:34  kans
97 * AlnMgr2ComputeFreqMatrix protect against dereferencing NULL bsp
98 *
99 * Revision 6.48  2003/10/09 13:46:52  rsmith
100 * Add AlnMgr2GetFirstNForSipList.
101 *
102 * Revision 6.47  2003/05/15 18:53:10  rsmith
103 * in AlnMgr2GetSeqRangeForSipInStdSeg always return start & stop in coordinate order. Do not assume what minus strand will do or not.
104 *
105 * Revision 6.46  2003/04/24 20:28:48  rsmith
106 * made AlnMgr2GetNthStdSeg use 1 based numbering like the other Nth functions.
107 *
108 * Revision 6.45  2003/04/23 20:36:13  rsmith
109 * Added four functions in Section 11 to get information about Std-Seg alignments.
110 *
111 * Revision 6.44  2003/03/31 20:17:11  todorov
112 * Added AlnMgr2IndexSeqAlignEx
113 *
114 * Revision 6.43  2003/02/03 12:36:22  kans
115 * AlnMgr2ComputeScoreForSeqAlign checks return value of AlnMgr2ComputeFreqMatrix, returns -1 if NULL to avoid dereference crash
116 *
117 * Revision 6.42  2002/10/23 16:32:19  todorov
118 * CondenseColumns fixed: needed to move the lens too.
119 *
120 * Revision 6.40  2002/10/16 15:54:28  todorov
121 * use the default dim value if not set
122 *
123 * Revision 6.39  2002/08/07 21:57:33  kans
124 * added AlignMgr2GetFirstNForStdSeg
125 *
126 * Revision 6.38  2002/07/11 14:35:51  kans
127 * fixed Mac complaints about prototypes
128 *
129 * Revision 6.37  2002/07/11 12:55:38  wheelan
130 * added support for std-seg alignments
131 *
132 * Revision 6.36  2002/06/04 17:43:07  todorov
133 * 1) Substituted AddInNewSA with a new and optimized AddInNewPairwiseSA function.
134 * 2) Fixed a few bugs in other functions.
135 *
136 * Revision 6.35  2002/05/17 15:04:42  wheelan
137 * bug fix in ExtendToCoords
138 *
139 * Revision 6.34  2002/05/17 11:02:36  wheelan
140 * bug fixes in Merge func
141 *
142 * Revision 6.32  2002/03/04 17:19:18  wheelan
143 * added AlnMgr2FuseSet, changed behavior of RemoveInconsistent, fixed GetNextAlnBitBugs
144 *
145 * Revision 6.31  2002/01/31 17:41:47  wheelan
146 * various bug fixes -- no more 0 len segments, better handling of rows that are one big insert, etc.
147 *
148 * Revision 6.30  2002/01/30 19:12:53  wheelan
149 * added RemoveInconsistentAlnsFromSet, ExtractPairwiseSeqAlign, changed behavior of GetSubAlign, changed structures and behavior of GetNextAlnBit, added GetInterruptInfo, added AlnMgr2IndexAsRows, bug fixes in indexing routines
150 *
151 * Revision 6.29  2002/01/02 15:05:07  wheelan
152 * changes to force more efficient ordering in CompareAsp callbacks, plus more stringent checks in AlnMgr2AddInNewSA
153 *
154 * Revision 6.28  2001/12/28 22:53:20  wheelan
155 * bug fixes; added AlnMgr2DupAlnAndIndexes, changed some New and Free funcs
156 *
157 * Revision 6.27  2001/12/27 16:07:22  wheelan
158 * bug fix in ExtendToEnd
159 *
160 * Revision 6.26  2001/12/20 19:43:20  wheelan
161 * bug fix in GetNextAlnBit -- no more incorrect inserts
162 *
163 * Revision 6.25  2001/12/18 16:36:57  wheelan
164 * scattered fixes to unaligned region code
165 *
166 * Revision 6.24  2001/12/17 19:36:39  wheelan
167 * various fixes in AlnMgr2AddInNewSA
168 *
169 * Revision 6.23  2001/12/14 12:38:50  wheelan
170 * added functions for ddv
171 *
172 * Revision 6.22  2001/12/05 12:25:49  wheelan
173 * bug fix in SortByNthRow
174 *
175 * Revision 6.21  2001/12/04 19:28:55  wheelan
176 * bug fixes in AddInNewSA and in IndexSingleDenseSegSA
177 *
178 * Revision 6.20  2001/12/04 14:31:27  wheelan
179 * fixes to avoid mistakenly processing AM2_LITE as real indexed alignments
180 *
181 * Revision 6.19  2001/11/30 16:55:21  wheelan
182 * added AlnMgr2PadConservatively
183 *
184 * Revision 6.18  2001/11/29 18:38:47  wheelan
185 * cleanup as recommended by Mac compiler
186 *
187 * Revision 6.17  2001/11/29 17:37:16  wheelan
188 * added ExtendToCoords and MergeTwoAlignments
189 *
190 * Revision 6.16  2001/11/27 15:47:40  wheelan
191 * bug fixes in AnchorSeqAlign, DoCondense, and AddInNewSA
192 *
193 * Revision 6.15  2001/11/15 18:23:06  wheelan
194 * small change in AlnMgr2GetNthRowSpan
195 *
196 * Revision 6.14  2001/11/15 18:09:38  wheelan
197 * another bug fix in AddInNewSA
198 *
199 * Revision 6.13  2001/11/15 15:30:54  wheelan
200 * many bugs fixed, leaks plugged, plus reworked AddInNewSA to use new AMSmall field
201 *
202 * Revision 6.12  2001/11/13 14:36:13  wheelan
203 * many bug fixes in AddInNewSA and MapBioseqToSeqAlign
204 *
205 * Revision 6.11  2001/11/08 19:56:07  wheelan
206 * added AlnMgr2GetNthRowSpanInSA, fixed various memory errors
207 *
208 * Revision 6.10  2001/11/08 01:39:15  wheelan
209 * many bug fixes in and around AddInNewSA
210 *
211 * Revision 6.9  2001/11/02 14:01:30  wheelan
212 * bug fixes in AlnMgr2AddInNewSA
213 *
214 * Revision 6.8  2001/10/31 12:00:46  wheelan
215 * commented out the mistakenly uncommented comment
216 *
217 * Revision 6.7  2001/10/30 20:14:38  wheelan
218 * bug fixes for minus strands in AddInNewSA, bug fix in GetSubAlign
219 *
220 * Revision 6.6  2001/10/23 12:14:27  wheelan
221 * changes in AlnMgr2GetNextAlnBit as well as tree-based multiple alignment algorithm
222 *
223 * Revision 6.5  2001/10/18 15:10:53  wheelan
224 * fixed AlnMgr2ComputeScoreForSeqAlign
225 *
226 * Revision 6.4  2001/10/16 12:00:17  wheelan
227 * added GetParent and FreeEitherIndex
228 *
229 * Revision 6.3  2001/10/08 18:43:29  wheelan
230 * added comments
231 *
232 * Revision 6.2  2001/10/03 18:13:01  wheelan
233 * changed some colliding defines
234 *
235 * Revision 6.1  2001/10/03 14:20:11  wheelan
236 * initial checkin
237 *
238 * ==========================================================================
239 *
240 */
242 #include <alignmgr2.h>
244 /***************************************************************************
245 *
246 *  static functions
247 *
248 ***************************************************************************/
249 /* SECTION 1 */
250 static SARowDat2Ptr SARowDat2New(void);
251 static void SARowDat2Free(SARowDat2Ptr srdp);
252 static SARowDat2Ptr SARowDat2Copy(SARowDat2Ptr srdp);
253 static SAIndex2Ptr SAIndex2New(void);
254 static SAIndex2Ptr SAIndex2Copy(VoidPtr index);
255 static AMAlignIndex2Ptr AMAlignIndex2Copy(VoidPtr index);
256 static void AMIntervalSetFree(AMIntervalSetPtr amint);
257 /* SECTION 2 */
258 static void AlnMgr2ConvertDendiagToDensegChain(SeqAlignPtr sap);
259 static void AlnMgr2IndexSingleDenseSegSA(SeqAlignPtr sap);
260 static Boolean AlnMgr2UnpackSeqAlign(SeqAlignPtr sap);
261 static Boolean AlnMgr2ConvertAllToDenseSeg(SeqAlignPtr sap);
262 static void AlnMgr2DecomposeToPairwise(SeqAlignPtr sap);
263 static void AlnMgr2HidePairwiseConflicts(SeqAlignPtr sap);
264 static void AlnMgr2SortBySeqId(SeqAlignPtr sap);
265 static int LIBCALLBACK AlnMgr2CompareIds(VoidPtr ptr1, VoidPtr ptr2);
266 static void AlnMgr2TossWorse(SeqAlignPtr sap, Int4 i, Int4 j);
267 static AMIntervalSetPtr AlnMgr2MakeIntervals(SeqAlignPtr sap);
268 static void AlnMgr2SortIntervals(AMIntervalSetPtr amint);
269 static int LIBCALLBACK AlnMgr2CompareIntervals(VoidPtr ptr1, VoidPtr ptr2);
270 static AMVertexPtr PNTR  AlnMgr2MakeVerticesFromIntervals(SeqAlignPtr sap, AMIntervalSetPtr amint_head, AMVertexPtr PNTR vertexhead, AMEdgePtr PNTR edgehead, Int4Ptr numvertices);
271 static void AlnMgr2SortVerticesByNumEdges(AMVertexPtr PNTR vertexarray, Int4 numvertices);
272 static int LIBCALLBACK AlnMgr2CompareVertices(VoidPtr ptr1, VoidPtr ptr2);
273 static void AlnMgr2SortEdgesByWeight(AMEdgePtr PNTR edge_head);
274 static int LIBCALLBACK AlnMgr2CompareEdges(VoidPtr ptr1, VoidPtr ptr2);
275 static Int4 AlnMgr2MatchToVertex(SeqIdPtr sip, Int4 start, Int4 stop, AMVertexPtr PNTR vertexarray, Int4 numvertices);
276 static void AlnMgr2UsePrimsAlgorithm(AMVertexPtr PNTR vertexarray, Int4 numvertices, AMEdgePtr edge_head);
277 static void AlnMgr2RecursePrims(AMVertexPtr PNTR vertexarray, AMEdgePtr edge_head);
278 static AMVertexPtr AlnMgr2GetBetterVertex(AMVertexPtr PNTR vertexarray, AMEdgePtr edge);
279 static AMEdgePtr AlnMgr2GetEdgeList(Int4 vertexnum, AMEdgePtr edge_head, AMEdgePtr already_used);
280 static void AlnMgr2CleanUpLeftovers(AMVertexPtr PNTR vertexarray, Int4 numvertices, AMEdgePtr edge_head);
281 static Boolean AlnMgr2SameSeq(AMVertexPtr vertex1, AMVertexPtr vertex2);
282 static void AlnMgr2BuildAlignmentFromTree(AMVertexPtr PNTR vertexarray, Int4 numvertices, AMEdgePtr edge_head, SeqAlignPtr sap);
283 static AMVertexPtr AlnMgr2GetAdjacentVertices(AMVertexPtr vertex, AMVertexPtr PNTR vertexarray, AMEdgePtr edge_head);
284 static void AlnMgr2AddInNewSA(SeqAlignPtr parent, SeqAlignPtr sap);
285 static void AlnMgr2AddInNewPairwiseSA(SeqAlignPtr parent, SeqAlignPtr sap);
286 static Int4 AlnMgr2MapSegStartToSegStart(SeqAlignPtr sap, Int4 pos, Int4 row1, Int4 row2, Int4 len);
287 static Int4 AlnMgr2GetSegForStartPos(SeqAlignPtr sap, Int4 pos, Int4 row);
288 static void AlnMgr2CondenseColumns(DenseSegPtr dsp);
289 static void AlnMgr2CondenseRows(DenseSegPtr dsp, Int4 whichrow);
290 static Boolean AlnMgr2DoCondense(DenseSegPtr dsp, Int4 rownum1, Int4 rownum2);
291 static int LIBCALLBACK AlnMgr2CompareCdRows(VoidPtr ptr1, VoidPtr ptr2);
292 static int LIBCALLBACK AlnMgr2CompareAsps(VoidPtr ptr1, VoidPtr ptr2);
293 static int LIBCALLBACK AlnMgr2CompareAspsMinus(VoidPtr ptr1, VoidPtr ptr2);
294 static void AlnMgr2GetFirstSharedRow(SeqAlignPtr sap1, SeqAlignPtr sap2, Int4Ptr n1, Int4Ptr n2);
295 static SeqIdPtr AlnMgr2SeqIdListsOverlap(SeqIdPtr sip1, SeqIdPtr sip2);
296 static Int4 AlnMgr2OrderSeqIds(SeqIdPtr sip1, SeqIdPtr sip2);
297 static void AlnMgr2SetUnaln(SeqAlignPtr sap);
298 static int LIBCALLBACK AlnMgr2CompareUnalnAMS(VoidPtr ptr1, VoidPtr ptr2);
299 /* SECTION 4 */
300 static Int4 binary_search_on_uint4_list(Uint4Ptr list, Uint4 pos, Uint4 listlen);
301 static Int4 binary_search_on_uint2_list(Uint2Ptr list, Int4 ele, Uint2 listlen);
302 static void AlnMgr2GetUnalignedInfo(SeqAlignPtr sap, Int4 segment, Int4 row, Int4Ptr from, Int4Ptr to);
303 static void AlnMgr2GetNthSeqRangeInSASet(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop);
304 static Int4 AlnMgr2GetMaxUnalignedLength(SeqAlignPtr sap, Int4 seg);
305 /* SECTION 5 */
306 static void AlnMgr2AnchorChild(SeqAlignPtr sap, Int4 which_row);
307 /* SECTION 8 */
308 static Int4 AlnMgr2GetScoreForPair(Int4 res1, Int4 res2, Boolean is_prot);
309 /* SECTION 9 */
310 static int LIBCALLBACK AMCompareStarts(VoidPtr ptr1, VoidPtr ptr2);
313 typedef struct am_seqpieceset AMSeqPieceSet, PNTR AMSeqPieceSetPtr;
314 typedef struct am_seqpiece AMSeqPiece, PNTR AMSeqPiecePtr;
316 struct am_seqpiece {
317   Int4 beg;
318   Int4 end;
319   Int4 left;
320   Int4 right;
321   Int4 orig_left;
322   Int4 orig_right;
323   Boolean aligned;
324   Int4 seg;
325   Int4 pos;
326   DenseSegPtr alt_dsp;
327   Int4 alt_seg;
328   Int4 alt_pos;
329   AMSeqPiecePtr next;
330   AMSeqPiecePtr prev;
331   AMSeqPieceSetPtr set;
332 };
334 struct am_seqpieceset {
335   AMSeqPiecePtr head;
336   AMSeqPiecePtr tail;
337   DenseSegPtr dsp;
338   DenseSegPtr alt_dsp;
339   Int4 row;
340   Int4 row2;
341   Int4 alt_row;
342   Int4 alt_row2;
343   Uint1 strand;
344   Boolean plus;
345   Int4 max_pos;
346   Boolean sign;
347   AMSeqPieceSetPtr next;
348 };
352 /***************************************************************************
353 *
354 *  SECTION 1: Functions for allocating and freeing data structures used
355 *  by the alignment manager; copying functions are also here.
356 *
357 ***************************************************************************/
359 /* SECTION 1 */
SARowDat2New(void)360 static SARowDat2Ptr SARowDat2New(void)
361 {
362    return (SARowDat2Ptr)MemNew(sizeof(SARowDat2));
363 }
365 /* SECTION 1 */
/* Releases a row-data record: the sect, unsect, insect and unaligned
 * arrays first, then the record itself.  A NULL srdp is ignored. */
static void SARowDat2Free(SARowDat2Ptr srdp)
{
   if (srdp != NULL)
   {
      if (srdp->sect != NULL)
      {
         MemFree(srdp->sect);
      }
      if (srdp->unsect != NULL)
      {
         MemFree(srdp->unsect);
      }
      MemFree(srdp->insect);
      MemFree(srdp->unaligned);
      MemFree(srdp);
   }
}
379 /* SECTION 1 */
/* Returns a deep copy of srdp: each of the four index arrays (sect, unsect,
 * insect, unaligned) is re-allocated at its recorded length and copied
 * element by element.  Returns NULL when srdp is NULL. */
static SARowDat2Ptr SARowDat2Copy(SARowDat2Ptr srdp)
{
   SARowDat2Ptr  dup;
   Int4          k;

   if (srdp == NULL)
   {
      return NULL;
   }
   dup = SARowDat2New();

   /* segments in which this row has sequence */
   dup->numsect = srdp->numsect;
   dup->sect = (Uint2Ptr)MemNew(dup->numsect*sizeof(Uint2));
   k = 0;
   while (k < dup->numsect)
   {
      dup->sect[k] = srdp->sect[k];
      k++;
   }

   dup->numunsect = srdp->numunsect;
   dup->unsect = (Uint2Ptr)MemNew(dup->numunsect*sizeof(Uint2));
   k = 0;
   while (k < dup->numunsect)
   {
      dup->unsect[k] = srdp->unsect[k];
      k++;
   }

   dup->numinsect = srdp->numinsect;
   dup->insect = (Uint2Ptr)MemNew(dup->numinsect*sizeof(Uint2));
   k = 0;
   while (k < dup->numinsect)
   {
      dup->insect[k] = srdp->insect[k];
      k++;
   }

   dup->numunaln = srdp->numunaln;
   dup->unaligned = (Uint2Ptr)MemNew(dup->numunaln*sizeof(Uint2));
   k = 0;
   while (k < dup->numunaln)
   {
      dup->unaligned[k] = srdp->unaligned[k];
      k++;
   }

   return dup;
}
415 /* SECTION 1 */
SAIndex2New(void)416 static SAIndex2Ptr SAIndex2New(void)
417 {
418    SAIndex2Ptr  saip;
420    saip = (SAIndex2Ptr)MemNew(sizeof(SAIndex2));
421    saip->indextype = INDEX_CHILD;
422    saip->freefunc = (SeqAlignIndexFreeFunc)(SAIndex2Free2);
423    saip->anchor = -1;
424    return saip;
425 }
427 /* SECTION 1 */
SAIndex2Free2(VoidPtr index)428 NLM_EXTERN Boolean LIBCALLBACK SAIndex2Free2(VoidPtr index)
429 {
430    Int4        i;
431    SAIndex2Ptr  saip;
433    if (index == NULL)
434       return TRUE;
435    saip = (SAIndex2Ptr)(index);
436    MemFree(saip->aligncoords);
437    for (i=0; i<saip->numrows; i++)
438    {
439       SARowDat2Free(saip->srdp[i]);
440    }
441    MemFree(saip->srdp);
442    MemFree(saip);
443    return TRUE;
444 }
446 /* SECTION 1 */
AlnMgr2FreeInterruptInfo(AMInterrInfoPtr interr)447 NLM_EXTERN void AlnMgr2FreeInterruptInfo(AMInterrInfoPtr interr)
448 {
449    if (interr == NULL)
450       return;
451    MemFree(interr->starts);
452    MemFree(interr->lens);
453    MemFree(interr->types);
454    MemFree(interr);
455 }
457 /* SECTION 1*/
SAIndex2Copy(VoidPtr index)458 static SAIndex2Ptr SAIndex2Copy(VoidPtr index)
459 {
460    Int4         i;
461    SAIndex2Ptr  saip;
462    SAIndex2Ptr  saip2;
464    saip2 = SAIndex2New();
465    saip = (SAIndex2Ptr)(index);
466    saip2->numseg = saip->numseg;
467    saip2->aligncoords = (Uint4Ptr)MemNew(saip2->numseg*sizeof(Uint4));
468    for (i=0; i<saip2->numseg; i++)
469    {
470       saip2->aligncoords[i] = saip->aligncoords[i];
471    }
472    saip2->anchor = saip->anchor;
473    saip2->numrows = saip->numrows;
474    saip2->numseg = saip->numseg;
475    saip2->srdp = (SARowDat2Ptr PNTR)MemNew(saip2->numrows*sizeof(SARowDat2));
476    for (i=0; i<saip2->numrows; i++)
477    {
478       saip2->srdp[i] = SARowDat2Copy(saip->srdp[i]);
479    }
480    saip2->numunaln = saip->numunaln;
481    saip2->unaln = (Uint4Ptr)MemNew(saip2->numunaln*sizeof(Uint4));
482    for (i=0; i<saip2->numunaln; i++)
483    {
484       saip2->unaln[i] = saip->unaln[i];
485    }
486    saip2->numinchain = saip->numinchain;
487    saip2->numsplitaln = saip->numsplitaln;
488    saip2->score = saip->score;
489    saip2->aligned = saip->aligned;
490    return saip2;
491 }
493 /* SECTION 1 */
AMAlignIndex2New(void)494 static AMAlignIndex2Ptr AMAlignIndex2New(void)
495 {
496    AMAlignIndex2Ptr  amaip;
498    amaip = (AMAlignIndex2Ptr)MemNew(sizeof(AMAlignIndex2));
499    amaip->indextype = INDEX_PARENT;
500    amaip->freefunc = (SeqAlignIndexFreeFunc)(AMAlignIndex2Free2);
501    return amaip;
502 }
504 /* SECTION 1 */
AMAlignIndex2Free2(VoidPtr index)505 NLM_EXTERN Boolean LIBCALLBACK AMAlignIndex2Free2(VoidPtr index)
506 {
507    AMAlignIndex2Ptr  amaip;
508    Int4              i;
510    if (index == NULL)
511       return FALSE;
512    amaip = (AMAlignIndex2Ptr)(index);
513    for (i=0; i<amaip->numrows; i++)
514    {
515       SeqIdFree(amaip->ids[i]);
516    }
517    MemFree(amaip->ids);
518    MemFree(amaip->saps);
519    MemFree(amaip->aligned);
520    SeqAlignFree(amaip->sharedaln);
521    MemFree(amaip);
522    return TRUE;
523 }
525 /* SECTION 1 */
AMAlignIndex2Copy(VoidPtr index)526 static AMAlignIndex2Ptr AMAlignIndex2Copy(VoidPtr index)
527 {
528    AMAlignIndex2Ptr  amaip;
529    AMAlignIndex2Ptr  amaip2;
530    Int4              i;
532    if (index == NULL)
533       return NULL;
534    amaip = (AMAlignIndex2Ptr)(index);
535    amaip2 = AMAlignIndex2New();
536    amaip2->alnstyle = amaip->alnstyle;
537    amaip2->anchor = amaip->anchor;
538    amaip2->numrows = amaip->numrows;
539    amaip2->ids = (SeqIdPtr PNTR)MemNew(amaip2->numrows*sizeof(SeqIdPtr));
540    for (i=0; i<amaip2->numrows; i++)
541    {
542       amaip2->ids[i] = SeqIdDup(amaip->ids[i]);
543    }
544    amaip2->numsaps = amaip->numsaps;
545    amaip2->saps = (SeqAlignPtr PNTR)MemNew(amaip2->numsaps*sizeof(SeqAlignPtr));
546    amaip2->aligned = (Boolean PNTR)MemNew(amaip2->numsaps*sizeof(Boolean));
547    for (i=0; i<amaip2->numsaps; i++)
548    {
549       amaip2->saps[i] = SeqAlignDup(amaip->saps[i]);
550       amaip2->aligned[i] = amaip->aligned[i];
551       if (i>0)
552          amaip2->saps[i-1]->next = amaip2->saps[i];
553    }
554    amaip2->sharedaln = AlnMgr2DupAlnAndIndexes(amaip->sharedaln);
555    return amaip2;
556 }
558 /* SECTION 1 */
AMAlignIndexFreeEitherIndex(SeqAlignPtr sap)559 NLM_EXTERN void AMAlignIndexFreeEitherIndex(SeqAlignPtr sap)
560 {
561    if (sap == NULL || sap->saip == NULL)
562       return;
563    if (sap->saip->indextype == INDEX_PARENT)
564       AMAlignIndex2Free2(sap->saip);
565    else
566       SAIndex2Free2(sap->saip);
567    sap->saip = NULL;
568 }
570 /* SECTION 1 */
AlnMgr2DupAlnAndIndexes(SeqAlignPtr sap)571 NLM_EXTERN SeqAlignPtr AlnMgr2DupAlnAndIndexes(SeqAlignPtr sap)
572 {
573    AMAlignIndex2Ptr  amaip;
574    SAIndex2Ptr       saip;
575    SeqAlignPtr       sap_new;
577    if (sap == NULL)
578       return NULL;
579    if (sap->saip == NULL)
580       return (SeqAlignDup(sap));
581    sap_new = NULL;
582    if (sap->saip->indextype == INDEX_CHILD)
583    {
584       sap_new = SeqAlignDup(sap);
585       sap_new->saip = (Pointer)SAIndex2Copy(sap->saip);
586       saip = (SAIndex2Ptr)(sap_new->saip);
587       saip->top = AlnMgr2GetParent(sap);
588    } else if (sap->saip->indextype == INDEX_PARENT)
589    {
590       sap_new = SeqAlignNew();
591       sap_new->type = sap->type;
592       sap_new->segtype = sap->segtype;
593       sap_new->saip = (Pointer)(AMAlignIndex2Copy(sap->saip));
594       amaip = (AMAlignIndex2Ptr)(sap_new->saip);
595       sap_new->segs = amaip->saps[0];
596    }
597    return sap_new;
598 }
600 /* SECTION 1 */
AlnMsgNew2(void)601 NLM_EXTERN AlnMsg2Ptr AlnMsgNew2(void)
602 {
603    AlnMsg2Ptr  amp;
605    amp = (AlnMsg2Ptr)MemNew(sizeof(AlnMsg2));
606    amp->real_from = -2;
607    return amp;
608 }
610 /* SECTION 1 */
AlnMsgFree2(AlnMsg2Ptr amp)611 NLM_EXTERN AlnMsg2Ptr AlnMsgFree2(AlnMsg2Ptr amp)
612 {
613    if (amp->left_interrupt != NULL)
614    {
615       MemFree(amp->left_interrupt);
616       amp->left_interrupt = NULL;
617    }
618    if (amp->right_interrupt != NULL)
619    {
620       MemFree(amp->right_interrupt);
621       amp->right_interrupt = NULL;
622    }
623    MemFree(amp);
624    return NULL;
625 }
627 /* SECTION 1 */
AlnMsgReNew2(AlnMsg2Ptr amp)628 NLM_EXTERN void AlnMsgReNew2(AlnMsg2Ptr amp)
629 {
630    if (amp == NULL)
631       return;
632    if (amp->left_interrupt != NULL)
633    {
634       MemFree(amp->left_interrupt);
635       amp->left_interrupt = NULL;
636    }
637    if (amp->right_interrupt != NULL)
638    {
639       MemFree(amp->right_interrupt);
640       amp->right_interrupt = NULL;
641    }
642    amp->real_from = -2;
643    amp->len = -2;
644    return;
645 }
647 /* SECTION 1 */
/* Frees one interval set: every interval on its int_head list, its SeqId,
 * and the set record itself.
 *
 * A NULL amint is now ignored (it used to be dereferenced), in line with
 * the other free functions in this section. */
static void AMIntervalSetFree(AMIntervalSetPtr amint)
{
   AMIntervalPtr  intv;
   AMIntervalPtr  intv_next;

   if (amint == NULL)
      return;
   intv = amint->int_head;
   while (intv != NULL)
   {
      intv_next = intv->next;  /* read the link before the node goes away */
      MemFree(intv);
      intv = intv_next;
   }
   SeqIdFree(amint->sip);
   MemFree(amint);
}
664 /* SECTION 1 */
AMFreqFree(AMFreqPtr afp)665 NLM_EXTERN void AMFreqFree(AMFreqPtr afp)
666 {
667    Int4  i;
669    if (afp == NULL)
670       return;
671    for (i=0; i<afp->size; i++)
672    {
673       MemFree(afp->freq[i]);
674    }
675    MemFree(afp->freq);
676    MemFree(afp);
677 }
679 /* SECTION 1 */
/* Frees a whole chain of piece sets: for each set reachable through next,
 * every piece on its head list is freed, then the set itself.  A NULL
 * s_set is an empty chain. */
static void AMSeqPieceSetFree(AMSeqPieceSetPtr s_set)
{
  AMSeqPieceSetPtr cur_set, following_set;
  AMSeqPiecePtr piece, following_piece;

  for (cur_set = s_set; cur_set != NULL; cur_set = following_set) {
    for (piece = cur_set->head; piece != NULL; piece = following_piece) {
      following_piece = piece->next;
      MemFree(piece);
    }
    following_set = cur_set->next;
    MemFree(cur_set);
  }
}
698 /***************************************************************************
699 *
700 *  SECTION 2: Functions used to create the indexes for parent and child
701 *  seqaligns.
702 *    SECTION 2a: Functions to create indexes for child seqaligns, and
703 *                to convert seqaligns to dense-seg type
704 *    SECTION 2b: Functions to unpack and rearrange complicated seqaligns
705 *                into simple chains of dense-seg and dense-diag types
706 *    SECTION 2c: Functions to create indexes for parent seqaligns
707 *    SECTION 2d: Accessory functions for parent indexing
708 *
709 ***************************************************************************/
711 /***************************************************************************
712 *
713 *  AlnMgr2ConvertDendiagToDensegChain takes a dense-diag style alignment
714 *  and makes each diag into its own denseg seqalign, then links the new
715 *  alignments together.
716 *
717 ***************************************************************************/
718 /* SECTION 2a */
AlnMgr2ConvertDendiagToDensegChain(SeqAlignPtr sap)719 static void AlnMgr2ConvertDendiagToDensegChain(SeqAlignPtr sap)
720 {
721    DenseDiagPtr  ddp;
722    DenseDiagPtr  ddp_next;
723    DenseSegPtr   dsp;
724    Int4          i;
725    SeqAlignPtr   sap_new;
726    SeqAlignPtr   sap_next;
727    SeqAlignPtr   sap_prev;
729    if (sap == NULL || sap->segtype != SAS_DENDIAG)
730       return;
731    sap_next = sap->next;
732    ddp = (DenseDiagPtr)(sap->segs);
733    /* convert the first diag to dense-seg and put it in the original alignment */
734    dsp = DenseSegNew();
735    dsp->ids = ddp->id;
736    ddp->id = NULL;
737    dsp->dim = ddp->dim;
738    dsp->numseg = 1;
739    dsp->starts = (Int4Ptr)MemNew((dsp->dim)*(dsp->numseg)*sizeof(Int4));
740    dsp->lens = (Int4Ptr)MemNew((dsp->numseg)*sizeof(Int4));
741    dsp->strands = (Uint1Ptr)MemNew((dsp->dim)*(dsp->numseg)*sizeof(Uint1));
742    for (i=0; i<dsp->dim; i++)
743    {
744       dsp->starts[i] = ddp->starts[i];
745       if (ddp->strands != NULL)
746          dsp->strands[i] = ddp->strands[i];
747       else
748          dsp->strands[i] = Seq_strand_plus;
749    }
750    dsp->lens[0] = ddp->len;
751    sap->segs = (Pointer)(dsp);
752    sap->segtype = SAS_DENSEG;
753    ddp_next = ddp->next;
754    ddp->next = NULL;
755    DenseDiagFree(ddp);
756    ddp = ddp_next;
757    if (ddp == NULL)
758       return;
759    sap_prev = sap;
760    while (ddp)
761    {
762       sap_new = SeqAlignNew();
763       sap_new->type = SAT_PARTIAL;
764       sap_new->segtype = SAS_DENSEG;
765       sap_new->dim = ddp->dim;
766       dsp = DenseSegNew();
767       dsp->ids = ddp->id;
768       ddp->id = NULL;
769       dsp->dim = ddp->dim;
770       dsp->numseg = 1;
771       dsp->starts = (Int4Ptr)MemNew((dsp->dim)*(dsp->numseg)*sizeof(Int4));
772       dsp->lens = (Int4Ptr)MemNew((dsp->numseg)*sizeof(Int4));
773       dsp->strands = (Uint1Ptr)MemNew((dsp->dim)*(dsp->numseg)*sizeof(Uint1));
774       for (i=0; i<dsp->dim; i++)
775       {
776          dsp->starts[i] = ddp->starts[i];
777          if (ddp->strands != NULL)
778             dsp->strands[i] = ddp->strands[i];
779          else
780             dsp->strands[i] = Seq_strand_plus;
781       }
782       dsp->lens[0] = ddp->len;
783       sap_new->segs = (Pointer)(dsp);
784       ddp_next = ddp->next;
785       ddp->next = NULL;
786       DenseDiagFree(ddp);
787       ddp = ddp_next;
788       sap_prev->next = sap_new;
789       sap_prev = sap_new;
790    }
791    sap_new->next = sap_next;
792 }
794 /* SECTION 2a */
795 /***************************************************************************
796 *
797 *  AlnMgr2IndexSingleDenseSegSA creates the SAIndex2 structure for a given
798 *  dense-seg seqalign. This structure has binary-searchable indexes into
799 *  the segs. If the strands are not allocated, this function allocates
800 *  them and sets them to Seq_strand_plus.
801 *
802 ***************************************************************************/
AlnMgr2IndexSingleDenseSegSA(SeqAlignPtr sap)803 static void AlnMgr2IndexSingleDenseSegSA(SeqAlignPtr sap)
804 {
805    DenseSegPtr  dsp;
806    Int4         i;
807    Int4         j;
808    Int4         last;
809    Int4         next;
810    Int4         row;
811    SAIndex2Ptr   saip;
812    Boolean      unal;
814    if (sap->segtype != SAS_DENSEG)
815       return;
816    dsp = (DenseSegPtr)(sap->segs);
817    if (dsp->strands == NULL)
818    {
819       dsp->strands = (Uint1Ptr)MemNew(dsp->dim*dsp->numseg*sizeof(Uint1));
820       for (i=0; i<dsp->dim*dsp->numseg; i++)
821       {
822          dsp->strands[i] = Seq_strand_plus;
823       }
824    }
825    saip = SAIndex2New();
826    saip->aligncoords = (Uint4Ptr)MemNew((dsp->numseg)*sizeof(Uint4));
827    saip->srdp = (SARowDat2Ptr PNTR)MemNew((dsp->dim)*sizeof(SARowDat2Ptr));
828    saip->numrows = dsp->dim;
829    saip->numseg = dsp->numseg;
830    for (i=0; i<dsp->dim; i++)
831    {
832       saip->srdp[i] = SARowDat2New();
833    }
834    for (i=0; i<dsp->numseg; i++)
835    {
836       if (i != 0)
837          saip->aligncoords[i] = saip->aligncoords[i-1] + dsp->lens[i-1];
838       for (row=0; row<dsp->dim; row++)
839       {
840          if (dsp->starts[dsp->dim*i + row] != -1)
841             saip->srdp[row]->numsect++;
842       }
843    }
844    for (row=0; row<dsp->dim; row++)
845    {
846       saip->srdp[row]->sect = (Uint2Ptr)MemNew((saip->srdp[row]->numsect)*sizeof(Uint2));
847       saip->srdp[row]->unsect = (Uint2Ptr)MemNew((dsp->numseg - saip->srdp[row]->numsect)*sizeof(Uint2));
848       saip->srdp[row]->numsect = 0;
849       saip->srdp[row]->unaligned = (Uint2Ptr)MemNew(dsp->numseg*sizeof(Uint2));
850    }
851    for (i=0; i<dsp->numseg; i++)
852    {
853       for (row=0; row<dsp->dim; row++)
854       {
855          if (dsp->starts[dsp->dim*i + row] != -1)
856          {
857             saip->srdp[row]->sect[saip->srdp[row]->numsect] = i;
858             saip->srdp[row]->numsect++;
859          } else
860          {
861             saip->srdp[row]->unsect[saip->srdp[row]->numunsect] = i;
862             saip->srdp[row]->numunsect++;
863          }
864       }
865    }
866    for (row=0; row<dsp->dim; row++)
867    {
868       for (i=0; i<dsp->numseg; i++)
869       {
870          unal = FALSE;
871          last = -1;
872          j = i;  /* only blocks with sequence can have flanking unal. regions */
873          if (j >= 0 && dsp->starts[dsp->dim*j+row] != -1)
874          {
875             if (dsp->strands[row] == Seq_strand_minus)
876                last = dsp->starts[dsp->dim*j+row];
877             else
878                last = dsp->starts[dsp->dim*j+row] + dsp->lens[j];
879          }
880          if (last > -1)
881          {
882             next = -1;
883             j++;
884             /* find next block of aligned sequence in this row */
885             for (j; j<dsp->numseg && next == -1; j++)
886             {
887                if (dsp->starts[dsp->dim*j+row] != -1)
888                {
889                   if (dsp->strands[row] == Seq_strand_minus)
890                      next = dsp->starts[dsp->dim*j+row] + dsp->lens[j];
891                   else
892                      next = dsp->starts[dsp->dim*j+row];
893                }
894             }
895             if (next > -1) /* look for unaligned seq on right side of this seg */
896             {
897                if (next != last)
898                   unal = TRUE;
899             }
900          }
901          if (unal == TRUE)
902          {
903             saip->srdp[row]->unaligned[saip->srdp[row]->numunaln] = i;
904             saip->srdp[row]->numunaln++;
905          }
906       }
907    }
908    sap->saip = (SeqAlignIndexPtr)(saip);
909 }
911 /* SECTION 2a */
912 /***************************************************************************
913 *
914 *  AlnMgr2IndexSingleChildSeqAlign takes a simple dense-seg or dense-diag
915 *  seqalign, converts it to dense-seg, and then calls
916 *  AlnMgr2IndexSingleDenseSegSA to create the indexes. If the alignment has
917 *  already been indexed, this erases that index and reindexes the alignment.
918 *  (SINGCHILD)
919 *
920 ***************************************************************************/
AlnMgr2IndexSingleChildSeqAlign(SeqAlignPtr sap)921 NLM_EXTERN Boolean AlnMgr2IndexSingleChildSeqAlign(SeqAlignPtr sap)
922 {
923    SeqAlignPtr  salp;
924    SeqAlignPtr  salp_prev;
925    SeqAlignPtr  sap_next;
927    if (sap == NULL)
928       return FALSE;
929    if (sap->saip != NULL)
930    {
931       if (sap->saip->indextype != INDEX_CHILD)
932          return FALSE;
933       SAIndex2Free2(sap->saip);
934       sap->saip = NULL;
935    }
936    sap_next = sap->next;
937    sap->next = NULL;
938    if (sap->segtype == SAS_DISC)
939       return FALSE;
940    if (sap->segtype == SAS_DENDIAG)
941       AlnMgr2ConvertDendiagToDensegChain(sap);
942    salp = sap;
943    salp_prev = sap;
944    while (salp != NULL)
945    {
946       AlnMgr2IndexSingleDenseSegSA(salp);
947       salp_prev = salp;
948       salp = salp->next;
949    }
950    salp_prev->next = sap_next;
951    return TRUE;
952 }
954 /***************************************************************************
955 *
956 *  AlnMgr2UnpackSeqAlign rearranges any seqalign (except alignments with
957 *  more than two levels of nested discontinuous alignments) to a simple
958 *  discontinuous alignment or a linked list of alignments.
959 *
960 ***************************************************************************/
961 /* SECTION 2b */
AlnMgr2UnpackSeqAlign(SeqAlignPtr sap)962 static Boolean AlnMgr2UnpackSeqAlign(SeqAlignPtr sap)
963 {
964    SeqAlignPtr  sap_new;
965    SeqAlignPtr  sap_next;
966    SeqAlignPtr  sap_segs;
967    SeqAlignPtr  sap_segs_head;
968    SeqAlignPtr  sap_segs_prev;
970    if (sap == NULL)
971       return FALSE;
972    sap_segs = NULL;
973    if (sap->segtype == SAS_DISC)
974    {
975       sap_segs_head = (SeqAlignPtr)(sap->segs);
976       if (sap_segs_head->segtype == SAS_DISC)
977       {
978          sap_segs_prev = (SeqAlignPtr)(sap_segs_head->segs);
979          sap_segs_head->segs = NULL;
980          sap_next = sap_segs_head->next;
981          sap_segs_head->next = NULL;
982          SeqAlignFree(sap_segs_head);
983          sap_segs_head = sap_segs_prev;
984          sap->segs = (Pointer)(sap_segs_head);
985          while (sap_segs_prev->next)
986          {
987             sap_segs_prev = sap_segs_prev->next;
988             if (sap_segs_prev->segtype == SAS_DISC)
989                return FALSE;
990          }
991          sap_segs_prev->next = sap_next;
992          sap_segs = sap_next;
993       } else
994          sap_segs = sap_segs_head->next;
995       while (sap_segs)
996       {
997          if (sap_segs->segtype == SAS_DISC)
998          {
999             sap_next = sap_segs->next;
1000             sap_segs->next = NULL;
1001             sap_segs_prev->next = (SeqAlignPtr)(sap_segs->segs);
1002             sap_segs->segs = NULL;
1003             SeqAlignFree(sap_segs);
1004             while (sap_segs_prev->next)
1005             {
1006                sap_segs_prev = sap_segs_prev->next;
1007                if (sap_segs_prev->segtype == SAS_DISC)
1008                   return FALSE;
1009             }
1010             sap_segs_prev->next = sap_next;
1011             sap_segs = sap_next;
1012          } else
1013             sap_segs = sap_segs->next;
1014       }
1015    } else
1016    {
1017       sap_new = SeqAlignNew();
1018       sap_new->type = SAT_GLOBAL;
1019       sap_new->segtype = sap->segtype;
1020       sap_new->dim = sap->dim;
1021       sap_new->segs = sap->segs;
1022       sap_new->master = sap->master;
1023       sap_new->bounds = sap->bounds;
1024       sap_new->next = sap->next;
1025       sap_new->score = sap->score;
1026       sap->next = NULL;
1027       sap->segtype = SAS_DISC;
1028       sap->type = 0;
1029       sap->dim = 0;
1030       sap->master = NULL;
1031       sap->bounds = NULL;
1032       sap->score = NULL;
1033       sap->segs = (Pointer)sap_new;
1034       sap_segs_prev = sap_new;
1035       sap_segs = sap_new->next;
1036       while (sap_segs)
1037       {
1038          if (sap_segs->segtype == SAS_DISC)
1039          {
1040             sap_next = sap_segs->next;
1041             sap_segs->next = NULL;
1042             sap_segs_prev->next = (SeqAlignPtr)(sap_segs->segs);
1043             sap_segs->segs = NULL;
1044             SeqAlignFree(sap_segs);
1045             while (sap_segs_prev->next)
1046             {
1047                sap_segs_prev = sap_segs_prev->next;
1048                if (sap_segs_prev->segtype == SAS_DISC)
1049                   return FALSE;
1050             }
1051             sap_segs_prev->next = sap_next;
1052             sap_segs = sap_next;
1053          } else
1054             sap_segs = sap_segs->next;
1055       }
1056    }
1057    return TRUE;
1058 }
1060 /* SECTION 2b */
AlnMgr2UnpackSeqAlignChain(SeqAlignPtr sap)1061 static void AlnMgr2UnpackSeqAlignChain(SeqAlignPtr sap)
1062 {
1063    Int4         i;
1064    SeqAlignPtr  salp_head;
1065    SeqAlignPtr  salp_prev;
1066    SeqAlignPtr  sap_next;
1067    SeqAlignPtr  sap_orig;
1068    SeqAlignPtr  sap_prev;
1070    salp_head = salp_prev = NULL;
1071    i = 0;
1072    while (sap != NULL)
1073    {
1074       sap_next = sap->next;
1075       sap->next = NULL;
1076       AlnMgr2UnpackSeqAlign(sap);
1077       while (sap != NULL)
1078       {
1079          if (salp_prev != NULL)
1080          {
1081             salp_prev->next = (SeqAlignPtr)(sap->segs);
1082             sap->segs = NULL;
1083             while (salp_prev->next != NULL)
1084             {
1085                salp_prev = salp_prev->next;
1086             }
1087          } else
1088          {
1089             salp_head = salp_prev = (SeqAlignPtr)(sap->segs);
1090             sap->segs = NULL;
1091             while (salp_prev->next != NULL)
1092             {
1093                salp_prev = salp_prev->next;
1094             }
1095          }
1096          sap_prev = sap;
1097          sap = sap->next;
1098          sap_prev->next = NULL;
1099          if (i>0)
1100             SeqAlignFree(sap_prev);
1101          else
1102             sap_orig = sap_prev; /* this is the pointer that was passed in */
1103          i++;
1104       }
1105       sap = sap_next;
1106    }
1107    sap_orig->segs = (Pointer)(salp_head);
1108 }
1110 /* SECTION 2b */
1111 /***************************************************************************
1112 *
1113 *  AlnMgr2ConvertAllToDenseSeg goes through a chain of simple child
1114 *  seqaligns and makes sure that each is a dense-seg seqalign with the
1115 *  strands explicitly allocated; dense-diag alignments are converted and
1116 *  non-allocated strands are allocated and all set to Seq_strand_plus.
1117 *
1118 ***************************************************************************/
AlnMgr2ConvertAllToDenseSeg(SeqAlignPtr sap)1119 static Boolean AlnMgr2ConvertAllToDenseSeg(SeqAlignPtr sap)
1120 {
1121    DenseSegPtr  dsp;
1122    Int4         i;
1123    SeqAlignPtr  sap_next;
1125    while (sap != NULL)
1126    {
1127       sap_next = sap->next;
1128       if (sap->segtype == SAS_DENDIAG) {
1129          AlnMgr2ConvertDendiagToDensegChain(sap);
1130       }
1131       else if (sap->segtype == SAS_DENSEG)
1132       {
1133          dsp = (DenseSegPtr)(sap->segs);
1134          if (dsp->strands == NULL)
1135          {
1136             dsp->strands = (Uint1Ptr)MemNew((dsp->dim)*(dsp->numseg)*sizeof(Uint1));
1137             for (i=0; i<(dsp->dim)*(dsp->numseg); i++)
1138             {
1139                dsp->strands[i] = Seq_strand_plus;
1140             }
1141          }
1142       }
1143       else if (sap->segtype == SAS_SPLICED)
1144       {
1145         return FALSE;
1146       }
1147       sap = sap_next;
1148    }
1149    return TRUE;
1150 }
1152 /* SECTION 2c */
1153 /***************************************************************************
1154 *
1155 *  AlnMgr2IndexLite takes a seqalign or a list of seqaligns, converts
1156 *  each alignment to a dense-seg structure and indexes it, and then
1157 *  allocates an AMAlignIndex2 structure and fills in the saps array.
1158 *
1159 ***************************************************************************/
AlnMgr2IndexLite(SeqAlignPtr sap)1160 NLM_EXTERN Boolean AlnMgr2IndexLite(SeqAlignPtr sap)
1161 {
1162    AMAlignIndex2Ptr  amaip;
1163    Int4              i;
1164    SAIndex2Ptr       saip;
1165    SeqAlignPtr       salp;
1167    if (sap == NULL)
1168       return FALSE;
1169    if (!AlnMgr2UnpackSeqAlign(sap))
1170       return FALSE;
1171    if (!AlnMgr2ConvertAllToDenseSeg((SeqAlignPtr)sap->segs))
1172       return FALSE;
1173    amaip = AMAlignIndex2New();
1174    amaip->alnstyle = AM2_LITE;
1175    salp = (SeqAlignPtr)(sap->segs);
1176    while (salp != NULL)
1177    {
1178       amaip->numsaps++;
1179       AlnMgr2IndexSingleChildSeqAlign(salp);
1180       salp = salp->next;
1181    }
1182    amaip->saps = (SeqAlignPtr PNTR)MemNew((amaip->numsaps)*sizeof(SeqAlignPtr));
1183    salp = (SeqAlignPtr)(sap->segs);
1184    i = 0;
1185    while (salp != NULL)
1186    {
1187       amaip->saps[i] = salp;
1188       i++;
1189       saip = (SAIndex2Ptr)(salp->saip);
1190       saip->numinchain = i;
1191       saip->top = sap;
1192       salp = salp->next;
1193    }
1194    sap->saip = (SeqAlignIndexPtr)amaip;
1195    amaip->aligned = (Boolean PNTR)MemNew((amaip->numsaps)*sizeof(Boolean));
1196    for (i=0; i<amaip->numsaps; i++)
1197    {
1198       amaip->aligned[i] = TRUE;
1199    }
1200    return TRUE;
1201 }
1203 /* SECTION 2c */
1204 /***************************************************************************
1205 *
1206 *  AlnMgr2IndexSeqAlign takes a seqalign of any type except std-seg and
1207 *  creates indexes on it for easy retrieval of useful information by other
1208 *  AlnMgr2 functions. If the seqalign is a single alignment, that alignment
1209 *  gets a simple index and is left alone otherwise. If the seqalign is
1210 *  a set of alignments or a dense-diag set, the subalignments get
1211 *  individually indexed and then are combined into a (fake) multiple
1212 *  alignment which also gets indexed. The subalignments can now be accessed
1213 *  as a multiple alignment by AlnMgr2 functions.
1214 *
1215 ***************************************************************************/
AlnMgr2IndexSeqAlign(SeqAlignPtr sap)1217 NLM_EXTERN void AlnMgr2IndexSeqAlign(SeqAlignPtr sap)
1218 {
1219    AlnMgr2IndexSeqAlignEx(sap, TRUE);
1220 }
AlnMgr2IndexSeqAlignEx(SeqAlignPtr sap,Boolean replace_gi)1222 NLM_EXTERN void AlnMgr2IndexSeqAlignEx(SeqAlignPtr sap, Boolean replace_gi)
1223 {
1224    AMAlignIndex2Ptr  amaip;
1225    AMIntervalSetPtr  amint;
1226    AMIntervalSetPtr  amint_head;
1227    AMEdgePtr         edge;
1228    AMEdgePtr         edge_head;
1229    Int4              i;
1230    Int4              numvertices;
1231    AMVertexPtr       vertex_head;
1232    AMVertexPtr       PNTR vertexarray;
1234    if (sap == NULL || sap->saip != NULL)
1235       return;
1236    if (replace_gi) {
1237      SAM_ReplaceGI(sap);
1238    }
1240    if (sap->next == NULL && sap->segtype == SAS_DENSEG)
1241    {
1242      AlnMgr2IndexSingleChildSeqAlign(sap);
1243      return;
1244    }
1246    AlnMgr2IndexLite(sap);
1247    AlnMgr2DecomposeToPairwise(sap);
1248    amaip = (AMAlignIndex2Ptr)(sap->saip);
1249    amaip->alnstyle = AM2_FULLINDEX;
1250    AlnMgr2HidePairwiseConflicts(sap);
1251    amint_head = AlnMgr2MakeIntervals(sap);
1252    vertex_head = NULL;
1253    edge_head = NULL;
1254    vertexarray = AlnMgr2MakeVerticesFromIntervals(sap, amint_head, &vertex_head, &edge_head, &numvertices);
1255    while (amint_head != NULL)
1256    {
1257       amint = amint_head->next;
1258       AMIntervalSetFree(amint_head);
1259       amint_head = amint;
1260    }
1261    AlnMgr2UsePrimsAlgorithm(vertexarray, numvertices, edge_head);
1262    AlnMgr2BuildAlignmentFromTree(vertexarray, numvertices, edge_head, sap);
1263    for (i=0; i<numvertices; i++)
1264    {
1265       SeqIdFree(vertexarray[i]->sip);
1266       MemFree(vertexarray[i]);
1267    }
1268    MemFree(vertexarray);
1269    while (edge_head != NULL)
1270    {
1271       edge = edge_head->next;
1272       MemFree(edge_head);
1273       edge_head = edge;
1274    }
1275    amaip = (AMAlignIndex2Ptr)(sap->saip);
1276    amaip->alnstyle = AM2_FULLINDEX;
1277 }
1279 /* SECTION 2c */
1280 /***************************************************************************
1281 *
1282 *  AlnMgr2ReIndexSeqAlign takes an indexed alignment (that has, presumably,
1283 *  been changed), makes sure all child seqaligns are indexed (if they are
1284 *  already indexed they are not reindexed), and reindexes all the child
1285 *  seqaligns as a set.
1286 *
1287 ***************************************************************************/
AlnMgr2ReIndexSeqAlign(SeqAlignPtr sap)1288 NLM_EXTERN void AlnMgr2ReIndexSeqAlign(SeqAlignPtr sap)
1289 {
1290    AMAlignIndex2Ptr   amaip;
1291    AMIntervalSetPtr  amint;
1292    AMIntervalSetPtr  amint_head;
1293    AMEdgePtr         edge_head;
1294    Int4              i;
1295    Int4              numvertices;
1296    AMVertexPtr       vertex_head;
1297    AMVertexPtr       PNTR vertexarray;
1299    if (sap == NULL)
1300       return;
1301    if (sap->saip == NULL)
1302    {
1303       AlnMgr2IndexSeqAlign(sap);
1304       return;
1305    }
1306    if (sap->saip->indextype == INDEX_CHILD)
1307       return;
1308    amaip = (AMAlignIndex2Ptr)(sap->saip);
1309    for (i=0; i<amaip->numsaps; i++)
1310    {
1311       if (amaip->saps[i]->saip == NULL)
1312          AlnMgr2IndexSingleChildSeqAlign(amaip->saps[i]);
1313    }
1314    if (amaip->alnstyle != AM2_LITE)
1315       return;
1316    AlnMgr2DecomposeToPairwise(sap);
1317    AlnMgr2HidePairwiseConflicts(sap);
1318    amint_head = AlnMgr2MakeIntervals(sap);
1319    vertex_head = NULL;
1320    edge_head = NULL;
1321    vertexarray = AlnMgr2MakeVerticesFromIntervals(sap, amint_head, &vertex_head, &edge_head, &numvertices);
1322    while (amint_head != NULL)
1323    {
1324       amint = amint_head->next;
1325       AMIntervalSetFree(amint_head);
1326       amint_head = amint;
1327    }
1328    AlnMgr2UsePrimsAlgorithm(vertexarray, numvertices, edge_head);
1329    AlnMgr2BuildAlignmentFromTree(vertexarray, numvertices, edge_head, sap);
1330    MemFree(vertexarray);
1331 }
AlnMgr2CompareByAnchor(VoidPtr ptr1,VoidPtr ptr2)1333 static int LIBCALLBACK AlnMgr2CompareByAnchor(VoidPtr ptr1, VoidPtr ptr2)
1334 {
1335    DenseSegPtr  dsp;
1336    int          ret;
1337    SAIndex2Ptr  saip1;
1338    SAIndex2Ptr  saip2;
1339    SeqAlignPtr  sap1;
1340    SeqAlignPtr  sap2;
1341    SeqIdPtr     sip1;
1342    SeqIdPtr     sip2;
1343    Int4         start1;
1344    Int4         start2;
1345    Int4         stop1;
1346    Int4         stop2;
1348    sap1 = *((SeqAlignPtr PNTR)ptr1);
1349    sap2 = *((SeqAlignPtr PNTR)ptr2);
1350    saip1 = (SAIndex2Ptr)(sap1->saip);
1351    saip2 = (SAIndex2Ptr)(sap2->saip);
1352    dsp = (DenseSegPtr)(sap1->segs);
1353    if (saip1->tmp == 1)
1354       sip1 = dsp->ids->next;
1355    else
1356       sip1 = dsp->ids;
1357    dsp = (DenseSegPtr)(sap2->segs);
1358    if (saip2->tmp == 1)
1359       sip2 = dsp->ids->next;
1360    else
1361       sip2 = dsp->ids;
1362    ret = AlnMgr2OrderSeqIds(sip1, sip2);
1363    if (ret != 0)
1364       return ret;
1365    /* these share both ids -- put best first */
1366    if (saip1->score == 0)
1367       saip1->score = AlnMgr2ComputeScoreForSeqAlign(sap1);
1368    if (saip2->score == 0)
1369       saip2->score = AlnMgr2ComputeScoreForSeqAlign(sap2);
1370    if (saip1->score > saip2->score)
1371       return -1;
1372    else if (saip1->score < saip2->score)
1373       return 1;
1374    AlnMgr2GetNthSeqRangeInSA(sap1, saip1->tmp, &start1, &stop1);
1375    AlnMgr2GetNthSeqRangeInSA(sap2, saip2->tmp, &start2, &stop2);
1376    if (start1 < start2)
1377       return -1;
1378    else if (start1 > start2)
1379       return 1;
1380    else if (stop1 > stop2)
1381       return -1;
1382    else if (stop1 < stop2)
1383       return 1;
1384    return 0;
1385 }
1387 /* SECTION 2c */
AlnMgr2IndexAsRows(SeqAlignPtr sap,Uint1 strand,Boolean truncate)1388 NLM_EXTERN Boolean AlnMgr2IndexAsRows(SeqAlignPtr sap, Uint1 strand, Boolean truncate)
1389 {
1390    AMAlignIndex2Ptr  amaip;
1391    DenseSegPtr       dsp;
1392    DenseSegPtr       dsp_tmp;
1393    Boolean           found;
1394    Int4              i;
1395    Boolean           impossible;
1396    Int4              numsaps;
1397    SAIndex2Ptr       saip;
1398    SeqAlignPtr       salp;
1399    SeqAlignPtr       sap_head;
1400    SeqAlignPtr       sap_prev;
1401    SeqAlignPtr       sap_tmp;
1402    SeqAlignPtr       PNTR saparray;
1403    SeqAlignPtr       set_head;
1404    SeqAlignPtr       set_prev;
1405    SeqIdPtr          sharedsip;
1406    SeqIdPtr          sip;
1407    SeqIdPtr          sip_next;
1408    SeqIdPtr          sip_tmp;
1409    Int4              tmp;
1411    if (sap == NULL)
1412       return FALSE;
1413    if (sap->saip != NULL)
1414       AMAlignIndexFreeEitherIndex(sap);
1415    AlnMgr2IndexLite(sap);
1416    AlnMgr2DecomposeToPairwise(sap);
1417    /* need to figure out which row is shared by all saps */
1418    sap_tmp = (SeqAlignPtr)(sap->segs);
1419    dsp = (DenseSegPtr)(sap_tmp->segs);
1420    sip = dsp->ids;
1421    found = FALSE;
1422    while (!found && sip != NULL)
1423    {
1424       sap_tmp = (SeqAlignPtr)(sap->segs);
1425       sip_next = sip->next;
1426       sip->next = NULL;
1427       impossible = FALSE;
1428       while (!impossible && sap_tmp != NULL)
1429       {
1430          dsp_tmp = (DenseSegPtr)(sap_tmp->segs);
1431          if (AlnMgr2SeqIdListsOverlap(sip, dsp_tmp->ids) == NULL)
1432             impossible = TRUE;
1433          sap_tmp = sap_tmp->next;
1434       }
1435       sip->next = sip_next;
1436       if (!impossible) /* found one that matched a row in every alignment */
1437          found = TRUE;
1438       else
1439          sip = sip_next;
1440    }
1441    if (!found) /* didn't find a seqid that was contained in all alignments */
1442       return FALSE;
1443    /* mark the shared row to make things easier */
1444    sharedsip = SeqIdDup(sip);
1445    sap_tmp = (SeqAlignPtr)(sap->segs);
1446    i = 0;
1447    while (sap_tmp != NULL)
1448    {
1449       saip = (SAIndex2Ptr)(sap_tmp->saip);
1450       dsp_tmp = (DenseSegPtr)(sap_tmp->segs);
1451       if (SeqIdComp(sharedsip, dsp_tmp->ids) == SIC_YES)
1452          saip->tmp = 1;
1453       else
1454          saip->tmp = 2;
1455       sap_tmp = sap_tmp->next;
1456       i++;
1457    }
1458    saparray = (SeqAlignPtr PNTR)MemNew(i*sizeof(SeqAlignPtr));
1459    sap_tmp = (SeqAlignPtr)(sap->segs);
1460    i = 0;
1461    while (sap_tmp != NULL)
1462    {
1463       saparray[i] = sap_tmp;
1464       i++;
1465       sap_tmp = sap_tmp->next;
1466    }
1467    numsaps = i;
1468    HeapSort(saparray, i, sizeof(SeqAlignPtr), AlnMgr2CompareByAnchor);
1469    /* now each clump of alignments is a row -- need to eliminate overlaps next */
1470    sip = NULL;
1471    i = 0;
1472    sap_head = sap_prev = NULL;
1473    while (i<numsaps)
1474    {
1475       saparray[i]->next = NULL;
1476       set_head = set_prev = saparray[i];
1477       saip = (SAIndex2Ptr)(saparray[i]->saip);
1478       sip = AlnMgr2GetNthSeqIdPtr(saparray[i], 3-saip->tmp); /* get other seqid */
1479       i++;
1480       if (i<numsaps)
1481          sip_tmp = AlnMgr2GetNthSeqIdPtr(saparray[i], 3-saip->tmp);
1482       while (i<numsaps && SeqIdComp(sip, sip_tmp) == SIC_YES)
1483       {
1484          set_prev->next = saparray[i];
1485          set_prev = saparray[i];
1486          saparray[i]->next = NULL;
1487          i++;
1488          SeqIdFree(sip_tmp);
1489          if (i<numsaps)
1490             sip_tmp = AlnMgr2GetNthSeqIdPtr(saparray[i], 3-saip->tmp);
1491       }
1492       AlnMgr2IndexLite(set_head);
1493       if (!truncate)
1494          AlnMgr2RemoveInconsistentAlnsFromSet(set_head, 0);
1495       else
1496          AlnMgr2RemoveInconsistentAlnsFromSet(set_head, -1);
1497       sap_tmp = (SeqAlignPtr)(set_head->segs);
1498       while (sap_tmp != NULL)
1499       {
1500          saip = (SAIndex2Ptr)(sap_tmp->saip);
1501          dsp_tmp = (DenseSegPtr)(sap_tmp->segs);
1502          if (SeqIdComp(sharedsip, dsp_tmp->ids) == SIC_YES)
1503             saip->tmp = 1;
1504          else
1505             saip->tmp = 2;
1506          sap_tmp = sap_tmp->next;
1507       }
1508       if (sap_head != NULL)
1509          sap_prev->next = set_head;
1510       else
1511          sap_head = sap_prev = set_head;
1512       while (sap_prev->next != NULL)
1513       {
1514          sap_prev = sap_prev->next;
1515       }
1516       sap_prev->next = NULL;
1517    }
1518    /* now we have lots of freed pointers sitting in the array */
1519    MemFree(saparray);
1520    saparray = NULL;
1521    /* sap_head is the head of a chain of LITE-indexed alignments, each of which is one row */
1522    /* first make sure that the shared row is on the requested strand */
1523    sap_tmp = sap_head;
1524    if (strand == Seq_strand_both || strand == Seq_strand_unknown || strand == 0)
1525       strand = Seq_strand_plus;
1526    while (sap_tmp != NULL)
1527    {
1528       salp = (SeqAlignPtr)(sap_tmp->segs);
1529       saip = (SAIndex2Ptr)(salp->saip);
1530       /* strand is same for all children */
1531       if (AlnMgr2GetNthStrand(salp, saip->tmp) != strand)
1532       {
1533          SeqAlignListReverseStrand(salp);
1534          while (salp != NULL)
1535          {
1536             saip = (SAIndex2Ptr)salp->saip;
1537             tmp = saip->tmp;
1538             SAIndex2Free2(salp->saip);
1539             salp->saip = NULL;
1540             AlnMgr2IndexSingleChildSeqAlign(salp);
1541             saip = (SAIndex2Ptr)salp->saip;
1542             saip->tmp = tmp;
1543             salp = salp->next;
1544          }
1545       }
1546       sap_tmp = sap_tmp->next;
1547    }
1548    sap_tmp = sap_head;
1549    sap->segs = NULL;
1550    AMAlignIndex2Free2(sap->saip);
1551    sap->saip = (SeqAlignIndexPtr)AMAlignIndex2New();
1552    amaip = (AMAlignIndex2Ptr)(sap->saip);
1553    amaip->alnstyle = AM2_FULLINDEX;
1554    set_head = set_prev = NULL;
1555    while (sap_tmp != NULL)
1556    {
1557       salp = (SeqAlignPtr)(sap_tmp->segs);
1558       while (salp != NULL)
1559       {
1560          AlnMgr2AddInNewPairwiseSA(sap, salp);
1561          if (set_head != NULL)
1562          {
1563             set_prev->next = salp;
1564             set_prev = salp;
1565          } else
1566             set_head = set_prev = salp;
1567          salp = salp->next;
1568       }
1569       sap_tmp->segs = NULL;
1570       sap_tmp = sap_tmp->next;
1571    }
1572    AlnMgr2CondenseColumns((DenseSegPtr)(amaip->sharedaln->segs));
1573    AlnMgr2IndexSingleChildSeqAlign(amaip->sharedaln);
1574    set_prev->next = NULL;
1575    sap->segs = (Pointer)(set_head);
1576    SeqAlignListFree(sap_head);
1577    SeqIdFree(sharedsip);
1578    return TRUE;
1579 }
1581 /* SECTION 2c */
1582 /***************************************************************************
1583 *
1584 *  AlnMgr2IndexIndexedChain takes a linked list of indexed seqaligns
1585 *  and does an in-place transformation to an indexed parent-child
1586 *  seqalign set.
1587 *
1588 ***************************************************************************/
AlnMgr2IndexIndexedChain(SeqAlignPtr sap)1589 NLM_EXTERN void AlnMgr2IndexIndexedChain(SeqAlignPtr sap)
1590 {
1591    AMAlignIndex2Ptr  amaip;
1592    AMIntervalSetPtr  amint;
1593    AMIntervalSetPtr  amint_head;
1594    AMEdgePtr         edge_head;
1595    Int4              numvertices;
1596    AMVertexPtr       vertex_head;
1597    AMVertexPtr       PNTR vertexarray;
1599    if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_CHILD)
1600       return;
1601    AlnMgr2IndexLite(sap);
1602    AlnMgr2DecomposeToPairwise(sap);
1603    amaip = (AMAlignIndex2Ptr)(sap->saip);
1604    amaip->alnstyle = AM2_FULLINDEX;
1605    AlnMgr2HidePairwiseConflicts(sap);
1606    amint_head = AlnMgr2MakeIntervals(sap);
1607    vertex_head = NULL;
1608    edge_head = NULL;
1609    vertexarray = AlnMgr2MakeVerticesFromIntervals(sap, amint_head, &vertex_head, &edge_head, &numvertices);
1610    while (amint_head != NULL)
1611    {
1612       amint = amint_head->next;
1613       AMIntervalSetFree(amint_head);
1614       amint_head = amint;
1615    }
1616    AlnMgr2UsePrimsAlgorithm(vertexarray, numvertices, edge_head);
1617    AlnMgr2BuildAlignmentFromTree(vertexarray, numvertices, edge_head, sap);
1618    MemFree(vertexarray);
1619 }
1621 /* SECTION 2c */
1622 /***************************************************************************
1623 *
1624 *  AlnMgr2DecomposeToPairwise takes a parent seqalign and goes through all
1625 *  its children, checking their dimensions. If a child seqalign is found
1626 *  with dimension greater than 2, that alignment is copied into a set of
1627 *  two-row alignments, each new alignment containing the first row of the
1628 *  original alignment and a different row. This function does NOT take out
1629 *  segs with only gaps (is this a problem????). The resulting seqaligns
1630 *  are all individually indexed and then the whole set is indexed lite.
1631 *
1632 ***************************************************************************/
AlnMgr2DecomposeToPairwise(SeqAlignPtr sap)1633 static void AlnMgr2DecomposeToPairwise(SeqAlignPtr sap)
1634 {
1635    DenseSegPtr  dsp;
1636    DenseSegPtr  dsp_orig;
1637    Int4         i;
1638    Int4         j;
1639    Int4         n;
1640    SAIndex2Ptr   saip;
1641    SAIndex2Ptr   saip_orig;
1642    SeqAlignPtr  salp;
1643    SeqAlignPtr  salp_new;
1644    SeqAlignPtr  salp_next;
1645    SeqAlignPtr  salp_prev;
1647    if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
1648       return;
1649    salp = (SeqAlignPtr)(sap->segs);
1650    salp_prev = NULL;
1651    while (salp)
1652    {
1653       n = AlnMgr2GetNumRows(salp);
1654       if (n > 2)
1655       {
1656          salp_next = salp->next;
1657          saip_orig = (SAIndex2Ptr)(salp->saip);
1658          for (i=2; i<=n; i++)
1659          {
1660             salp_new = SeqAlignNew();
1661             dsp_orig = (DenseSegPtr)(salp->segs);
1662             dsp = DenseSegNew();
1663             dsp->dim = 2;
1664             dsp->numseg = dsp_orig->numseg;
1665             dsp->ids = AlnMgr2GetNthSeqIdPtr(salp, 1);
1666             dsp->ids->next = AlnMgr2GetNthSeqIdPtr(salp, i);
1667             dsp->starts = (Int4Ptr)MemNew(dsp->numseg*2*sizeof(Int4));
1668             dsp->lens = (Int4Ptr)MemNew(dsp->numseg*sizeof(Int4));
1669             dsp->strands = (Uint1Ptr)MemNew(dsp->numseg*2*sizeof(Uint1));
1670             for (j=0; j<dsp->numseg; j++)
1671             {
1672                dsp->lens[j] = dsp_orig->lens[j];
1673                dsp->starts[2*j] = dsp_orig->starts[dsp_orig->dim*j];
1674                dsp->starts[2*j+1] = dsp_orig->starts[dsp_orig->dim*j+i-1];
1675                dsp->strands[2*j] = dsp_orig->strands[dsp_orig->dim*j];
1676                dsp->strands[2*j+1] = dsp_orig->strands[dsp_orig->dim*j+i-1];
1677             }
1678             salp_new = SeqAlignNew();
1679             salp_new->dim = 2;
1680             salp_new->segs = (Pointer)dsp;
1681             salp_new->segtype = SAS_DENSEG;
1682             AlnMgr2IndexSingleChildSeqAlign(salp_new);
1683             saip = (SAIndex2Ptr)(salp_new->saip);
1684             saip->numinchain = saip_orig->numinchain;
1685             saip->numsplitaln = i-1;
1686             if (salp_prev == NULL)
1687             {
1688                salp_prev = salp_new;
1689                sap->segs = (Pointer)salp_new;
1690             } else
1691             {
1692                salp_prev->next = salp_new;
1693                salp_prev = salp_new;
1694             }
1695          }
1696          salp_prev->next = salp_next;
1697          salp->next = NULL;
1698          SeqAlignFree(salp);
1699          salp = salp_next;
1700       } else
1701       {
1702          salp_prev = salp;
1703          salp = salp->next;
1704       }
1705    }
1706    AMAlignIndex2Free2(sap->saip);
1707    sap->saip = NULL;
1708    AlnMgr2IndexLite(sap);
1709 }
1711 /* SECTION 2c */
1712 /***************************************************************************
1713 *
1714 *  AlnMgr2HidePairwiseConflicts looks through a set of indexed seqaligns
1715 *  to find pairs of alignments that share the same seqids and that provide
1716 *  conflicting information. These pairs are then sent to AlnMgr2TossWorse,
1717 *  which hides the worse alignment by unaligning it. Note that the hidden
1718 *  alignments are not destroyed and are not taken out of the set.
1719 *
1720 ***************************************************************************/
AlnMgr2HidePairwiseConflicts(SeqAlignPtr sap)1721 static void AlnMgr2HidePairwiseConflicts(SeqAlignPtr sap)
1722 {
1723    AMAlignIndex2Ptr  amaip;
1724    Int4             i;
1725    Boolean          inset;
1726    Int4             j;
1727    Boolean          match;
1728    SeqIdPtr         sip11;
1729    SeqIdPtr         sip12;
1730    SeqIdPtr         sip21;
1731    SeqIdPtr         sip22;
1732    Int4             start11;
1733    Int4             start12;
1734    Int4             start21;
1735    Int4             start22;
1736    Int4             stop11;
1737    Int4             stop12;
1738    Int4             stop21;
1739    Int4             stop22;
1740    Int4Ptr          tossed;
1742    if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
1743       return;
1744    amaip = (AMAlignIndex2Ptr)(sap->saip);
1745    AlnMgr2SortBySeqId(sap);
1746    tossed = (Int4Ptr)MemNew(amaip->numsaps*sizeof(Int4));
1747    for (i=0; i<amaip->numsaps-1; i++)
1748    {
1749       for (j=0; j<amaip->numsaps; j++)
1750       {
1751          tossed[j] = 0;
1752       }
1753       inset = TRUE;
1754       for (j=i+1; amaip->aligned[i] && j<amaip->numsaps && inset == TRUE; j++)
1755       {
1756          if (tossed[j] == 0 && amaip->aligned[i] && amaip->aligned[j])
1757          {
1758             sip11 = AlnMgr2GetNthSeqIdPtr(amaip->saps[i], 1);
1759             sip12 = AlnMgr2GetNthSeqIdPtr(amaip->saps[i], 2);
1760             sip21 = AlnMgr2GetNthSeqIdPtr(amaip->saps[j], 1);
1761             sip22 = AlnMgr2GetNthSeqIdPtr(amaip->saps[j], 2);
1762             match = FALSE;
1763             if (SeqIdComp(sip11, sip21) == SIC_YES && SeqIdComp(sip12, sip22) == SIC_YES)
1764             {
1765                match = TRUE;
1766                AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], 1, &start11, &stop11);
1767                AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], 2, &start12, &stop12);
1768                AlnMgr2GetNthSeqRangeInSA(amaip->saps[j], 1, &start21, &stop21);
1769                AlnMgr2GetNthSeqRangeInSA(amaip->saps[j], 2, &start22, &stop22);
1770             } else if (SeqIdComp(sip11, sip22) == SIC_YES && SeqIdComp(sip12, sip21) == SIC_YES)
1771             {
1772                match = TRUE;
1773                AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], 1, &start11, &stop11);
1774                AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], 2, &start12, &stop12);
1775                AlnMgr2GetNthSeqRangeInSA(amaip->saps[j], 2, &start21, &stop21);
1776                AlnMgr2GetNthSeqRangeInSA(amaip->saps[j], 1, &start22, &stop22);
1777             } else if (SeqIdComp(sip11, sip21) != SIC_YES && SeqIdComp(sip11, sip22) != SIC_YES)
1778                inset = FALSE;
1779             if (match == TRUE)
1780             {
1781                if ((start11 < start21 && stop11 > stop21) || (start11 < stop21 && stop11 > stop21) || (start11 > start21 && stop11 < stop21))
1782                {
1783                   AlnMgr2TossWorse(sap, i, j);
1784                   if (amaip->aligned[j] == 0) /* j just got tossed -- put it in the list */
1785                      tossed[j] = 1;
1786                }
1787             }
1788             SeqIdFree(sip11);
1789 	    SeqIdFree(sip12);
1790 	    SeqIdFree(sip21);
1791 	    SeqIdFree(sip22);
1792          }
1793       }
1794       if (amaip->aligned[i] == 0) /* the query alignment got tossed -- restore */
1795       {                           /* all the ones that it tossed out */
1796          for (j=0; j<amaip->numsaps; j++)
1797          {
1798             if (tossed[j] == 1)
1799                amaip->aligned[j] = 1;
1800          }
1801       }
1802    }
1803    MemFree(tossed);
1804 }
1806 /* SECTION 2c */
AlnMgr2SortBySeqId(SeqAlignPtr sap)1807 static void AlnMgr2SortBySeqId(SeqAlignPtr sap)
1808 {
1809    AMAlignIndex2Ptr  amaip;
1810    Int4             i;
1811    SAIndex2Ptr       saip;
1813    amaip = (AMAlignIndex2Ptr)(sap->saip);
1814    for (i=0; i<amaip->numsaps; i++)
1815    {
1816       saip = (SAIndex2Ptr)(amaip->saps[i]->saip);
1817       saip->aligned = amaip->aligned[i];
1818    }
1819    HeapSort(amaip->saps, amaip->numsaps, sizeof(amaip->saps), AlnMgr2CompareIds);
1820    for (i=0; i<amaip->numsaps; i++)
1821    {
1822       saip = (SAIndex2Ptr)(amaip->saps[i]->saip);
1823       amaip->aligned[i] = saip->aligned;
1824    }
1825 }
1827 /* SECTION 2c */
AlnMgr2CompareIds(VoidPtr ptr1,VoidPtr ptr2)1828 static int LIBCALLBACK AlnMgr2CompareIds(VoidPtr ptr1, VoidPtr ptr2)
1829 {
1830    Int4         ret;
1831    SAIndex2Ptr  saip1;
1832    SAIndex2Ptr  saip2;
1833    SeqAlignPtr  sap1;
1834    SeqAlignPtr  sap2;
1835    SeqIdPtr     sip1;
1836    SeqIdPtr     sip2;
1838    if (ptr1 == NULL || ptr2 == NULL)
1839       return 0;
1840    sap1 = *((SeqAlignPtr PNTR) ptr1);
1841    sap2 = *((SeqAlignPtr PNTR) ptr2);
1842    sip1 = AlnMgr2GetNthSeqIdPtr(sap1, 1);
1843    sip2 = AlnMgr2GetNthSeqIdPtr(sap2, 1);
1844    ret = (AlnMgr2OrderSeqIds(sip1, sip2));
1845    SeqIdFree(sip1);
1846    SeqIdFree(sip2);
1847    if (ret != 0)
1848       return ret;
1849    saip1 = (SAIndex2Ptr)(sap1->saip);
1850    saip2 = (SAIndex2Ptr)(sap2->saip);
1851    if (saip1->score == 0)
1852       saip1->score = AlnMgr2ComputeScoreForSeqAlign(sap1);
1853    if (saip2->score == 0)
1854       saip2->score = AlnMgr2ComputeScoreForSeqAlign(sap2);
1855    if (saip1->score > saip2->score)
1856       return -1;
1857    if (saip1->score < saip2->score)
1858       return 1;
1859    return 0;
1860 }
1862 /* SECTION 2c */
1863 /***************************************************************************
1864 *
1865 *  Given an indexed seqalign set, AlnMgr2TossWorse looks at the indicated
1866 *  pair of seqaligns, gets their scores, and sets the unaligned bit of the
1867 *  seqalign with the worse score.
1868 *
1869 ***************************************************************************/
AlnMgr2TossWorse(SeqAlignPtr sap,Int4 i,Int4 j)1870 static void AlnMgr2TossWorse(SeqAlignPtr sap, Int4 i, Int4 j)
1871 {
1872    AMAlignIndex2Ptr  amaip;
1873    SAIndex2Ptr       saip1;
1874    SAIndex2Ptr       saip2;
1875    Int4              score1;
1876    Int4              score2;
1878    amaip = (AMAlignIndex2Ptr)(sap->saip);
1879    saip1 = (SAIndex2Ptr)(amaip->saps[i]->saip);
1880    saip2 = (SAIndex2Ptr)(amaip->saps[j]->saip);
1881    if (saip1->score == 0)
1882       saip1->score = score1 = AlnMgr2ComputeScoreForSeqAlign(amaip->saps[i]);
1883    else
1884       score1 = saip1->score;
1885    if (saip1->score == 0)
1886       saip2->score = score2 = AlnMgr2ComputeScoreForSeqAlign(amaip->saps[j]);
1887    else
1888       score2 = saip2->score;
1889    if (score1 >= score2)
1890       amaip->aligned[j] = FALSE;
1891    else if (score2 > score1)
1892       amaip->aligned[i] = FALSE;
1893 }
1895 /* SECTION 2c */
1896 /***************************************************************************
1897 *
1898 *  AlnMgr2MakeIntervals takes every row from every seqalign and bins it
1899 *  with other sequences with the same seqid and the same strand.
1900 *
1901 ***************************************************************************/
AlnMgr2MakeIntervals(SeqAlignPtr sap)1902 static AMIntervalSetPtr AlnMgr2MakeIntervals(SeqAlignPtr sap)
1903 {
1904    AMAlignIndex2Ptr   amaip;
1905    AMIntervalSetPtr  amint;
1906    AMIntervalSetPtr  amint_head;
1907    AMIntervalSetPtr  amint_prev;
1908    Boolean           found;
1909    Int4              i;
1910    AMIntervalPtr     intv;
1911    AMIntervalPtr     int_prev;
1912    Int4              j;
1913    Int4              k;
1914    SeqIdPtr          sip;
1915    Uint1             strand;
1917    if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
1918       return NULL;
1919    amaip = (AMAlignIndex2Ptr)(sap->saip);
1920    amint_head = amint_prev = NULL;
1921    for (i=0; i<amaip->numsaps; i++)
1922    {
1923       if (amaip->aligned[i])
1924       {
1925          j = AlnMgr2GetNumRows(amaip->saps[i]);
1926          for (k=0; k<j; k++)
1927          {
1928             intv = (AMIntervalPtr)MemNew(sizeof(AMInterval));
1929             AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], k+1, &(intv->from), &(intv->to));
1930             sip = AlnMgr2GetNthSeqIdPtr(amaip->saps[i], k+1);
1931             strand = AlnMgr2GetNthStrand(amaip->saps[i], k+1);
1932             if (strand != Seq_strand_minus)
1933                strand = Seq_strand_plus; /* to avoid dealing with Seq_strand_unknown */
1934             intv->strand = strand;
1935             if (amint_head != NULL) /* figure out which interval set this goes in */
1936             {
1937                amint = amint_head;
1938                found = FALSE;
1939                while (amint != NULL && !found)
1940                {
1941                   if (SeqIdComp(sip, amint->sip) == SIC_YES && strand == amint->strand)
1942                      found = TRUE;
1943                   else
1944                      amint = amint->next;
1945                }
1946                if (found) /* add this to the interval set matched */
1947                {
1948                   int_prev = amint->int_head;
1949                   while (int_prev->next != NULL)
1950                   {
1951                      int_prev = int_prev->next;
1952                   }
1953                   int_prev->next = intv;
1954                } else /* make a new interval set */
1955                {
1956                   amint = (AMIntervalSetPtr)MemNew(sizeof(AMIntervalSet));
1957                   amint->sip = SeqIdDup(sip);
1958                   amint->strand = strand;
1959                   amint->int_head = intv;
1960                   amint_prev = amint_head;
1961                   while (amint_prev->next != NULL)
1962                   {
1963                      amint_prev = amint_prev->next;
1964                   }
1965                   amint_prev->next = amint;
1966                }
1967             } else  /* make a new interval set */
1968             {
1969                amint = (AMIntervalSetPtr)MemNew(sizeof(AMIntervalSet));
1970                amint->sip = SeqIdDup(sip);
1971                amint->strand = strand;
1972                amint->int_head = intv;
1973                amint_head = amint;
1974             }
1975             SeqIdFree(sip);
1976          }
1977       }
1978    }
1979    return amint_head;
1980 }
1982 /* SECTION 2c */
1983 /***************************************************************************
1984 *
1985 *  AlnMgr2MakeVerticesFromIntervals takes the set of intervals created from
1986 *  the alignments, and makes nonoverlapping vertices. Each vertex is a
1987 *  single seqid plus a start and stop (so one seqid may have more than one
1988 *  vertex). Each vertex is also associated with edges, or alignments, which
1989 *  link the vertices together. An edge is simply two vertices plus a weight,
1990 *  which is the alignment quality score. This function creates the vertices,
1991 *  then creates the edges, and sorts the edges and vertices by quality and
1992 *  by number of edges per vertex.
1993 *
1994 ***************************************************************************/
AlnMgr2MakeVerticesFromIntervals(SeqAlignPtr sap,AMIntervalSetPtr amint_head,AMVertexPtr PNTR vertexhead,AMEdgePtr PNTR edgehead,Int4Ptr numvertices)1995 static AMVertexPtr PNTR  AlnMgr2MakeVerticesFromIntervals(SeqAlignPtr sap, AMIntervalSetPtr amint_head, AMVertexPtr PNTR vertexhead, AMEdgePtr PNTR edgehead, Int4Ptr numvertices)
1996 {
1997    AMAlignIndex2Ptr   amaip;
1998    AMIntervalSetPtr  amint;
1999    AMEdgePtr         edge;
2000    AMEdgePtr         edge_head;
2001    AMEdgePtr         edge_prev;
2002    Int4              i;
2003    AMIntervalPtr     intv;
2004    Int4              j;
2005    Int4              k;
2006    Int4              n;
2007    SAIndex2Ptr       saip;
2008    SeqIdPtr          sip1;
2009    SeqIdPtr          sip2;
2010    Int4              start;
2011    Int4              stop;
2012    Int4              v1;
2013    Int4              v2;
2014    AMVertexPtr       vertex;
2015    AMVertexPtr       vertex_head;
2016    AMVertexPtr       vertex_prev;
2017    AMVertexPtr       PNTR vertexarray;
2019    if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
2020       return NULL;
2021    amint = amint_head;
2022    vertex_head = vertex_prev = NULL;
2023    while (amint != NULL)
2024    {
2025       AlnMgr2SortIntervals(amint);
2026       vertex = (AMVertexPtr)MemNew(sizeof(AMVertex));
2027       intv = amint->int_head;
2028       vertex->sip = SeqIdDup(amint->sip);
2029       vertex->strand = amint->strand;
2030       vertex->from = intv->from;
2031       vertex->to = intv->to;
2032       intv = intv->next;
2033       while (intv != NULL)
2034       {
2035          if ((intv->from <= vertex->to && intv->from >= vertex->from) || (intv->to <= vertex->to && intv->to >= vertex->from))
2036          {
2037             if (intv->from < vertex->from)
2038                vertex->from = intv->from;
2039             if (intv->to > vertex->to)
2040                vertex->to = intv->to;
2041          } else
2042          {
2043             if (vertex_head != NULL)
2044             {
2045                vertex_prev->next = vertex;
2046                vertex_prev = vertex;
2047             } else
2048                vertex_head = vertex_prev = vertex;
2049             vertex = (AMVertexPtr)MemNew(sizeof(AMVertex));
2050             vertex->from = intv->from;
2051             vertex->to = intv->to;
2052             vertex->sip = SeqIdDup(amint->sip);
2053             vertex->strand = amint->strand;
2054          }
2055          intv = intv->next;
2056       }
2057       if (vertex_head != NULL)
2058       {
2059          vertex_prev->next = vertex;
2060          vertex_prev = vertex;
2061       } else
2062          vertex_head = vertex_prev = vertex;
2063       amint = amint->next;
2064    }
2065    vertex = vertex_head;
2066    i = 0;
2067    while (vertex != NULL)
2068    {
2069       i++;
2070       vertex = vertex->next;
2071    }
2072    vertexarray = (AMVertexPtr PNTR)MemNew(i*sizeof(AMVertexPtr));
2073    *numvertices = i;
2074    vertex = vertex_head;
2075    i = 0;
2076    while (vertex != NULL)
2077    {
2078       vertexarray[i] = vertex;
2079       vertex = vertex->next;
2080       i++;
2081    }
2082    amaip = (AMAlignIndex2Ptr)(sap->saip);
2083    /* now make the edges from the alignments */
2084    edge_head = NULL;
2085    for (i=0; i<amaip->numsaps; i++)
2086    {
2087       if (amaip->aligned[i])
2088       {
2089          j = AlnMgr2GetNumRows(amaip->saps[i]);
2090          for (k=0; k<j; k++)
2091          {
2092             sip1 = AlnMgr2GetNthSeqIdPtr(amaip->saps[i], k+1);
2093             AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], k+1, &start, &stop);
2094             v1 = AlnMgr2MatchToVertex(sip1, start, stop, vertexarray, *numvertices);
2095             for (n=k+1; n<j; n++)
2096             {
2097                vertexarray[v1]->numedges++;
2098                sip2 = AlnMgr2GetNthSeqIdPtr(amaip->saps[i], n+1);
2099                AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], n+1, &start, &stop);
2100                v2 = AlnMgr2MatchToVertex(sip2, start, stop, vertexarray, *numvertices);
2101                vertexarray[v2]->numedges++;
2102                edge = (AMEdgePtr)MemNew(sizeof(AMEdge));
2103                edge->vertex1 = v1;
2104                edge->vertex2 = v2;
2105                saip = NULL;
2106                if (amaip->saps[i]->saip != NULL)
2107                   saip = (SAIndex2Ptr)(amaip->saps[i]->saip);
2108                if (saip != NULL && saip->score != 0)
2109                   edge->weight = saip->score;
2110                else
2111                   edge->weight = AlnMgr2ComputeScoreForSeqAlign(amaip->saps[i]);
2112                edge->sap = amaip->saps[i];
2113                edge->used = 0;
2114                if (edge_head != NULL)
2115                {
2116                   edge_prev->next = edge;
2117                   edge_prev = edge;
2118                } else
2119                   edge_head = edge_prev = edge;
2120                SeqIdFree(sip2);
2121             }
2122             SeqIdFree(sip1);
2123          }
2124       }
2125    }
2126    AlnMgr2SortEdgesByWeight(&edge_head);
2127    *vertexhead = vertexarray[0];
2128    *edgehead = edge_head;
2129    return vertexarray;
2130 }
2132 /* SECTION 2C */
2133 /***************************************************************************
2134 *
2135 *  AlnMgr2SortVerticesByNumEdges -- the name says it all -- each vertex is
2136 *  associated with one or more edges and the most populated vertices get
2137 *  put first.
2138 *
2139 ***************************************************************************/
AlnMgr2SortVerticesByNumEdges(AMVertexPtr PNTR vertexarray,Int4 numvertices)2140 static void AlnMgr2SortVerticesByNumEdges(AMVertexPtr PNTR vertexarray, Int4 numvertices)
2141 {
2142    Int4  i;
2144    HeapSort(vertexarray, numvertices, sizeof(vertexarray), AlnMgr2CompareVertices);
2145    for (i=0; i<numvertices-1; i++)
2146    {
2147       vertexarray[i]->next = vertexarray[i+1];
2148    }
2149    vertexarray[numvertices-1]->next = NULL;
2150 }
2152 /* SECTION 2c */
2153 /***************************************************************************
2154 *
2155 *  AlnMgr2CompareVertices is the HeapSort callback for
2156 *  AlnMgr2SortVerticesByNumEdges.
2157 *
2158 ***************************************************************************/
AlnMgr2CompareVertices(VoidPtr ptr1,VoidPtr ptr2)2159 static int LIBCALLBACK AlnMgr2CompareVertices(VoidPtr ptr1, VoidPtr ptr2)
2160 {
2161    AMVertexPtr  vertex1;
2162    AMVertexPtr  vertex2;
2164    if (ptr1 != NULL && ptr2 != NULL)
2165    {
2166       vertex1 = *((AMVertexPtr PNTR)ptr1);
2167       vertex2 = *((AMVertexPtr PNTR)ptr2);
2168       if (vertex1->numedges > vertex2->numedges)
2169          return -1;
2170       else if (vertex1->numedges < vertex2->numedges)
2171          return 1;
2172       else
2173          return 0;
2174    }
2175    return 0;
2176 }
2178 /* SECTION 2C */
2179 /***************************************************************************
2180 *
2181 *  AlnMgr2SortEdgesByWeight takes a set of edges (alignments) and sorts
2182 *  them by their preset weights (alignment scores), using AlnMgr2CompareEdges
2183 *  as its HeapSort callback.
2184 *
2185 ***************************************************************************/
AlnMgr2SortEdgesByWeight(AMEdgePtr PNTR edge_head)2186 static void AlnMgr2SortEdgesByWeight(AMEdgePtr PNTR edge_head)
2187 {
2188    AMEdgePtr  edge;
2189    AMEdgePtr  PNTR edgearray;
2190    Int4       i;
2191    Int4       j;
2193    if (edge_head == NULL || *edge_head == NULL)
2194       return;
2195    edge = *edge_head;
2196    i = 0;
2197    while (edge != NULL)
2198    {
2199       i++;
2200       edge = edge->next;
2201    }
2202    edgearray = (AMEdgePtr PNTR)MemNew(i*sizeof(AMEdgePtr));
2203    edge = *edge_head;
2204    i = 0;
2205    while (edge != NULL)
2206    {
2207       edgearray[i] = edge;
2208       edge = edge->next;
2209       i++;
2210    }
2211    HeapSort(edgearray, i, sizeof(edgearray), AlnMgr2CompareEdges);
2212    for (j=0; j<i-1; j++)
2213    {
2214       edgearray[j]->next = edgearray[j+1];
2215    }
2216    edgearray[i-1]->next = NULL;
2217    *edge_head = edgearray[0];
2218    MemFree(edgearray);
2219 }
2221 /* SECTION 2c */
2222 /***************************************************************************
2223 *
2224 *  AlnMgr2CompareEdges is the HeapSort callback for AlnMgr2SortEdgesByWeight.
2225 *  It simply compares the preset edge weights.
2226 *
2227 ***************************************************************************/
AlnMgr2CompareEdges(VoidPtr ptr1,VoidPtr ptr2)2228 static int LIBCALLBACK AlnMgr2CompareEdges(VoidPtr ptr1, VoidPtr ptr2)
2229 {
2230    AMEdgePtr  edge1;
2231    AMEdgePtr  edge2;
2233    if (ptr1 != NULL && ptr2 != NULL)
2234    {
2235       edge1 = *((AMEdgePtr PNTR)ptr1);
2236       edge2 = *((AMEdgePtr PNTR)ptr2);
2237       if (edge1->weight > edge2->weight)
2238          return -1;
2239       else if (edge1->weight < edge2->weight)
2240          return 1;
2241       else
2242          return 0;
2243    }
2244    return 0;
2245 }
2247 /* SECTION 2c */
2248 /***************************************************************************
2249 *
2250 *  AlnMgr2MatchToVertex is called by AlnMgr2MakeVerticesFromIntervals to
2251 *  figure out which vertex in the array the seqid, start, and stop match to.
2252 *
2253 ***************************************************************************/
AlnMgr2MatchToVertex(SeqIdPtr sip,Int4 start,Int4 stop,AMVertexPtr PNTR vertexarray,Int4 numvertices)2254 static Int4 AlnMgr2MatchToVertex(SeqIdPtr sip, Int4 start, Int4 stop, AMVertexPtr PNTR vertexarray, Int4 numvertices)
2255 {
2256    Int4  i;
2258    if (sip == NULL || vertexarray == NULL)
2259       return -1;
2260    i = 0;
2261    while (i<numvertices)
2262    {
2263       if (SeqIdComp(sip, vertexarray[i]->sip) == SIC_YES)
2264       {
2265          if (start >= vertexarray[i]->from && start <= vertexarray[i]->to && stop >= vertexarray[i]->from && stop <= vertexarray[i]->to)
2266             return i;
2267       }
2268       i++;
2269    }
2270    return -1;
2271 }
2273 /* SECTION 2c */
2274 /***************************************************************************
2275 *
2276 *  AlnMgr2SortIntervals sorts the AMIntervals by start position within the
2277 *  set, calling AlnMgr2CompareIntervals in a HeapSort.
2278 *
2279 ***************************************************************************/
AlnMgr2SortIntervals(AMIntervalSetPtr amint)2280 static void AlnMgr2SortIntervals(AMIntervalSetPtr amint)
2281 {
2282    Int4           i;
2283    AMIntervalPtr  PNTR intarray;
2284    AMIntervalPtr  intv;
2285    AMIntervalPtr  intv_head;
2286    Int4           j;
2288    i = 0;
2289    intv = amint->int_head;
2290    while (intv != NULL)
2291    {
2292       i++;
2293       intv = intv->next;
2294    }
2295    intarray = (AMIntervalPtr PNTR)MemNew(i*sizeof(AMIntervalPtr));
2296    intv = amint->int_head;
2297    i = 0;
2298    while (intv != NULL)
2299    {
2300       intarray[i] = intv;
2301       intv = intv->next;
2302       i++;
2303    }
2304    HeapSort(intarray, i, sizeof(intarray), AlnMgr2CompareIntervals);
2305    intv_head = intv = intarray[0];
2306    for (j=1; j<i; j++)
2307    {
2308       intv->next = intarray[j];
2309       intarray[j]->next = NULL;
2310       intv = intv->next;
2311    }
2312    amint->int_head = intv_head;
2313    MemFree(intarray);
2314 }
2316 /* SECTION 2c */
2317 /***************************************************************************
2318 *
2319 *  AlnMgr2CompareIntervals is the HeapSort callback for
2320 *  AlnMgr2SortIntervals, which sorts a set of AMIntervals by start position.
2321 *
2322 ***************************************************************************/
AlnMgr2CompareIntervals(VoidPtr ptr1,VoidPtr ptr2)2323 static int LIBCALLBACK AlnMgr2CompareIntervals(VoidPtr ptr1, VoidPtr ptr2)
2324 {
2325    AMIntervalPtr  intv1;
2326    AMIntervalPtr  intv2;
2328    if (ptr1 != NULL && ptr2 != NULL)
2329    {
2330       intv1 = *((AMIntervalPtr PNTR)ptr1);
2331       intv2 = *((AMIntervalPtr PNTR)ptr2);
2332       if (intv1->from > intv2->from)
2333          return 1;
2334       else if (intv1->from < intv2->from)
2335          return -1;
2336       else
2337       {
2338          if (intv1->to > intv2->to)
2339             return 1;
2340          else
2341             return -1;
2342       }
2343    }
2344    return 0;
2345 }
2347 /* SECTION 2c */
2348 /***************************************************************************
2349 *
2350 *  AlnMgr2UsePrimsAlgorithm takes the set of edges and vertices produced by
2351 *  earlier functions and creates a subset of edges that can be made into
2352 *  a multiple alignment.
2353 *
2354 ***************************************************************************/
AlnMgr2UsePrimsAlgorithm(AMVertexPtr PNTR vertexarray,Int4 numvertices,AMEdgePtr edge_head)2355 static void AlnMgr2UsePrimsAlgorithm(AMVertexPtr PNTR vertexarray, Int4 numvertices, AMEdgePtr edge_head)
2356 {
2357    if (vertexarray == NULL || edge_head == NULL)
2358       return;
2359    edge_head->used = AM_USED;
2360    vertexarray[edge_head->vertex1]->used = TRUE;
2361    vertexarray[edge_head->vertex2]->used = TRUE;
2362    AlnMgr2RecursePrims(vertexarray, edge_head);
2363    AlnMgr2CleanUpLeftovers(vertexarray, numvertices, edge_head);
2364    return;
2365 }
2367 /* SECTION 2C */
AlnMgr2GetEdgeList(Int4 vertexnum,AMEdgePtr edge_head,AMEdgePtr already_used)2368 static AMEdgePtr AlnMgr2GetEdgeList(Int4 vertexnum, AMEdgePtr edge_head, AMEdgePtr already_used)
2369 {
2370    AMEdgePtr  edge;
2371    AMEdgePtr  list;
2372    AMEdgePtr  list_head;
2373    AMEdgePtr  list_prev;
2375    edge = edge_head;
2376    list_head = NULL;
2377    while (edge != NULL)
2378    {
2379       if ((edge->vertex1 == vertexnum || edge->vertex2 == vertexnum) && edge != already_used)
2380       {
2381          list = (AMEdgePtr)MemNew(sizeof(AMEdge));
2382          list->vertex1 = edge->vertex1;
2383          list->vertex2 = edge->vertex2;
2384          list->weight = edge->weight;
2385          list->used = edge->used;
2386          if (list_head != NULL)
2387          {
2388             list_prev->next = list;
2389             list_prev = list;
2390          } else
2391             list_head = list_prev = list;
2392       }
2393       edge = edge->next;
2394    }
2395    return list_head;
2396 }
2398 /* SECTION 2C */
2399 /***************************************************************************
2400 *
2401 *  AlnMgr2GetBetterVertex returns the vertex of the edge indicated that
2402 *  is shared by the largest number of other edges.
2403 *
2404 ***************************************************************************/
AlnMgr2GetBetterVertex(AMVertexPtr PNTR vertexarray,AMEdgePtr edge)2405 static AMVertexPtr AlnMgr2GetBetterVertex(AMVertexPtr PNTR vertexarray, AMEdgePtr edge)
2406 {
2407    if (vertexarray[edge->vertex1]->numedges >= vertexarray[edge->vertex2]->numedges)
2408       return vertexarray[edge->vertex1];
2409    else
2410       return vertexarray[edge->vertex2];
2411 }
2413 /* SECTION 2C */
2414 /***************************************************************************
2415 *
2416 *  AlnMgr2RecursePrims is a simple yet powerful algorithm that builds a
2417 *  minimal spanning tree of the edges and vertexes by starting with a set
2418 *  of edges and vertices, picking the best/shortest edge, then picking
2419 *  other edges one by one that join a vertex in the set with a vertex not
2420 *  in the set, until all edges are used (or deemed impossible).
2421 *
2422 ***************************************************************************/
AlnMgr2RecursePrims(AMVertexPtr PNTR vertexarray,AMEdgePtr edge_head)2423 static void AlnMgr2RecursePrims(AMVertexPtr PNTR vertexarray, AMEdgePtr edge_head)
2424 {
2425    AMEdgePtr  edge;
2426    Boolean    found;
2428    edge = edge_head;
2429    found = FALSE;
2430    /* find an edge that isn't used, that joins a vertex in the set */
2431    /* with a vertex outside the set, and add it and the new vertex */
2432    while (edge != NULL && !found)
2433    {
2434       if (edge->used == AM_NOTUSED)
2435       {
2436          if (vertexarray[edge->vertex1]->used != vertexarray[edge->vertex2]->used)
2437          {
2438             found = TRUE;
2439             vertexarray[edge->vertex1]->used = TRUE;
2440             vertexarray[edge->vertex2]->used = TRUE;
2441             edge->used = AM_USED;
2442             AlnMgr2RecursePrims(vertexarray, edge_head);
2443          }
2444       }
2445       edge = edge->next;
2446    }
2447 }
2449 /* SECTION 2C */
2450 /***************************************************************************
2451 *
2452 *  AlnMgr2CleanUpLeftovers takes the edges that are unused after
2453 *  AlnMgr2RecursePrims and looks for edges that duplicate another edge in
2454 *  the set or edges that share a seqid (but not a vertex) with another edge
2455 *  already in the set. It adds these edges to the set; they don't belong
2456 *  there in tree-based terms but as alignments they are related.
2457 *
2458 ***************************************************************************/
AlnMgr2CleanUpLeftovers(AMVertexPtr PNTR vertexarray,Int4 numvertices,AMEdgePtr edge_head)2459 static void AlnMgr2CleanUpLeftovers(AMVertexPtr PNTR vertexarray, Int4 numvertices, AMEdgePtr edge_head)
2460 {
2461    AMEdgePtr  edge;
2462    AMEdgePtr  edge_tmp;
2463    Boolean    found;
2464    Int4       i;
2465    BoolPtr    tmpverts;
2467    tmpverts = (BoolPtr)MemNew(numvertices*sizeof(Boolean));
2468    for (i=0; i<numvertices; i++)
2469    {
2470       tmpverts[i] = vertexarray[i]->used;
2471    }
2472    edge = edge_head;
2473    while (edge != NULL)
2474    {
2475       if (edge->used == AM_NOTUSED)
2476       {
2477          if (tmpverts[edge->vertex1] == TRUE && tmpverts[edge->vertex2] == TRUE)
2478          {
2479             /* see if this edge duplicates another edge; if so, add it */
2480             edge_tmp = edge_head;
2481             found = FALSE;
2482             while (edge_tmp != NULL && !found)
2483             {
2484                if ((edge->vertex1 == edge_tmp->vertex1 && edge->vertex2 == edge_tmp->vertex2) || (edge->vertex1 == edge_tmp->vertex2 && edge->vertex2 == edge_tmp->vertex1))
2485                {
2486                   found = TRUE;
2487                   edge->used = AM_USED;
2488                }
2489                edge_tmp = edge_tmp->next;
2490             }
2491             if (!found)
2492                edge->used = AM_CONFLICT;
2493          } else if (tmpverts[edge->vertex1] == FALSE && tmpverts[edge->vertex2] == FALSE)
2494          {
2495             /* if one of the vertices shares a seqid with a vertex in the set, put both vertices */
2496             /* and the edge in the set. */
2497             found = FALSE;
2498             for (i=0; i<numvertices && !found; i++)
2499             {
2500                if (tmpverts[i] == TRUE && (SeqIdComp(vertexarray[i]->sip, vertexarray[edge->vertex1]->sip) == SIC_YES || SeqIdComp(vertexarray[i]->sip, vertexarray[edge->vertex2]->sip) == SIC_YES))
2501                {
2502                   found = TRUE;
2503                   vertexarray[edge->vertex1]->used = TRUE;
2504                   vertexarray[edge->vertex2]->used = TRUE;
2505                   edge->used = AM_USED;
2506                }
2507             }
2508             if (!found)
2509                edge->used = AM_CONFLICT;
2510          }
2511       }
2512       edge = edge->next;
2513    }
2514    MemFree(tmpverts);
2515 }
2517 /* SECTION 2C */
2518 /***************************************************************************
2519 *
2520 *  AlnMgr2SameSeq decides whether two vertices come from the same
2521 *  sequence (simple seqid compare).
2522 *
2523 ***************************************************************************/
AlnMgr2SameSeq(AMVertexPtr vertex1,AMVertexPtr vertex2)2524 static Boolean AlnMgr2SameSeq(AMVertexPtr vertex1, AMVertexPtr vertex2)
2525 {
2526    if (vertex1 == NULL || vertex2 == NULL)
2527       return FALSE;
2528    if (SeqIdComp(vertex1->sip, vertex2->sip) == SIC_YES)
2529       return TRUE;
2530    else
2531       return FALSE;
2532 }
2535 /* SECTION 2C */
2536 /***************************************************************************
2537 *
2538 *  AlnMgr2BuildAlignmentFromTree performs a breadth-first traversal of
2539 *  the tree, adding edges to the growing alignment as it goes.
2540 *
2541 ***************************************************************************/
/* Walk the edge list breadth-first and merge every tree edge into the
 * shared alignment of sap.
 *
 * vertexarray  array of numvertices vertices, indexed by edge->vertex1/2
 * numvertices  number of entries in vertexarray
 * edge_head    head of the edge list; its alignment is merged first
 * sap          parent alignment whose index (sap->saip) receives the result
 *
 * The vertices' ->next links are cleared for the duration of the walk
 * (AlnMgr2GetAdjacentVertices reuses them to return its result list) and
 * are rebuilt in array order before returning.  Once all edges have been
 * processed the shared alignment is condensed and re-indexed. */
static void AlnMgr2BuildAlignmentFromTree(AMVertexPtr PNTR vertexarray, Int4 numvertices, AMEdgePtr edge_head, SeqAlignPtr sap)
{
   AMAlignIndex2Ptr  amaip;
   AMQueuePtr        front;
   AMQueuePtr        back;
   AMQueuePtr        node;
   AMVertexPtr       current;
   AMVertexPtr       neighbor;
   AMEdgePtr         e;
   Int4              n;

   amaip = (AMAlignIndex2Ptr)(sap->saip);

   /* seed the alignment and the queue with the first edge */
   AlnMgr2AddInNewPairwiseSA(sap, edge_head->sap);
   edge_head->aligned = TRUE;
   /* NOTE(review): queue nodes never get ->next set explicitly; this presumes
      MemNew hands back zero-filled memory -- confirm */
   front = (AMQueuePtr)MemNew(sizeof(AMQueue));
   front->vertex = AlnMgr2GetBetterVertex(vertexarray, edge_head);
   front->vertex->visited = TRUE;

   /* detach the vertices from one another */
   for (n = 0; n < numvertices; n++)
   {
      vertexarray[n]->next = NULL;
   }

   while (front != NULL)
   {
      /* locate the current end of the queue */
      for (back = front; back->next != NULL; back = back->next)
      {
         /* nothing to do, just advancing */
      }

      current = front->vertex;
      for (neighbor = AlnMgr2GetAdjacentVertices(current, vertexarray, edge_head);
           neighbor != NULL;
           neighbor = neighbor->next)
      {
         if (neighbor->visited != FALSE)
         {
            continue;
         }
         /* merge every tree edge joining current and neighbor that has not
            been merged yet */
         for (e = edge_head; e != NULL; e = e->next)
         {
            if (e->aligned != FALSE || e->used != AM_USED)
            {
               continue;
            }
            if ((AlnMgr2SameSeq(vertexarray[e->vertex1], current) &&
                 AlnMgr2SameSeq(vertexarray[e->vertex2], neighbor)) ||
                (AlnMgr2SameSeq(vertexarray[e->vertex1], neighbor) &&
                 AlnMgr2SameSeq(vertexarray[e->vertex2], current)))
            {
               AlnMgr2AddInNewPairwiseSA(sap, e->sap);
               e->aligned = TRUE;
            }
         }
         /* enqueue the neighbor */
         node = (AMQueuePtr)MemNew(sizeof(AMQueue));
         node->vertex = neighbor;
         back->next = node;
         back = node;
         neighbor->visited = TRUE;
      }

      /* dequeue the vertex that was just processed */
      node = front->next;
      MemFree(front);
      front = node;
      if (front != NULL)
      {
         continue;
      }

      /* queue drained: restart from the first unaligned edge that still has
         an unvisited end (a discontinuous set), if there is one */
      for (e = edge_head; e != NULL; e = e->next)
      {
         if (e->aligned == FALSE &&
             (vertexarray[e->vertex1]->visited == FALSE ||
              vertexarray[e->vertex2]->visited == FALSE))
         {
            front = (AMQueuePtr)MemNew(sizeof(AMQueue));
            front->vertex = AlnMgr2GetBetterVertex(vertexarray, e);
            vertexarray[e->vertex2]->visited = TRUE;
            vertexarray[e->vertex1]->visited = TRUE;
            break;
         }
      }
   }

   /* chain the vertices back together in array order */
   for (n = 1; n < numvertices; n++)
   {
      vertexarray[n-1]->next = vertexarray[n];
      vertexarray[n]->next = NULL;
   }

   AlnMgr2CondenseColumns((DenseSegPtr)(amaip->sharedaln->segs));
   AlnMgr2IndexSingleChildSeqAlign(amaip->sharedaln);
}
2625 /* SECTION 2c */
2626 /***************************************************************************
2627 *
2628 *  AlnMgr2GetAdjacentVertices returns a linked list of all vertices which
2629 *  are adjacent to the given vertex; that is, it returns a list of all
2630 *  vertices which are linked by an edge to a vertex with the same seqid.
2631 *
2632 ***************************************************************************/
/* Collect the vertices that share an edge with `vertex`.
 *
 * vertex       vertex whose neighbours are wanted (matched by seqid through
 *              AlnMgr2SameSeq, not by pointer)
 * vertexarray  vertices indexed by edge->vertex1 / edge->vertex2
 * edge_head    head of the edge list to scan
 *
 * Returns the head of a list threaded through the neighbours' own ->next
 * fields, in edge-list order, or NULL when no edge touches `vertex`.
 * Because the list reuses ->next, any previous linkage of the returned
 * vertices is overwritten. */
static AMVertexPtr AlnMgr2GetAdjacentVertices(AMVertexPtr vertex, AMVertexPtr PNTR vertexarray, AMEdgePtr edge_head)
{
   AMVertexPtr  first;
   AMVertexPtr  last;
   AMVertexPtr  other;
   AMEdgePtr    e;
   Boolean      touches;

   first = NULL;
   last = NULL;
   for (e = edge_head; e != NULL; e = e->next)
   {
      /* work out which end of the edge, if any, is the far end */
      touches = FALSE;
      other = NULL;
      if (AlnMgr2SameSeq(vertexarray[e->vertex1], vertex))
      {
         other = vertexarray[e->vertex2];
         touches = TRUE;
      } else if (AlnMgr2SameSeq(vertexarray[e->vertex2], vertex))
      {
         other = vertexarray[e->vertex1];
         touches = TRUE;
      }

      if (touches)
      {
         if (first == NULL)
         {
            first = other;
         } else
         {
            last->next = other;
         }
         last = other;
      }
      /* keep the list terminated after every edge */
      if (last != NULL)
      {
         last->next = NULL;
      }
   }
   return first;
}
2669 /* SECTION 2c */
AlnMgr2GetFirstRowForSeqId(DenseSegPtr dsp,SeqIdPtr sip,Uint1 strand,Int4Ptr row_curr,SeqIdPtr PNTR sip_curr)2671 static Boolean AlnMgr2GetFirstRowForSeqId(
2672   DenseSegPtr dsp,
2673   SeqIdPtr sip,
2674   Uint1 strand,
2675   Int4Ptr row_curr,
2676   SeqIdPtr PNTR sip_curr)
2677 {
2678   Boolean found = FALSE;
2680   while (*sip_curr) {
2681     (*row_curr)++;
2682     if (SeqIdComp(sip, *sip_curr) == SIC_YES &&
2683         strand == dsp->strands[*row_curr]) {
2684       found = TRUE;
2685     }
2686     *sip_curr = (*sip_curr)->next;
2687     if (found) return TRUE;
2688   }
2689   return FALSE;
2690 }
AlnMgr2CreateSeqPieceSet(DenseSegPtr dsp,Int4 row)2693 static AMSeqPieceSetPtr AlnMgr2CreateSeqPieceSet(DenseSegPtr dsp, Int4 row)
2694 {
2695   AMSeqPieceSetPtr s_set = (AMSeqPieceSetPtr)MemNew(sizeof(AMSeqPieceSet));
2696   AMSeqPiecePtr s = (AMSeqPiecePtr)MemNew(sizeof(AMSeqPiece));
2697   s->beg = -1;
2698   s->end = -1;
2699   s->seg = -1;
2700   s->pos = row - dsp->dim;
2701   s->set = s_set;
2702   s->prev = NULL;
2703   s->next = NULL;
2704   s->left = -1;
2705   s->right = -1;
2706   s->orig_left = -2;
2707   s->orig_right = -2;
2708   s->aligned = FALSE;
2709   s->alt_dsp = NULL;
2710   s->alt_seg = -1;
2711   s->alt_pos = -1;
2713   s->next = NULL;
2715   s_set->dsp = dsp;
2716   s_set->row = row;
2717   s_set->row2 = -1;
2718   s_set->alt_row = -1;
2719   s_set->alt_row2 = -1;
2720   s_set->head = s;
2721   s_set->tail = s;
2722   s_set->max_pos = dsp->dim * dsp->numseg;
2723   s_set->strand = dsp->strands[row];
2724   s_set->plus = s_set->strand != Seq_strand_minus;
2725   s_set->next = NULL;
2727   return s_set;
2728 }
/* Append the next piece of s's row to the piece list and return it.
 *
 * s  current last piece of the list (initially the set's placeholder)
 *
 * Returns NULL, allocating nothing, when s->pos is already at or beyond the
 * set's max_pos.  Otherwise a new piece is linked after s, made the set's
 * tail, and positioned on the first segment after s in which the row is not
 * a gap (starts != -1); beg/end are the piece's first and last residues in
 * alignment order (beg > end on the minus strand).  If every remaining
 * segment is a gap, the new piece is returned with beg == end == -1 and
 * pos >= max_pos, so the following call returns NULL. */
static AMSeqPiecePtr AlnMgr2GetNextSeqPiece(AMSeqPiecePtr s)
{
  AMSeqPieceSetPtr owner = s->set;
  DenseSegPtr      dsp = owner->dsp;
  Int4             limit = owner->max_pos;
  AMSeqPiecePtr    piece;

  if (s->pos >= limit) {
    return NULL;
  }

  piece = (AMSeqPiecePtr)MemNew(sizeof(AMSeqPiece));
  piece->set = owner;
  piece->prev = s;
  piece->next = NULL;
  piece->pos = s->pos + dsp->dim;
  piece->seg = s->seg + 1;
  piece->left = -1;
  piece->right = -1;
  piece->orig_left = -2;
  piece->orig_right = -2;
  piece->aligned = FALSE;
  piece->alt_dsp = NULL;
  piece->alt_seg = -1;
  piece->alt_pos = -1;
  s->next = piece;
  owner->tail = piece;

  /* skip gap segments until the row has sequence */
  for (; piece->pos < limit; piece->pos += dsp->dim, piece->seg++) {
    if (dsp->starts[piece->pos] == -1) {
      continue;
    }
    piece->beg = piece->end = dsp->starts[piece->pos];
    if (owner->plus) {
      piece->end += dsp->lens[piece->seg] - 1;
    } else {
      piece->beg += dsp->lens[piece->seg] - 1;
    }
    return piece;
  }

  /* ran off the end: hand back an end-of-row marker */
  piece->beg = -1;
  piece->end = -1;
  return piece;
}
/* Append the next non-gap piece of s's row, looking no further than the
 * segment of `right`.
 *
 * s      current last piece of the row being collected
 * right  piece of the anchor row that bounds the search (segments after
 *        right->seg are not examined)
 *
 * Returns the new piece (linked after s and made the set's tail), or NULL
 * when the row is a gap in every segment from s->seg + 1 through right->seg
 * or the dense-seg is exhausted.  A piece found in right's own segment is
 * marked aligned and takes right's beg/end as its left/right limits; a piece
 * found earlier is unaligned and is bounded by the end of right's predecessor
 * and the beginning of right. */
static AMSeqPiecePtr AlnMgr2GetNextLimitedSeqPiece(
  AMSeqPiecePtr s,
  AMSeqPiecePtr right)
{
  AMSeqPieceSetPtr owner = s->set;
  DenseSegPtr      dsp = owner->dsp;
  AMSeqPiecePtr    left = right->prev;
  AMSeqPiecePtr    piece;
  Int4             pos_limit = owner->max_pos;
  Int4             seg_limit = right->seg;
  Int4             pos;
  Int4             seg;

  for (pos = s->pos + dsp->dim, seg = s->seg + 1;
       pos < pos_limit && seg <= seg_limit;
       pos += dsp->dim, seg++) {
    if (dsp->starts[pos] == -1) {
      continue; /* gap in this segment */
    }

    piece = (AMSeqPiecePtr)MemNew(sizeof(AMSeqPiece));
    piece->pos = pos;
    piece->seg = seg;
    piece->set = owner;
    piece->next = NULL;
    piece->prev = s;
    s->next = piece;
    owner->tail = piece;

    piece->beg = piece->end = dsp->starts[pos];
    if (owner->plus) {
      piece->end += dsp->lens[seg] - 1;
    } else {
      piece->beg += dsp->lens[seg] - 1;
    }

    if (seg == right->seg) {
      /* shares a segment with the anchor piece */
      piece->aligned = TRUE;
      piece->left = right->beg;
      piece->right = right->end;
    } else {
      /* falls between two anchor pieces */
      piece->aligned = FALSE;
      piece->left = left->end;
      piece->right = right->beg;
    }

    /* not used at this stage */
    piece->orig_left = -2;
    piece->orig_right = -2;
    piece->alt_dsp = NULL;
    piece->alt_seg = -1;
    piece->alt_pos = -1;
    return piece;
  }
  return NULL;
}
/* Append a copy of `what` to the end of `set`.
 *
 * set   destination piece set; its tail is updated (its head is not)
 * what  piece to copy; left untouched
 *
 * When `what` comes from the same dense-seg as `set`, the copy keeps what's
 * seg/pos and has no alternate location.  When it comes from another
 * dense-seg, seg/pos are set to -1 and what's location is recorded in
 * alt_dsp/alt_seg/alt_pos instead. */
static void AlnMgr2AddSeqPiece(
  AMSeqPieceSetPtr set,
  AMSeqPiecePtr what)
{
  DenseSegPtr   source_dsp = what->set->dsp;
  Boolean       foreign = (source_dsp != set->dsp) ? TRUE : FALSE;
  AMSeqPiecePtr copy;

  copy = (AMSeqPiecePtr)MemNew(sizeof(AMSeqPiece));

  copy->beg = what->beg;
  copy->end = what->end;
  copy->left = what->left;
  copy->right = what->right;
  copy->orig_left = what->orig_left;
  copy->orig_right = what->orig_right;
  copy->aligned = what->aligned;

  if (foreign) {
    copy->seg = -1;
    copy->pos = -1;
    copy->alt_dsp = source_dsp;
    copy->alt_seg = what->seg;
    copy->alt_pos = what->pos;
  } else {
    copy->seg = what->seg;
    copy->pos = what->pos;
    copy->alt_dsp = NULL;
    copy->alt_seg = -1;
    copy->alt_pos = -1;
  }

  /* hook the copy in after the current tail */
  copy->set = set;
  copy->next = NULL;
  copy->prev = set->tail;
  if (copy->prev != NULL) {
    copy->prev->next = copy;
  }
  set->tail = copy;
}
/* Insert a new piece covering [what->beg, end] immediately before `where`.
 *
 * where  piece in the destination list in front of which the new piece goes
 * what   piece supplying beg, the limits and the aligned flag; may be the
 *        same object as `where` (used to split a piece in two)
 * end    last residue of the new piece
 *
 * If `where` starts at the same residue as `what`, the new piece takes over
 * the front of `where`: it inherits where's seg/pos and where->beg is moved
 * to just past `end` (direction given by the set's strand).  Otherwise the
 * new piece takes what's seg/pos when both come from the same dense-seg, or
 * -1/-1 when they do not.  Whenever `what` comes from a different dense-seg
 * its location is recorded in alt_dsp/alt_seg/alt_pos.
 *
 * Fix: the original only linked the new piece when `where` had a
 * predecessor; the branch meant to update the set's head was unreachable,
 * so inserting before the first piece leaked the allocation and left the
 * list unchanged.  The piece is now always linked, and the head is updated
 * when `where` was the head. */
static void AlnMgr2InsertSeqPiece(
  AMSeqPiecePtr where,
  AMSeqPiecePtr what,
  Int4 end)
{
  AMSeqPiecePtr s;
  DenseSegPtr dsp = where->set->dsp;
  DenseSegPtr alt_dsp = what->set->dsp;
  Boolean same_dsp = (alt_dsp == dsp) ? TRUE : FALSE;

  s = (AMSeqPiecePtr)MemNew(sizeof(AMSeqPiece));
  /* copy beg before where->beg is adjusted: what may alias where */
  s->beg = what->beg;
  s->end = end;

  if (where->beg == what->beg) {
    s->seg = where->seg;
    s->pos = where->pos;
    where->beg = end + (where->set->plus ? 1 : -1);
  } else if (same_dsp) {
    s->seg = what->seg;
    s->pos = what->pos;
  } else {
    s->seg = -1;
    s->pos = -1;
  }

  if (same_dsp) {
    s->alt_dsp = NULL;
    s->alt_seg = -1;
    s->alt_pos = -1;
  } else {
    s->alt_dsp = alt_dsp;
    s->alt_seg = what->seg;
    s->alt_pos = what->pos;
  }

  s->left = what->left;
  s->right = what->right;
  s->orig_left = what->orig_left;
  s->orig_right = what->orig_right;
  s->aligned = what->aligned;
  s->set = where->set;

  /* splice s in between where->prev and where */
  s->next = where;
  s->prev = where->prev;
  if (s->prev != NULL) {
    s->prev->next = s;
  } else if (s->set->head == where) {
    s->set->head = s;
  }
  where->prev = s;
}
/***************************************************************************
*
*  AlnMgr2CopySeg emits one segment of the dense-seg being built (DSP)
*  from the sequence piece *s_ptr.
*
*  DSP      destination dense-seg; lens[] is written at *SEG_ptr and
*           starts[]/strands[] are written from *POS_ptr onward
*  SEG_ptr  in/out destination segment index; incremented by one
*  POS_ptr  in/out destination index into starts[]/strands[]; advanced
*           past the cells written
*  Dsp      source dense-seg whose starts/strands are carried over
*  Seg_ptr  in/out segment cursor in Dsp
*  Pos_ptr  in/out starts[] cursor in Dsp
*  s_ptr    in/out piece to emit; replaced by its ->next on return
*
*  If the piece belongs to a set whose row differs from row2 and it is
*  the last piece of its list, nothing is written: *s_ptr is set to NULL
*  and the other cursors are left untouched.
*
***************************************************************************/
AlnMgr2CopySeg(DenseSegPtr DSP,Int4 PNTR SEG_ptr,Int4 PNTR POS_ptr,DenseSegPtr Dsp,Int4 PNTR Seg_ptr,Int4 PNTR Pos_ptr,AMSeqPiecePtr PNTR s_ptr)2937 static void AlnMgr2CopySeg(
2938   DenseSegPtr DSP,
2939   Int4 PNTR SEG_ptr,
2940   Int4 PNTR POS_ptr,
2941   DenseSegPtr Dsp,
2942   Int4 PNTR Seg_ptr,
2943   Int4 PNTR Pos_ptr,
2944   AMSeqPiecePtr PNTR s_ptr)
2945 {
2946   Int4 i, rdelta, ldelta, POS, Pos, max_Pos, pos2, alt_pos2, SEG, Seg,
2947     beg, end;
2948   AMSeqPiecePtr s;
2949   Boolean plus;
  /* work on local copies of the cursors; they are stored back at the end */
2951   POS = *POS_ptr; Pos = *Pos_ptr;
2952   SEG = *SEG_ptr; Seg = *Seg_ptr;
2953   s = *s_ptr;
2955   if (s->set->row != s->set->row2) { /* if not a B */
2956     if (!(s->next)) {
2957       *s_ptr = NULL;
2958       return; /* skip the last A */
2959     }
2960   }
  /* one past the last destination cell that mirrors a row of Dsp */
2962   max_Pos = POS+Dsp->dim;
  /* the segment is as long as the piece (beg and end are inclusive) */
2964   DSP->lens[SEG] = ABS(s->end - s->beg) + 1;
2966   if (s->set->dsp != Dsp) { /* the extra row for the non-anchor seq */
    /* every row taken from Dsp is a gap here; strands are read from
       Dsp->strands[0 .. dim-1] */
2967     for (i = 0; POS < max_Pos; POS++, i++) {
2968       DSP->starts[POS] = -1;
2969       DSP->strands[POS] = Dsp->strands[i];
2970     }
    /* the extra row holds the piece itself */
2971     DSP->starts[POS] = MIN(s->beg, s->end);
2972     DSP->strands[POS] = s->set->strand;
2973     POS++;
2975   } else { /* not dealing with the extra row itself */
2977     if (s->pos >= 0 && s->set->row != s->set->row2) { /* Dsp involved */
      /* beg/end: extent of the whole source segment in s's row */
2978       beg = end = s->set->dsp->starts[s->pos];
2979       if (s->set->plus) {
2980         end += s->set->dsp->lens[s->seg]-1;
2981       } else {
2982         beg += s->set->dsp->lens[s->seg]-1;
2983       }
      /* ldelta/rdelta: how far the piece is inset from the beginning/end
         of that source segment */
2984       if (ldelta = ABS(s->beg - beg)) {
2985         /* need to "continue" from the orig seg */
2986         Pos = s->pos - s->set->row;
2987         Seg = s->seg;
2988       }
2989       rdelta = ABS(end - s->end);
      /* copy the Dsp rows, shifting each non-gap start by the inset that
         applies to that row's strand */
2991       for (; POS < max_Pos; POS++, Pos++) {
2992         DSP->strands[POS] = Dsp->strands[Pos];
2993         plus = DSP->strands[POS] != Seq_strand_minus;
2994         if (Dsp->starts[Pos] != -1) {
2995           DSP->starts[POS] = Dsp->starts[Pos] + (plus ? ldelta : rdelta);
2996         } else {
2997           DSP->starts[POS] = -1;
2998         }
2999       }
3000       if (ldelta) {
3001         /* restore these */
3002         Pos = *Pos_ptr;
3003         Seg = *Seg_ptr;
3004       } else {
3005         Seg++;
3006       }
3008       if (s->alt_dsp) { /* dsp involved too */
        /* cell of the second row in the alternate dense-seg, same segment
           as alt_pos */
3009         alt_pos2 =
3010           s->alt_pos + s->set->alt_row2 - s->set->alt_row;
3011         beg = end = s->alt_dsp->starts[s->alt_pos];
3012         if (s->alt_dsp->strands[s->alt_pos] == Seq_strand_minus) {
3013           beg += s->alt_dsp->lens[s->alt_seg]-1;
3014         } else {
3015           end += s->alt_dsp->lens[s->alt_seg]-1;
3016         }
3017         ldelta = ABS(s->beg - beg);
3018         rdelta = ABS(end - s->end);
3020         if (s->set->row2 != -1) { /* 2nd row merged*/
          /* POS is already past this segment's Dsp rows, so step back
             to the start of the segment and index row2 */
3021           pos2 = POS - DSP->dim + s->set->row2;
3022         } else { /* extra row */
3023           pos2 = POS;
3024           POS++;
3025         }
3026         DSP->strands[pos2] = s->alt_dsp->strands[alt_pos2];
3027         plus = DSP->strands[pos2] != Seq_strand_minus;
3028         if (s->alt_dsp->starts[alt_pos2] != -1) {
3029           DSP->starts[pos2] = s->alt_dsp->starts[alt_pos2] +
3030             (plus ? ldelta : rdelta);
3031         } else {
3032           DSP->starts[pos2] = -1;
3033         }
3034       } else { /* dsp not involved */
3035         if (s->set->row2 == -1) { /* 2nd row not merged */
          /* the extra row is a gap in this segment */
3036           DSP->starts[POS] = -1;
3037           DSP->strands[POS] =
3038             s->set->alt_dsp->strands[s->set->alt_row2];
3039           POS++;
3040         }
3041       }
3042     } else { /* Dsp not involved */
      /* start with a gap in every Dsp row; the piece's own row is filled
         in at the end of this branch */
3043       for (i = 0; POS < max_Pos; POS++, i++) {
3044         DSP->starts[POS] = -1;
3045         DSP->strands[POS] = Dsp->strands[i];
3046       }
3047       if (s->set->row == s->set->row2) { /* if a B */
3048         if (!(s->alt_dsp)) {
3049           Pos += s->set->dsp->dim; /* move to next seg */
3050           Seg++;
3051         }
3052       } else { /* not a B */
3053         alt_pos2 =
3054           s->alt_pos + s->set->alt_row2 - s->set->alt_row;
3056         beg = end = s->alt_dsp->starts[s->alt_pos];
3057         if (s->alt_dsp->strands[s->alt_pos] == Seq_strand_minus) {
3058           beg += s->alt_dsp->lens[s->alt_seg]-1;
3059         } else {
3060           end += s->alt_dsp->lens[s->alt_seg]-1;
3061         }
3062         ldelta = ABS(s->beg - beg);
3063         rdelta = ABS(end - s->end);
3065         if (s->set->row2 != -1) { /* merged row2 */
3066           pos2 = POS - DSP->dim + s->set->row2;
3067         } else {
3068           pos2 = POS;
3069           POS++;
3070         }
3071         DSP->strands[pos2] = s->alt_dsp->strands[alt_pos2];
3072         plus = DSP->strands[pos2] != Seq_strand_minus;
3073         if (s->alt_dsp->starts[alt_pos2] != -1) {
3074           DSP->starts[pos2] = s->alt_dsp->starts[alt_pos2] +
3075             (plus ? ldelta : rdelta);
3076         } else {
3077           DSP->starts[pos2] = -1;
3078         }
3079       }
      /* the piece's own row gets its lowest coordinate as start */
3080       DSP->starts[POS + s->set->row - DSP->dim] = MIN(s->beg, s->end);
3081     }
3082   }
  /* publish the advanced cursors and step to the next piece */
3083   (*SEG_ptr)++;
3084   *Seg_ptr = Seg;
3085   *s_ptr = (*s_ptr)->next;
3086   *POS_ptr = POS;
3087   *Pos_ptr = Pos;
3088 }
/***************************************************************************
*
*  AlnMgr2AddInNewPairwiseSA merges the pairwise dense-seg alignment sap
*  into the shared alignment kept in parent's index (amaip->sharedaln).
*
*  parent  alignment whose saip is an AMAlignIndex2; its sharedaln, saps,
*          aligned, ids and numrows fields are updated
*  sap     pairwise alignment to add.  A dim of 0 is treated as 2; any
*          other dim, or numseg < 1, is reported through ErrPostEx and
*          the function returns without changing parent.
*
*  If parent has no shared alignment yet, a duplicate of sap becomes the
*  shared alignment.  Otherwise sap is appended to amaip->saps, the first
*  row shared by both alignments is used as the anchor, and a new
*  dense-seg is built from the old one plus sap; the old dense-seg is
*  freed.  sap itself may be modified: when the anchor rows are on
*  different strands, sap is reversed and re-indexed in place.  The
*  non-anchor sequence of sap is merged into an existing row with the
*  same seqid and strand when that causes no conflict; otherwise a new
*  row is appended.
*
***************************************************************************/
AlnMgr2AddInNewPairwiseSA(SeqAlignPtr parent,SeqAlignPtr sap)3090 NLM_EXTERN void AlnMgr2AddInNewPairwiseSA(SeqAlignPtr parent, SeqAlignPtr sap)
3091 {
  /* naming used below: lower case (dsp, a, b, anchor) refers to the new
     pairwise alignment, capitalized (Dsp, A, B, Anchor) to the existing
     shared alignment, and upper case (DSP, SEG, POS) to the dense-seg
     being built */
3092   AMAlignIndex2Ptr amaip;
3093   DenseSegPtr dsp, Dsp, DSP;
3094   Int4 Seg, SEG;
3095   Int4 Pos, POS, max_POS;
3096   Int4 A_end, B_beg;
3097   Int4 anchor, Anchor;
3098   Int4 row;
3099   SeqIdPtr sip, extra_sip;
3100   AMSeqPieceSetPtr a_set, A_set, b_set, B_set_head, B_set;
3101   AMSeqPiecePtr a, A, b, B;
3102   Boolean conflict;
3103   Boolean a_plus, b_plus;
3104   Int4 upper_limit;
3105   Int4 extra_segs;
3107   dsp = (DenseSegPtr)(sap->segs);
3108   if (dsp->dim != 2) {
3109     if (dsp->dim == 0) {
3110       dsp->dim = 2; /* set to default */
3111     } else {
3112       ErrPostEx(SEV_ERROR, 0,0,
3113                 "AlnMgr2AddInNewPairwiseSA: dsp->dim (=%d) should be 2.",
3114                 dsp->dim);
3115       return;
3116     }
3117   }
3118   if (dsp->numseg < 1) {
3119     ErrPostEx(SEV_ERROR, 0,0,
3120               "AlnMgr2AddInNewPairwiseSA: dsp->numseg (=%d) should be > 0.",
3121               dsp->numseg);
3122     return;
3123   }
3125   amaip = (AMAlignIndex2Ptr)(parent->saip);
3126   if (amaip->sharedaln == NULL) {/* first alignment to be added */
3127     SeqAlignPtr salp;
3128     Int4 i;
    /* the shared alignment starts out as an indexed copy of sap */
3130     salp = SeqAlignDup(sap);
3131     AlnMgr2IndexSingleChildSeqAlign(salp);
3132     amaip->sharedaln = salp;
3133     amaip->numrows = dsp->dim;
3134     sip = dsp->ids;
3135     amaip->ids = (SeqIdPtr PNTR)MemNew((dsp->dim)*sizeof(SeqIdPtr));
3136     i = 0;
3137     while (sip != NULL) {
3138       amaip->ids[i] = SeqIdDup(sip);
3139       sip = sip->next;
3140       i++;
3141     }
    /* replace the bookkeeping arrays with one-element arrays for sap */
3142     MemFree(amaip->saps);
3143     amaip->saps = (SeqAlignPtr PNTR)MemNew(sizeof(SeqAlignPtr));
3144     amaip->saps[0] = sap;
3145     amaip->numsaps = 1;
3146     MemFree(amaip->aligned);
3147     amaip->aligned =  (Boolean PNTR) MemNew(sizeof(Boolean));
3148     amaip->aligned[0] = TRUE;
3150     return;
3151   }
3153   /* add the new sap */
3154   amaip->numsaps++;
3155   amaip->saps = (SeqAlignPtr PNTR) MemMore
3156     (amaip->saps, amaip->numsaps*sizeof(SeqAlignPtr));
3157   amaip->saps[amaip->numsaps-1] = sap;
3158   amaip->aligned = (Boolean PNTR) MemMore
3159     (amaip->aligned, (amaip->numsaps)*sizeof(Boolean));
3160   amaip->aligned[amaip->numsaps-1] = TRUE;
3162   Dsp = (DenseSegPtr)(amaip->sharedaln->segs);
  /* Anchor / anchor: the shared row in the shared alignment and in sap
     (treated as 1-based until decremented below) */
3164   AlnMgr2GetFirstSharedRow(amaip->sharedaln, sap, &Anchor, &anchor);
3166   {{ /* make sure the shared rows are on the same strand */
3167     Uint1 Strand, strand;
3169     Strand = AlnMgr2GetNthStrand(amaip->sharedaln, Anchor);
3170     if (Strand == Seq_strand_unknown)
3171       Strand = Seq_strand_plus;
3172     strand = AlnMgr2GetNthStrand(sap, anchor);
3173     if (strand == Seq_strand_unknown)
3174       strand = Seq_strand_plus;
3175     if (Strand != strand) {
      /* flip sap in place, then rebuild its index and refresh dsp */
3176       SeqAlignListReverseStrand(sap);
3177       SAIndex2Free2(sap->saip);
3178       sap->saip = NULL;
3179       AlnMgr2IndexSingleChildSeqAlign(sap);
3180       dsp = (DenseSegPtr)(sap->segs);
3181       strand = AlnMgr2GetNthStrand(sap, anchor);
3182       if (strand == Seq_strand_unknown)
3183         strand = Seq_strand_plus;
3184     }
3185     a_plus = strand != Seq_strand_minus;
3186   }}
3187   anchor--; Anchor--; /* make them 0-based */
3189   /* create new dsp */
3190   DSP = DenseSegNew();
3191   DSP->numseg = Dsp->numseg;
3192   DSP->dim = Dsp->dim;
3193 /*   DSP->ids = SeqIdDupList(Dsp->ids); */
3195   /* collect other shared seqids */
3196   b_set = B_set = B_set_head = NULL;
3197   row = -1; sip = Dsp->ids;
  /* extra_sip: seqid of sap's non-anchor row */
3198   extra_sip = dsp->ids;
3199   if (anchor == 0) {
3200     extra_sip = extra_sip->next;
3201   }
  /* one piece set per row of Dsp that has the same seqid and strand as
     sap's non-anchor row; these are the candidate rows to merge into */
3202   while (AlnMgr2GetFirstRowForSeqId
3203          (Dsp, extra_sip, dsp->strands[1-anchor], &row, &sip)) {
3204     if (B_set) {
3205       B_set->next = AlnMgr2CreateSeqPieceSet(Dsp, row);
3206       B_set = B_set->next;
3207     } else {
3208       B_set = B_set_head = AlnMgr2CreateSeqPieceSet(Dsp, row);
3209     }
3210   }
3211   b_plus = dsp->strands[1-anchor] != Seq_strand_minus;
3213   /* ids */
  /* the id list moves from the old dense-seg to the new one */
3214   DSP->ids = Dsp->ids;
3215   Dsp->ids = NULL;
3217   /* collect a, b */
3218   a_set = AlnMgr2CreateSeqPieceSet(dsp, anchor);
3219   a = a_set->head;
3220   b_set = AlnMgr2CreateSeqPieceSet(dsp, 1-anchor);
3221   while (a = AlnMgr2GetNextSeqPiece(a)) {
3222     b = b_set->tail;
3223     while (b = AlnMgr2GetNextLimitedSeqPiece(b, a)) {
      /* a b piece that is not in the same segment as an anchor piece
         needs a segment of its own */
3224       if (!b->aligned) {
3225         DSP->numseg++;
3226       }
3227     }
3228   }
3230   /* collect A, B */
3231   A_set = AlnMgr2CreateSeqPieceSet(Dsp, Anchor);
3232   A = A_set->head;
3233   while (A = AlnMgr2GetNextSeqPiece(A)) {
3234     B_set = B_set_head;
3235     while (B_set) {
3236       B = B_set->tail;
3237       while (B = AlnMgr2GetNextLimitedSeqPiece(B, A)) {};
3238       B_set=B_set->next;
3239     }
3240   }
3242   /* resolve a, A */
  /* walk both anchor piece lists in alignment order and split/insert
     pieces in the A list so that every stretch covered by a is a piece
     of its own; each inserted piece adds one segment to DSP.  The last
     piece of each list is not processed by these loops. */
3243   A_set->alt_row = a_set->row;
3244   a = a_set->head->next;
3245   A = A_set->head->next;
3246   while (a && A && a->next && A->next) {
3247     if (a_plus ? a->beg < A->beg : a->beg > A->beg) {
      /* a starts before A: insert the part of a that precedes A */
3248       AlnMgr2InsertSeqPiece
3249         (A, a, a_plus ? MIN(a->end, A->beg-1) : MAX(a->end, A->beg+1));
3250       DSP->numseg++;
3251       if (a_plus ? a->end < A->beg : a->end > A->beg) {
3252         a = a->next;
3253       } else {
3254         a->beg = A->beg;
3255       }
3256     } else if (a_plus ? A->beg < a->beg : A->beg > a->beg) {
      /* A starts before a: skip A, or split off its leading part */
3257       if (a_plus ? A->end < a->beg : A->end > a->beg) {
3258         A = A->next;
3259       } else {
3260         AlnMgr2InsertSeqPiece(A, A, a_plus ? a->beg - 1 : a->beg + 1);
3261         DSP->numseg++;
3262       }
3263     } else { /* a->beg == A->beg */
3264       if (a_plus ? a->end < A->end : a->end > A->end) {
3265         AlnMgr2InsertSeqPiece(A, a, a->end);
3266         DSP->numseg++;
3267         a = a->next;
3268       } else if (a_plus ? a->end > A->end : a->end < A->end) {
        /* A is covered by the front of a: remember where in dsp it
           came from and keep the rest of a for the next round */
3269         a->beg = A->end + (a_plus ? 1 : -1);
3270         A->alt_dsp = a->set->dsp;
3271         A->alt_seg = a->seg;
3272         A->alt_pos = a->pos;
3273         A = A->next;
3274       } else { /* a->end == A->end */
3275         A->alt_dsp = a->set->dsp;
3276         A->alt_seg = a->seg;
3277         A->alt_pos = a->pos;
3278         a = a->next;
3279         A = A->next;
3280       }
3281     }
3282   }
  /* whatever is left of a goes in front of the current A */
3283   while (a && a->next) {
3284     AlnMgr2InsertSeqPiece(A, a, a->end);
3285     DSP->numseg++;
3286     a = a->next;
3287   }
3289   /* set the upper limits */
  /* limits still at -1 are replaced by a coordinate one past the anchor
     pieces; that coordinate is also stored in the last (plus) or first
     (minus) piece of the A list */
3290   if (B_set_head) {
3291     if (a_plus) {
3292       upper_limit =
3293         A_set->tail->end = A_set->tail->beg = A_set->tail->prev->end + 1;
3295       b = b_set->tail;
3296       while (b && b->right == -1) {
3297         b->right = upper_limit;
3298         b = b->prev;
3299       }
3301       B_set = B_set_head;
3302       while (B_set) {
3303         B = B_set->tail;
3304         while (B && B->right == -1) {
3305           B->right = upper_limit;
3306           B = B->prev;
3307         }
3308         B_set = B_set->next;
3309       }
3311     } else {
3312       upper_limit =
3313         A_set->head->beg = A_set->head->end = A_set->head->next->beg + 1;
3315       b = b_set->head;
3316       while (b && b->left == -1) {
3317         b->left = upper_limit;
3318         b = b->next;
3319       }
3321       B_set = B_set_head;
3322       while (B_set) {
3323         B = B_set->head;
3324         while (B && B->left == -1) {
3325           B->left = upper_limit;
3326           B = B->next;
3327         }
3328         B_set = B_set->next;
3329       }
3331     }
3332   }
3334   /* try to resolve b, B */
  /* try each candidate row in turn; the first one whose pieces can be
     interleaved with b's without conflict receives b's pieces.  A
     conflict is: overlapping pieces, pieces starting at the same
     residue, or limits that cannot be reconciled. */
3335   if (B_set_head) {
3336     b = b_set->head->next;
3337     B_set = B_set_head;
3338     while (B_set) {
3339       B = B_set->head->next;
3340       conflict = FALSE;
3341       extra_segs = 0;
3342       while (b && B) {
3343         if (b_plus ? b->beg < B->beg : b->beg > B->beg) {
3344           if (b_plus ? b->end < B->beg : b->end > B->beg) {
3345             /* trim the limits */
3346             if (a_plus ? B->left <= b->left : B->left >= b->left) {
3347               if (a_plus ? B->right < b->left : B->right > b->left) {
3348                 conflict = TRUE; break;
3349               } else {
3350                 if (B->aligned) {
3351                   conflict = TRUE; break; /* no trimming allowed */
3352                 } else {
3353                   B->left = b->left;
3354                 }
3355               }
3356               if (a_plus ? b->right > B->right : b->right < B->right) {
3357                 if (b->aligned) {
3358                   conflict = TRUE; break; /* no trimming allowed */
3359                 } else {
3360                   b->orig_right = b->right; /* for recovering */
3361                   b->right = B->right;
3362                 }
3363               }
3364             }
3365             AlnMgr2InsertSeqPiece(B, b, b->end);
3366             if (!(b->aligned)) extra_segs++;
3367             b = b->next;
3368           } else {
3369             conflict = TRUE; break;
3370           }
3372         } else if (b_plus ? B->beg < b->beg : B->beg > b->beg) {
3373           if (b_plus ? B->end < b->beg : B->end > b->beg) {
3374             /* trim the limits */
3375             if (a_plus ? b->left < B->left : b->left > B->left) {
3376               if (a_plus ? b->right < B->left : b->right > B->left) {
3377                 conflict = TRUE; break;
3378               } else {
3379                 if (b->aligned) {
3380                   conflict = TRUE; break; /* no trimming allowed */
3381                 } else {
3382                   b->orig_left = b->left; /* for recovering */
3383                   b->left = B->left;
3384                 }
3385               }
3386               if (a_plus ? B->right > b->right : B->right < b->right) {
3387                 if (B->aligned) {
3388                   conflict = TRUE; break; /* no trimming allowed */
3389                 } else {
3390                   B->right = b->right;
3391                 }
3392               }
3393             }
3395             B = B->next;
3397           } else {
3398             conflict = TRUE; break;
3399           }
3400         } else { /* B->beg == b->beg */
3401           conflict = TRUE; break;
3402         }
3403       }
3404       if (!conflict) {
        /* B ran out first: the remaining b pieces go at the end */
3405         while (b) {
3406           AlnMgr2AddSeqPiece(B_set, b);
3407           if (!(b->aligned)) extra_segs++;
3408           b = b->next;
3409         }
3410 /*         DSP->numseg += extra_segs; */
3411         break;
3412       }
3413       /* conflict, roll back b, recovering limits, try next B */
3414       if (!b) {
3415         b = b_set->tail;
3416       }
3417       while (b) {
3418         if (b->orig_left != -2) {
3419           b->left = b->orig_left;
3420         }
3421         if (b->orig_right != -2) {
3422           b->right = b->orig_right;
3423         }
3424         b = b->prev;
3425       }
3426       b = b_set->head->next;
3427       B_set = B_set->next;
3428     }
3429   }
  /* B_set is NULL here when there was no candidate row or every
     candidate conflicted */
3430   if (B_set) {  /* B_set has no conflict with b_set */
3431     B = B_set->head->next;
3432     B_set->row2 = B_set->row; /* mark the set */
3433     A_set->row2 = B_set->row;
3434     A_set->alt_row2 = b_set->row;
3435   } else {  /* this mean extra row */
3436     A_set->row2 = -1;
3437     A_set->alt_row2 = b_set->row;
3438     A_set->alt_dsp = b_set->dsp;
3439     DSP->dim++;
    /* append the new row's id to the end of the id list */
3440     sip = DSP->ids;
3441     while (sip->next) {
3442       sip = sip->next;
3443     }
3444     AddSeqId(&sip, extra_sip);
3446     /* fix the index too */
3447     amaip->numrows = DSP->dim;
3448     amaip->ids = (SeqIdPtr PNTR)MemMore
3449       (amaip->ids,amaip->numrows*sizeof(SeqIdPtr));
3450     amaip->ids[amaip->numrows-1] = SeqIdDup(extra_sip);
3452     b_set->row2 = b_set->row; /* mark the set */
    /* from here on B walks sap's own b pieces */
3453     B = b_set->head->next;
3454     B_beg = -1; /* nothing to comp Bs to */
3455   }
3457   /* allocate memory for the new sharedaln matrix */
3458   DSP->starts = (Int4Ptr)MemNew(DSP->numseg * DSP->dim * sizeof(Int4));
3459   DSP->strands = (Uint1Ptr)MemNew(DSP->numseg * DSP->dim * sizeof(Uint1));
3460   DSP->lens = (Int4Ptr)MemNew(DSP->numseg * sizeof(Int4));
3462   /* loop through segments */
  /* Seg/Pos track the current segment of the old dense-seg, SEG/POS the
     next segment/cell to write in the new one; AlnMgr2CopySeg advances
     them together with the piece pointer it is given */
3463   POS = 0; Pos = 0; Seg = 0; SEG = 0;
3464   A = A_set->head->next;
3465   while (Seg < Dsp->numseg) {
    /* A_end: last anchor residue of this old segment in alignment
       order, or -1 if the anchor row is a gap here */
3467     A_end = Dsp->starts[Pos+A_set->row];
3468     if (a_plus && A_end >= 0) {
3469       A_end += Dsp->lens[Seg] - 1;
3470     }
3471     if (B_set) {
3472       B_beg = Dsp->starts[Pos+B_set->row];
3473     }
3475     if (A_end >= 0) {
      /* emit every A piece that lies within this old segment, preceded
         by the unaligned B pieces that belong before it */
3476       while (A && (a_plus ? A->end <= A_end : A->end >= A_end)) {
3477         while (B && (a_plus ? B->left < A->beg : B->left > A->beg)) {
3478           if (B->aligned) {
3479             B = B->next;
3480             break; /* the aligned piece should be last */
3481           } else {
3482             AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &B);
3483           }
3484         }
3485         if (B && B->aligned && B->left == A->beg) {
3486           B = B->next;
3487         }
3488         AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &A);
3489       }
3490     } else if (B && B_beg >= 0) {
      /* anchor is a gap but the merged row has sequence here */
3491       while (B && (b_plus ? B->beg <= B_beg : B->beg >= B_beg)) {
3492         while (A && (a_plus ? A->beg <= B->left : A->beg >= B->left)) {
3493           AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &A);
3494         }
3495         if (B->aligned) {
3496           B = B->next;
3497         } else {
3498           AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &B);
3499         }
3500       }
3501     } else {
3502       /* just copy the Dsp segment */
3503       DSP->lens[SEG] = Dsp->lens[Seg];
3504       max_POS = POS + Dsp->dim;
3505       for (; POS < max_POS; POS++, Pos++) {
3506         DSP->starts[POS] = Dsp->starts[Pos];
3507         DSP->strands[POS] = Dsp->strands[Pos];
3508       }
      /* when a row was added, it is a gap in a copied segment */
3509       if (DSP->dim > Dsp->dim) {
3510         DSP->starts[POS] = -1;
3511         DSP->strands[POS] = dsp->strands[1-anchor];
3512         POS++;
3513       }
3514       SEG++;
3515       Seg++;
3516     }
3517   }
  /* old segments exhausted: flush the remaining A pieces, each preceded
     by the B pieces that belong before it, then any B pieces left */
3518   while (A) {
3519     while (B && (a_plus ? B->right <= A->beg : B->right >= A->beg)) {
3520       if (B->aligned) {
3521         B = B->next;
3522       } else {
3523         AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &B);
3524       }
3525     }
3526     AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &A);
3527   }
3528   while (B) {
3529     if (B->aligned) {
3530       B = B->next;
3531     } else {
3532       AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &B);
3533     }
3534   }
3536   /* Done */
3537   AMSeqPieceSetFree(A_set);
3538   AMSeqPieceSetFree(a_set);
3539   AMSeqPieceSetFree(B_set_head);
3540   AMSeqPieceSetFree(b_set);
  /* install the new dense-seg and release the old one */
3542   amaip->sharedaln->segs = DSP;
3543   /* update the dim for the shared_aln to match the new DensegPtr */
3544   amaip->sharedaln->dim = DSP->dim;
3546   DenseSegFree(Dsp);
3547 }
3549 /***************************************************************************
3550 *
3551 *  AlnMgr2AddInNewSA adds a seqalign to an existing seqalign. The new
3552 *  seqalign must share at least one row with the existing seqalign. The
3553 *  new, combined dense-seg structure is computed, and then it is condensed
3554 *  using AlnMgr2CondenseRows to make sure that there are no superfluous rows.
3555 *
3556 ***************************************************************************/
AlnMgr2AddInNewSA(SeqAlignPtr parent,SeqAlignPtr sap)3557 static void AlnMgr2AddInNewSA(SeqAlignPtr parent, SeqAlignPtr sap)
3558 {
3559    AMAlignIndex2Ptr  amaip;
3560    AM_Small2Ptr      asp;
3561    AM_Small2Ptr      asp_head;
3562    AM_Small2Ptr      asp_prev;
3563    AM_Small2Ptr      asp_tmp;
3564    AM_Small2Ptr      asp_tmp2;
3565    AM_Small2Ptr      PNTR asparray;
3566    Int4             currstop;
3567    DenseSegPtr      dsp;
3568    DenseSegPtr      dsp_new;
3569    DenseSegPtr      dsp_shared;
3570    Boolean          found;
3571    Int4             i;
3572    Int4             j;
3573    Int4             k;
3574    Int4             n1;
3575    Int4             n2;
3576    Int4             numrows;
3577    Int4             offset;
3578    SeqAlignPtr      salp;
3579    SeqAlignPtr      sap_new;
3580    SeqAlignPtr      PNTR saptmp;
3581    SeqIdPtr         sip;
3582    SeqIdPtr         sip_head;
3583    SeqIdPtr         sip_tmp;
3584    Int4             state;
3585    Int4             stop1;
3586    Int4             stop2;
3587    Uint1            strand1;
3588    Uint1            strand2;
3590    amaip = (AMAlignIndex2Ptr)(parent->saip);
3591    if (amaip->sharedaln == NULL) /* this is the first alignment to be added */
3592    {
3593       salp = SeqAlignDup(sap);
3594       AlnMgr2IndexSingleChildSeqAlign(salp);
3595       dsp = (DenseSegPtr)(salp->segs);
3596       amaip->sharedaln = salp;
3597       amaip->numrows = dsp->dim;
3598       sip = dsp->ids;
3599       amaip->ids = (SeqIdPtr PNTR)MemNew((dsp->dim)*sizeof(SeqIdPtr));
3600       i = 0;
3601       while (sip != NULL)
3602       {
3603          amaip->ids[i] = SeqIdDup(sip);
3604          sip = sip->next;
3605          i++;
3606       }
3607       MemFree(amaip->saps);
3608       amaip->saps = (SeqAlignPtr PNTR)MemNew(sizeof(SeqAlignPtr));
3609       amaip->saps[0] = sap;
3610       amaip->numsaps = 1;
3611    } else
3612    {
3613       /* free ids */
3614       for (i=0; i<amaip->numrows; i++)
3615       {
3616          SeqIdFree(amaip->ids[i]);
3617       }
3618       MemFree(amaip->ids);
3620       /* add the new sap */
3621       saptmp = amaip->saps;
3622       amaip->saps = (SeqAlignPtr PNTR)MemNew((amaip->numsaps+1)*sizeof(SeqAlignPtr));
3623       for (i=0; i<amaip->numsaps; i++)
3624       {
3625          amaip->saps[i] = saptmp[i];
3626       }
3627       amaip->saps[amaip->numsaps] = sap;
3628       MemFree(saptmp);
3629       amaip->numsaps++;
3631       /* dsp, dsp_shared, n1, n2 */
3632       dsp = (DenseSegPtr)(sap->segs);
3633       dsp_shared = (DenseSegPtr)(amaip->sharedaln->segs);
3634       AlnMgr2GetFirstSharedRow(amaip->sharedaln, sap, &n1, &n2);
3635       if (n1 == n2 && n1 == 0)
3636          return;
3638       /* make sure the shared rows are on the same strand */
3639       strand1 = AlnMgr2GetNthStrand(amaip->sharedaln, n1);
3640       if (strand1 == Seq_strand_unknown)
3641          strand1 = Seq_strand_plus;
3642       strand2 = AlnMgr2GetNthStrand(sap, n2);
3643       if (strand2 == Seq_strand_unknown)
3644          strand2 = Seq_strand_plus;
3645       if (strand1 != strand2)
3646       {
3647          SeqAlignListReverseStrand(sap);
3648          SAIndex2Free2(sap->saip);
3649          sap->saip = NULL;
3650          AlnMgr2IndexSingleChildSeqAlign(sap);
3651          dsp = (DenseSegPtr)(sap->segs);
3652          strand2 = AlnMgr2GetNthStrand(sap, n2);
3653          if (strand2 == Seq_strand_unknown)
3654             strand2 = Seq_strand_plus;
3655       }
3657       /* numrows */
3658       numrows = dsp->dim + dsp_shared->dim - 1; /* for now this works; compress at the end */
3659       asp_head = NULL;
3661       /* currstop */
3662       if (strand1 == Seq_strand_minus)
3663          AlnMgr2GetNthSeqRangeInSA(amaip->sharedaln, n1, NULL, &currstop);
3664       else
3665          currstop = -1;
3667       /* add asp for each dsp_shared seg */
3668       for (i=0; i<dsp_shared->numseg; i++)
3669       {
3670          asp = (AM_Small2Ptr)MemNew(sizeof(AM_Small2));
3671          if (dsp_shared->starts[(dsp_shared->dim)*i + n1 - 1] < 0)
3672          {
3673             asp->n1 = currstop;
3674             asp->n2 = i+1;
3675             asp->n3 = AM_GAP;
3676             asp->n4 = dsp_shared->lens[i];
3677             if (asp_head != NULL)
3678             {
3679                asp_prev->next = asp;
3680                /*if (asp_prev->n1 == asp->n1)
3681                   asp->n5 = asp_prev->n5+1;*/
3682                asp_prev = asp;
3683             } else
3684                asp_head = asp_prev = asp;
3685          } else
3686          {
3687             asp->n1 = dsp_shared->starts[(dsp_shared->dim)*i + n1 - 1];
3688             asp->n2 = 1;
3689             asp->n3 = AM_START;
3690             asp->n4 = dsp_shared->lens[i];
3691             if (asp_head != NULL)
3692             {
3693                asp_prev->next = asp;
3694                /*if (asp_prev->n1 == asp->n1)
3695                   asp->n5 = asp_prev->n5+1;*/
3696                asp_prev = asp;
3697             } else
3698                asp_head = asp_prev = asp;
3699             asp = (AM_Small2Ptr)MemNew(sizeof(AM_Small2));
3700             asp->n1 = dsp_shared->starts[(dsp_shared->dim)*i + n1 - 1] + dsp_shared->lens[i] - 1;
3701             asp->n2 = 1;
3702             j = i+1;
3703             while (j<dsp_shared->numseg && dsp_shared->starts[(dsp_shared->dim)*j + n1 - 1] == -1)
3704             {
3705                j++;
3706             }
3707             if (j<dsp_shared->numseg)
3708             {
3709                if (dsp_shared->starts[(dsp_shared->dim)*j + n1 - 1] > asp->n1 + 1)
3710                   asp->n3 = AM_HARDSTOP;
3711                else
3712                   asp->n3 = AM_STOP;
3713             } else
3714                asp->n3 = AM_HARDSTOP;
3715             if (asp->n3 == AM_HARDSTOP)
3716             {
3717                if (strand1 != Seq_strand_minus)
3718                   asp->n4 = -(dsp_shared->starts[(dsp_shared->dim)*i+n1-1] + dsp_shared->lens[i]-1);
3719                else
3720                   asp->n4 = -dsp_shared->starts[(dsp_shared->dim)*i+n1-1];
3721             } else
3722                asp->n4 = -dsp_shared->lens[i];
3723             if (strand1 != Seq_strand_minus)
3724                currstop = asp->n1;
3725             else
3726                currstop = asp_prev->n1-1;
3727             asp_prev->next = asp;
3728             /*if (asp_prev->n1 == asp->n1)
3729                asp->n5 = asp_prev->n5+1;*/
3730             asp_prev = asp;
3731          }
3732       } /* asp for each dsp_shared seg */
3734       /* currstop = start of sap's n2-th seq */
3735       if (strand1 == Seq_strand_minus)
3736          AlnMgr2GetNthSeqRangeInSA(sap, n2, NULL, &currstop);
3737       else
3738          AlnMgr2GetNthSeqRangeInSA(sap, n2, &currstop, NULL);
3740       /* add asp for each dsp seg */
3741       for (i=0; i<dsp->numseg; i++)
3742       {
3743          asp = (AM_Small2Ptr)MemNew(sizeof(AM_Small2));
3744          if (dsp->starts[(dsp->dim)*i + n2 - 1] < 0)
3745          {
3746             asp->n1 = currstop;
3747             asp->n2 = dsp_shared->numseg+i+1;
3748             asp->n3 = AM_GAP;
3749             asp->n4 = dsp->lens[i];
3750             asp_prev->next = asp;
3751             /*if (asp_prev->n1 == asp->n1)
3752                asp->n5 = asp_prev->n5 + 1;*/
3753             asp_prev = asp;
3754          } else
3755          {
3756             asp->n1 = dsp->starts[(dsp->dim)*i + n2 - 1];
3757             asp->n2 = 1;
3758             asp->n3 = AM_START;
3759             asp->n4 = dsp->lens[i];
3760             asp_prev->next = asp;
3761             /*if (asp_prev->n1 == asp->n1)
3762                asp->n5 = asp_prev->n5+1;*/
3763             asp_prev = asp;
3764             asp = (AM_Small2Ptr)MemNew(sizeof(AM_Small2));
3765             asp->n1 = dsp->starts[(dsp->dim)*i + n2 - 1] + dsp->lens[i] - 1;
3766             asp->n2 = 1;
3767             j = i+1;
3768             while (j<dsp->numseg && dsp->starts[(dsp->dim)* j + n2 - 1] == -1)
3769             {
3770                j++;
3771             }
3772             if (j<dsp->numseg)
3773             {
3774                if (dsp->starts[(dsp->dim)*j + n2 - 1] > asp->n1 + 1)
3775                   asp->n3 = AM_HARDSTOP;
3776                else
3777                   asp->n3 = AM_STOP;
3778             } else
3779                asp->n3 = AM_HARDSTOP;
3780             if (asp->n3 == AM_HARDSTOP)
3781             {
3782                if (strand1 != Seq_strand_minus)
3783                   asp->n4 = -(dsp->starts[(dsp->dim)*i+n1-1] + dsp->lens[i]-1);
3784                else
3785                   asp->n4 = -dsp->starts[(dsp->dim)*i+n1-1];
3786                /* so if n4 is negative, this is the highest-numbered residue in the interval */
3787             } else
3788                asp->n4 = dsp->lens[i];
3789             if (strand1 != Seq_strand_minus)
3790                currstop = asp->n1;
3791             else
3792                currstop = asp_prev->n1-1;
3793             asp_prev->next = asp;
3794             /*if (asp_prev->n1 == asp->n1)
3795                asp->n5 = asp_prev->n5 + 1;*/
3796             asp_prev = asp;
3797          }
3798       }
3800       /* create asparray and heapsort it */
3801       asp = asp_head;
3802       i = 0;
3803       while (asp != NULL)
3804       {
3805          i++;
3806          asp = asp->next;
3807       }
3808       asparray = (AM_Small2Ptr PNTR)MemNew(i*sizeof(AM_Small2Ptr));
3809       asp = asp_head;
3810       i = 0;
3811       while (asp != NULL)
3812       {
3813          asparray[i] = asp;
3814          i++;
3815          asp = asp->next;
3816       }
3817       if (strand1 != Seq_strand_minus)
3818          HeapSort(asparray, i, sizeof(asparray), AlnMgr2CompareAsps);
3819       else
3820          HeapSort(asparray, i, sizeof(asparray), AlnMgr2CompareAspsMinus);
3821       /* now need to remove redundant (identical) points        */
3822       /* but still need to count those points toward the states */
3823       asp = asparray[0];
3824       asp->next = NULL;
3825       for (j=0; j<i-1; j++)
3826       {
3827          if (asparray[j+1]->n1 != asp->n1 || asparray[j+1]->n3 != asp->n3 || asp->n3 == AM_GAP)
3828          {
3829             asp->next = asparray[j+1];
3830             asp->next->next = NULL;
3831             asp = asp->next;
3832          } else
3833          {
3834             k = j;
3835             while (asparray[k] == NULL && k >= 0)
3836             {
3837                k--;
3838             }
3839             if (k>=0 && asparray[k]->n3 != AM_GAP)
3840                asparray[k]->n2++;
3841             MemFree(asparray[j+1]);
3842             asparray[j+1] = NULL;
3843          }
3844       }
3845       asp_head = asparray[0];
3846       MemFree(asparray);
3847       j=0;
3848       asp = asp_head;
3849       asp_prev = NULL;
3850       /* count up the segments; two consecutive stops make a segment */
3851       state = 0;
3852       if (strand1 != Seq_strand_minus)
3853       {
3854          while (asp != NULL)
3855          {
3856             if (asp->n3 == AM_START)
3857             {
3858                state += asp->n2;
3859                j++;
3860             } else if (asp->n3 == AM_STOP)
3861             {
3862                state -= asp->n2;
3863                asp_tmp = asp->next;
3864                while (asp_tmp != NULL && asp_tmp->n3 == AM_GAP)
3865                {
3866                   asp_tmp = asp_tmp->next;
3867                }
3868                if (state != 0 && asp_tmp != NULL && asp_tmp->n1 != asp->n1+1 && (asp_tmp->n3 != AM_HARDSTOP || asp_tmp->n1 != asp->n1))
3869                   j++;
3870                else if (state != 0 && asp->next != NULL && asp_tmp != NULL && (asp_tmp->n3 != AM_HARDSTOP || asp_tmp->n1 != asp->n1))
3871                {
3872                   asp_tmp2 = asp_tmp;
3873                   while (asp_tmp2 != NULL && asp->n1+1 == asp_tmp2->n1 && asp_tmp2->n3 != AM_START)
3874                   {
3875                      asp_tmp2 = asp_tmp2->next;
3876                   }
3877                   if (asp_tmp2 != NULL && ((asp_tmp2->n1 == asp->n1+1 && asp_tmp2->n3 != AM_START) || asp_tmp2->n1 != asp->n1+1) && (asp_tmp->n3 != AM_HARDSTOP || asp_tmp->n1 != asp->n1))
3878                      j++;
3879                }
3880             } else if (asp->n3 == AM_GAP)
3881                j++;
3882             else if (asp->n3 == AM_HARDSTOP)
3883             {
3884                state -= asp->n2;
3885                asp_tmp = asp->next;
3886                while (asp_tmp != NULL && asp_tmp->n3 == AM_GAP)
3887                {
3888                   asp_tmp = asp_tmp->next;
3889                }
3890                if (state != 0 && asp_tmp != NULL && asp_tmp->n1 != asp->n1+1)
3891                   j++;
3892                else if (state != 0 && asp->next != NULL && asp_tmp != NULL)
3893                {
3894                   asp_tmp2 = asp_tmp;
3895                   while (asp_tmp2 != NULL && asp->n1+1 == asp_tmp2->n1 && asp_tmp2->n3 != AM_START)
3896                   {
3897                      asp_tmp2 = asp_tmp2->next;
3898                   }
3899                   if (asp_tmp2 != NULL && ((asp_tmp2->n1 == asp->n1+1 && asp_tmp2->n3 != AM_START) || asp_tmp2->n1 != asp->n1+1))
3900                      j++;
3901                   else if (asp_tmp2 == NULL)
3902                      j++;
3903                }
3904             }
3905             asp = asp->next;
3906          }
3907       } else
3908       {
3909          currstop = -1;
3910          while (asp != NULL)
3911          {
3912             if (asp->n3 == AM_STOP || asp->n3 == AM_HARDSTOP)
3913             {
3914                if (currstop != asp->n1 && state > 0)
3915                   j++;
3916                currstop = asp->n1;
3917                state += asp->n2;
3918             } else if (asp->n3 == AM_START)
3919             {
3920                state -= asp->n2;
3921                j++;
3922                currstop = asp->n1 - 1;
3923             } else if (asp->n3 == AM_GAP)
3924                j++;
3925             asp = asp->next;
3926          }
3927       }
3929       /* dsp_new */
3930       dsp_new = DenseSegNew();
3931       dsp_new->dim = numrows;
3932       dsp_new->numseg = j;
3933       dsp_new->ids = SeqIdDupList(dsp_shared->ids);
3934       dsp_new->starts = (Int4Ptr)MemNew((dsp_new->numseg)*(dsp_new->dim)*sizeof(Int4));
3935       dsp_new->strands = (Uint1Ptr)MemNew((dsp_new->numseg)*(dsp_new->dim)*sizeof(Uint1));
3936       dsp_new->lens = (Int4Ptr)MemNew((dsp_new->numseg)*sizeof(Int4));
3938       /* get all the ids except for the duplicated one */
3939       sip_head = NULL;
3940       sip_tmp = NULL;
3941       sip = dsp->ids;
3942       i=0;
3943       /* get all the ids except for the duplicated one */
3944       while (sip != NULL)
3945       {
3946          if (i+1 != n2)
3947          {
3948             if (sip_tmp != NULL)
3949             {
3950                sip_tmp->next = SeqIdDup(sip);
3951                sip_tmp = sip;
3952             } else
3953                sip_head = sip_tmp = SeqIdDup(sip);
3954          }
3955          i++;
3956          sip = sip->next;
3957       }
3958       sip = dsp_new->ids;
3959       while (sip->next != NULL)
3960       {
3961          sip = sip->next;
3962       }
3963       sip->next = sip_head;
3965       /* construct starts and lens from asps */
3966       asp = asp_head;
3967       i=0;
3968       state = 0;
3969       currstop = -1;
3970       if (strand1 != Seq_strand_minus)
3971       {
3972          while (asp != NULL)
3973          {
3974             if (asp->n3 == AM_START)
3975             {
3976                state += asp->n2;
3977                dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1;
3978                dsp_new->lens[i] = asp->n4;
3979                i++;
3980             } else if (asp->n3 == AM_STOP)
3981             {
3982                state -= asp->n2;
3983                asp_tmp = asp->next;
3984                while (asp_tmp != NULL && asp_tmp->n3 == AM_GAP)
3985                {
3986                   asp_tmp = asp_tmp->next;
3987                }
3988                if (state != 0 && asp_tmp != NULL && asp_tmp->n1 != asp->n1+1 && (asp_tmp->n3 != AM_HARDSTOP || asp_tmp->n1 != asp->n1))
3989                {
3990                   dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1 + 1;
3991                   dsp_new->lens[i] = asp->n4;
3992                   i++;
3993                } else if (state != 0 && asp->next != NULL && asp_tmp != NULL && i < dsp_new->numseg && (asp_tmp->n3 != AM_HARDSTOP || asp_tmp->n1 != asp->n1))
3994                {
3995                   asp_tmp2 = asp_tmp;
3996                   while (asp_tmp2 != NULL && asp->n1+1 == asp_tmp2->n1 && asp_tmp2->n3 != AM_START)
3997                   {
3998                      asp_tmp2 = asp_tmp2->next;
3999                   }
4000                   if (asp_tmp2 != NULL && ((asp_tmp2->n1 == asp->n1+1 && asp_tmp2->n3 != AM_START) || asp_tmp2->n1 != asp->n1+1) && (asp_tmp->n3 != AM_HARDSTOP || asp_tmp->n1 != asp->n1))
4001                   {
4002                      dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1 + 1;
4003                      dsp_new->lens[i] = asp->n4;
4004                      i++;
4005                   }
4006                }
4007             } else if (asp->n3 == AM_GAP)
4008             {
4009                dsp_new->starts[dsp_new->dim*i+n1-1] = -asp->n2;
4010                if (asp->n2 > dsp_shared->numseg)
4011                   dsp_new->lens[i] = dsp->lens[(asp->n2-1)-(dsp_shared->numseg)];
4012                else
4013                   dsp_new->lens[i] = dsp_shared->lens[asp->n2-1];
4014                i++;
4015             } else if (asp->n3 == AM_HARDSTOP)
4016             {
4017                state -= asp->n2;
4018                asp_tmp = asp->next;
4019                while (asp_tmp != NULL && asp_tmp->n3 == AM_GAP)
4020                {
4021                   asp_tmp = asp_tmp->next;
4022                }
4023                if (state != 0 && asp->next != NULL && asp_tmp != NULL && asp_tmp->n1 != asp->n1+1 && i < dsp_new->numseg)
4024                {
4025                   dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1 + 1;
4026                   if (asp->n1 > -asp->n4)
4027                      dsp_new->lens[i] = asp->n4;
4028                   i++;
4029                } else if (state != 0 && asp->next != NULL && asp_tmp != NULL && i < dsp_new->numseg)
4030                {
4031                   asp_tmp2 = asp_tmp;
4032                   while (asp_tmp2 != NULL && asp->n1+1 == asp_tmp2->n1 && asp_tmp2->n3 != AM_START)
4033                   {
4034                      asp_tmp2 = asp_tmp2->next;
4035                   }
4036                   if (asp_tmp2 != NULL && ((asp_tmp2->n1 == asp->n1+1 && asp_tmp2->n3 != AM_START) || asp_tmp2->n1 != asp->n1+1))
4037                   {
4038                      dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1 + 1;
4039                      if (asp->n1 > -asp->n4)
4040                         dsp_new->lens[i] = asp->n4;
4041                      i++;
4042                   } else if (asp_tmp2 == NULL)
4043                   {
4044                      dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1 + 1;
4045                      if (asp->n1 > -asp->n4)
4046                         dsp_new->lens[i] = asp->n4;
4047                      i++;
4048                   }
4049                }
4050             }
4051             asp = asp->next;
4052          }
4053          for (i=0; i<dsp_new->numseg; i++)
4054          {
4055             found = FALSE;
4056             for (j=i+1; j<dsp_new->numseg && !found; j++)
4057             {
4058                if (dsp_new->starts[dsp_new->dim*j+n1-1] > -1)
4059                {
4060                   if (dsp_new->lens[i] == 0)
4061                      dsp_new->lens[i] = dsp_new->starts[dsp_new->dim*j+n1-1] - dsp_new->starts[dsp_new->dim*i+n1-1];
4062                   else if (dsp_new->lens[i] > 0)
4063                      dsp_new->lens[i] = MIN(dsp_new->lens[i], dsp_new->starts[dsp_new->dim*j+n1-1] - dsp_new->starts[dsp_new->dim*i+n1-1]);
4064                   else if (dsp_new->lens[i] < 0)
4065                      dsp_new->lens[i] = -dsp_new->lens[i]-dsp_new->starts[dsp_new->dim*i+n1-1]+1;
4066                   found = TRUE;
4067                }
4068             }
4069             if (!found) /* last segment */
4070             {
4071                if (dsp_new->starts[dsp_new->dim*i+n1-1] >= 0)
4072                {
4073                   AlnMgr2GetNthSeqRangeInSA(amaip->sharedaln, n1, NULL, &stop1);
4074                   AlnMgr2GetNthSeqRangeInSA(sap, n2, NULL, &stop2);
4075                   dsp_new->lens[i] = (MAX(stop1, stop2) + 1) - dsp_new->starts[dsp_new->dim*i+n1-1];
4076                }
4077             }
4078          }
4079       } else
4080       {
4081          while (asp != NULL)
4082          {
4083             if (asp->n3 == AM_STOP)
4084             {
4085                if (currstop != asp->n1 && state > 0)
4086                {
4087                   dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1+1;
4088                   dsp_new->lens[i] = currstop - asp->n1;
4089                   i++;
4090                }
4091                currstop = asp->n1;
4092                state += asp->n2;
4093             } else if (asp->n3 == AM_START)
4094             {
4095                state -= asp->n2;
4096                dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1;
4097                dsp_new->lens[i] = currstop - asp->n1 + 1;
4098                i++;
4099                currstop = asp->n1 - 1;
4100             } else if (asp->n3 == AM_GAP)
4101             {
4102                dsp_new->starts[dsp_new->dim*i+n1-1] = -asp->n2;
4103                if (asp->n2 > dsp_shared->numseg)
4104                   dsp_new->lens[i] = dsp->lens[(asp->n2-1)-(dsp_shared->numseg)];
4105                else
4106                   dsp_new->lens[i] = dsp_shared->lens[asp->n2-1];
4107                i++;
4108             } else if (asp->n3 == AM_HARDSTOP)
4109             {
4110                if (currstop != asp->n1 && state > 0 && asp->next != NULL)
4111                {
4112                   dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1+1;
4113                   dsp_new->lens[i] = currstop - asp->n1;
4114                   i++;
4115                }
4116                currstop = asp->n1;
4117                state += asp->n2;
4118             }
4119             asp = asp->next;
4120          }
4121       }
4122       /* now add in the other rows, starting with rows from the sharedaln */
4123       for (i=0; i<dsp_shared->dim; i++)
4124       {
4125          if (i+1 != n1)
4126          {
4127             for (j=0; j<dsp_new->numseg; j++)
4128             {
4129                if (dsp_new->starts[dsp_new->dim*j+n1-1] >= 0)
4130                   dsp_new->starts[dsp_new->dim*j+i] = AlnMgr2MapSegStartToSegStart(amaip->sharedaln, dsp_new->starts[dsp_new->dim*j+n1-1], n2, i+1, dsp_new->lens[j]);
4131                else
4132                {
4133                   if (-(dsp_new->starts[dsp_new->dim*j+n1-1]) > dsp_shared->numseg)
4134                   /* this gap came from the new sap */
4135                      dsp_new->starts[dsp_new->dim*j+i] = -1;
4136                   else /* this gap came from the sharedaln */
4137                      dsp_new->starts[dsp_new->dim*j+i] = dsp_shared->starts[dsp_shared->dim*(-dsp_new->starts[dsp_new->dim*j+n1-1]-1)+i];
4138                }
4139                dsp_new->strands[dsp_new->dim*j+i] = AlnMgr2GetNthStrand(amaip->sharedaln, i+1);
4140             }
4141          }
4142       }
4143       for (i=0; i<dsp->dim; i++)
4144       {
4145          if (i+1 != n2)
4146          {
4147             if (i+1 > n2)
4148                offset = 1;
4149             else
4150                offset = 0;
4151             for (j=0; j<dsp_new->numseg; j++)
4152             {
4153                if (dsp_new->starts[dsp_new->dim*j+n1-1] >= 0)
4154                   dsp_new->starts[dsp_new->dim*j+i+dsp_shared->dim-offset] = AlnMgr2MapSegStartToSegStart(sap, dsp_new->starts[dsp_new->dim*j+n1-1], n1, i+1, dsp_new->lens[j]);
4155                else
4156                {
4157                   if (-(dsp_new->starts[dsp_new->dim*j+n1-1]) > dsp_shared->numseg)
4158                   /* this gap is from the new sap */
4159                      dsp_new->starts[dsp_new->dim*j+i+dsp_shared->dim-offset] = dsp->starts[dsp->dim*((-dsp_new->starts[dsp_new->dim*j+n1-1])-dsp_shared->numseg-1)+i];
4160                   else /* this gap is from the shared alignment */
4161                      dsp_new->starts[dsp_new->dim*j+i+dsp_shared->dim-offset] = -1;
4162                }
4163                dsp_new->strands[dsp_new->dim*j+i+dsp_shared->dim-offset] = AlnMgr2GetNthStrand(sap, i+1);
4164             }
4165          }
4166       }
4167       /* fill in strand info for shared row, and get rid of segment keys (neg numbers) */
4168       for (j=0; j<dsp_new->numseg; j++)
4169       {
4170          dsp_new->strands[dsp_new->dim*j+n1-1] = AlnMgr2GetNthStrand(amaip->sharedaln, n1);
4171          if (dsp_new->starts[dsp_new->dim*j+n1-1] < 0)
4172             dsp_new->starts[dsp_new->dim*j+n1-1] = -1;
4173       }
4174 if (dsp_new->dim > 10)
4175    dsp_new->dim = dsp_new->dim;
4176       AlnMgr2CondenseRows(dsp_new, dsp_new->dim);
4177       sap_new = SeqAlignNew();
4178       sap_new->segtype = SAS_DENSEG;
4179       sap_new->segs = (Pointer)(dsp_new);
4180       AlnMgr2IndexSingleChildSeqAlign(sap_new);
4181       SeqAlignFree(amaip->sharedaln);
4182       amaip->sharedaln = sap_new;
4183       amaip->numrows = dsp_new->dim;
4184       amaip->ids = (SeqIdPtr PNTR)MemNew(amaip->numrows*sizeof(SeqIdPtr));
4185       sip = dsp_new->ids;
4186       for (i=0; i<amaip->numrows; i++)
4187       {
4188          amaip->ids[i] = SeqIdDup(sip);
4189          sip = sip->next;
4190       }
4191       while (asp_head != NULL)
4192       {
4193          asp = asp_head->next;
4194          MemFree(asp_head);
4195          asp_head = asp;
4196       }
4197    }
4198 }
4200 /* SECTION 2c */
AlnMgr2MapSegStartToSegStart(SeqAlignPtr sap,Int4 pos,Int4 row1,Int4 row2,Int4 len)4201 static Int4 AlnMgr2MapSegStartToSegStart(SeqAlignPtr sap, Int4 pos, Int4 row1, Int4 row2, Int4 len)
4202 {
4203    Int4         diff;
4204    DenseSegPtr  dsp;
4205    Int4         pos2;
4206    Int4         seg;
4207    Uint1        strand1;
4208    Uint1        strand2;
4210    if (sap == NULL)
4211       return -1;
4212    seg = AlnMgr2GetSegForStartPos(sap, pos, row1);
4213    if (seg < 0)
4214       return -1;
4215    dsp = (DenseSegPtr)(sap->segs);
4216    if (dsp->starts[dsp->dim*seg+row2-1] == -1)
4217       return -1;
4218    strand1 = dsp->strands[dsp->dim*seg+row1-1];
4219    strand2 = dsp->strands[dsp->dim*seg+row2-1];
4220    if (strand1 != strand2)
4221       pos = pos + len - 1;
4222    if (strand1 == Seq_strand_minus)
4223       diff = dsp->lens[seg] - (pos - dsp->starts[dsp->dim*seg+row1-1]) - 1;
4224    else
4225       diff = pos - dsp->starts[dsp->dim*seg+row1-1];
4226    if (diff > dsp->lens[seg]) /* unaligned here */
4227       return -1;
4228    if (strand2 == Seq_strand_minus)
4229       pos2 = dsp->starts[dsp->dim*seg+row2-1] + dsp->lens[seg] - diff -1;
4230    else
4231       pos2 = dsp->starts[dsp->dim*seg+row2-1]+ diff;
4232    return pos2;
4233 }
4235 /* SECTION 2c */
AlnMgr2GetSegForStartPos(SeqAlignPtr sap,Int4 pos,Int4 row)4236 static Int4 AlnMgr2GetSegForStartPos(SeqAlignPtr sap, Int4 pos, Int4 row)
4237 {
4238    Uint2Ptr         array;
4239    DenseSegPtr      dsp;
4240    Int4             L;
4241    Int4             mid;
4242    Int4             offset;
4243    Int4             R;
4244    SAIndex2Ptr       saip;
4245    SARowDat2Ptr      srdp;
4246    Int4             start;
4247    Int4             stop;
4248    Uint1            strand;
4250    if (sap == NULL || sap->saip == NULL || row < 1)
4251       return -1;
4252    AlnMgr2GetNthSeqRangeInSA(sap, row, &start, &stop);
4253    if (pos < start || pos > stop)
4254       return -1;
4255    saip = (SAIndex2Ptr)(sap->saip);
4256    if (row > saip->numrows)
4257       return -1;
4258    srdp = saip->srdp[row-1];
4259    strand = AlnMgr2GetNthStrand(sap, row);
4260    dsp = (DenseSegPtr)(sap->segs);
4261    L = 0;
4262    R = srdp->numsect - 1;
4263    if (strand != Seq_strand_minus)
4264    {
4265       while (L < R)
4266       {
4267          mid = MIN((L + R)/2, srdp->numsect-2);
4268          if (dsp->starts[(srdp->sect[mid + 1])*(dsp->dim)+row-1] <= pos)
4269             L = mid+1;
4270          else
4271             R = mid;
4272       }
4273    } else
4274    {
4275       while (L < R)
4276       {
4277          mid = (L + R)/2;
4278          if (dsp->starts[(srdp->sect[mid])*(dsp->dim)+row-1] > pos)
4279             L = mid + 1;
4280          else
4281             R = mid;
4282       }
4283    }
4284    offset = pos - dsp->starts[(srdp->sect[L])*(dsp->dim)+row-1];
4285    if (offset >= dsp->lens[srdp->sect[L]])
4286       return -2;  /* this is an insert */
4287    if (saip->anchor > 0)
4288    {
4289       array = saip->srdp[saip->anchor-1]->sect;
4290       R = binary_search_on_uint2_list(array, srdp->sect[L], saip->srdp[saip->anchor-1]->numsect);
4291       L = R;
4292    }
4293    return srdp->sect[L];
4294 }
/* GetNextStart scans the segments after col for the first one in which
 * the given row (0-based) is not a gap (start != -1).
 *
 * Returns that segment's start for the row and, when pnext_start_col is
 * not NULL, stores the segment's index through it. Returns -1, leaving
 * *pnext_start_col untouched, if dsp is NULL, row or col is out of
 * range, or every later segment is a gap in this row.
 */
static Int4 GetNextStart (DenseSegPtr dsp, Int4 row, Int4 col, Int4Ptr pnext_start_col)
{
  Int4 scan;

  if (dsp == NULL)
  {
    return -1;
  }
  if (row < 0 || row >= dsp->dim)
  {
    return -1;
  }
  if (col < 0 || col >= dsp->numseg)
  {
    return -1;
  }

  /* step over segments where this row is gapped */
  scan = col + 1;
  while (scan < dsp->numseg && dsp->starts[(scan * dsp->dim) + row] == -1)
  {
    scan++;
  }

  if (scan >= dsp->numseg)
  {
    return -1;
  }

  if (pnext_start_col != NULL)
  {
    *pnext_start_col = scan;
  }
  return dsp->starts[(scan * dsp->dim) + row];
}
/***************************************************************************
*
*  AlnMgr2CondenseColumns finds adjacent columns which appear to align but
*  were not put in one column by the mixing mechanism because the input was
*  a set of pairwise alignments with a gap on the common sequence in this
*  segment. Or graphically:
*
*  ----- ----- ----- -----             -----
*  AACCG ----- ----- -----   becomes   AACCG
*  ----- AACCG ----- -----             AACCG
*  ----- ----- AACCG -----             AACCG
*  ----- ----- ----- AACCG             AACCG
*
*  The function works on each run of consecutive segments in which the
*  first row is a gap. Inside a run, a later column is folded into an
*  earlier one when both have the same length, no row has sequence in both
*  columns, and the fold would not disturb the order of a row's starts. The
*  folded column is then removed from starts, strands and lens, and numseg
*  is decremented. The dense-seg is modified in place; a NULL dsp is
*  ignored.
*
***************************************************************************/
static void AlnMgr2CondenseColumns(DenseSegPtr dsp)
{
  int gap_start_seg = -1;
  int gap_end_seg = -1;
  int row, seg, base_col, col;
  Int4 next_start, next_start_col;  /* Int4 so that &next_start_col is a real Int4Ptr */
  Boolean can_fit;

  if (dsp == NULL) {
    return;
  }

  for (seg = 0;  seg < dsp->numseg;  ++seg) {
    /* track the current run of segments where the first row is a gap;
     * the run is [gap_start_seg, gap_end_seg) once gap_end_seg is set */
    if (dsp->starts[dsp->dim * seg] == -1) {
      if (gap_start_seg == -1) {
        gap_start_seg = seg;
      }
      else {
        if (seg == dsp->numseg - 1) {
          /* the run reaches the end of the alignment */
          gap_end_seg = seg + 1;
        }
      }
    }
    else {
      if (gap_start_seg != -1) {
        gap_end_seg = seg;
      }
    }

    if (gap_end_seg != -1) {
      for (base_col = gap_start_seg;  base_col<gap_end_seg;  ++base_col) {
        int len = dsp->lens[base_col];
        for (col = base_col + 1;  col<gap_end_seg;  ++col) {
          if (dsp->lens[col] != len) {
            continue;
          }

          can_fit = TRUE;
          for (row = 0;  row < dsp->dim;  ++row) {
            if (dsp->starts[dsp->dim * col + row] != -1  &&
                dsp->starts[dsp->dim * base_col + row] != -1) {
              /* both columns hold sequence for this row */
              can_fit = FALSE;
              break;
            }
            else if (dsp->starts[dsp->dim * col + row] != -1)
            {
              /* make sure we aren't going to disturb the order of
               * the starts */
              next_start = GetNextStart (dsp, row, base_col, &next_start_col);
              if (next_start > -1
                  && next_start < dsp->starts[dsp->dim * col + row]
                  && next_start_col < col)
              {
                can_fit = FALSE;
              }
            }
          }

          if (can_fit) {
            for (row = 0;  row<dsp->dim;  ++row) {
              if (dsp->starts[dsp->dim * col + row] != -1) {
                dsp->starts[dsp->dim * base_col + row] =
                  dsp->starts[dsp->dim * col + row];
              }
            }

            /* remove column col: rebuild the three arrays one segment shorter */
            {{
              Int4Ptr       starts, lens;
              Uint1Ptr      strands = NULL;
              Int4          pos, new_pos;
              Int4          total = dsp->dim * dsp->numseg;

              starts = (Int4Ptr)MemNew(dsp->dim*(dsp->numseg-1)*sizeof(Int4));
              lens = (Int4Ptr)MemNew((dsp->numseg-1)*sizeof(Int4));
              /* strands is an optional part of a dense-seg; only rebuild it
               * when it is present */
              if (dsp->strands != NULL) {
                strands = (Uint1Ptr)MemNew(dsp->dim*(dsp->numseg-1)*sizeof(Uint1));
              }

              /* entries before the removed column keep their position ... */
              for (pos=0; pos<dsp->dim*col; pos++) {
                starts[pos] = dsp->starts[pos];
                if (strands != NULL) {
                  strands[pos] = dsp->strands[pos];
                }
              }
              /* ... entries after it shift down by one column (dim slots) */
              for (new_pos=pos, pos+=dsp->dim; pos<total; pos++, new_pos++) {
                starts[new_pos] = dsp->starts[pos];
                if (strands != NULL) {
                  strands[new_pos] = dsp->strands[pos];
                }
              }

              for (pos=0; pos<col; pos++) {
                lens[pos] = dsp->lens[pos];
              }
              for (new_pos=pos, pos++; pos<dsp->numseg; pos++, new_pos++) {
                lens[new_pos] = dsp->lens[pos];
              }

              MemFree(dsp->starts);
              MemFree(dsp->strands);
              MemFree(dsp->lens);   /* previously leaked on every merge */
              dsp->starts = starts;
              dsp->strands = strands;
              dsp->lens = lens;

              dsp->numseg--;
            }}

            /* everything after col moved one slot to the left */
            --gap_end_seg;
            --seg;
            --col;
          }
        }
      }

      gap_start_seg = -1;
      gap_end_seg = -1;
    }
  }
}
4450 /* SECTION 2c */
4451 /***************************************************************************
4452 *
4453 *  AlnMgr2CondenseRows finds rows of a dense-seg structure that are related
4454 *  and that could be condensed into a single row (or fewer rows). It then
4455 *  calls AlnMgr2DoCondense to condense those rows into continuous or
4456 *  discontinuous rows. whichrow designates which row to merge, if
4457 *  less than 1, the function tries to merge the last row.
4458 *
4459 ***************************************************************************/
AlnMgr2CondenseRows(DenseSegPtr dsp,Int4 whichrow)4460 static void AlnMgr2CondenseRows(DenseSegPtr dsp, Int4 whichrow)
4461 {
4462    Boolean     done;
4463    Int4        i;
4464    Int4        j;
4465    Int4        k;
4466    Int4        numrows;
4467    AMCdRowPtr  row;
4468    AMCdRowPtr  PNTR rowarray;
4469    SeqIdPtr    sip;
4470    SeqIdPtr    targetsip;
4472    sip = dsp->ids;
4473    rowarray = (AMCdRowPtr PNTR)MemNew((dsp->dim)*sizeof(AMCdRowPtr));
4474    if (whichrow < 1 || whichrow > dsp->dim)
4475       whichrow = dsp->dim;
4476    for (i=0; i<dsp->dim; i++)
4477    {
4478       row = (AMCdRowPtr)MemNew(sizeof(AMCdRow));
4479       row->sip = SeqIdDup(sip);
4480       sip = sip->next;
4481       row->strand = dsp->strands[i];
4482       row->rownum = i+1;
4483       rowarray[i] = row;
4484       if (i+1 == whichrow)
4485          targetsip = row->sip;
4486    }
4487    HeapSort(rowarray, i, sizeof(rowarray), AlnMgr2CompareCdRows);
4488    numrows = dsp->dim;
4489    j = -1; /* j marks the first occurrence of each sip */
4490    for (i=0; j==-1 && i<numrows; i++)
4491    {
4492       if (SeqIdComp(rowarray[i]->sip, targetsip) == SIC_YES)
4493       {
4494          j = i;
4495          if (rowarray[i]->rownum == whichrow) /* no other rows w/sip */
4496          {
4497             for (i=0; i<numrows; i++)
4498             {
4499                SeqIdFree(rowarray[i]->sip);
4500                MemFree(rowarray[i]);
4501             }
4502             MemFree(rowarray);
4503             return;
4504          }
4505       }
4506    }
4507    sip = SeqIdDup(rowarray[j]->sip);
4508    done = FALSE;
4509    for (i=j; !done && rowarray[i]->rownum < whichrow; i++)
4510    {
4511       if (SeqIdComp(rowarray[i]->sip, sip) == SIC_YES)
4512       {
4513          if (rowarray[i]->strand == rowarray[j]->strand)
4514          {
4515             if (AlnMgr2DoCondense(dsp, rowarray[i]->rownum, whichrow))
4516             {
4517                for (k=0; k<numrows; k++)
4518                {
4519                   if (rowarray[k]->rownum > rowarray[i]->rownum)
4520                   {
4521                      rowarray[k]->rownum--;
4522                      whichrow--;
4523                   }
4524                }
4525             }
4526          }
4527       } else
4528       {
4529          done = TRUE;
4530          SeqIdFree(sip);
4531          sip = SeqIdDup(rowarray[i]->sip);
4532          j = i;
4533       }
4534    }
4535    SeqIdFree(sip);
4536    for (i=0; i<numrows; i++)
4537    {
4538       SeqIdFree(rowarray[i]->sip);
4539       MemFree(rowarray[i]);
4540    }
4541    MemFree(rowarray);
4542 }
4544 /* SECTION 2c */
4545 /***************************************************************************
4546 *
4547 *  AlnMgr2DoCondense arithmetically condenses two related rows of a dense-seg
4548 *  structure into a single continuous row, a single discontinuous row, or
4549 *  two rows with different information than before.
4550 *
4551 ***************************************************************************/
AlnMgr2DoCondense(DenseSegPtr dsp,Int4 rownum1,Int4 rownum2)4552 static Boolean AlnMgr2DoCondense(DenseSegPtr dsp, Int4 rownum1, Int4 rownum2)
4553 {
4554    Int4          aln;
4555    SeqAlignPtr   fake_sap;
4556    Boolean       fits;
4557    Boolean       found;
4558    Int4          i;
4559    SeqIdPtr      id;
4560    SeqIdPtr      id_head;
4561    SeqIdPtr      id_prev;
4562    Int4          j;
4563    Int4          k;
4564    Int4          max1;
4565    Int4          max2;
4566    Boolean       merged;
4567    Int4          min1;
4568    Int4          min2;
4569    SAIndex2Ptr   saip;
4570    Boolean       someseq1;
4571    Boolean       someseq2;
4572    Int4Ptr       starts;
4573    Uint1         strand1;
4574    Uint1         strand2;
4575    Uint1Ptr      strands;
4576    AM_Small2Ptr  window;
4577    AM_Small2Ptr  window_head;
4578    AM_Small2Ptr  window_prev;
4580    /* always merge up to rownum1 (better rows are first) */
4581    if (rownum1 > rownum2)
4582    {
4583       i = rownum2;
4584       rownum2 = rownum1;
4585       rownum1 = i;
4586    }
4587    strand1 = dsp->strands[rownum1-1];
4588    strand2 = dsp->strands[rownum2-1];
4589    if (strand1 != strand2)
4590       return FALSE;
4591    i = 0;
4592    window_head = window_prev = NULL;
4593    while (i < dsp->numseg)
4594    {
4595       j = i;
4596       someseq1 = someseq2 = FALSE;
4597       if (dsp->starts[dsp->dim*j+rownum1-1] >= 0)
4598       {
4599          someseq1 = TRUE;
4600          while (j<dsp->numseg && dsp->starts[dsp->dim*j+rownum2-1] < 0)
4601          {
4602             j++;
4603          }
4604       } else if (dsp->starts[dsp->dim*j+rownum2-1] >= 0)
4605       {
4606          someseq2 = TRUE;
4607          while (j<dsp->numseg && dsp->starts[dsp->dim*j+rownum1-1] < 0)
4608          {
4609             j++;
4610          }
4611       }
4612       fits = FALSE;
4613       if (j > i)
4614       {
4615          if (strand1 == Seq_strand_minus)
4616          {
4617             if (someseq1 == FALSE)
4618             {
4619                min1 = -1;
4620                for (k=j; min1 == -1 && k<dsp->numseg; k++)
4621                {
4622                   if (dsp->starts[dsp->dim*k+rownum1-1] > -1)
4623                      min1 = dsp->starts[dsp->dim*k+rownum1-1]+dsp->lens[k]-1;
4624                }
4625                max1 = -1;
4626                for (k=(i-1); max1 == -1 && k>=0; k--)
4627                {
4628                   max1 = dsp->starts[dsp->dim*k+rownum1-1];
4629                }
4630             } else
4631             {
4632                min1 = -1;
4633                for (k=j-1; min1 == -1 && k>=i; k--)
4634                {
4635                   min1 = dsp->starts[dsp->dim*(k)+rownum1-1];
4636                }
4637                max1 = -1;
4638                for (k=i; min1 == -1 && k<j; k++)
4639                {
4640                   if (dsp->starts[dsp->dim*k+rownum1-1] >= 0)
4641                      max1 = dsp->starts[dsp->dim*k+rownum1-1] + dsp->lens[k] -1;
4642                }
4643             }
4644          } else
4645          {
4646             if (someseq1 == FALSE)
4647             {
4648                min1 = -1;
4649                for (k=i-1; min1 == -1 && k >= 0; k--)
4650                {
4651                   if (dsp->starts[dsp->dim*k+rownum1-1] > -1)
4652                      min1 = dsp->starts[dsp->dim*k+rownum1-1]+dsp->lens[k]-1;
4653                }
4654                max1 = -1;
4655                for (k=j; max1 == -1 && k<dsp->numseg; k++)
4656                {
4657                   max1 = dsp->starts[dsp->dim*k+rownum1-1];
4658                }
4659             } else
4660             {
4661                min1 = -1;
4662                for (k=i; min1 == -1 && k<j; k++)
4663                {
4664                   min1 = dsp->starts[dsp->dim*k+rownum1-1];
4665                }
4666                max1 = -1;
4667                for (k=j-1; max1 == -1 && k>i; k--)
4668                {
4669                   if (dsp->starts[dsp->dim*k+rownum1-1] >= 0)
4670                      max1 = dsp->starts[dsp->dim*(k)+rownum1-1] + dsp->lens[k] - 1;
4671                }
4672             }
4673          }
4674          if (strand2 == Seq_strand_minus)
4675          {
4676             if (someseq2 == FALSE)
4677             {
4678                min2 = -1;
4679                for (k=j; min2 == -1 && k<dsp->numseg; k++)
4680                {
4681                   if (dsp->starts[dsp->dim*k+rownum2-1] > -1)
4682                      min2 = dsp->starts[dsp->dim*k+rownum2-1]+dsp->lens[k]-1;
4683                }
4684                max2 = -1;
4685                for (k=(i-1); max2 == -1 && k>=0; k--)
4686                {
4687                   max2 = dsp->starts[dsp->dim*k+rownum2-1];
4688                }
4689             } else
4690             {
4691                min2 = -1;
4692                for (k=j-1; min2 == -1 && k>=i; k--)
4693                {
4694                   min2 = dsp->starts[dsp->dim*(k)+rownum2-1];
4695                }
4696                max2 = -1;
4697                for (k=i; max2 == -1 && k<j; k++)
4698                {
4699                   if (dsp->starts[dsp->dim*k+rownum2-1] >= 0)
4700                      max2 = dsp->starts[dsp->dim*k+rownum2-1] + dsp->lens[k]-1;
4701                }
4702             }
4703          } else
4704          {
4705             if (someseq2 == FALSE)
4706             {
4707                min2 = -1;
4708                for (k=i-1; min2 == -1 && k >= 0; k--)
4709                {
4710                   if (dsp->starts[dsp->dim*k+rownum2-1] > -1)
4711                      min2 = dsp->starts[dsp->dim*k+rownum2-1]+dsp->lens[k]-1;
4712                }
4713                max2 = -1;
4714                for (k=j; max2 == -1 && k<dsp->numseg; k++)
4715                {
4716                   max2 = dsp->starts[dsp->dim*k+rownum2-1];
4717                }
4718             } else
4719             {
4720                min2 = -1;
4721                for (k=i; min2 == -1 && k<j; k++)
4722                {
4723                   min2 = dsp->starts[dsp->dim*k+rownum2-1];
4724                }
4725                max2 = -1;
4726                for (k=j-1; max2 == -1 && k>=i; k--)
4727                {
4728                   if (dsp->starts[dsp->dim*(k)+rownum2-1] >= 0)
4729                      max2 = dsp->starts[dsp->dim*(k)+rownum2-1] + dsp->lens[k] - 1;
4730                }
4731             }
4732          }
4733          if (someseq1 == FALSE)
4734          {
4735             if ((min1 < min2 || min2 == -1) && (max1 > max2 || max1 == -1))
4736                fits = TRUE;
4737          } else
4738          {
4739             if ((min2 < min1 || min1 == -1) && (max2 > max1 || max2 == -1))
4740                fits = TRUE;
4741          }
4742          window = (AM_Small2Ptr)MemNew(sizeof(AM_Small2));
4743          window->n1 = i;
4744          window->n2 = j-1;
4745          if (!fits)
4746             window->n4 = -1;
4747          if (window_head != NULL)
4748          {
4749             window_prev->next = window;
4750             window_prev = window;
4751          } else
4752             window_head = window_prev = window;
4753       }
4754       if (i == j)
4755          i++;
4756       else
4757          i = j;
4758    }
4759    if (window_head == NULL)
4760       return FALSE;
4761    fake_sap = SeqAlignNew();
4762    fake_sap->segtype = SAS_DENSEG;
4763    fake_sap->segs = (Pointer)dsp;
4764    AlnMgr2IndexSingleChildSeqAlign(fake_sap);
4765    aln = AlnMgr2GetNumAlnBlocks(fake_sap);
4766    if (aln == 1) /* only merge if there is a single fitted window flanked by gaps */
4767    /*or if there are several contiguous fitted windows flanked by gaps */
4768    {
4769       if (window_head->next != NULL && window_head->n4 == 0)
4770       {
4771          window = window_head->next;
4772          while (window_head->n2+1 < dsp->numseg && dsp->starts[dsp->dim*(window_head->n2+1)+rownum1-1] == -1 && dsp->starts[dsp->dim*(window_head->n2+1)+rownum2-1] == -1)
4773          {
4774             window_head->n2++;
4775          }
4776          while (window != NULL && window->n4 == 0 && window->n1 == window_head->n2+1)
4777          {
4778             window_head->n2 = window->n2;
4779             window = window->next;
4780             while (window_head->n2+1 < dsp->numseg && dsp->starts[dsp->dim*(window_head->n2+1)+rownum1-1] == -1 && dsp->starts[dsp->dim*(window_head->n2+1)+rownum2-1] == -1)
4781             {
4782                window_head->n2++;
4783             }
4784          }
4785          if (window != NULL)
4786          {
4787             while (window_head != NULL)
4788             {
4789                window = window_head->next;
4790                MemFree(window_head);
4791                window_head = window;
4792             }
4793             fake_sap->segs = NULL;
4794             SeqAlignFree(fake_sap);
4795             return FALSE;
4796          }
4797       }
4798       if (window_head->n4 == -1)
4799       {
4800          while (window_head != NULL)
4801          {
4802             window = window_head->next;
4803             MemFree(window_head);
4804             window_head = window;
4805          }
4806          fake_sap->segs = NULL;
4807          SeqAlignFree(fake_sap);
4808          return FALSE;
4809       }
4810       found = FALSE;
4811       for (i=0; !found && i<window_head->n1; i++)
4812       {
4813          if (dsp->starts[dsp->dim*i+rownum1-1] != -1 && dsp->starts[dsp->dim*i+rownum2-1] != -1)
4814             found = TRUE;
4815       }
4816       for (i=window_head->n2+1; !found && i<dsp->numseg; i++)
4817       {
4818          if (dsp->starts[dsp->dim*i+rownum1-1] != -1 && dsp->starts[dsp->dim*i+rownum2-1] != -1)
4819             found = TRUE;
4820       }
4821       if (found)
4822       {
4823          while (window_head != NULL)
4824          {
4825             window = window_head->next;
4826             MemFree(window_head);
4827             window_head = window;
4828          }
4829          fake_sap->segs = NULL;
4830          SeqAlignFree(fake_sap);
4831          return FALSE;
4832       }
4833       /* merge whole row up to rownum1 */
4834       for (i=0; i<dsp->numseg; i++)
4835       {
4836          dsp->starts[dsp->dim*i+rownum1-1] = MAX(dsp->starts[dsp->dim*i+rownum1-1], dsp->starts[dsp->dim*i+rownum2-1]);
4837       }
4838       starts = (Int4Ptr)MemNew((dsp->dim-1)*(dsp->numseg)*sizeof(Int4));
4839       strands = (Uint1Ptr)MemNew((dsp->dim-1)*(dsp->numseg)*sizeof(Uint1));
4840       k = 0;
4841       for (i=0; i<dsp->dim; i++)
4842       {
4843          if (i != rownum2-1)
4844          {
4845             for (j=0; j<dsp->numseg; j++)
4846             {
4847                starts[(dsp->dim-1)*j+k] = dsp->starts[dsp->dim*j+i];
4848                strands[(dsp->dim-1)*j+k] = dsp->strands[dsp->dim*j+i];
4849             }
4850             k++;
4851          }
4852       }
4853       MemFree(dsp->starts);
4854       MemFree(dsp->strands);
4855       dsp->starts = starts;
4856       dsp->strands = strands;
4857       dsp->dim--;
4858       id_head = id_prev = NULL;
4859       id = dsp->ids;
4860       j = 0;
4861       while (id != NULL)
4862       {
4863          if (j+1 != rownum2)
4864          {
4865             if (id_head != NULL)
4866             {
4867                id_prev->next = SeqIdDup(id);
4868                id_prev = id_prev->next;
4869             } else
4870                id_head = id_prev = SeqIdDup(id);
4871          }
4872          j++;
4873          id = id->next;
4874       }
4875       SeqIdSetFree(dsp->ids);
4876       dsp->ids = id_head;
4877       while (window_head != NULL)
4878       {
4879          window = window_head->next;
4880          MemFree(window_head);
4881          window_head = window;
4882       }
4883       fake_sap->segs = NULL;
4884       SeqAlignFree(fake_sap);
4885       return TRUE;
4886    }
4887    /* now go through and find the largest piece of every window that can be merged */
4888    /* (can't split up an aligned region with the merge, though)                    */
4889    window = window_head;
4890    saip = (SAIndex2Ptr)(fake_sap->saip);
4891    while (window != NULL)
4892    {
4893       j = k = -1;
4894       found = FALSE;
4895       for (i=0; !found && i<window->n1; i++)
4896       {
4897          if (dsp->starts[dsp->dim*i+rownum1-1] != -1 && dsp->starts[dsp->dim*i+rownum2-1] != -1)
4898             found = TRUE;
4899       }
4900       if (!found)
4901          j = window->n1;
4902       found = FALSE;
4903       for (i=window->n2+1; !found && i<dsp->numseg; i++)
4904       {
4905          if (dsp->starts[dsp->dim*i+rownum1-1] != -1 && dsp->starts[dsp->dim*i+rownum2-1] != -1)
4906             found = TRUE;
4907       }
4908       if (!found)
4909          k = window->n2;
4910       if (j == -1)
4911       {
4912          found = FALSE;
4913          for (i = window->n1-1; !found && i<window->n2; i++)
4914          {
4915             j = binary_search_on_uint4_list(saip->unaln, i, saip->numunaln);
4916             if (j == i)
4917                found = TRUE;
4918             else
4919                j = -1;
4920          }
4921       }
4922       if (k == -1)
4923       {
4924          found = FALSE;
4925          for (i = window->n2; !found && i>=window->n1; i++)
4926          {
4927             k = binary_search_on_uint4_list(saip->unaln, i, saip->numunaln);
4928             if (k == i)
4929                found = TRUE;
4930             else
4931                k = -1;
4932          }
4933       }
4934       if (j > -1 && k > -1 && k > j)
4935       {
4936          window->n1 = j+1;
4937          window->n2 = k;
4938       } else
4939          window->n1 = -1;
4940       window = window->next;
4941    }
4942    window = window_head;
4943    while (window != NULL)
4944    {
4945       if (window->n4 == -1 && i >= 0) /* see if it fits now */
4946       {
4947          i = window->n1;
4948          j = window->n2+1;
4949          if (strand1 == Seq_strand_minus)
4950          {
4951             if (dsp->starts[dsp->dim*(j-1)+rownum1-1] == -1)
4952             {
4953                min1 = -1;
4954                for (k=j; min1 == -1 && k<dsp->numseg; k++)
4955                {
4956                   min1 = dsp->starts[dsp->dim*k+rownum1-1];
4957                }
4958                max1 = -1;
4959                for (k=(i-1); max1 == -1 && k>=0; k--)
4960                {
4961                   max1 = dsp->starts[dsp->dim*k+rownum1-1];
4962                }
4963             } else
4964             {
4965                min1 = dsp->starts[dsp->dim*(j-1)+rownum1-1];
4966                max1 = dsp->starts[dsp->dim*i+rownum1-1] + dsp->lens[i];
4967             }
4968          } else
4969          {
4970             if (dsp->starts[dsp->dim*(j-1)+rownum1-1] == -1)
4971             {
4972                min1 = -1;
4973                for (k=i-1; min1 == -1 && k >= 0; k--)
4974                {
4975                   min1 = dsp->starts[dsp->dim*k+rownum1-1];
4976                }
4977                max1 = -1;
4978                for (k=j; max1 == -1 && k<dsp->numseg; k++)
4979                {
4980                   max1 = dsp->starts[dsp->dim*k+rownum1-1];
4981                }
4982             } else
4983             {
4984                min1 = dsp->starts[dsp->dim*i+rownum1-1];
4985                max1 = dsp->starts[dsp->dim*(j-1)+rownum1-1] + dsp->lens[j-1];
4986             }
4987          }
4988          if (strand2 == Seq_strand_minus)
4989          {
4990             if (dsp->starts[dsp->dim*(j-1)+rownum2-1] == -1)
4991             {
4992                min2 = -1;
4993                for (k=j; min2 == -1 && k<dsp->numseg; k++)
4994                {
4995                   min2 = dsp->starts[dsp->dim*k+rownum2-1];
4996                }
4997                max2 = -1;
4998                for (k=(i-1); max2 == -1 && k>=0; k--)
4999                {
5000                   max2 = dsp->starts[dsp->dim*k+rownum2-1];
5001                }
5002             } else
5003             {
5004                min2 = dsp->starts[dsp->dim*(j-1)+rownum2-1];
5005                max2 = dsp->starts[dsp->dim*i+rownum2-1] + dsp->lens[i];
5006             }
5007          } else
5008          {
5009             if (dsp->starts[dsp->dim*(j-1)+rownum2-1] == -1)
5010             {
5011                min2 = -1;
5012                for (k=i-1; min2 == -1 && k >= 0; k--)
5013                {
5014                   min2 = dsp->starts[dsp->dim*k+rownum2-1];
5015                }
5016                max2 = -1;
5017                for (k=j; max2 == -1 && k<dsp->numseg; k++)
5018                {
5019                   max2 = dsp->starts[dsp->dim*k+rownum2-1];
5020                }
5021             } else
5022             {
5023                min2 = dsp->starts[dsp->dim*i+rownum2-1];
5024                max2 = dsp->starts[dsp->dim*(j-1)+rownum2-1] + dsp->lens[j-1];
5025             }
5026          }
5027          if (dsp->starts[dsp->dim*j+rownum1-1] == -1)
5028          {
5029             if (min1 < min2 && (max1 > max2 || max1 == -1))
5030                window->n4 = 0;
5031          } else
5032          {
5033             if (min2 < min1 && (max2 > max1 || max2 == -1))
5034                window->n4 = 0;
5035          }
5036       }
5037       if (window->n1 >= 0 && window->n4 >= 0)
5038       {
5039          for (i=window->n1; i<=window->n2; i++)
5040          {
5041             dsp->starts[dsp->dim*i+rownum1-1] = MAX(dsp->starts[dsp->dim*i+rownum1-1], dsp->starts[dsp->dim+i+rownum2-1]);
5042          }
5043       }
5044       window = window->next;
5045    }
5046    found = FALSE;
5047    /* check to see if rownum2 is all gaps now */
5048    for (i=0; !found && i<dsp->numseg; i++)
5049    {
5050       if (dsp->starts[dsp->dim*i+rownum2-1] != -1)
5051          found = TRUE;
5052    }
5053    merged = FALSE;
5054    if (!found) /* just gaps */
5055    {
5056       /* merge whole row up to rownum1 */
5057       for (i=0; i<dsp->numseg; i++)
5058       {
5059          dsp->starts[dsp->dim*i+rownum1-1] = MAX(dsp->starts[dsp->dim*i+rownum1-1], dsp->starts[dsp->dim*i+rownum2-1]);
5060       }
5061       starts = (Int4Ptr)MemNew((dsp->dim-1)*(dsp->numseg)*sizeof(Int4));
5062       strands = (Uint1Ptr)MemNew((dsp->dim-1)*(dsp->numseg)*sizeof(Uint1));
5063       k = 0;
5064       for (i=0; i<dsp->dim; i++)
5065       {
5066          if (i != rownum2-1)
5067          {
5068             for (j=0; j<dsp->numseg; j++)
5069             {
5070                starts[dsp->dim*j+k] = dsp->starts[dsp->dim*j+i];
5071                strands[dsp->dim*j+k] = dsp->strands[dsp->dim*j+i];
5072             }
5073             k++;
5074          }
5075       }
5076       MemFree(dsp->starts);
5077       MemFree(dsp->strands);
5078       dsp->starts = starts;
5079       dsp->strands = strands;
5080       dsp->dim--;
5081       id_head = id_prev = NULL;
5082       id = dsp->ids;
5083       j = 0;
5084       while (id != NULL)
5085       {
5086          if (j+1 != rownum2)
5087          {
5088             if (id_head != NULL)
5089             {
5090                id_prev->next = SeqIdDup(id);
5091                id_prev = id_prev->next;
5092             } else
5093                id_head = id_prev = SeqIdDup(id);
5094          }
5095          j++;
5096          id = id->next;
5097       }
5098       SeqIdSetFree(dsp->ids);
5099       dsp->ids = id_head;
5100       merged = TRUE;
5101    }
5102    while (window_head != NULL)
5103    {
5104       window = window_head->next;
5105       MemFree(window_head);
5106       window_head = window;
5107    }
5108    fake_sap->segs = NULL;
5109    SeqAlignFree(fake_sap);
5110    return merged;
5111 }
5113 /* SECTION 2c */
5114 /***************************************************************************
5115 *
5116 *  AlnMgr2CompareCdRows is the HeapSort callback for AlnMgr2CondenseRows.
5117 *  It puts the CDRows in order first by seqid and secondarily by row number.
5118 *
5119 ***************************************************************************/
AlnMgr2CompareCdRows(VoidPtr ptr1,VoidPtr ptr2)5120 static int LIBCALLBACK AlnMgr2CompareCdRows(VoidPtr ptr1, VoidPtr ptr2)
5121 {
5122    Int4        i;
5123    AMCdRowPtr  row1;
5124    AMCdRowPtr  row2;
5126    if (ptr1 == NULL || ptr2 == NULL)
5127       return 0;
5128    row1 = *((AMCdRowPtr PNTR)ptr1);
5129    row2 = *((AMCdRowPtr PNTR)ptr2);
5130    i = AlnMgr2OrderSeqIds(row1->sip, row2->sip);
5131    if (i == 0) /* sort from least rownum to greatest within each seqid */
5132    {
5133       if (row1->rownum < row2->rownum)
5134          return -1;
5135       else
5136          return 1;
5137    } else
5138       return i;
5139 }
5141 /* SECTION 2c */
5142 /***************************************************************************
5143 *
5144 *  AlnMgr2CompareAsps is a HeapSort callback for AlnMgr2AddInNewSA. It
5145 *  compares the starts (n1) of the two AM_Small2Ptrs; if those are the same
5146 *  it compares the types.
5147 *
5148 ***************************************************************************/
AlnMgr2CompareAsps(VoidPtr ptr1,VoidPtr ptr2)5149 static int LIBCALLBACK AlnMgr2CompareAsps(VoidPtr ptr1, VoidPtr ptr2)
5150 {
5151    AM_Small2Ptr  asp1;
5152    AM_Small2Ptr  asp2;
5154    if (ptr1 != NULL && ptr2 != NULL)
5155    {
5156       asp1 = *((AM_Small2Ptr PNTR)ptr1);
5157       asp2 = *((AM_Small2Ptr PNTR)ptr2);
5158       if (asp1->n1 < asp2->n1)
5159          return -1;
5160       else if (asp1->n1 > asp2->n1)
5161          return 1;
5162       else if (asp1->n5 < asp2->n5)
5163          return -1;
5164       else if (asp1->n5 > asp2->n5)
5165          return 1;
5166       else
5167       {
5168          if (asp1->n3 == AM_GAP && asp2->n3 == AM_GAP)
5169          {
5170             if (asp1->n2 < asp2->n2)
5171                return -1;
5172             if (asp1->n2 > asp2->n2)
5173                return 1;
5174          }
5175          if (asp1->n3 == AM_START)
5176          {
5177             if (asp2->n3 == AM_STOP)
5178                return -1;
5179             else if (asp2->n3 == AM_GAP)
5180                return -1;
5181             else if (asp2->n3 == AM_HARDSTOP)
5182                return -1;
5183             else
5184                return 0;
5185          } else if (asp1->n3 == AM_STOP)
5186          {
5187             if (asp2->n3 == AM_START)
5188                return 1;
5189             else if (asp2->n3 == AM_GAP)
5190                return 1;
5191             else if (asp2->n3 == AM_HARDSTOP)
5192                return -1;
5193             else
5194                return 0;
5195          } else if (asp1->n3 == AM_GAP)
5196          {
5197             if (asp2->n3 == AM_START)
5198                return 1;
5199             else if (asp2->n3 == AM_STOP)
5200                return -1;
5201             else if (asp2->n3 == AM_HARDSTOP)
5202                return -1;
5203             else
5204                return 0;
5205          } else if (asp1->n3 == AM_HARDSTOP)
5206          {
5207             if (asp2->n3 == AM_START)
5208                return 1;
5209             else if (asp2->n3 == AM_STOP)
5210                return 1;
5211             else if (asp2->n3 == AM_GAP)
5212                return 1;
5213             else
5214                return 0;
5215          }
5216       }
5217    }
5218    return 0;
5219 }
5221 /* SECTION 2c */
5222 /***************************************************************************
5223 *
5224 *  AlnMgr2CompareAspsMinus is a HeapSort callback for AlnMgr2AddInNewSA. It
5225 *  compares the starts (n1) of the two AM_Small2Ptrs; if those are the same
5226 *  it compares the types. The only difference from AlnMgr2CompareAsps is
5227 *  that it sorts the structures in the opposite order.
5228 *
5229 ***************************************************************************/
AlnMgr2CompareAspsMinus(VoidPtr ptr1,VoidPtr ptr2)5230 static int LIBCALLBACK AlnMgr2CompareAspsMinus(VoidPtr ptr1, VoidPtr ptr2)
5231 {
5232    AM_Small2Ptr  asp1;
5233    AM_Small2Ptr  asp2;
5235    if (ptr1 != NULL && ptr2 != NULL)
5236    {
5237       asp1 = *((AM_Small2Ptr PNTR)ptr1);
5238       asp2 = *((AM_Small2Ptr PNTR)ptr2);
5239       if (asp1->n1 > asp2->n1)
5240          return -1;
5241       else if (asp1->n1 < asp2->n1)
5242          return 1;
5243       else if (asp1->n5 < asp2->n5)
5244          return -1;
5245       else if (asp1->n5 > asp2->n5)
5246          return 1;
5247       else
5248       {
5249          if (asp1->n3 == AM_GAP && asp2->n3 == AM_GAP)
5250          {
5251             if (asp1->n2 < asp2->n2)
5252                return -1;
5253             if (asp1->n2 > asp2->n2)
5254                return 1;
5255          }
5256          if (asp1->n3 == AM_START)
5257          {
5258             if (asp2->n3 == AM_STOP)
5259                return 1;
5260             else if (asp2->n3 == AM_GAP)
5261                return -1;
5262             else if (asp2->n3 == AM_HARDSTOP)
5263                return 1;
5264             else
5265                return 0;
5266          } else if (asp1->n3 == AM_STOP)
5267          {
5268             if (asp2->n3 == AM_START)
5269                return -1;
5270             else if (asp2->n3 == AM_GAP)
5271                return -1;
5272             else if (asp2->n3 == AM_HARDSTOP)
5273                return 1;
5274             else
5275                return 0;
5276          } else if (asp1->n3 == AM_GAP)
5277          {
5278             if (asp2->n3 == AM_START)
5279                return 1;
5280             else if (asp2->n3 == AM_STOP)
5281                return 1;
5282             else if (asp2->n3 == AM_HARDSTOP)
5283                return 1;
5284             else
5285                return 0;
5286          } else if (asp1->n3 == AM_HARDSTOP)
5287          {
5288             if (asp2->n3 == AM_START)
5289                return -1;
5290             else if (asp2->n3 == AM_STOP)
5291                return -1;
5292             else if (asp2->n3 == AM_GAP)
5293                return -1;
5294             else
5295                return 0;
5296          }
5297       }
5298    }
5299    return 0;
5300 }
5303 /* SECTION 2c */
5304 /***************************************************************************
5305 *
5306 *  AlnMgr2GetFirstSharedRow takes two indexed or unindexed dense-seg
5307 *  seqaligns and returns the row numbers of the first sequence that is
5308 *  shared between the two alignments. If the alignments do not share any
5309 *  sequences, both n1 and n2 are set to 0.
5310 *
5311 ***************************************************************************/
AlnMgr2GetFirstSharedRow(SeqAlignPtr sap1,SeqAlignPtr sap2,Int4Ptr n1,Int4Ptr n2)5312 static void AlnMgr2GetFirstSharedRow(SeqAlignPtr sap1, SeqAlignPtr sap2, Int4Ptr n1, Int4Ptr n2)
5313 {
5314    DenseSegPtr  dsp1;
5315    DenseSegPtr  dsp2;
5316    Int4         i;
5317    Int4         j;
5318    SeqIdPtr     sip1;
5319    SeqIdPtr     sip2;
5321    dsp1 = (DenseSegPtr)(sap1->segs);
5322    dsp2 = (DenseSegPtr)(sap2->segs);
5323    sip1 = dsp1->ids;
5324    i = 1;
5325    while (sip1 != NULL)
5326    {
5327       j = 1;
5328       sip2 = dsp2->ids;
5329       while (sip2 != NULL)
5330       {
5331          if (SeqIdComp(sip1, sip2) == SIC_YES)
5332          {
5333             *n1 = i;
5334             *n2 = j;
5335             return;
5336          }
5337          sip2 = sip2->next;
5338          j++;
5339       }
5340       sip1 = sip1->next;
5341       i++;
5342    }
5343    /* nothing found */
5344    *n1 = 0;
5345    *n2 = 0;
5346 }
5348 /* SECTION 2d */
/***************************************************************************
*
*  AlnMgr2SeqIdListsOverlap returns the first node of the sip1 list for
*  which SeqIdComp reports SIC_YES against some node of the sip2 list.
*  The returned pointer is a node of the sip1 list itself. NULL is
*  returned when either list is NULL or when no such node exists.
*
***************************************************************************/
static SeqIdPtr AlnMgr2SeqIdListsOverlap(SeqIdPtr sip1, SeqIdPtr sip2)
{
   SeqIdPtr  outer;
   SeqIdPtr  inner;

   if (sip1 == NULL || sip2 == NULL)
      return NULL;
   for (outer = sip1; outer != NULL; outer = outer->next)
   {
      for (inner = sip2; inner != NULL; inner = inner->next)
      {
         if (SeqIdComp(outer, inner) == SIC_YES)
            return outer;
      }
   }
   return NULL;
}
5371 /***************************************************************************
5372 *
5373 *  AlnMgr2OrderSeqIds simply alphabetizes printed seqids in order to sort
5374 *  them in order to group identical ones in a set.
5375 *
5376 ***************************************************************************/
AlnMgr2OrderSeqIds(SeqIdPtr sip1,SeqIdPtr sip2)5377 static Int4 AlnMgr2OrderSeqIds(SeqIdPtr sip1, SeqIdPtr sip2)
5378 {
5379    Char  txt1[42];
5380    Char  txt2[42];
5382    if (sip1 == NULL && sip2 == NULL)
5383       return 0;
5384    if (sip1 == NULL && sip2 != NULL)
5385       return 1;
5386    if (sip1 != NULL && sip2 == NULL)
5387       return -1;
5388    SeqIdWrite(sip1, txt1, PRINTID_TEXTID_ACC_VER, 41);
5389    SeqIdWrite(sip2, txt2, PRINTID_TEXTID_ACC_VER, 41);
5390    txt1[41] = txt2[41] = '\0';
5391    return StringICmp(txt1, txt2);
5392 }
5394 /* SECTION 2d */
5395 /***************************************************************************
5396 *
5397 *  AlnMgr2SetUnaln takes an indexed alignment and sets the numunaln and
5398 *  unaln array fields. The unaligned regions are numbered the same
5399 *  regardless of whether the alignment is anchored, although they will
5400 *  most likely be accessed and displayed differently.
5401 *
5402 ***************************************************************************/
AlnMgr2SetUnaln(SeqAlignPtr sap)5403 static void AlnMgr2SetUnaln(SeqAlignPtr sap)
5404 {
5405    AMAlignIndex2Ptr  amaip;
5406    AM_Small2Ptr      ams;
5407    AM_Small2Ptr      ams_head;
5408    AM_Small2Ptr      ams_prev;
5409    AM_Small2Ptr      PNTR amsarray;
5410    DenseSegPtr       dsp;
5411    Int4              i;
5412    Int4              j;
5413    SAIndex2Ptr       saip;
5415    if (sap == NULL || sap->saip == NULL)
5416       return;
5417    if (sap->saip->indextype == INDEX_CHILD)
5418    {
5419       saip = (SAIndex2Ptr)(sap->saip);
5420       dsp = (DenseSegPtr)(sap->segs);
5421    } else if (sap->saip->indextype == INDEX_PARENT)
5422    {
5423       amaip = (AMAlignIndex2Ptr)(sap->saip);
5424       if (amaip->alnstyle == AM2_LITE)
5425          return;
5426       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
5427       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
5428    } else
5429       return;
5430    MemFree(saip->unaln);
5431    saip->unaln = NULL;
5432    ams_head = ams_prev = NULL;
5433    for (i=0; i<saip->numrows; i++)
5434    {
5435       for (j=0; j<saip->srdp[i]->numunaln; j++)
5436       {
5437          ams = (AM_Small2Ptr)MemNew(sizeof(AM_Small2));
5438          ams->n1 = saip->srdp[i]->unaligned[j];
5439          if (ams_head != NULL)
5440          {
5441             ams_prev->next = ams;
5442             ams_prev = ams;
5443          } else
5444             ams_head = ams_prev = ams;
5445       }
5446    }
5447    if (ams_head == NULL)
5448    {
5449       saip->numunaln = -1;
5450       return;
5451    }
5452    j = 0;
5453    ams = ams_head;
5454    while (ams != NULL)
5455    {
5456       j++;
5457       ams = ams->next;
5458    }
5459    amsarray = (AM_Small2Ptr PNTR)MemNew(j*sizeof(AM_Small2Ptr));
5460    j = 0;
5461    ams = ams_head;
5462    while (ams != NULL)
5463    {
5464       amsarray[j] = ams;
5465       j++;
5466       ams = ams->next;
5467    }
5468    HeapSort(amsarray, j, sizeof(AM_Small2Ptr), AlnMgr2CompareUnalnAMS);
5469    saip->numunaln = 1;
5470    for (i=1; i<j; i++)
5471    {
5472       if (amsarray[i]->n1 != amsarray[i-1]->n1)
5473          saip->numunaln++;
5474    }
5475    saip->unaln = (Uint4Ptr)MemNew(saip->numunaln*sizeof(Uint4));
5476    saip->unaln[0] = amsarray[0]->n1;
5477    saip->numunaln = 1;
5478    for (i=1; i<j; i++)
5479    {
5480       if (amsarray[i]->n1 != amsarray[i-1]->n1)
5481       {
5482          saip->unaln[saip->numunaln] = amsarray[i]->n1;
5483          saip->numunaln++;
5484       }
5485    }
5486    for (i=0; i<j; i++)
5487    {
5488       MemFree(amsarray[i]);
5489    }
5490    MemFree(amsarray);
5491 }
5493 /* SECTION 2d */
5494 /***************************************************************************
5495 *
5496 *  AlnMgr2CompareUnalnAMS is the HeapSort callback for AlnMgr2SetUnaln;
5497 *  it simply compares two AM_Small2 structures and orders them by their
5498 *  n1 fields.
5499 *
5500 ***************************************************************************/
AlnMgr2CompareUnalnAMS(VoidPtr ptr1,VoidPtr ptr2)5501 static int LIBCALLBACK AlnMgr2CompareUnalnAMS(VoidPtr ptr1, VoidPtr ptr2)
5502 {
5503    AM_Small2Ptr  ams1;
5504    AM_Small2Ptr  ams2;
5506    if (ptr1 == NULL || ptr2 == NULL)
5507       return 0;
5508    ams1 = *((AM_Small2Ptr PNTR)ptr1);
5509    ams2 = *((AM_Small2Ptr PNTR)ptr2);
5510    if (ams1->n1 < ams2->n1)
5511       return -1;
5512    else if (ams1->n1 > ams2->n1)
5513       return 1;
5514    else
5515       return 0;
5516 }
5518 /***************************************************************************
5519 *
5520 *  SECTION 3: Functions for debugging
5521 *
5522 ***************************************************************************/
5524 /* SECTION 3 */
am_print_sa_index(SeqAlignPtr sap,FILE * ofp)5525 NLM_EXTERN void am_print_sa_index(SeqAlignPtr sap, FILE *ofp)
5526 {
5527    Int4        i;
5528    Int4        j;
5529    SAIndex2Ptr  saip;
5531    if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_CHILD)
5532       return;
5533    saip = (SAIndex2Ptr)(sap->saip);
5534    fprintf(ofp, "Rows: %d\n", saip->numrows);
5535    fprintf(ofp, "Segments: %d\n", saip->numseg);
5536    fprintf(ofp, "Anchor: %d\n", saip->anchor);
5537    fprintf(ofp, "Alignment coordinates: ");
5538    for (i=0; i<saip->numseg; i++)
5539    {
5540       fprintf(ofp, "%d ", saip->aligncoords[i]);
5541    }
5542    fprintf(ofp, "\n\n");
5543    for (i=0; i<saip->numrows; i++)
5544    {
5545       fprintf(ofp, "row %d\n", i+1);
5546       fprintf(ofp, "numsect: %d\n", saip->srdp[i]->numsect);
5547       for (j=0; j<saip->srdp[i]->numsect; j++)
5548       {
5549          fprintf(ofp, "%d ", saip->srdp[i]->sect[j]);
5550       }
5551       fprintf(ofp, "\n");
5552       fprintf(ofp, "numunsect: %d\n", saip->srdp[i]->numunsect);
5553       for (j=0; j<saip->srdp[i]->numunsect; j++)
5554       {
5555          fprintf(ofp, "%d ", saip->srdp[i]->unsect[j]);
5556       }
5557       fprintf(ofp, "\n");
5558       fprintf(ofp, "numinsect: %d\n", saip->srdp[i]->numinsect);
5559       for (j=0; j<saip->srdp[i]->numinsect; j++)
5560       {
5561          fprintf(ofp, "%d ", saip->srdp[i]->insect[j]);
5562       }
5563       fprintf(ofp, "\n");
5564    }
5565 }
5567 /* SECTION 3 */
5568 /***************************************************************************
5569 *
5570 *  AlnMgr2PrintSeqAlign prints an interleaved output of the entire
5571 *  indexed alignment, with 'linesize' characters on each line (max 200).
5572 *
5573 ***************************************************************************/
AlnMgr2PrintSeqAlign(SeqAlignPtr sap,Int4 linesize,Boolean isnuc,FILE * ofp)5574 NLM_EXTERN void AlnMgr2PrintSeqAlign(SeqAlignPtr sap, Int4 linesize, Boolean isnuc, FILE *ofp)
5575 {
5576    AlnMsg2Ptr   amp;
5577    BioseqPtr   bsp;
5578    Char        buf[201];
5579    Int4        ctr;
5580    Boolean     done;
5581    Int4        i;
5582    Int4        j;
5583    Int4        len;
5584    Boolean     more;
5585    Int4        numrows;
5586    Int4        row;
5587    Uint1       seqcode;
5588    SeqIdPtr    sip;
5589    SeqPortPtr  spp;
5590    Char        text[42];
5592    if (sap == NULL || sap->saip == NULL || linesize > 200)
5593       return;
5594    if (isnuc)
5595       seqcode = Seq_code_iupacna;
5596    else
5597       seqcode = Seq_code_iupacaa;
5598    amp = AlnMsgNew2();
5599    numrows = AlnMgr2GetNumRows(sap);
5600    len = AlnMgr2GetAlnLength(sap, FALSE);
5601    for (i=0; i<len; i+=linesize)
5602    {
5603       fprintf(ofp, "%d - %d\n", i, MIN(i+linesize-1, len-1));
5604       for (row=0; row<numrows; row++)
5605       {
5606          sip = AlnMgr2GetNthSeqIdPtr(sap, row+1);
5607          SeqIdWrite(sip, text, PRINTID_FASTA_SHORT, 41);
5608          done = FALSE;
5609          for (j=0; j<12; j++)
5610          {
5611             if (text[j] == '\0')
5612                done = TRUE;
5613             if (done == TRUE)
5614                fprintf(ofp, " ");
5615             else
5616                fprintf(ofp, "%c", text[j]);
5617          }
5618          bsp = BioseqLockById(sip);
5619          AlnMsgReNew2(amp);
5620          amp->row_num = row+1;
5621          amp->from_aln = i;
5622          amp->to_aln = MIN(i+linesize-1, len-1);
5623          while (more = AlnMgr2GetNextAlnBit(sap, amp))
5624          {
5625             if (amp->type == AM_GAP)
5626             {
5627                for (j=amp->from_row; j<=amp->to_row; j++)
5628                {
5629                   fprintf(ofp, "-");
5630                }
5631             } else
5632             {
5633                spp = SeqPortNew(bsp, amp->from_row, amp->to_row, amp->strand, seqcode);
5634                ctr = SeqPortRead(spp, (Uint1Ptr)buf, amp->to_row-amp->from_row+1);
5635                buf[ctr] = '\0';
5636                fwrite(buf, 1, ctr, ofp);
5637                SeqPortFree(spp);
5638             }
5639          }
5640          BioseqUnlock(bsp);
5641          fprintf(ofp, "\n");
5642       }
5643       fprintf(ofp, "\n\n");
5644    }
5645    AlnMsgFree2(amp);
5646 }
5648 /* SECTION 3 */
AlnMgr2DumpIndexedAlnToFile(SeqAlignPtr sap,CharPtr filename)5649 NLM_EXTERN void AlnMgr2DumpIndexedAlnToFile(SeqAlignPtr sap, CharPtr filename)
5650 {
5651    AsnIoPtr          aip;
5652    AMAlignIndex2Ptr  amaip;
5653    SeqAlignPtr       sap_tmp;
5655    if (sap == NULL || sap->saip == NULL)
5656       return;
5657    if (sap->saip->indextype == INDEX_CHILD)
5658    {
5659       if (sap->dim == 0)
5660          sap->dim = AlnMgr2GetNumRows(sap);
5661       aip = AsnIoOpen(filename, "w");
5662       SeqAlignAsnWrite(sap, aip, NULL);
5663       AsnIoClose(aip);
5664       return;
5665    }
5666    amaip = (AMAlignIndex2Ptr)(sap->saip);
5667    aip = AsnIoOpen(filename, "w");
5668    if (amaip->alnstyle != AM2_LITE)
5669    {
5670       amaip->sharedaln->dim = 0;  /* mark it as the sharedaln */
5671       SeqAlignAsnWrite(amaip->sharedaln, aip, NULL);
5672    }
5673    sap_tmp = sap;
5674    if (sap->dim == 0)
5675       sap->dim = AlnMgr2GetNumRows(sap);
5676    while (sap_tmp != NULL)
5677    {
5678       SeqAlignAsnWrite(sap_tmp, aip, NULL);
5679       sap_tmp = sap_tmp->next;
5680    }
5681    AsnIoClose(aip);
5682 }
5684 /***************************************************************************
5685 *
5686 *  SECTION 4: API-level functions (and their helper functions) used to
5687 *  access an indexed alignment.
5688 *    SECTION 4a: AlnMgr2GetNextAlnBit and associated functions
5689 *    SECTION 4b: "GetNth" functions
5690 *    SECTION 4c: other functions for accessing the alignment
5691 *
5692 ***************************************************************************/
5694 /* SECTION 4a */
5695 /***************************************************************************
5696 *
5697 *  AlnMgr2GetNextAlnBit takes an indexed seqalign and returns it, piece
5698 *  by piece, in the row and across the range specified in the AlnMsg
5699 *  structure. amp->from_aln and amp->to_aln must be filled in; these are
5700 *  in alignment coordinates. AlnMgr2GetNextAlnBit will return the AlnMsg
5701 *  structure with amp->from_row and amp->to_row filled in. If amp->type is
5702 *  AM_SEQ, these numbers are sequence coordinates; if amp->type is AM_GAP
5703 *  the numbers are alignment coordinates and there is a gap in that row.
5704 *  AlnMgr2GetNextAlnBit returns one continuous piece of sequence or gap
5705 *  at each call, and keeps returning TRUE until it has returned all the
5706 *  information for the piece of the alignment requested.
5707 *
5708 ***************************************************************************/
AlnMgr2GetNextAlnBit(SeqAlignPtr sap,AlnMsg2Ptr amp)5709 NLM_EXTERN Boolean AlnMgr2GetNextAlnBit(SeqAlignPtr sap, AlnMsg2Ptr amp) /* NEXT */
5710 {
5711    AMAlignIndex2Ptr  amaip;
5712    Uint2Ptr         array;
5713    Int4             arraylen;
5714    Int4             ctr;
5715    Int4             disc;
5716    Int4             disc1;
5717    DenseSegPtr      dsp;
5718    Int4             endoffset;
5719    Boolean          found;
5720    Int4             i;
5721    Int4             index;
5722    Int4             intfrom;
5723    Int4             intto;
5724    Int4             j;
5725    Int4             len;
5726    Int4             offset;
5727    SAIndex2Ptr       saip;
5728    SARowDat2Ptr      srdp;
5729    Int4             start_sect;
5730    Int4             stop_sect;
5731    Uint2Ptr         trans;
5732    Int4             translen;
5734    if (sap == NULL || sap->saip == NULL || amp == NULL)
5735       return FALSE;
5736    if (amp->left_interrupt != NULL)
5737    {
5738       MemFree(amp->left_interrupt);
5739       amp->left_interrupt = NULL;
5740    }
5741    if (amp->right_interrupt != NULL)
5742    {
5743       MemFree(amp->right_interrupt);
5744       amp->right_interrupt = NULL;
5745    }
5746    if (sap->saip->indextype == INDEX_CHILD)
5747    {
5748       dsp = (DenseSegPtr)(sap->segs);
5749       saip = (SAIndex2Ptr)(sap->saip);
5750    } else if (sap->saip->indextype == INDEX_PARENT)
5751    {
5752       amaip = (AMAlignIndex2Ptr)(sap->saip);
5753       if (amaip->alnstyle == AM2_LITE)
5754          return FALSE;
5755       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
5756       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
5757    }
5758    /* reality checks */
5759    if (amp->row_num > saip->numrows)
5760       return FALSE;
5761    if (amp->len <= 0)
5762       amp->len = AlnMgr2GetAlnLength(sap, FALSE);
5763    if (amp->from_aln < 0 || amp->from_aln > amp->len-1 || amp->real_from > amp->to_aln)
5764       return FALSE;
5765    if (amp->to_aln == -1)
5766       amp->to_aln = amp->len - 1;
5767    if (amp->to_aln < amp->from_aln || amp->to_aln > amp->len-1)
5768       return FALSE;
5769    if (amp->real_from == -2)
5770       amp->real_from = amp->from_aln;
5771    amp->strand = AlnMgr2GetNthStrand(sap, amp->row_num);
5772    srdp = saip->srdp[amp->row_num-1];
5773    len = 0;
5774    start_sect = binary_search_on_uint4_list(saip->aligncoords, amp->real_from, saip->numseg);
5775    offset = amp->real_from - saip->aligncoords[start_sect];
5776    endoffset = 0;
5777    stop_sect = binary_search_on_uint4_list(saip->aligncoords, amp->to_aln, saip->numseg);
5778    /* now figure out whether it starts in sequence or a gap, and figure out how       */
5779    /* long it continues in the same mode without interruption by inserts or unaligned */
5780    /* regions; the whole contiguous stretch will be reported                          */
5781    if (saip->anchor > 0)
5782    {
5783       trans = saip->srdp[saip->anchor-1]->sect;
5784       translen = saip->srdp[saip->anchor-1]->numsect;
5785    } else
5786    {
5787       trans = (Uint2Ptr)MemNew(dsp->numseg*sizeof(Uint2));
5788       for (i=0; i<dsp->numseg; i++)
5789       {
5790          trans[i] = i;
5791       }
5792       translen = dsp->numseg;
5793    }
5794    arraylen = -1;
5795    if ((index = binary_search_on_uint2_list(srdp->sect, trans[start_sect], srdp->numsect)) != -1)
5796    {
5797       amp->type = AM_SEQ;
5798       array = srdp->sect;
5799       arraylen = srdp->numsect;
5800    } else if ((index = binary_search_on_uint2_list(srdp->unsect, trans[start_sect], srdp->numunsect)) != -1)
5801    {
5802       amp->type = AM_GAP;
5803       array = srdp->unsect;
5804       arraylen = srdp->numunsect;
5805    }
5806    if (arraylen == -1) /* error */
5807       return FALSE;
5808    if (amp->row_num == saip->anchor)
5809    {
5810       amp->type = AM_SEQ;
5811       /* find limits of aligned region */
5812       i = start_sect;
5813       j = srdp->sect[start_sect];
5814       disc = binary_search_on_uint2_list(srdp->unaligned, j, srdp->numunaln);
5815       while (j<srdp->sect[stop_sect] && disc == -1)
5816       {
5817          j++;
5818          disc = binary_search_on_uint2_list(srdp->unaligned, j, srdp->numunaln);
5819       }
5820       i = binary_search_on_uint2_list(srdp->sect, j, srdp->numsect);
5821       if (i == -1)
5822       {
5823          i = binary_search_on_uint2_list(srdp->unsect, j, srdp->numunsect);
5824       }
5825       endoffset = dsp->lens[trans[i]] - (amp->to_aln - saip->aligncoords[i]) - 1;
5826       if (endoffset < 0)
5827          endoffset = 0;
5828       if (i<stop_sect && endoffset == 0) /* there's an unaligned region here, and we go to the end of the segment */
5829       {
5830          AlnMgr2GetUnalignedInfo(sap, trans[i], amp->row_num, &intfrom, &intto);
5831          amp->right_interrupt = (AMInterruptPtr)MemNew(sizeof(AMInterrupt));
5832          amp->right_interrupt->row = amp->row_num;
5833          amp->right_interrupt->unalnlen = intto - intfrom + 1;
5834          amp->right_interrupt->segnum = trans[i];
5835          amp->right_interrupt->which_side = AM2_RIGHT;
5836       }
5837       stop_sect = i;
5838       if (start_sect > 0 && offset == 0)
5839       {
5840          disc = binary_search_on_uint2_list(srdp->unaligned, trans[start_sect]-1, srdp->numunaln);
5841          if (disc != -1) /* there is a left unaligned region */
5842          {
5843             AlnMgr2GetUnalignedInfo(sap, trans[start_sect]-1, amp->row_num, &intfrom, &intto);
5844             amp->left_interrupt = (AMInterruptPtr)MemNew(sizeof(AMInterrupt));
5845             amp->left_interrupt->row = amp->row_num;
5846             amp->left_interrupt->unalnlen = intto - intfrom + 1;
5847             amp->left_interrupt->segnum = trans[start_sect];
5848             amp->left_interrupt->which_side = AM2_LEFT;
5849          }
5850       }
5851       len = 0;
5852       for (i=start_sect; i<= stop_sect; i++)
5853       {
5854          len += dsp->lens[trans[i]];
5855       }
5856       len = len - offset - endoffset;
5857       if (amp->strand == Seq_strand_minus)
5858          amp->from_row = dsp->starts[trans[stop_sect]*dsp->dim+amp->row_num-1] + endoffset;
5859       else
5860          amp->from_row = dsp->starts[trans[start_sect]*dsp->dim+amp->row_num-1] + offset;
5861       amp->to_row = amp->from_row + len - 1;
5862       amp->real_from += amp->to_row - amp->from_row + 1;
5863       if (saip->anchor <= 0)
5864          MemFree(trans);
5865       return TRUE;
5866    }
5867    /* look for limits of aligned/gapped region */
5868    i = index;
5869    j = start_sect+1;
5870    disc = -1;
5871    found = FALSE;
5872    while (i+1<arraylen && disc == -1 && array[i] <= trans[stop_sect] && array[i+1]-1 == array[i])
5873    {
5874       disc = binary_search_on_uint2_list(srdp->unaligned, array[i], srdp->numunaln);
5875       if (disc == -1)
5876          i++;
5877    }
5878    disc = binary_search_on_uint2_list(srdp->unaligned, array[i], srdp->numunaln);
5879    j = binary_search_on_uint2_list(trans, array[i], translen);
5880    if (amp->type == AM_SEQ && j <= stop_sect) /* there is an interrupting region, either seq/gap, insert, or unaligned, plus just check last piece */
5881    {
5882       i = binary_search_on_uint2_list(srdp->insect, trans[j]+1, srdp->numinsect);
5883       if (i != -1) /* there's an insert */
5884       {
5885          amp->right_interrupt = (AMInterruptPtr)MemNew(sizeof(AMInterrupt));
5886          amp->right_interrupt->row = amp->row_num;
5887          amp->right_interrupt->segnum = trans[j];
5888          amp->right_interrupt->insertlen = dsp->lens[srdp->insect[i]];
5889          amp->right_interrupt->which_side = AM2_RIGHT;
5890          /* look for unaligned regions off insert */
5891          disc1 = -1;
5892          if (j > 0)
5893             disc1 = binary_search_on_uint2_list(srdp->unaligned, trans[j]+1, srdp->numunaln);
5894          if (disc1 != -1)
5895          {
5896             AlnMgr2GetUnalignedInfo(sap, srdp->unaligned[disc1], amp->row_num, &intfrom, &intto);
5897             amp->right_interrupt->unalnlen = intto - intfrom + 1;
5898          }
5899          i++;
5900          ctr = 1;
5901          while (i<srdp->numinsect && srdp->insect[i] == srdp->insect[i-1]+1)
5902          {
5903             amp->right_interrupt->insertlen += dsp->lens[srdp->insect[i]];
5904             /* look for unaligned regions off insert */
5905             disc1 = -1;
5906             if (j > 0) {
5907               disc1 = binary_search_on_uint2_list(srdp->unaligned, trans[j]+1+ctr, srdp->numunaln);
5908             }
5909             if (disc1 != -1)
5910             {
5911                AlnMgr2GetUnalignedInfo(sap, srdp->unaligned[disc1], amp->row_num, &intfrom, &intto);
5912                amp->right_interrupt->unalnlen += intto - intfrom + 1;
5913             }
5914             i++;
5915             ctr++;
5916          }
5917       }
5918       if (disc != -1) /* there's an unaligned region */
5919       {
5920          if (amp->right_interrupt == NULL)
5921             amp->right_interrupt = (AMInterruptPtr)MemNew(sizeof(AMInterrupt));
5922          amp->right_interrupt->row = amp->row_num;
5923          amp->right_interrupt->segnum = trans[j];
5924          amp->right_interrupt->which_side = AM2_RIGHT;
5925          AlnMgr2GetUnalignedInfo(sap, srdp->unaligned[disc], amp->row_num, &intfrom, &intto);
5926          amp->right_interrupt->unalnlen += intto - intfrom + 1;
5927       }
5928    }
5929    stop_sect = j;
5930    /* now look for left-side unaligned or inserted regions if offset == 0 */
5931    if (amp->type == AM_SEQ && offset == 0)
5932    {
5933       disc = -1;
5934       j = 1;
5935       i = -1;
5936       if ((Int2)trans[start_sect]-j > 0)
5937       i = binary_search_on_uint2_list(srdp->sect, trans[start_sect]-j, srdp->numsect);
5938       while (i == -1 && (Int2)(trans[start_sect])-j-1 >= 0)
5939       {
5940          i = binary_search_on_uint2_list(srdp->sect, trans[start_sect]-j-1, srdp->numsect);
5941          j++;
5942       }
5943       disc = binary_search_on_uint2_list(srdp->unaligned, trans[start_sect]-j, srdp->numunaln);;
5944       if (disc > -1)
5945       {
5946          AlnMgr2GetUnalignedInfo(sap, trans[start_sect]-j, amp->row_num, &intfrom, &intto);
5947          amp->left_interrupt = (AMInterruptPtr)MemNew(sizeof(AMInterrupt));
5948          amp->left_interrupt->row = amp->row_num;
5949          amp->left_interrupt->segnum = trans[start_sect];
5950          amp->left_interrupt->which_side = AM2_LEFT;
5951          amp->left_interrupt->unalnlen = intto - intfrom + 1;
5952       }
5953       i = binary_search_on_uint2_list(srdp->insect, trans[start_sect]-j, srdp->numinsect);
5954       if (i != -1) /* there's an insert */
5955       {
5956          if (amp->left_interrupt == NULL)
5957             amp->left_interrupt = (AMInterruptPtr)MemNew(sizeof(AMInterrupt));
5958          amp->left_interrupt->row = amp->row_num;
5959          amp->left_interrupt->segnum = trans[start_sect];
5960          amp->left_interrupt->which_side = AM2_LEFT;
5961          amp->left_interrupt->insertlen = dsp->lens[srdp->insect[i]];
5962          /* look for unaligned regions off insert */
5963          j = trans[start_sect]-j;
5964          disc1 = binary_search_on_uint2_list(srdp->unaligned, j, srdp->numunaln);
5965          if (disc1 != -1)
5966          {
5967             AlnMgr2GetUnalignedInfo(sap, srdp->unaligned[disc1], amp->row_num, &intfrom, &intto);
5968             amp->left_interrupt->unalnlen += intto - intfrom + 1;
5969          }
5970          i--;
5971          j--;
5972          while (i-1>=0 && srdp->insect[i] == srdp->insect[i+1]-1)
5973          {
5974             amp->left_interrupt->insertlen += dsp->lens[srdp->insect[i]];
5975             disc1 = binary_search_on_uint2_list(srdp->unaligned, j, srdp->numunaln);
5976             if (disc1 != -1)
5977             {
5978                AlnMgr2GetUnalignedInfo(sap, srdp->unaligned[disc1], amp->row_num, &intfrom, &intto);
5979                amp->left_interrupt->unalnlen += intto - intfrom + 1;
5980             }
5981             i--;
5982             j--;
5983          }
5984          if (i>=0) /* look one more over for unaligned */
5985          {
5986             disc1 = binary_search_on_uint2_list(srdp->unaligned, j, srdp->numunaln);
5987             if (disc1 != -1)
5988             {
5989                AlnMgr2GetUnalignedInfo(sap, srdp->unaligned[disc1], amp->row_num, &intfrom, &intto);
5990                amp->left_interrupt->unalnlen += intto - intfrom + 1;
5991             }
5992          }
5993       }
5994    }
5995    endoffset = dsp->lens[trans[stop_sect]] - (amp->to_aln - saip->aligncoords[stop_sect]) - 1;
5996    if (endoffset < 0)
5997       endoffset = 0;
5998    if (amp->right_interrupt != NULL && endoffset > 0)
5999    {
6000       MemFree(amp->right_interrupt);
6001       amp->right_interrupt = NULL;
6002    }
6003    len = 0;
6004    for (i=start_sect; i<=stop_sect; i++)
6005    {
6006       len += dsp->lens[trans[i]];
6007    }
6008    len = len - offset - endoffset;
6009    if (amp->type == AM_GAP)
6010    {
6011       amp->from_row = amp->real_from;
6012       amp->to_row = amp->from_row + len - 1;
6013    } else
6014    {
6015       if (amp->strand == Seq_strand_minus)
6016       {
6017          amp->from_row = dsp->starts[trans[stop_sect]*dsp->dim+amp->row_num-1] + endoffset;
6018          amp->to_row = amp->from_row + len - 1;
6019       } else
6020       {
6021          amp->from_row = dsp->starts[trans[start_sect]*dsp->dim+amp->row_num-1] + offset;
6022          amp->to_row = amp->from_row + len - 1;
6023       }
6024    }
6025    if (saip->anchor <= 0)
6026       MemFree(trans);
6027    amp->real_from += amp->to_row - amp->from_row + 1;
6028    return TRUE;
6029 }
6031 /* SECTION 4a */
binary_search_on_uint4_list(Uint4Ptr list,Uint4 pos,Uint4 listlen)6032 static Int4 binary_search_on_uint4_list(Uint4Ptr list, Uint4 pos, Uint4 listlen)
6033 {
6034    Uint4  L;
6035    Uint4  mid;
6036    Uint4  R;
6038    if (list == NULL || listlen == 0)
6039       return 0;
6040    L = 0;
6041    R = listlen - 1;
6042    while (L < R)
6043    {
6044       mid = (L+R)/2;
6045       if (list[mid + 1] <= pos)
6046          L = mid + 1;
6047       else
6048          R = mid;
6049    }
6050    return R;
6051 }
6053 /* SECTION 4a */
/***************************************************************************
*
*  binary_search_on_uint2_list bisects list[0..listlen-1], which is
*  assumed to be in ascending order, looking for ele. It returns the
*  index at which the bisection ends if that element equals ele, and -1
*  otherwise. -1 is also returned for a NULL or empty list and for a
*  negative ele.
*
***************************************************************************/
static Int4 binary_search_on_uint2_list(Uint2Ptr list, Int4 ele, Uint2 listlen)
{
   Uint2  lo;
   Uint2  hi;
   Uint2  probe;

   if (ele < 0 || listlen == 0 || list == NULL)
      return -1;
   lo = 0;
   hi = listlen - 1;
   while (lo < hi)
   {
      probe = (lo + hi)/2;
      if (list[probe] < ele)
         lo = probe + 1;
      else
         hi = probe;
   }
   return (list[hi] == ele) ? (Int4)hi : -1;
}
6078 /* SECTION 4a */
/***************************************************************************
*
*  AlnMgr2GetUnalignedInfo computes the sequence interval of 'row'
*  (1-based) that lies between the nearest segment at or before 'segment'
*  in which the row is present (start != -1) and the nearest segment
*  after 'segment' in which it is present. The interval is returned in
*  *from and *to, swapped if necessary so that *from <= *to. A side on
*  which no such segment exists leaves its bound at -1 (before the swap).
*
*  *from and *to are always set when both pointers are non-NULL: they are
*  -1 when sap is NULL or unindexed, when the index type is unknown, when
*  there is no dense-seg, or when row is outside 1..dim. An out-of-range
*  segment is clamped to the segments of the dense-seg.
*
***************************************************************************/
static void AlnMgr2GetUnalignedInfo(SeqAlignPtr sap, Int4 segment, Int4 row, Int4Ptr from, Int4Ptr to)
{
   AMAlignIndex2Ptr  amaip;
   DenseSegPtr      dsp;
   Boolean          found;
   Int4             i;
   Uint1            strand;
   Int4             tmp;

   if (from == NULL || to == NULL)
      return;
   /* give callers defined values on every return path */
   *from = *to = -1;
   if (sap == NULL || sap->saip == NULL)
      return;
   if (sap->saip->indextype == INDEX_CHILD)
   {
      dsp = (DenseSegPtr)(sap->segs);
   } else if (sap->saip->indextype == INDEX_PARENT)
   {
      amaip = (AMAlignIndex2Ptr)(sap->saip);
      if (amaip->sharedaln == NULL)
         return;
      dsp = (DenseSegPtr)(amaip->sharedaln->segs);
   } else /* unknown index type: there is no dense-seg to consult */
      return;
   if (dsp == NULL || row < 1 || row > dsp->dim)
      return;
   strand = AlnMgr2GetNthStrand(sap, row);
   /* scan backwards from 'segment' for the closest segment holding this row */
   found = FALSE;
   i = segment;
   if (i >= dsp->numseg)
      i = dsp->numseg - 1;
   for (; i>=0 && !found; i--)
   {
      if (dsp->starts[dsp->dim*i+row-1] != -1)
      {
         found = TRUE;
         if (strand == Seq_strand_minus)
            *to = dsp->starts[dsp->dim*i+row-1]-1;
         else
            *from = dsp->starts[dsp->dim*i+row-1]+dsp->lens[i];
      }
   }
   /* scan forwards from the segment after 'segment' in the same way */
   found = FALSE;
   i = segment+1;
   if (i < 0)
      i = 0;
   for (; i<dsp->numseg && !found; i++)
   {
      if (dsp->starts[dsp->dim*i+row-1] != -1)
      {
         found = TRUE;
         if (strand == Seq_strand_minus)
            *from = dsp->starts[dsp->dim*i+row-1]+dsp->lens[i];
         else
            *to = dsp->starts[dsp->dim*i+row-1]-1;
      }
   }
   if (*from > *to)
   {
      tmp = *from;
      *from = *to;
      *to = tmp;
   }
}
6135 /* SECTION 4a */
6136 /***************************************************************************
6137 *
6138 *  AlnMgr2GetInterruptInfo returns a structure describing the inserts and
6139 *  unaligned regions in an interrupt. The structure is allocated by this
6140 *  function and must be freed with AlnMgr2FreeInterruptInfo.
6141 *
6142 ***************************************************************************/
AlnMgr2GetInterruptInfo(SeqAlignPtr sap,AMInterruptPtr interrupt)6143 NLM_EXTERN AMInterrInfoPtr AlnMgr2GetInterruptInfo(SeqAlignPtr sap, AMInterruptPtr interrupt)
6144 {
6145    AMAlignIndex2Ptr  amaip;
6146    Int4              disc;
6147    Boolean           done;
6148    DenseSegPtr       dsp;
6149    Int4              i;
6150    AMInterrInfoPtr   iip;
6151    Int4              inserts;
6152    Int4              intfrom;
6153    Int4              intto;
6154    Int4              j;
6155    Int4              k;
6156    Int4              n;
6157    SAIndex2Ptr       saip;
6158    SARowDat2Ptr      srdp;
6159    Uint1             strand;
6160    Uint2Ptr          trans;
6161    Int4              translen;
6162    Int4              u;
6164    if (interrupt == NULL || sap == NULL || sap->saip == NULL)
6165       return NULL;
6166    if (sap->saip->indextype == INDEX_CHILD)
6167    {
6168       dsp = (DenseSegPtr)(sap->segs);
6169       saip = (SAIndex2Ptr)(sap->saip);
6170    } else if (sap->saip->indextype == INDEX_PARENT)
6171    {
6172       amaip = (AMAlignIndex2Ptr)(sap->saip);
6173       if (amaip->alnstyle == AM2_LITE)
6174          return FALSE;
6175       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
6176       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6177    }
6178    if (dsp->numseg < interrupt->segnum)
6179       return NULL;
6180    if (saip->anchor > 0)
6181    {
6182       trans = saip->srdp[saip->anchor-1]->sect;
6183       translen = saip->srdp[saip->anchor-1]->numsect;
6184    } else
6185    {
6186       trans = (Uint2Ptr)MemNew(dsp->numseg*sizeof(Uint2));
6187       for (i=0; i<dsp->numseg; i++)
6188       {
6189          trans[i] = i;
6190       }
6191       translen = dsp->numseg;
6192    }
6193    strand = AlnMgr2GetNthStrand(sap, interrupt->row-1);
6194    srdp = saip->srdp[interrupt->row-1];
6195    /* now look for inserts and unaligned regions on the side indicated */
6196    if (interrupt->which_side == AM2_RIGHT)
6197    {
6198       /* check if this is unaligned */
6199       disc = binary_search_on_uint2_list(srdp->unaligned, interrupt->segnum, srdp->numunaln);
6200       /* then look for inserts */
6201       done = FALSE;
6202       iip = (AMInterrInfoPtr)MemNew(sizeof(AMInterrInfo));
6203       if (disc != -1)
6204          iip->num = 1;
6205       inserts = 0;
6206       for (i=interrupt->segnum+1; !done; i++)
6207       {
6208          n = binary_search_on_uint2_list(srdp->insect, i, srdp->numinsect);
6209          if (n == -1)
6210             n = binary_search_on_uint2_list(srdp->unsect, i, srdp->numunsect);
6211          if (n == -1)
6212          {
6213             done = TRUE;
6214          } else
6215          {
6216             inserts++; /* only increment if region gets interrupted */
6217             disc = binary_search_on_uint2_list(srdp->unaligned, i, srdp->numunaln);
6218             if (disc != -1) /* this insert has an unaligned region */
6219             {
6220                iip->num += inserts;
6221                iip->num++;
6222                inserts = 0;
6223             }
6224          }
6225       }
6226       if (inserts != 0)
6227          iip->num++;
6228       iip->starts = (Int4Ptr)MemNew(iip->num*sizeof(Int4));
6229       iip->lens = (Int4Ptr)MemNew(iip->num*sizeof(Int4));
6230       iip->types = (Int4Ptr)MemNew(iip->num*sizeof(Int4));
6231       k = 0;
6232       disc = binary_search_on_uint2_list(srdp->unaligned, interrupt->segnum, srdp->numunaln);
6233       if (disc != -1) /* starts with unaligned */
6234       {
6235          AlnMgr2GetUnalignedInfo(sap, interrupt->segnum, interrupt->row, &intfrom, &intto);
6236          iip->starts[k] = intfrom;
6237          iip->lens[k] = intto - intfrom + 1;
6238          iip->types[k] = AM_UNALIGNED;
6239          k++;
6240       }
6241       disc = 0;
6242       done = FALSE;
6243       for (i=interrupt->segnum+1; !done; i++)
6244       {
6245          n = binary_search_on_uint2_list(srdp->insect, i, srdp->numinsect);
6246          u = binary_search_on_uint2_list(srdp->unsect, i, srdp->numinsect);
6247          if (n == -1 && u == -1)
6248          {
6249             done = TRUE;
6250          } else
6251          {
6252             if (u == -1)
6253             {
6254                if (disc != -1 || strand == Seq_strand_minus) /* only record new start if region gets interrupted or if on minus strand */
6255                   iip->starts[k] = dsp->starts[dsp->dim*i + interrupt->row-1];
6256                iip->lens[k] += dsp->lens[i];
6257                iip->types[k] = AM_INSERT;
6258                disc = binary_search_on_uint2_list(srdp->unaligned, i, srdp->numunaln);
6259                if (disc != -1) /* this insert has an unaligned region */
6260                {
6261                   k++;
6262                   AlnMgr2GetUnalignedInfo(sap, i, interrupt->row, &intfrom, &intto);
6263                   iip->starts[k] = intfrom;
6264                   iip->lens[k] = intto - intfrom + 1;
6265                   iip->types[k] = AM_UNALIGNED;
6266                   k++;
6267                }
6268             }
6269          }
6270       }
6271    } else if (interrupt->which_side == AM2_LEFT)
6272    {
6273       /* check if the next non-gap segment to the left has unaligned */
6274       j = 1;
6275       n = 0;
6276       while (n != -1 && interrupt->segnum-j >= 0)
6277       {
6278          n = binary_search_on_uint2_list(srdp->unsect, interrupt->segnum-j, srdp->numunsect);
6279          if (n == -1)
6280             n = binary_search_on_uint2_list(srdp->insect, interrupt->segnum-j, srdp->numinsect);
6281          if (n != -1)
6282             j++;
6283       }
6284       disc = binary_search_on_uint2_list(srdp->unaligned, interrupt->segnum-j, srdp->numunaln);
6285       /* then look for inserts */
6286       done = FALSE;
6287       iip = (AMInterrInfoPtr)MemNew(sizeof(AMInterrInfo));
6288       if (disc != -1)
6289          iip->num = 1;
6290       inserts = 0;
6291       for (i=interrupt->segnum-1; !done; i--)
6292       {
6293          n = binary_search_on_uint2_list(srdp->insect, i, srdp->numinsect);
6294          if (n == -1)
6295             n = binary_search_on_uint2_list(srdp->unsect, i, srdp->numunsect);
6296          if (n == -1)
6297          {
6298             done = TRUE;
6299          } else
6300          {
6301             inserts++; /* only increment if region gets interrupted */
6302             disc = binary_search_on_uint2_list(srdp->unaligned, i, srdp->numunaln);
6303             if (disc != -1) /* this insert has an unaligned region */
6304             {
6305                iip->num += inserts;
6306                iip->num++;
6307                inserts = 0;
6308             }
6309          }
6310       }
6311       i++;
6312       iip->starts = (Int4Ptr)MemNew(iip->num*sizeof(Int4));
6313       iip->lens = (Int4Ptr)MemNew(iip->num*sizeof(Int4));
6314       iip->types = (Int4Ptr)MemNew(iip->num*sizeof(Int4));
6315       k = 0;
6316       disc = 0;
6317       /* check first non-inserted segment for unaligned */
6318       if (i >= 0)
6319       {
6320          disc = binary_search_on_uint2_list(srdp->unaligned, i, srdp->numunaln);
6321          if (disc != -1) /* there's an unaligned region */
6322          {
6323             AlnMgr2GetUnalignedInfo(sap, i, interrupt->row, &intfrom, &intto);
6324             iip->starts[k] = intfrom;
6325             iip->lens[k] = intto - intfrom + 1;
6326             iip->types[k] = AM_UNALIGNED;
6327             k++;
6328          }
6329       }
6330       i++; /* start from leftmost end of inserts/unaligned */
6331       for (i; i<interrupt->segnum; i++)
6332       {
6333          u = binary_search_on_uint2_list(srdp->unsect, i, srdp->numunsect);
6334          if (u == -1)
6335          {
6336             if (disc != -1 || strand == Seq_strand_minus) /* only record new start if region gets interrupted or if on minus strand */
6337                iip->starts[k] = dsp->starts[dsp->dim*i + interrupt->row-1];
6338             iip->lens[k] += dsp->lens[i];
6339             iip->types[k] = AM_INSERT;
6340             disc = binary_search_on_uint2_list(srdp->unaligned, i, srdp->numunaln);
6341             if (disc != -1) /* this insert has an unaligned region */
6342             {
6343                k++;
6344                AlnMgr2GetUnalignedInfo(sap, binary_search_on_uint2_list(trans, i, translen), interrupt->row, &intfrom, &intto);
6345                iip->starts[k] = intfrom;
6346                iip->lens[k] = intto - intfrom + 1;
6347                iip->types[k] = AM_UNALIGNED;
6348                k++;
6349             }
6350          }
6351       }
6352    }
6353    iip->strand = strand;
6354    return iip;
6355 }
6357 /* SECTION 4b */
6358 /***************************************************************************
6359 *
6360 *  AlnMgr2GetNthStrand takes an indexed seqalign and a row number and
6361 *  returns the strand of the row indicated. A return of 0 indicates
6362 *  an error.
6363 *
6364 ***************************************************************************/
AlnMgr2GetNthStrand(SeqAlignPtr sap,Int4 n)6365 NLM_EXTERN Uint1 AlnMgr2GetNthStrand(SeqAlignPtr sap, Int4 n)
6366 {
6367    AMAlignIndex2Ptr  amaip;
6368    DenseSegPtr      dsp;
6370    if (sap == NULL || sap->saip == NULL || n < 1)
6371       return 0;
6372    if (sap->saip->indextype == INDEX_CHILD)
6373    {
6374       dsp = (DenseSegPtr)(sap->segs);
6375       if (n > dsp->dim)
6376          return 0;
6377       if (dsp->strands == NULL)
6378          return Seq_strand_plus;
6379       return (dsp->strands[n-1]);
6380    } else if (sap->saip->indextype == INDEX_PARENT)
6381    {
6382       amaip = (AMAlignIndex2Ptr)(sap->saip);
6383       if (amaip->alnstyle == AM2_LITE) /* can't get Nth strand for this */
6384          return 0;
6385       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
6386       if (n > dsp->dim)
6387          return 0;
6388       if (dsp->strands == NULL)
6389          return Seq_strand_plus;
6390       return (dsp->strands[n-1]);
6391    }
6392    return 0;
6393 }
6395 /* SECTION 4b */
6396 /***************************************************************************
6397 *
6398 *  AlnMgr2GetNthSeqIdPtr returns the seqid (this is a duplicated,
6399 *  allocated seqid that must be freed) of the nth row (1-based) of an
6400 *  indexed parent or child seqalign.
6401 *
6402 ***************************************************************************/
AlnMgr2GetNthSeqIdPtr(SeqAlignPtr sap,Int4 n)6403 NLM_EXTERN SeqIdPtr AlnMgr2GetNthSeqIdPtr(SeqAlignPtr sap, Int4 n)
6404 {
6405    AMAlignIndex2Ptr  amaip;
6406    DenseSegPtr      dsp;
6407    Int4             i;
6408    SeqIdPtr         sip;
6410    if (sap == NULL || sap->saip == NULL)
6411       return NULL;
6412    if (sap->saip->indextype == INDEX_CHILD)
6413    {
6414       dsp = (DenseSegPtr)(sap->segs);
6415       if (n > dsp->dim)
6416          return NULL;
6417       sip = dsp->ids;
6418       for (i=1; i<n && sip != NULL; i++)
6419       {
6420          sip = sip->next;
6421       }
6422       if (sip == NULL) return NULL;
6423       return (SeqIdDup(sip));
6424    } else if (sap->saip->indextype == INDEX_PARENT)
6425    {
6426       amaip = (AMAlignIndex2Ptr)(sap->saip);
6427       if (n > amaip->numrows)
6428          return NULL;
6429       sip = SeqIdDup(amaip->ids[n-1]);
6430       return sip;
6431    } else
6432       return NULL;
6433 }
6435 /* SECTION 4b */
6436 /***************************************************************************
6437 *
6438 *  AlnMgr2GetNthSeqRangeInSA returns the smallest and largest sequence
6439 *  coordinates contained in the nth row of an indexed seqalign. Either
6440 *  start or stop can be NULL to only retrieve one of the coordinates.
6441 *  If start and stop are -1, there is an error; if they are both -2, the
6442 *  row is just one big insert. RANGE
6443 *
6444 ***************************************************************************/
AlnMgr2GetNthSeqRangeInSA(SeqAlignPtr sap,Int4 n,Int4Ptr start,Int4Ptr stop)6445 NLM_EXTERN void AlnMgr2GetNthSeqRangeInSA(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop)
6446 {
6447    AMAlignIndex2Ptr  amaip;
6448    Int4             beg;
6449    DenseSegPtr      dsp;
6450    Int4             end;
6451    SAIndex2Ptr       saip;
6452    SARowDat2Ptr      srdp;
6453    Uint1            strand;
6455    if (start != NULL)
6456       *start = -1;
6457    if (stop != NULL)
6458       *stop = -1;
6459    if (sap == NULL || sap->saip == NULL)
6460       return;
6461    if (sap->saip->indextype == INDEX_CHILD)
6462    {
6463       saip = (SAIndex2Ptr)(sap->saip);
6464       dsp = (DenseSegPtr)(sap->segs);
6465    } else if (sap->saip->indextype == INDEX_PARENT)
6466    {
6467       amaip = (AMAlignIndex2Ptr)(sap->saip);
6468       if (amaip->alnstyle == AM2_LITE)
6469       {
6470          AlnMgr2GetNthSeqRangeInSASet(sap, n, start, stop);
6471          return;
6472       }
6473       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6474       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
6475    }
6476    if (n > saip->numrows || n <= 0)
6477       return;
6478    srdp = saip->srdp[n-1];
6479    beg = -1;
6480    if (srdp->numsect == 0) /* just one big insert */
6481       beg = end = -2;
6482    strand = AlnMgr2GetNthStrand(sap, n);
6483    if (beg != -2 && strand != Seq_strand_minus)
6484    {
6485       beg = dsp->starts[srdp->sect[0]*(dsp->dim) + n-1];
6486       end = dsp->starts[srdp->sect[srdp->numsect-1]*(dsp->dim) + n-1] + dsp->lens[srdp->sect[srdp->numsect-1]] - 1;
6487    } else if (beg != -2)
6488    {
6489       beg = dsp->starts[srdp->sect[srdp->numsect-1]*(dsp->dim) + n-1];
6490       end = dsp->starts[srdp->sect[0]*(dsp->dim) + n-1] + dsp->lens[srdp->sect[0]] - 1;
6491    }
6492    if (start != NULL)
6493       *start = beg;
6494    if (stop != NULL)
6495       *stop = end;
6496    return;
6497 }
6499 /* SECTION 4b */
6500 /***************************************************************************
6501 *
6502 *  AlnMgr2GetNthRowSpanInSA returns the least and greatest alignment
6503 *  coordinates (inclusive) spanned by the indicated row. Either stop or
6504 *  start can be NULL to retrieve just one of the coordinates.
6505 *
6506 ***************************************************************************/
AlnMgr2GetNthRowSpanInSA(SeqAlignPtr sap,Int4 n,Int4Ptr start,Int4Ptr stop)6507 NLM_EXTERN void AlnMgr2GetNthRowSpanInSA(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop)
6508 {
6509    AMAlignIndex2Ptr  amaip;
6510    DenseSegPtr      dsp;
6511    Int4             i;
6512    SAIndex2Ptr       saip;
6513    SARowDat2Ptr      srdp;
6515    if (start != NULL)
6516       *start = -1;
6517    if (stop != NULL)
6518       *stop = -1;
6519    if (sap == NULL || sap->saip == NULL)
6520       return;
6521    if (sap->saip->indextype == INDEX_CHILD)
6522    {
6523       saip = (SAIndex2Ptr)(sap->saip);
6524       dsp = (DenseSegPtr)(sap->segs);
6525    } else if (sap->saip->indextype == INDEX_PARENT)
6526    {
6527       amaip = (AMAlignIndex2Ptr)(sap->saip);
6528       if (amaip->alnstyle == AM2_LITE)
6529          return;
6530       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6531       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
6532    }
6533    if (n > saip->numrows || n <= 0)
6534       return;
6535    srdp = saip->srdp[n-1];
6536    if (srdp->numsect == 0)
6537    {
6538       if (start != NULL)
6539          *start = -1;
6540       if (stop != NULL)
6541          *stop = -1;
6542       return;
6543    }
6544    if (start != NULL)
6545    {
6546       if (saip->anchor > 0)
6547          i = binary_search_on_uint2_list(saip->srdp[saip->anchor-1]->sect, srdp->sect[0], saip->srdp[saip->anchor-1]->numsect);
6548       else
6549          i = srdp->sect[0];
6550       *start = saip->aligncoords[i];
6551    }
6552    if (stop != NULL)
6553    {
6554       if (saip->anchor > 0)
6555          i = binary_search_on_uint2_list(saip->srdp[saip->anchor-1]->sect, srdp->sect[srdp->numsect-1], saip->srdp[saip->anchor-1]->numsect);
6556       else
6557          i = srdp->sect[srdp->numsect-1];
6558       *stop = saip->aligncoords[i] + dsp->lens[srdp->sect[srdp->numsect-1]] - 1;
6559    }
6560    return;
6561 }
6563 /* SECTION 4b */
/***************************************************************************
*
*  AlnMgr2GetNthSeqRangeInSASet returns the smallest start and the largest
*  stop reported by AlnMgr2GetNthSeqRangeInSA for row n (1-based) over all
*  the child alignments of an indexed parent. A child alignment is simply
*  passed on to AlnMgr2GetNthSeqRangeInSA. Either start or stop can be
*  NULL. Children that report a negative (error or all-insert) start are
*  ignored; if no child contributes, the outputs are -1.
*
***************************************************************************/
static void AlnMgr2GetNthSeqRangeInSASet(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop)
{
   AMAlignIndex2Ptr  amaip;
   Int4              from;
   Int4              i;
   Int4              max;
   Int4              min;
   Int4              to;

   if (start != NULL)
      *start = -1;
   if (stop != NULL)
      *stop = -1;
   if (sap == NULL || sap->saip == NULL || n < 1)
      return;
   if (sap->saip->indextype == INDEX_CHILD)
   {
      AlnMgr2GetNthSeqRangeInSA(sap, n, start, stop);
      return;
   }
   if (sap->saip->indextype != INDEX_PARENT)
      return;
   amaip = (AMAlignIndex2Ptr)(sap->saip);
   min = max = -1;
   for (i=0; i<amaip->numsaps; i++)
   {
      from = to = -1;
      AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], n, &from, &to);
      if (from >= 0 && (min == -1 || from < min))
         min = from;
      if (to > max)
         max = to;
   }
   /* report the accumulated extremes, not the last child's range */
   if (start != NULL)
      *start = min;
   if (stop != NULL)
      *stop = max;
}
6600 /* SECTION 4b */
AlnMgr2GetMaxTailLength(SeqAlignPtr sap,Uint1 which_tail)6601 NLM_EXTERN Int4 AlnMgr2GetMaxTailLength(SeqAlignPtr sap, Uint1 which_tail)
6602 {
6603    Int4   i;
6604    Int4   maxlen;
6605    Int4   n;
6606    Int4   start;
6607    Int4   stop;
6608    Uint1  strand;
6610    if (sap == NULL || sap->saip == NULL)
6611       return 0;
6612    n = AlnMgr2GetNumRows(sap);
6613    maxlen = -1;
6614    for (i=0; i<n; i++)
6615    {
6616       AlnMgr2GetNthRowTail(sap, i+1, which_tail, &start, &stop, &strand);
6617       if (stop - start + 1 > maxlen)
6618          maxlen = stop - start + 1;
6619    }
6620    return maxlen;
6621 }
6623 /* SECTION 4b */
6624 /***************************************************************************
6625 *
6626 *  AlnMgr2GetNthRowTail returns the sequence extremities that are not
6627 *  contained in the alignment (if the alignment starts at 10 in row 2, the
6628 *  tail in that row is 0-9). It takes an indexed seqalign, a 1-based row
6629 *  number, and AM2_LEFT_TAIL or AM2_RIGHT_TAIL, and returns the start, stop,
6630 *  and strand of the tail indicated in the row desired. AlnMgr2GetNthRowTail
6631 *  returns TRUE if the calculations were successfully completed.
6632 *
6633 ***************************************************************************/
AlnMgr2GetNthRowTail(SeqAlignPtr sap,Int4 n,Uint1 which_tail,Int4Ptr start,Int4Ptr stop,Uint1Ptr strand)6634 NLM_EXTERN Boolean AlnMgr2GetNthRowTail(SeqAlignPtr sap, Int4 n, Uint1 which_tail, Int4Ptr start, Int4Ptr stop, Uint1Ptr strand)
6635 {
6636    BioseqPtr  bsp;
6637    SeqIdPtr   sip;
6638    Int4       tmp_start;
6639    Int4       tmp_stop;
6640    Uint1      tmp_strand;
6642    if (sap == NULL || n < 1 || sap->saip == NULL)
6643       return FALSE;
6644    tmp_start = tmp_stop = -1;
6645    AlnMgr2GetNthSeqRangeInSA(sap, n, &tmp_start, &tmp_stop);
6646    if (tmp_start == -1 || tmp_stop == -1)
6647       return FALSE;
6648    tmp_strand = AlnMgr2GetNthStrand(sap, n);
6649    if (which_tail == AM2_LEFT_TAIL)
6650    {
6651       if (tmp_strand == Seq_strand_minus)
6652       {
6653          sip = AlnMgr2GetNthSeqIdPtr(sap, n);
6654          bsp = BioseqLockById(sip);
6655          SeqIdFree(sip);
6656          if (bsp == NULL)
6657             return FALSE;
6658          if (tmp_stop == bsp->length-1 || stop == NULL)
6659          {
6660             if (start)
6661                *start = -1;
6662             if (stop)
6663                *stop = -1;
6664          } else
6665          {
6666             if (start)
6667                *start = tmp_stop+1;
6668             if (stop)
6669                *stop = bsp->length-1;
6670          }
6671          BioseqUnlock(bsp);
6672          if (strand)
6673             *strand = tmp_strand;
6674       } else
6675       {
6676          if (tmp_start >= 1)
6677          {
6678             if (start)
6679                *start = 0;
6680             if (stop)
6681                *stop = tmp_start - 1;
6682          } else
6683          {
6684             if (start)
6685                *start = -1;
6686             if (stop)
6687                *stop = -1;
6688          }
6689          if (strand)
6690             *strand = tmp_strand;
6691       }
6692    } else if (which_tail == AM2_RIGHT_TAIL)
6693    {
6694       if (tmp_strand == Seq_strand_minus)
6695       {
6696          if (tmp_start >= 1)
6697          {
6698             if (start)
6699                *start = 0;
6700             if (stop)
6701                *stop = tmp_start - 1;
6702          } else
6703          {
6704             if (start)
6705                *start = -1;
6706             if (stop)
6707                *stop = -1;
6708          }
6709          if (strand)
6710             *strand = tmp_strand;
6711       } else
6712       {
6713          sip = AlnMgr2GetNthSeqIdPtr(sap, n);
6714          bsp = BioseqLockById(sip);
6715          SeqIdFree(sip);
6716          if (bsp == NULL)
6717             return FALSE;
6718          if (bsp->length-1 == tmp_stop)
6719          {
6720             if (start)
6721                *start = -1;
6722             if (stop)
6723                *stop = -1;
6724          } else
6725          {
6726             if (start)
6727                *start = tmp_stop + 1;
6728             if (stop)
6729                *stop = bsp->length-1;
6730          }
6731          if (strand)
6732             *strand = tmp_strand;
6733          BioseqUnlock(bsp);
6734       }
6735    }
6736    return TRUE;
6737 }
6739 /* SECTION 4c */
6740 /***************************************************************************
6741 *
6742 *  AlnMgr2GetAlnLength returns the total alignment length of an indexed
6743 *  alignment. If fill_in is TRUE, the function computes the total length
6744 *  of all the internal unaligned regions and adds that to the alignment
6745 *  length; otherwise only the aligned portions are considered. (LENGTH)
6746 *
6747 ***************************************************************************/
AlnMgr2GetAlnLength(SeqAlignPtr sap,Boolean fill_in)6748 NLM_EXTERN Int4 AlnMgr2GetAlnLength(SeqAlignPtr sap, Boolean fill_in)
6749 {
6750    AMAlignIndex2Ptr  amaip;
6751    DenseSegPtr      dsp;
6752    Int4             i;
6753    Uint2            lastseg;
6754    Int4             len;
6755    SAIndex2Ptr       saip;
6756    SeqAlignPtr      salp;
6758    if (sap == NULL || sap->saip == NULL)
6759       return -1;
6760    if (sap->saip->indextype == INDEX_CHILD)
6761    {
6762       dsp = (DenseSegPtr)(sap->segs);
6763       saip = (SAIndex2Ptr)(sap->saip);
6764       salp = sap;
6765    } else if (sap->saip->indextype == INDEX_PARENT)
6766    {
6767       amaip = (AMAlignIndex2Ptr)(sap->saip);
6768       if (amaip->alnstyle == AM2_LITE)
6769          return -1;
6770       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6771       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
6772       salp = amaip->sharedaln;
6773    }
6774    if (saip->unaln == FALSE || fill_in == FALSE)
6775    {
6776       if (saip->anchor == -1)
6777          return (saip->aligncoords[saip->numseg-1]+dsp->lens[saip->numseg-1]);
6778       else
6779       {
6780          lastseg = saip->srdp[saip->anchor-1]->sect[saip->srdp[saip->anchor-1]->numsect-1];
6781          return (saip->aligncoords[saip->numseg-1]+dsp->lens[lastseg]);
6782       }
6783    } else
6784    {
6785       len = 0;
6786       for (i=0; i<dsp->numseg; i++)
6787       {
6788          len += dsp->lens[i];
6789          len += AlnMgr2GetMaxUnalignedLength(salp, i);
6790       }
6791    }
6792    return len;
6793 }
6795 /* SECTION 4c */ /* FOR DDV */
AlnMgr2IsSAPDiscAli(SeqAlignPtr sap)6796 NLM_EXTERN Boolean AlnMgr2IsSAPDiscAli(SeqAlignPtr sap)
6797 {
6798    AMAlignIndex2Ptr  amaip;
6799    SAIndex2Ptr       saip;
6801    if (sap == NULL || sap->saip == NULL)
6802       return FALSE;
6803    if (sap->saip->indextype == INDEX_CHILD)
6804    {
6805       saip = (SAIndex2Ptr)(sap->saip);
6806    } else if (sap->saip->indextype == INDEX_PARENT)
6807    {
6808       amaip = (AMAlignIndex2Ptr)(sap->saip);
6809       if (amaip->alnstyle == AM2_LITE)
6810          return FALSE;
6811       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6812    }
6813    if (saip->numunaln > 0)
6814       return TRUE;
6815    return FALSE;
6816 }
6818 /* SECTION 4c */ /* FOR DDV */
AlnMgr2GetNumAlnBlocks(SeqAlignPtr sap)6819 NLM_EXTERN Int4 AlnMgr2GetNumAlnBlocks(SeqAlignPtr sap)
6820 {
6821    AMAlignIndex2Ptr  amaip;
6822    SAIndex2Ptr       saip;
6824    if (sap == NULL || sap->saip == NULL)
6825       return -1;
6826    if (sap->saip->indextype == INDEX_CHILD)
6827    {
6828       saip = (SAIndex2Ptr)(sap->saip);
6829    } else if (sap->saip->indextype == INDEX_PARENT)
6830    {
6831       amaip = (AMAlignIndex2Ptr)(sap->saip);
6832       if (amaip->alnstyle == AM2_LITE)
6833          return -1;
6834       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6835    }
6836    if (saip->numunaln >= 0)
6837       return (saip->numunaln + 1);
6838    else if (saip->numunaln == -1)
6839       return 1;
6840    else
6841       return -1;
6842 }
6844 /* SECTION 4c */ /* FOR DDV */
AlnMgr2GetNthBlockRange(SeqAlignPtr sap,Int4 n,Int4Ptr start,Int4Ptr stop)6845 NLM_EXTERN Boolean AlnMgr2GetNthBlockRange(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop)
6846 {
6847    AMAlignIndex2Ptr  amaip;
6848    SAIndex2Ptr       saip;
6850    if (sap == NULL || sap->saip == NULL)
6851       return FALSE;
6852    if (sap->saip->indextype == INDEX_CHILD)
6853    {
6854       saip = (SAIndex2Ptr)(sap->saip);
6855    } else if (sap->saip->indextype == INDEX_PARENT)
6856    {
6857       amaip = (AMAlignIndex2Ptr)(sap->saip);
6858       if (amaip->alnstyle == AM2_LITE)
6859          return FALSE;
6860       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6861    }
6862    if (!start || !stop)
6863       return FALSE;
6864    *start = -1;
6865    *stop = -1;
6866    if (n >= saip->numunaln)
6867       return FALSE;
6868    if (n < saip->numunaln)
6869    {
6870       *start = saip->aligncoords[saip->unaln[n-1]+1];
6871       *stop = saip->aligncoords[saip->unaln[n]] - 1;
6872    }
6873    return TRUE;
6874 }
6876 /* SECTION 4c */ /* FOR DDV */
6877 /***************************************************************************
6878 *
6879 *  AlnMgr2GetNthUnalignedForNthRow returns the bioseq coordinates for the
6880 *  requested row, in the requested unaligned region. Any error will result
6881 *  in -1 returns for both start and stop.
6882 *
6883 ***************************************************************************/
AlnMgr2GetNthUnalignedForNthRow(SeqAlignPtr sap,Int4 unaligned,Int4 row,Int4Ptr start,Int4Ptr stop)6884 NLM_EXTERN Boolean AlnMgr2GetNthUnalignedForNthRow(SeqAlignPtr sap, Int4 unaligned, Int4 row, Int4Ptr start, Int4Ptr stop)
6885 {
6886    AMAlignIndex2Ptr  amaip;
6887    DenseSegPtr       dsp;
6888    Int4              i;
6889    SAIndex2Ptr       saip;
6890    Int4              seg;
6891    Uint1             strand;
6893    if (sap == NULL || sap->saip == NULL)
6894       return FALSE;
6895    if (sap->saip->indextype == INDEX_CHILD)
6896    {
6897       saip = (SAIndex2Ptr)(sap->saip);
6898       dsp = (DenseSegPtr)(sap->segs);
6899    } else if (sap->saip->indextype == INDEX_PARENT)
6900    {
6901       amaip = (AMAlignIndex2Ptr)(sap->saip);
6902       if (amaip->alnstyle == AM2_LITE)
6903          return FALSE;
6904       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6905       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
6906    }
6907    if (row > saip->numrows)
6908       return FALSE;
6909    if (saip->numunaln == 0) /* not set yet */
6910       AlnMgr2SetUnaln(sap);
6911    if (saip->numunaln == -1 || unaligned > saip->numunaln)
6912    {
6913       if (start)
6914          *start = -1;
6915       if (stop)
6916          *stop = -1;
6917       return FALSE;
6918    }
6919    seg = -1;
6920    if (unaligned <= saip->numunaln && unaligned > 0)
6921       seg = saip->unaln[unaligned-1];
6922    if (start)
6923       *start = -1;
6924    if (stop)
6925       *stop = -1;
6926    i = binary_search_on_uint2_list(saip->srdp[row-1]->unaligned, seg, saip->srdp[row-1]->numunaln);
6927    if (i == -1 || saip->srdp[row-1]->unaligned[i] >= dsp->numseg-1)
6928       return FALSE;
6929    strand = AlnMgr2GetNthStrand(sap, row);
6930    if (strand == Seq_strand_minus)
6931    {
6932       *start = dsp->starts[(saip->srdp[row-1]->unaligned[i]+1)*dsp->dim+row-1] + dsp->lens[(saip->srdp[row-1]->unaligned[i])];
6933       *stop = dsp->starts[(saip->srdp[row-1]->unaligned[i])*dsp->dim+row-1] - 1;
6934    } else
6935    {
6936       *start = dsp->starts[(saip->srdp[row-1]->unaligned[i])*dsp->dim+row-1] + dsp->lens[(saip->srdp[row-1]->unaligned[i])];
6937       *stop = dsp->starts[(saip->srdp[row-1]->unaligned[i]+1)*dsp->dim+row-1] - 1;
6938    }
6939    return TRUE;
6940 }
6942 /* SECTION 4c */ /* FOR DDV */
6943 /***************************************************************************
6944 *
6945 *  AlnMgr2GetNextLengthBit is called in a loop on an indexed alignment, with
6946 *  seg starting at 0, to return the lengths of the aligned and unaligned
6947 *  regions. If the length returned is negative, it's an unaligned region;
6948 *  otherwise it's aligned.
6949 *
6950 ***************************************************************************/
AlnMgr2GetNextLengthBit(SeqAlignPtr sap,Int4Ptr len,Int4Ptr seg)6951 NLM_EXTERN Boolean AlnMgr2GetNextLengthBit(SeqAlignPtr sap, Int4Ptr len, Int4Ptr seg)
6952 {
6953    AMAlignIndex2Ptr  amaip;
6954    DenseSegPtr       dsp;
6955    Int4              i;
6956    Int4              lastseg;
6957    Int4              maxseg;
6958    SAIndex2Ptr       saip;
6960    if (sap == NULL || sap->saip == NULL || seg == NULL)
6961       return FALSE;
6962    if (sap->saip->indextype == INDEX_CHILD)
6963    {
6964       saip = (SAIndex2Ptr)(sap->saip);
6965       dsp = (DenseSegPtr)(sap->segs);
6966    } else if (sap->saip->indextype == INDEX_PARENT)
6967    {
6968       amaip = (AMAlignIndex2Ptr)(sap->saip);
6969       if (amaip->alnstyle == AM2_LITE)
6970          return FALSE;
6971       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6972       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
6973    }
6974    if (saip->numunaln == -1) /* the whole thing is just one big aligned segment */
6975    {
6976       if (*seg != 0)
6977          return FALSE;
6978       if (saip->anchor == -1)
6979       {
6980          *len = saip->aligncoords[saip->numseg-1]+dsp->lens[saip->numseg-1];
6981          *seg = 1;
6982       } else
6983       {
6984          lastseg = saip->srdp[saip->anchor-1]->sect[saip->srdp[saip->anchor-1]->numsect-1];
6985          *len = saip->aligncoords[saip->numseg-1]+dsp->lens[lastseg];
6986          *seg = 1;
6987       }
6988       return TRUE;
6989    } else
6990    {
6991       if (saip->unaln == 0) /* not set */
6992       {
6993          AlnMgr2SetUnaln(sap);
6994          if (saip->numunaln == -1) /* no unaligned regions */
6995          {
6996             if (*seg != 0)
6997                return FALSE;
6998             if (saip->anchor == -1)
6999                *len = saip->aligncoords[saip->numseg-1]+dsp->lens[saip->numseg-1];
7000             else
7001             {
7002                lastseg = saip->srdp[saip->anchor-1]->sect[saip->srdp[saip->anchor-1]->numsect-1];
7003                *len = saip->aligncoords[saip->numseg-1]+dsp->lens[lastseg];
7004             }
7005             *seg = 1;
7006             return TRUE;
7007          }
7008       }
7009       if (*seg > saip->numunaln || -(*seg) > saip->numunaln)
7010          return FALSE;
7011       if (*seg >= 0)
7012       {
7013          *len = 0;
7014          if (*seg == 0)
7015             i = 0;
7016          else
7017             i = saip->unaln[*seg-1]+1;
7018          if (*seg < saip->numunaln)
7019             maxseg = saip->unaln[*seg];
7020          else
7021             maxseg = dsp->numseg-1;
7022          while (i<=maxseg)
7023          {
7024             (*len) += dsp->lens[i];
7025             i++;
7026          }
7027          *seg = -(*seg+1);
7028          return TRUE;
7029       } else
7030       {
7031          *len = -AlnMgr2GetMaxUnalignedLength(sap, saip->unaln[-(*seg)-1]);
7032          *seg = -(*seg);
7033          return TRUE;
7034       }
7035    }
7036 }
7038 /* SECTION 4c */
AlnMgr2GetMaxUnalignedLength(SeqAlignPtr sap,Int4 seg)7039 static Int4 AlnMgr2GetMaxUnalignedLength(SeqAlignPtr sap, Int4 seg)
7040 {
7041    AMAlignIndex2Ptr  amaip;
7042    DenseSegPtr      dsp;
7043    Boolean          found;
7044    Int4             from;
7045    Int4             i;
7046    Int4             max;
7047    Int4             row;
7048    SAIndex2Ptr       saip;
7049    Int4             to;
7051    if (sap == NULL)
7052       return -1;
7053    if (sap->saip->indextype == INDEX_CHILD)
7054    {
7055       saip = (SAIndex2Ptr)(sap->saip);
7056       dsp = (DenseSegPtr)(sap->segs);
7057    } else if (sap->saip->indextype == INDEX_PARENT)
7058    {
7059       amaip = (AMAlignIndex2Ptr)(sap->saip);
7060       if (amaip->alnstyle == AM2_LITE)
7061          return -1;
7062       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
7063       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
7064    }
7065    found = FALSE;
7066    for (row=0; row<dsp->dim && !found; row++)
7067    {
7068       for (i=0; i<saip->srdp[row]->numunaln && !found; i++)
7069       {
7070          if (saip->srdp[row]->unaligned[i] == seg)
7071             found = TRUE;
7072       }
7073    }
7074    if (!found)
7075       return 0;
7076    max = 0;
7077    for (i=0; i<dsp->dim; i++)
7078    {
7079       AlnMgr2GetUnalignedInfo(sap, seg, i+1, &from, &to);
7080       if (to - from > max)
7081          max = to - from;
7082    }
7083    return max;
7084 }
7086 /* SECTION 4c */
7087 /***************************************************************************
7088 *
7089 *  AlnMgr2GetNumRows returns the number of rows in an indexed seqalign.
7090 *
7091 ***************************************************************************/
AlnMgr2GetNumRows(SeqAlignPtr sap)7092 NLM_EXTERN Int4 AlnMgr2GetNumRows(SeqAlignPtr sap)
7093 {
7094    AMAlignIndex2Ptr  amaip;
7095    SAIndex2Ptr       saip;
7097    if (sap == NULL || sap->saip == NULL)
7098       return -1;
7099    if (sap->saip->indextype == INDEX_CHILD)
7100    {
7101       saip = (SAIndex2Ptr)(sap->saip);
7102       return (saip->numrows);
7103    } else if (sap->saip->indextype == INDEX_PARENT)
7104    {
7105       amaip = (AMAlignIndex2Ptr)(sap->saip);
7106       return (amaip->numrows);
7107    }
7108    return -1;
7109 }
7111 /* SECTION 4c */
7112 /***************************************************************************
7113 *
7114 *  AlnMgr2GetNumSegs returns the number of gap- or aligned- contiguous
7115 *  segments in the alignment (continuous or not).
7116 *
7117 ***************************************************************************/
AlnMgr2GetNumSegs(SeqAlignPtr sap)7118 NLM_EXTERN Int4 AlnMgr2GetNumSegs(SeqAlignPtr sap)
7119 {
7120    AMAlignIndex2Ptr  amaip;
7121    DenseSegPtr      dsp;
7123    if (sap == NULL || sap->saip == NULL)
7124       return -1;
7125    if (sap->saip->indextype == INDEX_CHILD)
7126    {
7127       dsp = (DenseSegPtr)(sap->segs);
7128       return dsp->numseg;
7129    } else if (sap->saip->indextype == INDEX_PARENT)
7130    {
7131       amaip = (AMAlignIndex2Ptr)(sap->saip);
7132       if (amaip->alnstyle == AM2_LITE)
7133          return -1;
7134       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
7135       return dsp->numseg;
7136    }
7137    return -1;
7138 }
7140 /* SECTION 4c */
7141 /***************************************************************************
7142 *
7143 *  AlnMgr2GetNumSegsInRange returns the number of alignment segments
7144 *  spanned by the given range (partially or fully). The range is
7145 *  given in alignment coordinates.
7146 *
7147 ***************************************************************************/
AlnMgr2GetNumSegsInRange(SeqAlignPtr sap,Int4 from,Int4 to,Int4Ptr start_seg)7148 NLM_EXTERN Int4 AlnMgr2GetNumSegsInRange(SeqAlignPtr sap, Int4 from, Int4 to, Int4Ptr start_seg)
7149 {
7150    Uint4Ptr         aligncoords;
7151    AMAlignIndex2Ptr  amaip;
7152    DenseSegPtr      dsp;
7153    Int4             len;
7154    SAIndex2Ptr       saip;
7155    Int4             start;
7156    Int4             stop;
7158    if (start_seg != NULL)
7159       *start_seg = -1;
7160    if (sap == NULL || sap->saip == NULL)
7161       return -1;
7162    len = AlnMgr2GetAlnLength(sap, FALSE);
7163    if (from < 0 || to > len-1)
7164       return -1;
7165    if (sap->saip->indextype == INDEX_CHILD)
7166    {
7167       dsp = (DenseSegPtr)(sap->segs);
7168       saip = (SAIndex2Ptr)(sap->saip);
7169       aligncoords = saip->aligncoords;
7170    } else if (sap->saip->indextype == INDEX_PARENT)
7171    {
7172       amaip = (AMAlignIndex2Ptr)(sap->saip);
7173       if (amaip->alnstyle == AM2_LITE)
7174          return -1;
7175       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
7176       saip = (SAIndex2Ptr)(sap->saip);
7177       aligncoords = saip->aligncoords;
7178    }
7179    if (from == 0 && to == len-1) /* whole alignment */
7180    {
7181       if (start_seg)
7182          *start_seg = 0;
7183       return dsp->numseg;
7184    }
7185    start = binary_search_on_uint4_list(aligncoords, from, dsp->numseg);
7186    stop = binary_search_on_uint4_list(aligncoords, to, dsp->numseg);
7187    if (start_seg != NULL)
7188       *start_seg = start;
7189    return (stop-start+1);
7190 }
7192 /* SECTION 4c */
7193 /***************************************************************************
7194 *
7195 *  AlnMgr2GetNthSegmentRange returns the alignment coordinate range of the
7196 *  Nth segment (count starts at 1) of the seqalign. start and stop are
7197 *  optional arguments (in case only one end is desired).
7198 *
7199 ***************************************************************************/
AlnMgr2GetNthSegmentRange(SeqAlignPtr sap,Int4 n,Int4Ptr start,Int4Ptr stop)7200 NLM_EXTERN void AlnMgr2GetNthSegmentRange(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop)
7201 {
7202    AMAlignIndex2Ptr  amaip;
7203    Int4             i;
7204    SAIndex2Ptr       saip;
7206    if (sap == NULL || sap->saip == NULL)
7207       return;
7208    i = AlnMgr2GetNumSegs(sap);
7209    if (n > i || n < 0)
7210       return;
7211    if (sap->saip->indextype == INDEX_CHILD)
7212    {
7213       saip = (SAIndex2Ptr)(sap->saip);
7214       if (start != NULL)
7215          *start = saip->aligncoords[n-1];
7216       if (stop != NULL)
7217       {
7218          if (i > n) /* not the last segment */
7219             *stop = saip->aligncoords[n] - 1;
7220          else
7221             *stop = AlnMgr2GetAlnLength(sap, FALSE) - 1;
7222       }
7223       return;
7224    } else if (sap->saip->indextype == INDEX_PARENT)
7225    {
7226       amaip = (AMAlignIndex2Ptr)(sap->saip);
7227       if (amaip->alnstyle == AM2_LITE)
7228          return;
7229       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
7230       if (start != NULL)
7231          *start = saip->aligncoords[n-1];
7232       if (stop != NULL)
7233       {
7234          if (i > n) /* not the last segment */
7235             *stop = saip->aligncoords[n] - 1;
7236          else
7237             *stop = AlnMgr2GetAlnLength(sap, FALSE) - 1;
7238       }
7239       return;
7240    }
7241 }
7243 /* SECTION 4c */
7244 /***************************************************************************
7245 *
7246 *  AlnMgr2GetFirstNForSip returns the first row that a seqid occurs on,
7247 *  or -1 if the seqid is not in the alignment or if there is another
7248 *  error.
7249 *
7250 ***************************************************************************/
AlnMgr2GetFirstNForSip(SeqAlignPtr sap,SeqIdPtr sip)7251 NLM_EXTERN Int4 AlnMgr2GetFirstNForSip(SeqAlignPtr sap, SeqIdPtr sip)
7252 {
7253    AMAlignIndex2Ptr  amaip;
7254    DenseSegPtr      dsp;
7255    Int4             i;
7256    SeqIdPtr         sip_tmp;
7258    if (sap == NULL || sip == NULL || sap->saip == NULL)
7259       return -1;
7260    if (sap->saip->indextype == INDEX_CHILD)
7261    {
7262       dsp = (DenseSegPtr)(sap->segs);
7263       sip_tmp = dsp->ids;
7264       i = 1;
7265       while (sip_tmp != NULL)
7266       {
7267          if (SeqIdComp(sip, sip_tmp) == SIC_YES)
7268             return i;
7269          sip_tmp = sip_tmp->next;
7270          i++;
7271       }
7272    } else if (sap->saip->indextype == INDEX_PARENT)
7273    {
7274       amaip = (AMAlignIndex2Ptr)(sap->saip);
7275       if (amaip->alnstyle == AM2_LITE)
7276          return -1;
7277       for (i=0; i<amaip->numrows; i++)
7278       {
7279          if (SeqIdComp(sip, amaip->ids[i]) == SIC_YES)
7280             return (i+1);
7281       }
7282    }
7283    return -1;
7284 }
7286 /***************************************************************************
7287 *
7288 *  AlnMgr2GetFirstNForSipList returns the first row that one of a list of seqids occur on,
7289 *  or -1 if none of the seqids are in the alignment or if there is another
7290 *  error.
7291 *  Handy if sip comes from a BioSeq, where it can point to a linked list
7292 *  of SeqIds.
7293 *
7294 ***************************************************************************/
AlnMgr2GetFirstNForSipList(SeqAlignPtr sap,SeqIdPtr sip)7295 NLM_EXTERN Int4 AlnMgr2GetFirstNForSipList(SeqAlignPtr sap, SeqIdPtr sip)
7296 {
7297     Int4    i;
7298     if (sap == NULL || sap->saip == NULL)
7299         return -1;
7301     for (; sip; sip = sip->next) {
7302         i = AlnMgr2GetFirstNForSip(sap, sip);
7303         if (i != -1)
7304             return i;
7305     }
7306     return -1;
7307 }
7309 /***************************************************************************
7310 *
7311 *  AlnMgr2GetParent returns the top-level seqalign associated with a given
7312 *  indexed alignment. It returns the actual pointer, not a copy.
7313 *
7314 ***************************************************************************/
AlnMgr2GetParent(SeqAlignPtr sap)7315 NLM_EXTERN SeqAlignPtr AlnMgr2GetParent(SeqAlignPtr sap)
7316 {
7317    SAIndex2Ptr  saip;
7319    if (sap == NULL || sap->saip == NULL)
7320       return NULL;
7321    if (sap->saip->indextype == INDEX_PARENT)
7322       return sap;
7323    saip = (SAIndex2Ptr)(sap->saip);
7324    return (saip->top);
7325 }
7327 /***************************************************************************
7328 *
7329 *  SECTION 5: Functions to change, assign or retrieve an anchor row.
7330 *    SECTION 5a: functions for child seqaligns
7331 *    SECTION 5b: functions for parent seqaligns
7332 *    SECTION 5c: functions to retrieve anchor row information
7333 *
7334 ***************************************************************************/
7336 /* SECTION 5a */
AlnMgr2AnchorChild(SeqAlignPtr sap,Int4 which_row)7337 static void AlnMgr2AnchorChild(SeqAlignPtr sap, Int4 which_row)
7338 {
7339    AMBitty2Ptr  abp;
7340    AMBitty2Ptr  abp_head;
7341    AMBitty2Ptr  abp_head2;
7342    AMBitty2Ptr  abp_prev;
7343    AMBitty2Ptr  abp_prev2;
7344    AMBitty2Ptr  abp_uhead;
7345    AMBitty2Ptr  abp_uprev;
7346    Uint2Ptr     anchor_unsect;
7347    Int4         curr;
7348    Int4         curr2;
7349    DenseSegPtr  dsp;
7350    Int4         i;
7351    Int4         j;
7352    Uint2        numunsect;
7353    SAIndex2Ptr   saip;
7354    SARowDat2Ptr  srdp;
7356    if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_CHILD)
7357       return;
7358    saip = (SAIndex2Ptr)(sap->saip);
7359    if (which_row > saip->numrows)
7360       return;
7361    if (saip->anchor == which_row) /*already anchored to the right row */
7362       return;
7363    if (saip->anchor != -1 || which_row <= 0)  /* already anchored -- must reindex as a flat alignment first */
7364    {
7365       SAIndex2Free2(sap->saip);
7366       sap->saip = NULL;
7367       AlnMgr2IndexSingleDenseSegSA(sap);
7368       if (which_row <= 0)
7369          return;
7370       saip = (SAIndex2Ptr)(sap->saip);
7371    }
7372    numunsect = saip->srdp[which_row-1]->numunsect;
7373    if (numunsect > 0)
7374       anchor_unsect = saip->srdp[which_row-1]->unsect;
7375    else
7376       anchor_unsect = NULL;
7377    for (i=0; i<saip->numrows; i++)
7378    {
7379       if (i+1 != which_row)
7380       {
7381          abp_head = NULL;
7382          abp_head2 = NULL;
7383          abp_uhead = NULL;
7384          curr = 0;
7385          curr2 = 0;
7386          srdp = saip->srdp[i];
7387          for (j=0; j<srdp->numsect; j++)
7388          {
7389             if (anchor_unsect != NULL && curr < numunsect && srdp->sect[j] > anchor_unsect[curr])
7390             {
7391                while (curr < numunsect && srdp->sect[j] > anchor_unsect[curr])
7392                {
7393                   curr++;
7394                }
7395             }
7396             if (curr < numunsect && anchor_unsect != NULL && srdp->sect[j] == anchor_unsect[curr]) /* this one is an insert */
7397             {
7398                abp = (AMBitty2Ptr)MemNew(sizeof(AMBitty2));
7399                abp->n = srdp->sect[j];
7400                if (abp_head == NULL)
7401                   abp_head = abp_prev = abp;
7402                else
7403                {
7404                   abp_prev->next = abp;
7405                   abp_prev = abp;
7406                }
7407                curr++;
7408             } else /* put it in the keeper pile */
7409             {
7410                abp = (AMBitty2Ptr)MemNew(sizeof(AMBitty2));
7411                abp->n = srdp->sect[j];
7412                if (abp_head2 == NULL)
7413                   abp_head2 = abp_prev2 = abp;
7414                else
7415                {
7416                   abp_prev2->next = abp;
7417                   abp_prev2 = abp;
7418                }
7419             }
7420          }
7421          for (j=0; j<srdp->numunsect; j++)
7422          {
7423             if (anchor_unsect != NULL && curr2 < numunsect && srdp->unsect[j] > anchor_unsect[curr2])
7424             {
7425                while (curr2 < numunsect && srdp->unsect[j] > anchor_unsect[curr2])
7426                {
7427                   curr2++;
7428                }
7429             }
7430             if (curr2 >= numunsect || (curr2 < numunsect && (anchor_unsect == NULL || srdp->unsect[j] != anchor_unsect[curr2]))) /* these get kept */
7431             {
7432                abp = (AMBitty2Ptr)MemNew(sizeof(AMBitty2));
7433                abp->n = srdp->unsect[j];
7434                if (abp_uhead == NULL)
7435                   abp_uhead = abp_uprev = abp;
7436                else
7437                {
7438                   abp_uprev->next = abp;
7439                   abp_uprev = abp;
7440                }
7441             }
7442          }
7443          MemFree(srdp->sect);
7444          MemFree(srdp->unsect);
7445          srdp->numsect = srdp->numunsect = srdp->numinsect = 0;
7446          abp = abp_head; /* inserts */
7447          while (abp != NULL)
7448          {
7449             srdp->numinsect++;
7450             abp = abp->next;
7451          }
7452          srdp->insect = (Uint2Ptr)MemNew((srdp->numinsect)*sizeof(Uint2));
7453          abp = abp_head;
7454          j = 0;
7455          while (abp != NULL)
7456          {
7457             srdp->insect[j] = abp->n;
7458             j++;
7459             abp_prev = abp;
7460             abp = abp->next;
7461             MemFree(abp_prev);
7462          }
7463          abp = abp_head2; /* aligned sections */
7464          while (abp != NULL)
7465          {
7466             srdp->numsect++;
7467             abp = abp->next;
7468          }
7469          srdp->sect = (Uint2Ptr)MemNew((srdp->numsect)*sizeof(Uint2));
7470          abp = abp_head2;
7471          j = 0;
7472          while (abp != NULL)
7473          {
7474             srdp->sect[j] = abp->n;
7475             j++;
7476             abp_prev = abp;
7477             abp = abp->next;
7478             MemFree(abp_prev);
7479          }
7480          abp = abp_uhead; /* aligned gaps */
7481          while (abp != NULL)
7482          {
7483             srdp->numunsect++;
7484             abp = abp->next;
7485          }
7486          srdp->unsect = (Uint2Ptr)MemNew((srdp->numunsect)*sizeof(Uint2));
7487          abp = abp_uhead;
7488          j = 0;
7489          while (abp != NULL)
7490          {
7491             srdp->unsect[j] = abp->n;
7492             j++;
7493             abp_prev = abp;
7494             abp = abp->next;
7495             MemFree(abp_prev);
7496          }
7497       } else /* this is the anchor row -- fill in the alignment coords*/
7498       {
7499          srdp = saip->srdp[i];
7500          MemFree(saip->aligncoords);
7501          saip->numseg = srdp->numsect;
7502          saip->aligncoords = (Uint4Ptr)MemNew((saip->numseg)*sizeof(Uint4));
7503          dsp = (DenseSegPtr)(sap->segs);
7504          for (j=1; j<saip->numseg; j++)
7505          {
7506             saip->aligncoords[j] = saip->aligncoords[j-1] + dsp->lens[srdp->sect[j-1]];
7507          }
7508          saip->anchor = i+1;
7509       }
7510    }
7511 }
7513 /* SECTION 5c */
7514 /***************************************************************************
7515 *
7516 *  AlnMgr2AnchorSeqAlign takes an indexed seqalign and a row (1-based) and
7517 *  reindexes the alignment so that there are no gaps in the row indicated.
7518 *  Other rows may contain inserts after this operation. After an alignment
7519 *  is anchored, its length often shrinks. If which_row is less than 1, the
7520 *  function reindexes the alignment as a flat alignment.
7521 *
7522 ***************************************************************************/
AlnMgr2AnchorSeqAlign(SeqAlignPtr sap,Int4 which_row)7523 NLM_EXTERN void AlnMgr2AnchorSeqAlign(SeqAlignPtr sap, Int4 which_row)
7524 {
7525    AMAlignIndex2Ptr  amaip;
7527    if (sap == NULL || sap->saip == NULL)
7528       return;
7529    if (sap->saip->indextype == INDEX_CHILD)
7530       AlnMgr2AnchorChild(sap, which_row);
7531    else if (sap->saip->indextype == INDEX_PARENT)
7532    {
7533       amaip = (AMAlignIndex2Ptr)(sap->saip);
7534       if (amaip->alnstyle == AM2_LITE)
7535          return;
7536       AlnMgr2AnchorChild(amaip->sharedaln, which_row);
7537       amaip->anchor = which_row;
7538    }
7539 }
7541 /* SECTION 5c */
7542 /***************************************************************************
7543 *
7544 *  AlnMgr2FindAnchor returns the row number (1-based) of the anchor row
7545 *  for an indexed seqalign, or -1 if the alignment is unanchored or if
7546 *  there is another type of error.
7547 *
7548 ***************************************************************************/
AlnMgr2FindAnchor(SeqAlignPtr sap)7549 NLM_EXTERN Int4 AlnMgr2FindAnchor(SeqAlignPtr sap)
7550 {
7551    AMAlignIndex2Ptr  amaip;
7552    SAIndex2Ptr       saip;
7554    if (sap == NULL || sap->saip == NULL)
7555       return -1;
7556    if (sap->saip->indextype == INDEX_CHILD)
7557    {
7558       saip = (SAIndex2Ptr)(sap->saip);
7559       return (saip->anchor);
7560    } else if (sap->saip->indextype == INDEX_PARENT)
7561    {
7562       amaip = (AMAlignIndex2Ptr)(sap->saip);
7563       if (amaip->alnstyle == AM2_LITE)
7564          return -1;
7565       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
7566       return (saip->anchor);
7567    } else
7568       return -1;
7569 }
7571 /***************************************************************************
7572 *
7573 *  SECTION 6: Functions for coordinate conversion (bioseq to seqalign
7574 *  coordinates and vice versa)
7575 *
7576 ***************************************************************************/
7578 /* SECTION 6 */
7579 /***************************************************************************
7580 *
7581 *  AlnMgr2MapBioseqToSeqAlign takes an indexed seqalign, a position in a
7582 *  row of the alignment, and a 1-based row number, and maps the row position
7583 *  to alignment coordinates.
7584 *
7585 ***************************************************************************/
AlnMgr2MapBioseqToSeqAlign(SeqAlignPtr sap,Int4 pos,Int4 row)7586 NLM_EXTERN Int4 AlnMgr2MapBioseqToSeqAlign(SeqAlignPtr sap, Int4 pos, Int4 row)
7587 {
7588    AMAlignIndex2Ptr  amaip;
7589    Uint2Ptr         array;
7590    DenseSegPtr      dsp;
7591    Int4             L;
7592    Int4             mid;
7593    Int4             offset;
7594    Int4             R;
7595    Int4             retval;
7596    SAIndex2Ptr       saip;
7597    SARowDat2Ptr      srdp;
7598    Int4             start;
7599    Int4             stop;
7600    Uint1            strand;
7602    if (sap == NULL || sap->saip == NULL || row < 1)
7603       return -1;
7604    AlnMgr2GetNthSeqRangeInSA(sap, row, &start, &stop);
7605    if (pos < start || pos > stop)
7606       return -1;
7607    if (sap->saip->indextype == INDEX_CHILD)
7608    {
7609       saip = (SAIndex2Ptr)(sap->saip);
7610       dsp = (DenseSegPtr)(sap->segs);
7611    } else if (sap->saip->indextype == INDEX_PARENT)
7612    {
7613       amaip = (AMAlignIndex2Ptr)(sap->saip);
7614       if (amaip->alnstyle == AM2_LITE)
7615          return -1;
7616       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
7617       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
7618 }
7619    if (row > saip->numrows)
7620       return -1;
7621    srdp = saip->srdp[row-1];
7622    if (srdp->numsect < 1) {
7623      return -1;
7624    }
7625    strand = AlnMgr2GetNthStrand(sap, row);
7626    L = 0;
7627    R = srdp->numsect - 1;
7628    if (strand != Seq_strand_minus)
7629    {
7630       while (L < R)
7631       {
7632          mid = (L + R)/2;
7633          if (dsp->starts[(srdp->sect[mid+1])*(dsp->dim)+row-1] <= pos)
7634             L = mid + 1;
7635          else
7636             R = mid;
7637       }
7638    } else
7639    {
7640       while (L < R)
7641       {
7642          mid = ceil((L + R)/2);
7643          if (dsp->starts[(srdp->sect[mid])*(dsp->dim)+row-1] > pos)
7644             L = mid + 1;
7645          else
7646             R = mid;
7647       }
7648    }
7649    offset = pos - dsp->starts[(srdp->sect[L])*(dsp->dim)+row-1];
7650    if (offset > dsp->lens[srdp->sect[L]])
7651       return -2;  /* this is an insert */
7652    if (saip->anchor > 0)
7653    {
7654       array = saip->srdp[saip->anchor-1]->sect;
7655       R = binary_search_on_uint2_list(array, srdp->sect[L], saip->srdp[saip->anchor-1]->numsect);
7656       L = R;
7657       srdp = saip->srdp[saip->anchor-1];
7658       if (strand != Seq_strand_minus)
7659          retval = (saip->aligncoords[L] + offset);
7660       else
7661          retval = (saip->aligncoords[L] + dsp->lens[srdp->sect[L]] - offset - 1);
7662    } else
7663    {
7664       if (strand != Seq_strand_minus)
7665          retval = saip->aligncoords[srdp->sect[L]] + offset;
7666       else
7667          retval = (saip->aligncoords[srdp->sect[L]] + dsp->lens[srdp->sect[L]] - offset - 1);
7668    }
7669    return retval;
7670 }
7672 /* SECTION 6 */
7673 /***************************************************************************
7674 *
7675 *  AlnMgr2MapSeqAlignToBioseq takes an indexed seqalign, an alignment
7676 *  coordinate (pos), and the 1-based number of a row, and maps the alignment
7677 *  coordinate to the corresponding bioseq coordinate of the row desired.
7678 *  A return of -1 indicates an error; a return of -2 means that the bioseq
7679 *  is gapped at this alignment position.
7680 *
7681 ***************************************************************************/
AlnMgr2MapSeqAlignToBioseq(SeqAlignPtr sap,Int4 pos,Int4 row)7682 NLM_EXTERN Int4 AlnMgr2MapSeqAlignToBioseq(SeqAlignPtr sap, Int4 pos, Int4 row)
7683 {
7684    AMAlignIndex2Ptr  amaip;
7685    DenseSegPtr      dsp;
7686    Int4             len;
7687    Int4             offset;
7688    SAIndex2Ptr      saip;
7689    Int4             sect;
7690    SARowDat2Ptr     srdp;
7691    Int4             start;
7692    Uint1            strand;
7693    Uint2Ptr         trans;
7695    if (sap == NULL || sap->saip == NULL)
7696       return -1;
7697    len = AlnMgr2GetAlnLength(sap, FALSE);
7698    if (pos < 0 || pos > len - 1)
7699       return -1;
7700    if (sap->saip->indextype == INDEX_CHILD)
7701    {
7702       saip = (SAIndex2Ptr)(sap->saip);
7703       dsp = (DenseSegPtr)(sap->segs);
7704    } else if (sap->saip->indextype == INDEX_PARENT)
7705    {
7706       amaip = (AMAlignIndex2Ptr)(sap->saip);
7707       if (amaip->alnstyle == AM2_LITE)
7708          return -1;
7709       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
7710       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
7711    }
7712    if (row > saip->numrows)
7713    return -1;
7715    sect = binary_search_on_uint4_list(saip->aligncoords, pos, saip->numseg);
7716    offset = pos - saip->aligncoords[sect];
7717    if (saip->anchor > 0)
7718    {
7719       trans = saip->srdp[saip->anchor-1]->sect;
7720       sect = trans[sect];
7721    }
7722    srdp = saip->srdp[row-1];
7723    start = binary_search_on_uint2_list(srdp->sect, sect, srdp->numsect);
7724    if (start == -1)
7725       return -2; /* this row has a gap or insert at this alignment position */
7726    strand = AlnMgr2GetNthStrand(sap, row);
7727    if (strand != Seq_strand_minus)
7728       return (dsp->starts[sect*(dsp->dim)+row-1] + offset);
7729    else
7730       return (dsp->starts[sect*(dsp->dim)+row-1] + dsp->lens[sect] - 1 - offset);
7731 }
7733 /* SECTION 6 */
7734 /***************************************************************************
7735 *
7736 *  AlnMgr2MapRowToRow takes an indexed seqalign, a position in row1, the
7737 *  1-based number of row1, and a target row (row2), and maps the bioseq
7738 *  coordinate in row 1 to the corresponding (aligned) bioseq coordinate in
7739 *  row2. A return of -1 indicates an error while a return of -2 means that
7740 *  the bioseq in row2 is gapped at the desired position.
7741 *
7742 ***************************************************************************/
AlnMgr2MapRowToRow(SeqAlignPtr sap,Int4 pos,Int4 row1,Int4 row2)7743 NLM_EXTERN Int4 AlnMgr2MapRowToRow(SeqAlignPtr sap, Int4 pos, Int4 row1, Int4 row2)
7744 {
7745    Int4  alnpos;
7747    if (sap == NULL)
7748       return -1;
7749    alnpos = AlnMgr2MapBioseqToSeqAlign(sap, pos, row1);
7750    return (AlnMgr2MapSeqAlignToBioseq(sap, alnpos, row2));
7751 }
7753 /***************************************************************************
7754 *
7755 *  SECTION 7: Functions to change an alignment and retrieve parts of an
7756 *    alignment
7757 *
7758 ***************************************************************************/
7760 /***************************************************************************
7761 *
7762 *   AlnMgr2TruncateSeqAlign truncates a given seqalign to contain only the
7763 *   bioseq coordinates from start to stop on the indicated row.  Anything
7764 *   before those coordinates is discarded; anything remaining afterwards
7765 *   is made into another seqalign and put in sap->next (the original next,
7766 *   if any, is now at sap->next->next).  Doesn't work on parent seqaligns.
7767 *   The function returns TRUE if the original alignment extended past stop.
7768 *
7769 ***************************************************************************/
7770 /* SECTION 7 */
AlnMgr2TruncateSeqAlign(SeqAlignPtr sap,Int4 start,Int4 stop,Int4 row)7771 NLM_EXTERN Boolean AlnMgr2TruncateSeqAlign(SeqAlignPtr sap, Int4 start, Int4 stop, Int4 row)
7772 {
7773    DenseDiagPtr  ddp;
7774    DenseDiagPtr  ddp2;
7775    DenseSegPtr   dsp;
7776    Int4          from;
7777    Int4          i;
7778    Int4          mstart;
7779    Int4          mstop;
7780    SeqAlignPtr   sap1;
7781    SeqAlignPtr   sap2;
7782    Int4          tmp;
7783    Int4          to;
7785    if (sap == NULL || stop<start || row < 1)
7786       return FALSE;
7787    if (sap->segtype == SAS_DENSEG)
7788    {
7789       if (sap->saip == NULL)
7790          AlnMgr2IndexSingleChildSeqAlign(sap);
7791       AlnMgr2GetNthSeqRangeInSA(sap, row, &mstart, &mstop);
7792       if (mstart > start || mstop < stop)
7793          return FALSE;
7794       if (mstart == start)
7795       {
7796          if (mstop == stop)
7797             return FALSE;
7798          else if (mstop > stop)
7799          {
7800             from = AlnMgr2MapBioseqToSeqAlign(sap, start, row);
7801             to = AlnMgr2MapBioseqToSeqAlign(sap, stop, row);
7802             if (to < from)
7803             {
7804                tmp = to;
7805                to = from;
7806                from = tmp;
7807             }
7808             sap1 = AlnMgr2GetSubAlign(sap, from, to, 0, TRUE);
7809             AlnMgr2IndexSingleChildSeqAlign(sap1);
7810             from = AlnMgr2MapBioseqToSeqAlign(sap, stop+1, row);
7811             if (from < 0)
7812                return FALSE;
7813             to = AlnMgr2MapBioseqToSeqAlign(sap, mstop, row);
7814             if (to < from)
7815             {
7816                tmp = to;
7817                to = from;
7818                from = tmp;
7819             }
7820             sap2 = AlnMgr2GetSubAlign(sap, from, to, 0, TRUE);
7821             sap2->next = sap->next;
7822             sap->next = sap2;
7823             dsp = (DenseSegPtr)(sap->segs);
7824             sap->segs = (Pointer)(sap1->segs);
7825             sap1->segs = NULL;
7826             DenseSegFree(dsp);
7827             SeqAlignFree(sap1);
7828             AlnMgr2IndexSingleChildSeqAlign(sap);
7829             AlnMgr2IndexSingleChildSeqAlign(sap2);
7830             return TRUE;
7831          }
7832       } else if (mstart < start) /* throw away the first part */
7833       {
7834          from = AlnMgr2MapBioseqToSeqAlign(sap, start, row);
7835          to = AlnMgr2MapBioseqToSeqAlign(sap, stop, row);
7836          if (to < from)
7837          {
7838             tmp = to;
7839             to = from;
7840             from = tmp;
7841          }
7842          sap1 = AlnMgr2GetSubAlign(sap, from, to, 0, TRUE);
7843          if (mstop == stop) /* done */
7844          {
7845             dsp = (DenseSegPtr)(sap->segs);
7846             sap->segs = (Pointer)(sap1->segs);
7847             sap1->segs = NULL;
7848             DenseSegFree(dsp);
7849             SeqAlignFree(sap1);
7850             AlnMgr2IndexSingleChildSeqAlign(sap);
7851             return FALSE;
7852          } else if (mstop > stop)
7853          {
7854             from = AlnMgr2MapBioseqToSeqAlign(sap, stop+1, row);
7855             if (from < 0)
7856                return FALSE;
7857             to = AlnMgr2MapBioseqToSeqAlign(sap, mstop, row);
7858             if (to < from)
7859             {
7860                tmp = to;
7861                to = from;
7862                from = tmp;
7863             }
7864             sap2 = AlnMgr2GetSubAlign(sap, from, to, 0, TRUE);
7865             sap2->next = sap->next;
7866             sap->next = sap2;
7867             AlnMgr2IndexSingleChildSeqAlign(sap2);
7868             dsp = (DenseSegPtr)(sap->segs);
7869             sap->segs = (Pointer)(sap1->segs);
7870             sap1->segs = NULL;
7871             DenseSegFree(dsp);
7872             SeqAlignFree(sap1);
7873             AlnMgr2IndexSingleChildSeqAlign(sap);
7874             return TRUE;
7875          }
7876       }
7877    } else if (sap->segtype == SAS_DENDIAG)
7878    {
7879       ddp = (DenseDiagPtr)(sap->segs);
7880       if (ddp->dim < row)
7881          return FALSE;
7882       mstart = ddp->starts[row-1];
7883       mstop = mstart + ddp->len - 1;
7884       if (mstart > start || mstop < stop)
7885          return FALSE;
7886       if (mstart == start)
7887       {
7888          if (mstop == stop)
7889             return FALSE;
7890          else if (mstop > stop)
7891          {
7892             ddp2 = DenseDiagNew();
7893             ddp2->dim = ddp->dim;
7894             ddp2->starts = (Int4Ptr)MemNew((ddp->dim)*sizeof(Int4));
7895             ddp2->id = SeqIdDupList(ddp->id);
7896             ddp2->strands = (Uint1Ptr)MemNew((ddp->dim)*sizeof(Uint1));
7897             ddp2->scores = ScoreDup(ddp->scores);
7898             for (i=0; i<ddp->dim; i++)
7899             {
7900                ddp2->starts[i] = ddp->starts[i] + ddp->len - (mstop - stop);
7901                ddp2->strands[i] = ddp->strands[i];
7902             }
7903             ddp2->len = mstop - stop;
7904             ddp->len = ddp->len - (mstop - stop);
7905             sap2 = SeqAlignNew();
7906             sap2->type = SAT_PARTIAL;
7907             sap2->segtype = SAS_DENSEG;
7908             sap2->segs = (Pointer)ddp2;
7909             sap2->next = sap->next;
7910             sap->next = sap2;
7911             AlnMgr2IndexSingleChildSeqAlign(sap2);
7912             return TRUE;
7913          }
7914       } else if (mstart < start)
7915       {
7916          for (i=0; i<ddp->dim; i++)
7917          {
7918             ddp->starts[i] = ddp->starts[i] + start - mstart;
7919          }
7920          ddp->len = ddp->len - (start - mstart);
7921          AlnMgr2IndexSingleChildSeqAlign(sap);
7922          if (mstop == stop)
7923             return FALSE;
7924          else if (mstop > stop)
7925          {
7926             ddp2 = DenseDiagNew();
7927             ddp2->dim = ddp->dim;
7928             ddp2->starts = (Int4Ptr)MemNew((ddp->dim)*sizeof(Int4));
7929             ddp2->id = SeqIdDupList(ddp->id);
7930             ddp2->strands = (Uint1Ptr)MemNew((ddp->dim)*sizeof(Uint1));
7931             ddp2->scores = ScoreDup(ddp->scores);
7932             for (i=0; i<ddp->dim; i++)
7933             {
7934                ddp2->starts[i] = ddp->starts[i] + ddp->len - (mstop - stop);
7935                ddp2->strands[i] = ddp->strands[i];
7936             }
7937             ddp2->len = mstop - stop;
7938             ddp->len = ddp->len - (mstop - stop);
7939             sap2 = SeqAlignNew();
7940             sap2->type = SAT_PARTIAL;
7941             sap2->segtype = SAS_DENSEG;
7942             sap2->segs = (Pointer)ddp2;
7943             sap2->next = sap->next;
7944             sap->next = sap2;
7945             AlnMgr2IndexSingleChildSeqAlign(sap2);
7946             return TRUE;
7947          }
7948       }
7949    } else
7950       return FALSE;
7951    return FALSE;
7952 }
7954 /* SECTION 7 */
7955 /***************************************************************************
7956 *
7957 *  AlnMgr2GetSubAlign retrieves a portion of an indexed alignment, from
7958 *  'from' to 'to' in the row coordinates specified, or if which_row is 0,
7959 *  'from' and 'to' are assumed to be alignment coordinates. If 'to' is -1,
7960 *  the subalignment will go to the end of the specified row (or to the end
7961 *  of the whole alignment). If the alignment is discontinuous and fill_in
7962 *  is FALSE, the alignment will be returned as an SAS_DISC set, each piece
7963 *  represented by a single alignment. If the alignment is discontinuous and
7964 *  fill_in is TRUE, the unaligned regions will be added in to the alignment,
7965 *  with all gaps in all other rows. If the alignment is continuous, it
7966 *  doesn't matter whether fill_in is TRUE or FALSE. (SUBALIGN)
7967 *
7968 ***************************************************************************/
AlnMgr2GetSubAlign(SeqAlignPtr sap,Int4 from,Int4 to,Int4 which_row,Boolean fill_in)7969 NLM_EXTERN SeqAlignPtr AlnMgr2GetSubAlign(SeqAlignPtr sap, Int4 from, Int4 to, Int4 which_row, Boolean fill_in)
7970 {
7971    Int4             a;
7972    AMAlignIndex2Ptr  amaip;
7973    AlnMsg2Ptr        amp;
7974    Boolean          anchored;
7975    Int4             currlen;
7976    DenseSegPtr      dsp;
7977    DenseSegPtr      dsp_new;
7978    Int4             from_aln;
7979    Int4             from_seq;
7980    Int4             i;
7981    SeqIdPtr         id;
7982    Int4             j;
7983    Int4             k;
7984    Int4             len;
7985    Int4             lengthbit;
7986    Int4             minlen;
7987    Boolean          more;
7988    Int4             n;
7989    Int4             numseg;
7990    Int4             numunaln;
7991    AMRowInfoPtr     row;
7992    AMRowInfoPtr     row_head;
7993    AMRowInfoPtr     row_prev;
7994    AMRowInfoPtr     PNTR rowheads;
7995    AMRowInfoPtr     PNTR rows;
7996    SeqAlignPtr      salp;
7997    SeqAlignPtr      salp_head;
7998    SeqAlignPtr      salp_prev;
7999    SeqAlignPtr      sap_real;
8000    Int4             seg;
8001    Int4             start_seg;
8002    Uint1            strand;
8003    SeqAlignPtr      subsalp;
8004    Int4             tmp;
8005    Int4             to_aln;
8006    Int4             to_seq;
8007    Int4             ustart;
8008    Int4             ustop;
8010    if (sap == NULL || sap->saip == NULL)
8011       return NULL;
8012    len = AlnMgr2GetAlnLength(sap, FALSE);
8013    if (which_row == 0 && (to > len-1 || from < 0))
8014       return NULL;
8015    n = AlnMgr2GetNumRows(sap);
8016    if (which_row < 0 || which_row > n)
8017       return NULL;
8018    if (to == -1)
8019    {
8020       if (which_row == 0)
8021          to = len-1;
8022       else
8023          AlnMgr2GetNthSeqRangeInSA(sap, which_row, NULL, &to);
8024    }
8025    if (sap->saip->indextype == INDEX_CHILD)
8026       sap_real = sap;
8027    else if (sap->saip->indextype == INDEX_PARENT)
8028    {
8029       amaip = (AMAlignIndex2Ptr)(sap->saip);
8030       if (amaip->alnstyle == AM2_LITE)
8031          return NULL;
8032       sap_real = amaip->sharedaln;
8033       if (from == 0 && to == len-1 && !AlnMgr2IsSAPDiscAli(sap_real))  /* need whole aln -- take a shortcut! */
8034          return SeqAlignDup(sap_real);
8035    }
8036    if ((a = AlnMgr2FindAnchor(sap_real)) > 0)
8037    {
8038       anchored = TRUE;
8039       salp = SeqAlignDup(sap_real);
8040       AlnMgr2IndexSingleChildSeqAlign(salp);
8041       if (which_row == 0) /* anchor coordinates */
8042       {
8043          AlnMgr2GetNthSeqRangeInSA(salp, a, &from_seq, &to_seq);
8044          from_aln = AlnMgr2MapBioseqToSeqAlign(salp, from_seq, a);
8045          to_aln = AlnMgr2MapBioseqToSeqAlign(salp, to_seq, a);
8046          if (from_aln > to_aln)
8047          {
8048             tmp = from_aln;
8049             from_aln = to_aln;
8050             to_aln = tmp;
8051          }
8052       } else
8053       {
8054          from_aln = AlnMgr2MapBioseqToSeqAlign(salp, from, which_row);
8055          to_aln = AlnMgr2MapBioseqToSeqAlign(salp, to, which_row);
8056          if (from_aln > to_aln)
8057          {
8058             tmp = from_aln;
8059             from_aln = to_aln;
8060             to_aln = tmp;
8061          }
8062       }
8063    } else
8064    {
8065       anchored = FALSE;
8066       salp = sap_real;
8067       if (which_row == 0) /* alignment coordinates */
8068       {
8069          from_aln = from;
8070          to_aln = to;
8071       } else
8072       {
8073          from_aln = AlnMgr2MapBioseqToSeqAlign(salp, from, which_row);
8074          to_aln = AlnMgr2MapBioseqToSeqAlign(salp, to, which_row);
8075          if (from_aln > to_aln)
8076          {
8077             tmp = from_aln;
8078             from_aln = to_aln;
8079             to_aln = tmp;
8080          }
8081       }
8082    }
8083    rows = (AMRowInfoPtr PNTR)MemNew(n*sizeof(AMRowInfoPtr));
8084    amp = AlnMsgNew2();
8085    seg = lengthbit = 0;
8086    currlen = 0;
8087    numunaln = 0;
8088    salp_head = salp_prev = NULL;
8089    while (AlnMgr2GetNextLengthBit(sap, &lengthbit, &seg))
8090    {
8091       if (currlen <= to_aln && seg >= 0 && currlen+lengthbit-1 >= from_aln)
8092       {
8093          numseg = AlnMgr2GetNumSegsInRange(sap, currlen, currlen+lengthbit-1, &start_seg);
8094          numunaln = 0;
8095          for (i=0; i<n; i++)
8096          {
8097             row_head = NULL;
8098             for (j=start_seg; j<numseg+start_seg; j++)
8099             {
8100                AlnMsgReNew2(amp);
8101                AlnMgr2GetNthSegmentRange(sap, j+1, &amp->from_aln, &amp->to_aln);
8102                amp->from_aln = MAX(amp->from_aln, from_aln);
8103                amp->to_aln = MIN(amp->to_aln, to_aln);
8104                amp->row_num = i+1;
8105                while ((more = AlnMgr2GetNextAlnBit(salp, amp)) == TRUE)
8106                {
8107                   if (amp->right_interrupt != NULL && amp->right_interrupt->unalnlen > 0)
8108                      numunaln++;
8109                   row = (AMRowInfoPtr)MemNew(sizeof(AMRowInfo));
8110                   if (amp->type == AM_GAP)
8111                      row->from = -1;
8112                   else
8113                      row->from = amp->from_row;
8114                   row->len = amp->to_row - amp->from_row + 1;
8115                   if (row_head != NULL)
8116                   {
8117                      row_prev->next = row;
8118                      row_prev = row;
8119                   } else
8120                      row_head = row_prev = row;
8121                }
8122             }
8123             rows[i] = row_head;
8124          }
8125       }
8126       rowheads = (AMRowInfoPtr PNTR)MemNew(n*sizeof(AMRowInfoPtr));
8127       for (i=0; i<n; i++)
8128       {
8129          rowheads[i] = rows[i];
8130       }
8131       while (rows[0] != NULL)
8132       {
8133          minlen = -1;
8134          for (i=0; i<n; i++)
8135          {
8136             if (rows[i]->len < minlen || minlen == -1)
8137                minlen = rows[i]->len;
8138          }
8139          for (i=0; i<n; i++)
8140          {
8141             if (rows[i]->len > minlen)
8142             {
8143                row = (AMRowInfoPtr)MemNew(sizeof(AMRowInfo));
8144                row->next = rows[i]->next;
8145                rows[i]->next = row;
8146                if (rows[i]->from == -1)
8147                   row->from = -1;
8148                else if (AlnMgr2GetNthStrand(salp, i) == Seq_strand_minus)
8149                {
8150                   row->from = rows[i]->from;
8151                   rows[i]->from = rows[i]->from + rows[i]->len - 1 - minlen;
8152                } else
8153                   row->from = rows[i]->from + minlen;
8154                row->len = rows[i]->len - minlen;
8155                rows[i]->len = minlen;
8156             }
8157             rows[i] = rows[i]->next;
8158          }
8159       }
8160       for (i=0; i<n; i++)
8161       {
8162          rows[i] = rowheads[i];
8163       }
8164       MemFree(rowheads);
8165       dsp = DenseSegNew();
8166       row = rows[0];
8167       while (row != NULL)
8168       {
8169          dsp->numseg++;
8170          row = row->next;
8171       }
8172       if (fill_in)
8173          dsp->numseg += numunaln;
8174       dsp->dim = n;
8175       dsp->lens = (Int4Ptr)MemNew((dsp->numseg)*sizeof(Int4));
8176       dsp->starts = (Int4Ptr)MemNew((dsp->numseg)*(dsp->dim)*sizeof(Int4));
8177       dsp->strands = (Uint1Ptr)MemNew((dsp->numseg)*(dsp->dim)*sizeof(Int4));
8178       j = 0;
8179       row = rows[0];
8180       while (row != NULL)
8181       {
8182          dsp->lens[j] = row->len;
8183          j++;
8184          row = row->next;
8185       }
8186       id = AlnMgr2GetNthSeqIdPtr(salp, 0);
8187       dsp->ids = id;
8188       for (i=0; i<n; i++)
8189       {
8190          if (i > 0)
8191          {
8192             id->next = AlnMgr2GetNthSeqIdPtr(salp, i+1);
8193             id = id->next;
8194          }
8195          row = rows[i];
8196          j = 0;
8197          strand = AlnMgr2GetNthStrand(salp, i+1);
8198          while (row != NULL)
8199          {
8200             dsp->starts[n*j + i] = row->from;
8201             dsp->strands[n*j + i] = strand;
8202             j++;
8203             row = row->next;
8204          }
8205       }
8206       if (fill_in)
8207       {
8208          for (i=0; i<n; i++)
8209          {
8210             AlnMgr2GetNthUnalignedForNthRow(sap, seg+1, i+1, &ustart, &ustop);
8211             if (ustart >= 0 && ustop >= ustart)
8212             {
8213                for (k=0; k<n; k++)
8214                {
8215                   dsp->starts[n*j + k] = -1;
8216                   dsp->strands[n*j + k] = dsp->strands[i];
8217                }
8218                dsp->starts[n*j + i] = ustart;
8219                j++;
8220             }
8221          }
8222       }
8223       subsalp = SeqAlignNew();
8224       subsalp->type = SAT_PARTIAL;
8225       subsalp->segtype = SAS_DENSEG;
8226       subsalp->dim = n;
8227       subsalp->segs = (Pointer)(dsp);
8228       for (i=0; i<n; i++)
8229       {
8230          row = rows[i];
8231          while (row != NULL)
8232          {
8233             row_prev = row->next;
8234             MemFree(row);
8235             row = row_prev;
8236          }
8237       }
8238       if (seg < 0)
8239          seg = -seg;
8240       currlen += lengthbit;
8241       seg++;
8242       if (salp_head != NULL)
8243       {
8244          salp_prev->next = subsalp;
8245          salp_prev = subsalp;
8246       } else
8247          salp_head = salp_prev = subsalp;
8248    }
8249    MemFree(rows);
8250    AlnMsgFree2(amp);
8251    if (fill_in && salp_head->next != NULL)  /* stick subsalps together into a big aln */
8252    {
8253       j = 0;
8254       subsalp = salp_head;
8255       while (subsalp != NULL)
8256       {
8257          dsp = (DenseSegPtr)(subsalp->segs);
8258          j += dsp->numseg;
8259          subsalp = subsalp->next;
8260       }
8261       dsp_new = DenseSegNew();
8262       dsp_new->dim = n;
8263       dsp_new->numseg = j;
8264       dsp_new->lens = (Int4Ptr)MemNew((dsp->numseg)*sizeof(Int4));
8265       dsp_new->starts = (Int4Ptr)MemNew((dsp->numseg)*(dsp->dim)*sizeof(Int4));
8266       dsp_new->strands = (Uint1Ptr)MemNew((dsp->numseg)*(dsp->dim)*sizeof(Int4));
8267       subsalp = salp_head;
8268       k = 0;
8269       while (subsalp != NULL)
8270       {
8271          dsp = (DenseSegPtr)(subsalp->segs);
8272          for (j=0; j<dsp->numseg; j++)
8273          {
8274             dsp_new->lens[k] = dsp->lens[j];
8275             for (i=0; i<n; i++)
8276             {
8277                dsp_new->starts[k*n+i] = dsp->starts[j*n+i];
8278                dsp_new->strands[k*n+i] = dsp->strands[j*n+i];
8279             }
8280             k++;
8281          }
8282          subsalp = subsalp->next;
8283       }
8284       subsalp = SeqAlignNew();
8285       subsalp->type = SAT_PARTIAL;
8286       subsalp->segtype = SAS_DENSEG;
8287       subsalp->dim = n;
8288       subsalp->segs = (Pointer)(dsp_new);
8289       SeqAlignSetFree(salp_head);
8290    } else if (!fill_in && salp_head->next != NULL)
8291    {
8292       subsalp = SeqAlignNew();
8293       subsalp->segtype = SAS_DISC;
8294       subsalp->type = SAT_PARTIAL;
8295       subsalp->segs = (SeqAlignPtr)(salp_head);
8296       salp_prev = salp_head;
8297       while (salp_prev != NULL)
8298       {
8299          AMAlignIndexFreeEitherIndex(salp_prev);
8300          salp_prev = salp_prev->next;
8301       }
8302    } else  /* if !salp_head->next */
8303    {
8304       subsalp = salp_head;
8305       subsalp->dim = AlnMgr2GetNumRows(subsalp);
8306       subsalp->type = SAT_PARTIAL;
8307       AMAlignIndexFreeEitherIndex(subsalp);
8308    }
8309    if (anchored)
8310       SeqAlignFree(salp);
8311    return subsalp;
8312 }
8314 /***************************************************************************
8315 *
8316 *  SECTION 8: Miscellaneous functions to compute useful information
8317 *    about an alignment
8318 *
8319 ***************************************************************************/
8320 /* SECTION 8 */
8321 /***************************************************************************
8322 *
8323 *  AlnMgr2ComputeScoreForSeqAlign computes an ad hoc numerical score for
8324 *  an indexed alignment by computing a similarity score for the whole
8325 *  alignment (residue pair by residue pair score, from a matrix for proteins
8326 *  and identity for nucleotides) and then subtracting gap open and gap
8327 *  extension penalties.
8328 *
8329 ***************************************************************************/
AlnMgr2ComputeScoreForSeqAlign(SeqAlignPtr sap)8330 NLM_EXTERN Int4 AlnMgr2ComputeScoreForSeqAlign(SeqAlignPtr sap)
8331 {
8332    AMFreqPtr    afp;
8333    DenseSegPtr  dsp;
8334    Int4         gaplen;
8335    Int4         i;
8336    Boolean      is_prot;
8337    Int4         j;
8338    Int4         len;
8339    Int4         mismatch;
8340    Int4         numgaps;
8341    Int4         numseqs;
8342    Boolean      open;
8343    Int4         res1;
8344    Int4         res2;
8345    Int4         score;
8346    Int4         seqscore;
8348    if (sap->segtype == SAS_DISC)
8349       return -1;
8350    if (sap->saip == NULL)
8351       AlnMgr2IndexSingleChildSeqAlign(sap);
8352    is_prot = AlnMgr2IsItProtein(sap);
8353    len = AlnMgr2GetAlnLength(sap, FALSE);
8354    dsp = (DenseSegPtr)(sap->segs);
8355    numseqs = dsp->dim;
8356    open = FALSE;
8357    gaplen = 0;
8358    numgaps = 0;
8359    for (i=0; i<dsp->dim; i++)
8360    {
8361       for (j=0; j<dsp->numseg; j++)
8362       {
8363          if (dsp->starts[(dsp->dim)*j+i] == -1)
8364          {
8365             if (!open)
8366             {
8367                gaplen += dsp->lens[j];
8368                numgaps++;
8369                open = TRUE;
8370             } else
8371                gaplen += dsp->lens[j];
8372          } else
8373             open = FALSE;
8374       }
8375    }
8376    mismatch = 0;
8377    seqscore = 0;
8378    afp = AlnMgr2ComputeFreqMatrix(sap, 0, -1, 0);
8379    if (afp == NULL)
8380      return -1;
8381    for (i=0; i<afp->len; i++)
8382    {
8383       res1 = -1;
8384       res2 = -1;
8385       for (j=0; j<afp->size; j++)
8386       {
8387          if (afp->freq[j][i] == 1)
8388          {
8389             if (res1 == -1)
8390                res1 = j;
8391             else
8392                res2 = j;
8393          } else if (afp->freq[j][i] == 2)
8394             res1 = res2 = j;
8395       }
8396       if (res1 > 0 && res2 > 0) /* don't penalize gaps */
8397          seqscore += AlnMgr2GetScoreForPair(res1, res2, is_prot);
8398    }
8399    AMFreqFree(afp);
8400    score = seqscore + numgaps*AM_GAPOPEN + gaplen*AM_GAPEXT;
8401    return score;
8402 }
/* Reads the residues for start..stop (capped at AM_SEQPORTSIZE) into buf   */
/* through the SeqPort cached in *spp. The port is (re)opened when there is */
/* none yet or when start/stop fall outside the window that begins at       */
/* *bufpos; *bufpos is updated to the new window start. Returns whatever    */
/* SeqPortRead returns.                                                     */
static Int4 AlnMgr2SeqPortRead(SeqPortPtr PNTR spp, Uint1Ptr buf, Int4Ptr bufpos, Int4 start, Int4 stop, Uint1 strand, Uint1 code, BioseqPtr bsp)
{
    Int4     nwanted;
    Boolean  reopen;
    Int4     window_start;

    reopen = FALSE;
    if (*spp == NULL) /* first call */
        reopen = TRUE;
    else if ((start < *bufpos) || (start > *bufpos+AM_SEQPORTSIZE)
             || (stop < *bufpos) || (stop > *bufpos+AM_SEQPORTSIZE))
    {
        /* request is not covered by the current port: discard it */
        SeqPortFree(*spp);
        reopen = TRUE;
    }
    if (reopen)
    {
        if (strand == Seq_strand_minus)
        {
            window_start = MAX(0, stop-AM_SEQPORTSIZE);
            *spp = SeqPortNew(bsp, window_start, stop, strand, code);
        } else
        {
            window_start = start;
            *spp = SeqPortNew(bsp, start, MIN(start+AM_SEQPORTSIZE, bsp->length-1), strand, code);
        }
        *bufpos = window_start;
    }
    nwanted = (MIN(start+AM_SEQPORTSIZE-1, stop)) - start+1;
    return (SeqPortRead(*spp, buf, nwanted));
}
8432 /* SECTION 8 */
8433 /***************************************************************************
8434 *
8435 *  AlnMgr2ComputeFreqMatrix takes an indexed seqalign and returns a matrix
8436 *  indicating nucleotide or amino acid frequency at each position of the
8437 *  alignment. The matrix can be made over only a part of the alignment, if
8438 *  from and to are nonzero, and if row is nonzero, from and to are taken
8439 *  to be bioseq coordinates from that row (if row == 0 from and to are
8440 *  assumed to be alignment coordinates).
8441 *
8442 ***************************************************************************/
AlnMgr2ComputeFreqMatrix(SeqAlignPtr sap,Int4 from,Int4 to,Int4 row)8443 NLM_EXTERN AMFreqPtr AlnMgr2ComputeFreqMatrix(SeqAlignPtr sap, Int4 from, Int4 to, Int4 row)
8444 {
8445    AMFreqPtr   afp;
8446    AlnMsg2Ptr  amp;
8447    BioseqPtr   bsp;
8448    Uint1       buf[AM_SEQPORTSIZE];
8449    Int4        bufpos;
8450    Uint1       code;
8451    Int4        counter;
8452    Int4        ctr;
8453    Int4        from_a;
8454    Int4        i;
8455    Boolean     isna;
8456    Int4        j;
8457    Int4        l;
8458    Int4        len;
8459    Boolean     more;
8460    Int4        n;
8461    Int4        numrows;
8462    Uint1       res;
8463    SeqIdPtr    sip;
8464    SeqPortPtr  spp;
8465    Int4        tmp;
8466    Int4        to_a;
8468    if (sap == NULL || sap->saip == NULL || (from > to && to != -1))
8469       return NULL;
8470    numrows = AlnMgr2GetNumRows(sap);
8471    bufpos = -1;
8472    if (row > numrows || row < 0)
8473       return NULL;
8474    len = AlnMgr2GetAlnLength(sap, FALSE);
8475    if (to >= len)
8476       return NULL;
8477    if (to == -1)
8478       to = len-1;
8479    sip = AlnMgr2GetNthSeqIdPtr(sap, 1);
8480    bsp = BioseqLockById(sip);
8481    if (bsp != NULL)
8482       isna = ISA_na(bsp->mol);
8483    else
8484    {
8485       SeqIdFree(sip);
8486       return NULL;
8487    }
8488    BioseqUnlock(bsp);
8489    SeqIdFree(sip);
8490    if (isna)
8491       code = Seq_code_ncbi4na;
8492    else
8493       code = Seq_code_ncbistdaa;
8494    afp = (AMFreqPtr)MemNew(sizeof(AMFreq));
8495    afp->len = len;
8496    if (isna)
8497       afp->size = AM_NUCSIZE;
8498    else
8499       afp->size = AM_PROTSIZE;
8500    afp->freq = (Int4Ptr PNTR)MemNew((afp->size)*sizeof(Int4Ptr));
8501    for (i=0; i<afp->size; i++)
8502    {
8503       afp->freq[i] = (Int4Ptr)MemNew((afp->len)*sizeof(Int4));
8504    }
8505    amp = AlnMsgNew2();
8506    if (row != 0)
8507    {
8508       from_a = AlnMgr2MapBioseqToSeqAlign(sap, from, row);
8509       to_a = AlnMgr2MapBioseqToSeqAlign(sap, to, row);
8510       if (from_a > to_a)
8511       {
8512          tmp = to_a;
8513          to_a = from_a;
8514          from_a = tmp;
8515       }
8516    } else
8517    {
8518       from_a = from;
8519       to_a = to;
8520    }
8521    for (i=0; i<numrows; i++)
8522    {
8523       spp = NULL;
8524       AlnMsgReNew2(amp);
8525       amp->from_aln = from_a;
8526       amp->to_aln = to_a;
8527       amp->row_num = i+1;
8528       j = 0;
8529       while ((more = AlnMgr2GetNextAlnBit(sap, amp)))
8530       {
8531          if (amp->type == AM_GAP)
8532          {
8533             for (n=0; n<(amp->to_row - amp->from_row+1); n++)
8534             {
8535                afp->freq[0][j] = afp->freq[0][j]+1;
8536                j++;
8537             }
8538          } else if (amp->type == AM_SEQ)
8539          {
8540             sip = AlnMgr2GetNthSeqIdPtr(sap, i+1);
8541             bsp = BioseqLockById(sip);
8542             if (bsp != NULL) {
8543               for (l=amp->from_row; l<=amp->to_row; l+=AM_SEQPORTSIZE)
8544               {
8545                  counter = AlnMgr2SeqPortRead(&spp, buf, &bufpos, l, MIN(l+AM_SEQPORTSIZE, amp->to_row), amp->strand, code, bsp);
8546                  ctr = 0;
8547                  while (ctr < counter)
8548                  {
8549                     res = buf[ctr];
8550                     if (isna)
8551                     {
8552                        if (res == 1 || res == 2)
8553                           afp->freq[res][j]++;
8554                        else if (res == 4)
8555                           afp->freq[3][j]++;
8556                        else if (res == 8)
8557                           afp->freq[4][j]++;
8558                        else
8559                           afp->freq[5][j]++;
8560                     } else
8561                        afp->freq[res][j]++;
8562                     j++;
8563                     ctr++;
8564                  }
8565               }
8566               BioseqUnlock(bsp);
8567             }
8568             SeqIdFree(sip);
8569          }
8570       }
8571       SeqPortFree(spp);
8572    }
8573    AlnMsgFree2(amp);
8574    return afp;
8575 }
8577 /* SECTION 8 */
8578 /***************************************************************************
8579 *
8580 *  AlnMgr2GetScoreForPair assigns scores to nucleotide and protein residue
8581 *  pairs. Nucleotide pairs are scored according to a standard mismatch
8582 *  penalty, and amino acid pairs are scored according to the BLOSUM62
8583 *  matrix below. This matrix has been rearranged so that the rows and
8584 *  columns appear in alphabetical order, so that it directly correlates
8585 *  to the NCBIstdaa alphabet (with a minus-one difference).
8586 *
8587 ***************************************************************************/
AlnMgr2GetScoreForPair(Int4 res1,Int4 res2,Boolean is_prot)8588 static Int4 AlnMgr2GetScoreForPair(Int4 res1, Int4 res2, Boolean is_prot)
8589 {
8590    Int4  matrix[24][24] = {
8591      {4, -2, 0, -2, -1, -2, 0, -2, -1, -1, -1, -1, -2, -1, -1, -1, 1, 0, 0, -3, 0, -2, -1, -4},
8592      {-2, 4, -3, 4, 1, -3, -1, 0, -3, 0, -4, -3, 3, -2, 0, -1, 0, -1, -3, -4, -1, -3, 1, -4},
8593      {0, -3, 9, -3, -4, -2, -3, -3, -1, -3, -1, -1, -3, -3, -3, -3, -1, -1, -1, -2, -2, -2, -3, -4},
8594      {-2, 4, -3, 6, 2, -3, -1, -1, -3, -1, -4, -3, 1, -1, 0, -2, 0, -1, -3, -4, -1, -3, 1, -4},
8595      {-1, 1, -4, 2, 5, -3, -2, 0, -3, 1, -3, -2, 0, -1, 2, 0, 0, -1, -2, -3, -1, -2, 4, -4},
8596      {-2, -3, -2, -3, -3, 6, -3, -1, 0, -3, 0, 0, -3, -4, -3, -3, -2, -2, -1, 1, -1, 3, -3, -4},
8597      {0, -1, -3, -1, -2, -3, 6, -2, -4, -2, -4, -3, 0, -2, -2, -2, 0, -2, -3, -2, -1, -3, -2, -4},
8598      {-2, 0, -3, -1, 0, -1, -2, 8, -3, -1, -3, -2, 1, -2, 0, 0, -1, -2, -3, -2, -1, 2, 0, -4},
8599      {-1, -3, -1, -3, -3, 0, -4, -3, 4, -3, 2, 1, -3, -3, -3, -3, -2, -1, 3, -3, -1, -1, -3, -4},
8600      {-1, 0, -3, -1, 1, -3, -2, -1, -3, 5, -2, -1, 0, -1, 1, 2, 0, -1, -2, -3, -1, -2, 1, -4},
8601      {-1, -4, -1, -4, -3, 0, -4, -3, 2, -2, 4, 2, -3, -3, -2, -2, -2, -1, 1, -2, -1, -1, -3, -4},
8602      {-1, -3, -1, -3, -2, 0, -3, -2, 1, -1, 2, 5, -2, -2, 0, -1, -1, -1, 1, -1, -1, -1, -1, -4},
8603      {-2, 3, -3, 1, 0, -3, 0, 1, -3, 0, -3, -2, 6, -2, 0, 0, 1, 0, -3, -4, -1, -2, 0, -4},
8604      {-1, -2, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -2, 7, -1, -2, -1, -1, -2, -4, -2, -3, -1, -4},
8605      {-1, 0, -3, 0, 2, -3, -2, 0, -3, 1, -2, 0, 0, -1, 5, 1, 0, -1, -2, -2, -1, -1, 3, -4},
8606      {-1, -1, -3, -2, 0, -3, -2, 0, -3, 2, -2, -1, 0, -2, 1, 5, -1, -1, -3, -3, -1, -2, 0, -4},
8607      {1, 0, -1, 0, 0, -2, 0, -1, -2, 0, -2, -1, 1, -1, 0, -1, 4, 1, -2, -3, 0, -2, 0, -4},
8608      {0, -1, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 1, 5, 0, -2, 0, -2, -1, -4},
8609      {0, -3, -1, -3, -2, -1, -3, -3, 3, -2, 1, 1, -3, -2, -2, -3, -2, 0, 4, -3, -1, -1, -2, -4},
8610      {-3, -4, -2, -4, -3, 1, -2, -2, -3, -3, -2, -1, -4, -4, -2, -3, -3, -2, -3, 11, -2, 2, -3, -4},
8611      {0, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -1, -1, 0, 0, -1, -2, -1, -1, -1, -4},
8612      {-2, -3, -2, -3, -2, 3, -3, 2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1, 2, -1, 7, -2, -4},
8613      {-1, 1, -3, 1, 4, -3, -2, 0, -3, 1, -3, -1, 0, -1, 3, 0, 0, -1, -2, -3, -1, -2, 4, -4},
8614      {-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, 1}};
8616    if (is_prot) /* protein->use BLOSUM62 matrix */
8617       return matrix[res1-1][res2-1];
8618    else /* nucleotide->use match score/mismatch penalty */
8619    {
8620       if (res1 == 0 || res2 == 0) /* don't count gaps */
8621          return 0;
8622       if (res1 == res2)
8623          return 1;
8624       else
8625          return -3;
8626    }
8627 }
8629 /* SECTION 8 */
8630 /***************************************************************************
8631 *
8632 *  AlnMgr2IsItProtein takes an indexed alignment and quickly decides if
8633 *  it's a protein or nucleotide alignment, returning TRUE for protein.
8634 *
8635 ***************************************************************************/
AlnMgr2IsItProtein(SeqAlignPtr sap)8636 NLM_EXTERN Boolean AlnMgr2IsItProtein(SeqAlignPtr sap)
8637 {
8638    BioseqPtr  bsp;
8639    Boolean    is_na;
8640    SeqIdPtr   sip;
8642    if (sap == NULL || sap->saip == NULL)
8643       return FALSE;
8644    sip = AlnMgr2GetNthSeqIdPtr(sap, 1);
8645    bsp = BioseqLockById(sip);
8646    if (bsp == NULL)
8647       return FALSE;
8648    is_na = ISA_na(bsp->mol);
8649    SeqIdFree(sip);
8650    BioseqUnlock(bsp);
8651    return (!is_na);
8652 }
8654 /***************************************************************************
8655 *
8656 *  SECTION 9: Sorting functions and other algorithms to help order
8657 *  alignments for various purposes
8658 *
8659 ***************************************************************************/
8661 /* SECTION 9 */
AMCompareStarts(VoidPtr ptr1,VoidPtr ptr2)8662 static int LIBCALLBACK AMCompareStarts(VoidPtr ptr1, VoidPtr ptr2)
8663 {
8664    AMBitty2Ptr  bit1;
8665    AMBitty2Ptr  bit2;
8667    if (ptr1 != NULL && ptr2 != NULL)
8668    {
8669       bit1 = (AMBitty2Ptr)ptr1;
8670       bit2 = (AMBitty2Ptr)ptr2;
8671       if (bit1->num2 < bit2->num2)
8672          return -1;
8673       else if (bit1->num2 > bit2->num2)
8674          return 1;
8675       else if (bit1->num3 > bit2->num3) /* compare aln lengths */
8676          return -1;
8677       else if (bit1->num3 < bit2->num3)
8678          return 1;
8679       else
8680          return 0;
8681    }
8682    return 0;
8683 }
8685 /* SECTION 9 */
8686 /***************************************************************************
8687 *
8688 *  AlnMgr2SortAlnSetByNthRowPos takes an indexed parent alignment and sorts
8689 *  all the child alignments along the row indicated. If the indicated row
8690 *  is aligned on the plus strand, the alignments are sorted from smaller
8691 *  to larger coordinates along that row; otherwise they are sorted in
8692 *  reverse order.
8693 *
8694 ***************************************************************************/
AlnMgr2SortAlnSetByNthRowPos(SeqAlignPtr sap,Int4 row)8695 NLM_EXTERN void AlnMgr2SortAlnSetByNthRowPos(SeqAlignPtr sap, Int4 row)
8696 {
8697    AMAlignIndex2Ptr  amaip;
8698    AMBitty2Ptr       bit;
8699    Int4             i;
8700    SeqAlignPtr      PNTR saparray;
8701    Uint1            strand;
8703    if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
8704       return;
8705    amaip = (AMAlignIndex2Ptr)(sap->saip);
8706    bit = (AMBitty2Ptr)MemNew((amaip->numsaps)*sizeof(AMBitty2));
8707    saparray = (SeqAlignPtr PNTR)MemNew((amaip->numsaps)*sizeof(SeqAlignPtr));
8708    for (i=0; i<amaip->numsaps; i++)
8709    {
8710       bit[i].num1 = i;
8711       AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], row, &bit[i].num2, NULL);
8712       bit[i].num3 = AlnMgr2GetAlnLength(amaip->saps[i], FALSE);
8713       strand = AlnMgr2GetNthStrand(amaip->saps[i], row);
8714       if (strand == Seq_strand_minus)
8715          bit[i].num2 = -bit[i].num2;
8716       saparray[i] = amaip->saps[i];
8717    }
8718    HeapSort(bit, amaip->numsaps, sizeof(AMBitty2), AMCompareStarts);
8719    for (i=0; i<amaip->numsaps; i++)
8720    {
8721       amaip->saps[i] = saparray[bit[i].num1];
8722    }
8723    MemFree(saparray);
8724    MemFree(bit);
8725    if (amaip->alnstyle != AM2_LITE)
8726       AlnMgr2ReIndexSeqAlign(sap);
8727 }
8730 /***************************************************************************
8731 *
8732 *  SECTION 10: Basic alignment operations
8733 *
8734 ***************************************************************************/
8736 /***************************************************************************
8737 *
8738 *  AlnMgr2MergeTwoAlignments takes two alignments, with identical rows in
8739 *  the same order (otherwise it rejects the alignments), and merges them
8740 *  into a single alignment. If there is unaligned space between the two
8741 *  alignments and this space is the same length for every row, the function
8742 *  aligns those sequences; it rejects alignments when the unaligned spaces
8743 *  are different sizes. The function returns a newly allocated alignment.
8744 *
8745 ***************************************************************************/
AlnMgr2MergeTwoAlignments(SeqAlignPtr sap1_orig,SeqAlignPtr sap2_orig)8746 NLM_EXTERN SeqAlignPtr AlnMgr2MergeTwoAlignments(SeqAlignPtr sap1_orig, SeqAlignPtr sap2_orig)
8747 {
8748    Int4         c;
8749    DenseSegPtr  dsp;
8750    DenseSegPtr  dsp1;
8751    DenseSegPtr  dsp2;
8752    DenseSegPtr  dsp_new;
8753    Int4         i;
8754    Int4         j;
8755    Int4         n1;
8756    Int4         n2;
8757    SeqAlignPtr  sap1;
8758    SeqAlignPtr  sap2;
8759    SeqAlignPtr  sap_new;
8760    SeqIdPtr     sip1;
8761    SeqIdPtr     sip2;
8762    Int4         start1;
8763    Int4         start2;
8764    Int4         stop1;
8765    Int4         stop2;
8766    Uint1        strand1;
8767    Uint1        strand2;
8768    SeqAlignPtr  tmp;
8770    if (sap1_orig == NULL || sap2_orig == NULL)
8771       return NULL;
8772    if (sap1_orig->next != NULL)
8773    {
8774       AlnMgr2IndexSeqAlign(sap1_orig);
8775       sap1 = AlnMgr2GetSubAlign(sap1_orig, 0, -1, 0, TRUE);
8776    } else
8777       sap1 = SeqAlignDup(sap1_orig);
8778    if (sap2_orig->next != NULL)
8779    {
8780       AlnMgr2IndexSeqAlign(sap2_orig);
8781       sap2 = AlnMgr2GetSubAlign(sap2_orig, 0, -1, 0, TRUE);
8782    } else
8783       sap2 = SeqAlignDup(sap2_orig);
8784    AlnMgr2IndexSingleChildSeqAlign(sap1);
8785    AlnMgr2IndexSingleChildSeqAlign(sap2);
8786    n1 = AlnMgr2GetNumRows(sap1);
8787    n2 = AlnMgr2GetNumRows(sap2);
8788    if (n1 != n2)
8789    {
8790       SeqAlignFree(sap1);
8791       SeqAlignFree(sap2);
8792       return NULL;
8793    }
8794    /* put the alignments in order by the first row */
8795    AlnMgr2GetNthSeqRangeInSA(sap1, 1, &start1, &stop1);
8796    AlnMgr2GetNthSeqRangeInSA(sap2, 1, &start2, &stop2);
8797    strand1 = AlnMgr2GetNthStrand(sap1, 1);
8798    if (strand1 == Seq_strand_minus)
8799    {
8800       if (stop2 > start1)
8801       {
8802          tmp = sap1;
8803          sap1 = sap2;
8804          sap2 = tmp;
8805       }
8806    } else
8807    {
8808       if (stop1 > start2)
8809       {
8810          tmp = sap1;
8811          sap1 = sap2;
8812          sap2 = tmp;
8813       }
8814    }
8815    dsp1 = (DenseSegPtr)(sap1->segs);
8816    dsp2 = (DenseSegPtr)(sap2->segs);
8817    sip1 = dsp1->ids;
8818    sip2 = dsp2->ids;
8819    while (sip1 != NULL && sip2 != NULL)
8820    {
8821       if (SeqIdComp(sip1, sip2) != SIC_YES)
8822       {
8823          SeqAlignFree(sap1);
8824          SeqAlignFree(sap2);
8825          return NULL;
8826       }
8827       sip1 = sip1->next;
8828       sip2 = sip2->next;
8829    }
8830    dsp = DenseSegNew();
8831    dsp->dim = n1;
8832    dsp->numseg = 1;
8833    dsp->starts = (Int4Ptr)MemNew(n1*sizeof(Int4));
8834    dsp->lens = (Int4Ptr)MemNew(sizeof(Int4));
8835    dsp->strands = (Uint1Ptr)MemNew(n1*sizeof(Int4));
8836    for (i=0; i<n1; i++)
8837    {
8838       strand1 = AlnMgr2GetNthStrand(sap1, i+1);
8839       strand2 = AlnMgr2GetNthStrand(sap2, i+1);
8840       if (strand1 != strand2)
8841       {
8842          DenseSegFree(dsp);
8843          SeqAlignFree(sap1);
8844          SeqAlignFree(sap2);
8845          return NULL;
8846       }
8847       AlnMgr2GetNthSeqRangeInSA(sap1, i+1, &start1, &stop1);
8848       AlnMgr2GetNthSeqRangeInSA(sap2, i+1, &start2, &stop2);
8849       if (strand1 == Seq_strand_minus)
8850       {
8851          dsp->starts[i] = stop2 + 1;
8852          if (i == 0)
8853             dsp->lens[0] = start2 - (stop2 + 1);
8854          else
8855          {
8856             if (start2 - (stop2 + 1) != dsp->lens[0])
8857             {
8858                DenseSegFree(dsp);
8859                SeqAlignFree(sap1);
8860                SeqAlignFree(sap2);
8861                return NULL;
8862             }
8863          }
8864       } else
8865       {
8866          dsp->starts[i] = stop1 + 1;
8867          if (i == 0)
8868             dsp->lens[0] = start2 - (stop1 + 1);
8869          else
8870          {
8871             if (start2 - (stop1 + 1) != dsp->lens[0])
8872             {
8873                DenseSegFree(dsp);
8874                SeqAlignFree(sap1);
8875                SeqAlignFree(sap2);
8876                return NULL;
8877             }
8878          }
8879       }
8880       dsp->strands[i] = strand1;
8881    }
8882    if (dsp->lens[0] == 0)
8883    {
8884       DenseSegFree(dsp);
8885       dsp = NULL;
8886    }
8887    dsp_new = DenseSegNew();
8888    dsp_new->numseg = dsp1->numseg + dsp2->numseg;
8889    if (dsp != NULL)
8890       dsp_new->numseg++;
8891    dsp_new->dim = n1;
8892    dsp_new->starts = (Int4Ptr)MemNew(dsp_new->dim*dsp_new->numseg*sizeof(Int4));
8893    dsp_new->lens = (Int4Ptr)MemNew(dsp_new->numseg*sizeof(Int4));
8894    dsp_new->strands = (Uint1Ptr)MemNew(dsp_new->dim*dsp_new->numseg*sizeof(Uint1));
8895    for (i=0; i<dsp1->numseg; i++)
8896    {
8897       for (j=0; j<n1; j++)
8898       {
8899          dsp_new->starts[i*n1 + j] = dsp1->starts[i*n1 + j];
8900          dsp_new->strands[i*n1 + j] = dsp1->strands[i*n1 + j];
8901       }
8902       dsp_new->lens[i] = dsp1->lens[i];
8903    }
8904    c = dsp1->numseg;
8905    if (dsp != NULL)
8906    {
8907       for (j=0; j<n1; j++)
8908       {
8909          dsp_new->starts[c*n1 + j] = dsp->starts[j];
8910          dsp_new->strands[c*n1 + j] = dsp->strands[j];
8911       }
8912       dsp_new->lens[c] = dsp->lens[0];
8913       c++;
8914    }
8915    for (i=0; i<dsp2->numseg; i++, c++)
8916    {
8917       for (j=0; j<n1; j++)
8918       {
8919          dsp_new->starts[c*n1 + j] = dsp2->starts[i*n1 + j];
8920          dsp_new->strands[c*n1 + j] = dsp2->strands[i*n1 + j];
8921       }
8922       dsp_new->lens[c] = dsp2->lens[i];
8923    }
8924    dsp_new->ids = SeqIdDupList(dsp1->ids);
8925    sap_new = SeqAlignNew();
8926    sap_new->segtype = SAS_DENSEG;
8927    sap_new->dim = n1;
8928    sap_new->segs = (Pointer)dsp_new;
8929    if (dsp != NULL)
8930       DenseSegFree(dsp);
8931    SeqAlignFree(sap1);
8932    SeqAlignFree(sap2);
8933    return sap_new;
8934 }
8936 /* SECTION 10 */
8937 /***************************************************************************
8938 *
8939 *  AlnMgr2ExtendToCoords takes an indexed child seqalign and blindly extends
8940 *  it to the coordinates specified on the given row. If other rows are too
8941 *  short to allow this extension, the alignment is extended as far as
8942 *  possible. If to == -1 the extension goes to the end of the sequence
8943 *  specified.
8944 *
8945 ***************************************************************************/
AlnMgr2ExtendToCoords(SeqAlignPtr sap,Int4 from,Int4 to,Int4 row)8946 NLM_EXTERN void AlnMgr2ExtendToCoords(SeqAlignPtr sap, Int4 from, Int4 to, Int4 row)
8947 {
8948    BioseqPtr    bsp;
8949    Int4         diff1;
8950    Int4         diff2;
8951    DenseSegPtr  dsp;
8952    DenseSegPtr  dsp_new;
8953    Int4         i;
8954    Int4         j;
8955    Int4         numrows;
8956    Int4         numseg;
8957    Int4         prediff1;
8958    Int4         prediff2;
8959    Int4         seg;
8960    SeqIdPtr     sip;
8961    Int4         start;
8962    Int4         stop;
8964    if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_CHILD)
8965       return;
8966    numrows = AlnMgr2GetNumRows(sap);
8967    if (row < 1 || row > numrows)
8968       return;
8969    AlnMgr2GetNthSeqRangeInSA(sap, row, &start, &stop);
8970    numseg = 0;
8971    dsp = (DenseSegPtr)(sap->segs);
8972    if (start <= from)
8973       from = start;
8974    else
8975       numseg++;
8976    diff1 = start - from;
8977    sip = AlnMgr2GetNthSeqIdPtr(sap, row);
8978    bsp = BioseqLockById(sip);
8979    if (to == -1)
8980       to = bsp->length - 1;
8981    BioseqUnlock(bsp);
8982    SeqIdFree(sip);
8983    if (stop >= to)
8984       to = stop;
8985    else
8986       numseg++;
8987    diff2 = to - stop;
8988    if (numseg == 0)
8989       return;
8990    sip = dsp->ids;
8991    prediff1 = diff1;
8992    prediff2 = diff2;
8993    for (i=0; i<numrows; i++)
8994    {
8995       bsp = BioseqLockById(sip);
8996       if (dsp->strands[i] == Seq_strand_minus)
8997       {
8998          if (dsp->starts[i]+dsp->lens[0]+diff1 > bsp->length)
8999             diff1 = bsp->length - (dsp->starts[i] + dsp->lens[0]);
9000          if (dsp->starts[(dsp->numseg-1)*dsp->dim+i] > diff2)
9001             diff2 = dsp->starts[(dsp->numseg-1)*dsp->dim+i];
9002       } else
9003       {
9004          if (dsp->starts[i] < diff1)
9005             diff1 = dsp->starts[i];
9006          if (dsp->starts[(dsp->numseg-1)*dsp->dim+i]+dsp->lens[dsp->numseg-1]+diff2 > bsp->length)
9007             diff2 = bsp->length - (dsp->starts[(dsp->numseg-1)*dsp->dim+i] + dsp->lens[dsp->numseg-1]);
9008       }
9009       sip = sip->next;
9010       BioseqUnlock(bsp);
9011    }
9012    if (diff1 == 0 && prediff1 != 0)
9013       numseg--;
9014    else if (diff1 < 0)
9015       numseg--;
9016    if (diff2 == 0 && prediff2 != 0)
9017       numseg--;
9018    else if (diff2 < 0)
9019       numseg--;
9020    if (numseg == 0)
9021       return;
9022    dsp_new = DenseSegNew();
9023    dsp_new->dim = dsp->dim;
9024    dsp_new->numseg = dsp->numseg+numseg;
9025    dsp_new->starts = (Int4Ptr)MemNew(dsp_new->dim*dsp_new->numseg*sizeof(Int4));
9026    dsp_new->lens = (Int4Ptr)MemNew(dsp_new->numseg*sizeof(Int4));
9027    dsp_new->strands = (Uint1Ptr)MemNew(dsp_new->dim*dsp_new->numseg*sizeof(Uint1));
9028    seg = 0;
9029    if (diff1 > 0)
9030    {
9031       for (j=0; j<dsp->dim; j++)
9032       {
9033          AlnMgr2GetNthSeqRangeInSA(sap, j+1, &start, &stop);
9034          if (dsp->strands[j] == Seq_strand_minus)
9035             dsp_new->starts[j] = stop+1;
9036          else
9037             dsp_new->starts[j] = start-diff1;
9038          dsp_new->strands[j] = dsp->strands[j];
9039       }
9040       dsp_new->lens[0] = diff1;
9041       seg++;
9042    }
9043    for (i=0; i<dsp->numseg; i++)
9044    {
9045       for (j=0; j<dsp->dim; j++)
9046       {
9047          dsp_new->starts[(seg)*dsp->dim+j] = dsp->starts[i*dsp->dim+j];
9048          dsp_new->strands[(seg)*dsp->dim+j] = dsp->strands[i*dsp->dim+j];
9049       }
9050       dsp_new->lens[seg] = dsp->lens[i];
9051       seg++;
9052    }
9053    if (diff2 > 0)
9054    {
9055       for (j=0; j<dsp->dim; j++)
9056       {
9057          AlnMgr2GetNthSeqRangeInSA(sap, j+1, &start, &stop);
9058          if (dsp->strands[j] == Seq_strand_minus)
9059             dsp_new->starts[seg*dsp->dim+j] = start-diff2;
9060          else
9061             dsp_new->starts[seg*dsp->dim+j] = stop+1;
9062          dsp_new->strands[seg*dsp->dim+j] = dsp->strands[j];
9063       }
9064       dsp_new->lens[seg] = diff2;
9065    }
9066    dsp_new->ids = dsp->ids;
9067    dsp->ids = NULL;
9068    DenseSegFree(dsp);
9069    sap->segs = (Pointer)dsp_new;
9070    SAIndex2Free2(sap->saip);
9071    sap->saip = NULL;
9072    AlnMgr2IndexSingleChildSeqAlign(sap);
9073 }
9075 /* SECTION 10 */
9076 /***************************************************************************
9077 *
9078 *  AlnMgr2PadConservatively extends an alignment so that the whole of
9079 *  all sequences is included. If two sequences have tails on the same
9080 *  side, they are each aligned with columns of all gaps:
9081 *
9082 *   <-new aln region->
9083 *   xxxxxxxx----------xxxxxxxxxxxxxxxxxxxx
9084 *   --------xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
9085 *
9086 *  This function returns a newly allocated alignment and doesn't change
9087 *  the original (except for indexing). If the extension was not done for
9088 *  some reason, the function returns NULL;
9089 *
9090 ***************************************************************************/
AlnMgr2PadConservatively(SeqAlignPtr sap)9091 NLM_EXTERN SeqAlignPtr AlnMgr2PadConservatively(SeqAlignPtr sap)
9092 {
9093    AMAlignIndex2Ptr  amaip;
9094    BioseqPtr         bsp;
9095    Int4              ctr1;
9096    Int4              ctr2;
9097    DenseSegPtr       dsp;
9098    DenseSegPtr       dsp_new;
9099    Int4              i;
9100    Int4              j;
9101    Int4Ptr           lenarray;
9102    Int4              n1;
9103    Int4              n2;
9104    Int4              newseg;
9105    SeqAlignPtr       sap_new;
9106    SeqIdPtr          sip;
9107    Int4              start;
9108    Int4              stop;
9109    Uint1             strand;
9111    if (sap == NULL || sap->next != NULL)
9112       return NULL;
9113    if (sap->saip == NULL)
9114       AlnMgr2IndexSeqAlign(sap);
9115    if (sap->saip->indextype == INDEX_PARENT)
9116    {
9117       amaip = (AMAlignIndex2Ptr)(sap->saip);
9118       if (amaip->alnstyle == AM2_LITE)
9119          return NULL;
9120       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
9121    } else
9122       dsp = (DenseSegPtr)(sap->segs);
9123    newseg = 0;
9124    lenarray = (Int4Ptr)MemNew(dsp->dim*sizeof(Int4));
9125    n1 = n2 = 0;
9126    for (i=0; i<dsp->dim; i++)
9127    {
9128       sip = AlnMgr2GetNthSeqIdPtr(sap, i+1);
9129       bsp = BioseqLockById(sip);
9130       lenarray[i] = bsp->length;
9131       BioseqUnlock(bsp);
9132       SeqIdFree(sip);
9133       AlnMgr2GetNthSeqRangeInSA(sap, i+1, &start, &stop);
9134       if (start > 0)
9135       {
9136          n1++;
9137          newseg++;
9138       }
9139       if (stop < lenarray[i]-1)
9140       {
9141          newseg++;
9142       }
9143    }
9144    if (newseg == 0)
9145    {
9146       MemFree(lenarray);
9147       return NULL;
9148    }
9149    dsp_new = DenseSegNew();
9150    dsp_new->numseg = dsp->numseg + newseg;
9151    dsp_new->dim = dsp->dim;
9152    dsp_new->ids = SeqIdDupList(dsp->ids);
9153    dsp_new->starts = (Int4Ptr)MemNew(dsp_new->numseg*dsp_new->dim*sizeof(Int4));
9154    dsp_new->lens = (Int4Ptr)MemNew(dsp_new->numseg*sizeof(Int4));
9155    dsp_new->strands = (Uint1Ptr)MemNew(dsp_new->numseg*dsp_new->dim*sizeof(Uint1));
9156    n2 = n1+dsp->numseg;
9157    ctr1 = 0;
9158    ctr2 = 0;
9159    for (i=0; i<dsp->dim; i++)
9160    {
9161       AlnMgr2GetNthSeqRangeInSA(sap, i+1, &start, &stop);
9162       strand = AlnMgr2GetNthStrand(sap, i+1);
9163       if (strand == Seq_strand_minus && lenarray[i]-1-stop > 0)
9164       {
9165          for (j=0; j<ctr1; j++)
9166          {
9167             dsp_new->starts[dsp->dim*j+i] = -1;
9168             dsp_new->strands[dsp->dim*j+i] = strand;
9169          }
9170          dsp_new->starts[dsp->dim*ctr1+i] = stop+1;
9171          dsp_new->lens[ctr1] = lenarray[i]-1-stop;
9172          dsp_new->strands[dsp->dim*ctr1+i] = strand;
9173          for (j=ctr1+1; j<n1; j++)
9174          {
9175             dsp_new->starts[dsp->dim*j+i] = -1;
9176             dsp_new->strands[dsp->dim*j+i] = strand;
9177          }
9178          ctr1++;
9179       } else if (strand == Seq_strand_plus && start > 0)
9180       {
9181          for (j=0; j<ctr1; j++)
9182          {
9183             dsp_new->starts[dsp->dim*j+i] = -1;
9184             dsp_new->strands[dsp->dim*j+i] = strand;
9185          }
9186          dsp_new->starts[dsp->dim*ctr1+i] = 0;
9187          dsp_new->lens[ctr1] = start;
9188          dsp_new->strands[dsp->dim*ctr1+i] = strand;
9189          for (j=ctr1+1; j<n1; j++)
9190          {
9191             dsp_new->starts[dsp->dim*j+i] = -1;
9192             dsp_new->strands[dsp->dim*j+i] = strand;
9193          }
9194          ctr1++;
9195       } else /* nothing to add on this row, just fill in with -1s */
9196       {
9197          for (j=0; j<n1; j++)
9198          {
9199             dsp_new->starts[dsp->dim*j+i] = -1;
9200             dsp_new->strands[dsp->dim*j+i] = strand;
9201          }
9202       }
9203    /* now fill in the non-extended part of the alignment (copy from original) */
9204       for (j=0; j<dsp->numseg; j++)
9205       {
9206          dsp_new->starts[dsp->dim*(j+n1)+i] = dsp->starts[dsp->dim*j+i];
9207          dsp_new->lens[j+n1] = dsp->lens[j];
9208          dsp_new->strands[dsp->dim*(j+n1)+i] = dsp->strands[dsp->dim*j+i];
9209       }
9210   /* now the other ends */
9211       if (strand == Seq_strand_minus && start > 0)
9212       {
9213          for (j=n2; j<n2+ctr2; j++)
9214          {
9215             dsp_new->starts[dsp->dim*j+i] = -1;
9216             dsp_new->strands[dsp->dim*j+i] = strand;
9217          }
9218          dsp_new->starts[dsp->dim*(ctr2+n2)+i] = 0;
9219          dsp_new->lens[ctr2+n2] = start;
9220          dsp_new->strands[dsp->dim*(ctr2+n2)+i] = strand;
9221          for (j=n2+ctr2+1; j<dsp_new->numseg; j++)
9222          {
9223             dsp_new->starts[dsp->dim*j+i] = -1;
9224             dsp_new->strands[dsp->dim*j+i] = strand;
9225          }
9226          ctr2++;
9227       } else if (strand == Seq_strand_plus && lenarray[i]-1-stop > 0)
9228       {
9229          for (j=n2; j<ctr2+n2; j++)
9230          {
9231             dsp_new->starts[dsp->dim*j+i] = -1;
9232             dsp_new->strands[dsp->dim*j+i] = strand;
9233          }
9234          dsp_new->starts[dsp->dim*(ctr2+n2)+i] = stop+1;
9235          dsp_new->lens[ctr2+n2] = lenarray[i]-1-stop;
9236          dsp_new->strands[dsp->dim*(ctr2+n2)+i] = strand;
9237          for (j=ctr2+n2+1; j<dsp_new->numseg; j++)
9238          {
9239             dsp_new->starts[dsp->dim*j+i] = -1;
9240             dsp_new->strands[dsp->dim*j+i] = strand;
9241          }
9242          ctr2++;
9243       } else /* nothing to add on this row, just fill in with -1s */
9244       {
9245          for (j=n2; j<dsp_new->numseg; j++)
9246          {
9247             dsp_new->starts[dsp->dim*j+i] = -1;
9248             dsp_new->strands[dsp->dim*j+i] = strand;
9249          }
9250       }
9251    }
9252    sap_new = SeqAlignNew();
9253    sap_new->dim = dsp->dim;
9254    sap_new->segtype = SAS_DENSEG;
9255    sap_new->segs = (Pointer)(dsp_new);
9256    MemFree(lenarray);
9257    return sap_new;
9258 }
9260 /* SECTION 10 */
9261 /***************************************************************************
9262 *
9263 *  AlnMgr2ExtractPairwiseSeqAlign takes an indexed alignment (parent or
9264 *  child, but must be fully indexed, not lite) and extracts a pairwise
9265 *  subalignment containing the two requested rows. The subalignment is
9266 *  unindexed and may have internal unaligned regions.
9267 *
9268 ***************************************************************************/
AlnMgr2ExtractPairwiseSeqAlign(SeqAlignPtr sap,Int4 n1,Int4 n2)9269 NLM_EXTERN SeqAlignPtr AlnMgr2ExtractPairwiseSeqAlign(SeqAlignPtr sap, Int4 n1, Int4 n2)
9270 {
9271    AMAlignIndex2Ptr  amaip;
9272    DenseSegPtr       dsp;
9273    DenseSegPtr       dsp_new;
9274    Int4              i;
9275    Int4              j;
9276    Int4              n;
9277    SeqAlignPtr       sap_new;
9279    if (sap == NULL || sap->saip == NULL || n1 == n2 || n1 <= 0 || n2 <= 0)
9280       return NULL;
9281    if (sap->saip->indextype == INDEX_CHILD)
9282       dsp = (DenseSegPtr)(sap->segs);
9283    else
9284    {
9285       amaip = (AMAlignIndex2Ptr)(sap->saip);
9286       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
9287    }
9288    if (n1 > dsp->dim || n2 > dsp->dim)
9289       return NULL;
9290    n = 0;
9291    for (i=0; i<dsp->numseg; i++)
9292    {
9293       if (dsp->starts[dsp->dim*i+n1-1] == -1 && dsp->starts[dsp->dim*i+n2-1] == -1)
9294          n++;
9295    }
9296    if (n == dsp->numseg) /* no overlap at all */
9297       return NULL;
9298    dsp_new = DenseSegNew();
9299    dsp_new->numseg = dsp->numseg - n;
9300    dsp_new->starts = (Int4Ptr)MemNew(2*dsp_new->numseg*sizeof(Int4));
9301    dsp_new->strands = (Uint1Ptr)MemNew(2*dsp_new->numseg*sizeof(Uint1));
9302    dsp_new->lens = (Int4Ptr)MemNew(dsp_new->numseg*sizeof(Int4));
9303    dsp_new->dim = 2;
9304    dsp_new->ids = AlnMgr2GetNthSeqIdPtr(sap, n1);
9305    dsp_new->ids->next = AlnMgr2GetNthSeqIdPtr(sap, n2);
9306    j = 0;
9307    for (i=0; i<dsp->numseg; i++)
9308    {
9309       if (dsp->starts[dsp->dim*i+n1-1] > -1 || dsp->starts[dsp->dim*i+n2-1] > -1)
9310       {
9311          dsp_new->starts[2*j] = dsp->starts[dsp->dim*i+n1-1];
9312          dsp_new->starts[2*j+1] = dsp->starts[dsp->dim*i+n2-1];
9313          dsp_new->strands[2*j] = dsp->strands[n1-1];
9314          dsp_new->strands[2*j+1] = dsp->strands[n2-1];
9315          dsp_new->lens[j] = dsp->lens[i];
9316          j++;
9317       }
9318    }
9319    sap_new = SeqAlignNew();
9320    sap_new->dim = 2;
9321    sap_new->type = SAT_PARTIAL;
9322    sap_new->segtype = SAS_DENSEG;
9323    sap_new->segs = (Pointer)dsp_new;
9324    return sap_new;
9325 }
9327 /* SECTION 10 */
amconssetfree(AMConsSetPtr acp)9328 static void amconssetfree(AMConsSetPtr acp)
9329 {
9330    AMConsSetPtr  acp_next;
9332    while (acp != NULL)
9333    {
9334       acp_next = acp->next;
9335       MemFree(acp->starts);
9336       MemFree(acp->stops);
9337       MemFree(acp->strands);
9338       MemFree(acp);
9339       acp = acp_next;
9340    }
9341 }
AlnMgr2SortForConsistent(VoidPtr ptr1,VoidPtr ptr2)9343 static int LIBCALLBACK AlnMgr2SortForConsistent(VoidPtr ptr1, VoidPtr ptr2)
9344 {
9345    AMConsSetPtr  acp1;
9346    AMConsSetPtr  acp2;
9347    SAIndex2Ptr   saip1;
9348    SAIndex2Ptr   saip2;
9350    acp1 = *((AMConsSetPtr PNTR)ptr1);
9351    acp2 = *((AMConsSetPtr PNTR)ptr2);
9352    saip1 = (SAIndex2Ptr)(acp1->sap->saip);
9353    saip2 = (SAIndex2Ptr)(acp2->sap->saip);
9354    if (saip1->score == 0)
9355       saip1->score = AlnMgr2ComputeScoreForSeqAlign(acp1->sap);
9356    if (saip2->score == 0)
9357       saip2->score = AlnMgr2ComputeScoreForSeqAlign(acp2->sap);
9358    if (saip1->score > saip2->score)
9359       return -1;
9360    else if (saip1->score < saip2->score)
9361       return 1;
9362    else
9363       return 0;
9364 }
9366 /* SECTION 10 */
9367 /***************************************************************************
9368 *
9369 *  AlnMgr2RemoveInconsistentAlnsFromSet takes an alignment that is
9370 *  indexed at least at the AM2_LITE level, and prunes the child
9371 *  alignments so that the remaining alignments form a consistent,
9372 *  nonoverlapping set. All alignments must have the same number of rows,
9373 *  and they must be the same rows (although not necessarily in the same
9374 *  order). The function uses a simple greedy algorithm to construct the
9375 *  nonoverlapping set, starting with the highest-scoring alignment.
9376 *  If fuzz is negative, the function creates the best nonoverlapping set
9377 *  by actually truncating alignments.
9378 *
9379 ***************************************************************************/
/*
 * Prunes (or, when fuzz is negative, truncates) the child alignments of
 * sap_head so that the survivors do not conflict with a higher-scoring one.
 *
 * sap_head: parent alignment; sap_head->segs is read as the head of the
 *           linked list of child alignments. It is dereferenced without a
 *           NULL check.
 * fuzz:     tolerance used in the coordinate comparisons below. A negative
 *           value selects truncation instead of deletion, and the
 *           comparisons then use a fuzz of 1.
 *
 * Side effects: children may be strand-reversed and reindexed, replaced by
 * truncated copies, or freed; sap_head->segs is rewritten to the list of
 * survivors (in descending score order), the old parent index is freed and
 * AlnMgr2IndexLite is run on sap_head. Nothing is done when there is only
 * one child.
 */
AlnMgr2RemoveInconsistentAlnsFromSet(SeqAlignPtr sap_head,Int4 fuzz)9380 NLM_EXTERN void AlnMgr2RemoveInconsistentAlnsFromSet(SeqAlignPtr sap_head, Int4 fuzz)
9381 {
9382    AMConsSetPtr  acp;
9383    AMConsSetPtr  acp_head;
9384    AMConsSetPtr  acp_prev;
9385    AMConsSetPtr  PNTR acparray;
9386    DenseSegPtr   dsp;
9387    Int4          i;
9388    Int4          j;
9389    Int4          k;
9390    Int4          lfuzz;
9391    SeqAlignPtr   newsap;
9392    Int4          numrows;
9393    Int4          numsaps;
9394    Int4          orientation;
9395    Int4          row;
9396    SAIndex2Ptr   saip;
9397    SeqAlignPtr   salp_head;
9398    SeqAlignPtr   salp_prev;
9399    SeqAlignPtr   sap;
9400    SeqAlignPtr   sapnext;
9401    Int4          score;
9402    SeqIdPtr      sip;
9403    SeqIdPtr      sip_head;
9404    Uint1         strand;
   /* lfuzz keeps the caller's value (its sign selects delete vs. truncate
      below); fuzz itself is forced to 1 when the caller passed a negative */
9406    lfuzz = fuzz;
9407    if (fuzz < 0)
9408       fuzz = 1;
9409    sap = (SeqAlignPtr)(sap_head->segs);
   /* a single child cannot conflict with anything */
9410    if (sap->next == NULL)
9411       return;
   /* the first child supplies the reference id list, row count and the
      reference strand (its first row) */
9412    dsp = (DenseSegPtr)(sap->segs);
9413    sip_head = dsp->ids;
9414    numrows = AlnMgr2GetNumRows(sap);
9415    acp_head = NULL;
9416    strand = AlnMgr2GetNthStrand(sap, 1);
9417    numsaps = 0;
   /* build one AMConsSet node per child, recording the range and strand of
      every row. Note that the early returns inside this loop leave any
      children that were already reversed in their reversed state. */
9418    while (sap != NULL)
9419    {
9420       if (AlnMgr2GetNumRows(sap) != numrows)
9421       {
9422          amconssetfree(acp_head);
9423          return;
9424       }
9425       numsaps++;
9426       acp = (AMConsSetPtr)MemNew(sizeof(AMConsSet));
9427       acp->starts = (Int4Ptr)MemNew(numrows*sizeof(Int4));
9428       acp->stops = (Int4Ptr)MemNew(numrows*sizeof(Int4));
9429       acp->strands = (Uint1Ptr)MemNew(numrows*sizeof(Uint1));
9430       acp->which = (Int4Ptr)MemNew(numrows*sizeof(Int4));
9431       acp->sap = sap;
9432       if (acp_head != NULL)
9433       {
9434          acp_prev->next = acp;
9435          acp_prev = acp;
9436       } else
9437          acp_head = acp_prev = acp;
9438       sip = sip_head;
      /* locate the reference sequence (first id of the first child) in
         this child; a result <= 0 is treated as "not found" */
9439       row = AlnMgr2GetFirstNForSip(sap, sip);
9440       if (row <= 0)
9441       {
9442          amconssetfree(acp_head);
9443          return;
9444       }
      /* NOTE(review): acp->strands was allocated just above and is only
         filled in by the loop further down, so this test does not look at
         the alignment's actual strand yet. row also appears to be 1-based
         (values <= 0 are rejected) while strands has numrows entries, so
         strands[row] may be one past the intended element. Presumably the
         intent is to compare the strand of this child on that row (e.g.
         AlnMgr2GetNthStrand(sap, row)) with the reference strand - confirm
         before changing, since it decides which children get reversed. */
9445       if (acp->strands[row] != strand)
9446       {
         /* detach the child so that only it is reversed, keep its score
            across the reindexing, then relink it */
9447          sapnext = acp->sap->next;
9448          acp->sap->next = NULL;
9449          score = ((SAIndex2Ptr)(acp->sap->saip))->score;
9450          SeqAlignListReverseStrand(acp->sap);
9451          AMAlignIndexFreeEitherIndex(acp->sap);
9452          AlnMgr2IndexSingleChildSeqAlign(acp->sap);
9453          saip = (SAIndex2Ptr)(acp->sap->saip);
9454          saip->score = score;
9455          acp->strands[row] = strand;
9456          acp->sap->next = sapnext;
9457       }
      /* record range and strand of every row; which[] is set to the same
         reference row for every entry */
9458       for (i=0; i<numrows; i++)
9459       {
9460          acp->which[i] = row;
9461          AlnMgr2GetNthSeqRangeInSA(sap, i+1, &acp->starts[i], &acp->stops[i]);
9462          acp->strands[i] = AlnMgr2GetNthStrand(sap, i+1);
9463       }
9464       sap = sap->next;
9465    }
   /* put the nodes in an array and sort it with AlnMgr2SortForConsistent
      (highest score first) */
9466    acparray = (AMConsSetPtr PNTR)MemNew(numsaps*sizeof(AMConsSetPtr));
9467    acp = acp_head;
9468    i = 0;
9469    while (acp != NULL)
9470    {
9471       acparray[i] = acp;
9472       acp = acp->next;
9473       i++;
9474    }
9475    HeapSort(acparray, numsaps, sizeof(AMConsSetPtr), AlnMgr2SortForConsistent);
   /* greedy pass: every surviving entry i is compared, row by row, with
      each lower-ranked entry j; used == -1 marks an entry as removed */
9476    /* orientation -1 means that ith is before jth in ALL rows, 1 means ith is after jth in ALL rows */
9477    for (i=0; i<numsaps; i++)
9478    {
9479       if (acparray[i]->used != -1)
9480       {
9481          for (j=i+1; j<numsaps; j++)
9482          {
9483             orientation = 0;
9484             for (k=0; acparray[j]->used != -1 && k<numrows; k++)
9485             {
               /* a strand mismatch on any row removes j */
9486                if (acparray[i]->strands[k] != acparray[j]->strands[k])
9487                   acparray[j]->used = -1;
               /* case 1: i starts before j on this row */
9488                if (acparray[i]->starts[k] - fuzz < acparray[j]->starts[k])
9489                {
9490                   if (acparray[i]->stops[k] - fuzz < acparray[j]->starts[k])
9491                   {
                     /* no overlap: the relative order must agree with the
                        order already seen on earlier rows */
9492                      if ((acparray[i]->strands[k] == Seq_strand_plus && orientation == 1) || (acparray[i]->strands[k] == Seq_strand_minus && orientation == -1))
9493                         acparray[j]->used = -1;
9494                      else if (orientation == 0)
9495                      {
9496                         if (acparray[i]->strands[k] == Seq_strand_minus)
9497                            orientation = 1;
9498                         else
9499                            orientation = -1;
9500                      }
9501                   } else
9502                   {
9503                      if (lfuzz >= 0) /* just mark it for deletion */
9504                         acparray[j]->used = -1;
9505                      else /* truncate it */
9506                      {
9507                         if (acparray[j]->stops[k] > acparray[i]->stops[k])
9508                         {
                           /* keep only the part of j that lies beyond the
                              end of i on this row.
                              NOTE(review): newsap is not checked for NULL
                              and, unlike the branch further down, is not
                              indexed here - confirm whether that is
                              intended. */
9509                            newsap = AlnMgr2GetSubAlign(acparray[j]->sap, acparray[i]->stops[k]+1, acparray[j]->stops[k], k+1, TRUE);
9510                            SeqAlignFree(acparray[j]->sap);
9511                            acparray[j]->sap = newsap;
9512                            acparray[j]->starts[k] = acparray[i]->stops[k]+1;
9513                         } else
9514                            acparray[j]->used = -1;
9515                      }
9516                   }
               /* case 2: i starts after j on this row */
9517                } else if (acparray[i]->starts[k] - fuzz > acparray[j]->starts[k])
9518                {
9519                  if (acparray[i]->starts[k] + fuzz > acparray[j]->stops[k])
9520                   {
9521                      if ((acparray[i]->strands[k] == Seq_strand_plus && orientation == -1) || (acparray[i]->strands[k] == Seq_strand_minus && orientation == 1))
9522                         acparray[j]->used = -1;
9523                      else if (orientation == 0)
9524                      {
9525                         if (acparray[i]->strands[k] == Seq_strand_minus)
9526                            orientation = -1;
9527                         else
9528                            orientation = 1;
9529                      }
9530                   } else
9531                   {
9532                      if (lfuzz >= 0) /* mark for deletion */
9533                         acparray[j]->used = -1;
9534                      else /* truncate */
9535                      {
9536                         if (acparray[j]->starts[k] < acparray[i]->starts[k])
9537                         {
                           /* keep only the part of j that lies before the
                              start of i on this row.
                              NOTE(review): the kept piece ends at
                              acparray[i]->starts[k]-1, yet the bookkeeping
                              below rewrites starts[k] (not stops[k]) using
                              acparray[i]->stops[k]+1 - this looks copied
                              from the branch above; confirm. newsap is not
                              checked for NULL before it is indexed. */
9538                            newsap = AlnMgr2GetSubAlign(acparray[j]->sap, acparray[j]->starts[k], acparray[i]->starts[k]-1, k+1, TRUE);
9539                            SeqAlignFree(acparray[j]->sap);
9540                            acparray[j]->sap = newsap;
9541                            AlnMgr2IndexSingleChildSeqAlign(newsap);
9542                            acparray[j]->starts[k] = acparray[i]->stops[k]+1;
9543                         } else
9544                            acparray[j]->used = -1;
9545                      }
9546                   }
               /* case 3: neither comparison holds (starts[k] of i minus
                  fuzz equals starts[k] of j); j is removed */
9547                } else
9548                   acparray[j]->used = -1;
9549             }
9550          }
9551       }
9552    }
9553    /* now free all the unused ones, stick the rest back together, reindex, and return */
9554    salp_head = salp_prev = NULL;
9555    for (i=0; i<numsaps; i++)
9556    {
9557       if (acparray[i]->used == -1)
9558       {
9559          SeqAlignFree(acparray[i]->sap);
9560          acparray[i]->sap = NULL;
9561       } else
9562       {
         /* append the survivor; the list is rebuilt in sorted (score)
            order and each survivor's old next link is overwritten */
9563          if (salp_head != NULL)
9564          {
9565             salp_prev->next = acparray[i]->sap;
9566             salp_prev = acparray[i]->sap;
9567             salp_prev->next = NULL;
9568          } else
9569          {
9570             salp_head = salp_prev = acparray[i]->sap;
9571             salp_prev->next = NULL;
9572          }
9573       }
9574    }
   /* amconssetfree releases the bookkeeping nodes only, not the alignments
      they point to */
9575    amconssetfree(acp_head);
9576    MemFree(acparray);
   /* install the new child list and rebuild the parent's lite index */
9577    sap_head->segs = (Pointer)(salp_head);
9578    AMAlignIndex2Free2(sap_head->saip);
9579    AlnMgr2IndexLite(sap_head);
9580 }
AlnMgr2CompareByScore(VoidPtr ptr1,VoidPtr ptr2)9582 static int LIBCALLBACK AlnMgr2CompareByScore(VoidPtr ptr1, VoidPtr ptr2)
9583 {
9584    SAIndex2Ptr  saip1;
9585    SAIndex2Ptr  saip2;
9586    SeqAlignPtr  sap1;
9587    SeqAlignPtr  sap2;
9589    if (ptr1 == NULL || ptr2 == NULL)
9590       return 0;
9591    sap1 = *((SeqAlignPtr PNTR) ptr1);
9592    sap2 = *((SeqAlignPtr PNTR) ptr2);
9593    saip1 = (SAIndex2Ptr)(sap1->saip);
9594    saip2 = (SAIndex2Ptr)(sap2->saip);
9595    if (saip1->score == 0)
9596       saip1->score = AlnMgr2ComputeScoreForSeqAlign(sap1);
9597    if (saip2->score == 0)
9598       saip2->score = AlnMgr2ComputeScoreForSeqAlign(sap2);
9599    if (saip1->score > saip2->score)
9600       return -1;
9601    if (saip1->score < saip2->score)
9602       return 1;
9603    return 0;
9604 }
9606 /***************************************************************************
9607 *
9608 *  AlnMgr2FuseSet takes a set of alignments sharing all their rows and orders
9609 *  the alignments, then fuses together any adjacent alignments. If returnall
9610 *  is TRUE, all pieces are returned; if not, then only the largest piece is
9611 *  returned. This function will work best when called after
9612 *  AlnMgr2RemoveInconsistentAlnsFromSet(sap_head, -1).
9613 *
9614 ***************************************************************************/
AlnMgr2FuseSet(SeqAlignPtr sap_head,Boolean returnall)9615 NLM_EXTERN SeqAlignPtr AlnMgr2FuseSet(SeqAlignPtr sap_head, Boolean returnall)
9616 {
9617    AMAlignIndex2Ptr  amaip;
9618    DenseSegPtr       dsp_new;
9619    DenseSegPtr       dsp1;
9620    DenseSegPtr       dsp2;
9621    Boolean           found;
9622    Int4              i;
9623    Int4              n;
9624    Int4              numrows;
9625    Int4              r;
9626    SeqAlignPtr       sap_keep;
9627    SeqAlignPtr       sap_keep_head;
9628    SeqAlignPtr       sap_keep_prev;
9629    SAIndex2Ptr       saip;
9630    SeqAlignPtr       PNTR saparray;
9631    Int4              start1;
9632    Int4              start2;
9633    Int4              stop1;
9634    Int4              stop2;
9635    Uint1             strand;
9637    if (sap_head == NULL || sap_head->saip == NULL)
9638       return NULL;
9639    AlnMgr2SortAlnSetByNthRowPos(sap_head, 1);
9640    amaip = (AMAlignIndex2Ptr)(sap_head->saip);
9641    sap_keep = amaip->saps[0];
9642    sap_keep_head = sap_keep_prev = NULL;
9643    numrows = AlnMgr2GetNumRows(sap_keep);
9644    for (i=1; i<amaip->numsaps; i++)
9645    {
9646       /* check for consistency with sap_keep; fuse if possible */
9647       found = FALSE;
9648       for (n=0; !found && n<numrows; n++)
9649       {
9650          strand = AlnMgr2GetNthStrand(sap_keep, n+1);
9651          AlnMgr2GetNthSeqRangeInSA(sap_keep, n+1, &start1, &stop1);
9652          AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], n+1, &start2, &stop2);
9653          if (strand == Seq_strand_minus)
9654          {
9655             if (stop2+1 != start1)
9656                found = TRUE;
9657          } else
9658          {
9659             if (start2 != stop1+1)
9660                found = TRUE;
9661          }
9662       }
9663       if (!found) /* fuse together */
9664       {
9665          dsp1 = (DenseSegPtr)(sap_keep->segs);
9666          dsp2 = (DenseSegPtr)(amaip->saps[i]->segs);
9667          dsp_new = DenseSegNew();
9668          dsp_new->dim = dsp1->dim;
9669          dsp_new->numseg = dsp1->numseg+dsp2->numseg;
9670          dsp_new->starts = (Int4Ptr)MemNew(dsp_new->numseg*dsp_new->dim*sizeof(Int4));
9671          dsp_new->lens = (Int4Ptr)MemNew(dsp_new->numseg*sizeof(Int4));
9672          dsp_new->strands = (Uint1Ptr)MemNew(dsp_new->numseg*dsp_new->dim*sizeof(Int4));
9673          for (n=0; n<dsp_new->numseg; n++)
9674          {
9675             for (r=0; r<dsp_new->dim; r++)
9676             {
9677                if (n >= dsp1->numseg)
9678                   dsp_new->starts[r*n*r] = dsp2->starts[r*(n-dsp1->numseg)+r];
9679                else
9680                   dsp_new->starts[r*n+r] = dsp1->starts[r*n+r];
9681                dsp_new->strands[r*n*r] = dsp1->strands[r];
9682             }
9683             if (n >= dsp1->numseg)
9684                dsp_new->lens[n] = dsp2->lens[n-dsp1->numseg];
9685             else
9686                dsp_new->lens[n] = dsp1->lens[n];
9687          }
9688          SeqAlignFree(amaip->saps[i]);
9689          amaip->saps[i] = NULL;
9690       } else /* add next alignment to keepers pile */
9691       {
9692          if (sap_keep_head == NULL)
9693          {
9694             if (sap_keep != NULL)
9695             {
9696                sap_keep_head = sap_keep;
9697                sap_keep->next = amaip->saps[i];
9698                sap_keep_prev = amaip->saps[i];
9699             } else
9700                sap_keep_head = sap_keep_prev = amaip->saps[i];
9701          } else
9702          {
9703             sap_keep_prev->next = amaip->saps[i];
9704             sap_keep_prev = amaip->saps[i];
9705          }
9706       }
9707    }
9708    if (sap_keep_head == NULL || sap_keep_head->next == NULL) /* everything was fused */
9709       sap_keep_head = sap_keep;
9710    if (returnall)
9711    {
9712       sap_head->segs = (Pointer)(sap_keep_head);
9713       return sap_keep_head;
9714    }
9715    i=0;
9716    sap_keep = sap_keep_head;
9717    while (sap_keep != NULL)
9718    {
9719       sap_keep = sap_keep->next;
9720       i++;
9721    }
9722    saparray = (SeqAlignPtr PNTR)MemNew(i*sizeof(SeqAlignPtr));
9723    i = 0;
9724    sap_keep = sap_keep_head;
9725    while (sap_keep != NULL)
9726    {
9727       saip = (SAIndex2Ptr)(sap_keep->saip);
9728       saip->score = 0;
9729       saparray[i] = sap_keep;
9730       i++;
9731       sap_keep = sap_keep->next;
9732    }
9733    HeapSort(saparray, i, sizeof(SeqAlignPtr), AlnMgr2CompareByScore);
9734    sap_keep = saparray[0];
9735    for (n=1; n<i; n++)
9736    {
9737       SeqAlignFree(saparray[n]);
9738    }
9739    MemFree(saparray);
9740    return sap_keep;
9741 }
AlnMgr2FillInUnaligned(SeqAlignPtr sap)9743 NLM_EXTERN void AlnMgr2FillInUnaligned(SeqAlignPtr sap)
9744 {
9745    Int4         curr;
9746    DenseSegPtr  dsp;
9747    DenseSegPtr  dsp_new;
9748    Boolean      found;
9749    Int4         i;
9750    Int4         j;
9751    Int4         k;
9752    Int4         last;
9753    Int4         n;
9754    Int4         offset;
9755    Int4         start;
9756    Int4         stop;
9757    Uint1        strand;
9759    if (sap == NULL || (sap->saip != NULL && sap->saip->indextype != INDEX_CHILD))
9760       return;
9761    n = 0;
9762    dsp = (DenseSegPtr)(sap->segs);
9763    for (i=0; i<dsp->dim; i++)
9764    {
9765       j = 0;
9766       AlnMgr2GetNthSeqRangeInSA(sap, i, &start, &stop);
9767       strand = dsp->strands[i];
9768       last = -1;
9769       while (j<dsp->numseg-1)
9770       {
9771          if (strand == Seq_strand_minus)
9772          {
9773             if (last != -1)
9774             {
9775                found = FALSE;
9776                while (j<dsp->numseg && !found)
9777                {
9778                   if (dsp->starts[j*dsp->dim+i] != -1)
9779                   {
9780                      if (dsp->starts[j*dsp->dim+i]+dsp->lens[j] != last)
9781                         n++;
9782                      found = TRUE;
9783                   }
9784                   if (!found)
9785                      j++;
9786                }
9787             } else
9788                last = dsp->starts[j*dsp->dim+i];
9789          } else
9790          {
9791             if (last != -1)
9792             {
9793                found = FALSE;
9794                while (j<dsp->numseg && !found)
9795                {
9796                   if (dsp->starts[j*dsp->dim+i] != -1)
9797                   {
9798                      if (dsp->starts[j*dsp->dim+i]+dsp->lens[j] != last)
9799                         n++;
9800                      found = TRUE;
9801                   }
9802                   if (!found)
9803                      j++;
9804                }
9805             } else
9806             {
9807                last = dsp->starts[j*dsp->dim+i];
9808                if (last != -1)
9809                   last += dsp->lens[j];
9810             }
9811          }
9812       }
9813    }
9814    if (n == 0) /* no unaligned regions */
9815       return;
9816    dsp_new = DenseSegNew();
9817    dsp_new->numseg = dsp->numseg + n;
9818    dsp_new->dim = dsp->dim;
9819    dsp_new->starts = (Int4Ptr)MemNew(dsp_new->dim*dsp_new->numseg*sizeof(Int4));
9820    dsp_new->strands = (Uint1Ptr)MemNew(dsp_new->dim*dsp_new->numseg*sizeof(Uint1));
9821    for (i=0; i<dsp_new->numseg; i++)
9822    {
9823       for (j=0; j<dsp_new->dim; j++)
9824       {
9825          dsp_new->strands[i*dsp_new->dim+j] = dsp->strands[j];
9826       }
9827    }
9828    dsp_new->ids = SeqIdDupList(dsp->ids);
9829    dsp_new->lens = (Int4Ptr)MemNew(dsp_new->numseg*sizeof(Int4));
9830    curr = 0;
9831    for (j=0; j<dsp->numseg; j++)
9832    {
9833       for (i=0; i<dsp->dim; i++)
9834       {
9835          offset = 0;
9836          strand = dsp->strands[i];
9837          if (dsp->starts[j*dsp->dim+i] == -1)
9838             dsp_new->starts[curr*dsp_new->dim+i] = -1;
9839          else
9840          {
9841             k = j+1;
9842             found = FALSE;
9843             while (k < dsp->numseg)
9844             {
9845                if (dsp->starts[k*dsp->dim+i] != -1)
9846                {
9847                   found = TRUE;
9848                   if (strand == Seq_strand_minus)
9849                   {
9850                      if (dsp->starts[k*dsp->dim+i] + dsp->lens[k] != dsp->starts[j*dsp->dim+i])
9851                      {
9852                         dsp_new->lens[curr+offset] = dsp->starts[j*dsp->dim+i] - dsp->starts[k*dsp->dim+i] - dsp->lens[k];
9853                         dsp_new->starts[(curr+offset)*dsp->dim+i] = dsp->starts[k*dsp->dim+i] + dsp->lens[k];
9854                         offset++;
9855                      }
9856                   } else
9857                   {
9858                      if (dsp->starts[j*dsp->dim+i] + dsp->lens[j] != dsp->starts[k*dsp->dim+i])
9859                      {
9860                         dsp_new->lens[curr+offset] = dsp->starts[k*dsp->dim+i] - dsp->starts[j*dsp->dim+i] - dsp->lens[j];
9861                         dsp_new->starts[(curr+offset)*dsp->dim+i] = dsp->starts[j*dsp->dim+i] + dsp->lens[j];
9862                      }
9863                   }
9864                }
9865                k++;
9866             }
9867          }
9868       }
9869       curr = curr + 1 + offset;
9870    }
9871    DenseSegFree(dsp);
9872    sap->segs = (Pointer)(dsp_new);
9873    AMAlignIndexFreeEitherIndex(sap);
9874 }
9876 /* SECTION 11 -- functions for std-segs */
AlnMgr2GetNthSeqIdPtrStdSeg(SeqAlignPtr sap,Int4 n)9877 NLM_EXTERN SeqIdPtr AlnMgr2GetNthSeqIdPtrStdSeg(SeqAlignPtr sap, Int4 n)
9878 {
9879    SeqLocPtr  slp;
9880    StdSegPtr  ssp;
9882    if (sap == NULL || sap->segtype != SAS_STD)
9883       return NULL;
9884    ssp = (StdSegPtr)(sap->segs);
9885    slp = ssp->loc;
9886    n--;
9887    while (n > 0)
9888    {
9889       if (slp == NULL)
9890          return NULL;
9891       slp = slp->next;
9892       n--;
9893    }
9894    return (SeqIdDup(SeqLocId(slp)));
9895 }
AlignMgr2GetFirstNForStdSeg(SeqAlignPtr sap,SeqIdPtr sip)9897 NLM_EXTERN Int4 AlignMgr2GetFirstNForStdSeg(SeqAlignPtr sap, SeqIdPtr sip)
9898 {
9899    Int4       i;
9900    SeqIdPtr   sip_tmp;
9901    StdSegPtr  ssp;
9903    if (sap == NULL || sap->segtype != SAS_STD)
9904       return -1;
9905    ssp = (StdSegPtr)(sap->segs);
9906    sip_tmp = ssp->ids;
9907    i = 1;
9908    while (sip_tmp != NULL)
9909    {
9910       if (SeqIdComp(sip, sip_tmp) == SIC_YES)
9911          return i;
9912       sip_tmp = sip_tmp->next;
9913       i++;
9914    }
9915    return -1;
9916 }
AlnMgr2GetNthSeqRangeInSAStdSeg(SeqAlignPtr sap,Int4 n,Int4Ptr start,Int4Ptr stop)9918 NLM_EXTERN void AlnMgr2GetNthSeqRangeInSAStdSeg(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop)
9919 {
9920    SeqLocPtr  slp;
9921    StdSegPtr  ssp;
9923    if (start != NULL)
9924       *start = -1;
9925    if (stop != NULL)
9926       *stop = -1;
9927    if (sap == NULL || sap->segtype != SAS_STD)
9928       return;
9929    ssp = (StdSegPtr)(sap->segs);
9930    slp = ssp->loc;
9931    n--;
9932    while (n > 0)
9933    {
9934       if (slp == NULL)
9935          return;
9936       slp = slp->next;
9937       n--;
9938    }
9939    if (slp == NULL)
9940       return;
9941    if (start != NULL)
9942       *start = SeqLocStart(slp);
9943    if (stop != NULL)
9944       *stop = SeqLocStop(slp);
9945 }
9948 /***************************************************************************
9949 *
9950 *   AlnMgr2GetSeqRangeForSipInSAStdSeg  returns the smallest and largest sequence
9951 *  coordinates in in a Std-Seg seqalign for a given Sequence Id.  Also return the
9952 *  strand type.  Either start, stop or strand can be NULL to only retrieve some of them.
9953 *  If start and stop are -1, there is an error (not a std-seg), the SeqID does not participate in this
9954 *  alignment or the alignment is one big insert on that id.  Returns true if the sip was found
9955 *  in the alignment with real coordinates, i.e. *start would not be -1.  RANGE
9956 *
9957 ***************************************************************************/
AlnMgr2GetSeqRangeForSipInSAStdSeg(SeqAlignPtr sap,SeqIdPtr sip,Int4Ptr start,Int4Ptr stop,Uint1Ptr strand)9958 NLM_EXTERN Boolean AlnMgr2GetSeqRangeForSipInSAStdSeg(SeqAlignPtr sap, SeqIdPtr sip, Int4Ptr start, Int4Ptr stop, Uint1Ptr strand)
9959 {
9960     Int4        c_start, c_stop;
9961     Uint1       c_strand;
9962     StdSegPtr   ssp;
9963     Boolean     range_found = FALSE;
9964     Boolean     strands_inconsistent = FALSE;
9966     if (start) *start = -1;
9967     if (stop)  *stop  = -1;
9968     if (strand) *strand = Seq_strand_unknown;
9970     if (sap->segtype != SAS_STD)
9971         return FALSE;
9973     ssp = (StdSegPtr)(sap->segs);
9974     while (ssp) {
9975         if (AlnMgr2GetSeqRangeForSipInStdSeg(ssp, sip, &c_start, &c_stop, &c_strand, NULL) &&
9976             c_start != -1) /* skip inserts on our bioseq */
9977         {
9978              range_found = TRUE;
9980             if (start) {
9981                 if (*start == -1) {
9982                     *start = c_start;
9983                 } else {
9984                     *start = MIN(*start, c_start);
9985                 }
9986             }
9987             if (stop) {
9988                 *stop = MAX(*stop, c_stop);
9989             }
9990             if (strand && ! strands_inconsistent) {
9991             /* if strands are different each time, ignore them. */
9992                 if (*strand != Seq_strand_unknown && *strand != c_strand) {
9993                     *strand = Seq_strand_unknown;
9994                     strands_inconsistent = TRUE;
9995                 } else {
9996                     *strand = c_strand;
9997                 }
9998             }
9999         }
10000         ssp = ssp->next;
10001     }
10002     return range_found;
10003 }
10006 /***************************************************************************
10007 *
10008 *   AlnMgr2GetSeqRangeForSipInStdSeg  returns the start and stop sequence
10009 *  coordinates in a Std-Segment for a given Sequence Id.  Also return the
10010 *  strand type.  Either start, stop or strand can be NULL to only retrieve some of them.
10011 *  If start and stop are -1, the SeqID was not found in this segment.
10012 *  Returns true if the sip was found, even if it is a gap (start, stop = -1).  RANGE
10013 *
10014 ***************************************************************************/
AlnMgr2GetSeqRangeForSipInStdSeg(StdSegPtr ssp,SeqIdPtr sip,Int4Ptr start,Int4Ptr stop,Uint1Ptr strand,Uint1Ptr segType)10015 NLM_EXTERN Boolean AlnMgr2GetSeqRangeForSipInStdSeg(
10016     StdSegPtr   ssp,
10017     SeqIdPtr    sip,
10018     Int4Ptr     start,
10019     Int4Ptr     stop,
10020     Uint1Ptr    strand,
10021     Uint1Ptr    segType) /* AM_SEQ, AM_GAP, AM_INSERT */
10022 {
10023     SeqLocPtr   loc;
10024     Uint1       m_strand;
10025     Int4        m_start, m_stop, m_swap;
10026     Boolean     s_present = FALSE;
10027     Boolean     m_present = FALSE;
10028     Boolean     found_id = FALSE;
10030     for ( loc = ssp->loc;
10031           loc != NULL;
10032           loc = loc->next ) {
10033     /* One SeqLoc for each Sequence aligned by this segment. */
10034         /* find the one that matches the sip parameter. */
10035         if (SeqIdForSameBioseq(sip, SeqLocId(loc))) {
10036             m_strand = SeqLocStrand(loc);
10037             m_start  = SeqLocStart(loc);
10038             m_stop   = SeqLocStop(loc);
10039             /* Might have to reverse the order of start and stop on
10040                minus strands so that start is less than stop. */
10041             if (m_start > m_stop) {
10042               m_swap  = m_start;
10043               m_start = m_stop;
10044               m_stop = m_swap;
10045             }
10046             if (start)  *start  = m_start;
10047             if (stop)   *stop   = m_stop;
10048             if (strand) *strand = m_strand;
10049             if (m_start != -1)
10050                 m_present = TRUE;
10052             /* found our sequence in this segment. */
10053             found_id = TRUE;
10054         } else { /* a different sequence */
10055             if (SeqLocStart(loc) != -1)
10056                 s_present = TRUE;
10057         }
10058     }
10060     if (segType) {
10061         if (m_present && s_present)
10062             *segType = AM_SEQ;
10063         else if (!m_present && s_present)
10064             *segType = AM_INSERT;
10065         else if (m_present && !s_present)
10066             *segType = AM_GAP;
10067         else
10068             *segType = AM_GAP; /* start will be -1 */
10069     }
10070     return found_id;
10071 }
10074 /***************************************************************************
10075 *
10076 *   AlnMgr2GetNthStdSeg  returns the a pointer to the Nth segment of
10077 *   a standard segment alignment.  Numbering starts with 1.
10078 *   returns NULL if not n segments or is not a std-seg aligment.
10079 *   Useful to pass its return value to AlnMgr2GetSeqRangeForSipInStdSeg()
10080 *
10081 ***************************************************************************/
AlnMgr2GetNthStdSeg(SeqAlignPtr sap,Int2 n)10082 NLM_EXTERN StdSegPtr AlnMgr2GetNthStdSeg(SeqAlignPtr sap, Int2 n)
10083 {
10084     StdSegPtr   ssp;
10085 	Int2        i;
10087     if (sap == NULL || sap->segtype != SAS_STD || n < 1)
10088         return NULL;
10090     i = 1;
10091     ssp = (StdSegPtr)(sap->segs);
10092     while(ssp)
10093     {
10094         if (i == n)
10095             return ssp;
10096         ++i;
10097         ssp = ssp->next;
10098     }
10100     return NULL;
10101 }
10103 /***************************************************************************
10104 *
10105 *  AlnMgr2GetNumStdSegs returns the number of segments in a standar-seg alignment.
10106 *   returns -1 if sap is null or not a standard-seg alignment.
10107 *
10108 ***************************************************************************/
AlnMgr2GetNumStdSegs(SeqAlignPtr sap)10109 NLM_EXTERN Int4 AlnMgr2GetNumStdSegs(SeqAlignPtr sap)
10110 {
10111     Int4        seg_count = 0;
10112     StdSegPtr   ssp;
10114     if (sap == NULL || sap->segtype != SAS_STD)
10115         return -1;
10117     ssp = (StdSegPtr)(sap->segs);
10118 	while(ssp)
10119 	{
10120 		++seg_count;
10121 		ssp = ssp->next;
10122 	}
10123 	return seg_count;
10124 }
AlnMgr2GetLongestSeqLoc(SeqAlignPtr sap)10126 static SeqLocPtr AlnMgr2GetLongestSeqLoc(SeqAlignPtr sap)
10127 {
10128    Int4       longest;
10129    Int4       n;
10130    SeqLocPtr  slp;
10131    SeqLocPtr  slp_longest;
10132    StdSegPtr  ssp;
10134    if (sap == NULL || sap->segtype != SAS_STD)
10135       return NULL;
10136    longest = -1;
10137    ssp = (StdSegPtr)(sap->segs);
10138    slp = ssp->loc;
10139    while (slp != NULL)
10140    {
10141       n = SeqLocLen(slp);
10142       if (n > longest)
10143       {
10144          slp_longest = slp;
10145          longest = n;
10146       }
10147       slp = slp->next;
10148    }
10149    return slp_longest;
10150 }
10152 /***************************************************************************
10153 *
10154 *  The two mapping functions act a little differently for std-segs. The
10155 *  alignment coordinates are 1:1 linearly correlated with the longest
10156 *  seqloc in the set; the others may be significantly shorter.
10157 *  The mapping functions deal with % lengths, and map those instead of
10158 *  coordinates (which may not be linear);
10159 *
10160 ***************************************************************************/
AlnMgr2MapBioseqToSeqAlignStdSeg(SeqAlignPtr sap,Int4 n,Int4 pos)10161 NLM_EXTERN Int4 AlnMgr2MapBioseqToSeqAlignStdSeg(SeqAlignPtr sap, Int4 n, Int4 pos)
10162 {
10163    SeqLocPtr  slp;
10164    SeqLocPtr  slp_longest;
10165    StdSegPtr  ssp;
10166    Int4       start1;
10167    Int4       start2;
10168    Int4       stop1;
10169    Int4       stop2;
10171    if (sap == NULL || sap->segtype != SAS_STD)
10172       return -1;
10173    slp_longest = AlnMgr2GetLongestSeqLoc(sap);
10174    start1 = SeqLocStart(slp_longest);
10175    stop1 = SeqLocStop(slp_longest);
10176    ssp = (StdSegPtr)(sap->segs);
10177    slp = ssp->loc;
10178    n--;
10179    while (n > 0)
10180    {
10181       if (slp == NULL)
10182          return -1;
10183       n--;
10184       slp = slp->next;
10185    }
10186    if (slp == NULL)
10187       return -1;
10188    start2 = SeqLocStart(slp);
10189    stop2 = SeqLocStop(slp);
10190    if (start2 == -1) /* NULL */
10191       return -1;
10192    return (((stop1-start1)*(pos - start2))/(stop2-start2));
10193 }
AlnMgr2MapSeqAlignToBioseqStdSeg(SeqAlignPtr sap,Int4 n,Int4 pos)10195 NLM_EXTERN Int4 AlnMgr2MapSeqAlignToBioseqStdSeg(SeqAlignPtr sap, Int4 n, Int4 pos)
10196 {
10197    SeqLocPtr  slp;
10198    SeqLocPtr  slp_longest;
10199    StdSegPtr  ssp;
10200    Int4       start1;
10201    Int4       start2;
10202    Int4       stop1;
10203    Int4       stop2;
10205    if (sap == NULL || sap->segtype != SAS_STD)
10206       return -1;
10207    slp_longest = AlnMgr2GetLongestSeqLoc(sap);
10208    start1 = SeqLocStart(slp_longest);
10209    stop1 = SeqLocStop(slp_longest);
10210    ssp = (StdSegPtr)(sap->segs);
10211    slp = ssp->loc;
10212    n--;
10213    while (n > 0)
10214    {
10215       if (slp == NULL)
10216          return -1;
10217       n--;
10218       slp = slp->next;
10219    }
10220    if (slp == NULL)
10221       return -1;
10222    start2 = SeqLocStart(slp);
10223    stop2 = SeqLocStop(slp);
10224    if (start2 == -1)  /* NULL */
10225       return -1;
10226    return (start2 + ((stop2-start2)*(pos-start1))/(stop1-start1));
10227 }
AlnMgr2GetAlnLengthStdSeg(SeqAlignPtr sap)10229 NLM_EXTERN Int4 AlnMgr2GetAlnLengthStdSeg(SeqAlignPtr sap)
10230 {
10231    SeqLocPtr  slp_longest;
10233    if (sap == NULL || sap->segtype != SAS_STD)
10234       return -1;
10235    slp_longest = AlnMgr2GetLongestSeqLoc(sap);
10236    return (SeqLocLen(slp_longest));
10237 }