1 /* ===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *            National Center for Biotechnology Information (NCBI)
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government do not place any restriction on its use or reproduction.
12 *  We would, however, appreciate having the NCBI and the author cited in
13 *  any work or product based on this material.
14 *
15 *  Although all reasonable efforts have been taken to ensure the accuracy
16 *  and reliability of the software and data, the NLM and the U.S.
17 *  Government do not and cannot warrant the performance or results that
18 *  may be obtained by using this software or data. The NLM and the U.S.
19 *  Government disclaim all warranties, express or implied, including
20 *  warranties of performance, merchantability or fitness for any particular
21 *  purpose.
22 *
23 * ===========================================================================
24 *
25 * File Name:  alignmgr2.c
26 *
27 * Author:  Sarah Wheelan
28 *
29 * Version Creation Date:  10/01
30 *
31 * $Revision: 6.66 $
32 *
33 * File Description: SeqAlign indexing, access, and manipulation functions
34 *
35 * Modifications:
36 * --------------------------------------------------------------------------
37 * $Log: alignmgr2.c,v $
38 * Revision 6.66  2016/09/02 14:57:38  ucko
39 * Formally clean up calls to printf-family functions that are at least
40 * nominally unsafe, as already done in Debian/Ubuntu packages.
41 *
42 * Revision 6.65  2013/11/26 01:23:42  kans
43 * JIRA:GP-6623 AlnMgr2ConvertAllToDenseSeg bails specifically for Spliced-seg
44 *
45 * Revision 6.64  2013/11/26 00:15:42  kans
46 * JIRA:GP-5360 AlnMgr2ConvertAllToDenseSeg returns Boolean if not Dense-diag or Dense-seg to avoid crash on Spliced-seg
47 *
48 * Revision 6.63  2008/12/01 19:35:39  bollin
49 * prevent crash when mapping positions and row of alignment is entirely in the gap.
50 *
51 * Revision 6.62  2007/03/09 20:37:06  bollin
52 * Fixed insidious double-increment bug in AlnMgr2MergeTwoAlignments - if the
53 * second alignment to be merged had more than one segment, the seg index was
54 * incremented past the number of segments (and some segments were not initialized).
55 *
56 * Revision 6.61  2007/01/09 14:13:52  bollin
57 * Fixed bug in AlnMgr2ExtendToCoords - prior version was not extending on 5' end.
58 *
59 * Revision 6.60  2006/09/06 15:48:33  bollin
60 * removed compiler warnings
61 *
62 * Revision 6.59  2006/09/06 15:14:54  bollin
63 * fixed bug that was generating segments of length zero at the end of an
64 * alignment
65 *
66 * Revision 6.58  2005/03/01 13:56:03  bollin
67 * if the alignment we want to index is a DenseSeg and not a list of alignments,
68 * just give it a simple index - don't decompose to pairwise and reconstruct it.
69 *
70 * Revision 6.57  2005/02/23 14:40:55  bollin
71 * when condensing columns in AlnMgr2CondenseColumns, make sure we do not
72 * disturb the ascending order of starts for each row
73 *
74 * Revision 6.56  2004/09/15 14:59:19  bollin
75 * make sure we do not read outside the alignment index arrays
76 *
77 * Revision 6.55  2004/05/20 19:46:25  bollin
78 * removed unused variables
79 *
80 * Revision 6.54  2004/05/11 13:19:49  bollin
81 * update the dimension of the shared alignment after adding a sequence.
82 *
83 * Revision 6.53  2004/04/13 14:43:07  kskatz
84 * Final resolution of revisions 6.51 and 6.52: reverted 6.52; then  cleaned up readability of AlnMgr2SeqPortRead() and ensured that it will never call SeqPortRead for a length > AM_SEQPORTSIZE
85 *
86 * Revision 6.52  2004/04/12 19:52:15  kskatz
87 * Revision 6.51 was right neighborhood,wrong off-by-one: It was in AlnMgr2ComputeFreqMatrix() call to AlnMgr2SeqPortRead() when using l+AM_SEQPORTSIZE instead of l+AM_SEQPORTSIZE-1
88 *
89 * Revision 6.51  2004/04/12 17:00:44  kskatz
90 * Fixed off-by-one error in AlnMgr2SeqPortRead() length passed to SeqPortRead(); stop-start+1 changed to stop-start
91 *
92 * Revision 6.50  2004/03/11 14:15:41  bollin
93 * added extra check in AlnMgr2GetNthSeqIdPtr to avoid core dump if there are
94 * fewer than N SeqIDs in the alignment.
95 *
96 * Revision 6.49  2003/10/20 17:54:34  kans
97 * AlnMgr2ComputeFreqMatrix protect against dereferencing NULL bsp
98 *
99 * Revision 6.48  2003/10/09 13:46:52  rsmith
100 * Add AlnMgr2GetFirstNForSipList.
101 *
102 * Revision 6.47  2003/05/15 18:53:10  rsmith
103 * in AlnMgr2GetSeqRangeForSipInStdSeg always return start & stop in coordinate order. Do not assume what minus strand will do or not.
104 *
105 * Revision 6.46  2003/04/24 20:28:48  rsmith
106 * made AlnMgr2GetNthStdSeg use 1 based numbering like the other Nth functions.
107 *
108 * Revision 6.45  2003/04/23 20:36:13  rsmith
109 * Added four functions in Section 11 to get information about Std-Seg alignments.
110 *
111 * Revision 6.44  2003/03/31 20:17:11  todorov
112 * Added AlnMgr2IndexSeqAlignEx
113 *
114 * Revision 6.43  2003/02/03 12:36:22  kans
115 * AlnMgr2ComputeScoreForSeqAlign checks return value of AlnMgr2ComputeFreqMatrix, returns -1 if NULL to avoid dereference crash
116 *
117 * Revision 6.42  2002/10/23 16:32:19  todorov
118 * CondenseColumns fixed: needed to move the lens too.
119 *
120 * Revision 6.40  2002/10/16 15:54:28  todorov
121 * use the default dim value if not set
122 *
123 * Revision 6.39  2002/08/07 21:57:33  kans
124 * added AlignMgr2GetFirstNForStdSeg
125 *
126 * Revision 6.38  2002/07/11 14:35:51  kans
127 * fixed Mac complaints about prototypes
128 *
129 * Revision 6.37  2002/07/11 12:55:38  wheelan
130 * added support for std-seg alignments
131 *
132 * Revision 6.36  2002/06/04 17:43:07  todorov
133 * 1) Substituted AddInNewSA with a new and optimized AddInNewPairwiseSA function.
134 * 2) Fixed a few bugs in other functions.
135 *
136 * Revision 6.35  2002/05/17 15:04:42  wheelan
137 * bug fix in ExtendToCoords
138 *
139 * Revision 6.34  2002/05/17 11:02:36  wheelan
140 * bug fixes in Merge func
141 *
142 * Revision 6.32  2002/03/04 17:19:18  wheelan
143 * added AlnMgr2FuseSet, changed behavior of RemoveInconsistent, fixed GetNextAlnBitBugs
144 *
145 * Revision 6.31  2002/01/31 17:41:47  wheelan
146 * various bug fixes -- no more 0 len segments, better handling of rows that are one big insert, etc.
147 *
148 * Revision 6.30  2002/01/30 19:12:53  wheelan
149 * added RemoveInconsistentAlnsFromSet, ExtractPairwiseSeqAlign, changed behavior of GetSubAlign, changed structures and behavior of GetNextAlnBit, added GetInterruptInfo, added AlnMgr2IndexAsRows, bug fixes in indexing routines
150 *
151 * Revision 6.29  2002/01/02 15:05:07  wheelan
152 * changes to force more efficient ordering in CompareAsp callbacks, plus more stringent checks in AlnMgr2AddInNewSA
153 *
154 * Revision 6.28  2001/12/28 22:53:20  wheelan
155 * bug fixes; added AlnMgr2DupAlnAndIndexes, changed some New and Free funcs
156 *
157 * Revision 6.27  2001/12/27 16:07:22  wheelan
158 * bug fix in ExtendToEnd
159 *
160 * Revision 6.26  2001/12/20 19:43:20  wheelan
161 * bug fix in GetNextAlnBit -- no more incorrect inserts
162 *
163 * Revision 6.25  2001/12/18 16:36:57  wheelan
164 * scattered fixes to unaligned region code
165 *
166 * Revision 6.24  2001/12/17 19:36:39  wheelan
167 * various fixes in AlnMgr2AddInNewSA
168 *
169 * Revision 6.23  2001/12/14 12:38:50  wheelan
170 * added functions for ddv
171 *
172 * Revision 6.22  2001/12/05 12:25:49  wheelan
173 * bug fix in SortByNthRow
174 *
175 * Revision 6.21  2001/12/04 19:28:55  wheelan
176 * bug fixes in AddInNewSA and in IndexSingleDenseSegSA
177 *
178 * Revision 6.20  2001/12/04 14:31:27  wheelan
179 * fixes to avoid mistakenly processing AM2_LITE as real indexed alignments
180 *
181 * Revision 6.19  2001/11/30 16:55:21  wheelan
182 * added AlnMgr2PadConservatively
183 *
184 * Revision 6.18  2001/11/29 18:38:47  wheelan
185 * cleanup as recommended by Mac compiler
186 *
187 * Revision 6.17  2001/11/29 17:37:16  wheelan
188 * added ExtendToCoords and MergeTwoAlignments
189 *
190 * Revision 6.16  2001/11/27 15:47:40  wheelan
191 * bug fixes in AnchorSeqAlign, DoCondense, and AddInNewSA
192 *
193 * Revision 6.15  2001/11/15 18:23:06  wheelan
194 * small change in AlnMgr2GetNthRowSpan
195 *
196 * Revision 6.14  2001/11/15 18:09:38  wheelan
197 * another bug fix in AddInNewSA
198 *
199 * Revision 6.13  2001/11/15 15:30:54  wheelan
200 * many bugs fixed, leaks plugged, plus reworked AddInNewSA to use new AMSmall field
201 *
202 * Revision 6.12  2001/11/13 14:36:13  wheelan
203 * many bug fixes in AddInNewSA and MapBioseqToSeqAlign
204 *
205 * Revision 6.11  2001/11/08 19:56:07  wheelan
206 * added AlnMgr2GetNthRowSpanInSA, fixed various memory errors
207 *
208 * Revision 6.10  2001/11/08 01:39:15  wheelan
209 * many bug fixes in and around AddInNewSA
210 *
211 * Revision 6.9  2001/11/02 14:01:30  wheelan
212 * bug fixes in AlnMgr2AddInNewSA
213 *
214 * Revision 6.8  2001/10/31 12:00:46  wheelan
215 * commented out the mistakenly uncommented comment
216 *
217 * Revision 6.7  2001/10/30 20:14:38  wheelan
218 * bug fixes for minus strands in AddInNewSA, bug fix in GetSubAlign
219 *
220 * Revision 6.6  2001/10/23 12:14:27  wheelan
221 * changes in AlnMgr2GetNextAlnBit as well as tree-based multiple alignment algorithm
222 *
223 * Revision 6.5  2001/10/18 15:10:53  wheelan
224 * fixed AlnMgr2ComputeScoreForSeqAlign
225 *
226 * Revision 6.4  2001/10/16 12:00:17  wheelan
227 * added GetParent and FreeEitherIndex
228 *
229 * Revision 6.3  2001/10/08 18:43:29  wheelan
230 * added comments
231 *
232 * Revision 6.2  2001/10/03 18:13:01  wheelan
233 * changed some colliding defines
234 *
235 * Revision 6.1  2001/10/03 14:20:11  wheelan
236 * initial checkin
237 *
238 * ==========================================================================
239 *
240 */
241 
242 #include <alignmgr2.h>
243 
244 /***************************************************************************
245 *
246 *  static functions
247 *
248 ***************************************************************************/
249 /* SECTION 1 */
250 static SARowDat2Ptr SARowDat2New(void);
251 static void SARowDat2Free(SARowDat2Ptr srdp);
252 static SARowDat2Ptr SARowDat2Copy(SARowDat2Ptr srdp);
253 static SAIndex2Ptr SAIndex2New(void);
254 static SAIndex2Ptr SAIndex2Copy(VoidPtr index);
255 static AMAlignIndex2Ptr AMAlignIndex2Copy(VoidPtr index);
256 static void AMIntervalSetFree(AMIntervalSetPtr amint);
257 /* SECTION 2 */
258 static void AlnMgr2ConvertDendiagToDensegChain(SeqAlignPtr sap);
259 static void AlnMgr2IndexSingleDenseSegSA(SeqAlignPtr sap);
260 static Boolean AlnMgr2UnpackSeqAlign(SeqAlignPtr sap);
261 static Boolean AlnMgr2ConvertAllToDenseSeg(SeqAlignPtr sap);
262 static void AlnMgr2DecomposeToPairwise(SeqAlignPtr sap);
263 static void AlnMgr2HidePairwiseConflicts(SeqAlignPtr sap);
264 static void AlnMgr2SortBySeqId(SeqAlignPtr sap);
265 static int LIBCALLBACK AlnMgr2CompareIds(VoidPtr ptr1, VoidPtr ptr2);
266 static void AlnMgr2TossWorse(SeqAlignPtr sap, Int4 i, Int4 j);
267 static AMIntervalSetPtr AlnMgr2MakeIntervals(SeqAlignPtr sap);
268 static void AlnMgr2SortIntervals(AMIntervalSetPtr amint);
269 static int LIBCALLBACK AlnMgr2CompareIntervals(VoidPtr ptr1, VoidPtr ptr2);
270 static AMVertexPtr PNTR  AlnMgr2MakeVerticesFromIntervals(SeqAlignPtr sap, AMIntervalSetPtr amint_head, AMVertexPtr PNTR vertexhead, AMEdgePtr PNTR edgehead, Int4Ptr numvertices);
271 static void AlnMgr2SortVerticesByNumEdges(AMVertexPtr PNTR vertexarray, Int4 numvertices);
272 static int LIBCALLBACK AlnMgr2CompareVertices(VoidPtr ptr1, VoidPtr ptr2);
273 static void AlnMgr2SortEdgesByWeight(AMEdgePtr PNTR edge_head);
274 static int LIBCALLBACK AlnMgr2CompareEdges(VoidPtr ptr1, VoidPtr ptr2);
275 static Int4 AlnMgr2MatchToVertex(SeqIdPtr sip, Int4 start, Int4 stop, AMVertexPtr PNTR vertexarray, Int4 numvertices);
276 static void AlnMgr2UsePrimsAlgorithm(AMVertexPtr PNTR vertexarray, Int4 numvertices, AMEdgePtr edge_head);
277 static void AlnMgr2RecursePrims(AMVertexPtr PNTR vertexarray, AMEdgePtr edge_head);
278 static AMVertexPtr AlnMgr2GetBetterVertex(AMVertexPtr PNTR vertexarray, AMEdgePtr edge);
279 static AMEdgePtr AlnMgr2GetEdgeList(Int4 vertexnum, AMEdgePtr edge_head, AMEdgePtr already_used);
280 static void AlnMgr2CleanUpLeftovers(AMVertexPtr PNTR vertexarray, Int4 numvertices, AMEdgePtr edge_head);
281 static Boolean AlnMgr2SameSeq(AMVertexPtr vertex1, AMVertexPtr vertex2);
282 static void AlnMgr2BuildAlignmentFromTree(AMVertexPtr PNTR vertexarray, Int4 numvertices, AMEdgePtr edge_head, SeqAlignPtr sap);
283 static AMVertexPtr AlnMgr2GetAdjacentVertices(AMVertexPtr vertex, AMVertexPtr PNTR vertexarray, AMEdgePtr edge_head);
284 static void AlnMgr2AddInNewSA(SeqAlignPtr parent, SeqAlignPtr sap);
285 static void AlnMgr2AddInNewPairwiseSA(SeqAlignPtr parent, SeqAlignPtr sap);
286 static Int4 AlnMgr2MapSegStartToSegStart(SeqAlignPtr sap, Int4 pos, Int4 row1, Int4 row2, Int4 len);
287 static Int4 AlnMgr2GetSegForStartPos(SeqAlignPtr sap, Int4 pos, Int4 row);
288 static void AlnMgr2CondenseColumns(DenseSegPtr dsp);
289 static void AlnMgr2CondenseRows(DenseSegPtr dsp, Int4 whichrow);
290 static Boolean AlnMgr2DoCondense(DenseSegPtr dsp, Int4 rownum1, Int4 rownum2);
291 static int LIBCALLBACK AlnMgr2CompareCdRows(VoidPtr ptr1, VoidPtr ptr2);
292 static int LIBCALLBACK AlnMgr2CompareAsps(VoidPtr ptr1, VoidPtr ptr2);
293 static int LIBCALLBACK AlnMgr2CompareAspsMinus(VoidPtr ptr1, VoidPtr ptr2);
294 static void AlnMgr2GetFirstSharedRow(SeqAlignPtr sap1, SeqAlignPtr sap2, Int4Ptr n1, Int4Ptr n2);
295 static SeqIdPtr AlnMgr2SeqIdListsOverlap(SeqIdPtr sip1, SeqIdPtr sip2);
296 static Int4 AlnMgr2OrderSeqIds(SeqIdPtr sip1, SeqIdPtr sip2);
297 static void AlnMgr2SetUnaln(SeqAlignPtr sap);
298 static int LIBCALLBACK AlnMgr2CompareUnalnAMS(VoidPtr ptr1, VoidPtr ptr2);
299 /* SECTION 4 */
300 static Int4 binary_search_on_uint4_list(Uint4Ptr list, Uint4 pos, Uint4 listlen);
301 static Int4 binary_search_on_uint2_list(Uint2Ptr list, Int4 ele, Uint2 listlen);
302 static void AlnMgr2GetUnalignedInfo(SeqAlignPtr sap, Int4 segment, Int4 row, Int4Ptr from, Int4Ptr to);
303 static void AlnMgr2GetNthSeqRangeInSASet(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop);
304 static Int4 AlnMgr2GetMaxUnalignedLength(SeqAlignPtr sap, Int4 seg);
305 /* SECTION 5 */
306 static void AlnMgr2AnchorChild(SeqAlignPtr sap, Int4 which_row);
307 /* SECTION 8 */
308 static Int4 AlnMgr2GetScoreForPair(Int4 res1, Int4 res2, Boolean is_prot);
309 /* SECTION 9 */
310 static int LIBCALLBACK AMCompareStarts(VoidPtr ptr1, VoidPtr ptr2);
311 
312 
313 typedef struct am_seqpieceset AMSeqPieceSet, PNTR AMSeqPieceSetPtr;
314 typedef struct am_seqpiece AMSeqPiece, PNTR AMSeqPiecePtr;
315 
316 struct am_seqpiece {
317   Int4 beg;
318   Int4 end;
319   Int4 left;
320   Int4 right;
321   Int4 orig_left;
322   Int4 orig_right;
323   Boolean aligned;
324   Int4 seg;
325   Int4 pos;
326   DenseSegPtr alt_dsp;
327   Int4 alt_seg;
328   Int4 alt_pos;
329   AMSeqPiecePtr next;
330   AMSeqPiecePtr prev;
331   AMSeqPieceSetPtr set;
332 };
333 
334 struct am_seqpieceset {
335   AMSeqPiecePtr head;
336   AMSeqPiecePtr tail;
337   DenseSegPtr dsp;
338   DenseSegPtr alt_dsp;
339   Int4 row;
340   Int4 row2;
341   Int4 alt_row;
342   Int4 alt_row2;
343   Uint1 strand;
344   Boolean plus;
345   Int4 max_pos;
346   Boolean sign;
347   AMSeqPieceSetPtr next;
348 };
349 
350 
351 
352 /***************************************************************************
353 *
354 *  SECTION 1: Functions for allocating and freeing data structures used
355 *  by the alignment manager; copying functions are also here.
356 *
357 ***************************************************************************/
358 
359 /* SECTION 1 */
SARowDat2New(void)360 static SARowDat2Ptr SARowDat2New(void)
361 {
362    return (SARowDat2Ptr)MemNew(sizeof(SARowDat2));
363 }
364 
365 /* SECTION 1 */
/*
 * SARowDat2Free releases the four index arrays hanging off a row record
 * (sect, unsect, insect, unaligned) and then the record itself.
 * A NULL srdp is accepted and ignored.
 */
static void SARowDat2Free(SARowDat2Ptr srdp)
{
   if (srdp != NULL)
   {
      if (srdp->sect != NULL)
      {
         MemFree(srdp->sect);
      }
      if (srdp->unsect != NULL)
      {
         MemFree(srdp->unsect);
      }
      /* these two are passed to MemFree whether or not they are set */
      MemFree(srdp->insect);
      MemFree(srdp->unaligned);
      MemFree(srdp);
   }
}
378 
379 /* SECTION 1 */
/*
 * SARowDat2Copy builds an independent duplicate of a row record: each of
 * the four counted Uint2 arrays (sect, unsect, insect, unaligned) is
 * re-allocated at the size given by its counter and copied element by
 * element. Returns NULL when srdp is NULL; otherwise the caller owns the
 * returned record.
 */
static SARowDat2Ptr SARowDat2Copy(SARowDat2Ptr srdp)
{
   SARowDat2Ptr  copy;
   Int4          idx;

   if (srdp == NULL)
      return NULL;
   copy = SARowDat2New();

   /* segments in which the row has sequence */
   copy->numsect = srdp->numsect;
   copy->sect = (Uint2Ptr)MemNew(copy->numsect*sizeof(Uint2));
   idx = 0;
   while (idx < copy->numsect)
   {
      copy->sect[idx] = srdp->sect[idx];
      idx++;
   }

   /* segments in which the row has no sequence */
   copy->numunsect = srdp->numunsect;
   copy->unsect = (Uint2Ptr)MemNew(copy->numunsect*sizeof(Uint2));
   idx = 0;
   while (idx < copy->numunsect)
   {
      copy->unsect[idx] = srdp->unsect[idx];
      idx++;
   }

   copy->numinsect = srdp->numinsect;
   copy->insect = (Uint2Ptr)MemNew(copy->numinsect*sizeof(Uint2));
   idx = 0;
   while (idx < copy->numinsect)
   {
      copy->insect[idx] = srdp->insect[idx];
      idx++;
   }

   copy->numunaln = srdp->numunaln;
   copy->unaligned = (Uint2Ptr)MemNew(copy->numunaln*sizeof(Uint2));
   idx = 0;
   while (idx < copy->numunaln)
   {
      copy->unaligned[idx] = srdp->unaligned[idx];
      idx++;
   }
   return copy;
}
414 
415 /* SECTION 1 */
SAIndex2New(void)416 static SAIndex2Ptr SAIndex2New(void)
417 {
418    SAIndex2Ptr  saip;
419 
420    saip = (SAIndex2Ptr)MemNew(sizeof(SAIndex2));
421    saip->indextype = INDEX_CHILD;
422    saip->freefunc = (SeqAlignIndexFreeFunc)(SAIndex2Free2);
423    saip->anchor = -1;
424    return saip;
425 }
426 
427 /* SECTION 1 */
SAIndex2Free2(VoidPtr index)428 NLM_EXTERN Boolean LIBCALLBACK SAIndex2Free2(VoidPtr index)
429 {
430    Int4        i;
431    SAIndex2Ptr  saip;
432 
433    if (index == NULL)
434       return TRUE;
435    saip = (SAIndex2Ptr)(index);
436    MemFree(saip->aligncoords);
437    for (i=0; i<saip->numrows; i++)
438    {
439       SARowDat2Free(saip->srdp[i]);
440    }
441    MemFree(saip->srdp);
442    MemFree(saip);
443    return TRUE;
444 }
445 
446 /* SECTION 1 */
AlnMgr2FreeInterruptInfo(AMInterrInfoPtr interr)447 NLM_EXTERN void AlnMgr2FreeInterruptInfo(AMInterrInfoPtr interr)
448 {
449    if (interr == NULL)
450       return;
451    MemFree(interr->starts);
452    MemFree(interr->lens);
453    MemFree(interr->types);
454    MemFree(interr);
455 }
456 
457 /* SECTION 1*/
SAIndex2Copy(VoidPtr index)458 static SAIndex2Ptr SAIndex2Copy(VoidPtr index)
459 {
460    Int4         i;
461    SAIndex2Ptr  saip;
462    SAIndex2Ptr  saip2;
463 
464    saip2 = SAIndex2New();
465    saip = (SAIndex2Ptr)(index);
466    saip2->numseg = saip->numseg;
467    saip2->aligncoords = (Uint4Ptr)MemNew(saip2->numseg*sizeof(Uint4));
468    for (i=0; i<saip2->numseg; i++)
469    {
470       saip2->aligncoords[i] = saip->aligncoords[i];
471    }
472    saip2->anchor = saip->anchor;
473    saip2->numrows = saip->numrows;
474    saip2->numseg = saip->numseg;
475    saip2->srdp = (SARowDat2Ptr PNTR)MemNew(saip2->numrows*sizeof(SARowDat2));
476    for (i=0; i<saip2->numrows; i++)
477    {
478       saip2->srdp[i] = SARowDat2Copy(saip->srdp[i]);
479    }
480    saip2->numunaln = saip->numunaln;
481    saip2->unaln = (Uint4Ptr)MemNew(saip2->numunaln*sizeof(Uint4));
482    for (i=0; i<saip2->numunaln; i++)
483    {
484       saip2->unaln[i] = saip->unaln[i];
485    }
486    saip2->numinchain = saip->numinchain;
487    saip2->numsplitaln = saip->numsplitaln;
488    saip2->score = saip->score;
489    saip2->aligned = saip->aligned;
490    return saip2;
491 }
492 
493 /* SECTION 1 */
AMAlignIndex2New(void)494 static AMAlignIndex2Ptr AMAlignIndex2New(void)
495 {
496    AMAlignIndex2Ptr  amaip;
497 
498    amaip = (AMAlignIndex2Ptr)MemNew(sizeof(AMAlignIndex2));
499    amaip->indextype = INDEX_PARENT;
500    amaip->freefunc = (SeqAlignIndexFreeFunc)(AMAlignIndex2Free2);
501    return amaip;
502 }
503 
504 /* SECTION 1 */
AMAlignIndex2Free2(VoidPtr index)505 NLM_EXTERN Boolean LIBCALLBACK AMAlignIndex2Free2(VoidPtr index)
506 {
507    AMAlignIndex2Ptr  amaip;
508    Int4              i;
509 
510    if (index == NULL)
511       return FALSE;
512    amaip = (AMAlignIndex2Ptr)(index);
513    for (i=0; i<amaip->numrows; i++)
514    {
515       SeqIdFree(amaip->ids[i]);
516    }
517    MemFree(amaip->ids);
518    MemFree(amaip->saps);
519    MemFree(amaip->aligned);
520    SeqAlignFree(amaip->sharedaln);
521    MemFree(amaip);
522    return TRUE;
523 }
524 
525 /* SECTION 1 */
AMAlignIndex2Copy(VoidPtr index)526 static AMAlignIndex2Ptr AMAlignIndex2Copy(VoidPtr index)
527 {
528    AMAlignIndex2Ptr  amaip;
529    AMAlignIndex2Ptr  amaip2;
530    Int4              i;
531 
532    if (index == NULL)
533       return NULL;
534    amaip = (AMAlignIndex2Ptr)(index);
535    amaip2 = AMAlignIndex2New();
536    amaip2->alnstyle = amaip->alnstyle;
537    amaip2->anchor = amaip->anchor;
538    amaip2->numrows = amaip->numrows;
539    amaip2->ids = (SeqIdPtr PNTR)MemNew(amaip2->numrows*sizeof(SeqIdPtr));
540    for (i=0; i<amaip2->numrows; i++)
541    {
542       amaip2->ids[i] = SeqIdDup(amaip->ids[i]);
543    }
544    amaip2->numsaps = amaip->numsaps;
545    amaip2->saps = (SeqAlignPtr PNTR)MemNew(amaip2->numsaps*sizeof(SeqAlignPtr));
546    amaip2->aligned = (Boolean PNTR)MemNew(amaip2->numsaps*sizeof(Boolean));
547    for (i=0; i<amaip2->numsaps; i++)
548    {
549       amaip2->saps[i] = SeqAlignDup(amaip->saps[i]);
550       amaip2->aligned[i] = amaip->aligned[i];
551       if (i>0)
552          amaip2->saps[i-1]->next = amaip2->saps[i];
553    }
554    amaip2->sharedaln = AlnMgr2DupAlnAndIndexes(amaip->sharedaln);
555    return amaip2;
556 }
557 
558 /* SECTION 1 */
AMAlignIndexFreeEitherIndex(SeqAlignPtr sap)559 NLM_EXTERN void AMAlignIndexFreeEitherIndex(SeqAlignPtr sap)
560 {
561    if (sap == NULL || sap->saip == NULL)
562       return;
563    if (sap->saip->indextype == INDEX_PARENT)
564       AMAlignIndex2Free2(sap->saip);
565    else
566       SAIndex2Free2(sap->saip);
567    sap->saip = NULL;
568 }
569 
570 /* SECTION 1 */
AlnMgr2DupAlnAndIndexes(SeqAlignPtr sap)571 NLM_EXTERN SeqAlignPtr AlnMgr2DupAlnAndIndexes(SeqAlignPtr sap)
572 {
573    AMAlignIndex2Ptr  amaip;
574    SAIndex2Ptr       saip;
575    SeqAlignPtr       sap_new;
576 
577    if (sap == NULL)
578       return NULL;
579    if (sap->saip == NULL)
580       return (SeqAlignDup(sap));
581    sap_new = NULL;
582    if (sap->saip->indextype == INDEX_CHILD)
583    {
584       sap_new = SeqAlignDup(sap);
585       sap_new->saip = (Pointer)SAIndex2Copy(sap->saip);
586       saip = (SAIndex2Ptr)(sap_new->saip);
587       saip->top = AlnMgr2GetParent(sap);
588    } else if (sap->saip->indextype == INDEX_PARENT)
589    {
590       sap_new = SeqAlignNew();
591       sap_new->type = sap->type;
592       sap_new->segtype = sap->segtype;
593       sap_new->saip = (Pointer)(AMAlignIndex2Copy(sap->saip));
594       amaip = (AMAlignIndex2Ptr)(sap_new->saip);
595       sap_new->segs = amaip->saps[0];
596    }
597    return sap_new;
598 }
599 
600 /* SECTION 1 */
AlnMsgNew2(void)601 NLM_EXTERN AlnMsg2Ptr AlnMsgNew2(void)
602 {
603    AlnMsg2Ptr  amp;
604 
605    amp = (AlnMsg2Ptr)MemNew(sizeof(AlnMsg2));
606    amp->real_from = -2;
607    return amp;
608 }
609 
610 /* SECTION 1 */
AlnMsgFree2(AlnMsg2Ptr amp)611 NLM_EXTERN AlnMsg2Ptr AlnMsgFree2(AlnMsg2Ptr amp)
612 {
613    if (amp->left_interrupt != NULL)
614    {
615       MemFree(amp->left_interrupt);
616       amp->left_interrupt = NULL;
617    }
618    if (amp->right_interrupt != NULL)
619    {
620       MemFree(amp->right_interrupt);
621       amp->right_interrupt = NULL;
622    }
623    MemFree(amp);
624    return NULL;
625 }
626 
627 /* SECTION 1 */
AlnMsgReNew2(AlnMsg2Ptr amp)628 NLM_EXTERN void AlnMsgReNew2(AlnMsg2Ptr amp)
629 {
630    if (amp == NULL)
631       return;
632    if (amp->left_interrupt != NULL)
633    {
634       MemFree(amp->left_interrupt);
635       amp->left_interrupt = NULL;
636    }
637    if (amp->right_interrupt != NULL)
638    {
639       MemFree(amp->right_interrupt);
640       amp->right_interrupt = NULL;
641    }
642    amp->real_from = -2;
643    amp->len = -2;
644    return;
645 }
646 
647 /* SECTION 1 */
/*
 * AMIntervalSetFree releases one interval set: every interval on the
 * int_head chain, the set's SeqId (via SeqIdFree) and the set structure
 * itself. Only this one set is freed; no link to other sets is followed.
 *
 *   amint   set to free; NULL is accepted and ignored, as in the other
 *           free routines of this section.
 */
static void AMIntervalSetFree(AMIntervalSetPtr amint)
{
   AMIntervalPtr  intv;
   AMIntervalPtr  intv_next;

   if (amint == NULL)
      return;
   intv = amint->int_head;
   while (intv != NULL)
   {
      /* remember the successor before the node goes away */
      intv_next = intv->next;
      MemFree(intv);
      intv = intv_next;
   }
   SeqIdFree(amint->sip);
   MemFree(amint);
}
663 
664 /* SECTION 1 */
AMFreqFree(AMFreqPtr afp)665 NLM_EXTERN void AMFreqFree(AMFreqPtr afp)
666 {
667    Int4  i;
668 
669    if (afp == NULL)
670       return;
671    for (i=0; i<afp->size; i++)
672    {
673       MemFree(afp->freq[i]);
674    }
675    MemFree(afp->freq);
676    MemFree(afp);
677 }
678 
679 /* SECTION 1 */
/*
 * AMSeqPieceSetFree releases a whole list of piece sets: for every set
 * reachable through ->next, all pieces on its head chain are freed and
 * then the set itself. Passing NULL does nothing.
 */
static void AMSeqPieceSetFree(AMSeqPieceSetPtr s_set)
{
  AMSeqPieceSetPtr following_set;
  AMSeqPiecePtr piece, following_piece;

  for (; s_set != NULL; s_set = following_set) {
    for (piece = s_set->head; piece != NULL; piece = following_piece) {
      following_piece = piece->next;
      MemFree(piece);
    }
    following_set = s_set->next;
    MemFree(s_set);
  }
}
697 
698 /***************************************************************************
699 *
700 *  SECTION 2: Functions used to create the indexes for parent and child
701 *  seqaligns.
702 *    SECTION 2a: Functions to create indexes for child seqaligns, and
703 *                to convert seqaligns to dense-seg type
704 *    SECTION 2b: Functions to unpack and rearrange complicated seqaligns
705 *                into simple chains of dense-seg and dense-diag types
706 *    SECTION 2c: Functions to create indexes for parent seqaligns
707 *    SECTION 2d: Accessory functions for parent indexing
708 *
709 ***************************************************************************/
710 
711 /***************************************************************************
712 *
713 *  AlnMgr2ConvertDendiagToDensegChain takes a dense-diag style alignment
714 *  and makes each diag into its own denseg seqalign, then links the new
715 *  alignments together.
716 *
717 ***************************************************************************/
718 /* SECTION 2a */
AlnMgr2ConvertDendiagToDensegChain(SeqAlignPtr sap)719 static void AlnMgr2ConvertDendiagToDensegChain(SeqAlignPtr sap)
720 {
721    DenseDiagPtr  ddp;
722    DenseDiagPtr  ddp_next;
723    DenseSegPtr   dsp;
724    Int4          i;
725    SeqAlignPtr   sap_new;
726    SeqAlignPtr   sap_next;
727    SeqAlignPtr   sap_prev;
728 
729    if (sap == NULL || sap->segtype != SAS_DENDIAG)
730       return;
731    sap_next = sap->next;
732    ddp = (DenseDiagPtr)(sap->segs);
733    /* convert the first diag to dense-seg and put it in the original alignment */
734    dsp = DenseSegNew();
735    dsp->ids = ddp->id;
736    ddp->id = NULL;
737    dsp->dim = ddp->dim;
738    dsp->numseg = 1;
739    dsp->starts = (Int4Ptr)MemNew((dsp->dim)*(dsp->numseg)*sizeof(Int4));
740    dsp->lens = (Int4Ptr)MemNew((dsp->numseg)*sizeof(Int4));
741    dsp->strands = (Uint1Ptr)MemNew((dsp->dim)*(dsp->numseg)*sizeof(Uint1));
742    for (i=0; i<dsp->dim; i++)
743    {
744       dsp->starts[i] = ddp->starts[i];
745       if (ddp->strands != NULL)
746          dsp->strands[i] = ddp->strands[i];
747       else
748          dsp->strands[i] = Seq_strand_plus;
749    }
750    dsp->lens[0] = ddp->len;
751    sap->segs = (Pointer)(dsp);
752    sap->segtype = SAS_DENSEG;
753    ddp_next = ddp->next;
754    ddp->next = NULL;
755    DenseDiagFree(ddp);
756    ddp = ddp_next;
757    if (ddp == NULL)
758       return;
759    sap_prev = sap;
760    while (ddp)
761    {
762       sap_new = SeqAlignNew();
763       sap_new->type = SAT_PARTIAL;
764       sap_new->segtype = SAS_DENSEG;
765       sap_new->dim = ddp->dim;
766       dsp = DenseSegNew();
767       dsp->ids = ddp->id;
768       ddp->id = NULL;
769       dsp->dim = ddp->dim;
770       dsp->numseg = 1;
771       dsp->starts = (Int4Ptr)MemNew((dsp->dim)*(dsp->numseg)*sizeof(Int4));
772       dsp->lens = (Int4Ptr)MemNew((dsp->numseg)*sizeof(Int4));
773       dsp->strands = (Uint1Ptr)MemNew((dsp->dim)*(dsp->numseg)*sizeof(Uint1));
774       for (i=0; i<dsp->dim; i++)
775       {
776          dsp->starts[i] = ddp->starts[i];
777          if (ddp->strands != NULL)
778             dsp->strands[i] = ddp->strands[i];
779          else
780             dsp->strands[i] = Seq_strand_plus;
781       }
782       dsp->lens[0] = ddp->len;
783       sap_new->segs = (Pointer)(dsp);
784       ddp_next = ddp->next;
785       ddp->next = NULL;
786       DenseDiagFree(ddp);
787       ddp = ddp_next;
788       sap_prev->next = sap_new;
789       sap_prev = sap_new;
790    }
791    sap_new->next = sap_next;
792 }
793 
794 /* SECTION 2a */
795 /***************************************************************************
796 *
797 *  AlnMgr2IndexSingleDenseSegSA creates the SAIndex2 structure for a given
798 *  dense-seg seqalign. This structure has binary-searchable indexes into
799 *  the segs. If the strands are not allocated, this function allocates
800 *  them and sets them to Seq_strand_plus.
801 *
802 ***************************************************************************/
static void AlnMgr2IndexSingleDenseSegSA(SeqAlignPtr sap)
{
   /* Builds the child index (SAIndex2) for one dense-seg alignment and
    * stores it in sap->saip:
    *   - aligncoords[] : running sum of the lengths of the preceding segments
    *   - per row       : the segments that hold sequence (sect), the
    *                     segments that are gaps (unsect), and the segments
    *                     followed by unaligned sequence (unaligned)
    * An alignment whose segtype is not SAS_DENSEG is left untouched. A
    * missing strands array is allocated and filled with Seq_strand_plus. */
   Int4          cells;
   DenseSegPtr   dsp;
   Int4          edge;
   Boolean       minus;
   Int4          neighbor;
   Int4          nextseg;
   Int4          row;
   SARowDat2Ptr  rowdat;
   SAIndex2Ptr   saip;
   Int4          seg;
   Int4          start;

   if (sap->segtype != SAS_DENSEG)
      return;
   dsp = (DenseSegPtr)(sap->segs);
   if (dsp->strands == NULL)
   {
      dsp->strands = (Uint1Ptr)MemNew(dsp->dim*dsp->numseg*sizeof(Uint1));
      cells = dsp->dim*dsp->numseg;
      for (seg=0; seg<cells; seg++)
      {
         dsp->strands[seg] = Seq_strand_plus;
      }
   }

   saip = SAIndex2New();
   saip->aligncoords = (Uint4Ptr)MemNew((dsp->numseg)*sizeof(Uint4));
   saip->srdp = (SARowDat2Ptr PNTR)MemNew((dsp->dim)*sizeof(SARowDat2Ptr));
   saip->numrows = dsp->dim;
   saip->numseg = dsp->numseg;
   for (row=0; row<dsp->dim; row++)
   {
      saip->srdp[row] = SARowDat2New();
   }

   /* first pass: alignment coordinates, and a count of non-gap segments per row */
   for (seg=0; seg<dsp->numseg; seg++)
   {
      if (seg > 0)
         saip->aligncoords[seg] = saip->aligncoords[seg-1] + dsp->lens[seg-1];
      for (row=0; row<dsp->dim; row++)
      {
         if (dsp->starts[dsp->dim*seg + row] != -1)
            saip->srdp[row]->numsect++;
      }
   }

   for (row=0; row<dsp->dim; row++)
   {
      rowdat = saip->srdp[row];
      rowdat->sect = (Uint2Ptr)MemNew((rowdat->numsect)*sizeof(Uint2));
      rowdat->unsect = (Uint2Ptr)MemNew((dsp->numseg - rowdat->numsect)*sizeof(Uint2));
      rowdat->unaligned = (Uint2Ptr)MemNew(dsp->numseg*sizeof(Uint2));
      rowdat->numsect = 0;  /* reused below as the fill position in sect[] */

      /* second pass: record which segments are sequence and which are gaps */
      for (seg=0; seg<dsp->numseg; seg++)
      {
         if (dsp->starts[dsp->dim*seg + row] != -1)
         {
            rowdat->sect[rowdat->numsect] = seg;
            rowdat->numsect++;
         } else
         {
            rowdat->unsect[rowdat->numunsect] = seg;
            rowdat->numunsect++;
         }
      }

      /* third pass: a segment is flagged when the next segment holding
       * sequence in this row does not start where this one ends.  The
       * strand of the first segment decides which end is compared. */
      minus = (Boolean)(dsp->strands[row] == Seq_strand_minus);
      for (seg=0; seg<dsp->numseg; seg++)
      {
         start = dsp->starts[dsp->dim*seg + row];
         if (start == -1)   /* only blocks with sequence can have flanking unal. regions */
            continue;
         edge = minus ? start : start + dsp->lens[seg];
         if (edge <= -1)
            continue;
         /* find next block of aligned sequence in this row */
         neighbor = -1;
         for (nextseg=seg+1; nextseg<dsp->numseg && neighbor == -1; nextseg++)
         {
            if (dsp->starts[dsp->dim*nextseg + row] != -1)
            {
               if (minus)
                  neighbor = dsp->starts[dsp->dim*nextseg + row] + dsp->lens[nextseg];
               else
                  neighbor = dsp->starts[dsp->dim*nextseg + row];
            }
         }
         if (neighbor > -1 && neighbor != edge)
         {
            rowdat->unaligned[rowdat->numunaln] = seg;
            rowdat->numunaln++;
         }
      }
   }
   sap->saip = (SeqAlignIndexPtr)(saip);
}
910 
911 /* SECTION 2a */
912 /***************************************************************************
913 *
914 *  AlnMgr2IndexSingleChildSeqAlign takes a simple dense-seg or dense-diag
915 *  seqalign, converts it to dense-seg, and then calls
916 *  AlnMgr2IndexSingleDenseSegSA to create the indexes. If the alignment has
917 *  already been indexed, this erases that index and reindexes the alignment.
918 *  (SINGCHILD)
919 *
920 ***************************************************************************/
AlnMgr2IndexSingleChildSeqAlign(SeqAlignPtr sap)921 NLM_EXTERN Boolean AlnMgr2IndexSingleChildSeqAlign(SeqAlignPtr sap)
922 {
923    SeqAlignPtr  salp;
924    SeqAlignPtr  salp_prev;
925    SeqAlignPtr  sap_next;
926 
927    if (sap == NULL)
928       return FALSE;
929    if (sap->saip != NULL)
930    {
931       if (sap->saip->indextype != INDEX_CHILD)
932          return FALSE;
933       SAIndex2Free2(sap->saip);
934       sap->saip = NULL;
935    }
936    sap_next = sap->next;
937    sap->next = NULL;
938    if (sap->segtype == SAS_DISC)
939       return FALSE;
940    if (sap->segtype == SAS_DENDIAG)
941       AlnMgr2ConvertDendiagToDensegChain(sap);
942    salp = sap;
943    salp_prev = sap;
944    while (salp != NULL)
945    {
946       AlnMgr2IndexSingleDenseSegSA(salp);
947       salp_prev = salp;
948       salp = salp->next;
949    }
950    salp_prev->next = sap_next;
951    return TRUE;
952 }
953 
954 /***************************************************************************
955 *
956 *  AlnMgr2UnpackSeqAlign rearranges any seqalign (except alignments with
957 *  more than two levels of nested discontinuous alignments) to a simple
958 *  discontinuous alignment or a linked list of alignments.
959 *
960 ***************************************************************************/
961 /* SECTION 2b */
AlnMgr2UnpackSeqAlign(SeqAlignPtr sap)962 static Boolean AlnMgr2UnpackSeqAlign(SeqAlignPtr sap)
963 {
964    SeqAlignPtr  sap_new;
965    SeqAlignPtr  sap_next;
966    SeqAlignPtr  sap_segs;
967    SeqAlignPtr  sap_segs_head;
968    SeqAlignPtr  sap_segs_prev;
969 
970    if (sap == NULL)
971       return FALSE;
972    sap_segs = NULL;
973    if (sap->segtype == SAS_DISC)
974    {
975       sap_segs_head = (SeqAlignPtr)(sap->segs);
976       if (sap_segs_head->segtype == SAS_DISC)
977       {
978          sap_segs_prev = (SeqAlignPtr)(sap_segs_head->segs);
979          sap_segs_head->segs = NULL;
980          sap_next = sap_segs_head->next;
981          sap_segs_head->next = NULL;
982          SeqAlignFree(sap_segs_head);
983          sap_segs_head = sap_segs_prev;
984          sap->segs = (Pointer)(sap_segs_head);
985          while (sap_segs_prev->next)
986          {
987             sap_segs_prev = sap_segs_prev->next;
988             if (sap_segs_prev->segtype == SAS_DISC)
989                return FALSE;
990          }
991          sap_segs_prev->next = sap_next;
992          sap_segs = sap_next;
993       } else
994          sap_segs = sap_segs_head->next;
995       while (sap_segs)
996       {
997          if (sap_segs->segtype == SAS_DISC)
998          {
999             sap_next = sap_segs->next;
1000             sap_segs->next = NULL;
1001             sap_segs_prev->next = (SeqAlignPtr)(sap_segs->segs);
1002             sap_segs->segs = NULL;
1003             SeqAlignFree(sap_segs);
1004             while (sap_segs_prev->next)
1005             {
1006                sap_segs_prev = sap_segs_prev->next;
1007                if (sap_segs_prev->segtype == SAS_DISC)
1008                   return FALSE;
1009             }
1010             sap_segs_prev->next = sap_next;
1011             sap_segs = sap_next;
1012          } else
1013             sap_segs = sap_segs->next;
1014       }
1015    } else
1016    {
1017       sap_new = SeqAlignNew();
1018       sap_new->type = SAT_GLOBAL;
1019       sap_new->segtype = sap->segtype;
1020       sap_new->dim = sap->dim;
1021       sap_new->segs = sap->segs;
1022       sap_new->master = sap->master;
1023       sap_new->bounds = sap->bounds;
1024       sap_new->next = sap->next;
1025       sap_new->score = sap->score;
1026       sap->next = NULL;
1027       sap->segtype = SAS_DISC;
1028       sap->type = 0;
1029       sap->dim = 0;
1030       sap->master = NULL;
1031       sap->bounds = NULL;
1032       sap->score = NULL;
1033       sap->segs = (Pointer)sap_new;
1034       sap_segs_prev = sap_new;
1035       sap_segs = sap_new->next;
1036       while (sap_segs)
1037       {
1038          if (sap_segs->segtype == SAS_DISC)
1039          {
1040             sap_next = sap_segs->next;
1041             sap_segs->next = NULL;
1042             sap_segs_prev->next = (SeqAlignPtr)(sap_segs->segs);
1043             sap_segs->segs = NULL;
1044             SeqAlignFree(sap_segs);
1045             while (sap_segs_prev->next)
1046             {
1047                sap_segs_prev = sap_segs_prev->next;
1048                if (sap_segs_prev->segtype == SAS_DISC)
1049                   return FALSE;
1050             }
1051             sap_segs_prev->next = sap_next;
1052             sap_segs = sap_next;
1053          } else
1054             sap_segs = sap_segs->next;
1055       }
1056    }
1057    return TRUE;
1058 }
1059 
1060 /* SECTION 2b */
/***************************************************************************
*
*  AlnMgr2UnpackSeqAlignChain unpacks every alignment of a linked list
*  with AlnMgr2UnpackSeqAlign and gathers all resulting children, in
*  order, under the first alignment of the list. The other alignments
*  of the list are freed once their children have been taken over.
*  A NULL list is ignored.
*
***************************************************************************/
static void AlnMgr2UnpackSeqAlignChain(SeqAlignPtr sap)
{
   SeqAlignPtr  children;
   SeqAlignPtr  salp_head;
   SeqAlignPtr  salp_prev;
   SeqAlignPtr  sap_next;
   SeqAlignPtr  sap_orig;

   if (sap == NULL)
      return;
   sap_orig = sap; /* this is the pointer that was passed in; it survives */
   salp_head = salp_prev = NULL;
   while (sap != NULL)
   {
      sap_next = sap->next;
      sap->next = NULL;
      AlnMgr2UnpackSeqAlign(sap);
      children = (SeqAlignPtr)(sap->segs);
      sap->segs = NULL;
      if (children != NULL)
      {
         if (salp_prev != NULL)
            salp_prev->next = children;
         else
            salp_head = children;
         /* advance to the new tail of the collected list */
         salp_prev = children;
         while (salp_prev->next != NULL)
         {
            salp_prev = salp_prev->next;
         }
      }
      if (sap != sap_orig)
         SeqAlignFree(sap);
      sap = sap_next;
   }
   sap_orig->segs = (Pointer)(salp_head);
}
1109 
1110 /* SECTION 2b */
1111 /***************************************************************************
1112 *
1113 *  AlnMgr2ConvertAllToDenseSeg goes through a chain of simple child
1114 *  seqaligns and makes sure that each is a dense-seg seqalign with the
1115 *  strands explicitly allocated; dense-diag alignments are converted and
1116 *  non-allocated strands are allocated and all set to Seq_strand_plus.
1117 *
1118 ***************************************************************************/
AlnMgr2ConvertAllToDenseSeg(SeqAlignPtr sap)1119 static Boolean AlnMgr2ConvertAllToDenseSeg(SeqAlignPtr sap)
1120 {
1121    DenseSegPtr  dsp;
1122    Int4         i;
1123    SeqAlignPtr  sap_next;
1124 
1125    while (sap != NULL)
1126    {
1127       sap_next = sap->next;
1128       if (sap->segtype == SAS_DENDIAG) {
1129          AlnMgr2ConvertDendiagToDensegChain(sap);
1130       }
1131       else if (sap->segtype == SAS_DENSEG)
1132       {
1133          dsp = (DenseSegPtr)(sap->segs);
1134          if (dsp->strands == NULL)
1135          {
1136             dsp->strands = (Uint1Ptr)MemNew((dsp->dim)*(dsp->numseg)*sizeof(Uint1));
1137             for (i=0; i<(dsp->dim)*(dsp->numseg); i++)
1138             {
1139                dsp->strands[i] = Seq_strand_plus;
1140             }
1141          }
1142       }
1143       else if (sap->segtype == SAS_SPLICED)
1144       {
1145         return FALSE;
1146       }
1147       sap = sap_next;
1148    }
1149    return TRUE;
1150 }
1151 
1152 /* SECTION 2c */
1153 /***************************************************************************
1154 *
1155 *  AlnMgr2IndexLite takes a seqalign or a list of seqaligns, converts
1156 *  each alignment to a dense-seg structure and indexes it, and then
1157 *  allocates an AMAlignIndex2 structure and fills in the saps array.
1158 *
1159 ***************************************************************************/
AlnMgr2IndexLite(SeqAlignPtr sap)1160 NLM_EXTERN Boolean AlnMgr2IndexLite(SeqAlignPtr sap)
1161 {
1162    AMAlignIndex2Ptr  amaip;
1163    Int4              i;
1164    SAIndex2Ptr       saip;
1165    SeqAlignPtr       salp;
1166 
1167    if (sap == NULL)
1168       return FALSE;
1169    if (!AlnMgr2UnpackSeqAlign(sap))
1170       return FALSE;
1171    if (!AlnMgr2ConvertAllToDenseSeg((SeqAlignPtr)sap->segs))
1172       return FALSE;
1173    amaip = AMAlignIndex2New();
1174    amaip->alnstyle = AM2_LITE;
1175    salp = (SeqAlignPtr)(sap->segs);
1176    while (salp != NULL)
1177    {
1178       amaip->numsaps++;
1179       AlnMgr2IndexSingleChildSeqAlign(salp);
1180       salp = salp->next;
1181    }
1182    amaip->saps = (SeqAlignPtr PNTR)MemNew((amaip->numsaps)*sizeof(SeqAlignPtr));
1183    salp = (SeqAlignPtr)(sap->segs);
1184    i = 0;
1185    while (salp != NULL)
1186    {
1187       amaip->saps[i] = salp;
1188       i++;
1189       saip = (SAIndex2Ptr)(salp->saip);
1190       saip->numinchain = i;
1191       saip->top = sap;
1192       salp = salp->next;
1193    }
1194    sap->saip = (SeqAlignIndexPtr)amaip;
1195    amaip->aligned = (Boolean PNTR)MemNew((amaip->numsaps)*sizeof(Boolean));
1196    for (i=0; i<amaip->numsaps; i++)
1197    {
1198       amaip->aligned[i] = TRUE;
1199    }
1200    return TRUE;
1201 }
1202 
1203 /* SECTION 2c */
1204 /***************************************************************************
1205 *
1206 *  AlnMgr2IndexSeqAlign takes a seqalign of any type except std-seg and
1207 *  creates indexes on it for easy retrieval of useful information by other
1208 *  AlnMgr2 functions. If the seqalign is a single alignment, that alignment
1209 *  gets a simple index and is left alone otherwise. If the seqalign is
1210 *  a set of alignments or a dense-diag set, the subalignments get
1211 *  individually indexed and then are combined into a (fake) multiple
1212 *  alignment which also gets indexed. The subalignments can now be accessed
1213 *  as a multiple alignment by AlnMgr2 functions.
1214 *
1215 ***************************************************************************/
1216 
AlnMgr2IndexSeqAlign(SeqAlignPtr sap)1217 NLM_EXTERN void AlnMgr2IndexSeqAlign(SeqAlignPtr sap)
1218 {
1219    AlnMgr2IndexSeqAlignEx(sap, TRUE);
1220 }
1221 
AlnMgr2IndexSeqAlignEx(SeqAlignPtr sap,Boolean replace_gi)1222 NLM_EXTERN void AlnMgr2IndexSeqAlignEx(SeqAlignPtr sap, Boolean replace_gi)
1223 {
1224    AMAlignIndex2Ptr  amaip;
1225    AMIntervalSetPtr  amint;
1226    AMIntervalSetPtr  amint_head;
1227    AMEdgePtr         edge;
1228    AMEdgePtr         edge_head;
1229    Int4              i;
1230    Int4              numvertices;
1231    AMVertexPtr       vertex_head;
1232    AMVertexPtr       PNTR vertexarray;
1233 
1234    if (sap == NULL || sap->saip != NULL)
1235       return;
1236    if (replace_gi) {
1237      SAM_ReplaceGI(sap);
1238    }
1239 
1240    if (sap->next == NULL && sap->segtype == SAS_DENSEG)
1241    {
1242      AlnMgr2IndexSingleChildSeqAlign(sap);
1243      return;
1244    }
1245 
1246    AlnMgr2IndexLite(sap);
1247    AlnMgr2DecomposeToPairwise(sap);
1248    amaip = (AMAlignIndex2Ptr)(sap->saip);
1249    amaip->alnstyle = AM2_FULLINDEX;
1250    AlnMgr2HidePairwiseConflicts(sap);
1251    amint_head = AlnMgr2MakeIntervals(sap);
1252    vertex_head = NULL;
1253    edge_head = NULL;
1254    vertexarray = AlnMgr2MakeVerticesFromIntervals(sap, amint_head, &vertex_head, &edge_head, &numvertices);
1255    while (amint_head != NULL)
1256    {
1257       amint = amint_head->next;
1258       AMIntervalSetFree(amint_head);
1259       amint_head = amint;
1260    }
1261    AlnMgr2UsePrimsAlgorithm(vertexarray, numvertices, edge_head);
1262    AlnMgr2BuildAlignmentFromTree(vertexarray, numvertices, edge_head, sap);
1263    for (i=0; i<numvertices; i++)
1264    {
1265       SeqIdFree(vertexarray[i]->sip);
1266       MemFree(vertexarray[i]);
1267    }
1268    MemFree(vertexarray);
1269    while (edge_head != NULL)
1270    {
1271       edge = edge_head->next;
1272       MemFree(edge_head);
1273       edge_head = edge;
1274    }
1275    amaip = (AMAlignIndex2Ptr)(sap->saip);
1276    amaip->alnstyle = AM2_FULLINDEX;
1277 }
1278 
1279 /* SECTION 2c */
1280 /***************************************************************************
1281 *
1282 *  AlnMgr2ReIndexSeqAlign takes an indexed alignment (that has, presumably,
1283 *  been changed), makes sure all child seqaligns are indexed (if they are
1284 *  already indexed they are not reindexed), and reindexes all the child
1285 *  seqaligns as a set.
1286 *
1287 ***************************************************************************/
AlnMgr2ReIndexSeqAlign(SeqAlignPtr sap)1288 NLM_EXTERN void AlnMgr2ReIndexSeqAlign(SeqAlignPtr sap)
1289 {
1290    AMAlignIndex2Ptr   amaip;
1291    AMIntervalSetPtr  amint;
1292    AMIntervalSetPtr  amint_head;
1293    AMEdgePtr         edge_head;
1294    Int4              i;
1295    Int4              numvertices;
1296    AMVertexPtr       vertex_head;
1297    AMVertexPtr       PNTR vertexarray;
1298 
1299    if (sap == NULL)
1300       return;
1301    if (sap->saip == NULL)
1302    {
1303       AlnMgr2IndexSeqAlign(sap);
1304       return;
1305    }
1306    if (sap->saip->indextype == INDEX_CHILD)
1307       return;
1308    amaip = (AMAlignIndex2Ptr)(sap->saip);
1309    for (i=0; i<amaip->numsaps; i++)
1310    {
1311       if (amaip->saps[i]->saip == NULL)
1312          AlnMgr2IndexSingleChildSeqAlign(amaip->saps[i]);
1313    }
1314    if (amaip->alnstyle != AM2_LITE)
1315       return;
1316    AlnMgr2DecomposeToPairwise(sap);
1317    AlnMgr2HidePairwiseConflicts(sap);
1318    amint_head = AlnMgr2MakeIntervals(sap);
1319    vertex_head = NULL;
1320    edge_head = NULL;
1321    vertexarray = AlnMgr2MakeVerticesFromIntervals(sap, amint_head, &vertex_head, &edge_head, &numvertices);
1322    while (amint_head != NULL)
1323    {
1324       amint = amint_head->next;
1325       AMIntervalSetFree(amint_head);
1326       amint_head = amint;
1327    }
1328    AlnMgr2UsePrimsAlgorithm(vertexarray, numvertices, edge_head);
1329    AlnMgr2BuildAlignmentFromTree(vertexarray, numvertices, edge_head, sap);
1330    MemFree(vertexarray);
1331 }
1332 
AlnMgr2CompareByAnchor(VoidPtr ptr1,VoidPtr ptr2)1333 static int LIBCALLBACK AlnMgr2CompareByAnchor(VoidPtr ptr1, VoidPtr ptr2)
1334 {
1335    DenseSegPtr  dsp;
1336    int          ret;
1337    SAIndex2Ptr  saip1;
1338    SAIndex2Ptr  saip2;
1339    SeqAlignPtr  sap1;
1340    SeqAlignPtr  sap2;
1341    SeqIdPtr     sip1;
1342    SeqIdPtr     sip2;
1343    Int4         start1;
1344    Int4         start2;
1345    Int4         stop1;
1346    Int4         stop2;
1347 
1348    sap1 = *((SeqAlignPtr PNTR)ptr1);
1349    sap2 = *((SeqAlignPtr PNTR)ptr2);
1350    saip1 = (SAIndex2Ptr)(sap1->saip);
1351    saip2 = (SAIndex2Ptr)(sap2->saip);
1352    dsp = (DenseSegPtr)(sap1->segs);
1353    if (saip1->tmp == 1)
1354       sip1 = dsp->ids->next;
1355    else
1356       sip1 = dsp->ids;
1357    dsp = (DenseSegPtr)(sap2->segs);
1358    if (saip2->tmp == 1)
1359       sip2 = dsp->ids->next;
1360    else
1361       sip2 = dsp->ids;
1362    ret = AlnMgr2OrderSeqIds(sip1, sip2);
1363    if (ret != 0)
1364       return ret;
1365    /* these share both ids -- put best first */
1366    if (saip1->score == 0)
1367       saip1->score = AlnMgr2ComputeScoreForSeqAlign(sap1);
1368    if (saip2->score == 0)
1369       saip2->score = AlnMgr2ComputeScoreForSeqAlign(sap2);
1370    if (saip1->score > saip2->score)
1371       return -1;
1372    else if (saip1->score < saip2->score)
1373       return 1;
1374    AlnMgr2GetNthSeqRangeInSA(sap1, saip1->tmp, &start1, &stop1);
1375    AlnMgr2GetNthSeqRangeInSA(sap2, saip2->tmp, &start2, &stop2);
1376    if (start1 < start2)
1377       return -1;
1378    else if (start1 > start2)
1379       return 1;
1380    else if (stop1 > stop2)
1381       return -1;
1382    else if (stop1 < stop2)
1383       return 1;
1384    return 0;
1385 }
1386 
1387 /* SECTION 2c */
AlnMgr2IndexAsRows(SeqAlignPtr sap,Uint1 strand,Boolean truncate)1388 NLM_EXTERN Boolean AlnMgr2IndexAsRows(SeqAlignPtr sap, Uint1 strand, Boolean truncate)
1389 {
1390    AMAlignIndex2Ptr  amaip;
1391    DenseSegPtr       dsp;
1392    DenseSegPtr       dsp_tmp;
1393    Boolean           found;
1394    Int4              i;
1395    Boolean           impossible;
1396    Int4              numsaps;
1397    SAIndex2Ptr       saip;
1398    SeqAlignPtr       salp;
1399    SeqAlignPtr       sap_head;
1400    SeqAlignPtr       sap_prev;
1401    SeqAlignPtr       sap_tmp;
1402    SeqAlignPtr       PNTR saparray;
1403    SeqAlignPtr       set_head;
1404    SeqAlignPtr       set_prev;
1405    SeqIdPtr          sharedsip;
1406    SeqIdPtr          sip;
1407    SeqIdPtr          sip_next;
1408    SeqIdPtr          sip_tmp;
1409    Int4              tmp;
1410 
1411    if (sap == NULL)
1412       return FALSE;
1413    if (sap->saip != NULL)
1414       AMAlignIndexFreeEitherIndex(sap);
1415    AlnMgr2IndexLite(sap);
1416    AlnMgr2DecomposeToPairwise(sap);
1417    /* need to figure out which row is shared by all saps */
1418    sap_tmp = (SeqAlignPtr)(sap->segs);
1419    dsp = (DenseSegPtr)(sap_tmp->segs);
1420    sip = dsp->ids;
1421    found = FALSE;
1422    while (!found && sip != NULL)
1423    {
1424       sap_tmp = (SeqAlignPtr)(sap->segs);
1425       sip_next = sip->next;
1426       sip->next = NULL;
1427       impossible = FALSE;
1428       while (!impossible && sap_tmp != NULL)
1429       {
1430          dsp_tmp = (DenseSegPtr)(sap_tmp->segs);
1431          if (AlnMgr2SeqIdListsOverlap(sip, dsp_tmp->ids) == NULL)
1432             impossible = TRUE;
1433          sap_tmp = sap_tmp->next;
1434       }
1435       sip->next = sip_next;
1436       if (!impossible) /* found one that matched a row in every alignment */
1437          found = TRUE;
1438       else
1439          sip = sip_next;
1440    }
1441    if (!found) /* didn't find a seqid that was contained in all alignments */
1442       return FALSE;
1443    /* mark the shared row to make things easier */
1444    sharedsip = SeqIdDup(sip);
1445    sap_tmp = (SeqAlignPtr)(sap->segs);
1446    i = 0;
1447    while (sap_tmp != NULL)
1448    {
1449       saip = (SAIndex2Ptr)(sap_tmp->saip);
1450       dsp_tmp = (DenseSegPtr)(sap_tmp->segs);
1451       if (SeqIdComp(sharedsip, dsp_tmp->ids) == SIC_YES)
1452          saip->tmp = 1;
1453       else
1454          saip->tmp = 2;
1455       sap_tmp = sap_tmp->next;
1456       i++;
1457    }
1458    saparray = (SeqAlignPtr PNTR)MemNew(i*sizeof(SeqAlignPtr));
1459    sap_tmp = (SeqAlignPtr)(sap->segs);
1460    i = 0;
1461    while (sap_tmp != NULL)
1462    {
1463       saparray[i] = sap_tmp;
1464       i++;
1465       sap_tmp = sap_tmp->next;
1466    }
1467    numsaps = i;
1468    HeapSort(saparray, i, sizeof(SeqAlignPtr), AlnMgr2CompareByAnchor);
1469    /* now each clump of alignments is a row -- need to eliminate overlaps next */
1470    sip = NULL;
1471    i = 0;
1472    sap_head = sap_prev = NULL;
1473    while (i<numsaps)
1474    {
1475       saparray[i]->next = NULL;
1476       set_head = set_prev = saparray[i];
1477       saip = (SAIndex2Ptr)(saparray[i]->saip);
1478       sip = AlnMgr2GetNthSeqIdPtr(saparray[i], 3-saip->tmp); /* get other seqid */
1479       i++;
1480       if (i<numsaps)
1481          sip_tmp = AlnMgr2GetNthSeqIdPtr(saparray[i], 3-saip->tmp);
1482       while (i<numsaps && SeqIdComp(sip, sip_tmp) == SIC_YES)
1483       {
1484          set_prev->next = saparray[i];
1485          set_prev = saparray[i];
1486          saparray[i]->next = NULL;
1487          i++;
1488          SeqIdFree(sip_tmp);
1489          if (i<numsaps)
1490             sip_tmp = AlnMgr2GetNthSeqIdPtr(saparray[i], 3-saip->tmp);
1491       }
1492       AlnMgr2IndexLite(set_head);
1493       if (!truncate)
1494          AlnMgr2RemoveInconsistentAlnsFromSet(set_head, 0);
1495       else
1496          AlnMgr2RemoveInconsistentAlnsFromSet(set_head, -1);
1497       sap_tmp = (SeqAlignPtr)(set_head->segs);
1498       while (sap_tmp != NULL)
1499       {
1500          saip = (SAIndex2Ptr)(sap_tmp->saip);
1501          dsp_tmp = (DenseSegPtr)(sap_tmp->segs);
1502          if (SeqIdComp(sharedsip, dsp_tmp->ids) == SIC_YES)
1503             saip->tmp = 1;
1504          else
1505             saip->tmp = 2;
1506          sap_tmp = sap_tmp->next;
1507       }
1508       if (sap_head != NULL)
1509          sap_prev->next = set_head;
1510       else
1511          sap_head = sap_prev = set_head;
1512       while (sap_prev->next != NULL)
1513       {
1514          sap_prev = sap_prev->next;
1515       }
1516       sap_prev->next = NULL;
1517    }
1518    /* now we have lots of freed pointers sitting in the array */
1519    MemFree(saparray);
1520    saparray = NULL;
1521    /* sap_head is the head of a chain of LITE-indexed alignments, each of which is one row */
1522    /* first make sure that the shared row is on the requested strand */
1523    sap_tmp = sap_head;
1524    if (strand == Seq_strand_both || strand == Seq_strand_unknown || strand == 0)
1525       strand = Seq_strand_plus;
1526    while (sap_tmp != NULL)
1527    {
1528       salp = (SeqAlignPtr)(sap_tmp->segs);
1529       saip = (SAIndex2Ptr)(salp->saip);
1530       /* strand is same for all children */
1531       if (AlnMgr2GetNthStrand(salp, saip->tmp) != strand)
1532       {
1533          SeqAlignListReverseStrand(salp);
1534          while (salp != NULL)
1535          {
1536             saip = (SAIndex2Ptr)salp->saip;
1537             tmp = saip->tmp;
1538             SAIndex2Free2(salp->saip);
1539             salp->saip = NULL;
1540             AlnMgr2IndexSingleChildSeqAlign(salp);
1541             saip = (SAIndex2Ptr)salp->saip;
1542             saip->tmp = tmp;
1543             salp = salp->next;
1544          }
1545       }
1546       sap_tmp = sap_tmp->next;
1547    }
1548    sap_tmp = sap_head;
1549    sap->segs = NULL;
1550    AMAlignIndex2Free2(sap->saip);
1551    sap->saip = (SeqAlignIndexPtr)AMAlignIndex2New();
1552    amaip = (AMAlignIndex2Ptr)(sap->saip);
1553    amaip->alnstyle = AM2_FULLINDEX;
1554    set_head = set_prev = NULL;
1555    while (sap_tmp != NULL)
1556    {
1557       salp = (SeqAlignPtr)(sap_tmp->segs);
1558       while (salp != NULL)
1559       {
1560          AlnMgr2AddInNewPairwiseSA(sap, salp);
1561          if (set_head != NULL)
1562          {
1563             set_prev->next = salp;
1564             set_prev = salp;
1565          } else
1566             set_head = set_prev = salp;
1567          salp = salp->next;
1568       }
1569       sap_tmp->segs = NULL;
1570       sap_tmp = sap_tmp->next;
1571    }
1572    AlnMgr2CondenseColumns((DenseSegPtr)(amaip->sharedaln->segs));
1573    AlnMgr2IndexSingleChildSeqAlign(amaip->sharedaln);
1574    set_prev->next = NULL;
1575    sap->segs = (Pointer)(set_head);
1576    SeqAlignListFree(sap_head);
1577    SeqIdFree(sharedsip);
1578    return TRUE;
1579 }
1580 
1581 /* SECTION 2c */
1582 /***************************************************************************
1583 *
1584 *  AlnMgr2IndexIndexedChain takes a linked list of indexed seqaligns
1585 *  and does an in-place transformation to an indexed parent-child
1586 *  seqalign set.
1587 *
1588 ***************************************************************************/
AlnMgr2IndexIndexedChain(SeqAlignPtr sap)1589 NLM_EXTERN void AlnMgr2IndexIndexedChain(SeqAlignPtr sap)
1590 {
1591    AMAlignIndex2Ptr  amaip;
1592    AMIntervalSetPtr  amint;
1593    AMIntervalSetPtr  amint_head;
1594    AMEdgePtr         edge_head;
1595    Int4              numvertices;
1596    AMVertexPtr       vertex_head;
1597    AMVertexPtr       PNTR vertexarray;
1598 
1599    if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_CHILD)
1600       return;
1601    AlnMgr2IndexLite(sap);
1602    AlnMgr2DecomposeToPairwise(sap);
1603    amaip = (AMAlignIndex2Ptr)(sap->saip);
1604    amaip->alnstyle = AM2_FULLINDEX;
1605    AlnMgr2HidePairwiseConflicts(sap);
1606    amint_head = AlnMgr2MakeIntervals(sap);
1607    vertex_head = NULL;
1608    edge_head = NULL;
1609    vertexarray = AlnMgr2MakeVerticesFromIntervals(sap, amint_head, &vertex_head, &edge_head, &numvertices);
1610    while (amint_head != NULL)
1611    {
1612       amint = amint_head->next;
1613       AMIntervalSetFree(amint_head);
1614       amint_head = amint;
1615    }
1616    AlnMgr2UsePrimsAlgorithm(vertexarray, numvertices, edge_head);
1617    AlnMgr2BuildAlignmentFromTree(vertexarray, numvertices, edge_head, sap);
1618    MemFree(vertexarray);
1619 }
1620 
1621 /* SECTION 2c */
1622 /***************************************************************************
1623 *
1624 *  AlnMgr2DecomposeToPairwise takes a parent seqalign and goes through all
1625 *  its children, checking their dimensions. If a child seqalign is found
1626 *  with dimension greater than 2, that alignment is copied into a set of
1627 *  two-row alignments, each new alignment containing the first row of the
1628 *  original alignment and a different row. This function does NOT take out
1629 *  segs with only gaps (is this a problem????). The resulting seqaligns
1630 *  are all individually indexed and then the whole set is indexed lite.
1631 *
1632 ***************************************************************************/
static void AlnMgr2DecomposeToPairwise(SeqAlignPtr sap)
{
   /* Does nothing unless sap carries a parent index.  Children with two
    * rows or fewer are kept as they are; a child with more rows is
    * replaced, in place, by one two-row dense-seg alignment per
    * additional row and is then freed.  The parent index is finally
    * freed and rebuilt with AlnMgr2IndexLite. */
   DenseSegPtr  dsp;
   DenseSegPtr  dsp_orig;
   Int4         i;
   Int4         j;
   Int4         n;
   SAIndex2Ptr   saip;
   SAIndex2Ptr   saip_orig;
   SeqAlignPtr  salp;
   SeqAlignPtr  salp_new;
   SeqAlignPtr  salp_next;
   SeqAlignPtr  salp_prev;

   if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
      return;
   salp = (SeqAlignPtr)(sap->segs);
   salp_prev = NULL;
   while (salp)
   {
      n = AlnMgr2GetNumRows(salp);
      if (n > 2)
      {
         salp_next = salp->next;
         saip_orig = (SAIndex2Ptr)(salp->saip);
         dsp_orig = (DenseSegPtr)(salp->segs);
         for (i=2; i<=n; i++)
         {
            /* pair row 1 with row i; every segment is copied, including
             * segments that are gaps in both rows */
            dsp = DenseSegNew();
            dsp->dim = 2;
            dsp->numseg = dsp_orig->numseg;
            dsp->ids = AlnMgr2GetNthSeqIdPtr(salp, 1);
            dsp->ids->next = AlnMgr2GetNthSeqIdPtr(salp, i);
            dsp->starts = (Int4Ptr)MemNew(dsp->numseg*2*sizeof(Int4));
            dsp->lens = (Int4Ptr)MemNew(dsp->numseg*sizeof(Int4));
            dsp->strands = (Uint1Ptr)MemNew(dsp->numseg*2*sizeof(Uint1));
            for (j=0; j<dsp->numseg; j++)
            {
               dsp->lens[j] = dsp_orig->lens[j];
               dsp->starts[2*j] = dsp_orig->starts[dsp_orig->dim*j];
               dsp->starts[2*j+1] = dsp_orig->starts[dsp_orig->dim*j+i-1];
               dsp->strands[2*j] = dsp_orig->strands[dsp_orig->dim*j];
               dsp->strands[2*j+1] = dsp_orig->strands[dsp_orig->dim*j+i-1];
            }
            /* allocate the new alignment exactly once (a second
             * SeqAlignNew call used to leak the first allocation) */
            salp_new = SeqAlignNew();
            salp_new->dim = 2;
            salp_new->segs = (Pointer)dsp;
            salp_new->segtype = SAS_DENSEG;
            AlnMgr2IndexSingleChildSeqAlign(salp_new);
            saip = (SAIndex2Ptr)(salp_new->saip);
            /* remember which child, and which of its rows, this came from */
            saip->numinchain = saip_orig->numinchain;
            saip->numsplitaln = i-1;
            if (salp_prev == NULL)
            {
               salp_prev = salp_new;
               sap->segs = (Pointer)salp_new;
            } else
            {
               salp_prev->next = salp_new;
               salp_prev = salp_new;
            }
         }
         salp_prev->next = salp_next;
         salp->next = NULL;
         SeqAlignFree(salp);
         salp = salp_next;
      } else
      {
         salp_prev = salp;
         salp = salp->next;
      }
   }
   AMAlignIndex2Free2(sap->saip);
   sap->saip = NULL;
   AlnMgr2IndexLite(sap);
}
1710 
1711 /* SECTION 2c */
1712 /***************************************************************************
1713 *
1714 *  AlnMgr2HidePairwiseConflicts looks through a set of indexed seqaligns
1715 *  to find pairs of alignments that share the same seqids and that provide
1716 *  conflicting information. These pairs are then sent to AlnMgr2TossWorse,
1717 *  which hides the worse alignment by unaligning it. Note that the hidden
1718 *  alignments are not destroyed and are not taken out of the set.
1719 *
1720 ***************************************************************************/
AlnMgr2HidePairwiseConflicts(SeqAlignPtr sap)1721 static void AlnMgr2HidePairwiseConflicts(SeqAlignPtr sap)
1722 {
1723    AMAlignIndex2Ptr  amaip;
1724    Int4             i;
1725    Boolean          inset;
1726    Int4             j;
1727    Boolean          match;
1728    SeqIdPtr         sip11;
1729    SeqIdPtr         sip12;
1730    SeqIdPtr         sip21;
1731    SeqIdPtr         sip22;
1732    Int4             start11;
1733    Int4             start12;
1734    Int4             start21;
1735    Int4             start22;
1736    Int4             stop11;
1737    Int4             stop12;
1738    Int4             stop21;
1739    Int4             stop22;
1740    Int4Ptr          tossed;
1741 
1742    if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
1743       return;
1744    amaip = (AMAlignIndex2Ptr)(sap->saip);
1745    AlnMgr2SortBySeqId(sap);
1746    tossed = (Int4Ptr)MemNew(amaip->numsaps*sizeof(Int4));
1747    for (i=0; i<amaip->numsaps-1; i++)
1748    {
1749       for (j=0; j<amaip->numsaps; j++)
1750       {
1751          tossed[j] = 0;
1752       }
1753       inset = TRUE;
1754       for (j=i+1; amaip->aligned[i] && j<amaip->numsaps && inset == TRUE; j++)
1755       {
1756          if (tossed[j] == 0 && amaip->aligned[i] && amaip->aligned[j])
1757          {
1758             sip11 = AlnMgr2GetNthSeqIdPtr(amaip->saps[i], 1);
1759             sip12 = AlnMgr2GetNthSeqIdPtr(amaip->saps[i], 2);
1760             sip21 = AlnMgr2GetNthSeqIdPtr(amaip->saps[j], 1);
1761             sip22 = AlnMgr2GetNthSeqIdPtr(amaip->saps[j], 2);
1762             match = FALSE;
1763             if (SeqIdComp(sip11, sip21) == SIC_YES && SeqIdComp(sip12, sip22) == SIC_YES)
1764             {
1765                match = TRUE;
1766                AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], 1, &start11, &stop11);
1767                AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], 2, &start12, &stop12);
1768                AlnMgr2GetNthSeqRangeInSA(amaip->saps[j], 1, &start21, &stop21);
1769                AlnMgr2GetNthSeqRangeInSA(amaip->saps[j], 2, &start22, &stop22);
1770             } else if (SeqIdComp(sip11, sip22) == SIC_YES && SeqIdComp(sip12, sip21) == SIC_YES)
1771             {
1772                match = TRUE;
1773                AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], 1, &start11, &stop11);
1774                AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], 2, &start12, &stop12);
1775                AlnMgr2GetNthSeqRangeInSA(amaip->saps[j], 2, &start21, &stop21);
1776                AlnMgr2GetNthSeqRangeInSA(amaip->saps[j], 1, &start22, &stop22);
1777             } else if (SeqIdComp(sip11, sip21) != SIC_YES && SeqIdComp(sip11, sip22) != SIC_YES)
1778                inset = FALSE;
1779             if (match == TRUE)
1780             {
1781                if ((start11 < start21 && stop11 > stop21) || (start11 < stop21 && stop11 > stop21) || (start11 > start21 && stop11 < stop21))
1782                {
1783                   AlnMgr2TossWorse(sap, i, j);
1784                   if (amaip->aligned[j] == 0) /* j just got tossed -- put it in the list */
1785                      tossed[j] = 1;
1786                }
1787             }
1788             SeqIdFree(sip11);
1789 	    SeqIdFree(sip12);
1790 	    SeqIdFree(sip21);
1791 	    SeqIdFree(sip22);
1792          }
1793       }
1794       if (amaip->aligned[i] == 0) /* the query alignment got tossed -- restore */
1795       {                           /* all the ones that it tossed out */
1796          for (j=0; j<amaip->numsaps; j++)
1797          {
1798             if (tossed[j] == 1)
1799                amaip->aligned[j] = 1;
1800          }
1801       }
1802    }
1803    MemFree(tossed);
1804 }
1805 
1806 /* SECTION 2c */
AlnMgr2SortBySeqId(SeqAlignPtr sap)1807 static void AlnMgr2SortBySeqId(SeqAlignPtr sap)
1808 {
1809    AMAlignIndex2Ptr  amaip;
1810    Int4             i;
1811    SAIndex2Ptr       saip;
1812 
1813    amaip = (AMAlignIndex2Ptr)(sap->saip);
1814    for (i=0; i<amaip->numsaps; i++)
1815    {
1816       saip = (SAIndex2Ptr)(amaip->saps[i]->saip);
1817       saip->aligned = amaip->aligned[i];
1818    }
1819    HeapSort(amaip->saps, amaip->numsaps, sizeof(amaip->saps), AlnMgr2CompareIds);
1820    for (i=0; i<amaip->numsaps; i++)
1821    {
1822       saip = (SAIndex2Ptr)(amaip->saps[i]->saip);
1823       amaip->aligned[i] = saip->aligned;
1824    }
1825 }
1826 
1827 /* SECTION 2c */
AlnMgr2CompareIds(VoidPtr ptr1,VoidPtr ptr2)1828 static int LIBCALLBACK AlnMgr2CompareIds(VoidPtr ptr1, VoidPtr ptr2)
1829 {
1830    Int4         ret;
1831    SAIndex2Ptr  saip1;
1832    SAIndex2Ptr  saip2;
1833    SeqAlignPtr  sap1;
1834    SeqAlignPtr  sap2;
1835    SeqIdPtr     sip1;
1836    SeqIdPtr     sip2;
1837 
1838    if (ptr1 == NULL || ptr2 == NULL)
1839       return 0;
1840    sap1 = *((SeqAlignPtr PNTR) ptr1);
1841    sap2 = *((SeqAlignPtr PNTR) ptr2);
1842    sip1 = AlnMgr2GetNthSeqIdPtr(sap1, 1);
1843    sip2 = AlnMgr2GetNthSeqIdPtr(sap2, 1);
1844    ret = (AlnMgr2OrderSeqIds(sip1, sip2));
1845    SeqIdFree(sip1);
1846    SeqIdFree(sip2);
1847    if (ret != 0)
1848       return ret;
1849    saip1 = (SAIndex2Ptr)(sap1->saip);
1850    saip2 = (SAIndex2Ptr)(sap2->saip);
1851    if (saip1->score == 0)
1852       saip1->score = AlnMgr2ComputeScoreForSeqAlign(sap1);
1853    if (saip2->score == 0)
1854       saip2->score = AlnMgr2ComputeScoreForSeqAlign(sap2);
1855    if (saip1->score > saip2->score)
1856       return -1;
1857    if (saip1->score < saip2->score)
1858       return 1;
1859    return 0;
1860 }
1861 
1862 /* SECTION 2c */
1863 /***************************************************************************
1864 *
1865 *  Given an indexed seqalign set, AlnMgr2TossWorse looks at the indicated
1866 *  pair of seqaligns, gets their scores, and sets the unaligned bit of the
1867 *  seqalign with the worse score.
1868 *
1869 ***************************************************************************/
AlnMgr2TossWorse(SeqAlignPtr sap,Int4 i,Int4 j)1870 static void AlnMgr2TossWorse(SeqAlignPtr sap, Int4 i, Int4 j)
1871 {
1872    AMAlignIndex2Ptr  amaip;
1873    SAIndex2Ptr       saip1;
1874    SAIndex2Ptr       saip2;
1875    Int4              score1;
1876    Int4              score2;
1877 
1878    amaip = (AMAlignIndex2Ptr)(sap->saip);
1879    saip1 = (SAIndex2Ptr)(amaip->saps[i]->saip);
1880    saip2 = (SAIndex2Ptr)(amaip->saps[j]->saip);
1881    if (saip1->score == 0)
1882       saip1->score = score1 = AlnMgr2ComputeScoreForSeqAlign(amaip->saps[i]);
1883    else
1884       score1 = saip1->score;
1885    if (saip1->score == 0)
1886       saip2->score = score2 = AlnMgr2ComputeScoreForSeqAlign(amaip->saps[j]);
1887    else
1888       score2 = saip2->score;
1889    if (score1 >= score2)
1890       amaip->aligned[j] = FALSE;
1891    else if (score2 > score1)
1892       amaip->aligned[i] = FALSE;
1893 }
1894 
1895 /* SECTION 2c */
1896 /***************************************************************************
1897 *
1898 *  AlnMgr2MakeIntervals takes every row from every seqalign and bins it
1899 *  with other sequences with the same seqid and the same strand.
1900 *
1901 ***************************************************************************/
AlnMgr2MakeIntervals(SeqAlignPtr sap)1902 static AMIntervalSetPtr AlnMgr2MakeIntervals(SeqAlignPtr sap)
1903 {
1904    AMAlignIndex2Ptr   amaip;
1905    AMIntervalSetPtr  amint;
1906    AMIntervalSetPtr  amint_head;
1907    AMIntervalSetPtr  amint_prev;
1908    Boolean           found;
1909    Int4              i;
1910    AMIntervalPtr     intv;
1911    AMIntervalPtr     int_prev;
1912    Int4              j;
1913    Int4              k;
1914    SeqIdPtr          sip;
1915    Uint1             strand;
1916 
1917    if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
1918       return NULL;
1919    amaip = (AMAlignIndex2Ptr)(sap->saip);
1920    amint_head = amint_prev = NULL;
1921    for (i=0; i<amaip->numsaps; i++)
1922    {
1923       if (amaip->aligned[i])
1924       {
1925          j = AlnMgr2GetNumRows(amaip->saps[i]);
1926          for (k=0; k<j; k++)
1927          {
1928             intv = (AMIntervalPtr)MemNew(sizeof(AMInterval));
1929             AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], k+1, &(intv->from), &(intv->to));
1930             sip = AlnMgr2GetNthSeqIdPtr(amaip->saps[i], k+1);
1931             strand = AlnMgr2GetNthStrand(amaip->saps[i], k+1);
1932             if (strand != Seq_strand_minus)
1933                strand = Seq_strand_plus; /* to avoid dealing with Seq_strand_unknown */
1934             intv->strand = strand;
1935             if (amint_head != NULL) /* figure out which interval set this goes in */
1936             {
1937                amint = amint_head;
1938                found = FALSE;
1939                while (amint != NULL && !found)
1940                {
1941                   if (SeqIdComp(sip, amint->sip) == SIC_YES && strand == amint->strand)
1942                      found = TRUE;
1943                   else
1944                      amint = amint->next;
1945                }
1946                if (found) /* add this to the interval set matched */
1947                {
1948                   int_prev = amint->int_head;
1949                   while (int_prev->next != NULL)
1950                   {
1951                      int_prev = int_prev->next;
1952                   }
1953                   int_prev->next = intv;
1954                } else /* make a new interval set */
1955                {
1956                   amint = (AMIntervalSetPtr)MemNew(sizeof(AMIntervalSet));
1957                   amint->sip = SeqIdDup(sip);
1958                   amint->strand = strand;
1959                   amint->int_head = intv;
1960                   amint_prev = amint_head;
1961                   while (amint_prev->next != NULL)
1962                   {
1963                      amint_prev = amint_prev->next;
1964                   }
1965                   amint_prev->next = amint;
1966                }
1967             } else  /* make a new interval set */
1968             {
1969                amint = (AMIntervalSetPtr)MemNew(sizeof(AMIntervalSet));
1970                amint->sip = SeqIdDup(sip);
1971                amint->strand = strand;
1972                amint->int_head = intv;
1973                amint_head = amint;
1974             }
1975             SeqIdFree(sip);
1976          }
1977       }
1978    }
1979    return amint_head;
1980 }
1981 
1982 /* SECTION 2c */
1983 /***************************************************************************
1984 *
1985 *  AlnMgr2MakeVerticesFromIntervals takes the set of intervals created from
1986 *  the alignments, and makes nonoverlapping vertices. Each vertex is a
1987 *  single seqid plus a start and stop (so one seqid may have more than one
1988 *  vertex). Each vertex is also associated with edges, or alignments, which
1989 *  link the vertices together. An edge is simply two vertices plus a weight,
1990 *  which is the alignment quality score. This function creates the vertices,
1991 *  then creates the edges, and sorts the edges and vertices by quality and
1992 *  by number of edges per vertex.
1993 *
1994 ***************************************************************************/
AlnMgr2MakeVerticesFromIntervals(SeqAlignPtr sap,AMIntervalSetPtr amint_head,AMVertexPtr PNTR vertexhead,AMEdgePtr PNTR edgehead,Int4Ptr numvertices)1995 static AMVertexPtr PNTR  AlnMgr2MakeVerticesFromIntervals(SeqAlignPtr sap, AMIntervalSetPtr amint_head, AMVertexPtr PNTR vertexhead, AMEdgePtr PNTR edgehead, Int4Ptr numvertices)
1996 {
1997    AMAlignIndex2Ptr   amaip;
1998    AMIntervalSetPtr  amint;
1999    AMEdgePtr         edge;
2000    AMEdgePtr         edge_head;
2001    AMEdgePtr         edge_prev;
2002    Int4              i;
2003    AMIntervalPtr     intv;
2004    Int4              j;
2005    Int4              k;
2006    Int4              n;
2007    SAIndex2Ptr       saip;
2008    SeqIdPtr          sip1;
2009    SeqIdPtr          sip2;
2010    Int4              start;
2011    Int4              stop;
2012    Int4              v1;
2013    Int4              v2;
2014    AMVertexPtr       vertex;
2015    AMVertexPtr       vertex_head;
2016    AMVertexPtr       vertex_prev;
2017    AMVertexPtr       PNTR vertexarray;
2018 
2019    if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
2020       return NULL;
2021    amint = amint_head;
2022    vertex_head = vertex_prev = NULL;
2023    while (amint != NULL)
2024    {
2025       AlnMgr2SortIntervals(amint);
2026       vertex = (AMVertexPtr)MemNew(sizeof(AMVertex));
2027       intv = amint->int_head;
2028       vertex->sip = SeqIdDup(amint->sip);
2029       vertex->strand = amint->strand;
2030       vertex->from = intv->from;
2031       vertex->to = intv->to;
2032       intv = intv->next;
2033       while (intv != NULL)
2034       {
2035          if ((intv->from <= vertex->to && intv->from >= vertex->from) || (intv->to <= vertex->to && intv->to >= vertex->from))
2036          {
2037             if (intv->from < vertex->from)
2038                vertex->from = intv->from;
2039             if (intv->to > vertex->to)
2040                vertex->to = intv->to;
2041          } else
2042          {
2043             if (vertex_head != NULL)
2044             {
2045                vertex_prev->next = vertex;
2046                vertex_prev = vertex;
2047             } else
2048                vertex_head = vertex_prev = vertex;
2049             vertex = (AMVertexPtr)MemNew(sizeof(AMVertex));
2050             vertex->from = intv->from;
2051             vertex->to = intv->to;
2052             vertex->sip = SeqIdDup(amint->sip);
2053             vertex->strand = amint->strand;
2054          }
2055          intv = intv->next;
2056       }
2057       if (vertex_head != NULL)
2058       {
2059          vertex_prev->next = vertex;
2060          vertex_prev = vertex;
2061       } else
2062          vertex_head = vertex_prev = vertex;
2063       amint = amint->next;
2064    }
2065    vertex = vertex_head;
2066    i = 0;
2067    while (vertex != NULL)
2068    {
2069       i++;
2070       vertex = vertex->next;
2071    }
2072    vertexarray = (AMVertexPtr PNTR)MemNew(i*sizeof(AMVertexPtr));
2073    *numvertices = i;
2074    vertex = vertex_head;
2075    i = 0;
2076    while (vertex != NULL)
2077    {
2078       vertexarray[i] = vertex;
2079       vertex = vertex->next;
2080       i++;
2081    }
2082    amaip = (AMAlignIndex2Ptr)(sap->saip);
2083    /* now make the edges from the alignments */
2084    edge_head = NULL;
2085    for (i=0; i<amaip->numsaps; i++)
2086    {
2087       if (amaip->aligned[i])
2088       {
2089          j = AlnMgr2GetNumRows(amaip->saps[i]);
2090          for (k=0; k<j; k++)
2091          {
2092             sip1 = AlnMgr2GetNthSeqIdPtr(amaip->saps[i], k+1);
2093             AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], k+1, &start, &stop);
2094             v1 = AlnMgr2MatchToVertex(sip1, start, stop, vertexarray, *numvertices);
2095             for (n=k+1; n<j; n++)
2096             {
2097                vertexarray[v1]->numedges++;
2098                sip2 = AlnMgr2GetNthSeqIdPtr(amaip->saps[i], n+1);
2099                AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], n+1, &start, &stop);
2100                v2 = AlnMgr2MatchToVertex(sip2, start, stop, vertexarray, *numvertices);
2101                vertexarray[v2]->numedges++;
2102                edge = (AMEdgePtr)MemNew(sizeof(AMEdge));
2103                edge->vertex1 = v1;
2104                edge->vertex2 = v2;
2105                saip = NULL;
2106                if (amaip->saps[i]->saip != NULL)
2107                   saip = (SAIndex2Ptr)(amaip->saps[i]->saip);
2108                if (saip != NULL && saip->score != 0)
2109                   edge->weight = saip->score;
2110                else
2111                   edge->weight = AlnMgr2ComputeScoreForSeqAlign(amaip->saps[i]);
2112                edge->sap = amaip->saps[i];
2113                edge->used = 0;
2114                if (edge_head != NULL)
2115                {
2116                   edge_prev->next = edge;
2117                   edge_prev = edge;
2118                } else
2119                   edge_head = edge_prev = edge;
2120                SeqIdFree(sip2);
2121             }
2122             SeqIdFree(sip1);
2123          }
2124       }
2125    }
2126    AlnMgr2SortEdgesByWeight(&edge_head);
2127    *vertexhead = vertexarray[0];
2128    *edgehead = edge_head;
2129    return vertexarray;
2130 }
2131 
2132 /* SECTION 2C */
2133 /***************************************************************************
2134 *
2135 *  AlnMgr2SortVerticesByNumEdges -- the name says it all -- each vertex is
2136 *  associated with one or more edges and the most populated vertices get
2137 *  put first.
2138 *
2139 ***************************************************************************/
AlnMgr2SortVerticesByNumEdges(AMVertexPtr PNTR vertexarray,Int4 numvertices)2140 static void AlnMgr2SortVerticesByNumEdges(AMVertexPtr PNTR vertexarray, Int4 numvertices)
2141 {
2142    Int4  i;
2143 
2144    HeapSort(vertexarray, numvertices, sizeof(vertexarray), AlnMgr2CompareVertices);
2145    for (i=0; i<numvertices-1; i++)
2146    {
2147       vertexarray[i]->next = vertexarray[i+1];
2148    }
2149    vertexarray[numvertices-1]->next = NULL;
2150 }
2151 
2152 /* SECTION 2c */
2153 /***************************************************************************
2154 *
2155 *  AlnMgr2CompareVertices is the HeapSort callback for
2156 *  AlnMgr2SortVerticesByNumEdges.
2157 *
2158 ***************************************************************************/
AlnMgr2CompareVertices(VoidPtr ptr1,VoidPtr ptr2)2159 static int LIBCALLBACK AlnMgr2CompareVertices(VoidPtr ptr1, VoidPtr ptr2)
2160 {
2161    AMVertexPtr  vertex1;
2162    AMVertexPtr  vertex2;
2163 
2164    if (ptr1 != NULL && ptr2 != NULL)
2165    {
2166       vertex1 = *((AMVertexPtr PNTR)ptr1);
2167       vertex2 = *((AMVertexPtr PNTR)ptr2);
2168       if (vertex1->numedges > vertex2->numedges)
2169          return -1;
2170       else if (vertex1->numedges < vertex2->numedges)
2171          return 1;
2172       else
2173          return 0;
2174    }
2175    return 0;
2176 }
2177 
2178 /* SECTION 2C */
2179 /***************************************************************************
2180 *
2181 *  AlnMgr2SortEdgesByWeight takes a set of edges (alignments) and sorts
2182 *  them by their preset weights (alignment scores), using AlnMgr2CompareEdges
2183 *  as its HeapSort callback.
2184 *
2185 ***************************************************************************/
AlnMgr2SortEdgesByWeight(AMEdgePtr PNTR edge_head)2186 static void AlnMgr2SortEdgesByWeight(AMEdgePtr PNTR edge_head)
2187 {
2188    AMEdgePtr  edge;
2189    AMEdgePtr  PNTR edgearray;
2190    Int4       i;
2191    Int4       j;
2192 
2193    if (edge_head == NULL || *edge_head == NULL)
2194       return;
2195    edge = *edge_head;
2196    i = 0;
2197    while (edge != NULL)
2198    {
2199       i++;
2200       edge = edge->next;
2201    }
2202    edgearray = (AMEdgePtr PNTR)MemNew(i*sizeof(AMEdgePtr));
2203    edge = *edge_head;
2204    i = 0;
2205    while (edge != NULL)
2206    {
2207       edgearray[i] = edge;
2208       edge = edge->next;
2209       i++;
2210    }
2211    HeapSort(edgearray, i, sizeof(edgearray), AlnMgr2CompareEdges);
2212    for (j=0; j<i-1; j++)
2213    {
2214       edgearray[j]->next = edgearray[j+1];
2215    }
2216    edgearray[i-1]->next = NULL;
2217    *edge_head = edgearray[0];
2218    MemFree(edgearray);
2219 }
2220 
2221 /* SECTION 2c */
2222 /***************************************************************************
2223 *
2224 *  AlnMgr2CompareEdges is the HeapSort callback for AlnMgr2SortEdgesByWeight.
2225 *  It simply compares the preset edge weights.
2226 *
2227 ***************************************************************************/
AlnMgr2CompareEdges(VoidPtr ptr1,VoidPtr ptr2)2228 static int LIBCALLBACK AlnMgr2CompareEdges(VoidPtr ptr1, VoidPtr ptr2)
2229 {
2230    AMEdgePtr  edge1;
2231    AMEdgePtr  edge2;
2232 
2233    if (ptr1 != NULL && ptr2 != NULL)
2234    {
2235       edge1 = *((AMEdgePtr PNTR)ptr1);
2236       edge2 = *((AMEdgePtr PNTR)ptr2);
2237       if (edge1->weight > edge2->weight)
2238          return -1;
2239       else if (edge1->weight < edge2->weight)
2240          return 1;
2241       else
2242          return 0;
2243    }
2244    return 0;
2245 }
2246 
2247 /* SECTION 2c */
2248 /***************************************************************************
2249 *
2250 *  AlnMgr2MatchToVertex is called by AlnMgr2MakeVerticesFromIntervals to
2251 *  figure out which vertex in the array the seqid, start, and stop match to.
2252 *
2253 ***************************************************************************/
AlnMgr2MatchToVertex(SeqIdPtr sip,Int4 start,Int4 stop,AMVertexPtr PNTR vertexarray,Int4 numvertices)2254 static Int4 AlnMgr2MatchToVertex(SeqIdPtr sip, Int4 start, Int4 stop, AMVertexPtr PNTR vertexarray, Int4 numvertices)
2255 {
2256    Int4  i;
2257 
2258    if (sip == NULL || vertexarray == NULL)
2259       return -1;
2260    i = 0;
2261    while (i<numvertices)
2262    {
2263       if (SeqIdComp(sip, vertexarray[i]->sip) == SIC_YES)
2264       {
2265          if (start >= vertexarray[i]->from && start <= vertexarray[i]->to && stop >= vertexarray[i]->from && stop <= vertexarray[i]->to)
2266             return i;
2267       }
2268       i++;
2269    }
2270    return -1;
2271 }
2272 
2273 /* SECTION 2c */
2274 /***************************************************************************
2275 *
2276 *  AlnMgr2SortIntervals sorts the AMIntervals by start position within the
2277 *  set, calling AlnMgr2CompareIntervals in a HeapSort.
2278 *
2279 ***************************************************************************/
AlnMgr2SortIntervals(AMIntervalSetPtr amint)2280 static void AlnMgr2SortIntervals(AMIntervalSetPtr amint)
2281 {
2282    Int4           i;
2283    AMIntervalPtr  PNTR intarray;
2284    AMIntervalPtr  intv;
2285    AMIntervalPtr  intv_head;
2286    Int4           j;
2287 
2288    i = 0;
2289    intv = amint->int_head;
2290    while (intv != NULL)
2291    {
2292       i++;
2293       intv = intv->next;
2294    }
2295    intarray = (AMIntervalPtr PNTR)MemNew(i*sizeof(AMIntervalPtr));
2296    intv = amint->int_head;
2297    i = 0;
2298    while (intv != NULL)
2299    {
2300       intarray[i] = intv;
2301       intv = intv->next;
2302       i++;
2303    }
2304    HeapSort(intarray, i, sizeof(intarray), AlnMgr2CompareIntervals);
2305    intv_head = intv = intarray[0];
2306    for (j=1; j<i; j++)
2307    {
2308       intv->next = intarray[j];
2309       intarray[j]->next = NULL;
2310       intv = intv->next;
2311    }
2312    amint->int_head = intv_head;
2313    MemFree(intarray);
2314 }
2315 
2316 /* SECTION 2c */
2317 /***************************************************************************
2318 *
2319 *  AlnMgr2CompareIntervals is the HeapSort callback for
2320 *  AlnMgr2SortIntervals, which sorts a set of AMIntervals by start position.
2321 *
2322 ***************************************************************************/
AlnMgr2CompareIntervals(VoidPtr ptr1,VoidPtr ptr2)2323 static int LIBCALLBACK AlnMgr2CompareIntervals(VoidPtr ptr1, VoidPtr ptr2)
2324 {
2325    AMIntervalPtr  intv1;
2326    AMIntervalPtr  intv2;
2327 
2328    if (ptr1 != NULL && ptr2 != NULL)
2329    {
2330       intv1 = *((AMIntervalPtr PNTR)ptr1);
2331       intv2 = *((AMIntervalPtr PNTR)ptr2);
2332       if (intv1->from > intv2->from)
2333          return 1;
2334       else if (intv1->from < intv2->from)
2335          return -1;
2336       else
2337       {
2338          if (intv1->to > intv2->to)
2339             return 1;
2340          else
2341             return -1;
2342       }
2343    }
2344    return 0;
2345 }
2346 
2347 /* SECTION 2c */
2348 /***************************************************************************
2349 *
2350 *  AlnMgr2UsePrimsAlgorithm takes the set of edges and vertices produced by
2351 *  earlier functions and creates a subset of edges that can be made into
2352 *  a multiple alignment.
2353 *
2354 ***************************************************************************/
AlnMgr2UsePrimsAlgorithm(AMVertexPtr PNTR vertexarray,Int4 numvertices,AMEdgePtr edge_head)2355 static void AlnMgr2UsePrimsAlgorithm(AMVertexPtr PNTR vertexarray, Int4 numvertices, AMEdgePtr edge_head)
2356 {
2357    if (vertexarray == NULL || edge_head == NULL)
2358       return;
2359    edge_head->used = AM_USED;
2360    vertexarray[edge_head->vertex1]->used = TRUE;
2361    vertexarray[edge_head->vertex2]->used = TRUE;
2362    AlnMgr2RecursePrims(vertexarray, edge_head);
2363    AlnMgr2CleanUpLeftovers(vertexarray, numvertices, edge_head);
2364    return;
2365 }
2366 
2367 /* SECTION 2C */
AlnMgr2GetEdgeList(Int4 vertexnum,AMEdgePtr edge_head,AMEdgePtr already_used)2368 static AMEdgePtr AlnMgr2GetEdgeList(Int4 vertexnum, AMEdgePtr edge_head, AMEdgePtr already_used)
2369 {
2370    AMEdgePtr  edge;
2371    AMEdgePtr  list;
2372    AMEdgePtr  list_head;
2373    AMEdgePtr  list_prev;
2374 
2375    edge = edge_head;
2376    list_head = NULL;
2377    while (edge != NULL)
2378    {
2379       if ((edge->vertex1 == vertexnum || edge->vertex2 == vertexnum) && edge != already_used)
2380       {
2381          list = (AMEdgePtr)MemNew(sizeof(AMEdge));
2382          list->vertex1 = edge->vertex1;
2383          list->vertex2 = edge->vertex2;
2384          list->weight = edge->weight;
2385          list->used = edge->used;
2386          if (list_head != NULL)
2387          {
2388             list_prev->next = list;
2389             list_prev = list;
2390          } else
2391             list_head = list_prev = list;
2392       }
2393       edge = edge->next;
2394    }
2395    return list_head;
2396 }
2397 
2398 /* SECTION 2C */
2399 /***************************************************************************
2400 *
2401 *  AlnMgr2GetBetterVertex returns the vertex of the edge indicated that
2402 *  is shared by the largest number of other edges.
2403 *
2404 ***************************************************************************/
AlnMgr2GetBetterVertex(AMVertexPtr PNTR vertexarray,AMEdgePtr edge)2405 static AMVertexPtr AlnMgr2GetBetterVertex(AMVertexPtr PNTR vertexarray, AMEdgePtr edge)
2406 {
2407    if (vertexarray[edge->vertex1]->numedges >= vertexarray[edge->vertex2]->numedges)
2408       return vertexarray[edge->vertex1];
2409    else
2410       return vertexarray[edge->vertex2];
2411 }
2412 
2413 /* SECTION 2C */
2414 /***************************************************************************
2415 *
2416 *  AlnMgr2RecursePrims is a simple yet powerful algorithm that builds a
2417 *  minimal spanning tree of the edges and vertexes by starting with a set
2418 *  of edges and vertices, picking the best/shortest edge, then picking
2419 *  other edges one by one that join a vertex in the set with a vertex not
2420 *  in the set, until all edges are used (or deemed impossible).
2421 *
2422 ***************************************************************************/
AlnMgr2RecursePrims(AMVertexPtr PNTR vertexarray,AMEdgePtr edge_head)2423 static void AlnMgr2RecursePrims(AMVertexPtr PNTR vertexarray, AMEdgePtr edge_head)
2424 {
2425    AMEdgePtr  edge;
2426    Boolean    found;
2427 
2428    edge = edge_head;
2429    found = FALSE;
2430    /* find an edge that isn't used, that joins a vertex in the set */
2431    /* with a vertex outside the set, and add it and the new vertex */
2432    while (edge != NULL && !found)
2433    {
2434       if (edge->used == AM_NOTUSED)
2435       {
2436          if (vertexarray[edge->vertex1]->used != vertexarray[edge->vertex2]->used)
2437          {
2438             found = TRUE;
2439             vertexarray[edge->vertex1]->used = TRUE;
2440             vertexarray[edge->vertex2]->used = TRUE;
2441             edge->used = AM_USED;
2442             AlnMgr2RecursePrims(vertexarray, edge_head);
2443          }
2444       }
2445       edge = edge->next;
2446    }
2447 }
2448 
2449 /* SECTION 2C */
2450 /***************************************************************************
2451 *
2452 *  AlnMgr2CleanUpLeftovers takes the edges that are unused after
2453 *  AlnMgr2RecursePrims and looks for edges that duplicate another edge in
2454 *  the set or edges that share a seqid (but not a vertex) with another edge
2455 *  already in the set. It adds these edges to the set; they don't belong
2456 *  there in tree-based terms but as alignments they are related.
2457 *
2458 ***************************************************************************/
AlnMgr2CleanUpLeftovers(AMVertexPtr PNTR vertexarray,Int4 numvertices,AMEdgePtr edge_head)2459 static void AlnMgr2CleanUpLeftovers(AMVertexPtr PNTR vertexarray, Int4 numvertices, AMEdgePtr edge_head)
2460 {
2461    AMEdgePtr  edge;
2462    AMEdgePtr  edge_tmp;
2463    Boolean    found;
2464    Int4       i;
2465    BoolPtr    tmpverts;
2466 
2467    tmpverts = (BoolPtr)MemNew(numvertices*sizeof(Boolean));
2468    for (i=0; i<numvertices; i++)
2469    {
2470       tmpverts[i] = vertexarray[i]->used;
2471    }
2472    edge = edge_head;
2473    while (edge != NULL)
2474    {
2475       if (edge->used == AM_NOTUSED)
2476       {
2477          if (tmpverts[edge->vertex1] == TRUE && tmpverts[edge->vertex2] == TRUE)
2478          {
2479             /* see if this edge duplicates another edge; if so, add it */
2480             edge_tmp = edge_head;
2481             found = FALSE;
2482             while (edge_tmp != NULL && !found)
2483             {
2484                if ((edge->vertex1 == edge_tmp->vertex1 && edge->vertex2 == edge_tmp->vertex2) || (edge->vertex1 == edge_tmp->vertex2 && edge->vertex2 == edge_tmp->vertex1))
2485                {
2486                   found = TRUE;
2487                   edge->used = AM_USED;
2488                }
2489                edge_tmp = edge_tmp->next;
2490             }
2491             if (!found)
2492                edge->used = AM_CONFLICT;
2493          } else if (tmpverts[edge->vertex1] == FALSE && tmpverts[edge->vertex2] == FALSE)
2494          {
2495             /* if one of the vertices shares a seqid with a vertex in the set, put both vertices */
2496             /* and the edge in the set. */
2497             found = FALSE;
2498             for (i=0; i<numvertices && !found; i++)
2499             {
2500                if (tmpverts[i] == TRUE && (SeqIdComp(vertexarray[i]->sip, vertexarray[edge->vertex1]->sip) == SIC_YES || SeqIdComp(vertexarray[i]->sip, vertexarray[edge->vertex2]->sip) == SIC_YES))
2501                {
2502                   found = TRUE;
2503                   vertexarray[edge->vertex1]->used = TRUE;
2504                   vertexarray[edge->vertex2]->used = TRUE;
2505                   edge->used = AM_USED;
2506                }
2507             }
2508             if (!found)
2509                edge->used = AM_CONFLICT;
2510          }
2511       }
2512       edge = edge->next;
2513    }
2514    MemFree(tmpverts);
2515 }
2516 
2517 /* SECTION 2C */
2518 /***************************************************************************
2519 *
2520 *  AlnMgr2SameSeq decides whether two vertices come from the same
2521 *  sequence (simple seqid compare).
2522 *
2523 ***************************************************************************/
AlnMgr2SameSeq(AMVertexPtr vertex1,AMVertexPtr vertex2)2524 static Boolean AlnMgr2SameSeq(AMVertexPtr vertex1, AMVertexPtr vertex2)
2525 {
2526    if (vertex1 == NULL || vertex2 == NULL)
2527       return FALSE;
2528    if (SeqIdComp(vertex1->sip, vertex2->sip) == SIC_YES)
2529       return TRUE;
2530    else
2531       return FALSE;
2532 }
2533 
2534 
2535 /* SECTION 2C */
2536 /***************************************************************************
2537 *
2538 *  AlnMgr2BuildAlignmentFromTree performs a breadth-first traversal of
2539 *  the tree, adding edges to the growing alignment as it goes.
2540 *
2541 ***************************************************************************/
AlnMgr2BuildAlignmentFromTree(AMVertexPtr PNTR vertexarray,Int4 numvertices,AMEdgePtr edge_head,SeqAlignPtr sap)2542 static void AlnMgr2BuildAlignmentFromTree(AMVertexPtr PNTR vertexarray, Int4 numvertices, AMEdgePtr edge_head, SeqAlignPtr sap)
2543 {
2544    AMAlignIndex2Ptr  amaip;
2545    AMVertexPtr      adj;
2546    AMVertexPtr      adj_head;
2547    AMEdgePtr        edge;
2548    Int4             i;
2549    Int4             j;
2550    AMQueuePtr       q;
2551    AMQueuePtr       q_head;
2552    AMQueuePtr       q_prev;
2553 
2554    amaip = (AMAlignIndex2Ptr)(sap->saip);
2555    AlnMgr2AddInNewPairwiseSA(sap, edge_head->sap);
2556    edge_head->aligned = TRUE;
2557    q_head = (AMQueuePtr)MemNew(sizeof(AMQueue));
2558    q_head->vertex = AlnMgr2GetBetterVertex(vertexarray, edge_head);
2559    q_head->vertex->visited = TRUE;
2560    /* unlink the vertices */
2561    for (i=0; i<numvertices; i++)
2562    {
2563       vertexarray[i]->next = NULL;
2564    }
2565    while (q_head != NULL)
2566    {
2567       q_prev = q_head;
2568       while (q_prev->next != NULL)
2569       {
2570          q_prev = q_prev->next;
2571       }
2572       adj_head = AlnMgr2GetAdjacentVertices(q_head->vertex, vertexarray, edge_head);
2573       adj = adj_head;
2574       while (adj != NULL)
2575       {
2576          if (adj->visited == FALSE)
2577          {
2578             edge = edge_head;
2579             while (edge != NULL)
2580             {
2581                /* if the edge is used in the tree but not yet aligned, and it's adjacent, align it */
2582                if (edge->aligned == FALSE && edge->used == AM_USED && ((AlnMgr2SameSeq(vertexarray[edge->vertex1], q_head->vertex) && AlnMgr2SameSeq(vertexarray[edge->vertex2], adj)) || (AlnMgr2SameSeq(vertexarray[edge->vertex1], adj) && AlnMgr2SameSeq(vertexarray[edge->vertex2], q_head->vertex))))
2583                {
2584                  AlnMgr2AddInNewPairwiseSA(sap, edge->sap);
2585                  edge->aligned = TRUE;
2586                }
2587                edge = edge->next;
2588             }
2589             q = (AMQueuePtr)MemNew(sizeof(AMQueue));
2590             q->vertex = adj;
2591             q_prev->next = q;
2592             q_prev = q;
2593             adj->visited = TRUE;
2594          }
2595          adj = adj->next;
2596       }
2597       q = q_head->next;
2598       MemFree(q_head);
2599       q_head = q;
2600       if (q_head == NULL) /* look for discontinuous sets -- those will be left over */
2601       {
2602          edge = edge_head;
2603          while (edge != NULL && q_head == NULL)
2604          {
2605             if (edge->aligned == FALSE && (vertexarray[edge->vertex1]->visited == FALSE || vertexarray[edge->vertex2]->visited == FALSE))
2606             {
2607                q_head = (AMQueuePtr)MemNew(sizeof(AMQueue));
2608                q_head->vertex = AlnMgr2GetBetterVertex(vertexarray, edge);
2609                vertexarray[edge->vertex1]->visited = vertexarray[edge->vertex2]->visited = TRUE;
2610             }
2611             edge = edge->next;
2612          }
2613       }
2614    }
2615 /* now the vertices are no longer in a linked list -> put them back together */
2616    for (j=0; j<i-1; j++)
2617    {
2618       vertexarray[j]->next = vertexarray[j+1];
2619       vertexarray[j+1]->next = NULL;
2620    }
2621    AlnMgr2CondenseColumns((DenseSegPtr)(amaip->sharedaln->segs));
2622    AlnMgr2IndexSingleChildSeqAlign(amaip->sharedaln);
2623 }
2624 
2625 /* SECTION 2c */
2626 /***************************************************************************
2627 *
2628 *  AlnMgr2GetAdjacentVertices returns a linked list of all vertices which
2629 *  are adjacent to the given edge; that is, it returns a list of all
2630 *  vertices which are linked by an edge to either vertex of the given edge.
2631 *
2632 ***************************************************************************/
AlnMgr2GetAdjacentVertices(AMVertexPtr vertex,AMVertexPtr PNTR vertexarray,AMEdgePtr edge_head)2633 static AMVertexPtr AlnMgr2GetAdjacentVertices(AMVertexPtr vertex, AMVertexPtr PNTR vertexarray, AMEdgePtr edge_head)
2634 {
2635    AMVertexPtr  adj_head;
2636    AMVertexPtr  adj_prev;
2637    AMEdgePtr    edge;
2638 
2639    edge = edge_head;
2640    adj_head = adj_prev = NULL;
2641    while (edge != NULL)
2642    {
2643       if (AlnMgr2SameSeq(vertexarray[edge->vertex1], vertex))
2644       {
2645          if (adj_head == NULL)
2646             adj_head = adj_prev = vertexarray[edge->vertex2];
2647          else
2648          {
2649             adj_prev->next = vertexarray[edge->vertex2];
2650             adj_prev = adj_prev->next;
2651          }
2652       } else if (AlnMgr2SameSeq(vertexarray[edge->vertex2], vertex))
2653       {
2654          if (adj_head == NULL)
2655             adj_head = adj_prev = vertexarray[edge->vertex1];
2656          else
2657          {
2658             adj_prev->next = vertexarray[edge->vertex1];
2659             adj_prev = adj_prev->next;
2660          }
2661       }
2662       if (adj_prev != NULL)
2663          adj_prev->next = NULL;
2664       edge = edge->next;
2665    }
2666    return adj_head;
2667 }
2668 
2669 /* SECTION 2c */
2670 
AlnMgr2GetFirstRowForSeqId(DenseSegPtr dsp,SeqIdPtr sip,Uint1 strand,Int4Ptr row_curr,SeqIdPtr PNTR sip_curr)2671 static Boolean AlnMgr2GetFirstRowForSeqId(
2672   DenseSegPtr dsp,
2673   SeqIdPtr sip,
2674   Uint1 strand,
2675   Int4Ptr row_curr,
2676   SeqIdPtr PNTR sip_curr)
2677 {
2678   Boolean found = FALSE;
2679 
2680   while (*sip_curr) {
2681     (*row_curr)++;
2682     if (SeqIdComp(sip, *sip_curr) == SIC_YES &&
2683         strand == dsp->strands[*row_curr]) {
2684       found = TRUE;
2685     }
2686     *sip_curr = (*sip_curr)->next;
2687     if (found) return TRUE;
2688   }
2689   return FALSE;
2690 }
2691 
2692 
AlnMgr2CreateSeqPieceSet(DenseSegPtr dsp,Int4 row)2693 static AMSeqPieceSetPtr AlnMgr2CreateSeqPieceSet(DenseSegPtr dsp, Int4 row)
2694 {
2695   AMSeqPieceSetPtr s_set = (AMSeqPieceSetPtr)MemNew(sizeof(AMSeqPieceSet));
2696   AMSeqPiecePtr s = (AMSeqPiecePtr)MemNew(sizeof(AMSeqPiece));
2697   s->beg = -1;
2698   s->end = -1;
2699   s->seg = -1;
2700   s->pos = row - dsp->dim;
2701   s->set = s_set;
2702   s->prev = NULL;
2703   s->next = NULL;
2704   s->left = -1;
2705   s->right = -1;
2706   s->orig_left = -2;
2707   s->orig_right = -2;
2708   s->aligned = FALSE;
2709   s->alt_dsp = NULL;
2710   s->alt_seg = -1;
2711   s->alt_pos = -1;
2712 
2713   s->next = NULL;
2714 
2715   s_set->dsp = dsp;
2716   s_set->row = row;
2717   s_set->row2 = -1;
2718   s_set->alt_row = -1;
2719   s_set->alt_row2 = -1;
2720   s_set->head = s;
2721   s_set->tail = s;
2722   s_set->max_pos = dsp->dim * dsp->numseg;
2723   s_set->strand = dsp->strands[row];
2724   s_set->plus = s_set->strand != Seq_strand_minus;
2725   s_set->next = NULL;
2726 
2727   return s_set;
2728 }
2729 
/***************************************************************************
*
*  AlnMgr2GetNextSeqPiece appends the next piece of a row to the set that
*  s belongs to and returns it. Starting one segment after s, gap
*  segments (start == -1) are skipped; the first non-gap segment supplies
*  beg and end (beg is the higher coordinate when the set is not on the
*  plus strand). If the row has no further non-gap segment, the returned
*  piece has beg = end = -1 and a pos at or beyond max_pos. Returns NULL,
*  without allocating, when s itself is already at or beyond max_pos.
*
***************************************************************************/
static AMSeqPiecePtr AlnMgr2GetNextSeqPiece(AMSeqPiecePtr s)
{
  AMSeqPieceSetPtr owner = s->set;
  DenseSegPtr dsp = owner->dsp;
  Int4 limit = owner->max_pos;
  AMSeqPiecePtr piece;

  if (s->pos >= limit) {
    return NULL;
  }

  /* create the piece and hook it in as the new tail */
  piece = (AMSeqPiecePtr)MemNew(sizeof(AMSeqPiece));
  piece->pos = s->pos + dsp->dim;
  piece->seg = s->seg + 1;
  piece->set = owner;
  piece->prev = s;
  piece->next = NULL;
  s->next = piece;
  owner->tail = piece;

  /* defaults for everything that is filled in later */
  piece->left = -1;
  piece->right = -1;
  piece->orig_left = -2;
  piece->orig_right = -2;
  piece->aligned = FALSE;
  piece->alt_dsp = NULL;
  piece->alt_seg = -1;
  piece->alt_pos = -1;

  /* advance to the first segment in which this row is not a gap */
  for (; piece->pos < limit; piece->seg++, piece->pos += dsp->dim) {
    if (dsp->starts[piece->pos] == -1) {
      continue;
    }
    piece->beg = piece->end = dsp->starts[piece->pos];
    if (owner->plus) {
      piece->end += dsp->lens[piece->seg] - 1;
    } else {
      piece->beg += dsp->lens[piece->seg] - 1;
    }
    return piece;
  }

  /* ran off the end of the matrix: coordinate-less final piece */
  piece->beg = -1;
  piece->end = -1;
  return piece;
}
2781 
/***************************************************************************
*
*  AlnMgr2GetNextLimitedSeqPiece looks for the next non-gap segment of
*  the row that s belongs to, starting one segment after s and going no
*  further than segment right->seg (nor past the end of the matrix). If
*  one is found, a new piece is appended to s's set and returned:
*    - when it lies in the same segment as right, it is marked aligned
*      and its left/right limits are right's beg/end;
*    - otherwise it is marked unaligned and its limits are the end of
*      right's predecessor and the beg of right.
*  Returns NULL, without allocating, when no such segment exists.
*
***************************************************************************/
static AMSeqPiecePtr AlnMgr2GetNextLimitedSeqPiece(
  AMSeqPiecePtr s,
  AMSeqPiecePtr right)
{
  AMSeqPiecePtr left = right->prev;
  AMSeqPieceSetPtr owner = s->set;
  DenseSegPtr dsp = owner->dsp;
  Int4 limit = owner->max_pos;
  Int4 last_seg = right->seg;
  Int4 pos, seg;
  AMSeqPiecePtr piece;

  for (pos = s->pos + dsp->dim, seg = s->seg + 1;
       pos < limit && seg <= last_seg;
       pos += dsp->dim, seg++) {
    if (dsp->starts[pos] == -1) {
      continue; /* gap in this row */
    }

    /* create the piece and hook it in as the new tail */
    piece = (AMSeqPiecePtr)MemNew(sizeof(AMSeqPiece));
    piece->pos = pos;
    piece->seg = seg;
    piece->set = owner;
    piece->next = NULL;
    piece->prev = s;
    s->next = piece;
    owner->tail = piece;

    piece->beg = piece->end = dsp->starts[pos];
    if (owner->plus) {
      piece->end += dsp->lens[seg] - 1;
    } else {
      piece->beg += dsp->lens[seg] - 1;
    }

    /* aligned to a sequence in anchor or not */
    if (seg == last_seg) {
      piece->aligned = TRUE;
      piece->left = right->beg;
      piece->right = right->end;
    } else {
      piece->aligned = FALSE;
      piece->left = left->end;
      piece->right = right->beg;
    }

    /* these are not yet used */
    piece->orig_left = -2;
    piece->orig_right = -2;
    piece->alt_dsp = NULL;
    piece->alt_seg = -1;
    piece->alt_pos = -1;
    return piece;
  }
  return NULL;
}
2837 
/***************************************************************************
*
*  AlnMgr2AddSeqPiece appends a copy of the piece 'what' to the end of
*  'set'. Coordinates, limits and the aligned flag are copied as they
*  are. If 'what' comes from the same dense-seg as 'set', its seg/pos are
*  kept and the alt_* fields are cleared; otherwise seg/pos are set to -1
*  and the origin (dense-seg, seg, pos) is recorded in the alt_* fields.
*
***************************************************************************/
static void AlnMgr2AddSeqPiece(
  AMSeqPieceSetPtr set,
  AMSeqPiecePtr what)
{
  DenseSegPtr source_dsp = what->set->dsp;
  Boolean same_dsp = (Boolean)(source_dsp == set->dsp);
  AMSeqPiecePtr copy;

  copy = (AMSeqPiecePtr)MemNew(sizeof(AMSeqPiece));

  copy->beg = what->beg;
  copy->end = what->end;
  copy->left = what->left;
  copy->right = what->right;
  copy->orig_left = what->orig_left;
  copy->orig_right = what->orig_right;
  copy->aligned = what->aligned;

  /* where the piece lives: in the set's own dense-seg or in another one */
  copy->seg = same_dsp ? what->seg : -1;
  copy->pos = same_dsp ? what->pos : -1;
  copy->alt_dsp = same_dsp ? NULL : source_dsp;
  copy->alt_seg = same_dsp ? -1 : what->seg;
  copy->alt_pos = same_dsp ? -1 : what->pos;

  /* link in as the new tail */
  copy->set = set;
  copy->next = NULL;
  copy->prev = set->tail;
  if (copy->prev != NULL) {
    copy->prev->next = copy;
  }
  set->tail = copy;
}
2875 
/***************************************************************************
*
*  AlnMgr2InsertSeqPiece creates a new piece covering what->beg .. end
*  and links it into where's set immediately before 'where'.
*
*  - If 'where' and 'what' begin at the same coordinate, the new piece
*    takes over where's seg/pos and 'where' is shortened to begin just
*    past 'end' (direction depends on the strand of where's set). When
*    'what' comes from a different dense-seg, its origin is also recorded
*    in the new piece's alt_* fields.
*  - Otherwise the new piece only refers to 'what': via seg/pos if 'what'
*    is in the same dense-seg as 'where', via the alt_* fields if not.
*
*  Limits (left/right, orig_left/orig_right) and the aligned flag are
*  copied from 'what'. 'where' and 'what' may be the same piece, in which
*  case the piece is split at 'end'.
*
*  If 'where' has no predecessor, the new piece becomes the head of the
*  set when 'where' was the head.
*
***************************************************************************/
static void AlnMgr2InsertSeqPiece(
  AMSeqPiecePtr where,
  AMSeqPiecePtr what,
  Int4 end)
{
  AMSeqPiecePtr s;
  DenseSegPtr dsp = where->set->dsp;
  DenseSegPtr alt_dsp = what->set->dsp;

  s = (AMSeqPiecePtr)MemNew(sizeof(AMSeqPiece));
  /* read what->beg before 'where' is trimmed: the two may be the same piece */
  s->beg = what->beg;
  s->end = end;

  if (where->beg == what->beg) {
    s->seg = where->seg;
    s->pos = where->pos;
    where->beg = end + (where->set->plus? 1 : -1);
    if (alt_dsp == dsp) {
      s->alt_dsp = NULL;
      s->alt_seg = -1;
      s->alt_pos = -1;
    } else {
      s->alt_dsp = alt_dsp;
      s->alt_seg = what->seg;
      s->alt_pos = what->pos;
    }
  } else {
    if (alt_dsp == dsp) {
      s->seg = what->seg;
      s->pos = what->pos;
      s->alt_dsp = NULL;
      s->alt_seg = -1;
      s->alt_pos = -1;
    } else {
      s->seg = -1;
      s->pos = -1;
      s->alt_dsp = alt_dsp;
      s->alt_seg = what->seg;
      s->alt_pos = what->pos;
    }
  }
  s->left = what->left;
  s->right = what->right;
  s->orig_left = what->orig_left;
  s->orig_right = what->orig_right;
  s->aligned = what->aligned;
  s->set = where->set;

  /* splice s in front of 'where' */
  s->next = where;
  s->prev = where->prev;
  if (s->prev != NULL) {
    s->prev->next = s;
  } else if (s->set->head == where) {
    s->set->head = s;
  }
  where->prev = s;
}
2936 
/***************************************************************************
*
*  AlnMgr2CopySeg writes one segment of the dense-seg under construction
*  (DSP) from the piece *s_ptr and advances the cursors.
*
*  DSP      destination dense-seg; lens[], starts[] and strands[] must
*           already be allocated.
*  SEG_ptr  in/out: destination segment index (incremented by one).
*  POS_ptr  in/out: flat index into DSP->starts / DSP->strands.
*  Dsp      source dense-seg whose rows form the first Dsp->dim columns
*           of every destination segment.
*  Seg_ptr  in/out: current segment index in Dsp.
*  Pos_ptr  in/out: flat index into Dsp->starts / Dsp->strands.
*  s_ptr    in/out: the piece to copy; moved to the piece's successor on
*           return.
*
*  Special case: when the piece belongs to a set whose row differs from
*  its row2 and the piece has no successor, nothing is written, *s_ptr is
*  set to NULL and none of the cursors are updated.
*
***************************************************************************/
static void AlnMgr2CopySeg(
  DenseSegPtr DSP,
  Int4 PNTR SEG_ptr,
  Int4 PNTR POS_ptr,
  DenseSegPtr Dsp,
  Int4 PNTR Seg_ptr,
  Int4 PNTR Pos_ptr,
  AMSeqPiecePtr PNTR s_ptr)
{
  Int4 i, rdelta, ldelta, POS, Pos, max_Pos, pos2, alt_pos2, SEG, Seg,
    beg, end;
  AMSeqPiecePtr s;
  Boolean plus;

  /* work on local copies of the cursors; they are written back at the end */
  POS = *POS_ptr; Pos = *Pos_ptr;
  SEG = *SEG_ptr; Seg = *Seg_ptr;
  s = *s_ptr;

  if (s->set->row != s->set->row2) { /* if not a B */
    if (!(s->next)) {
      *s_ptr = NULL;
      return; /* skip the last A */
    }
  }

  /* one past the last destination slot that corresponds to a Dsp row */
  max_Pos = POS+Dsp->dim;

  /* the new segment is exactly as long as the piece */
  DSP->lens[SEG] = ABS(s->end - s->beg) + 1;

  if (s->set->dsp != Dsp) { /* the extra row for the non-anchor seq */
    /* all Dsp rows are gaps here; strands are taken from Dsp's first segment */
    for (i = 0; POS < max_Pos; POS++, i++) {
      DSP->starts[POS] = -1;
      DSP->strands[POS] = Dsp->strands[i];
    }
    /* the extra row itself holds the piece */
    DSP->starts[POS] = MIN(s->beg, s->end);
    DSP->strands[POS] = s->set->strand;
    POS++;

  } else { /* not dealing with the extra row itself */

    if (s->pos >= 0 && s->set->row != s->set->row2) { /* Dsp involved */
      /* beg/end: full extent of the Dsp segment the piece was cut from */
      beg = end = s->set->dsp->starts[s->pos];
      if (s->set->plus) {
        end += s->set->dsp->lens[s->seg]-1;
      } else {
        beg += s->set->dsp->lens[s->seg]-1;
      }
      /* ldelta/rdelta: how far the piece is inset from either end of it */
      if (ldelta = ABS(s->beg - beg)) {
        /* need to "continue" from the orig seg */
        Pos = s->pos - s->set->row;
        Seg = s->seg;
      }
      rdelta = ABS(end - s->end);

      /* copy every Dsp row of that segment, shifting non-gap starts by the
         inset that applies to the row's strand */
      for (; POS < max_Pos; POS++, Pos++) {
        DSP->strands[POS] = Dsp->strands[Pos];
        plus = DSP->strands[POS] != Seq_strand_minus;
        if (Dsp->starts[Pos] != -1) {
          DSP->starts[POS] = Dsp->starts[Pos] + (plus ? ldelta : rdelta);
        } else {
          DSP->starts[POS] = -1;
        }
      }
      if (ldelta) {
        /* restore these */
        Pos = *Pos_ptr;
        Seg = *Seg_ptr;
      } else {
        /* piece started at the segment's beginning: Pos was advanced by
           the copy loop above, so move Seg along with it */
        Seg++;
      }

      if (s->alt_dsp) { /* dsp involved too */
        /* same flat position in alt_dsp, but in row alt_row2 instead of alt_row */
        alt_pos2 =
          s->alt_pos + s->set->alt_row2 - s->set->alt_row;
        beg = end = s->alt_dsp->starts[s->alt_pos];
        if (s->alt_dsp->strands[s->alt_pos] == Seq_strand_minus) {
          beg += s->alt_dsp->lens[s->alt_seg]-1;
        } else {
          end += s->alt_dsp->lens[s->alt_seg]-1;
        }
        /* insets of the piece relative to the alt_dsp segment */
        ldelta = ABS(s->beg - beg);
        rdelta = ABS(end - s->end);

        if (s->set->row2 != -1) { /* 2nd row merged*/
          /* POS is one past the Dsp rows: step back to the segment start */
          pos2 = POS - DSP->dim + s->set->row2;
        } else { /* extra row */
          pos2 = POS;
          POS++;
        }
        DSP->strands[pos2] = s->alt_dsp->strands[alt_pos2];
        plus = DSP->strands[pos2] != Seq_strand_minus;
        if (s->alt_dsp->starts[alt_pos2] != -1) {
          DSP->starts[pos2] = s->alt_dsp->starts[alt_pos2] +
            (plus ? ldelta : rdelta);
        } else {
          DSP->starts[pos2] = -1;
        }
      } else { /* dsp not involved */
        if (s->set->row2 == -1) { /* 2nd row not merged */
          /* the extra row is a gap in this segment */
          DSP->starts[POS] = -1;
          DSP->strands[POS] =
            s->set->alt_dsp->strands[s->set->alt_row2];
          POS++;
        }
      }
    } else { /* Dsp not involved */
      /* start with gaps in every Dsp row; strands from Dsp's first segment */
      for (i = 0; POS < max_Pos; POS++, i++) {
        DSP->starts[POS] = -1;
        DSP->strands[POS] = Dsp->strands[i];
      }
      if (s->set->row == s->set->row2) { /* if a B */
        if (!(s->alt_dsp)) {
          Pos += s->set->dsp->dim; /* move to next seg */
          Seg++;
        }
      } else { /* not a B */
        /* same flat position in alt_dsp, but in row alt_row2 instead of alt_row */
        alt_pos2 =
          s->alt_pos + s->set->alt_row2 - s->set->alt_row;

        beg = end = s->alt_dsp->starts[s->alt_pos];
        if (s->alt_dsp->strands[s->alt_pos] == Seq_strand_minus) {
          beg += s->alt_dsp->lens[s->alt_seg]-1;
        } else {
          end += s->alt_dsp->lens[s->alt_seg]-1;
        }
        /* insets of the piece relative to the alt_dsp segment */
        ldelta = ABS(s->beg - beg);
        rdelta = ABS(end - s->end);

        if (s->set->row2 != -1) { /* merged row2 */
          pos2 = POS - DSP->dim + s->set->row2;
        } else {
          pos2 = POS;
          POS++;
        }
        DSP->strands[pos2] = s->alt_dsp->strands[alt_pos2];
        plus = DSP->strands[pos2] != Seq_strand_minus;
        if (s->alt_dsp->starts[alt_pos2] != -1) {
          DSP->starts[pos2] = s->alt_dsp->starts[alt_pos2] +
            (plus ? ldelta : rdelta);
        } else {
          DSP->starts[pos2] = -1;
        }
      }
      /* finally place the piece in its own row of the segment just written
         (NOTE(review): relies on POS having advanced by exactly DSP->dim) */
      DSP->starts[POS + s->set->row - DSP->dim] = MIN(s->beg, s->end);
    }
  }
  /* publish the advanced cursors and step to the next piece */
  (*SEG_ptr)++;
  *Seg_ptr = Seg;
  *s_ptr = (*s_ptr)->next;
  *POS_ptr = POS;
  *Pos_ptr = Pos;
}
3089 
AlnMgr2AddInNewPairwiseSA(SeqAlignPtr parent,SeqAlignPtr sap)3090 NLM_EXTERN void AlnMgr2AddInNewPairwiseSA(SeqAlignPtr parent, SeqAlignPtr sap)
3091 {
3092   AMAlignIndex2Ptr amaip;
3093   DenseSegPtr dsp, Dsp, DSP;
3094   Int4 Seg, SEG;
3095   Int4 Pos, POS, max_POS;
3096   Int4 A_end, B_beg;
3097   Int4 anchor, Anchor;
3098   Int4 row;
3099   SeqIdPtr sip, extra_sip;
3100   AMSeqPieceSetPtr a_set, A_set, b_set, B_set_head, B_set;
3101   AMSeqPiecePtr a, A, b, B;
3102   Boolean conflict;
3103   Boolean a_plus, b_plus;
3104   Int4 upper_limit;
3105   Int4 extra_segs;
3106 
3107   dsp = (DenseSegPtr)(sap->segs);
3108   if (dsp->dim != 2) {
3109     if (dsp->dim == 0) {
3110       dsp->dim = 2; /* set to default */
3111     } else {
3112       ErrPostEx(SEV_ERROR, 0,0,
3113                 "AlnMgr2AddInNewPairwiseSA: dsp->dim (=%d) should be 2.",
3114                 dsp->dim);
3115       return;
3116     }
3117   }
3118   if (dsp->numseg < 1) {
3119     ErrPostEx(SEV_ERROR, 0,0,
3120               "AlnMgr2AddInNewPairwiseSA: dsp->numseg (=%d) should be > 0.",
3121               dsp->numseg);
3122     return;
3123   }
3124 
3125   amaip = (AMAlignIndex2Ptr)(parent->saip);
3126   if (amaip->sharedaln == NULL) {/* first alignment to be added */
3127     SeqAlignPtr salp;
3128     Int4 i;
3129 
3130     salp = SeqAlignDup(sap);
3131     AlnMgr2IndexSingleChildSeqAlign(salp);
3132     amaip->sharedaln = salp;
3133     amaip->numrows = dsp->dim;
3134     sip = dsp->ids;
3135     amaip->ids = (SeqIdPtr PNTR)MemNew((dsp->dim)*sizeof(SeqIdPtr));
3136     i = 0;
3137     while (sip != NULL) {
3138       amaip->ids[i] = SeqIdDup(sip);
3139       sip = sip->next;
3140       i++;
3141     }
3142     MemFree(amaip->saps);
3143     amaip->saps = (SeqAlignPtr PNTR)MemNew(sizeof(SeqAlignPtr));
3144     amaip->saps[0] = sap;
3145     amaip->numsaps = 1;
3146     MemFree(amaip->aligned);
3147     amaip->aligned =  (Boolean PNTR) MemNew(sizeof(Boolean));
3148     amaip->aligned[0] = TRUE;
3149 
3150     return;
3151   }
3152 
3153   /* add the new sap */
3154   amaip->numsaps++;
3155   amaip->saps = (SeqAlignPtr PNTR) MemMore
3156     (amaip->saps, amaip->numsaps*sizeof(SeqAlignPtr));
3157   amaip->saps[amaip->numsaps-1] = sap;
3158   amaip->aligned = (Boolean PNTR) MemMore
3159     (amaip->aligned, (amaip->numsaps)*sizeof(Boolean));
3160   amaip->aligned[amaip->numsaps-1] = TRUE;
3161 
3162   Dsp = (DenseSegPtr)(amaip->sharedaln->segs);
3163 
3164   AlnMgr2GetFirstSharedRow(amaip->sharedaln, sap, &Anchor, &anchor);
3165 
3166   {{ /* make sure the shared rows are on the same strand */
3167     Uint1 Strand, strand;
3168 
3169     Strand = AlnMgr2GetNthStrand(amaip->sharedaln, Anchor);
3170     if (Strand == Seq_strand_unknown)
3171       Strand = Seq_strand_plus;
3172     strand = AlnMgr2GetNthStrand(sap, anchor);
3173     if (strand == Seq_strand_unknown)
3174       strand = Seq_strand_plus;
3175     if (Strand != strand) {
3176       SeqAlignListReverseStrand(sap);
3177       SAIndex2Free2(sap->saip);
3178       sap->saip = NULL;
3179       AlnMgr2IndexSingleChildSeqAlign(sap);
3180       dsp = (DenseSegPtr)(sap->segs);
3181       strand = AlnMgr2GetNthStrand(sap, anchor);
3182       if (strand == Seq_strand_unknown)
3183         strand = Seq_strand_plus;
3184     }
3185     a_plus = strand != Seq_strand_minus;
3186   }}
3187   anchor--; Anchor--; /* make them 0-based */
3188 
3189   /* create new dsp */
3190   DSP = DenseSegNew();
3191   DSP->numseg = Dsp->numseg;
3192   DSP->dim = Dsp->dim;
3193 /*   DSP->ids = SeqIdDupList(Dsp->ids); */
3194 
3195   /* collect other shared seqids */
3196   b_set = B_set = B_set_head = NULL;
3197   row = -1; sip = Dsp->ids;
3198   extra_sip = dsp->ids;
3199   if (anchor == 0) {
3200     extra_sip = extra_sip->next;
3201   }
3202   while (AlnMgr2GetFirstRowForSeqId
3203          (Dsp, extra_sip, dsp->strands[1-anchor], &row, &sip)) {
3204     if (B_set) {
3205       B_set->next = AlnMgr2CreateSeqPieceSet(Dsp, row);
3206       B_set = B_set->next;
3207     } else {
3208       B_set = B_set_head = AlnMgr2CreateSeqPieceSet(Dsp, row);
3209     }
3210   }
3211   b_plus = dsp->strands[1-anchor] != Seq_strand_minus;
3212 
3213   /* ids */
3214   DSP->ids = Dsp->ids;
3215   Dsp->ids = NULL;
3216 
3217   /* collect a, b */
3218   a_set = AlnMgr2CreateSeqPieceSet(dsp, anchor);
3219   a = a_set->head;
3220   b_set = AlnMgr2CreateSeqPieceSet(dsp, 1-anchor);
3221   while (a = AlnMgr2GetNextSeqPiece(a)) {
3222     b = b_set->tail;
3223     while (b = AlnMgr2GetNextLimitedSeqPiece(b, a)) {
3224       if (!b->aligned) {
3225         DSP->numseg++;
3226       }
3227     }
3228   }
3229 
3230   /* collect A, B */
3231   A_set = AlnMgr2CreateSeqPieceSet(Dsp, Anchor);
3232   A = A_set->head;
3233   while (A = AlnMgr2GetNextSeqPiece(A)) {
3234     B_set = B_set_head;
3235     while (B_set) {
3236       B = B_set->tail;
3237       while (B = AlnMgr2GetNextLimitedSeqPiece(B, A)) {};
3238       B_set=B_set->next;
3239     }
3240   }
3241 
3242   /* resolve a, A */
3243   A_set->alt_row = a_set->row;
3244   a = a_set->head->next;
3245   A = A_set->head->next;
3246   while (a && A && a->next && A->next) {
3247     if (a_plus ? a->beg < A->beg : a->beg > A->beg) {
3248       AlnMgr2InsertSeqPiece
3249         (A, a, a_plus ? MIN(a->end, A->beg-1) : MAX(a->end, A->beg+1));
3250       DSP->numseg++;
3251       if (a_plus ? a->end < A->beg : a->end > A->beg) {
3252         a = a->next;
3253       } else {
3254         a->beg = A->beg;
3255       }
3256     } else if (a_plus ? A->beg < a->beg : A->beg > a->beg) {
3257       if (a_plus ? A->end < a->beg : A->end > a->beg) {
3258         A = A->next;
3259       } else {
3260         AlnMgr2InsertSeqPiece(A, A, a_plus ? a->beg - 1 : a->beg + 1);
3261         DSP->numseg++;
3262       }
3263     } else { /* a->beg == A->beg */
3264       if (a_plus ? a->end < A->end : a->end > A->end) {
3265         AlnMgr2InsertSeqPiece(A, a, a->end);
3266         DSP->numseg++;
3267         a = a->next;
3268       } else if (a_plus ? a->end > A->end : a->end < A->end) {
3269         a->beg = A->end + (a_plus ? 1 : -1);
3270         A->alt_dsp = a->set->dsp;
3271         A->alt_seg = a->seg;
3272         A->alt_pos = a->pos;
3273         A = A->next;
3274       } else { /* a->end == A->end */
3275         A->alt_dsp = a->set->dsp;
3276         A->alt_seg = a->seg;
3277         A->alt_pos = a->pos;
3278         a = a->next;
3279         A = A->next;
3280       }
3281     }
3282   }
3283   while (a && a->next) {
3284     AlnMgr2InsertSeqPiece(A, a, a->end);
3285     DSP->numseg++;
3286     a = a->next;
3287   }
3288 
3289   /* set the upper limits */
3290   if (B_set_head) {
3291     if (a_plus) {
3292       upper_limit =
3293         A_set->tail->end = A_set->tail->beg = A_set->tail->prev->end + 1;
3294 
3295       b = b_set->tail;
3296       while (b && b->right == -1) {
3297         b->right = upper_limit;
3298         b = b->prev;
3299       }
3300 
3301       B_set = B_set_head;
3302       while (B_set) {
3303         B = B_set->tail;
3304         while (B && B->right == -1) {
3305           B->right = upper_limit;
3306           B = B->prev;
3307         }
3308         B_set = B_set->next;
3309       }
3310 
3311     } else {
3312       upper_limit =
3313         A_set->head->beg = A_set->head->end = A_set->head->next->beg + 1;
3314 
3315       b = b_set->head;
3316       while (b && b->left == -1) {
3317         b->left = upper_limit;
3318         b = b->next;
3319       }
3320 
3321       B_set = B_set_head;
3322       while (B_set) {
3323         B = B_set->head;
3324         while (B && B->left == -1) {
3325           B->left = upper_limit;
3326           B = B->next;
3327         }
3328         B_set = B_set->next;
3329       }
3330 
3331     }
3332   }
3333 
3334   /* try to resolve b, B */
3335   if (B_set_head) {
3336     b = b_set->head->next;
3337     B_set = B_set_head;
3338     while (B_set) {
3339       B = B_set->head->next;
3340       conflict = FALSE;
3341       extra_segs = 0;
3342       while (b && B) {
3343         if (b_plus ? b->beg < B->beg : b->beg > B->beg) {
3344           if (b_plus ? b->end < B->beg : b->end > B->beg) {
3345             /* trim the limits */
3346             if (a_plus ? B->left <= b->left : B->left >= b->left) {
3347               if (a_plus ? B->right < b->left : B->right > b->left) {
3348                 conflict = TRUE; break;
3349               } else {
3350                 if (B->aligned) {
3351                   conflict = TRUE; break; /* no trimming allowed */
3352                 } else {
3353                   B->left = b->left;
3354                 }
3355               }
3356               if (a_plus ? b->right > B->right : b->right < B->right) {
3357                 if (b->aligned) {
3358                   conflict = TRUE; break; /* no trimming allowed */
3359                 } else {
3360                   b->orig_right = b->right; /* for recovering */
3361                   b->right = B->right;
3362                 }
3363               }
3364             }
3365             AlnMgr2InsertSeqPiece(B, b, b->end);
3366             if (!(b->aligned)) extra_segs++;
3367             b = b->next;
3368           } else {
3369             conflict = TRUE; break;
3370           }
3371 
3372         } else if (b_plus ? B->beg < b->beg : B->beg > b->beg) {
3373           if (b_plus ? B->end < b->beg : B->end > b->beg) {
3374             /* trim the limits */
3375             if (a_plus ? b->left < B->left : b->left > B->left) {
3376               if (a_plus ? b->right < B->left : b->right > B->left) {
3377                 conflict = TRUE; break;
3378               } else {
3379                 if (b->aligned) {
3380                   conflict = TRUE; break; /* no trimming allowed */
3381                 } else {
3382                   b->orig_left = b->left; /* for recovering */
3383                   b->left = B->left;
3384                 }
3385               }
3386               if (a_plus ? B->right > b->right : B->right < b->right) {
3387                 if (B->aligned) {
3388                   conflict = TRUE; break; /* no trimming allowed */
3389                 } else {
3390                   B->right = b->right;
3391                 }
3392               }
3393             }
3394 
3395             B = B->next;
3396 
3397           } else {
3398             conflict = TRUE; break;
3399           }
3400         } else { /* B->beg == b->beg */
3401           conflict = TRUE; break;
3402         }
3403       }
3404       if (!conflict) {
3405         while (b) {
3406           AlnMgr2AddSeqPiece(B_set, b);
3407           if (!(b->aligned)) extra_segs++;
3408           b = b->next;
3409         }
3410 /*         DSP->numseg += extra_segs; */
3411         break;
3412       }
3413       /* conflict, roll back b, recovering limits, try next B */
3414       if (!b) {
3415         b = b_set->tail;
3416       }
3417       while (b) {
3418         if (b->orig_left != -2) {
3419           b->left = b->orig_left;
3420         }
3421         if (b->orig_right != -2) {
3422           b->right = b->orig_right;
3423         }
3424         b = b->prev;
3425       }
3426       b = b_set->head->next;
3427       B_set = B_set->next;
3428     }
3429   }
3430   if (B_set) {  /* B_set has no conflict with b_set */
3431     B = B_set->head->next;
3432     B_set->row2 = B_set->row; /* mark the set */
3433     A_set->row2 = B_set->row;
3434     A_set->alt_row2 = b_set->row;
3435   } else {  /* this mean extra row */
3436     A_set->row2 = -1;
3437     A_set->alt_row2 = b_set->row;
3438     A_set->alt_dsp = b_set->dsp;
3439     DSP->dim++;
3440     sip = DSP->ids;
3441     while (sip->next) {
3442       sip = sip->next;
3443     }
3444     AddSeqId(&sip, extra_sip);
3445 
3446     /* fix the index too */
3447     amaip->numrows = DSP->dim;
3448     amaip->ids = (SeqIdPtr PNTR)MemMore
3449       (amaip->ids,amaip->numrows*sizeof(SeqIdPtr));
3450     amaip->ids[amaip->numrows-1] = SeqIdDup(extra_sip);
3451 
3452     b_set->row2 = b_set->row; /* mark the set */
3453     B = b_set->head->next;
3454     B_beg = -1; /* nothing to comp Bs to */
3455   }
3456 
3457   /* allocate memory for the new sharedaln matrix */
3458   DSP->starts = (Int4Ptr)MemNew(DSP->numseg * DSP->dim * sizeof(Int4));
3459   DSP->strands = (Uint1Ptr)MemNew(DSP->numseg * DSP->dim * sizeof(Uint1));
3460   DSP->lens = (Int4Ptr)MemNew(DSP->numseg * sizeof(Int4));
3461 
3462   /* loop through segments */
3463   POS = 0; Pos = 0; Seg = 0; SEG = 0;
3464   A = A_set->head->next;
3465   while (Seg < Dsp->numseg) {
3466 
3467     A_end = Dsp->starts[Pos+A_set->row];
3468     if (a_plus && A_end >= 0) {
3469       A_end += Dsp->lens[Seg] - 1;
3470     }
3471     if (B_set) {
3472       B_beg = Dsp->starts[Pos+B_set->row];
3473     }
3474 
3475     if (A_end >= 0) {
3476       while (A && (a_plus ? A->end <= A_end : A->end >= A_end)) {
3477         while (B && (a_plus ? B->left < A->beg : B->left > A->beg)) {
3478           if (B->aligned) {
3479             B = B->next;
3480             break; /* the aligned piece should be last */
3481           } else {
3482             AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &B);
3483           }
3484         }
3485         if (B && B->aligned && B->left == A->beg) {
3486           B = B->next;
3487         }
3488         AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &A);
3489       }
3490     } else if (B && B_beg >= 0) {
3491       while (B && (b_plus ? B->beg <= B_beg : B->beg >= B_beg)) {
3492         while (A && (a_plus ? A->beg <= B->left : A->beg >= B->left)) {
3493           AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &A);
3494         }
3495         if (B->aligned) {
3496           B = B->next;
3497         } else {
3498           AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &B);
3499         }
3500       }
3501     } else {
3502       /* just copy the Dsp segment */
3503       DSP->lens[SEG] = Dsp->lens[Seg];
3504       max_POS = POS + Dsp->dim;
3505       for (; POS < max_POS; POS++, Pos++) {
3506         DSP->starts[POS] = Dsp->starts[Pos];
3507         DSP->strands[POS] = Dsp->strands[Pos];
3508       }
3509       if (DSP->dim > Dsp->dim) {
3510         DSP->starts[POS] = -1;
3511         DSP->strands[POS] = dsp->strands[1-anchor];
3512         POS++;
3513       }
3514       SEG++;
3515       Seg++;
3516     }
3517   }
3518   while (A) {
3519     while (B && (a_plus ? B->right <= A->beg : B->right >= A->beg)) {
3520       if (B->aligned) {
3521         B = B->next;
3522       } else {
3523         AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &B);
3524       }
3525     }
3526     AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &A);
3527   }
3528   while (B) {
3529     if (B->aligned) {
3530       B = B->next;
3531     } else {
3532       AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &B);
3533     }
3534   }
3535 
3536   /* Done */
3537   AMSeqPieceSetFree(A_set);
3538   AMSeqPieceSetFree(a_set);
3539   AMSeqPieceSetFree(B_set_head);
3540   AMSeqPieceSetFree(b_set);
3541 
3542   amaip->sharedaln->segs = DSP;
3543   /* update the dim for the shared_aln to match the new DensegPtr */
3544   amaip->sharedaln->dim = DSP->dim;
3545 
3546   DenseSegFree(Dsp);
3547 }
3548 
3549 /***************************************************************************
3550 *
3551 *  AlnMgr2AddInNewSA adds a seqalign to an existing seqalign. The new
3552 *  seqalign must share at least one row with the existing seqalign. The
3553 *  new, combined dense-seg structure is computed, and then it is condensed
3554 *  using AlnMgr2CondenseRows to make sure that there are no superfluous rows.
3555 *
3556 ***************************************************************************/
/* AlnMgr2AddInNewSA -- implementation notes
*
*  parent : seqalign whose saip is used as an AMAlignIndex2; its sharedaln,
*           ids, numrows, saps and numsaps fields are updated here.
*  sap    : alignment to merge; its segs are read as a dense-seg. The pointer
*           itself (not a copy) is appended to amaip->saps. If the shared row
*           is on the opposite strand from the shared alignment, sap is
*           reversed in place and re-indexed.
*
*  n1 is the (1-based) shared row in amaip->sharedaln, n2 is the (1-based)
*  shared row in sap.  If AlnMgr2GetFirstSharedRow reports n1 == n2 == 0 the
*  function returns after recording sap in amaip->saps, leaving sharedaln,
*  ids and numrows untouched (presumably this means "no shared row" --
*  confirm against AlnMgr2GetFirstSharedRow).
*
*  The merge works by turning every segment of both alignments into
*  AM_Small2 "event" records on the shared row (AM_START / AM_STOP /
*  AM_HARDSTOP / AM_GAP), sorting them, collapsing identical events, counting
*  the resulting segments, and then filling in the new dense-seg.
*/
static void AlnMgr2AddInNewSA(SeqAlignPtr parent, SeqAlignPtr sap)
{
   AMAlignIndex2Ptr  amaip;
   AM_Small2Ptr      asp;
   AM_Small2Ptr      asp_head;
   AM_Small2Ptr      asp_prev;
   AM_Small2Ptr      asp_tmp;
   AM_Small2Ptr      asp_tmp2;
   AM_Small2Ptr      PNTR asparray;
   Int4             currstop;
   DenseSegPtr      dsp;
   DenseSegPtr      dsp_new;
   DenseSegPtr      dsp_shared;
   Boolean          found;
   Int4             i;
   Int4             j;
   Int4             k;
   Int4             n1;
   Int4             n2;
   Int4             numrows;
   Int4             offset;
   SeqAlignPtr      salp;
   SeqAlignPtr      sap_new;
   SeqAlignPtr      PNTR saptmp;
   SeqIdPtr         sip;
   SeqIdPtr         sip_head;
   SeqIdPtr         sip_tmp;
   Int4             state;
   Int4             stop1;
   Int4             stop2;
   Uint1            strand1;
   Uint1            strand2;

   amaip = (AMAlignIndex2Ptr)(parent->saip);
   if (amaip->sharedaln == NULL) /* this is the first alignment to be added */
   {
      salp = SeqAlignDup(sap);
      AlnMgr2IndexSingleChildSeqAlign(salp);
      dsp = (DenseSegPtr)(salp->segs);
      amaip->sharedaln = salp;
      amaip->numrows = dsp->dim;
      sip = dsp->ids;
      amaip->ids = (SeqIdPtr PNTR)MemNew((dsp->dim)*sizeof(SeqIdPtr));
      i = 0;
      while (sip != NULL)
      {
         amaip->ids[i] = SeqIdDup(sip);
         sip = sip->next;
         i++;
      }
      MemFree(amaip->saps);
      amaip->saps = (SeqAlignPtr PNTR)MemNew(sizeof(SeqAlignPtr));
      amaip->saps[0] = sap;
      amaip->numsaps = 1;
   } else
   {
      /* NOTE: amaip->ids is deliberately NOT freed here. It is released  */
      /* further down, right before it is rebuilt, so that the early      */
      /* return below cannot leave amaip->ids pointing at freed memory.   */

      /* add the new sap */
      saptmp = amaip->saps;
      amaip->saps = (SeqAlignPtr PNTR)MemNew((amaip->numsaps+1)*sizeof(SeqAlignPtr));
      for (i=0; i<amaip->numsaps; i++)
      {
         amaip->saps[i] = saptmp[i];
      }
      amaip->saps[amaip->numsaps] = sap;
      MemFree(saptmp);
      amaip->numsaps++;

      /* dsp, dsp_shared, n1, n2 */
      dsp = (DenseSegPtr)(sap->segs);
      dsp_shared = (DenseSegPtr)(amaip->sharedaln->segs);
      AlnMgr2GetFirstSharedRow(amaip->sharedaln, sap, &n1, &n2);
      if (n1 == n2 && n1 == 0)
         return;

      /* make sure the shared rows are on the same strand */
      strand1 = AlnMgr2GetNthStrand(amaip->sharedaln, n1);
      if (strand1 == Seq_strand_unknown)
         strand1 = Seq_strand_plus;
      strand2 = AlnMgr2GetNthStrand(sap, n2);
      if (strand2 == Seq_strand_unknown)
         strand2 = Seq_strand_plus;
      if (strand1 != strand2)
      {
         SeqAlignListReverseStrand(sap);
         SAIndex2Free2(sap->saip);
         sap->saip = NULL;
         AlnMgr2IndexSingleChildSeqAlign(sap);
         dsp = (DenseSegPtr)(sap->segs);
         strand2 = AlnMgr2GetNthStrand(sap, n2);
         if (strand2 == Seq_strand_unknown)
            strand2 = Seq_strand_plus;
      }

      /* numrows */
      numrows = dsp->dim + dsp_shared->dim - 1; /* for now this works; compress at the end */
      asp_head = NULL;

      /* currstop */
      if (strand1 == Seq_strand_minus)
         AlnMgr2GetNthSeqRangeInSA(amaip->sharedaln, n1, NULL, &currstop);
      else
         currstop = -1;

      /* add asp for each dsp_shared seg */
      for (i=0; i<dsp_shared->numseg; i++)
      {
         asp = (AM_Small2Ptr)MemNew(sizeof(AM_Small2));
         if (dsp_shared->starts[(dsp_shared->dim)*i + n1 - 1] < 0)
         {
            /* gap on the shared row: n2 carries the 1-based segment number as a key */
            asp->n1 = currstop;
            asp->n2 = i+1;
            asp->n3 = AM_GAP;
            asp->n4 = dsp_shared->lens[i];
            if (asp_head != NULL)
            {
               asp_prev->next = asp;
               asp_prev = asp;
            } else
               asp_head = asp_prev = asp;
         } else
         {
            /* aligned segment: one START record followed by one STOP/HARDSTOP record */
            asp->n1 = dsp_shared->starts[(dsp_shared->dim)*i + n1 - 1];
            asp->n2 = 1;
            asp->n3 = AM_START;
            asp->n4 = dsp_shared->lens[i];
            if (asp_head != NULL)
            {
               asp_prev->next = asp;
               asp_prev = asp;
            } else
               asp_head = asp_prev = asp;
            asp = (AM_Small2Ptr)MemNew(sizeof(AM_Small2));
            asp->n1 = dsp_shared->starts[(dsp_shared->dim)*i + n1 - 1] + dsp_shared->lens[i] - 1;
            asp->n2 = 1;
            /* look for the next non-gap segment on the shared row */
            j = i+1;
            while (j<dsp_shared->numseg && dsp_shared->starts[(dsp_shared->dim)*j + n1 - 1] == -1)
            {
               j++;
            }
            if (j<dsp_shared->numseg)
            {
               if (dsp_shared->starts[(dsp_shared->dim)*j + n1 - 1] > asp->n1 + 1)
                  asp->n3 = AM_HARDSTOP;
               else
                  asp->n3 = AM_STOP;
            } else
               asp->n3 = AM_HARDSTOP;
            if (asp->n3 == AM_HARDSTOP)
            {
               if (strand1 != Seq_strand_minus)
                  asp->n4 = -(dsp_shared->starts[(dsp_shared->dim)*i+n1-1] + dsp_shared->lens[i]-1);
               else
                  asp->n4 = -dsp_shared->starts[(dsp_shared->dim)*i+n1-1];
            } else
               asp->n4 = -dsp_shared->lens[i];
            if (strand1 != Seq_strand_minus)
               currstop = asp->n1;
            else
               currstop = asp_prev->n1-1;
            asp_prev->next = asp;
            asp_prev = asp;
         }
      } /* asp for each dsp_shared seg */

      /* currstop = start of sap's n2-th seq */
      if (strand1 == Seq_strand_minus)
         AlnMgr2GetNthSeqRangeInSA(sap, n2, NULL, &currstop);
      else
         AlnMgr2GetNthSeqRangeInSA(sap, n2, &currstop, NULL);

      /* add asp for each dsp seg */
      for (i=0; i<dsp->numseg; i++)
      {
         asp = (AM_Small2Ptr)MemNew(sizeof(AM_Small2));
         if (dsp->starts[(dsp->dim)*i + n2 - 1] < 0)
         {
            /* gap keys from sap are offset by dsp_shared->numseg so the two sources can be told apart */
            asp->n1 = currstop;
            asp->n2 = dsp_shared->numseg+i+1;
            asp->n3 = AM_GAP;
            asp->n4 = dsp->lens[i];
            asp_prev->next = asp;
            asp_prev = asp;
         } else
         {
            asp->n1 = dsp->starts[(dsp->dim)*i + n2 - 1];
            asp->n2 = 1;
            asp->n3 = AM_START;
            asp->n4 = dsp->lens[i];
            asp_prev->next = asp;
            asp_prev = asp;
            asp = (AM_Small2Ptr)MemNew(sizeof(AM_Small2));
            asp->n1 = dsp->starts[(dsp->dim)*i + n2 - 1] + dsp->lens[i] - 1;
            asp->n2 = 1;
            j = i+1;
            while (j<dsp->numseg && dsp->starts[(dsp->dim)*j + n2 - 1] == -1)
            {
               j++;
            }
            if (j<dsp->numseg)
            {
               if (dsp->starts[(dsp->dim)*j + n2 - 1] > asp->n1 + 1)
                  asp->n3 = AM_HARDSTOP;
               else
                  asp->n3 = AM_STOP;
            } else
               asp->n3 = AM_HARDSTOP;
            if (asp->n3 == AM_HARDSTOP)
            {
               /* the shared row inside sap is n2 (n1 is only valid for dsp_shared) */
               if (strand1 != Seq_strand_minus)
                  asp->n4 = -(dsp->starts[(dsp->dim)*i+n2-1] + dsp->lens[i]-1);
               else
                  asp->n4 = -dsp->starts[(dsp->dim)*i+n2-1];
               /* so if n4 is negative, this is the highest-numbered residue in the interval */
            } else
               asp->n4 = dsp->lens[i];
            if (strand1 != Seq_strand_minus)
               currstop = asp->n1;
            else
               currstop = asp_prev->n1-1;
            asp_prev->next = asp;
            asp_prev = asp;
         }
      }

      /* create asparray and heapsort it */
      asp = asp_head;
      i = 0;
      while (asp != NULL)
      {
         i++;
         asp = asp->next;
      }
      asparray = (AM_Small2Ptr PNTR)MemNew(i*sizeof(AM_Small2Ptr));
      asp = asp_head;
      i = 0;
      while (asp != NULL)
      {
         asparray[i] = asp;
         i++;
         asp = asp->next;
      }
      /* element size is the size of one array slot, i.e. one AM_Small2Ptr */
      if (strand1 != Seq_strand_minus)
         HeapSort(asparray, i, sizeof(AM_Small2Ptr), AlnMgr2CompareAsps);
      else
         HeapSort(asparray, i, sizeof(AM_Small2Ptr), AlnMgr2CompareAspsMinus);
      /* now need to remove redundant (identical) points        */
      /* but still need to count those points toward the states */
      asp = asparray[0];
      asp->next = NULL;
      for (j=0; j<i-1; j++)
      {
         if (asparray[j+1]->n1 != asp->n1 || asparray[j+1]->n3 != asp->n3 || asp->n3 == AM_GAP)
         {
            asp->next = asparray[j+1];
            asp->next->next = NULL;
            asp = asp->next;
         } else
         {
            /* walk back to the nearest surviving record; test the index before reading the slot */
            k = j;
            while (k >= 0 && asparray[k] == NULL)
            {
               k--;
            }
            if (k>=0 && asparray[k]->n3 != AM_GAP)
               asparray[k]->n2++;
            MemFree(asparray[j+1]);
            asparray[j+1] = NULL;
         }
      }
      asp_head = asparray[0];
      MemFree(asparray);
      j=0;
      asp = asp_head;
      asp_prev = NULL;
      /* count up the segments; two consecutive stops make a segment */
      state = 0;
      if (strand1 != Seq_strand_minus)
      {
         while (asp != NULL)
         {
            if (asp->n3 == AM_START)
            {
               state += asp->n2;
               j++;
            } else if (asp->n3 == AM_STOP)
            {
               state -= asp->n2;
               asp_tmp = asp->next;
               while (asp_tmp != NULL && asp_tmp->n3 == AM_GAP)
               {
                  asp_tmp = asp_tmp->next;
               }
               if (state != 0 && asp_tmp != NULL && asp_tmp->n1 != asp->n1+1 && (asp_tmp->n3 != AM_HARDSTOP || asp_tmp->n1 != asp->n1))
                  j++;
               else if (state != 0 && asp->next != NULL && asp_tmp != NULL && (asp_tmp->n3 != AM_HARDSTOP || asp_tmp->n1 != asp->n1))
               {
                  asp_tmp2 = asp_tmp;
                  while (asp_tmp2 != NULL && asp->n1+1 == asp_tmp2->n1 && asp_tmp2->n3 != AM_START)
                  {
                     asp_tmp2 = asp_tmp2->next;
                  }
                  if (asp_tmp2 != NULL && ((asp_tmp2->n1 == asp->n1+1 && asp_tmp2->n3 != AM_START) || asp_tmp2->n1 != asp->n1+1) && (asp_tmp->n3 != AM_HARDSTOP || asp_tmp->n1 != asp->n1))
                     j++;
               }
            } else if (asp->n3 == AM_GAP)
               j++;
            else if (asp->n3 == AM_HARDSTOP)
            {
               state -= asp->n2;
               asp_tmp = asp->next;
               while (asp_tmp != NULL && asp_tmp->n3 == AM_GAP)
               {
                  asp_tmp = asp_tmp->next;
               }
               if (state != 0 && asp_tmp != NULL && asp_tmp->n1 != asp->n1+1)
                  j++;
               else if (state != 0 && asp->next != NULL && asp_tmp != NULL)
               {
                  asp_tmp2 = asp_tmp;
                  while (asp_tmp2 != NULL && asp->n1+1 == asp_tmp2->n1 && asp_tmp2->n3 != AM_START)
                  {
                     asp_tmp2 = asp_tmp2->next;
                  }
                  if (asp_tmp2 != NULL && ((asp_tmp2->n1 == asp->n1+1 && asp_tmp2->n3 != AM_START) || asp_tmp2->n1 != asp->n1+1))
                     j++;
                  else if (asp_tmp2 == NULL)
                     j++;
               }
            }
            asp = asp->next;
         }
      } else
      {
         currstop = -1;
         while (asp != NULL)
         {
            if (asp->n3 == AM_STOP || asp->n3 == AM_HARDSTOP)
            {
               if (currstop != asp->n1 && state > 0)
                  j++;
               currstop = asp->n1;
               state += asp->n2;
            } else if (asp->n3 == AM_START)
            {
               state -= asp->n2;
               j++;
               currstop = asp->n1 - 1;
            } else if (asp->n3 == AM_GAP)
               j++;
            asp = asp->next;
         }
      }

      /* dsp_new */
      dsp_new = DenseSegNew();
      dsp_new->dim = numrows;
      dsp_new->numseg = j;
      dsp_new->ids = SeqIdDupList(dsp_shared->ids);
      dsp_new->starts = (Int4Ptr)MemNew((dsp_new->numseg)*(dsp_new->dim)*sizeof(Int4));
      dsp_new->strands = (Uint1Ptr)MemNew((dsp_new->numseg)*(dsp_new->dim)*sizeof(Uint1));
      dsp_new->lens = (Int4Ptr)MemNew((dsp_new->numseg)*sizeof(Int4));

      /* get all the ids except for the duplicated one */
      sip_head = NULL;
      sip_tmp = NULL;
      sip = dsp->ids;
      i=0;
      while (sip != NULL)
      {
         if (i+1 != n2)
         {
            if (sip_tmp != NULL)
            {
               sip_tmp->next = SeqIdDup(sip);
               /* advance along the COPY; stepping onto sip would make the */
               /* next append write into sap's own id list                 */
               sip_tmp = sip_tmp->next;
            } else
               sip_head = sip_tmp = SeqIdDup(sip);
         }
         i++;
         sip = sip->next;
      }
      /* append the copied ids after the shared alignment's ids */
      sip = dsp_new->ids;
      while (sip->next != NULL)
      {
         sip = sip->next;
      }
      sip->next = sip_head;

      /* construct starts and lens from asps */
      asp = asp_head;
      i=0;
      state = 0;
      currstop = -1;
      if (strand1 != Seq_strand_minus)
      {
         while (asp != NULL)
         {
            if (asp->n3 == AM_START)
            {
               state += asp->n2;
               dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1;
               dsp_new->lens[i] = asp->n4;
               i++;
            } else if (asp->n3 == AM_STOP)
            {
               state -= asp->n2;
               asp_tmp = asp->next;
               while (asp_tmp != NULL && asp_tmp->n3 == AM_GAP)
               {
                  asp_tmp = asp_tmp->next;
               }
               if (state != 0 && asp_tmp != NULL && asp_tmp->n1 != asp->n1+1 && (asp_tmp->n3 != AM_HARDSTOP || asp_tmp->n1 != asp->n1))
               {
                  dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1 + 1;
                  dsp_new->lens[i] = asp->n4;
                  i++;
               } else if (state != 0 && asp->next != NULL && asp_tmp != NULL && i < dsp_new->numseg && (asp_tmp->n3 != AM_HARDSTOP || asp_tmp->n1 != asp->n1))
               {
                  asp_tmp2 = asp_tmp;
                  while (asp_tmp2 != NULL && asp->n1+1 == asp_tmp2->n1 && asp_tmp2->n3 != AM_START)
                  {
                     asp_tmp2 = asp_tmp2->next;
                  }
                  if (asp_tmp2 != NULL && ((asp_tmp2->n1 == asp->n1+1 && asp_tmp2->n3 != AM_START) || asp_tmp2->n1 != asp->n1+1) && (asp_tmp->n3 != AM_HARDSTOP || asp_tmp->n1 != asp->n1))
                  {
                     dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1 + 1;
                     dsp_new->lens[i] = asp->n4;
                     i++;
                  }
               }
            } else if (asp->n3 == AM_GAP)
            {
               /* store the negated segment key; it is resolved to real starts further down */
               dsp_new->starts[dsp_new->dim*i+n1-1] = -asp->n2;
               if (asp->n2 > dsp_shared->numseg)
                  dsp_new->lens[i] = dsp->lens[(asp->n2-1)-(dsp_shared->numseg)];
               else
                  dsp_new->lens[i] = dsp_shared->lens[asp->n2-1];
               i++;
            } else if (asp->n3 == AM_HARDSTOP)
            {
               state -= asp->n2;
               asp_tmp = asp->next;
               while (asp_tmp != NULL && asp_tmp->n3 == AM_GAP)
               {
                  asp_tmp = asp_tmp->next;
               }
               if (state != 0 && asp->next != NULL && asp_tmp != NULL && asp_tmp->n1 != asp->n1+1 && i < dsp_new->numseg)
               {
                  dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1 + 1;
                  if (asp->n1 > -asp->n4)
                     dsp_new->lens[i] = asp->n4;
                  i++;
               } else if (state != 0 && asp->next != NULL && asp_tmp != NULL && i < dsp_new->numseg)
               {
                  asp_tmp2 = asp_tmp;
                  while (asp_tmp2 != NULL && asp->n1+1 == asp_tmp2->n1 && asp_tmp2->n3 != AM_START)
                  {
                     asp_tmp2 = asp_tmp2->next;
                  }
                  if (asp_tmp2 != NULL && ((asp_tmp2->n1 == asp->n1+1 && asp_tmp2->n3 != AM_START) || asp_tmp2->n1 != asp->n1+1))
                  {
                     dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1 + 1;
                     if (asp->n1 > -asp->n4)
                        dsp_new->lens[i] = asp->n4;
                     i++;
                  } else if (asp_tmp2 == NULL)
                  {
                     dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1 + 1;
                     if (asp->n1 > -asp->n4)
                        dsp_new->lens[i] = asp->n4;
                     i++;
                  }
               }
            }
            asp = asp->next;
         }
         /* second pass: settle each segment length against the next non-gap start on the shared row */
         for (i=0; i<dsp_new->numseg; i++)
         {
            found = FALSE;
            for (j=i+1; j<dsp_new->numseg && !found; j++)
            {
               if (dsp_new->starts[dsp_new->dim*j+n1-1] > -1)
               {
                  if (dsp_new->lens[i] == 0)
                     dsp_new->lens[i] = dsp_new->starts[dsp_new->dim*j+n1-1] - dsp_new->starts[dsp_new->dim*i+n1-1];
                  else if (dsp_new->lens[i] > 0)
                     dsp_new->lens[i] = MIN(dsp_new->lens[i], dsp_new->starts[dsp_new->dim*j+n1-1] - dsp_new->starts[dsp_new->dim*i+n1-1]);
                  else if (dsp_new->lens[i] < 0)
                     dsp_new->lens[i] = -dsp_new->lens[i]-dsp_new->starts[dsp_new->dim*i+n1-1]+1;
                  found = TRUE;
               }
            }
            if (!found) /* last segment */
            {
               if (dsp_new->starts[dsp_new->dim*i+n1-1] >= 0)
               {
                  AlnMgr2GetNthSeqRangeInSA(amaip->sharedaln, n1, NULL, &stop1);
                  AlnMgr2GetNthSeqRangeInSA(sap, n2, NULL, &stop2);
                  dsp_new->lens[i] = (MAX(stop1, stop2) + 1) - dsp_new->starts[dsp_new->dim*i+n1-1];
               }
            }
         }
      } else
      {
         while (asp != NULL)
         {
            if (asp->n3 == AM_STOP)
            {
               if (currstop != asp->n1 && state > 0)
               {
                  dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1+1;
                  dsp_new->lens[i] = currstop - asp->n1;
                  i++;
               }
               currstop = asp->n1;
               state += asp->n2;
            } else if (asp->n3 == AM_START)
            {
               state -= asp->n2;
               dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1;
               dsp_new->lens[i] = currstop - asp->n1 + 1;
               i++;
               currstop = asp->n1 - 1;
            } else if (asp->n3 == AM_GAP)
            {
               dsp_new->starts[dsp_new->dim*i+n1-1] = -asp->n2;
               if (asp->n2 > dsp_shared->numseg)
                  dsp_new->lens[i] = dsp->lens[(asp->n2-1)-(dsp_shared->numseg)];
               else
                  dsp_new->lens[i] = dsp_shared->lens[asp->n2-1];
               i++;
            } else if (asp->n3 == AM_HARDSTOP)
            {
               if (currstop != asp->n1 && state > 0 && asp->next != NULL)
               {
                  dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1+1;
                  dsp_new->lens[i] = currstop - asp->n1;
                  i++;
               }
               currstop = asp->n1;
               state += asp->n2;
            }
            asp = asp->next;
         }
      }
      /* now add in the other rows, starting with rows from the sharedaln */
      for (i=0; i<dsp_shared->dim; i++)
      {
         if (i+1 != n1)
         {
            for (j=0; j<dsp_new->numseg; j++)
            {
               if (dsp_new->starts[dsp_new->dim*j+n1-1] >= 0)
                  /* the shared sequence is row n1 of the shared alignment */
                  dsp_new->starts[dsp_new->dim*j+i] = AlnMgr2MapSegStartToSegStart(amaip->sharedaln, dsp_new->starts[dsp_new->dim*j+n1-1], n1, i+1, dsp_new->lens[j]);
               else
               {
                  if (-(dsp_new->starts[dsp_new->dim*j+n1-1]) > dsp_shared->numseg)
                  /* this gap came from the new sap */
                     dsp_new->starts[dsp_new->dim*j+i] = -1;
                  else /* this gap came from the sharedaln */
                     dsp_new->starts[dsp_new->dim*j+i] = dsp_shared->starts[dsp_shared->dim*(-dsp_new->starts[dsp_new->dim*j+n1-1]-1)+i];
               }
               dsp_new->strands[dsp_new->dim*j+i] = AlnMgr2GetNthStrand(amaip->sharedaln, i+1);
            }
         }
      }
      /* then the rows from the new sap; they are placed after the sharedaln rows, */
      /* skipping the shared row (offset closes the hole it leaves)                */
      for (i=0; i<dsp->dim; i++)
      {
         if (i+1 != n2)
         {
            if (i+1 > n2)
               offset = 1;
            else
               offset = 0;
            for (j=0; j<dsp_new->numseg; j++)
            {
               if (dsp_new->starts[dsp_new->dim*j+n1-1] >= 0)
                  /* the shared sequence is row n2 of sap */
                  dsp_new->starts[dsp_new->dim*j+i+dsp_shared->dim-offset] = AlnMgr2MapSegStartToSegStart(sap, dsp_new->starts[dsp_new->dim*j+n1-1], n2, i+1, dsp_new->lens[j]);
               else
               {
                  if (-(dsp_new->starts[dsp_new->dim*j+n1-1]) > dsp_shared->numseg)
                  /* this gap is from the new sap */
                     dsp_new->starts[dsp_new->dim*j+i+dsp_shared->dim-offset] = dsp->starts[dsp->dim*((-dsp_new->starts[dsp_new->dim*j+n1-1])-dsp_shared->numseg-1)+i];
                  else /* this gap is from the shared alignment */
                     dsp_new->starts[dsp_new->dim*j+i+dsp_shared->dim-offset] = -1;
               }
               dsp_new->strands[dsp_new->dim*j+i+dsp_shared->dim-offset] = AlnMgr2GetNthStrand(sap, i+1);
            }
         }
      }
      /* fill in strand info for shared row, and get rid of segment keys (neg numbers) */
      for (j=0; j<dsp_new->numseg; j++)
      {
         dsp_new->strands[dsp_new->dim*j+n1-1] = AlnMgr2GetNthStrand(amaip->sharedaln, n1);
         if (dsp_new->starts[dsp_new->dim*j+n1-1] < 0)
            dsp_new->starts[dsp_new->dim*j+n1-1] = -1;
      }
      AlnMgr2CondenseRows(dsp_new, dsp_new->dim);
      sap_new = SeqAlignNew();
      sap_new->segtype = SAS_DENSEG;
      sap_new->segs = (Pointer)(dsp_new);
      AlnMgr2IndexSingleChildSeqAlign(sap_new);
      SeqAlignFree(amaip->sharedaln);
      amaip->sharedaln = sap_new;

      /* free the old ids (must happen before numrows is overwritten) */
      for (i=0; i<amaip->numrows; i++)
      {
         SeqIdFree(amaip->ids[i]);
      }
      MemFree(amaip->ids);

      /* rebuild the per-row id array from the merged dense-seg */
      amaip->numrows = dsp_new->dim;
      amaip->ids = (SeqIdPtr PNTR)MemNew(amaip->numrows*sizeof(SeqIdPtr));
      sip = dsp_new->ids;
      for (i=0; i<amaip->numrows; i++)
      {
         amaip->ids[i] = SeqIdDup(sip);
         sip = sip->next;
      }
      /* release the event records */
      while (asp_head != NULL)
      {
         asp = asp_head->next;
         MemFree(asp_head);
         asp_head = asp;
      }
   }
}
4199 
4200 /* SECTION 2c */
/* Maps a position on row1 of a dense-seg alignment to the corresponding
 * position on row2 within the same segment.
 *
 *   sap   alignment whose segs are read as a dense-seg
 *   pos   position on row1 used to locate the segment
 *   row1  1-based row that pos refers to
 *   row2  1-based row to map onto
 *   len   length added to pos (minus one) when the two rows are on
 *         different strands in that segment
 *
 * Returns the mapped position, or -1 when sap is NULL, when no segment is
 * found for pos (AlnMgr2GetSegForStartPos returned a negative value), when
 * row2 has a start of -1 in that segment, or when the computed offset
 * exceeds the segment length.
 */
static Int4 AlnMgr2MapSegStartToSegStart(SeqAlignPtr sap, Int4 pos, Int4 row1, Int4 row2, Int4 len)
{
   DenseSegPtr  dsp;
   Int4         from_idx;   /* index of (seg,row1) in starts/strands */
   Int4         to_idx;     /* index of (seg,row2) in starts/strands */
   Int4         offset;
   Int4         seg;
   Int4         seglen;
   Uint1        from_strand;
   Uint1        to_strand;

   if (sap == NULL)
   {
      return -1;
   }
   /* the segment is looked up with the unmodified position */
   seg = AlnMgr2GetSegForStartPos(sap, pos, row1);
   if (seg < 0)
   {
      return -1;
   }

   dsp = (DenseSegPtr)(sap->segs);
   from_idx = dsp->dim*seg + row1 - 1;
   to_idx = dsp->dim*seg + row2 - 1;
   if (dsp->starts[to_idx] == -1)
   {
      return -1;
   }

   seglen = dsp->lens[seg];
   from_strand = dsp->strands[from_idx];
   to_strand = dsp->strands[to_idx];

   /* opposite strands: work from the other end of the requested interval */
   if (from_strand != to_strand)
   {
      pos += len - 1;
   }

   /* distance into the segment, counted in row1's reading direction */
   offset = pos - dsp->starts[from_idx];
   if (from_strand == Seq_strand_minus)
   {
      offset = seglen - offset - 1;
   }
   if (offset > seglen) /* unaligned here */
   {
      return -1;
   }

   /* apply the same distance on row2, again honouring its strand */
   if (to_strand == Seq_strand_minus)
   {
      return dsp->starts[to_idx] + seglen - offset - 1;
   }
   return dsp->starts[to_idx] + offset;
}
4234 
4235 /* SECTION 2c */
/* Finds the segment of an indexed dense-seg alignment that contains
 * position pos on the given (1-based) row.
 *
 * Returns -1 when sap or its index is missing, row is out of range, or pos
 * lies outside the range reported by AlnMgr2GetNthSeqRangeInSA; returns -2
 * when pos falls past the end of the located segment (an insert).
 * Otherwise the row's sect[] entry at the located index is returned; when
 * the alignment is anchored, that index is first replaced by the result of
 * binary_search_on_uint2_list over the anchor row's sect[] list.
 */
static Int4 AlnMgr2GetSegForStartPos(SeqAlignPtr sap, Int4 pos, Int4 row)
{
   Uint2Ptr       anchor_sect;
   DenseSegPtr    dsp;
   Int4           hi;
   Int4           lo;
   Int4           probe;
   Int4           range_start;
   Int4           range_stop;
   SARowDat2Ptr   rowdat;
   SAIndex2Ptr    saip;
   Int4           segnum;

   if (sap == NULL || sap->saip == NULL || row < 1)
   {
      return -1;
   }
   AlnMgr2GetNthSeqRangeInSA(sap, row, &range_start, &range_stop);
   if (pos < range_start || pos > range_stop)
   {
      return -1;
   }
   saip = (SAIndex2Ptr)(sap->saip);
   if (row > saip->numrows)
   {
      return -1;
   }

   rowdat = saip->srdp[row-1];
   dsp = (DenseSegPtr)(sap->segs);
   lo = 0;
   hi = rowdat->numsect - 1;

   /* binary search over the row's section list; the comparison direction */
   /* depends on the row's strand                                         */
   if (AlnMgr2GetNthStrand(sap, row) == Seq_strand_minus)
   {
      while (lo < hi)
      {
         probe = (lo + hi)/2;
         if (dsp->starts[(rowdat->sect[probe])*(dsp->dim)+row-1] > pos)
         {
            lo = probe + 1;
         } else
         {
            hi = probe;
         }
      }
   } else
   {
      while (lo < hi)
      {
         /* clamp so that probe+1 stays a valid index into sect[] */
         probe = MIN((lo + hi)/2, rowdat->numsect-2);
         if (dsp->starts[(rowdat->sect[probe + 1])*(dsp->dim)+row-1] <= pos)
         {
            lo = probe + 1;
         } else
         {
            hi = probe;
         }
      }
   }

   segnum = rowdat->sect[lo];
   if (pos - dsp->starts[segnum*(dsp->dim)+row-1] >= dsp->lens[segnum])
   {
      return -2;  /* this is an insert */
   }

   if (saip->anchor > 0)
   {
      /* anchored alignment: re-derive the index from the anchor row's list */
      anchor_sect = saip->srdp[saip->anchor-1]->sect;
      lo = binary_search_on_uint2_list(anchor_sect, rowdat->sect[lo], saip->srdp[saip->anchor-1]->numsect);
   }
   return rowdat->sect[lo];
}
4295 
/* Scans forward from the segment after col for the first segment in which
 * the given (0-based) row has a start other than -1.
 *
 * Returns that start value and, when pnext_start_col is non-NULL, stores
 * the segment index there.  Returns -1 (leaving *pnext_start_col untouched)
 * when dsp is NULL, row or col is out of range, or no such segment exists.
 */
static Int4 GetNextStart (DenseSegPtr dsp, Int4 row, Int4 col, Int4Ptr pnext_start_col)
{
  Int4 seg;
  Int4 start;

  if (dsp == NULL || row < 0 || row >= dsp->dim || col < 0 || col >= dsp->numseg)
  {
    return -1;
  }

  seg = col + 1;
  while (seg < dsp->numseg)
  {
    start = dsp->starts[(seg * dsp->dim) + row];
    if (start != -1)
    {
      if (pnext_start_col != NULL)
      {
        *pnext_start_col = seg;
      }
      return start;
    }
    seg++;
  }

  /* ran off the end without finding a non-gap start */
  return -1;
}
4324 
/***************************************************************************
*
*  AlnMgr2RemoveColumn deletes segment (column) col from the dense-seg by
*  shifting every later segment down one slot inside the existing starts,
*  strands and lens arrays, then decrements dsp->numseg. Nothing is
*  allocated or freed, so the arrays simply keep one unused slot at the end.
*
***************************************************************************/
static void AlnMgr2RemoveColumn(DenseSegPtr dsp, int col)
{
  int row, seg;

  for (seg = col;  seg < dsp->numseg - 1;  ++seg) {
    for (row = 0;  row < dsp->dim;  ++row) {
      dsp->starts[dsp->dim * seg + row] =
        dsp->starts[dsp->dim * (seg + 1) + row];
      if (dsp->strands != NULL) {
        dsp->strands[dsp->dim * seg + row] =
          dsp->strands[dsp->dim * (seg + 1) + row];
      }
    }
    dsp->lens[seg] = dsp->lens[seg + 1];
  }
  dsp->numseg--;
}

/***************************************************************************
*
*  AlnMgr2CondenseColumns finds adjacent columns which appear to align but
*  were not put in one column by the mixing mechanism because the input was
*  a set of pairwise alignment with a gap on the common sequence in this
*  segment. Or graphically:
*
*  ----- ----- ----- -----             -----
*  AACCG ----- ----- -----   becomes   AACCG
*  ----- AACCG ----- -----             AACCG
*  ----- ----- AACCG -----             AACCG
*  ----- ----- ----- AACCG             AACCG
*
*  Only runs of two or more consecutive segments in which the first row
*  (row 0) is a gap are examined. Within such a run, a later column is
*  folded into an earlier one when both have the same length, no row has
*  a start in both columns, and the fold would not move a start in front
*  of an earlier, smaller start of the same row. The folded column is then
*  removed from the dense-seg (dsp->numseg shrinks). A NULL dsp is ignored.
*
***************************************************************************/
static void AlnMgr2CondenseColumns(DenseSegPtr dsp)
{
  int gap_start_seg = -1;
  int gap_end_seg = -1;
  int row, seg, base_col, col;
  Int4 next_start;
  Int4 next_start_col = -1;   /* Int4 to match GetNextStart's Int4Ptr */
  Boolean can_fit;

  if (dsp == NULL || dsp->starts == NULL || dsp->lens == NULL) {
    return;
  }

  for (seg = 0;  seg < dsp->numseg;  ++seg) {
    /* track the run [gap_start_seg, gap_end_seg) of row-0 gaps */
    if (dsp->starts[dsp->dim * seg] == -1) {
      if (gap_start_seg == -1) {
        gap_start_seg = seg;
      }
      else {
        if (seg == dsp->numseg - 1) {
          /* run extends to the very last segment */
          gap_end_seg = seg + 1;
        }
      }
    }
    else {
      if (gap_start_seg != -1) {
        gap_end_seg = seg;
      }
    }

    if (gap_end_seg != -1) {
      for (base_col = gap_start_seg;  base_col<gap_end_seg;  ++base_col) {
        int len = dsp->lens[base_col];
        for (col = base_col + 1;  col<gap_end_seg;  ++col) {
          if (dsp->lens[col] != len) {
            continue;
          }

          can_fit = TRUE;
          for (row = 0;  row < dsp->dim;  ++row) {
            if (dsp->starts[dsp->dim * col + row] != -1  &&
                dsp->starts[dsp->dim * base_col + row] != -1) {
              /* both columns hold sequence for this row */
              can_fit = FALSE;
              break;
            }
            else if (dsp->starts[dsp->dim * col + row] != -1)
            {
              /* make sure we aren't going to disturb the order of
               * the starts */
              next_start = GetNextStart (dsp, row, base_col, &next_start_col);
              if (next_start > -1
                  && next_start < dsp->starts[dsp->dim * col + row]
                  && next_start_col < col)
              {
                can_fit = FALSE;
                break;
              }
            }
          }

          if (can_fit) {
            for (row = 0;  row<dsp->dim;  ++row) {
              if (dsp->starts[dsp->dim * col + row] != -1) {
                dsp->starts[dsp->dim * base_col + row] =
                  dsp->starts[dsp->dim * col + row];
              }
            }

            /* remove column col */
            AlnMgr2RemoveColumn(dsp, col);

            /* one segment fewer: pull every running index back so the
             * loops revisit the slot that just received a new column */
            --gap_end_seg;
            --seg;
            --col;
          }
        }
      }

      gap_start_seg = -1;
      gap_end_seg = -1;
    }
  }
}
4449 
4450 /* SECTION 2c */
4451 /***************************************************************************
4452 *
4453 *  AlnMgr2CondenseRows finds rows of a dense-seg structure that are related
4454 *  and that could be condensed into a single row (or fewer rows). It then
4455 *  calls AlnMgr2DoCondense to condense those rows into continuous or
4456 *  discontinuous rows. whichrow designates which row to merge, if
4457 *  less than 1, the function tries to merge the last row.
4458 *
4459 ***************************************************************************/
AlnMgr2CondenseRows(DenseSegPtr dsp,Int4 whichrow)4460 static void AlnMgr2CondenseRows(DenseSegPtr dsp, Int4 whichrow)
4461 {
4462    Boolean     done;
4463    Int4        i;
4464    Int4        j;
4465    Int4        k;
4466    Int4        numrows;
4467    AMCdRowPtr  row;
4468    AMCdRowPtr  PNTR rowarray;
4469    SeqIdPtr    sip;
4470    SeqIdPtr    targetsip;
4471 
4472    sip = dsp->ids;
4473    rowarray = (AMCdRowPtr PNTR)MemNew((dsp->dim)*sizeof(AMCdRowPtr));
4474    if (whichrow < 1 || whichrow > dsp->dim)
4475       whichrow = dsp->dim;
4476    for (i=0; i<dsp->dim; i++)
4477    {
4478       row = (AMCdRowPtr)MemNew(sizeof(AMCdRow));
4479       row->sip = SeqIdDup(sip);
4480       sip = sip->next;
4481       row->strand = dsp->strands[i];
4482       row->rownum = i+1;
4483       rowarray[i] = row;
4484       if (i+1 == whichrow)
4485          targetsip = row->sip;
4486    }
4487    HeapSort(rowarray, i, sizeof(rowarray), AlnMgr2CompareCdRows);
4488    numrows = dsp->dim;
4489    j = -1; /* j marks the first occurrence of each sip */
4490    for (i=0; j==-1 && i<numrows; i++)
4491    {
4492       if (SeqIdComp(rowarray[i]->sip, targetsip) == SIC_YES)
4493       {
4494          j = i;
4495          if (rowarray[i]->rownum == whichrow) /* no other rows w/sip */
4496          {
4497             for (i=0; i<numrows; i++)
4498             {
4499                SeqIdFree(rowarray[i]->sip);
4500                MemFree(rowarray[i]);
4501             }
4502             MemFree(rowarray);
4503             return;
4504          }
4505       }
4506    }
4507    sip = SeqIdDup(rowarray[j]->sip);
4508    done = FALSE;
4509    for (i=j; !done && rowarray[i]->rownum < whichrow; i++)
4510    {
4511       if (SeqIdComp(rowarray[i]->sip, sip) == SIC_YES)
4512       {
4513          if (rowarray[i]->strand == rowarray[j]->strand)
4514          {
4515             if (AlnMgr2DoCondense(dsp, rowarray[i]->rownum, whichrow))
4516             {
4517                for (k=0; k<numrows; k++)
4518                {
4519                   if (rowarray[k]->rownum > rowarray[i]->rownum)
4520                   {
4521                      rowarray[k]->rownum--;
4522                      whichrow--;
4523                   }
4524                }
4525             }
4526          }
4527       } else
4528       {
4529          done = TRUE;
4530          SeqIdFree(sip);
4531          sip = SeqIdDup(rowarray[i]->sip);
4532          j = i;
4533       }
4534    }
4535    SeqIdFree(sip);
4536    for (i=0; i<numrows; i++)
4537    {
4538       SeqIdFree(rowarray[i]->sip);
4539       MemFree(rowarray[i]);
4540    }
4541    MemFree(rowarray);
4542 }
4543 
4544 /* SECTION 2c */
4545 /***************************************************************************
4546 *
4547 *  AlnMgr2DoCondense arithmetically condenses two related rows of a dense-seg
4548 *  structure into a single continuous row, a single discontinuous row, or
4549 *  two rows with different information than before.
4550 *
4551 ***************************************************************************/
AlnMgr2DoCondense(DenseSegPtr dsp,Int4 rownum1,Int4 rownum2)4552 static Boolean AlnMgr2DoCondense(DenseSegPtr dsp, Int4 rownum1, Int4 rownum2)
4553 {
4554    Int4          aln;
4555    SeqAlignPtr   fake_sap;
4556    Boolean       fits;
4557    Boolean       found;
4558    Int4          i;
4559    SeqIdPtr      id;
4560    SeqIdPtr      id_head;
4561    SeqIdPtr      id_prev;
4562    Int4          j;
4563    Int4          k;
4564    Int4          max1;
4565    Int4          max2;
4566    Boolean       merged;
4567    Int4          min1;
4568    Int4          min2;
4569    SAIndex2Ptr   saip;
4570    Boolean       someseq1;
4571    Boolean       someseq2;
4572    Int4Ptr       starts;
4573    Uint1         strand1;
4574    Uint1         strand2;
4575    Uint1Ptr      strands;
4576    AM_Small2Ptr  window;
4577    AM_Small2Ptr  window_head;
4578    AM_Small2Ptr  window_prev;
4579 
4580    /* always merge up to rownum1 (better rows are first) */
4581    if (rownum1 > rownum2)
4582    {
4583       i = rownum2;
4584       rownum2 = rownum1;
4585       rownum1 = i;
4586    }
4587    strand1 = dsp->strands[rownum1-1];
4588    strand2 = dsp->strands[rownum2-1];
4589    if (strand1 != strand2)
4590       return FALSE;
4591    i = 0;
4592    window_head = window_prev = NULL;
4593    while (i < dsp->numseg)
4594    {
4595       j = i;
4596       someseq1 = someseq2 = FALSE;
4597       if (dsp->starts[dsp->dim*j+rownum1-1] >= 0)
4598       {
4599          someseq1 = TRUE;
4600          while (j<dsp->numseg && dsp->starts[dsp->dim*j+rownum2-1] < 0)
4601          {
4602             j++;
4603          }
4604       } else if (dsp->starts[dsp->dim*j+rownum2-1] >= 0)
4605       {
4606          someseq2 = TRUE;
4607          while (j<dsp->numseg && dsp->starts[dsp->dim*j+rownum1-1] < 0)
4608          {
4609             j++;
4610          }
4611       }
4612       fits = FALSE;
4613       if (j > i)
4614       {
4615          if (strand1 == Seq_strand_minus)
4616          {
4617             if (someseq1 == FALSE)
4618             {
4619                min1 = -1;
4620                for (k=j; min1 == -1 && k<dsp->numseg; k++)
4621                {
4622                   if (dsp->starts[dsp->dim*k+rownum1-1] > -1)
4623                      min1 = dsp->starts[dsp->dim*k+rownum1-1]+dsp->lens[k]-1;
4624                }
4625                max1 = -1;
4626                for (k=(i-1); max1 == -1 && k>=0; k--)
4627                {
4628                   max1 = dsp->starts[dsp->dim*k+rownum1-1];
4629                }
4630             } else
4631             {
4632                min1 = -1;
4633                for (k=j-1; min1 == -1 && k>=i; k--)
4634                {
4635                   min1 = dsp->starts[dsp->dim*(k)+rownum1-1];
4636                }
4637                max1 = -1;
4638                for (k=i; min1 == -1 && k<j; k++)
4639                {
4640                   if (dsp->starts[dsp->dim*k+rownum1-1] >= 0)
4641                      max1 = dsp->starts[dsp->dim*k+rownum1-1] + dsp->lens[k] -1;
4642                }
4643             }
4644          } else
4645          {
4646             if (someseq1 == FALSE)
4647             {
4648                min1 = -1;
4649                for (k=i-1; min1 == -1 && k >= 0; k--)
4650                {
4651                   if (dsp->starts[dsp->dim*k+rownum1-1] > -1)
4652                      min1 = dsp->starts[dsp->dim*k+rownum1-1]+dsp->lens[k]-1;
4653                }
4654                max1 = -1;
4655                for (k=j; max1 == -1 && k<dsp->numseg; k++)
4656                {
4657                   max1 = dsp->starts[dsp->dim*k+rownum1-1];
4658                }
4659             } else
4660             {
4661                min1 = -1;
4662                for (k=i; min1 == -1 && k<j; k++)
4663                {
4664                   min1 = dsp->starts[dsp->dim*k+rownum1-1];
4665                }
4666                max1 = -1;
4667                for (k=j-1; max1 == -1 && k>i; k--)
4668                {
4669                   if (dsp->starts[dsp->dim*k+rownum1-1] >= 0)
4670                      max1 = dsp->starts[dsp->dim*(k)+rownum1-1] + dsp->lens[k] - 1;
4671                }
4672             }
4673          }
4674          if (strand2 == Seq_strand_minus)
4675          {
4676             if (someseq2 == FALSE)
4677             {
4678                min2 = -1;
4679                for (k=j; min2 == -1 && k<dsp->numseg; k++)
4680                {
4681                   if (dsp->starts[dsp->dim*k+rownum2-1] > -1)
4682                      min2 = dsp->starts[dsp->dim*k+rownum2-1]+dsp->lens[k]-1;
4683                }
4684                max2 = -1;
4685                for (k=(i-1); max2 == -1 && k>=0; k--)
4686                {
4687                   max2 = dsp->starts[dsp->dim*k+rownum2-1];
4688                }
4689             } else
4690             {
4691                min2 = -1;
4692                for (k=j-1; min2 == -1 && k>=i; k--)
4693                {
4694                   min2 = dsp->starts[dsp->dim*(k)+rownum2-1];
4695                }
4696                max2 = -1;
4697                for (k=i; max2 == -1 && k<j; k++)
4698                {
4699                   if (dsp->starts[dsp->dim*k+rownum2-1] >= 0)
4700                      max2 = dsp->starts[dsp->dim*k+rownum2-1] + dsp->lens[k]-1;
4701                }
4702             }
4703          } else
4704          {
4705             if (someseq2 == FALSE)
4706             {
4707                min2 = -1;
4708                for (k=i-1; min2 == -1 && k >= 0; k--)
4709                {
4710                   if (dsp->starts[dsp->dim*k+rownum2-1] > -1)
4711                      min2 = dsp->starts[dsp->dim*k+rownum2-1]+dsp->lens[k]-1;
4712                }
4713                max2 = -1;
4714                for (k=j; max2 == -1 && k<dsp->numseg; k++)
4715                {
4716                   max2 = dsp->starts[dsp->dim*k+rownum2-1];
4717                }
4718             } else
4719             {
4720                min2 = -1;
4721                for (k=i; min2 == -1 && k<j; k++)
4722                {
4723                   min2 = dsp->starts[dsp->dim*k+rownum2-1];
4724                }
4725                max2 = -1;
4726                for (k=j-1; max2 == -1 && k>=i; k--)
4727                {
4728                   if (dsp->starts[dsp->dim*(k)+rownum2-1] >= 0)
4729                      max2 = dsp->starts[dsp->dim*(k)+rownum2-1] + dsp->lens[k] - 1;
4730                }
4731             }
4732          }
4733          if (someseq1 == FALSE)
4734          {
4735             if ((min1 < min2 || min2 == -1) && (max1 > max2 || max1 == -1))
4736                fits = TRUE;
4737          } else
4738          {
4739             if ((min2 < min1 || min1 == -1) && (max2 > max1 || max2 == -1))
4740                fits = TRUE;
4741          }
4742          window = (AM_Small2Ptr)MemNew(sizeof(AM_Small2));
4743          window->n1 = i;
4744          window->n2 = j-1;
4745          if (!fits)
4746             window->n4 = -1;
4747          if (window_head != NULL)
4748          {
4749             window_prev->next = window;
4750             window_prev = window;
4751          } else
4752             window_head = window_prev = window;
4753       }
4754       if (i == j)
4755          i++;
4756       else
4757          i = j;
4758    }
4759    if (window_head == NULL)
4760       return FALSE;
4761    fake_sap = SeqAlignNew();
4762    fake_sap->segtype = SAS_DENSEG;
4763    fake_sap->segs = (Pointer)dsp;
4764    AlnMgr2IndexSingleChildSeqAlign(fake_sap);
4765    aln = AlnMgr2GetNumAlnBlocks(fake_sap);
4766    if (aln == 1) /* only merge if there is a single fitted window flanked by gaps */
4767    /*or if there are several contiguous fitted windows flanked by gaps */
4768    {
4769       if (window_head->next != NULL && window_head->n4 == 0)
4770       {
4771          window = window_head->next;
4772          while (window_head->n2+1 < dsp->numseg && dsp->starts[dsp->dim*(window_head->n2+1)+rownum1-1] == -1 && dsp->starts[dsp->dim*(window_head->n2+1)+rownum2-1] == -1)
4773          {
4774             window_head->n2++;
4775          }
4776          while (window != NULL && window->n4 == 0 && window->n1 == window_head->n2+1)
4777          {
4778             window_head->n2 = window->n2;
4779             window = window->next;
4780             while (window_head->n2+1 < dsp->numseg && dsp->starts[dsp->dim*(window_head->n2+1)+rownum1-1] == -1 && dsp->starts[dsp->dim*(window_head->n2+1)+rownum2-1] == -1)
4781             {
4782                window_head->n2++;
4783             }
4784          }
4785          if (window != NULL)
4786          {
4787             while (window_head != NULL)
4788             {
4789                window = window_head->next;
4790                MemFree(window_head);
4791                window_head = window;
4792             }
4793             fake_sap->segs = NULL;
4794             SeqAlignFree(fake_sap);
4795             return FALSE;
4796          }
4797       }
4798       if (window_head->n4 == -1)
4799       {
4800          while (window_head != NULL)
4801          {
4802             window = window_head->next;
4803             MemFree(window_head);
4804             window_head = window;
4805          }
4806          fake_sap->segs = NULL;
4807          SeqAlignFree(fake_sap);
4808          return FALSE;
4809       }
4810       found = FALSE;
4811       for (i=0; !found && i<window_head->n1; i++)
4812       {
4813          if (dsp->starts[dsp->dim*i+rownum1-1] != -1 && dsp->starts[dsp->dim*i+rownum2-1] != -1)
4814             found = TRUE;
4815       }
4816       for (i=window_head->n2+1; !found && i<dsp->numseg; i++)
4817       {
4818          if (dsp->starts[dsp->dim*i+rownum1-1] != -1 && dsp->starts[dsp->dim*i+rownum2-1] != -1)
4819             found = TRUE;
4820       }
4821       if (found)
4822       {
4823          while (window_head != NULL)
4824          {
4825             window = window_head->next;
4826             MemFree(window_head);
4827             window_head = window;
4828          }
4829          fake_sap->segs = NULL;
4830          SeqAlignFree(fake_sap);
4831          return FALSE;
4832       }
4833       /* merge whole row up to rownum1 */
4834       for (i=0; i<dsp->numseg; i++)
4835       {
4836          dsp->starts[dsp->dim*i+rownum1-1] = MAX(dsp->starts[dsp->dim*i+rownum1-1], dsp->starts[dsp->dim*i+rownum2-1]);
4837       }
4838       starts = (Int4Ptr)MemNew((dsp->dim-1)*(dsp->numseg)*sizeof(Int4));
4839       strands = (Uint1Ptr)MemNew((dsp->dim-1)*(dsp->numseg)*sizeof(Uint1));
4840       k = 0;
4841       for (i=0; i<dsp->dim; i++)
4842       {
4843          if (i != rownum2-1)
4844          {
4845             for (j=0; j<dsp->numseg; j++)
4846             {
4847                starts[(dsp->dim-1)*j+k] = dsp->starts[dsp->dim*j+i];
4848                strands[(dsp->dim-1)*j+k] = dsp->strands[dsp->dim*j+i];
4849             }
4850             k++;
4851          }
4852       }
4853       MemFree(dsp->starts);
4854       MemFree(dsp->strands);
4855       dsp->starts = starts;
4856       dsp->strands = strands;
4857       dsp->dim--;
4858       id_head = id_prev = NULL;
4859       id = dsp->ids;
4860       j = 0;
4861       while (id != NULL)
4862       {
4863          if (j+1 != rownum2)
4864          {
4865             if (id_head != NULL)
4866             {
4867                id_prev->next = SeqIdDup(id);
4868                id_prev = id_prev->next;
4869             } else
4870                id_head = id_prev = SeqIdDup(id);
4871          }
4872          j++;
4873          id = id->next;
4874       }
4875       SeqIdSetFree(dsp->ids);
4876       dsp->ids = id_head;
4877       while (window_head != NULL)
4878       {
4879          window = window_head->next;
4880          MemFree(window_head);
4881          window_head = window;
4882       }
4883       fake_sap->segs = NULL;
4884       SeqAlignFree(fake_sap);
4885       return TRUE;
4886    }
4887    /* now go through and find the largest piece of every window that can be merged */
4888    /* (can't split up an aligned region with the merge, though)                    */
4889    window = window_head;
4890    saip = (SAIndex2Ptr)(fake_sap->saip);
4891    while (window != NULL)
4892    {
4893       j = k = -1;
4894       found = FALSE;
4895       for (i=0; !found && i<window->n1; i++)
4896       {
4897          if (dsp->starts[dsp->dim*i+rownum1-1] != -1 && dsp->starts[dsp->dim*i+rownum2-1] != -1)
4898             found = TRUE;
4899       }
4900       if (!found)
4901          j = window->n1;
4902       found = FALSE;
4903       for (i=window->n2+1; !found && i<dsp->numseg; i++)
4904       {
4905          if (dsp->starts[dsp->dim*i+rownum1-1] != -1 && dsp->starts[dsp->dim*i+rownum2-1] != -1)
4906             found = TRUE;
4907       }
4908       if (!found)
4909          k = window->n2;
4910       if (j == -1)
4911       {
4912          found = FALSE;
4913          for (i = window->n1-1; !found && i<window->n2; i++)
4914          {
4915             j = binary_search_on_uint4_list(saip->unaln, i, saip->numunaln);
4916             if (j == i)
4917                found = TRUE;
4918             else
4919                j = -1;
4920          }
4921       }
4922       if (k == -1)
4923       {
4924          found = FALSE;
4925          for (i = window->n2; !found && i>=window->n1; i++)
4926          {
4927             k = binary_search_on_uint4_list(saip->unaln, i, saip->numunaln);
4928             if (k == i)
4929                found = TRUE;
4930             else
4931                k = -1;
4932          }
4933       }
4934       if (j > -1 && k > -1 && k > j)
4935       {
4936          window->n1 = j+1;
4937          window->n2 = k;
4938       } else
4939          window->n1 = -1;
4940       window = window->next;
4941    }
4942    window = window_head;
4943    while (window != NULL)
4944    {
4945       if (window->n4 == -1 && i >= 0) /* see if it fits now */
4946       {
4947          i = window->n1;
4948          j = window->n2+1;
4949          if (strand1 == Seq_strand_minus)
4950          {
4951             if (dsp->starts[dsp->dim*(j-1)+rownum1-1] == -1)
4952             {
4953                min1 = -1;
4954                for (k=j; min1 == -1 && k<dsp->numseg; k++)
4955                {
4956                   min1 = dsp->starts[dsp->dim*k+rownum1-1];
4957                }
4958                max1 = -1;
4959                for (k=(i-1); max1 == -1 && k>=0; k--)
4960                {
4961                   max1 = dsp->starts[dsp->dim*k+rownum1-1];
4962                }
4963             } else
4964             {
4965                min1 = dsp->starts[dsp->dim*(j-1)+rownum1-1];
4966                max1 = dsp->starts[dsp->dim*i+rownum1-1] + dsp->lens[i];
4967             }
4968          } else
4969          {
4970             if (dsp->starts[dsp->dim*(j-1)+rownum1-1] == -1)
4971             {
4972                min1 = -1;
4973                for (k=i-1; min1 == -1 && k >= 0; k--)
4974                {
4975                   min1 = dsp->starts[dsp->dim*k+rownum1-1];
4976                }
4977                max1 = -1;
4978                for (k=j; max1 == -1 && k<dsp->numseg; k++)
4979                {
4980                   max1 = dsp->starts[dsp->dim*k+rownum1-1];
4981                }
4982             } else
4983             {
4984                min1 = dsp->starts[dsp->dim*i+rownum1-1];
4985                max1 = dsp->starts[dsp->dim*(j-1)+rownum1-1] + dsp->lens[j-1];
4986             }
4987          }
4988          if (strand2 == Seq_strand_minus)
4989          {
4990             if (dsp->starts[dsp->dim*(j-1)+rownum2-1] == -1)
4991             {
4992                min2 = -1;
4993                for (k=j; min2 == -1 && k<dsp->numseg; k++)
4994                {
4995                   min2 = dsp->starts[dsp->dim*k+rownum2-1];
4996                }
4997                max2 = -1;
4998                for (k=(i-1); max2 == -1 && k>=0; k--)
4999                {
5000                   max2 = dsp->starts[dsp->dim*k+rownum2-1];
5001                }
5002             } else
5003             {
5004                min2 = dsp->starts[dsp->dim*(j-1)+rownum2-1];
5005                max2 = dsp->starts[dsp->dim*i+rownum2-1] + dsp->lens[i];
5006             }
5007          } else
5008          {
5009             if (dsp->starts[dsp->dim*(j-1)+rownum2-1] == -1)
5010             {
5011                min2 = -1;
5012                for (k=i-1; min2 == -1 && k >= 0; k--)
5013                {
5014                   min2 = dsp->starts[dsp->dim*k+rownum2-1];
5015                }
5016                max2 = -1;
5017                for (k=j; max2 == -1 && k<dsp->numseg; k++)
5018                {
5019                   max2 = dsp->starts[dsp->dim*k+rownum2-1];
5020                }
5021             } else
5022             {
5023                min2 = dsp->starts[dsp->dim*i+rownum2-1];
5024                max2 = dsp->starts[dsp->dim*(j-1)+rownum2-1] + dsp->lens[j-1];
5025             }
5026          }
5027          if (dsp->starts[dsp->dim*j+rownum1-1] == -1)
5028          {
5029             if (min1 < min2 && (max1 > max2 || max1 == -1))
5030                window->n4 = 0;
5031          } else
5032          {
5033             if (min2 < min1 && (max2 > max1 || max2 == -1))
5034                window->n4 = 0;
5035          }
5036       }
5037       if (window->n1 >= 0 && window->n4 >= 0)
5038       {
5039          for (i=window->n1; i<=window->n2; i++)
5040          {
5041             dsp->starts[dsp->dim*i+rownum1-1] = MAX(dsp->starts[dsp->dim*i+rownum1-1], dsp->starts[dsp->dim+i+rownum2-1]);
5042          }
5043       }
5044       window = window->next;
5045    }
5046    found = FALSE;
5047    /* check to see if rownum2 is all gaps now */
5048    for (i=0; !found && i<dsp->numseg; i++)
5049    {
5050       if (dsp->starts[dsp->dim*i+rownum2-1] != -1)
5051          found = TRUE;
5052    }
5053    merged = FALSE;
5054    if (!found) /* just gaps */
5055    {
5056       /* merge whole row up to rownum1 */
5057       for (i=0; i<dsp->numseg; i++)
5058       {
5059          dsp->starts[dsp->dim*i+rownum1-1] = MAX(dsp->starts[dsp->dim*i+rownum1-1], dsp->starts[dsp->dim*i+rownum2-1]);
5060       }
5061       starts = (Int4Ptr)MemNew((dsp->dim-1)*(dsp->numseg)*sizeof(Int4));
5062       strands = (Uint1Ptr)MemNew((dsp->dim-1)*(dsp->numseg)*sizeof(Uint1));
5063       k = 0;
5064       for (i=0; i<dsp->dim; i++)
5065       {
5066          if (i != rownum2-1)
5067          {
5068             for (j=0; j<dsp->numseg; j++)
5069             {
5070                starts[dsp->dim*j+k] = dsp->starts[dsp->dim*j+i];
5071                strands[dsp->dim*j+k] = dsp->strands[dsp->dim*j+i];
5072             }
5073             k++;
5074          }
5075       }
5076       MemFree(dsp->starts);
5077       MemFree(dsp->strands);
5078       dsp->starts = starts;
5079       dsp->strands = strands;
5080       dsp->dim--;
5081       id_head = id_prev = NULL;
5082       id = dsp->ids;
5083       j = 0;
5084       while (id != NULL)
5085       {
5086          if (j+1 != rownum2)
5087          {
5088             if (id_head != NULL)
5089             {
5090                id_prev->next = SeqIdDup(id);
5091                id_prev = id_prev->next;
5092             } else
5093                id_head = id_prev = SeqIdDup(id);
5094          }
5095          j++;
5096          id = id->next;
5097       }
5098       SeqIdSetFree(dsp->ids);
5099       dsp->ids = id_head;
5100       merged = TRUE;
5101    }
5102    while (window_head != NULL)
5103    {
5104       window = window_head->next;
5105       MemFree(window_head);
5106       window_head = window;
5107    }
5108    fake_sap->segs = NULL;
5109    SeqAlignFree(fake_sap);
5110    return merged;
5111 }
5112 
5113 /* SECTION 2c */
5114 /***************************************************************************
5115 *
5116 *  AlnMgr2CompareCdRows is the HeapSort callback for AlnMgr2CondenseRows.
5117 *  It puts the CDRows in order first by seqid and secondarily by row number.
5118 *
5119 ***************************************************************************/
AlnMgr2CompareCdRows(VoidPtr ptr1,VoidPtr ptr2)5120 static int LIBCALLBACK AlnMgr2CompareCdRows(VoidPtr ptr1, VoidPtr ptr2)
5121 {
5122    Int4        i;
5123    AMCdRowPtr  row1;
5124    AMCdRowPtr  row2;
5125 
5126    if (ptr1 == NULL || ptr2 == NULL)
5127       return 0;
5128    row1 = *((AMCdRowPtr PNTR)ptr1);
5129    row2 = *((AMCdRowPtr PNTR)ptr2);
5130    i = AlnMgr2OrderSeqIds(row1->sip, row2->sip);
5131    if (i == 0) /* sort from least rownum to greatest within each seqid */
5132    {
5133       if (row1->rownum < row2->rownum)
5134          return -1;
5135       else
5136          return 1;
5137    } else
5138       return i;
5139 }
5140 
5141 /* SECTION 2c */
5142 /***************************************************************************
5143 *
5144 *  AlnMgr2CompareAsps is a HeapSort callback for AlnMgr2AddInNewSA. It
5145 *  compares the starts (n1) of the two AM_Small2Ptrs; if those are the same
5146 *  it compares the types.
5147 *
5148 ***************************************************************************/
AlnMgr2CompareAsps(VoidPtr ptr1,VoidPtr ptr2)5149 static int LIBCALLBACK AlnMgr2CompareAsps(VoidPtr ptr1, VoidPtr ptr2)
5150 {
5151    AM_Small2Ptr  asp1;
5152    AM_Small2Ptr  asp2;
5153 
5154    if (ptr1 != NULL && ptr2 != NULL)
5155    {
5156       asp1 = *((AM_Small2Ptr PNTR)ptr1);
5157       asp2 = *((AM_Small2Ptr PNTR)ptr2);
5158       if (asp1->n1 < asp2->n1)
5159          return -1;
5160       else if (asp1->n1 > asp2->n1)
5161          return 1;
5162       else if (asp1->n5 < asp2->n5)
5163          return -1;
5164       else if (asp1->n5 > asp2->n5)
5165          return 1;
5166       else
5167       {
5168          if (asp1->n3 == AM_GAP && asp2->n3 == AM_GAP)
5169          {
5170             if (asp1->n2 < asp2->n2)
5171                return -1;
5172             if (asp1->n2 > asp2->n2)
5173                return 1;
5174          }
5175          if (asp1->n3 == AM_START)
5176          {
5177             if (asp2->n3 == AM_STOP)
5178                return -1;
5179             else if (asp2->n3 == AM_GAP)
5180                return -1;
5181             else if (asp2->n3 == AM_HARDSTOP)
5182                return -1;
5183             else
5184                return 0;
5185          } else if (asp1->n3 == AM_STOP)
5186          {
5187             if (asp2->n3 == AM_START)
5188                return 1;
5189             else if (asp2->n3 == AM_GAP)
5190                return 1;
5191             else if (asp2->n3 == AM_HARDSTOP)
5192                return -1;
5193             else
5194                return 0;
5195          } else if (asp1->n3 == AM_GAP)
5196          {
5197             if (asp2->n3 == AM_START)
5198                return 1;
5199             else if (asp2->n3 == AM_STOP)
5200                return -1;
5201             else if (asp2->n3 == AM_HARDSTOP)
5202                return -1;
5203             else
5204                return 0;
5205          } else if (asp1->n3 == AM_HARDSTOP)
5206          {
5207             if (asp2->n3 == AM_START)
5208                return 1;
5209             else if (asp2->n3 == AM_STOP)
5210                return 1;
5211             else if (asp2->n3 == AM_GAP)
5212                return 1;
5213             else
5214                return 0;
5215          }
5216       }
5217    }
5218    return 0;
5219 }
5220 
5221 /* SECTION 2c */
5222 /***************************************************************************
5223 *
5224 *  AlnMgr2CompareAspsMinus is a HeapSort callback for AlnMgr2AddInNewSA. It
5225 *  compares the starts (n1) of the two AM_Small2Ptrs; if those are the same
5226 *  it compares the types. The only difference from AlnMgr2CompareAsps is
5227 *  that it sorts the structures in the opposite order.
5228 *
5229 ***************************************************************************/
AlnMgr2CompareAspsMinus(VoidPtr ptr1,VoidPtr ptr2)5230 static int LIBCALLBACK AlnMgr2CompareAspsMinus(VoidPtr ptr1, VoidPtr ptr2)
5231 {
5232    AM_Small2Ptr  asp1;
5233    AM_Small2Ptr  asp2;
5234 
5235    if (ptr1 != NULL && ptr2 != NULL)
5236    {
5237       asp1 = *((AM_Small2Ptr PNTR)ptr1);
5238       asp2 = *((AM_Small2Ptr PNTR)ptr2);
5239       if (asp1->n1 > asp2->n1)
5240          return -1;
5241       else if (asp1->n1 < asp2->n1)
5242          return 1;
5243       else if (asp1->n5 < asp2->n5)
5244          return -1;
5245       else if (asp1->n5 > asp2->n5)
5246          return 1;
5247       else
5248       {
5249          if (asp1->n3 == AM_GAP && asp2->n3 == AM_GAP)
5250          {
5251             if (asp1->n2 < asp2->n2)
5252                return -1;
5253             if (asp1->n2 > asp2->n2)
5254                return 1;
5255          }
5256          if (asp1->n3 == AM_START)
5257          {
5258             if (asp2->n3 == AM_STOP)
5259                return 1;
5260             else if (asp2->n3 == AM_GAP)
5261                return -1;
5262             else if (asp2->n3 == AM_HARDSTOP)
5263                return 1;
5264             else
5265                return 0;
5266          } else if (asp1->n3 == AM_STOP)
5267          {
5268             if (asp2->n3 == AM_START)
5269                return -1;
5270             else if (asp2->n3 == AM_GAP)
5271                return -1;
5272             else if (asp2->n3 == AM_HARDSTOP)
5273                return 1;
5274             else
5275                return 0;
5276          } else if (asp1->n3 == AM_GAP)
5277          {
5278             if (asp2->n3 == AM_START)
5279                return 1;
5280             else if (asp2->n3 == AM_STOP)
5281                return 1;
5282             else if (asp2->n3 == AM_HARDSTOP)
5283                return 1;
5284             else
5285                return 0;
5286          } else if (asp1->n3 == AM_HARDSTOP)
5287          {
5288             if (asp2->n3 == AM_START)
5289                return -1;
5290             else if (asp2->n3 == AM_STOP)
5291                return -1;
5292             else if (asp2->n3 == AM_GAP)
5293                return -1;
5294             else
5295                return 0;
5296          }
5297       }
5298    }
5299    return 0;
5300 }
5301 
5302 
5303 /* SECTION 2c */
5304 /***************************************************************************
5305 *
5306 *  AlnMgr2GetFirstSharedRow takes two indexed or unindexed dense-seg
5307 *  seqaligns and returns the row numbers of the first sequence that is
5308 *  shared between the two alignments. If the alignments do not share any
5309 *  sequences, both n1 and n2 are set to 0.
5310 *
5311 ***************************************************************************/
AlnMgr2GetFirstSharedRow(SeqAlignPtr sap1,SeqAlignPtr sap2,Int4Ptr n1,Int4Ptr n2)5312 static void AlnMgr2GetFirstSharedRow(SeqAlignPtr sap1, SeqAlignPtr sap2, Int4Ptr n1, Int4Ptr n2)
5313 {
5314    DenseSegPtr  dsp1;
5315    DenseSegPtr  dsp2;
5316    Int4         i;
5317    Int4         j;
5318    SeqIdPtr     sip1;
5319    SeqIdPtr     sip2;
5320 
5321    dsp1 = (DenseSegPtr)(sap1->segs);
5322    dsp2 = (DenseSegPtr)(sap2->segs);
5323    sip1 = dsp1->ids;
5324    i = 1;
5325    while (sip1 != NULL)
5326    {
5327       j = 1;
5328       sip2 = dsp2->ids;
5329       while (sip2 != NULL)
5330       {
5331          if (SeqIdComp(sip1, sip2) == SIC_YES)
5332          {
5333             *n1 = i;
5334             *n2 = j;
5335             return;
5336          }
5337          sip2 = sip2->next;
5338          j++;
5339       }
5340       sip1 = sip1->next;
5341       i++;
5342    }
5343    /* nothing found */
5344    *n1 = 0;
5345    *n2 = 0;
5346 }
5347 
5348 /* SECTION 2d */
AlnMgr2SeqIdListsOverlap(SeqIdPtr sip1,SeqIdPtr sip2)5349 static SeqIdPtr AlnMgr2SeqIdListsOverlap(SeqIdPtr sip1, SeqIdPtr sip2)
5350 {
5351    SeqIdPtr  sip;
5352    SeqIdPtr  sip_tmp;
5353 
5354    if (sip1 == NULL || sip2 == NULL)
5355       return NULL;
5356    sip = sip1;
5357    while (sip != NULL)
5358    {
5359       sip_tmp = sip2;
5360       while (sip_tmp != NULL)
5361       {
5362          if (SeqIdComp(sip, sip_tmp) == SIC_YES)
5363             return sip;
5364          sip_tmp = sip_tmp->next;
5365       }
5366       sip = sip->next;
5367    }
5368    return NULL;
5369 }
5370 
5371 /***************************************************************************
5372 *
5373 *  AlnMgr2OrderSeqIds simply alphabetizes printed seqids in order to sort
5374 *  them in order to group identical ones in a set.
5375 *
5376 ***************************************************************************/
AlnMgr2OrderSeqIds(SeqIdPtr sip1,SeqIdPtr sip2)5377 static Int4 AlnMgr2OrderSeqIds(SeqIdPtr sip1, SeqIdPtr sip2)
5378 {
5379    Char  txt1[42];
5380    Char  txt2[42];
5381 
5382    if (sip1 == NULL && sip2 == NULL)
5383       return 0;
5384    if (sip1 == NULL && sip2 != NULL)
5385       return 1;
5386    if (sip1 != NULL && sip2 == NULL)
5387       return -1;
5388    SeqIdWrite(sip1, txt1, PRINTID_TEXTID_ACC_VER, 41);
5389    SeqIdWrite(sip2, txt2, PRINTID_TEXTID_ACC_VER, 41);
5390    txt1[41] = txt2[41] = '\0';
5391    return StringICmp(txt1, txt2);
5392 }
5393 
5394 /* SECTION 2d */
5395 /***************************************************************************
5396 *
5397 *  AlnMgr2SetUnaln takes an indexed alignment and sets the numunaln and
5398 *  unaln array fields. The unaligned regions are numbered the same
5399 *  regardless of whether the alignment is anchored, although they will
5400 *  most likely be accessed and displayed differently.
5401 *
5402 ***************************************************************************/
AlnMgr2SetUnaln(SeqAlignPtr sap)5403 static void AlnMgr2SetUnaln(SeqAlignPtr sap)
5404 {
5405    AMAlignIndex2Ptr  amaip;
5406    AM_Small2Ptr      ams;
5407    AM_Small2Ptr      ams_head;
5408    AM_Small2Ptr      ams_prev;
5409    AM_Small2Ptr      PNTR amsarray;
5410    DenseSegPtr       dsp;
5411    Int4              i;
5412    Int4              j;
5413    SAIndex2Ptr       saip;
5414 
5415    if (sap == NULL || sap->saip == NULL)
5416       return;
5417    if (sap->saip->indextype == INDEX_CHILD)
5418    {
5419       saip = (SAIndex2Ptr)(sap->saip);
5420       dsp = (DenseSegPtr)(sap->segs);
5421    } else if (sap->saip->indextype == INDEX_PARENT)
5422    {
5423       amaip = (AMAlignIndex2Ptr)(sap->saip);
5424       if (amaip->alnstyle == AM2_LITE)
5425          return;
5426       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
5427       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
5428    } else
5429       return;
5430    MemFree(saip->unaln);
5431    saip->unaln = NULL;
5432    ams_head = ams_prev = NULL;
5433    for (i=0; i<saip->numrows; i++)
5434    {
5435       for (j=0; j<saip->srdp[i]->numunaln; j++)
5436       {
5437          ams = (AM_Small2Ptr)MemNew(sizeof(AM_Small2));
5438          ams->n1 = saip->srdp[i]->unaligned[j];
5439          if (ams_head != NULL)
5440          {
5441             ams_prev->next = ams;
5442             ams_prev = ams;
5443          } else
5444             ams_head = ams_prev = ams;
5445       }
5446    }
5447    if (ams_head == NULL)
5448    {
5449       saip->numunaln = -1;
5450       return;
5451    }
5452    j = 0;
5453    ams = ams_head;
5454    while (ams != NULL)
5455    {
5456       j++;
5457       ams = ams->next;
5458    }
5459    amsarray = (AM_Small2Ptr PNTR)MemNew(j*sizeof(AM_Small2Ptr));
5460    j = 0;
5461    ams = ams_head;
5462    while (ams != NULL)
5463    {
5464       amsarray[j] = ams;
5465       j++;
5466       ams = ams->next;
5467    }
5468    HeapSort(amsarray, j, sizeof(AM_Small2Ptr), AlnMgr2CompareUnalnAMS);
5469    saip->numunaln = 1;
5470    for (i=1; i<j; i++)
5471    {
5472       if (amsarray[i]->n1 != amsarray[i-1]->n1)
5473          saip->numunaln++;
5474    }
5475    saip->unaln = (Uint4Ptr)MemNew(saip->numunaln*sizeof(Uint4));
5476    saip->unaln[0] = amsarray[0]->n1;
5477    saip->numunaln = 1;
5478    for (i=1; i<j; i++)
5479    {
5480       if (amsarray[i]->n1 != amsarray[i-1]->n1)
5481       {
5482          saip->unaln[saip->numunaln] = amsarray[i]->n1;
5483          saip->numunaln++;
5484       }
5485    }
5486    for (i=0; i<j; i++)
5487    {
5488       MemFree(amsarray[i]);
5489    }
5490    MemFree(amsarray);
5491 }
5492 
5493 /* SECTION 2d */
5494 /***************************************************************************
5495 *
5496 *  AlnMgr2CompareUnalnAMS is the HeapSort callback for AlnMgr2SetUnaln;
5497 *  it simply compares two AM_Small2 structures and orders them by their
5498 *  n1 fields.
5499 *
5500 ***************************************************************************/
AlnMgr2CompareUnalnAMS(VoidPtr ptr1,VoidPtr ptr2)5501 static int LIBCALLBACK AlnMgr2CompareUnalnAMS(VoidPtr ptr1, VoidPtr ptr2)
5502 {
5503    AM_Small2Ptr  ams1;
5504    AM_Small2Ptr  ams2;
5505 
5506    if (ptr1 == NULL || ptr2 == NULL)
5507       return 0;
5508    ams1 = *((AM_Small2Ptr PNTR)ptr1);
5509    ams2 = *((AM_Small2Ptr PNTR)ptr2);
5510    if (ams1->n1 < ams2->n1)
5511       return -1;
5512    else if (ams1->n1 > ams2->n1)
5513       return 1;
5514    else
5515       return 0;
5516 }
5517 
5518 /***************************************************************************
5519 *
5520 *  SECTION 3: Functions for debugging
5521 *
5522 ***************************************************************************/
5523 
5524 /* SECTION 3 */
am_print_sa_index(SeqAlignPtr sap,FILE * ofp)5525 NLM_EXTERN void am_print_sa_index(SeqAlignPtr sap, FILE *ofp)
5526 {
5527    Int4        i;
5528    Int4        j;
5529    SAIndex2Ptr  saip;
5530 
5531    if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_CHILD)
5532       return;
5533    saip = (SAIndex2Ptr)(sap->saip);
5534    fprintf(ofp, "Rows: %d\n", saip->numrows);
5535    fprintf(ofp, "Segments: %d\n", saip->numseg);
5536    fprintf(ofp, "Anchor: %d\n", saip->anchor);
5537    fprintf(ofp, "Alignment coordinates: ");
5538    for (i=0; i<saip->numseg; i++)
5539    {
5540       fprintf(ofp, "%d ", saip->aligncoords[i]);
5541    }
5542    fprintf(ofp, "\n\n");
5543    for (i=0; i<saip->numrows; i++)
5544    {
5545       fprintf(ofp, "row %d\n", i+1);
5546       fprintf(ofp, "numsect: %d\n", saip->srdp[i]->numsect);
5547       for (j=0; j<saip->srdp[i]->numsect; j++)
5548       {
5549          fprintf(ofp, "%d ", saip->srdp[i]->sect[j]);
5550       }
5551       fprintf(ofp, "\n");
5552       fprintf(ofp, "numunsect: %d\n", saip->srdp[i]->numunsect);
5553       for (j=0; j<saip->srdp[i]->numunsect; j++)
5554       {
5555          fprintf(ofp, "%d ", saip->srdp[i]->unsect[j]);
5556       }
5557       fprintf(ofp, "\n");
5558       fprintf(ofp, "numinsect: %d\n", saip->srdp[i]->numinsect);
5559       for (j=0; j<saip->srdp[i]->numinsect; j++)
5560       {
5561          fprintf(ofp, "%d ", saip->srdp[i]->insect[j]);
5562       }
5563       fprintf(ofp, "\n");
5564    }
5565 }
5566 
5567 /* SECTION 3 */
5568 /***************************************************************************
5569 *
5570 *  AlnMgr2PrintSeqAlign prints an interleaved output of the entire
5571 *  indexed alignment, with 'linesize' characters on each line (max 200).
5572 *
5573 ***************************************************************************/
AlnMgr2PrintSeqAlign(SeqAlignPtr sap,Int4 linesize,Boolean isnuc,FILE * ofp)5574 NLM_EXTERN void AlnMgr2PrintSeqAlign(SeqAlignPtr sap, Int4 linesize, Boolean isnuc, FILE *ofp)
5575 {
5576    AlnMsg2Ptr   amp;
5577    BioseqPtr   bsp;
5578    Char        buf[201];
5579    Int4        ctr;
5580    Boolean     done;
5581    Int4        i;
5582    Int4        j;
5583    Int4        len;
5584    Boolean     more;
5585    Int4        numrows;
5586    Int4        row;
5587    Uint1       seqcode;
5588    SeqIdPtr    sip;
5589    SeqPortPtr  spp;
5590    Char        text[42];
5591 
5592    if (sap == NULL || sap->saip == NULL || linesize > 200)
5593       return;
5594    if (isnuc)
5595       seqcode = Seq_code_iupacna;
5596    else
5597       seqcode = Seq_code_iupacaa;
5598    amp = AlnMsgNew2();
5599    numrows = AlnMgr2GetNumRows(sap);
5600    len = AlnMgr2GetAlnLength(sap, FALSE);
5601    for (i=0; i<len; i+=linesize)
5602    {
5603       fprintf(ofp, "%d - %d\n", i, MIN(i+linesize-1, len-1));
5604       for (row=0; row<numrows; row++)
5605       {
5606          sip = AlnMgr2GetNthSeqIdPtr(sap, row+1);
5607          SeqIdWrite(sip, text, PRINTID_FASTA_SHORT, 41);
5608          done = FALSE;
5609          for (j=0; j<12; j++)
5610          {
5611             if (text[j] == '\0')
5612                done = TRUE;
5613             if (done == TRUE)
5614                fprintf(ofp, " ");
5615             else
5616                fprintf(ofp, "%c", text[j]);
5617          }
5618          bsp = BioseqLockById(sip);
5619          AlnMsgReNew2(amp);
5620          amp->row_num = row+1;
5621          amp->from_aln = i;
5622          amp->to_aln = MIN(i+linesize-1, len-1);
5623          while (more = AlnMgr2GetNextAlnBit(sap, amp))
5624          {
5625             if (amp->type == AM_GAP)
5626             {
5627                for (j=amp->from_row; j<=amp->to_row; j++)
5628                {
5629                   fprintf(ofp, "-");
5630                }
5631             } else
5632             {
5633                spp = SeqPortNew(bsp, amp->from_row, amp->to_row, amp->strand, seqcode);
5634                ctr = SeqPortRead(spp, (Uint1Ptr)buf, amp->to_row-amp->from_row+1);
5635                buf[ctr] = '\0';
5636                fwrite(buf, 1, ctr, ofp);
5637                SeqPortFree(spp);
5638             }
5639          }
5640          BioseqUnlock(bsp);
5641          fprintf(ofp, "\n");
5642       }
5643       fprintf(ofp, "\n\n");
5644    }
5645    AlnMsgFree2(amp);
5646 }
5647 
5648 /* SECTION 3 */
AlnMgr2DumpIndexedAlnToFile(SeqAlignPtr sap,CharPtr filename)5649 NLM_EXTERN void AlnMgr2DumpIndexedAlnToFile(SeqAlignPtr sap, CharPtr filename)
5650 {
5651    AsnIoPtr          aip;
5652    AMAlignIndex2Ptr  amaip;
5653    SeqAlignPtr       sap_tmp;
5654 
5655    if (sap == NULL || sap->saip == NULL)
5656       return;
5657    if (sap->saip->indextype == INDEX_CHILD)
5658    {
5659       if (sap->dim == 0)
5660          sap->dim = AlnMgr2GetNumRows(sap);
5661       aip = AsnIoOpen(filename, "w");
5662       SeqAlignAsnWrite(sap, aip, NULL);
5663       AsnIoClose(aip);
5664       return;
5665    }
5666    amaip = (AMAlignIndex2Ptr)(sap->saip);
5667    aip = AsnIoOpen(filename, "w");
5668    if (amaip->alnstyle != AM2_LITE)
5669    {
5670       amaip->sharedaln->dim = 0;  /* mark it as the sharedaln */
5671       SeqAlignAsnWrite(amaip->sharedaln, aip, NULL);
5672    }
5673    sap_tmp = sap;
5674    if (sap->dim == 0)
5675       sap->dim = AlnMgr2GetNumRows(sap);
5676    while (sap_tmp != NULL)
5677    {
5678       SeqAlignAsnWrite(sap_tmp, aip, NULL);
5679       sap_tmp = sap_tmp->next;
5680    }
5681    AsnIoClose(aip);
5682 }
5683 
5684 /***************************************************************************
5685 *
5686 *  SECTION 4: API-level functions (and their helper functions) used to
5687 *  access an indexed alignment.
5688 *    SECTION 4a: AlnMgr2GetNextAlnBit and associated functions
5689 *    SECTION 4b: "GetNth" functions
5690 *    SECTION 4c: other functions for accessing the alignment
5691 *
5692 ***************************************************************************/
5693 
5694 /* SECTION 4a */
5695 /***************************************************************************
5696 *
5697 *  AlnMgr2GetNextAlnBit takes an indexed seqalign and returns it, piece
5698 *  by piece, in the row and across the range specified in the AlnMsg
5699 *  structure. amp->from_aln and amp->to_aln must be filled in; these are
5700 *  in alignment coordinates. AlnMgr2GetNextAlnBit will return the AlnMsg
5701 *  structure with amp->from_row and amp->to_row filled in. If amp->type is
5702 *  AM_SEQ, these numbers are sequence coordinates; if amp->type is AM_GAP
5703 *  the numbers are alignment coordinates and there is a gap in that row.
5704 *  AlnMgr2GetNextAlnBit returns one continuous piece of sequence or gap
5705 *  at each call, and keeps returning TRUE until it has returned all the
5706 *  information for the piece of the alignment requested.
5707 *
5708 ***************************************************************************/
AlnMgr2GetNextAlnBit(SeqAlignPtr sap,AlnMsg2Ptr amp)5709 NLM_EXTERN Boolean AlnMgr2GetNextAlnBit(SeqAlignPtr sap, AlnMsg2Ptr amp) /* NEXT */
5710 {
5711    AMAlignIndex2Ptr  amaip;
5712    Uint2Ptr         array;
5713    Int4             arraylen;
5714    Int4             ctr;
5715    Int4             disc;
5716    Int4             disc1;
5717    DenseSegPtr      dsp;
5718    Int4             endoffset;
5719    Boolean          found;
5720    Int4             i;
5721    Int4             index;
5722    Int4             intfrom;
5723    Int4             intto;
5724    Int4             j;
5725    Int4             len;
5726    Int4             offset;
5727    SAIndex2Ptr       saip;
5728    SARowDat2Ptr      srdp;
5729    Int4             start_sect;
5730    Int4             stop_sect;
5731    Uint2Ptr         trans;
5732    Int4             translen;
5733 
5734    if (sap == NULL || sap->saip == NULL || amp == NULL)
5735       return FALSE;
5736    if (amp->left_interrupt != NULL)
5737    {
5738       MemFree(amp->left_interrupt);
5739       amp->left_interrupt = NULL;
5740    }
5741    if (amp->right_interrupt != NULL)
5742    {
5743       MemFree(amp->right_interrupt);
5744       amp->right_interrupt = NULL;
5745    }
5746    if (sap->saip->indextype == INDEX_CHILD)
5747    {
5748       dsp = (DenseSegPtr)(sap->segs);
5749       saip = (SAIndex2Ptr)(sap->saip);
5750    } else if (sap->saip->indextype == INDEX_PARENT)
5751    {
5752       amaip = (AMAlignIndex2Ptr)(sap->saip);
5753       if (amaip->alnstyle == AM2_LITE)
5754          return FALSE;
5755       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
5756       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
5757    }
5758    /* reality checks */
5759    if (amp->row_num > saip->numrows)
5760       return FALSE;
5761    if (amp->len <= 0)
5762       amp->len = AlnMgr2GetAlnLength(sap, FALSE);
5763    if (amp->from_aln < 0 || amp->from_aln > amp->len-1 || amp->real_from > amp->to_aln)
5764       return FALSE;
5765    if (amp->to_aln == -1)
5766       amp->to_aln = amp->len - 1;
5767    if (amp->to_aln < amp->from_aln || amp->to_aln > amp->len-1)
5768       return FALSE;
5769    if (amp->real_from == -2)
5770       amp->real_from = amp->from_aln;
5771    amp->strand = AlnMgr2GetNthStrand(sap, amp->row_num);
5772    srdp = saip->srdp[amp->row_num-1];
5773    len = 0;
5774    start_sect = binary_search_on_uint4_list(saip->aligncoords, amp->real_from, saip->numseg);
5775    offset = amp->real_from - saip->aligncoords[start_sect];
5776    endoffset = 0;
5777    stop_sect = binary_search_on_uint4_list(saip->aligncoords, amp->to_aln, saip->numseg);
5778    /* now figure out whether it starts in sequence or a gap, and figure out how       */
5779    /* long it continues in the same mode without interruption by inserts or unaligned */
5780    /* regions; the whole contiguous stretch will be reported                          */
5781    if (saip->anchor > 0)
5782    {
5783       trans = saip->srdp[saip->anchor-1]->sect;
5784       translen = saip->srdp[saip->anchor-1]->numsect;
5785    } else
5786    {
5787       trans = (Uint2Ptr)MemNew(dsp->numseg*sizeof(Uint2));
5788       for (i=0; i<dsp->numseg; i++)
5789       {
5790          trans[i] = i;
5791       }
5792       translen = dsp->numseg;
5793    }
5794    arraylen = -1;
5795    if ((index = binary_search_on_uint2_list(srdp->sect, trans[start_sect], srdp->numsect)) != -1)
5796    {
5797       amp->type = AM_SEQ;
5798       array = srdp->sect;
5799       arraylen = srdp->numsect;
5800    } else if ((index = binary_search_on_uint2_list(srdp->unsect, trans[start_sect], srdp->numunsect)) != -1)
5801    {
5802       amp->type = AM_GAP;
5803       array = srdp->unsect;
5804       arraylen = srdp->numunsect;
5805    }
5806    if (arraylen == -1) /* error */
5807       return FALSE;
5808    if (amp->row_num == saip->anchor)
5809    {
5810       amp->type = AM_SEQ;
5811       /* find limits of aligned region */
5812       i = start_sect;
5813       j = srdp->sect[start_sect];
5814       disc = binary_search_on_uint2_list(srdp->unaligned, j, srdp->numunaln);
5815       while (j<srdp->sect[stop_sect] && disc == -1)
5816       {
5817          j++;
5818          disc = binary_search_on_uint2_list(srdp->unaligned, j, srdp->numunaln);
5819       }
5820       i = binary_search_on_uint2_list(srdp->sect, j, srdp->numsect);
5821       if (i == -1)
5822       {
5823          i = binary_search_on_uint2_list(srdp->unsect, j, srdp->numunsect);
5824       }
5825       endoffset = dsp->lens[trans[i]] - (amp->to_aln - saip->aligncoords[i]) - 1;
5826       if (endoffset < 0)
5827          endoffset = 0;
5828       if (i<stop_sect && endoffset == 0) /* there's an unaligned region here, and we go to the end of the segment */
5829       {
5830          AlnMgr2GetUnalignedInfo(sap, trans[i], amp->row_num, &intfrom, &intto);
5831          amp->right_interrupt = (AMInterruptPtr)MemNew(sizeof(AMInterrupt));
5832          amp->right_interrupt->row = amp->row_num;
5833          amp->right_interrupt->unalnlen = intto - intfrom + 1;
5834          amp->right_interrupt->segnum = trans[i];
5835          amp->right_interrupt->which_side = AM2_RIGHT;
5836       }
5837       stop_sect = i;
5838       if (start_sect > 0 && offset == 0)
5839       {
5840          disc = binary_search_on_uint2_list(srdp->unaligned, trans[start_sect]-1, srdp->numunaln);
5841          if (disc != -1) /* there is a left unaligned region */
5842          {
5843             AlnMgr2GetUnalignedInfo(sap, trans[start_sect]-1, amp->row_num, &intfrom, &intto);
5844             amp->left_interrupt = (AMInterruptPtr)MemNew(sizeof(AMInterrupt));
5845             amp->left_interrupt->row = amp->row_num;
5846             amp->left_interrupt->unalnlen = intto - intfrom + 1;
5847             amp->left_interrupt->segnum = trans[start_sect];
5848             amp->left_interrupt->which_side = AM2_LEFT;
5849          }
5850       }
5851       len = 0;
5852       for (i=start_sect; i<= stop_sect; i++)
5853       {
5854          len += dsp->lens[trans[i]];
5855       }
5856       len = len - offset - endoffset;
5857       if (amp->strand == Seq_strand_minus)
5858          amp->from_row = dsp->starts[trans[stop_sect]*dsp->dim+amp->row_num-1] + endoffset;
5859       else
5860          amp->from_row = dsp->starts[trans[start_sect]*dsp->dim+amp->row_num-1] + offset;
5861       amp->to_row = amp->from_row + len - 1;
5862       amp->real_from += amp->to_row - amp->from_row + 1;
5863       if (saip->anchor <= 0)
5864          MemFree(trans);
5865       return TRUE;
5866    }
5867    /* look for limits of aligned/gapped region */
5868    i = index;
5869    j = start_sect+1;
5870    disc = -1;
5871    found = FALSE;
5872    while (i+1<arraylen && disc == -1 && array[i] <= trans[stop_sect] && array[i+1]-1 == array[i])
5873    {
5874       disc = binary_search_on_uint2_list(srdp->unaligned, array[i], srdp->numunaln);
5875       if (disc == -1)
5876          i++;
5877    }
5878    disc = binary_search_on_uint2_list(srdp->unaligned, array[i], srdp->numunaln);
5879    j = binary_search_on_uint2_list(trans, array[i], translen);
5880    if (amp->type == AM_SEQ && j <= stop_sect) /* there is an interrupting region, either seq/gap, insert, or unaligned, plus just check last piece */
5881    {
5882       i = binary_search_on_uint2_list(srdp->insect, trans[j]+1, srdp->numinsect);
5883       if (i != -1) /* there's an insert */
5884       {
5885          amp->right_interrupt = (AMInterruptPtr)MemNew(sizeof(AMInterrupt));
5886          amp->right_interrupt->row = amp->row_num;
5887          amp->right_interrupt->segnum = trans[j];
5888          amp->right_interrupt->insertlen = dsp->lens[srdp->insect[i]];
5889          amp->right_interrupt->which_side = AM2_RIGHT;
5890          /* look for unaligned regions off insert */
5891          disc1 = -1;
5892          if (j > 0)
5893             disc1 = binary_search_on_uint2_list(srdp->unaligned, trans[j]+1, srdp->numunaln);
5894          if (disc1 != -1)
5895          {
5896             AlnMgr2GetUnalignedInfo(sap, srdp->unaligned[disc1], amp->row_num, &intfrom, &intto);
5897             amp->right_interrupt->unalnlen = intto - intfrom + 1;
5898          }
5899          i++;
5900          ctr = 1;
5901          while (i<srdp->numinsect && srdp->insect[i] == srdp->insect[i-1]+1)
5902          {
5903             amp->right_interrupt->insertlen += dsp->lens[srdp->insect[i]];
5904             /* look for unaligned regions off insert */
5905             disc1 = -1;
5906             if (j > 0) {
5907               disc1 = binary_search_on_uint2_list(srdp->unaligned, trans[j]+1+ctr, srdp->numunaln);
5908             }
5909             if (disc1 != -1)
5910             {
5911                AlnMgr2GetUnalignedInfo(sap, srdp->unaligned[disc1], amp->row_num, &intfrom, &intto);
5912                amp->right_interrupt->unalnlen += intto - intfrom + 1;
5913             }
5914             i++;
5915             ctr++;
5916          }
5917       }
5918       if (disc != -1) /* there's an unaligned region */
5919       {
5920          if (amp->right_interrupt == NULL)
5921             amp->right_interrupt = (AMInterruptPtr)MemNew(sizeof(AMInterrupt));
5922          amp->right_interrupt->row = amp->row_num;
5923          amp->right_interrupt->segnum = trans[j];
5924          amp->right_interrupt->which_side = AM2_RIGHT;
5925          AlnMgr2GetUnalignedInfo(sap, srdp->unaligned[disc], amp->row_num, &intfrom, &intto);
5926          amp->right_interrupt->unalnlen += intto - intfrom + 1;
5927       }
5928    }
5929    stop_sect = j;
5930    /* now look for left-side unaligned or inserted regions if offset == 0 */
5931    if (amp->type == AM_SEQ && offset == 0)
5932    {
5933       disc = -1;
5934       j = 1;
5935       i = -1;
5936       if ((Int2)trans[start_sect]-j > 0)
5937       i = binary_search_on_uint2_list(srdp->sect, trans[start_sect]-j, srdp->numsect);
5938       while (i == -1 && (Int2)(trans[start_sect])-j-1 >= 0)
5939       {
5940          i = binary_search_on_uint2_list(srdp->sect, trans[start_sect]-j-1, srdp->numsect);
5941          j++;
5942       }
5943       disc = binary_search_on_uint2_list(srdp->unaligned, trans[start_sect]-j, srdp->numunaln);;
5944       if (disc > -1)
5945       {
5946          AlnMgr2GetUnalignedInfo(sap, trans[start_sect]-j, amp->row_num, &intfrom, &intto);
5947          amp->left_interrupt = (AMInterruptPtr)MemNew(sizeof(AMInterrupt));
5948          amp->left_interrupt->row = amp->row_num;
5949          amp->left_interrupt->segnum = trans[start_sect];
5950          amp->left_interrupt->which_side = AM2_LEFT;
5951          amp->left_interrupt->unalnlen = intto - intfrom + 1;
5952       }
5953       i = binary_search_on_uint2_list(srdp->insect, trans[start_sect]-j, srdp->numinsect);
5954       if (i != -1) /* there's an insert */
5955       {
5956          if (amp->left_interrupt == NULL)
5957             amp->left_interrupt = (AMInterruptPtr)MemNew(sizeof(AMInterrupt));
5958          amp->left_interrupt->row = amp->row_num;
5959          amp->left_interrupt->segnum = trans[start_sect];
5960          amp->left_interrupt->which_side = AM2_LEFT;
5961          amp->left_interrupt->insertlen = dsp->lens[srdp->insect[i]];
5962          /* look for unaligned regions off insert */
5963          j = trans[start_sect]-j;
5964          disc1 = binary_search_on_uint2_list(srdp->unaligned, j, srdp->numunaln);
5965          if (disc1 != -1)
5966          {
5967             AlnMgr2GetUnalignedInfo(sap, srdp->unaligned[disc1], amp->row_num, &intfrom, &intto);
5968             amp->left_interrupt->unalnlen += intto - intfrom + 1;
5969          }
5970          i--;
5971          j--;
5972          while (i-1>=0 && srdp->insect[i] == srdp->insect[i+1]-1)
5973          {
5974             amp->left_interrupt->insertlen += dsp->lens[srdp->insect[i]];
5975             disc1 = binary_search_on_uint2_list(srdp->unaligned, j, srdp->numunaln);
5976             if (disc1 != -1)
5977             {
5978                AlnMgr2GetUnalignedInfo(sap, srdp->unaligned[disc1], amp->row_num, &intfrom, &intto);
5979                amp->left_interrupt->unalnlen += intto - intfrom + 1;
5980             }
5981             i--;
5982             j--;
5983          }
5984          if (i>=0) /* look one more over for unaligned */
5985          {
5986             disc1 = binary_search_on_uint2_list(srdp->unaligned, j, srdp->numunaln);
5987             if (disc1 != -1)
5988             {
5989                AlnMgr2GetUnalignedInfo(sap, srdp->unaligned[disc1], amp->row_num, &intfrom, &intto);
5990                amp->left_interrupt->unalnlen += intto - intfrom + 1;
5991             }
5992          }
5993       }
5994    }
5995    endoffset = dsp->lens[trans[stop_sect]] - (amp->to_aln - saip->aligncoords[stop_sect]) - 1;
5996    if (endoffset < 0)
5997       endoffset = 0;
5998    if (amp->right_interrupt != NULL && endoffset > 0)
5999    {
6000       MemFree(amp->right_interrupt);
6001       amp->right_interrupt = NULL;
6002    }
6003    len = 0;
6004    for (i=start_sect; i<=stop_sect; i++)
6005    {
6006       len += dsp->lens[trans[i]];
6007    }
6008    len = len - offset - endoffset;
6009    if (amp->type == AM_GAP)
6010    {
6011       amp->from_row = amp->real_from;
6012       amp->to_row = amp->from_row + len - 1;
6013    } else
6014    {
6015       if (amp->strand == Seq_strand_minus)
6016       {
6017          amp->from_row = dsp->starts[trans[stop_sect]*dsp->dim+amp->row_num-1] + endoffset;
6018          amp->to_row = amp->from_row + len - 1;
6019       } else
6020       {
6021          amp->from_row = dsp->starts[trans[start_sect]*dsp->dim+amp->row_num-1] + offset;
6022          amp->to_row = amp->from_row + len - 1;
6023       }
6024    }
6025    if (saip->anchor <= 0)
6026       MemFree(trans);
6027    amp->real_from += amp->to_row - amp->from_row + 1;
6028    return TRUE;
6029 }
6030 
6031 /* SECTION 4a */
binary_search_on_uint4_list(Uint4Ptr list,Uint4 pos,Uint4 listlen)6032 static Int4 binary_search_on_uint4_list(Uint4Ptr list, Uint4 pos, Uint4 listlen)
6033 {
6034    Uint4  L;
6035    Uint4  mid;
6036    Uint4  R;
6037 
6038    if (list == NULL || listlen == 0)
6039       return 0;
6040    L = 0;
6041    R = listlen - 1;
6042    while (L < R)
6043    {
6044       mid = (L+R)/2;
6045       if (list[mid + 1] <= pos)
6046          L = mid + 1;
6047       else
6048          R = mid;
6049    }
6050    return R;
6051 }
6052 
6053 /* SECTION 4a */
/***************************************************************************
*
*  binary_search_on_uint2_list looks for ele in a list of Uint2 values
*  that is expected to be sorted in ascending order. It returns the index
*  of the leftmost element equal to ele, or -1 if ele is not present, if
*  ele is negative, or if the list is NULL or empty.
*
***************************************************************************/
static Int4 binary_search_on_uint2_list(Uint2Ptr list, Int4 ele, Uint2 listlen)
{
   Uint2  hi;
   Uint2  lo;
   Uint2  probe;

   if (list == NULL || listlen == 0 || ele < 0)
      return -1;
   lo = 0;
   hi = listlen - 1;
   while (lo < hi)
   {
      probe = (lo + hi)/2;
      if (list[probe] < ele)
         lo = probe + 1;
      else
         hi = probe;
   }
   /* lo == hi: the only remaining candidate */
   return (list[hi] == ele) ? (Int4)hi : -1;
}
6077 
6078 /* SECTION 4a */
/***************************************************************************
*
*  AlnMgr2GetUnalignedInfo computes the sequence coordinates of the
*  stretch of the given row (1-based) that lies between the nearest
*  non-gap segment at or before 'segment' and the nearest non-gap segment
*  after it. The result is returned in *from and *to; both are set to -1
*  first, and a bound stays -1 when no non-gap segment exists on that
*  side. If *from ends up greater than *to the two values are swapped,
*  so a missing bound (-1) is always reported in *from.
*
*  Nothing is computed (outputs stay -1) when sap is not an indexed child
*  or non-lite parent alignment, when row is outside 1..dim, or when
*  segment is outside -1..numseg-1. segment == -1 means "before the first
*  segment". If from or to is NULL the function returns immediately.
*
***************************************************************************/
static void AlnMgr2GetUnalignedInfo(SeqAlignPtr sap, Int4 segment, Int4 row, Int4Ptr from, Int4Ptr to)
{
   AMAlignIndex2Ptr  amaip;
   DenseSegPtr       dsp;
   Boolean           found;
   Int4              i;
   Int4              pos;
   Uint1             strand;
   Int4              tmp;

   if (from == NULL || to == NULL)
      return;
   *from = *to = -1;
   if (sap == NULL || sap->saip == NULL)
      return;
   dsp = NULL;
   if (sap->saip->indextype == INDEX_CHILD)
   {
      dsp = (DenseSegPtr)(sap->segs);
   } else if (sap->saip->indextype == INDEX_PARENT)
   {
      amaip = (AMAlignIndex2Ptr)(sap->saip);
      if (amaip->alnstyle != AM2_LITE && amaip->sharedaln != NULL)
         dsp = (DenseSegPtr)(amaip->sharedaln->segs);
   }
   /* every starts[] access below is dim*seg + row-1, so validate both */
   if (dsp == NULL || row < 1 || row > dsp->dim)
      return;
   if (segment < -1 || segment >= dsp->numseg)
      return;
   strand = AlnMgr2GetNthStrand(sap, row);
   /* walk left (including 'segment' itself) to the nearest non-gap segment */
   found = FALSE;
   for (i=segment; i>=0 && !found; i--)
   {
      pos = dsp->starts[dsp->dim*i+row-1];
      if (pos != -1)
      {
         found = TRUE;
         if (strand == Seq_strand_minus)
            *to = pos - 1;
         else
            *from = pos + dsp->lens[i];
      }
   }
   /* walk right to the nearest non-gap segment */
   found = FALSE;
   for (i=segment+1; i<dsp->numseg && !found; i++)
   {
      pos = dsp->starts[dsp->dim*i+row-1];
      if (pos != -1)
      {
         found = TRUE;
         if (strand == Seq_strand_minus)
            *from = pos + dsp->lens[i];
         else
            *to = pos - 1;
      }
   }
   if (*from > *to)
   {
      tmp = *from;
      *from = *to;
      *to = tmp;
   }
}
6134 
6135 /* SECTION 4a */
6136 /***************************************************************************
6137 *
6138 *  AlnMgr2GetInterruptInfo returns a structure describing the inserts and
6139 *  unaligned regions in an interrupt. The structure is allocated by this
6140 *  function and must be freed with AlnMgr2FreeInterruptInfo.
6141 *
6142 ***************************************************************************/
AlnMgr2GetInterruptInfo(SeqAlignPtr sap,AMInterruptPtr interrupt)6143 NLM_EXTERN AMInterrInfoPtr AlnMgr2GetInterruptInfo(SeqAlignPtr sap, AMInterruptPtr interrupt)
6144 {
6145    AMAlignIndex2Ptr  amaip;
6146    Int4              disc;
6147    Boolean           done;
6148    DenseSegPtr       dsp;
6149    Int4              i;
6150    AMInterrInfoPtr   iip;
6151    Int4              inserts;
6152    Int4              intfrom;
6153    Int4              intto;
6154    Int4              j;
6155    Int4              k;
6156    Int4              n;
6157    SAIndex2Ptr       saip;
6158    SARowDat2Ptr      srdp;
6159    Uint1             strand;
6160    Uint2Ptr          trans;
6161    Int4              translen;
6162    Int4              u;
6163 
6164    if (interrupt == NULL || sap == NULL || sap->saip == NULL)
6165       return NULL;
6166    if (sap->saip->indextype == INDEX_CHILD)
6167    {
6168       dsp = (DenseSegPtr)(sap->segs);
6169       saip = (SAIndex2Ptr)(sap->saip);
6170    } else if (sap->saip->indextype == INDEX_PARENT)
6171    {
6172       amaip = (AMAlignIndex2Ptr)(sap->saip);
6173       if (amaip->alnstyle == AM2_LITE)
6174          return FALSE;
6175       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
6176       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6177    }
6178    if (dsp->numseg < interrupt->segnum)
6179       return NULL;
6180    if (saip->anchor > 0)
6181    {
6182       trans = saip->srdp[saip->anchor-1]->sect;
6183       translen = saip->srdp[saip->anchor-1]->numsect;
6184    } else
6185    {
6186       trans = (Uint2Ptr)MemNew(dsp->numseg*sizeof(Uint2));
6187       for (i=0; i<dsp->numseg; i++)
6188       {
6189          trans[i] = i;
6190       }
6191       translen = dsp->numseg;
6192    }
6193    strand = AlnMgr2GetNthStrand(sap, interrupt->row-1);
6194    srdp = saip->srdp[interrupt->row-1];
6195    /* now look for inserts and unaligned regions on the side indicated */
6196    if (interrupt->which_side == AM2_RIGHT)
6197    {
6198       /* check if this is unaligned */
6199       disc = binary_search_on_uint2_list(srdp->unaligned, interrupt->segnum, srdp->numunaln);
6200       /* then look for inserts */
6201       done = FALSE;
6202       iip = (AMInterrInfoPtr)MemNew(sizeof(AMInterrInfo));
6203       if (disc != -1)
6204          iip->num = 1;
6205       inserts = 0;
6206       for (i=interrupt->segnum+1; !done; i++)
6207       {
6208          n = binary_search_on_uint2_list(srdp->insect, i, srdp->numinsect);
6209          if (n == -1)
6210             n = binary_search_on_uint2_list(srdp->unsect, i, srdp->numunsect);
6211          if (n == -1)
6212          {
6213             done = TRUE;
6214          } else
6215          {
6216             inserts++; /* only increment if region gets interrupted */
6217             disc = binary_search_on_uint2_list(srdp->unaligned, i, srdp->numunaln);
6218             if (disc != -1) /* this insert has an unaligned region */
6219             {
6220                iip->num += inserts;
6221                iip->num++;
6222                inserts = 0;
6223             }
6224          }
6225       }
6226       if (inserts != 0)
6227          iip->num++;
6228       iip->starts = (Int4Ptr)MemNew(iip->num*sizeof(Int4));
6229       iip->lens = (Int4Ptr)MemNew(iip->num*sizeof(Int4));
6230       iip->types = (Int4Ptr)MemNew(iip->num*sizeof(Int4));
6231       k = 0;
6232       disc = binary_search_on_uint2_list(srdp->unaligned, interrupt->segnum, srdp->numunaln);
6233       if (disc != -1) /* starts with unaligned */
6234       {
6235          AlnMgr2GetUnalignedInfo(sap, interrupt->segnum, interrupt->row, &intfrom, &intto);
6236          iip->starts[k] = intfrom;
6237          iip->lens[k] = intto - intfrom + 1;
6238          iip->types[k] = AM_UNALIGNED;
6239          k++;
6240       }
6241       disc = 0;
6242       done = FALSE;
6243       for (i=interrupt->segnum+1; !done; i++)
6244       {
6245          n = binary_search_on_uint2_list(srdp->insect, i, srdp->numinsect);
6246          u = binary_search_on_uint2_list(srdp->unsect, i, srdp->numinsect);
6247          if (n == -1 && u == -1)
6248          {
6249             done = TRUE;
6250          } else
6251          {
6252             if (u == -1)
6253             {
6254                if (disc != -1 || strand == Seq_strand_minus) /* only record new start if region gets interrupted or if on minus strand */
6255                   iip->starts[k] = dsp->starts[dsp->dim*i + interrupt->row-1];
6256                iip->lens[k] += dsp->lens[i];
6257                iip->types[k] = AM_INSERT;
6258                disc = binary_search_on_uint2_list(srdp->unaligned, i, srdp->numunaln);
6259                if (disc != -1) /* this insert has an unaligned region */
6260                {
6261                   k++;
6262                   AlnMgr2GetUnalignedInfo(sap, i, interrupt->row, &intfrom, &intto);
6263                   iip->starts[k] = intfrom;
6264                   iip->lens[k] = intto - intfrom + 1;
6265                   iip->types[k] = AM_UNALIGNED;
6266                   k++;
6267                }
6268             }
6269          }
6270       }
6271    } else if (interrupt->which_side == AM2_LEFT)
6272    {
6273       /* check if the next non-gap segment to the left has unaligned */
6274       j = 1;
6275       n = 0;
6276       while (n != -1 && interrupt->segnum-j >= 0)
6277       {
6278          n = binary_search_on_uint2_list(srdp->unsect, interrupt->segnum-j, srdp->numunsect);
6279          if (n == -1)
6280             n = binary_search_on_uint2_list(srdp->insect, interrupt->segnum-j, srdp->numinsect);
6281          if (n != -1)
6282             j++;
6283       }
6284       disc = binary_search_on_uint2_list(srdp->unaligned, interrupt->segnum-j, srdp->numunaln);
6285       /* then look for inserts */
6286       done = FALSE;
6287       iip = (AMInterrInfoPtr)MemNew(sizeof(AMInterrInfo));
6288       if (disc != -1)
6289          iip->num = 1;
6290       inserts = 0;
6291       for (i=interrupt->segnum-1; !done; i--)
6292       {
6293          n = binary_search_on_uint2_list(srdp->insect, i, srdp->numinsect);
6294          if (n == -1)
6295             n = binary_search_on_uint2_list(srdp->unsect, i, srdp->numunsect);
6296          if (n == -1)
6297          {
6298             done = TRUE;
6299          } else
6300          {
6301             inserts++; /* only increment if region gets interrupted */
6302             disc = binary_search_on_uint2_list(srdp->unaligned, i, srdp->numunaln);
6303             if (disc != -1) /* this insert has an unaligned region */
6304             {
6305                iip->num += inserts;
6306                iip->num++;
6307                inserts = 0;
6308             }
6309          }
6310       }
6311       i++;
6312       iip->starts = (Int4Ptr)MemNew(iip->num*sizeof(Int4));
6313       iip->lens = (Int4Ptr)MemNew(iip->num*sizeof(Int4));
6314       iip->types = (Int4Ptr)MemNew(iip->num*sizeof(Int4));
6315       k = 0;
6316       disc = 0;
6317       /* check first non-inserted segment for unaligned */
6318       if (i >= 0)
6319       {
6320          disc = binary_search_on_uint2_list(srdp->unaligned, i, srdp->numunaln);
6321          if (disc != -1) /* there's an unaligned region */
6322          {
6323             AlnMgr2GetUnalignedInfo(sap, i, interrupt->row, &intfrom, &intto);
6324             iip->starts[k] = intfrom;
6325             iip->lens[k] = intto - intfrom + 1;
6326             iip->types[k] = AM_UNALIGNED;
6327             k++;
6328          }
6329       }
6330       i++; /* start from leftmost end of inserts/unaligned */
6331       for (i; i<interrupt->segnum; i++)
6332       {
6333          u = binary_search_on_uint2_list(srdp->unsect, i, srdp->numunsect);
6334          if (u == -1)
6335          {
6336             if (disc != -1 || strand == Seq_strand_minus) /* only record new start if region gets interrupted or if on minus strand */
6337                iip->starts[k] = dsp->starts[dsp->dim*i + interrupt->row-1];
6338             iip->lens[k] += dsp->lens[i];
6339             iip->types[k] = AM_INSERT;
6340             disc = binary_search_on_uint2_list(srdp->unaligned, i, srdp->numunaln);
6341             if (disc != -1) /* this insert has an unaligned region */
6342             {
6343                k++;
6344                AlnMgr2GetUnalignedInfo(sap, binary_search_on_uint2_list(trans, i, translen), interrupt->row, &intfrom, &intto);
6345                iip->starts[k] = intfrom;
6346                iip->lens[k] = intto - intfrom + 1;
6347                iip->types[k] = AM_UNALIGNED;
6348                k++;
6349             }
6350          }
6351       }
6352    }
6353    iip->strand = strand;
6354    return iip;
6355 }
6356 
6357 /* SECTION 4b */
6358 /***************************************************************************
6359 *
6360 *  AlnMgr2GetNthStrand takes an indexed seqalign and a row number and
6361 *  returns the strand of the row indicated. A return of 0 indicates
6362 *  an error.
6363 *
6364 ***************************************************************************/
AlnMgr2GetNthStrand(SeqAlignPtr sap,Int4 n)6365 NLM_EXTERN Uint1 AlnMgr2GetNthStrand(SeqAlignPtr sap, Int4 n)
6366 {
6367    AMAlignIndex2Ptr  amaip;
6368    DenseSegPtr      dsp;
6369 
6370    if (sap == NULL || sap->saip == NULL || n < 1)
6371       return 0;
6372    if (sap->saip->indextype == INDEX_CHILD)
6373    {
6374       dsp = (DenseSegPtr)(sap->segs);
6375       if (n > dsp->dim)
6376          return 0;
6377       if (dsp->strands == NULL)
6378          return Seq_strand_plus;
6379       return (dsp->strands[n-1]);
6380    } else if (sap->saip->indextype == INDEX_PARENT)
6381    {
6382       amaip = (AMAlignIndex2Ptr)(sap->saip);
6383       if (amaip->alnstyle == AM2_LITE) /* can't get Nth strand for this */
6384          return 0;
6385       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
6386       if (n > dsp->dim)
6387          return 0;
6388       if (dsp->strands == NULL)
6389          return Seq_strand_plus;
6390       return (dsp->strands[n-1]);
6391    }
6392    return 0;
6393 }
6394 
6395 /* SECTION 4b */
6396 /***************************************************************************
6397 *
6398 *  AlnMgr2GetNthSeqIdPtr returns the seqid (this is a duplicated,
6399 *  allocated seqid that must be freed) of the nth row (1-based) of an
6400 *  indexed parent or child seqalign.
6401 *
6402 ***************************************************************************/
AlnMgr2GetNthSeqIdPtr(SeqAlignPtr sap,Int4 n)6403 NLM_EXTERN SeqIdPtr AlnMgr2GetNthSeqIdPtr(SeqAlignPtr sap, Int4 n)
6404 {
6405    AMAlignIndex2Ptr  amaip;
6406    DenseSegPtr      dsp;
6407    Int4             i;
6408    SeqIdPtr         sip;
6409 
6410    if (sap == NULL || sap->saip == NULL)
6411       return NULL;
6412    if (sap->saip->indextype == INDEX_CHILD)
6413    {
6414       dsp = (DenseSegPtr)(sap->segs);
6415       if (n > dsp->dim)
6416          return NULL;
6417       sip = dsp->ids;
6418       for (i=1; i<n && sip != NULL; i++)
6419       {
6420          sip = sip->next;
6421       }
6422       if (sip == NULL) return NULL;
6423       return (SeqIdDup(sip));
6424    } else if (sap->saip->indextype == INDEX_PARENT)
6425    {
6426       amaip = (AMAlignIndex2Ptr)(sap->saip);
6427       if (n > amaip->numrows)
6428          return NULL;
6429       sip = SeqIdDup(amaip->ids[n-1]);
6430       return sip;
6431    } else
6432       return NULL;
6433 }
6434 
6435 /* SECTION 4b */
6436 /***************************************************************************
6437 *
6438 *  AlnMgr2GetNthSeqRangeInSA returns the smallest and largest sequence
6439 *  coordinates contained in the nth row of an indexed seqalign. Either
6440 *  start or stop can be NULL to only retrieve one of the coordinates.
6441 *  If start and stop are -1, there is an error; if they are both -2, the
6442 *  row is just one big insert. RANGE
6443 *
6444 ***************************************************************************/
AlnMgr2GetNthSeqRangeInSA(SeqAlignPtr sap,Int4 n,Int4Ptr start,Int4Ptr stop)6445 NLM_EXTERN void AlnMgr2GetNthSeqRangeInSA(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop)
6446 {
6447    AMAlignIndex2Ptr  amaip;
6448    Int4             beg;
6449    DenseSegPtr      dsp;
6450    Int4             end;
6451    SAIndex2Ptr       saip;
6452    SARowDat2Ptr      srdp;
6453    Uint1            strand;
6454 
6455    if (start != NULL)
6456       *start = -1;
6457    if (stop != NULL)
6458       *stop = -1;
6459    if (sap == NULL || sap->saip == NULL)
6460       return;
6461    if (sap->saip->indextype == INDEX_CHILD)
6462    {
6463       saip = (SAIndex2Ptr)(sap->saip);
6464       dsp = (DenseSegPtr)(sap->segs);
6465    } else if (sap->saip->indextype == INDEX_PARENT)
6466    {
6467       amaip = (AMAlignIndex2Ptr)(sap->saip);
6468       if (amaip->alnstyle == AM2_LITE)
6469       {
6470          AlnMgr2GetNthSeqRangeInSASet(sap, n, start, stop);
6471          return;
6472       }
6473       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6474       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
6475    }
6476    if (n > saip->numrows || n <= 0)
6477       return;
6478    srdp = saip->srdp[n-1];
6479    beg = -1;
6480    if (srdp->numsect == 0) /* just one big insert */
6481       beg = end = -2;
6482    strand = AlnMgr2GetNthStrand(sap, n);
6483    if (beg != -2 && strand != Seq_strand_minus)
6484    {
6485       beg = dsp->starts[srdp->sect[0]*(dsp->dim) + n-1];
6486       end = dsp->starts[srdp->sect[srdp->numsect-1]*(dsp->dim) + n-1] + dsp->lens[srdp->sect[srdp->numsect-1]] - 1;
6487    } else if (beg != -2)
6488    {
6489       beg = dsp->starts[srdp->sect[srdp->numsect-1]*(dsp->dim) + n-1];
6490       end = dsp->starts[srdp->sect[0]*(dsp->dim) + n-1] + dsp->lens[srdp->sect[0]] - 1;
6491    }
6492    if (start != NULL)
6493       *start = beg;
6494    if (stop != NULL)
6495       *stop = end;
6496    return;
6497 }
6498 
6499 /* SECTION 4b */
6500 /***************************************************************************
6501 *
6502 *  AlnMgr2GetNthRowSpanInSA returns the least and greatest alignment
6503 *  coordinates (inclusive) spanned by the indicated row. Either stop or
6504 *  start can be NULL to retrieve just one of the coordinates.
6505 *
6506 ***************************************************************************/
AlnMgr2GetNthRowSpanInSA(SeqAlignPtr sap,Int4 n,Int4Ptr start,Int4Ptr stop)6507 NLM_EXTERN void AlnMgr2GetNthRowSpanInSA(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop)
6508 {
6509    AMAlignIndex2Ptr  amaip;
6510    DenseSegPtr      dsp;
6511    Int4             i;
6512    SAIndex2Ptr       saip;
6513    SARowDat2Ptr      srdp;
6514 
6515    if (start != NULL)
6516       *start = -1;
6517    if (stop != NULL)
6518       *stop = -1;
6519    if (sap == NULL || sap->saip == NULL)
6520       return;
6521    if (sap->saip->indextype == INDEX_CHILD)
6522    {
6523       saip = (SAIndex2Ptr)(sap->saip);
6524       dsp = (DenseSegPtr)(sap->segs);
6525    } else if (sap->saip->indextype == INDEX_PARENT)
6526    {
6527       amaip = (AMAlignIndex2Ptr)(sap->saip);
6528       if (amaip->alnstyle == AM2_LITE)
6529          return;
6530       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6531       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
6532    }
6533    if (n > saip->numrows || n <= 0)
6534       return;
6535    srdp = saip->srdp[n-1];
6536    if (srdp->numsect == 0)
6537    {
6538       if (start != NULL)
6539          *start = -1;
6540       if (stop != NULL)
6541          *stop = -1;
6542       return;
6543    }
6544    if (start != NULL)
6545    {
6546       if (saip->anchor > 0)
6547          i = binary_search_on_uint2_list(saip->srdp[saip->anchor-1]->sect, srdp->sect[0], saip->srdp[saip->anchor-1]->numsect);
6548       else
6549          i = srdp->sect[0];
6550       *start = saip->aligncoords[i];
6551    }
6552    if (stop != NULL)
6553    {
6554       if (saip->anchor > 0)
6555          i = binary_search_on_uint2_list(saip->srdp[saip->anchor-1]->sect, srdp->sect[srdp->numsect-1], saip->srdp[saip->anchor-1]->numsect);
6556       else
6557          i = srdp->sect[srdp->numsect-1];
6558       *stop = saip->aligncoords[i] + dsp->lens[srdp->sect[srdp->numsect-1]] - 1;
6559    }
6560    return;
6561 }
6562 
6563 /* SECTION 4b */
/***************************************************************************
*
*  AlnMgr2GetNthSeqRangeInSASet returns, for the nth row, the smallest
*  start and the largest stop reported by AlnMgr2GetNthSeqRangeInSA over
*  all the child alignments of an indexed parent. A child alignment is
*  simply passed through to AlnMgr2GetNthSeqRangeInSA. Either start or
*  stop can be NULL. Negative results from a child (-1 error, -2 all
*  insert) are ignored; if no child supplies a coordinate the
*  corresponding output stays -1.
*
***************************************************************************/
static void AlnMgr2GetNthSeqRangeInSASet(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop)
{
   AMAlignIndex2Ptr  amaip;
   Int4              from;
   Int4              i;
   Int4              max;
   Int4              min;
   Int4              to;

   if (start != NULL)
      *start = -1;
   if (stop != NULL)
      *stop = -1;
   if (sap == NULL || sap->saip == NULL || n < 0)
      return;
   if (sap->saip->indextype == INDEX_CHILD)
   {
      AlnMgr2GetNthSeqRangeInSA(sap, n, start, stop);
      return;
   }
   if (sap->saip->indextype != INDEX_PARENT)
      return; /* the cast below is only valid for a parent index */
   amaip = (AMAlignIndex2Ptr)(sap->saip);
   min = max = -1;
   for (i=0; i<amaip->numsaps; i++)
   {
      from = to = -1;
      AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], n, &from, &to);
      if (from >= 0 && (min == -1 || from < min))
         min = from;
      if (to > max)
         max = to;
   }
   /* report the accumulated extremes, not the last child's values */
   if (start != NULL)
      *start = min;
   if (stop != NULL)
      *stop = max;
}
6599 
6600 /* SECTION 4b */
AlnMgr2GetMaxTailLength(SeqAlignPtr sap,Uint1 which_tail)6601 NLM_EXTERN Int4 AlnMgr2GetMaxTailLength(SeqAlignPtr sap, Uint1 which_tail)
6602 {
6603    Int4   i;
6604    Int4   maxlen;
6605    Int4   n;
6606    Int4   start;
6607    Int4   stop;
6608    Uint1  strand;
6609 
6610    if (sap == NULL || sap->saip == NULL)
6611       return 0;
6612    n = AlnMgr2GetNumRows(sap);
6613    maxlen = -1;
6614    for (i=0; i<n; i++)
6615    {
6616       AlnMgr2GetNthRowTail(sap, i+1, which_tail, &start, &stop, &strand);
6617       if (stop - start + 1 > maxlen)
6618          maxlen = stop - start + 1;
6619    }
6620    return maxlen;
6621 }
6622 
6623 /* SECTION 4b */
6624 /***************************************************************************
6625 *
6626 *  AlnMgr2GetNthRowTail returns the sequence extremities that are not
6627 *  contained in the alignment (if the alignment starts at 10 in row 2, the
6628 *  tail in that row is 0-9). It takes an indexed seqalign, a 1-based row
6629 *  number, and AM2_LEFT_TAIL or AM2_RIGHT_TAIL, and returns the start, stop,
6630 *  and strand of the tail indicated in the row desired. AlnMgr2GetNthRowTail
6631 *  returns TRUE if the calculations were successfully completed.
6632 *
6633 ***************************************************************************/
AlnMgr2GetNthRowTail(SeqAlignPtr sap,Int4 n,Uint1 which_tail,Int4Ptr start,Int4Ptr stop,Uint1Ptr strand)6634 NLM_EXTERN Boolean AlnMgr2GetNthRowTail(SeqAlignPtr sap, Int4 n, Uint1 which_tail, Int4Ptr start, Int4Ptr stop, Uint1Ptr strand)
6635 {
6636    BioseqPtr  bsp;
6637    SeqIdPtr   sip;
6638    Int4       tmp_start;
6639    Int4       tmp_stop;
6640    Uint1      tmp_strand;
6641 
6642    if (sap == NULL || n < 1 || sap->saip == NULL)
6643       return FALSE;
6644    tmp_start = tmp_stop = -1;
6645    AlnMgr2GetNthSeqRangeInSA(sap, n, &tmp_start, &tmp_stop);
6646    if (tmp_start == -1 || tmp_stop == -1)
6647       return FALSE;
6648    tmp_strand = AlnMgr2GetNthStrand(sap, n);
6649    if (which_tail == AM2_LEFT_TAIL)
6650    {
6651       if (tmp_strand == Seq_strand_minus)
6652       {
6653          sip = AlnMgr2GetNthSeqIdPtr(sap, n);
6654          bsp = BioseqLockById(sip);
6655          SeqIdFree(sip);
6656          if (bsp == NULL)
6657             return FALSE;
6658          if (tmp_stop == bsp->length-1 || stop == NULL)
6659          {
6660             if (start)
6661                *start = -1;
6662             if (stop)
6663                *stop = -1;
6664          } else
6665          {
6666             if (start)
6667                *start = tmp_stop+1;
6668             if (stop)
6669                *stop = bsp->length-1;
6670          }
6671          BioseqUnlock(bsp);
6672          if (strand)
6673             *strand = tmp_strand;
6674       } else
6675       {
6676          if (tmp_start >= 1)
6677          {
6678             if (start)
6679                *start = 0;
6680             if (stop)
6681                *stop = tmp_start - 1;
6682          } else
6683          {
6684             if (start)
6685                *start = -1;
6686             if (stop)
6687                *stop = -1;
6688          }
6689          if (strand)
6690             *strand = tmp_strand;
6691       }
6692    } else if (which_tail == AM2_RIGHT_TAIL)
6693    {
6694       if (tmp_strand == Seq_strand_minus)
6695       {
6696          if (tmp_start >= 1)
6697          {
6698             if (start)
6699                *start = 0;
6700             if (stop)
6701                *stop = tmp_start - 1;
6702          } else
6703          {
6704             if (start)
6705                *start = -1;
6706             if (stop)
6707                *stop = -1;
6708          }
6709          if (strand)
6710             *strand = tmp_strand;
6711       } else
6712       {
6713          sip = AlnMgr2GetNthSeqIdPtr(sap, n);
6714          bsp = BioseqLockById(sip);
6715          SeqIdFree(sip);
6716          if (bsp == NULL)
6717             return FALSE;
6718          if (bsp->length-1 == tmp_stop)
6719          {
6720             if (start)
6721                *start = -1;
6722             if (stop)
6723                *stop = -1;
6724          } else
6725          {
6726             if (start)
6727                *start = tmp_stop + 1;
6728             if (stop)
6729                *stop = bsp->length-1;
6730          }
6731          if (strand)
6732             *strand = tmp_strand;
6733          BioseqUnlock(bsp);
6734       }
6735    }
6736    return TRUE;
6737 }
6738 
6739 /* SECTION 4c */
6740 /***************************************************************************
6741 *
6742 *  AlnMgr2GetAlnLength returns the total alignment length of an indexed
6743 *  alignment. If fill_in is TRUE, the function computes the total length
6744 *  of all the internal unaligned regions and adds that to the alignment
6745 *  length; otherwise only the aligned portions are considered. (LENGTH)
6746 *
6747 ***************************************************************************/
AlnMgr2GetAlnLength(SeqAlignPtr sap,Boolean fill_in)6748 NLM_EXTERN Int4 AlnMgr2GetAlnLength(SeqAlignPtr sap, Boolean fill_in)
6749 {
6750    AMAlignIndex2Ptr  amaip;
6751    DenseSegPtr      dsp;
6752    Int4             i;
6753    Uint2            lastseg;
6754    Int4             len;
6755    SAIndex2Ptr       saip;
6756    SeqAlignPtr      salp;
6757 
6758    if (sap == NULL || sap->saip == NULL)
6759       return -1;
6760    if (sap->saip->indextype == INDEX_CHILD)
6761    {
6762       dsp = (DenseSegPtr)(sap->segs);
6763       saip = (SAIndex2Ptr)(sap->saip);
6764       salp = sap;
6765    } else if (sap->saip->indextype == INDEX_PARENT)
6766    {
6767       amaip = (AMAlignIndex2Ptr)(sap->saip);
6768       if (amaip->alnstyle == AM2_LITE)
6769          return -1;
6770       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6771       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
6772       salp = amaip->sharedaln;
6773    }
6774    if (saip->unaln == FALSE || fill_in == FALSE)
6775    {
6776       if (saip->anchor == -1)
6777          return (saip->aligncoords[saip->numseg-1]+dsp->lens[saip->numseg-1]);
6778       else
6779       {
6780          lastseg = saip->srdp[saip->anchor-1]->sect[saip->srdp[saip->anchor-1]->numsect-1];
6781          return (saip->aligncoords[saip->numseg-1]+dsp->lens[lastseg]);
6782       }
6783    } else
6784    {
6785       len = 0;
6786       for (i=0; i<dsp->numseg; i++)
6787       {
6788          len += dsp->lens[i];
6789          len += AlnMgr2GetMaxUnalignedLength(salp, i);
6790       }
6791    }
6792    return len;
6793 }
6794 
6795 /* SECTION 4c */ /* FOR DDV */
AlnMgr2IsSAPDiscAli(SeqAlignPtr sap)6796 NLM_EXTERN Boolean AlnMgr2IsSAPDiscAli(SeqAlignPtr sap)
6797 {
6798    AMAlignIndex2Ptr  amaip;
6799    SAIndex2Ptr       saip;
6800 
6801    if (sap == NULL || sap->saip == NULL)
6802       return FALSE;
6803    if (sap->saip->indextype == INDEX_CHILD)
6804    {
6805       saip = (SAIndex2Ptr)(sap->saip);
6806    } else if (sap->saip->indextype == INDEX_PARENT)
6807    {
6808       amaip = (AMAlignIndex2Ptr)(sap->saip);
6809       if (amaip->alnstyle == AM2_LITE)
6810          return FALSE;
6811       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6812    }
6813    if (saip->numunaln > 0)
6814       return TRUE;
6815    return FALSE;
6816 }
6817 
6818 /* SECTION 4c */ /* FOR DDV */
AlnMgr2GetNumAlnBlocks(SeqAlignPtr sap)6819 NLM_EXTERN Int4 AlnMgr2GetNumAlnBlocks(SeqAlignPtr sap)
6820 {
6821    AMAlignIndex2Ptr  amaip;
6822    SAIndex2Ptr       saip;
6823 
6824    if (sap == NULL || sap->saip == NULL)
6825       return -1;
6826    if (sap->saip->indextype == INDEX_CHILD)
6827    {
6828       saip = (SAIndex2Ptr)(sap->saip);
6829    } else if (sap->saip->indextype == INDEX_PARENT)
6830    {
6831       amaip = (AMAlignIndex2Ptr)(sap->saip);
6832       if (amaip->alnstyle == AM2_LITE)
6833          return -1;
6834       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6835    }
6836    if (saip->numunaln >= 0)
6837       return (saip->numunaln + 1);
6838    else if (saip->numunaln == -1)
6839       return 1;
6840    else
6841       return -1;
6842 }
6843 
6844 /* SECTION 4c */ /* FOR DDV */
AlnMgr2GetNthBlockRange(SeqAlignPtr sap,Int4 n,Int4Ptr start,Int4Ptr stop)6845 NLM_EXTERN Boolean AlnMgr2GetNthBlockRange(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop)
6846 {
6847    AMAlignIndex2Ptr  amaip;
6848    SAIndex2Ptr       saip;
6849 
6850    if (sap == NULL || sap->saip == NULL)
6851       return FALSE;
6852    if (sap->saip->indextype == INDEX_CHILD)
6853    {
6854       saip = (SAIndex2Ptr)(sap->saip);
6855    } else if (sap->saip->indextype == INDEX_PARENT)
6856    {
6857       amaip = (AMAlignIndex2Ptr)(sap->saip);
6858       if (amaip->alnstyle == AM2_LITE)
6859          return FALSE;
6860       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6861    }
6862    if (!start || !stop)
6863       return FALSE;
6864    *start = -1;
6865    *stop = -1;
6866    if (n >= saip->numunaln)
6867       return FALSE;
6868    if (n < saip->numunaln)
6869    {
6870       *start = saip->aligncoords[saip->unaln[n-1]+1];
6871       *stop = saip->aligncoords[saip->unaln[n]] - 1;
6872    }
6873    return TRUE;
6874 }
6875 
6876 /* SECTION 4c */ /* FOR DDV */
6877 /***************************************************************************
6878 *
6879 *  AlnMgr2GetNthUnalignedForNthRow returns the bioseq coordinates for the
6880 *  requested row, in the requested unaligned region. Any error will result
6881 *  in -1 returns for both start and stop.
6882 *
6883 ***************************************************************************/
AlnMgr2GetNthUnalignedForNthRow(SeqAlignPtr sap,Int4 unaligned,Int4 row,Int4Ptr start,Int4Ptr stop)6884 NLM_EXTERN Boolean AlnMgr2GetNthUnalignedForNthRow(SeqAlignPtr sap, Int4 unaligned, Int4 row, Int4Ptr start, Int4Ptr stop)
6885 {
6886    AMAlignIndex2Ptr  amaip;
6887    DenseSegPtr       dsp;
6888    Int4              i;
6889    SAIndex2Ptr       saip;
6890    Int4              seg;
6891    Uint1             strand;
6892 
6893    if (sap == NULL || sap->saip == NULL)
6894       return FALSE;
6895    if (sap->saip->indextype == INDEX_CHILD)
6896    {
6897       saip = (SAIndex2Ptr)(sap->saip);
6898       dsp = (DenseSegPtr)(sap->segs);
6899    } else if (sap->saip->indextype == INDEX_PARENT)
6900    {
6901       amaip = (AMAlignIndex2Ptr)(sap->saip);
6902       if (amaip->alnstyle == AM2_LITE)
6903          return FALSE;
6904       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6905       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
6906    }
6907    if (row > saip->numrows)
6908       return FALSE;
6909    if (saip->numunaln == 0) /* not set yet */
6910       AlnMgr2SetUnaln(sap);
6911    if (saip->numunaln == -1 || unaligned > saip->numunaln)
6912    {
6913       if (start)
6914          *start = -1;
6915       if (stop)
6916          *stop = -1;
6917       return FALSE;
6918    }
6919    seg = -1;
6920    if (unaligned <= saip->numunaln && unaligned > 0)
6921       seg = saip->unaln[unaligned-1];
6922    if (start)
6923       *start = -1;
6924    if (stop)
6925       *stop = -1;
6926    i = binary_search_on_uint2_list(saip->srdp[row-1]->unaligned, seg, saip->srdp[row-1]->numunaln);
6927    if (i == -1 || saip->srdp[row-1]->unaligned[i] >= dsp->numseg-1)
6928       return FALSE;
6929    strand = AlnMgr2GetNthStrand(sap, row);
6930    if (strand == Seq_strand_minus)
6931    {
6932       *start = dsp->starts[(saip->srdp[row-1]->unaligned[i]+1)*dsp->dim+row-1] + dsp->lens[(saip->srdp[row-1]->unaligned[i])];
6933       *stop = dsp->starts[(saip->srdp[row-1]->unaligned[i])*dsp->dim+row-1] - 1;
6934    } else
6935    {
6936       *start = dsp->starts[(saip->srdp[row-1]->unaligned[i])*dsp->dim+row-1] + dsp->lens[(saip->srdp[row-1]->unaligned[i])];
6937       *stop = dsp->starts[(saip->srdp[row-1]->unaligned[i]+1)*dsp->dim+row-1] - 1;
6938    }
6939    return TRUE;
6940 }
6941 
6942 /* SECTION 4c */ /* FOR DDV */
6943 /***************************************************************************
6944 *
6945 *  AlnMgr2GetNextLengthBit is called in a loop on an indexed alignment, with
6946 *  seg starting at 0, to return the lengths of the aligned and unaligned
6947 *  regions. If the length returned is negative, it's an unaligned region;
6948 *  otherwise it's aligned.
6949 *
6950 ***************************************************************************/
AlnMgr2GetNextLengthBit(SeqAlignPtr sap,Int4Ptr len,Int4Ptr seg)6951 NLM_EXTERN Boolean AlnMgr2GetNextLengthBit(SeqAlignPtr sap, Int4Ptr len, Int4Ptr seg)
6952 {
6953    AMAlignIndex2Ptr  amaip;
6954    DenseSegPtr       dsp;
6955    Int4              i;
6956    Int4              lastseg;
6957    Int4              maxseg;
6958    SAIndex2Ptr       saip;
6959 
6960    if (sap == NULL || sap->saip == NULL || seg == NULL)
6961       return FALSE;
6962    if (sap->saip->indextype == INDEX_CHILD)
6963    {
6964       saip = (SAIndex2Ptr)(sap->saip);
6965       dsp = (DenseSegPtr)(sap->segs);
6966    } else if (sap->saip->indextype == INDEX_PARENT)
6967    {
6968       amaip = (AMAlignIndex2Ptr)(sap->saip);
6969       if (amaip->alnstyle == AM2_LITE)
6970          return FALSE;
6971       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6972       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
6973    }
6974    if (saip->numunaln == -1) /* the whole thing is just one big aligned segment */
6975    {
6976       if (*seg != 0)
6977          return FALSE;
6978       if (saip->anchor == -1)
6979       {
6980          *len = saip->aligncoords[saip->numseg-1]+dsp->lens[saip->numseg-1];
6981          *seg = 1;
6982       } else
6983       {
6984          lastseg = saip->srdp[saip->anchor-1]->sect[saip->srdp[saip->anchor-1]->numsect-1];
6985          *len = saip->aligncoords[saip->numseg-1]+dsp->lens[lastseg];
6986          *seg = 1;
6987       }
6988       return TRUE;
6989    } else
6990    {
6991       if (saip->unaln == 0) /* not set */
6992       {
6993          AlnMgr2SetUnaln(sap);
6994          if (saip->numunaln == -1) /* no unaligned regions */
6995          {
6996             if (*seg != 0)
6997                return FALSE;
6998             if (saip->anchor == -1)
6999                *len = saip->aligncoords[saip->numseg-1]+dsp->lens[saip->numseg-1];
7000             else
7001             {
7002                lastseg = saip->srdp[saip->anchor-1]->sect[saip->srdp[saip->anchor-1]->numsect-1];
7003                *len = saip->aligncoords[saip->numseg-1]+dsp->lens[lastseg];
7004             }
7005             *seg = 1;
7006             return TRUE;
7007          }
7008       }
7009       if (*seg > saip->numunaln || -(*seg) > saip->numunaln)
7010          return FALSE;
7011       if (*seg >= 0)
7012       {
7013          *len = 0;
7014          if (*seg == 0)
7015             i = 0;
7016          else
7017             i = saip->unaln[*seg-1]+1;
7018          if (*seg < saip->numunaln)
7019             maxseg = saip->unaln[*seg];
7020          else
7021             maxseg = dsp->numseg-1;
7022          while (i<=maxseg)
7023          {
7024             (*len) += dsp->lens[i];
7025             i++;
7026          }
7027          *seg = -(*seg+1);
7028          return TRUE;
7029       } else
7030       {
7031          *len = -AlnMgr2GetMaxUnalignedLength(sap, saip->unaln[-(*seg)-1]);
7032          *seg = -(*seg);
7033          return TRUE;
7034       }
7035    }
7036 }
7037 
7038 /* SECTION 4c */
AlnMgr2GetMaxUnalignedLength(SeqAlignPtr sap,Int4 seg)7039 static Int4 AlnMgr2GetMaxUnalignedLength(SeqAlignPtr sap, Int4 seg)
7040 {
7041    AMAlignIndex2Ptr  amaip;
7042    DenseSegPtr      dsp;
7043    Boolean          found;
7044    Int4             from;
7045    Int4             i;
7046    Int4             max;
7047    Int4             row;
7048    SAIndex2Ptr       saip;
7049    Int4             to;
7050 
7051    if (sap == NULL)
7052       return -1;
7053    if (sap->saip->indextype == INDEX_CHILD)
7054    {
7055       saip = (SAIndex2Ptr)(sap->saip);
7056       dsp = (DenseSegPtr)(sap->segs);
7057    } else if (sap->saip->indextype == INDEX_PARENT)
7058    {
7059       amaip = (AMAlignIndex2Ptr)(sap->saip);
7060       if (amaip->alnstyle == AM2_LITE)
7061          return -1;
7062       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
7063       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
7064    }
7065    found = FALSE;
7066    for (row=0; row<dsp->dim && !found; row++)
7067    {
7068       for (i=0; i<saip->srdp[row]->numunaln && !found; i++)
7069       {
7070          if (saip->srdp[row]->unaligned[i] == seg)
7071             found = TRUE;
7072       }
7073    }
7074    if (!found)
7075       return 0;
7076    max = 0;
7077    for (i=0; i<dsp->dim; i++)
7078    {
7079       AlnMgr2GetUnalignedInfo(sap, seg, i+1, &from, &to);
7080       if (to - from > max)
7081          max = to - from;
7082    }
7083    return max;
7084 }
7085 
7086 /* SECTION 4c */
7087 /***************************************************************************
7088 *
7089 *  AlnMgr2GetNumRows returns the number of rows in an indexed seqalign.
7090 *
7091 ***************************************************************************/
AlnMgr2GetNumRows(SeqAlignPtr sap)7092 NLM_EXTERN Int4 AlnMgr2GetNumRows(SeqAlignPtr sap)
7093 {
7094    AMAlignIndex2Ptr  amaip;
7095    SAIndex2Ptr       saip;
7096 
7097    if (sap == NULL || sap->saip == NULL)
7098       return -1;
7099    if (sap->saip->indextype == INDEX_CHILD)
7100    {
7101       saip = (SAIndex2Ptr)(sap->saip);
7102       return (saip->numrows);
7103    } else if (sap->saip->indextype == INDEX_PARENT)
7104    {
7105       amaip = (AMAlignIndex2Ptr)(sap->saip);
7106       return (amaip->numrows);
7107    }
7108    return -1;
7109 }
7110 
7111 /* SECTION 4c */
7112 /***************************************************************************
7113 *
7114 *  AlnMgr2GetNumSegs returns the number of gap- or aligned- contiguous
7115 *  segments in the alignment (continuous or not).
7116 *
7117 ***************************************************************************/
AlnMgr2GetNumSegs(SeqAlignPtr sap)7118 NLM_EXTERN Int4 AlnMgr2GetNumSegs(SeqAlignPtr sap)
7119 {
7120    AMAlignIndex2Ptr  amaip;
7121    DenseSegPtr      dsp;
7122 
7123    if (sap == NULL || sap->saip == NULL)
7124       return -1;
7125    if (sap->saip->indextype == INDEX_CHILD)
7126    {
7127       dsp = (DenseSegPtr)(sap->segs);
7128       return dsp->numseg;
7129    } else if (sap->saip->indextype == INDEX_PARENT)
7130    {
7131       amaip = (AMAlignIndex2Ptr)(sap->saip);
7132       if (amaip->alnstyle == AM2_LITE)
7133          return -1;
7134       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
7135       return dsp->numseg;
7136    }
7137    return -1;
7138 }
7139 
7140 /* SECTION 4c */
7141 /***************************************************************************
7142 *
7143 *  AlnMgr2GetNumSegsInRange returns the number of alignment segments
7144 *  spanned by the given range (partially or fully). The range is
7145 *  given in alignment coordinates.
7146 *
7147 ***************************************************************************/
AlnMgr2GetNumSegsInRange(SeqAlignPtr sap,Int4 from,Int4 to,Int4Ptr start_seg)7148 NLM_EXTERN Int4 AlnMgr2GetNumSegsInRange(SeqAlignPtr sap, Int4 from, Int4 to, Int4Ptr start_seg)
7149 {
7150    Uint4Ptr         aligncoords;
7151    AMAlignIndex2Ptr  amaip;
7152    DenseSegPtr      dsp;
7153    Int4             len;
7154    SAIndex2Ptr       saip;
7155    Int4             start;
7156    Int4             stop;
7157 
7158    if (start_seg != NULL)
7159       *start_seg = -1;
7160    if (sap == NULL || sap->saip == NULL)
7161       return -1;
7162    len = AlnMgr2GetAlnLength(sap, FALSE);
7163    if (from < 0 || to > len-1)
7164       return -1;
7165    if (sap->saip->indextype == INDEX_CHILD)
7166    {
7167       dsp = (DenseSegPtr)(sap->segs);
7168       saip = (SAIndex2Ptr)(sap->saip);
7169       aligncoords = saip->aligncoords;
7170    } else if (sap->saip->indextype == INDEX_PARENT)
7171    {
7172       amaip = (AMAlignIndex2Ptr)(sap->saip);
7173       if (amaip->alnstyle == AM2_LITE)
7174          return -1;
7175       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
7176       saip = (SAIndex2Ptr)(sap->saip);
7177       aligncoords = saip->aligncoords;
7178    }
7179    if (from == 0 && to == len-1) /* whole alignment */
7180    {
7181       if (start_seg)
7182          *start_seg = 0;
7183       return dsp->numseg;
7184    }
7185    start = binary_search_on_uint4_list(aligncoords, from, dsp->numseg);
7186    stop = binary_search_on_uint4_list(aligncoords, to, dsp->numseg);
7187    if (start_seg != NULL)
7188       *start_seg = start;
7189    return (stop-start+1);
7190 }
7191 
7192 /* SECTION 4c */
7193 /***************************************************************************
7194 *
7195 *  AlnMgr2GetNthSegmentRange returns the alignment coordinate range of the
7196 *  Nth segment (count starts at 1) of the seqalign. start and stop are
7197 *  optional arguments (in case only one end is desired).
7198 *
7199 ***************************************************************************/
AlnMgr2GetNthSegmentRange(SeqAlignPtr sap,Int4 n,Int4Ptr start,Int4Ptr stop)7200 NLM_EXTERN void AlnMgr2GetNthSegmentRange(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop)
7201 {
7202    AMAlignIndex2Ptr  amaip;
7203    Int4             i;
7204    SAIndex2Ptr       saip;
7205 
7206    if (sap == NULL || sap->saip == NULL)
7207       return;
7208    i = AlnMgr2GetNumSegs(sap);
7209    if (n > i || n < 0)
7210       return;
7211    if (sap->saip->indextype == INDEX_CHILD)
7212    {
7213       saip = (SAIndex2Ptr)(sap->saip);
7214       if (start != NULL)
7215          *start = saip->aligncoords[n-1];
7216       if (stop != NULL)
7217       {
7218          if (i > n) /* not the last segment */
7219             *stop = saip->aligncoords[n] - 1;
7220          else
7221             *stop = AlnMgr2GetAlnLength(sap, FALSE) - 1;
7222       }
7223       return;
7224    } else if (sap->saip->indextype == INDEX_PARENT)
7225    {
7226       amaip = (AMAlignIndex2Ptr)(sap->saip);
7227       if (amaip->alnstyle == AM2_LITE)
7228          return;
7229       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
7230       if (start != NULL)
7231          *start = saip->aligncoords[n-1];
7232       if (stop != NULL)
7233       {
7234          if (i > n) /* not the last segment */
7235             *stop = saip->aligncoords[n] - 1;
7236          else
7237             *stop = AlnMgr2GetAlnLength(sap, FALSE) - 1;
7238       }
7239       return;
7240    }
7241 }
7242 
7243 /* SECTION 4c */
7244 /***************************************************************************
7245 *
7246 *  AlnMgr2GetFirstNForSip returns the first row that a seqid occurs on,
7247 *  or -1 if the seqid is not in the alignment or if there is another
7248 *  error.
7249 *
7250 ***************************************************************************/
AlnMgr2GetFirstNForSip(SeqAlignPtr sap,SeqIdPtr sip)7251 NLM_EXTERN Int4 AlnMgr2GetFirstNForSip(SeqAlignPtr sap, SeqIdPtr sip)
7252 {
7253    AMAlignIndex2Ptr  amaip;
7254    DenseSegPtr      dsp;
7255    Int4             i;
7256    SeqIdPtr         sip_tmp;
7257 
7258    if (sap == NULL || sip == NULL || sap->saip == NULL)
7259       return -1;
7260    if (sap->saip->indextype == INDEX_CHILD)
7261    {
7262       dsp = (DenseSegPtr)(sap->segs);
7263       sip_tmp = dsp->ids;
7264       i = 1;
7265       while (sip_tmp != NULL)
7266       {
7267          if (SeqIdComp(sip, sip_tmp) == SIC_YES)
7268             return i;
7269          sip_tmp = sip_tmp->next;
7270          i++;
7271       }
7272    } else if (sap->saip->indextype == INDEX_PARENT)
7273    {
7274       amaip = (AMAlignIndex2Ptr)(sap->saip);
7275       if (amaip->alnstyle == AM2_LITE)
7276          return -1;
7277       for (i=0; i<amaip->numrows; i++)
7278       {
7279          if (SeqIdComp(sip, amaip->ids[i]) == SIC_YES)
7280             return (i+1);
7281       }
7282    }
7283    return -1;
7284 }
7285 
7286 /***************************************************************************
7287 *
7288 *  AlnMgr2GetFirstNForSipList returns the first row that one of a list of seqids occur on,
7289 *  or -1 if none of the seqids are in the alignment or if there is another
7290 *  error.
7291 *  Handy if sip comes from a BioSeq, where it can point to a linked list
7292 *  of SeqIds.
7293 *
7294 ***************************************************************************/
AlnMgr2GetFirstNForSipList(SeqAlignPtr sap,SeqIdPtr sip)7295 NLM_EXTERN Int4 AlnMgr2GetFirstNForSipList(SeqAlignPtr sap, SeqIdPtr sip)
7296 {
7297     Int4    i;
7298     if (sap == NULL || sap->saip == NULL)
7299         return -1;
7300 
7301     for (; sip; sip = sip->next) {
7302         i = AlnMgr2GetFirstNForSip(sap, sip);
7303         if (i != -1)
7304             return i;
7305     }
7306     return -1;
7307 }
7308 
7309 /***************************************************************************
7310 *
7311 *  AlnMgr2GetParent returns the top-level seqalign associated with a given
7312 *  indexed alignment. It returns the actual pointer, not a copy.
7313 *
7314 ***************************************************************************/
AlnMgr2GetParent(SeqAlignPtr sap)7315 NLM_EXTERN SeqAlignPtr AlnMgr2GetParent(SeqAlignPtr sap)
7316 {
7317    SAIndex2Ptr  saip;
7318 
7319    if (sap == NULL || sap->saip == NULL)
7320       return NULL;
7321    if (sap->saip->indextype == INDEX_PARENT)
7322       return sap;
7323    saip = (SAIndex2Ptr)(sap->saip);
7324    return (saip->top);
7325 }
7326 
7327 /***************************************************************************
7328 *
7329 *  SECTION 5: Functions to change, assign or retrieve an anchor row.
7330 *    SECTION 5a: functions for child seqaligns
7331 *    SECTION 5b: functions for parent seqaligns
7332 *    SECTION 5c: functions to retrieve anchor row information
7333 *
7334 ***************************************************************************/
7335 
7336 /* SECTION 5a */
AlnMgr2AnchorChild(SeqAlignPtr sap,Int4 which_row)7337 static void AlnMgr2AnchorChild(SeqAlignPtr sap, Int4 which_row)
7338 {
7339    AMBitty2Ptr  abp;
7340    AMBitty2Ptr  abp_head;
7341    AMBitty2Ptr  abp_head2;
7342    AMBitty2Ptr  abp_prev;
7343    AMBitty2Ptr  abp_prev2;
7344    AMBitty2Ptr  abp_uhead;
7345    AMBitty2Ptr  abp_uprev;
7346    Uint2Ptr     anchor_unsect;
7347    Int4         curr;
7348    Int4         curr2;
7349    DenseSegPtr  dsp;
7350    Int4         i;
7351    Int4         j;
7352    Uint2        numunsect;
7353    SAIndex2Ptr   saip;
7354    SARowDat2Ptr  srdp;
7355 
7356    if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_CHILD)
7357       return;
7358    saip = (SAIndex2Ptr)(sap->saip);
7359    if (which_row > saip->numrows)
7360       return;
7361    if (saip->anchor == which_row) /*already anchored to the right row */
7362       return;
7363    if (saip->anchor != -1 || which_row <= 0)  /* already anchored -- must reindex as a flat alignment first */
7364    {
7365       SAIndex2Free2(sap->saip);
7366       sap->saip = NULL;
7367       AlnMgr2IndexSingleDenseSegSA(sap);
7368       if (which_row <= 0)
7369          return;
7370       saip = (SAIndex2Ptr)(sap->saip);
7371    }
7372    numunsect = saip->srdp[which_row-1]->numunsect;
7373    if (numunsect > 0)
7374       anchor_unsect = saip->srdp[which_row-1]->unsect;
7375    else
7376       anchor_unsect = NULL;
7377    for (i=0; i<saip->numrows; i++)
7378    {
7379       if (i+1 != which_row)
7380       {
7381          abp_head = NULL;
7382          abp_head2 = NULL;
7383          abp_uhead = NULL;
7384          curr = 0;
7385          curr2 = 0;
7386          srdp = saip->srdp[i];
7387          for (j=0; j<srdp->numsect; j++)
7388          {
7389             if (anchor_unsect != NULL && curr < numunsect && srdp->sect[j] > anchor_unsect[curr])
7390             {
7391                while (curr < numunsect && srdp->sect[j] > anchor_unsect[curr])
7392                {
7393                   curr++;
7394                }
7395             }
7396             if (curr < numunsect && anchor_unsect != NULL && srdp->sect[j] == anchor_unsect[curr]) /* this one is an insert */
7397             {
7398                abp = (AMBitty2Ptr)MemNew(sizeof(AMBitty2));
7399                abp->n = srdp->sect[j];
7400                if (abp_head == NULL)
7401                   abp_head = abp_prev = abp;
7402                else
7403                {
7404                   abp_prev->next = abp;
7405                   abp_prev = abp;
7406                }
7407                curr++;
7408             } else /* put it in the keeper pile */
7409             {
7410                abp = (AMBitty2Ptr)MemNew(sizeof(AMBitty2));
7411                abp->n = srdp->sect[j];
7412                if (abp_head2 == NULL)
7413                   abp_head2 = abp_prev2 = abp;
7414                else
7415                {
7416                   abp_prev2->next = abp;
7417                   abp_prev2 = abp;
7418                }
7419             }
7420          }
7421          for (j=0; j<srdp->numunsect; j++)
7422          {
7423             if (anchor_unsect != NULL && curr2 < numunsect && srdp->unsect[j] > anchor_unsect[curr2])
7424             {
7425                while (curr2 < numunsect && srdp->unsect[j] > anchor_unsect[curr2])
7426                {
7427                   curr2++;
7428                }
7429             }
7430             if (curr2 >= numunsect || (curr2 < numunsect && (anchor_unsect == NULL || srdp->unsect[j] != anchor_unsect[curr2]))) /* these get kept */
7431             {
7432                abp = (AMBitty2Ptr)MemNew(sizeof(AMBitty2));
7433                abp->n = srdp->unsect[j];
7434                if (abp_uhead == NULL)
7435                   abp_uhead = abp_uprev = abp;
7436                else
7437                {
7438                   abp_uprev->next = abp;
7439                   abp_uprev = abp;
7440                }
7441             }
7442          }
7443          MemFree(srdp->sect);
7444          MemFree(srdp->unsect);
7445          srdp->numsect = srdp->numunsect = srdp->numinsect = 0;
7446          abp = abp_head; /* inserts */
7447          while (abp != NULL)
7448          {
7449             srdp->numinsect++;
7450             abp = abp->next;
7451          }
7452          srdp->insect = (Uint2Ptr)MemNew((srdp->numinsect)*sizeof(Uint2));
7453          abp = abp_head;
7454          j = 0;
7455          while (abp != NULL)
7456          {
7457             srdp->insect[j] = abp->n;
7458             j++;
7459             abp_prev = abp;
7460             abp = abp->next;
7461             MemFree(abp_prev);
7462          }
7463          abp = abp_head2; /* aligned sections */
7464          while (abp != NULL)
7465          {
7466             srdp->numsect++;
7467             abp = abp->next;
7468          }
7469          srdp->sect = (Uint2Ptr)MemNew((srdp->numsect)*sizeof(Uint2));
7470          abp = abp_head2;
7471          j = 0;
7472          while (abp != NULL)
7473          {
7474             srdp->sect[j] = abp->n;
7475             j++;
7476             abp_prev = abp;
7477             abp = abp->next;
7478             MemFree(abp_prev);
7479          }
7480          abp = abp_uhead; /* aligned gaps */
7481          while (abp != NULL)
7482          {
7483             srdp->numunsect++;
7484             abp = abp->next;
7485          }
7486          srdp->unsect = (Uint2Ptr)MemNew((srdp->numunsect)*sizeof(Uint2));
7487          abp = abp_uhead;
7488          j = 0;
7489          while (abp != NULL)
7490          {
7491             srdp->unsect[j] = abp->n;
7492             j++;
7493             abp_prev = abp;
7494             abp = abp->next;
7495             MemFree(abp_prev);
7496          }
7497       } else /* this is the anchor row -- fill in the alignment coords*/
7498       {
7499          srdp = saip->srdp[i];
7500          MemFree(saip->aligncoords);
7501          saip->numseg = srdp->numsect;
7502          saip->aligncoords = (Uint4Ptr)MemNew((saip->numseg)*sizeof(Uint4));
7503          dsp = (DenseSegPtr)(sap->segs);
7504          for (j=1; j<saip->numseg; j++)
7505          {
7506             saip->aligncoords[j] = saip->aligncoords[j-1] + dsp->lens[srdp->sect[j-1]];
7507          }
7508          saip->anchor = i+1;
7509       }
7510    }
7511 }
7512 
7513 /* SECTION 5c */
7514 /***************************************************************************
7515 *
7516 *  AlnMgr2AnchorSeqAlign takes an indexed seqalign and a row (1-based) and
7517 *  reindexes the alignment so that there are no gaps in the row indicated.
7518 *  Other rows may contain inserts after this operation. After an alignment
7519 *  is anchored, its length often shrinks. If which_row is less than 1, the
7520 *  function reindexes the alignment as a flat alignment.
7521 *
7522 ***************************************************************************/
AlnMgr2AnchorSeqAlign(SeqAlignPtr sap,Int4 which_row)7523 NLM_EXTERN void AlnMgr2AnchorSeqAlign(SeqAlignPtr sap, Int4 which_row)
7524 {
7525    AMAlignIndex2Ptr  amaip;
7526 
7527    if (sap == NULL || sap->saip == NULL)
7528       return;
7529    if (sap->saip->indextype == INDEX_CHILD)
7530       AlnMgr2AnchorChild(sap, which_row);
7531    else if (sap->saip->indextype == INDEX_PARENT)
7532    {
7533       amaip = (AMAlignIndex2Ptr)(sap->saip);
7534       if (amaip->alnstyle == AM2_LITE)
7535          return;
7536       AlnMgr2AnchorChild(amaip->sharedaln, which_row);
7537       amaip->anchor = which_row;
7538    }
7539 }
7540 
7541 /* SECTION 5c */
7542 /***************************************************************************
7543 *
7544 *  AlnMgr2FindAnchor returns the row number (1-based) of the anchor row
7545 *  for an indexed seqalign, or -1 if the alignment is unanchored or if
7546 *  there is another type of error.
7547 *
7548 ***************************************************************************/
AlnMgr2FindAnchor(SeqAlignPtr sap)7549 NLM_EXTERN Int4 AlnMgr2FindAnchor(SeqAlignPtr sap)
7550 {
7551    AMAlignIndex2Ptr  amaip;
7552    SAIndex2Ptr       saip;
7553 
7554    if (sap == NULL || sap->saip == NULL)
7555       return -1;
7556    if (sap->saip->indextype == INDEX_CHILD)
7557    {
7558       saip = (SAIndex2Ptr)(sap->saip);
7559       return (saip->anchor);
7560    } else if (sap->saip->indextype == INDEX_PARENT)
7561    {
7562       amaip = (AMAlignIndex2Ptr)(sap->saip);
7563       if (amaip->alnstyle == AM2_LITE)
7564          return -1;
7565       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
7566       return (saip->anchor);
7567    } else
7568       return -1;
7569 }
7570 
7571 /***************************************************************************
7572 *
7573 *  SECTION 6: Functions for coordinate conversion (bioseq to seqalign
7574 *  coordinates and vice versa)
7575 *
7576 ***************************************************************************/
7577 
7578 /* SECTION 6 */
7579 /***************************************************************************
7580 *
7581 *  AlnMgr2MapBioseqToSeqAlign takes an indexed seqalign, a position in a
7582 *  row of the alignment, and a 1-based row number, and maps the row position
7583 *  to alignment coordinates.
7584 *
7585 ***************************************************************************/
AlnMgr2MapBioseqToSeqAlign(SeqAlignPtr sap,Int4 pos,Int4 row)7586 NLM_EXTERN Int4 AlnMgr2MapBioseqToSeqAlign(SeqAlignPtr sap, Int4 pos, Int4 row)
7587 {
7588    AMAlignIndex2Ptr  amaip;
7589    Uint2Ptr         array;
7590    DenseSegPtr      dsp;
7591    Int4             L;
7592    Int4             mid;
7593    Int4             offset;
7594    Int4             R;
7595    Int4             retval;
7596    SAIndex2Ptr       saip;
7597    SARowDat2Ptr      srdp;
7598    Int4             start;
7599    Int4             stop;
7600    Uint1            strand;
7601 
7602    if (sap == NULL || sap->saip == NULL || row < 1)
7603       return -1;
7604    AlnMgr2GetNthSeqRangeInSA(sap, row, &start, &stop);
7605    if (pos < start || pos > stop)
7606       return -1;
7607    if (sap->saip->indextype == INDEX_CHILD)
7608    {
7609       saip = (SAIndex2Ptr)(sap->saip);
7610       dsp = (DenseSegPtr)(sap->segs);
7611    } else if (sap->saip->indextype == INDEX_PARENT)
7612    {
7613       amaip = (AMAlignIndex2Ptr)(sap->saip);
7614       if (amaip->alnstyle == AM2_LITE)
7615          return -1;
7616       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
7617       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
7618 }
7619    if (row > saip->numrows)
7620       return -1;
7621    srdp = saip->srdp[row-1];
7622    if (srdp->numsect < 1) {
7623      return -1;
7624    }
7625    strand = AlnMgr2GetNthStrand(sap, row);
7626    L = 0;
7627    R = srdp->numsect - 1;
7628    if (strand != Seq_strand_minus)
7629    {
7630       while (L < R)
7631       {
7632          mid = (L + R)/2;
7633          if (dsp->starts[(srdp->sect[mid+1])*(dsp->dim)+row-1] <= pos)
7634             L = mid + 1;
7635          else
7636             R = mid;
7637       }
7638    } else
7639    {
7640       while (L < R)
7641       {
7642          mid = ceil((L + R)/2);
7643          if (dsp->starts[(srdp->sect[mid])*(dsp->dim)+row-1] > pos)
7644             L = mid + 1;
7645          else
7646             R = mid;
7647       }
7648    }
7649    offset = pos - dsp->starts[(srdp->sect[L])*(dsp->dim)+row-1];
7650    if (offset > dsp->lens[srdp->sect[L]])
7651       return -2;  /* this is an insert */
7652    if (saip->anchor > 0)
7653    {
7654       array = saip->srdp[saip->anchor-1]->sect;
7655       R = binary_search_on_uint2_list(array, srdp->sect[L], saip->srdp[saip->anchor-1]->numsect);
7656       L = R;
7657       srdp = saip->srdp[saip->anchor-1];
7658       if (strand != Seq_strand_minus)
7659          retval = (saip->aligncoords[L] + offset);
7660       else
7661          retval = (saip->aligncoords[L] + dsp->lens[srdp->sect[L]] - offset - 1);
7662    } else
7663    {
7664       if (strand != Seq_strand_minus)
7665          retval = saip->aligncoords[srdp->sect[L]] + offset;
7666       else
7667          retval = (saip->aligncoords[srdp->sect[L]] + dsp->lens[srdp->sect[L]] - offset - 1);
7668    }
7669    return retval;
7670 }
7671 
7672 /* SECTION 6 */
7673 /***************************************************************************
7674 *
7675 *  AlnMgr2MapSeqAlignToBioseq takes an indexed seqalign, an alignment
7676 *  coordinate (pos), and the 1-based number of a row, and maps the alignment
7677 *  coordinate to the corresponding bioseq coordinate of the row desired.
7678 *  A return of -1 indicates an error; a return of -2 means that the bioseq
7679 *  is gapped at this alignment position.
7680 *
7681 ***************************************************************************/
AlnMgr2MapSeqAlignToBioseq(SeqAlignPtr sap,Int4 pos,Int4 row)7682 NLM_EXTERN Int4 AlnMgr2MapSeqAlignToBioseq(SeqAlignPtr sap, Int4 pos, Int4 row)
7683 {
7684    AMAlignIndex2Ptr  amaip;
7685    DenseSegPtr      dsp;
7686    Int4             len;
7687    Int4             offset;
7688    SAIndex2Ptr      saip;
7689    Int4             sect;
7690    SARowDat2Ptr     srdp;
7691    Int4             start;
7692    Uint1            strand;
7693    Uint2Ptr         trans;
7694 
7695    if (sap == NULL || sap->saip == NULL)
7696       return -1;
7697    len = AlnMgr2GetAlnLength(sap, FALSE);
7698    if (pos < 0 || pos > len - 1)
7699       return -1;
7700    if (sap->saip->indextype == INDEX_CHILD)
7701    {
7702       saip = (SAIndex2Ptr)(sap->saip);
7703       dsp = (DenseSegPtr)(sap->segs);
7704    } else if (sap->saip->indextype == INDEX_PARENT)
7705    {
7706       amaip = (AMAlignIndex2Ptr)(sap->saip);
7707       if (amaip->alnstyle == AM2_LITE)
7708          return -1;
7709       saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
7710       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
7711    }
7712    if (row > saip->numrows)
7713    return -1;
7714 
7715    sect = binary_search_on_uint4_list(saip->aligncoords, pos, saip->numseg);
7716    offset = pos - saip->aligncoords[sect];
7717    if (saip->anchor > 0)
7718    {
7719       trans = saip->srdp[saip->anchor-1]->sect;
7720       sect = trans[sect];
7721    }
7722    srdp = saip->srdp[row-1];
7723    start = binary_search_on_uint2_list(srdp->sect, sect, srdp->numsect);
7724    if (start == -1)
7725       return -2; /* this row has a gap or insert at this alignment position */
7726    strand = AlnMgr2GetNthStrand(sap, row);
7727    if (strand != Seq_strand_minus)
7728       return (dsp->starts[sect*(dsp->dim)+row-1] + offset);
7729    else
7730       return (dsp->starts[sect*(dsp->dim)+row-1] + dsp->lens[sect] - 1 - offset);
7731 }
7732 
7733 /* SECTION 6 */
7734 /***************************************************************************
7735 *
7736 *  AlnMgr2MapRowToRow takes an indexed seqalign, a position in row1, the
7737 *  1-based number of row1, and a target row (row2), and maps the bioseq
7738 *  coordinate in row 1 to the corresponding (aligned) bioseq coordinate in
7739 *  row2. A return of -1 indicates an error while a return of -2 means that
7740 *  the bioseq in row2 is gapped at the desired position.
7741 *
7742 ***************************************************************************/
AlnMgr2MapRowToRow(SeqAlignPtr sap,Int4 pos,Int4 row1,Int4 row2)7743 NLM_EXTERN Int4 AlnMgr2MapRowToRow(SeqAlignPtr sap, Int4 pos, Int4 row1, Int4 row2)
7744 {
7745    Int4  alnpos;
7746 
7747    if (sap == NULL)
7748       return -1;
7749    alnpos = AlnMgr2MapBioseqToSeqAlign(sap, pos, row1);
7750    return (AlnMgr2MapSeqAlignToBioseq(sap, alnpos, row2));
7751 }
7752 
7753 /***************************************************************************
7754 *
7755 *  SECTION 7: Functions to change an alignment and retrieve parts of an
7756 *    alignment
7757 *
7758 ***************************************************************************/
7759 
7760 /***************************************************************************
7761 *
7762 *   AlnMgr2TruncateSeqAlign truncates a given seqalign to contain only the
7763 *   bioseq coordinates from start to stop on the indicated row.  Anything
7764 *   before those coordinates is discarded; anything remaining afterwards
7765 *   is made into another seqalign and put in sap->next (the original next,
7766 *   if any, is now at sap->next->next).  Doesn't work on parent seqaligns.
*   The function returns TRUE if the original alignment extended past stop.
7768 *
7769 ***************************************************************************/
7770 /* SECTION 7 */
AlnMgr2TruncateSeqAlign(SeqAlignPtr sap,Int4 start,Int4 stop,Int4 row)7771 NLM_EXTERN Boolean AlnMgr2TruncateSeqAlign(SeqAlignPtr sap, Int4 start, Int4 stop, Int4 row)
7772 {
7773    DenseDiagPtr  ddp;
7774    DenseDiagPtr  ddp2;
7775    DenseSegPtr   dsp;
7776    Int4          from;
7777    Int4          i;
7778    Int4          mstart;
7779    Int4          mstop;
7780    SeqAlignPtr   sap1;
7781    SeqAlignPtr   sap2;
7782    Int4          tmp;
7783    Int4          to;
7784 
7785    if (sap == NULL || stop<start || row < 1)
7786       return FALSE;
7787    if (sap->segtype == SAS_DENSEG)
7788    {
7789       if (sap->saip == NULL)
7790          AlnMgr2IndexSingleChildSeqAlign(sap);
7791       AlnMgr2GetNthSeqRangeInSA(sap, row, &mstart, &mstop);
7792       if (mstart > start || mstop < stop)
7793          return FALSE;
7794       if (mstart == start)
7795       {
7796          if (mstop == stop)
7797             return FALSE;
7798          else if (mstop > stop)
7799          {
7800             from = AlnMgr2MapBioseqToSeqAlign(sap, start, row);
7801             to = AlnMgr2MapBioseqToSeqAlign(sap, stop, row);
7802             if (to < from)
7803             {
7804                tmp = to;
7805                to = from;
7806                from = tmp;
7807             }
7808             sap1 = AlnMgr2GetSubAlign(sap, from, to, 0, TRUE);
7809             AlnMgr2IndexSingleChildSeqAlign(sap1);
7810             from = AlnMgr2MapBioseqToSeqAlign(sap, stop+1, row);
7811             if (from < 0)
7812                return FALSE;
7813             to = AlnMgr2MapBioseqToSeqAlign(sap, mstop, row);
7814             if (to < from)
7815             {
7816                tmp = to;
7817                to = from;
7818                from = tmp;
7819             }
7820             sap2 = AlnMgr2GetSubAlign(sap, from, to, 0, TRUE);
7821             sap2->next = sap->next;
7822             sap->next = sap2;
7823             dsp = (DenseSegPtr)(sap->segs);
7824             sap->segs = (Pointer)(sap1->segs);
7825             sap1->segs = NULL;
7826             DenseSegFree(dsp);
7827             SeqAlignFree(sap1);
7828             AlnMgr2IndexSingleChildSeqAlign(sap);
7829             AlnMgr2IndexSingleChildSeqAlign(sap2);
7830             return TRUE;
7831          }
7832       } else if (mstart < start) /* throw away the first part */
7833       {
7834          from = AlnMgr2MapBioseqToSeqAlign(sap, start, row);
7835          to = AlnMgr2MapBioseqToSeqAlign(sap, stop, row);
7836          if (to < from)
7837          {
7838             tmp = to;
7839             to = from;
7840             from = tmp;
7841          }
7842          sap1 = AlnMgr2GetSubAlign(sap, from, to, 0, TRUE);
7843          if (mstop == stop) /* done */
7844          {
7845             dsp = (DenseSegPtr)(sap->segs);
7846             sap->segs = (Pointer)(sap1->segs);
7847             sap1->segs = NULL;
7848             DenseSegFree(dsp);
7849             SeqAlignFree(sap1);
7850             AlnMgr2IndexSingleChildSeqAlign(sap);
7851             return FALSE;
7852          } else if (mstop > stop)
7853          {
7854             from = AlnMgr2MapBioseqToSeqAlign(sap, stop+1, row);
7855             if (from < 0)
7856                return FALSE;
7857             to = AlnMgr2MapBioseqToSeqAlign(sap, mstop, row);
7858             if (to < from)
7859             {
7860                tmp = to;
7861                to = from;
7862                from = tmp;
7863             }
7864             sap2 = AlnMgr2GetSubAlign(sap, from, to, 0, TRUE);
7865             sap2->next = sap->next;
7866             sap->next = sap2;
7867             AlnMgr2IndexSingleChildSeqAlign(sap2);
7868             dsp = (DenseSegPtr)(sap->segs);
7869             sap->segs = (Pointer)(sap1->segs);
7870             sap1->segs = NULL;
7871             DenseSegFree(dsp);
7872             SeqAlignFree(sap1);
7873             AlnMgr2IndexSingleChildSeqAlign(sap);
7874             return TRUE;
7875          }
7876       }
7877    } else if (sap->segtype == SAS_DENDIAG)
7878    {
7879       ddp = (DenseDiagPtr)(sap->segs);
7880       if (ddp->dim < row)
7881          return FALSE;
7882       mstart = ddp->starts[row-1];
7883       mstop = mstart + ddp->len - 1;
7884       if (mstart > start || mstop < stop)
7885          return FALSE;
7886       if (mstart == start)
7887       {
7888          if (mstop == stop)
7889             return FALSE;
7890          else if (mstop > stop)
7891          {
7892             ddp2 = DenseDiagNew();
7893             ddp2->dim = ddp->dim;
7894             ddp2->starts = (Int4Ptr)MemNew((ddp->dim)*sizeof(Int4));
7895             ddp2->id = SeqIdDupList(ddp->id);
7896             ddp2->strands = (Uint1Ptr)MemNew((ddp->dim)*sizeof(Uint1));
7897             ddp2->scores = ScoreDup(ddp->scores);
7898             for (i=0; i<ddp->dim; i++)
7899             {
7900                ddp2->starts[i] = ddp->starts[i] + ddp->len - (mstop - stop);
7901                ddp2->strands[i] = ddp->strands[i];
7902             }
7903             ddp2->len = mstop - stop;
7904             ddp->len = ddp->len - (mstop - stop);
7905             sap2 = SeqAlignNew();
7906             sap2->type = SAT_PARTIAL;
7907             sap2->segtype = SAS_DENSEG;
7908             sap2->segs = (Pointer)ddp2;
7909             sap2->next = sap->next;
7910             sap->next = sap2;
7911             AlnMgr2IndexSingleChildSeqAlign(sap2);
7912             return TRUE;
7913          }
7914       } else if (mstart < start)
7915       {
7916          for (i=0; i<ddp->dim; i++)
7917          {
7918             ddp->starts[i] = ddp->starts[i] + start - mstart;
7919          }
7920          ddp->len = ddp->len - (start - mstart);
7921          AlnMgr2IndexSingleChildSeqAlign(sap);
7922          if (mstop == stop)
7923             return FALSE;
7924          else if (mstop > stop)
7925          {
7926             ddp2 = DenseDiagNew();
7927             ddp2->dim = ddp->dim;
7928             ddp2->starts = (Int4Ptr)MemNew((ddp->dim)*sizeof(Int4));
7929             ddp2->id = SeqIdDupList(ddp->id);
7930             ddp2->strands = (Uint1Ptr)MemNew((ddp->dim)*sizeof(Uint1));
7931             ddp2->scores = ScoreDup(ddp->scores);
7932             for (i=0; i<ddp->dim; i++)
7933             {
7934                ddp2->starts[i] = ddp->starts[i] + ddp->len - (mstop - stop);
7935                ddp2->strands[i] = ddp->strands[i];
7936             }
7937             ddp2->len = mstop - stop;
7938             ddp->len = ddp->len - (mstop - stop);
7939             sap2 = SeqAlignNew();
7940             sap2->type = SAT_PARTIAL;
7941             sap2->segtype = SAS_DENSEG;
7942             sap2->segs = (Pointer)ddp2;
7943             sap2->next = sap->next;
7944             sap->next = sap2;
7945             AlnMgr2IndexSingleChildSeqAlign(sap2);
7946             return TRUE;
7947          }
7948       }
7949    } else
7950       return FALSE;
7951    return FALSE;
7952 }
7953 
7954 /* SECTION 7 */
7955 /***************************************************************************
7956 *
7957 *  AlnMgr2GetSubAlign retrieves a portion of an indexed alignment, from
7958 *  'from' to 'to' in the row coordinates specified, or if which_row is 0,
7959 *  'from' and 'to' are assumed to be alignment coordinates. If 'to' is -1,
7960 *  the subalignment will go to the end of the specified row (or to the end
7961 *  of the whole alignment). If the alignment is discontinuous and fill_in
7962 *  is FALSE, the alignment will be returned as an SAS_DISC set, each piece
7963 *  represented by a single alignment. If the alignment is discontinuous and
7964 *  fill_in is TRUE, the unaligned regions will be added in to the alignment,
7965 *  with all gaps in all other rows. If the alignment is continuous, it
7966 *  doesn't matter whether fill_in is TRUE or FALSE. (SUBALIGN)
7967 *
7968 ***************************************************************************/
AlnMgr2GetSubAlign(SeqAlignPtr sap,Int4 from,Int4 to,Int4 which_row,Boolean fill_in)7969 NLM_EXTERN SeqAlignPtr AlnMgr2GetSubAlign(SeqAlignPtr sap, Int4 from, Int4 to, Int4 which_row, Boolean fill_in)
7970 {
7971    Int4             a;
7972    AMAlignIndex2Ptr  amaip;
7973    AlnMsg2Ptr        amp;
7974    Boolean          anchored;
7975    Int4             currlen;
7976    DenseSegPtr      dsp;
7977    DenseSegPtr      dsp_new;
7978    Int4             from_aln;
7979    Int4             from_seq;
7980    Int4             i;
7981    SeqIdPtr         id;
7982    Int4             j;
7983    Int4             k;
7984    Int4             len;
7985    Int4             lengthbit;
7986    Int4             minlen;
7987    Boolean          more;
7988    Int4             n;
7989    Int4             numseg;
7990    Int4             numunaln;
7991    AMRowInfoPtr     row;
7992    AMRowInfoPtr     row_head;
7993    AMRowInfoPtr     row_prev;
7994    AMRowInfoPtr     PNTR rowheads;
7995    AMRowInfoPtr     PNTR rows;
7996    SeqAlignPtr      salp;
7997    SeqAlignPtr      salp_head;
7998    SeqAlignPtr      salp_prev;
7999    SeqAlignPtr      sap_real;
8000    Int4             seg;
8001    Int4             start_seg;
8002    Uint1            strand;
8003    SeqAlignPtr      subsalp;
8004    Int4             tmp;
8005    Int4             to_aln;
8006    Int4             to_seq;
8007    Int4             ustart;
8008    Int4             ustop;
8009 
8010    if (sap == NULL || sap->saip == NULL)
8011       return NULL;
8012    len = AlnMgr2GetAlnLength(sap, FALSE);
8013    if (which_row == 0 && (to > len-1 || from < 0))
8014       return NULL;
8015    n = AlnMgr2GetNumRows(sap);
8016    if (which_row < 0 || which_row > n)
8017       return NULL;
8018    if (to == -1)
8019    {
8020       if (which_row == 0)
8021          to = len-1;
8022       else
8023          AlnMgr2GetNthSeqRangeInSA(sap, which_row, NULL, &to);
8024    }
8025    if (sap->saip->indextype == INDEX_CHILD)
8026       sap_real = sap;
8027    else if (sap->saip->indextype == INDEX_PARENT)
8028    {
8029       amaip = (AMAlignIndex2Ptr)(sap->saip);
8030       if (amaip->alnstyle == AM2_LITE)
8031          return NULL;
8032       sap_real = amaip->sharedaln;
8033       if (from == 0 && to == len-1 && !AlnMgr2IsSAPDiscAli(sap_real))  /* need whole aln -- take a shortcut! */
8034          return SeqAlignDup(sap_real);
8035    }
8036    if ((a = AlnMgr2FindAnchor(sap_real)) > 0)
8037    {
8038       anchored = TRUE;
8039       salp = SeqAlignDup(sap_real);
8040       AlnMgr2IndexSingleChildSeqAlign(salp);
8041       if (which_row == 0) /* anchor coordinates */
8042       {
8043          AlnMgr2GetNthSeqRangeInSA(salp, a, &from_seq, &to_seq);
8044          from_aln = AlnMgr2MapBioseqToSeqAlign(salp, from_seq, a);
8045          to_aln = AlnMgr2MapBioseqToSeqAlign(salp, to_seq, a);
8046          if (from_aln > to_aln)
8047          {
8048             tmp = from_aln;
8049             from_aln = to_aln;
8050             to_aln = tmp;
8051          }
8052       } else
8053       {
8054          from_aln = AlnMgr2MapBioseqToSeqAlign(salp, from, which_row);
8055          to_aln = AlnMgr2MapBioseqToSeqAlign(salp, to, which_row);
8056          if (from_aln > to_aln)
8057          {
8058             tmp = from_aln;
8059             from_aln = to_aln;
8060             to_aln = tmp;
8061          }
8062       }
8063    } else
8064    {
8065       anchored = FALSE;
8066       salp = sap_real;
8067       if (which_row == 0) /* alignment coordinates */
8068       {
8069          from_aln = from;
8070          to_aln = to;
8071       } else
8072       {
8073          from_aln = AlnMgr2MapBioseqToSeqAlign(salp, from, which_row);
8074          to_aln = AlnMgr2MapBioseqToSeqAlign(salp, to, which_row);
8075          if (from_aln > to_aln)
8076          {
8077             tmp = from_aln;
8078             from_aln = to_aln;
8079             to_aln = tmp;
8080          }
8081       }
8082    }
8083    rows = (AMRowInfoPtr PNTR)MemNew(n*sizeof(AMRowInfoPtr));
8084    amp = AlnMsgNew2();
8085    seg = lengthbit = 0;
8086    currlen = 0;
8087    numunaln = 0;
8088    salp_head = salp_prev = NULL;
8089    while (AlnMgr2GetNextLengthBit(sap, &lengthbit, &seg))
8090    {
8091       if (currlen <= to_aln && seg >= 0 && currlen+lengthbit-1 >= from_aln)
8092       {
8093          numseg = AlnMgr2GetNumSegsInRange(sap, currlen, currlen+lengthbit-1, &start_seg);
8094          numunaln = 0;
8095          for (i=0; i<n; i++)
8096          {
8097             row_head = NULL;
8098             for (j=start_seg; j<numseg+start_seg; j++)
8099             {
8100                AlnMsgReNew2(amp);
8101                AlnMgr2GetNthSegmentRange(sap, j+1, &amp->from_aln, &amp->to_aln);
8102                amp->from_aln = MAX(amp->from_aln, from_aln);
8103                amp->to_aln = MIN(amp->to_aln, to_aln);
8104                amp->row_num = i+1;
8105                while ((more = AlnMgr2GetNextAlnBit(salp, amp)) == TRUE)
8106                {
8107                   if (amp->right_interrupt != NULL && amp->right_interrupt->unalnlen > 0)
8108                      numunaln++;
8109                   row = (AMRowInfoPtr)MemNew(sizeof(AMRowInfo));
8110                   if (amp->type == AM_GAP)
8111                      row->from = -1;
8112                   else
8113                      row->from = amp->from_row;
8114                   row->len = amp->to_row - amp->from_row + 1;
8115                   if (row_head != NULL)
8116                   {
8117                      row_prev->next = row;
8118                      row_prev = row;
8119                   } else
8120                      row_head = row_prev = row;
8121                }
8122             }
8123             rows[i] = row_head;
8124          }
8125       }
8126       rowheads = (AMRowInfoPtr PNTR)MemNew(n*sizeof(AMRowInfoPtr));
8127       for (i=0; i<n; i++)
8128       {
8129          rowheads[i] = rows[i];
8130       }
8131       while (rows[0] != NULL)
8132       {
8133          minlen = -1;
8134          for (i=0; i<n; i++)
8135          {
8136             if (rows[i]->len < minlen || minlen == -1)
8137                minlen = rows[i]->len;
8138          }
8139          for (i=0; i<n; i++)
8140          {
8141             if (rows[i]->len > minlen)
8142             {
8143                row = (AMRowInfoPtr)MemNew(sizeof(AMRowInfo));
8144                row->next = rows[i]->next;
8145                rows[i]->next = row;
8146                if (rows[i]->from == -1)
8147                   row->from = -1;
8148                else if (AlnMgr2GetNthStrand(salp, i) == Seq_strand_minus)
8149                {
8150                   row->from = rows[i]->from;
8151                   rows[i]->from = rows[i]->from + rows[i]->len - 1 - minlen;
8152                } else
8153                   row->from = rows[i]->from + minlen;
8154                row->len = rows[i]->len - minlen;
8155                rows[i]->len = minlen;
8156             }
8157             rows[i] = rows[i]->next;
8158          }
8159       }
8160       for (i=0; i<n; i++)
8161       {
8162          rows[i] = rowheads[i];
8163       }
8164       MemFree(rowheads);
8165       dsp = DenseSegNew();
8166       row = rows[0];
8167       while (row != NULL)
8168       {
8169          dsp->numseg++;
8170          row = row->next;
8171       }
8172       if (fill_in)
8173          dsp->numseg += numunaln;
8174       dsp->dim = n;
8175       dsp->lens = (Int4Ptr)MemNew((dsp->numseg)*sizeof(Int4));
8176       dsp->starts = (Int4Ptr)MemNew((dsp->numseg)*(dsp->dim)*sizeof(Int4));
8177       dsp->strands = (Uint1Ptr)MemNew((dsp->numseg)*(dsp->dim)*sizeof(Int4));
8178       j = 0;
8179       row = rows[0];
8180       while (row != NULL)
8181       {
8182          dsp->lens[j] = row->len;
8183          j++;
8184          row = row->next;
8185       }
8186       id = AlnMgr2GetNthSeqIdPtr(salp, 0);
8187       dsp->ids = id;
8188       for (i=0; i<n; i++)
8189       {
8190          if (i > 0)
8191          {
8192             id->next = AlnMgr2GetNthSeqIdPtr(salp, i+1);
8193             id = id->next;
8194          }
8195          row = rows[i];
8196          j = 0;
8197          strand = AlnMgr2GetNthStrand(salp, i+1);
8198          while (row != NULL)
8199          {
8200             dsp->starts[n*j + i] = row->from;
8201             dsp->strands[n*j + i] = strand;
8202             j++;
8203             row = row->next;
8204          }
8205       }
8206       if (fill_in)
8207       {
8208          for (i=0; i<n; i++)
8209          {
8210             AlnMgr2GetNthUnalignedForNthRow(sap, seg+1, i+1, &ustart, &ustop);
8211             if (ustart >= 0 && ustop >= ustart)
8212             {
8213                for (k=0; k<n; k++)
8214                {
8215                   dsp->starts[n*j + k] = -1;
8216                   dsp->strands[n*j + k] = dsp->strands[i];
8217                }
8218                dsp->starts[n*j + i] = ustart;
8219                j++;
8220             }
8221          }
8222       }
8223       subsalp = SeqAlignNew();
8224       subsalp->type = SAT_PARTIAL;
8225       subsalp->segtype = SAS_DENSEG;
8226       subsalp->dim = n;
8227       subsalp->segs = (Pointer)(dsp);
8228       for (i=0; i<n; i++)
8229       {
8230          row = rows[i];
8231          while (row != NULL)
8232          {
8233             row_prev = row->next;
8234             MemFree(row);
8235             row = row_prev;
8236          }
8237       }
8238       if (seg < 0)
8239          seg = -seg;
8240       currlen += lengthbit;
8241       seg++;
8242       if (salp_head != NULL)
8243       {
8244          salp_prev->next = subsalp;
8245          salp_prev = subsalp;
8246       } else
8247          salp_head = salp_prev = subsalp;
8248    }
8249    MemFree(rows);
8250    AlnMsgFree2(amp);
8251    if (fill_in && salp_head->next != NULL)  /* stick subsalps together into a big aln */
8252    {
8253       j = 0;
8254       subsalp = salp_head;
8255       while (subsalp != NULL)
8256       {
8257          dsp = (DenseSegPtr)(subsalp->segs);
8258          j += dsp->numseg;
8259          subsalp = subsalp->next;
8260       }
8261       dsp_new = DenseSegNew();
8262       dsp_new->dim = n;
8263       dsp_new->numseg = j;
8264       dsp_new->lens = (Int4Ptr)MemNew((dsp->numseg)*sizeof(Int4));
8265       dsp_new->starts = (Int4Ptr)MemNew((dsp->numseg)*(dsp->dim)*sizeof(Int4));
8266       dsp_new->strands = (Uint1Ptr)MemNew((dsp->numseg)*(dsp->dim)*sizeof(Int4));
8267       subsalp = salp_head;
8268       k = 0;
8269       while (subsalp != NULL)
8270       {
8271          dsp = (DenseSegPtr)(subsalp->segs);
8272          for (j=0; j<dsp->numseg; j++)
8273          {
8274             dsp_new->lens[k] = dsp->lens[j];
8275             for (i=0; i<n; i++)
8276             {
8277                dsp_new->starts[k*n+i] = dsp->starts[j*n+i];
8278                dsp_new->strands[k*n+i] = dsp->strands[j*n+i];
8279             }
8280             k++;
8281          }
8282          subsalp = subsalp->next;
8283       }
8284       subsalp = SeqAlignNew();
8285       subsalp->type = SAT_PARTIAL;
8286       subsalp->segtype = SAS_DENSEG;
8287       subsalp->dim = n;
8288       subsalp->segs = (Pointer)(dsp_new);
8289       SeqAlignSetFree(salp_head);
8290    } else if (!fill_in && salp_head->next != NULL)
8291    {
8292       subsalp = SeqAlignNew();
8293       subsalp->segtype = SAS_DISC;
8294       subsalp->type = SAT_PARTIAL;
8295       subsalp->segs = (SeqAlignPtr)(salp_head);
8296       salp_prev = salp_head;
8297       while (salp_prev != NULL)
8298       {
8299          AMAlignIndexFreeEitherIndex(salp_prev);
8300          salp_prev = salp_prev->next;
8301       }
8302    } else  /* if !salp_head->next */
8303    {
8304       subsalp = salp_head;
8305       subsalp->dim = AlnMgr2GetNumRows(subsalp);
8306       subsalp->type = SAT_PARTIAL;
8307       AMAlignIndexFreeEitherIndex(subsalp);
8308    }
8309    if (anchored)
8310       SeqAlignFree(salp);
8311    return subsalp;
8312 }
8313 
8314 /***************************************************************************
8315 *
8316 *  SECTION 8: Miscellaneous functions to compute useful information
8317 *    about an alignment
8318 *
8319 ***************************************************************************/
8320 /* SECTION 8 */
8321 /***************************************************************************
8322 *
8323 *  AlnMgr2ComputeScoreForSeqAlign computes an ad hoc numerical score for
8324 *  an indexed alignment by computing a similarity score for the whole
8325 *  alignment (residue pair by residue pair score, from a matrix for proteins
8326 *  and identity for nucleotides) and then subtracting gap open and gap
8327 *  extension penalties.
8328 *
8329 ***************************************************************************/
AlnMgr2ComputeScoreForSeqAlign(SeqAlignPtr sap)8330 NLM_EXTERN Int4 AlnMgr2ComputeScoreForSeqAlign(SeqAlignPtr sap)
8331 {
8332    AMFreqPtr    afp;
8333    DenseSegPtr  dsp;
8334    Int4         gaplen;
8335    Int4         i;
8336    Boolean      is_prot;
8337    Int4         j;
8338    Int4         len;
8339    Int4         mismatch;
8340    Int4         numgaps;
8341    Int4         numseqs;
8342    Boolean      open;
8343    Int4         res1;
8344    Int4         res2;
8345    Int4         score;
8346    Int4         seqscore;
8347 
8348    if (sap->segtype == SAS_DISC)
8349       return -1;
8350    if (sap->saip == NULL)
8351       AlnMgr2IndexSingleChildSeqAlign(sap);
8352    is_prot = AlnMgr2IsItProtein(sap);
8353    len = AlnMgr2GetAlnLength(sap, FALSE);
8354    dsp = (DenseSegPtr)(sap->segs);
8355    numseqs = dsp->dim;
8356    open = FALSE;
8357    gaplen = 0;
8358    numgaps = 0;
8359    for (i=0; i<dsp->dim; i++)
8360    {
8361       for (j=0; j<dsp->numseg; j++)
8362       {
8363          if (dsp->starts[(dsp->dim)*j+i] == -1)
8364          {
8365             if (!open)
8366             {
8367                gaplen += dsp->lens[j];
8368                numgaps++;
8369                open = TRUE;
8370             } else
8371                gaplen += dsp->lens[j];
8372          } else
8373             open = FALSE;
8374       }
8375    }
8376    mismatch = 0;
8377    seqscore = 0;
8378    afp = AlnMgr2ComputeFreqMatrix(sap, 0, -1, 0);
8379    if (afp == NULL)
8380      return -1;
8381    for (i=0; i<afp->len; i++)
8382    {
8383       res1 = -1;
8384       res2 = -1;
8385       for (j=0; j<afp->size; j++)
8386       {
8387          if (afp->freq[j][i] == 1)
8388          {
8389             if (res1 == -1)
8390                res1 = j;
8391             else
8392                res2 = j;
8393          } else if (afp->freq[j][i] == 2)
8394             res1 = res2 = j;
8395       }
8396       if (res1 > 0 && res2 > 0) /* don't penalize gaps */
8397          seqscore += AlnMgr2GetScoreForPair(res1, res2, is_prot);
8398    }
8399    AMFreqFree(afp);
8400    score = seqscore + numgaps*AM_GAPOPEN + gaplen*AM_GAPEXT;
8401    return score;
8402 }
8403 
/***************************************************************************
*
*  AlnMgr2SeqPortRead reads the residues start..stop (at most
*  AM_SEQPORTSIZE of them) of bsp into buf through the seqport *spp.
*  A seqport is opened on the first call (*spp == NULL), and the current
*  one is freed and replaced whenever start or stop lies outside
*  *bufpos .. *bufpos+AM_SEQPORTSIZE; *bufpos is set to the lower
*  coordinate of each newly opened window. The return value is whatever
*  SeqPortRead returns.
*
***************************************************************************/
static Int4 AlnMgr2SeqPortRead(SeqPortPtr PNTR spp, Uint1Ptr buf, Int4Ptr bufpos, Int4 start, Int4 stop, Uint1 strand, Uint1 code, BioseqPtr bsp)
{
    Int4  need_port;
    Int4  want;
    Int4  win_lo;

    if (*spp == NULL) /* first call */
        need_port = 1;
    else {
        /* see if what we need is in current seqport or a new one is needed */
        need_port = !(start >= *bufpos && start <= *bufpos+AM_SEQPORTSIZE
                      && stop >= *bufpos && stop <= *bufpos+AM_SEQPORTSIZE);
        if (need_port)
            SeqPortFree(*spp);
    }
    if (need_port) {
        if (strand == Seq_strand_minus) {
            /* minus strand: the window ends at stop */
            win_lo = MAX(0, stop-AM_SEQPORTSIZE);
            *spp = SeqPortNew(bsp, win_lo, stop, strand, code);
        }
        else {
            /* otherwise the window begins at start, clipped to the bioseq */
            win_lo = start;
            *spp = SeqPortNew(bsp, start, MIN(start+AM_SEQPORTSIZE, bsp->length-1), strand, code);
        }
        *bufpos = win_lo;
    }
    want = (MIN(start+AM_SEQPORTSIZE-1, stop)) - start+1;
    return (SeqPortRead(*spp, buf, want));
}
8431 
8432 /* SECTION 8 */
8433 /***************************************************************************
8434 *
8435 *  AlnMgr2ComputeFreqMatrix takes an indexed seqalign and returns a matrix
8436 *  indicating nucleotide or amino acid frequency at each position of the
8437 *  alignment. The matrix can be made over only a part of the alignment, if
8438 *  from and to are nonzero, and if row is nonzero, from and to are taken
8439 *  to be bioseq coordinates from that row (if row == 0 from and to are
8440 *  assumed to be alignment coordinates).
8441 *
8442 ***************************************************************************/
AlnMgr2ComputeFreqMatrix(SeqAlignPtr sap,Int4 from,Int4 to,Int4 row)8443 NLM_EXTERN AMFreqPtr AlnMgr2ComputeFreqMatrix(SeqAlignPtr sap, Int4 from, Int4 to, Int4 row)
8444 {
8445    AMFreqPtr   afp;
8446    AlnMsg2Ptr  amp;
8447    BioseqPtr   bsp;
8448    Uint1       buf[AM_SEQPORTSIZE];
8449    Int4        bufpos;
8450    Uint1       code;
8451    Int4        counter;
8452    Int4        ctr;
8453    Int4        from_a;
8454    Int4        i;
8455    Boolean     isna;
8456    Int4        j;
8457    Int4        l;
8458    Int4        len;
8459    Boolean     more;
8460    Int4        n;
8461    Int4        numrows;
8462    Uint1       res;
8463    SeqIdPtr    sip;
8464    SeqPortPtr  spp;
8465    Int4        tmp;
8466    Int4        to_a;
8467 
8468    if (sap == NULL || sap->saip == NULL || (from > to && to != -1))
8469       return NULL;
8470    numrows = AlnMgr2GetNumRows(sap);
8471    bufpos = -1;
8472    if (row > numrows || row < 0)
8473       return NULL;
8474    len = AlnMgr2GetAlnLength(sap, FALSE);
8475    if (to >= len)
8476       return NULL;
8477    if (to == -1)
8478       to = len-1;
8479    sip = AlnMgr2GetNthSeqIdPtr(sap, 1);
8480    bsp = BioseqLockById(sip);
8481    if (bsp != NULL)
8482       isna = ISA_na(bsp->mol);
8483    else
8484    {
8485       SeqIdFree(sip);
8486       return NULL;
8487    }
8488    BioseqUnlock(bsp);
8489    SeqIdFree(sip);
8490    if (isna)
8491       code = Seq_code_ncbi4na;
8492    else
8493       code = Seq_code_ncbistdaa;
8494    afp = (AMFreqPtr)MemNew(sizeof(AMFreq));
8495    afp->len = len;
8496    if (isna)
8497       afp->size = AM_NUCSIZE;
8498    else
8499       afp->size = AM_PROTSIZE;
8500    afp->freq = (Int4Ptr PNTR)MemNew((afp->size)*sizeof(Int4Ptr));
8501    for (i=0; i<afp->size; i++)
8502    {
8503       afp->freq[i] = (Int4Ptr)MemNew((afp->len)*sizeof(Int4));
8504    }
8505    amp = AlnMsgNew2();
8506    if (row != 0)
8507    {
8508       from_a = AlnMgr2MapBioseqToSeqAlign(sap, from, row);
8509       to_a = AlnMgr2MapBioseqToSeqAlign(sap, to, row);
8510       if (from_a > to_a)
8511       {
8512          tmp = to_a;
8513          to_a = from_a;
8514          from_a = tmp;
8515       }
8516    } else
8517    {
8518       from_a = from;
8519       to_a = to;
8520    }
8521    for (i=0; i<numrows; i++)
8522    {
8523       spp = NULL;
8524       AlnMsgReNew2(amp);
8525       amp->from_aln = from_a;
8526       amp->to_aln = to_a;
8527       amp->row_num = i+1;
8528       j = 0;
8529       while ((more = AlnMgr2GetNextAlnBit(sap, amp)))
8530       {
8531          if (amp->type == AM_GAP)
8532          {
8533             for (n=0; n<(amp->to_row - amp->from_row+1); n++)
8534             {
8535                afp->freq[0][j] = afp->freq[0][j]+1;
8536                j++;
8537             }
8538          } else if (amp->type == AM_SEQ)
8539          {
8540             sip = AlnMgr2GetNthSeqIdPtr(sap, i+1);
8541             bsp = BioseqLockById(sip);
8542             if (bsp != NULL) {
8543               for (l=amp->from_row; l<=amp->to_row; l+=AM_SEQPORTSIZE)
8544               {
8545                  counter = AlnMgr2SeqPortRead(&spp, buf, &bufpos, l, MIN(l+AM_SEQPORTSIZE, amp->to_row), amp->strand, code, bsp);
8546                  ctr = 0;
8547                  while (ctr < counter)
8548                  {
8549                     res = buf[ctr];
8550                     if (isna)
8551                     {
8552                        if (res == 1 || res == 2)
8553                           afp->freq[res][j]++;
8554                        else if (res == 4)
8555                           afp->freq[3][j]++;
8556                        else if (res == 8)
8557                           afp->freq[4][j]++;
8558                        else
8559                           afp->freq[5][j]++;
8560                     } else
8561                        afp->freq[res][j]++;
8562                     j++;
8563                     ctr++;
8564                  }
8565               }
8566               BioseqUnlock(bsp);
8567             }
8568             SeqIdFree(sip);
8569          }
8570       }
8571       SeqPortFree(spp);
8572    }
8573    AlnMsgFree2(amp);
8574    return afp;
8575 }
8576 
8577 /* SECTION 8 */
8578 /***************************************************************************
8579 *
8580 *  AlnMgr2GetScoreForPair assigns scores to nucleotide and protein residue
8581 *  pairs. Nucleotide pairs are scored according to a standard mismatch
8582 *  penalty, and amino acid pairs are scored according to the BLOSUM62
8583 *  matrix below. This matrix has been rearranged so that the rows and
8584 *  columns appear in alphabetical order, so that it directly correlates
8585 *  to the NCBIstdaa alphabet (with a minus-one difference).
8586 *
8587 ***************************************************************************/
AlnMgr2GetScoreForPair(Int4 res1,Int4 res2,Boolean is_prot)8588 static Int4 AlnMgr2GetScoreForPair(Int4 res1, Int4 res2, Boolean is_prot)
8589 {
8590    Int4  matrix[24][24] = {
8591      {4, -2, 0, -2, -1, -2, 0, -2, -1, -1, -1, -1, -2, -1, -1, -1, 1, 0, 0, -3, 0, -2, -1, -4},
8592      {-2, 4, -3, 4, 1, -3, -1, 0, -3, 0, -4, -3, 3, -2, 0, -1, 0, -1, -3, -4, -1, -3, 1, -4},
8593      {0, -3, 9, -3, -4, -2, -3, -3, -1, -3, -1, -1, -3, -3, -3, -3, -1, -1, -1, -2, -2, -2, -3, -4},
8594      {-2, 4, -3, 6, 2, -3, -1, -1, -3, -1, -4, -3, 1, -1, 0, -2, 0, -1, -3, -4, -1, -3, 1, -4},
8595      {-1, 1, -4, 2, 5, -3, -2, 0, -3, 1, -3, -2, 0, -1, 2, 0, 0, -1, -2, -3, -1, -2, 4, -4},
8596      {-2, -3, -2, -3, -3, 6, -3, -1, 0, -3, 0, 0, -3, -4, -3, -3, -2, -2, -1, 1, -1, 3, -3, -4},
8597      {0, -1, -3, -1, -2, -3, 6, -2, -4, -2, -4, -3, 0, -2, -2, -2, 0, -2, -3, -2, -1, -3, -2, -4},
8598      {-2, 0, -3, -1, 0, -1, -2, 8, -3, -1, -3, -2, 1, -2, 0, 0, -1, -2, -3, -2, -1, 2, 0, -4},
8599      {-1, -3, -1, -3, -3, 0, -4, -3, 4, -3, 2, 1, -3, -3, -3, -3, -2, -1, 3, -3, -1, -1, -3, -4},
8600      {-1, 0, -3, -1, 1, -3, -2, -1, -3, 5, -2, -1, 0, -1, 1, 2, 0, -1, -2, -3, -1, -2, 1, -4},
8601      {-1, -4, -1, -4, -3, 0, -4, -3, 2, -2, 4, 2, -3, -3, -2, -2, -2, -1, 1, -2, -1, -1, -3, -4},
8602      {-1, -3, -1, -3, -2, 0, -3, -2, 1, -1, 2, 5, -2, -2, 0, -1, -1, -1, 1, -1, -1, -1, -1, -4},
8603      {-2, 3, -3, 1, 0, -3, 0, 1, -3, 0, -3, -2, 6, -2, 0, 0, 1, 0, -3, -4, -1, -2, 0, -4},
8604      {-1, -2, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -2, 7, -1, -2, -1, -1, -2, -4, -2, -3, -1, -4},
8605      {-1, 0, -3, 0, 2, -3, -2, 0, -3, 1, -2, 0, 0, -1, 5, 1, 0, -1, -2, -2, -1, -1, 3, -4},
8606      {-1, -1, -3, -2, 0, -3, -2, 0, -3, 2, -2, -1, 0, -2, 1, 5, -1, -1, -3, -3, -1, -2, 0, -4},
8607      {1, 0, -1, 0, 0, -2, 0, -1, -2, 0, -2, -1, 1, -1, 0, -1, 4, 1, -2, -3, 0, -2, 0, -4},
8608      {0, -1, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 1, 5, 0, -2, 0, -2, -1, -4},
8609      {0, -3, -1, -3, -2, -1, -3, -3, 3, -2, 1, 1, -3, -2, -2, -3, -2, 0, 4, -3, -1, -1, -2, -4},
8610      {-3, -4, -2, -4, -3, 1, -2, -2, -3, -3, -2, -1, -4, -4, -2, -3, -3, -2, -3, 11, -2, 2, -3, -4},
8611      {0, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -1, -1, 0, 0, -1, -2, -1, -1, -1, -4},
8612      {-2, -3, -2, -3, -2, 3, -3, 2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1, 2, -1, 7, -2, -4},
8613      {-1, 1, -3, 1, 4, -3, -2, 0, -3, 1, -3, -1, 0, -1, 3, 0, 0, -1, -2, -3, -1, -2, 4, -4},
8614      {-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, 1}};
8615 
8616    if (is_prot) /* protein->use BLOSUM62 matrix */
8617       return matrix[res1-1][res2-1];
8618    else /* nucleotide->use match score/mismatch penalty */
8619    {
8620       if (res1 == 0 || res2 == 0) /* don't count gaps */
8621          return 0;
8622       if (res1 == res2)
8623          return 1;
8624       else
8625          return -3;
8626    }
8627 }
8628 
8629 /* SECTION 8 */
8630 /***************************************************************************
8631 *
8632 *  AlnMgr2IsItProtein takes an indexed alignment and quickly decides if
8633 *  it's a protein or nucleotide alignment, returning TRUE for protein.
8634 *
8635 ***************************************************************************/
AlnMgr2IsItProtein(SeqAlignPtr sap)8636 NLM_EXTERN Boolean AlnMgr2IsItProtein(SeqAlignPtr sap)
8637 {
8638    BioseqPtr  bsp;
8639    Boolean    is_na;
8640    SeqIdPtr   sip;
8641 
8642    if (sap == NULL || sap->saip == NULL)
8643       return FALSE;
8644    sip = AlnMgr2GetNthSeqIdPtr(sap, 1);
8645    bsp = BioseqLockById(sip);
8646    if (bsp == NULL)
8647       return FALSE;
8648    is_na = ISA_na(bsp->mol);
8649    SeqIdFree(sip);
8650    BioseqUnlock(bsp);
8651    return (!is_na);
8652 }
8653 
8654 /***************************************************************************
8655 *
8656 *  SECTION 9: Sorting functions and other algorithms to help order
8657 *  alignments for various purposes
8658 *
8659 ***************************************************************************/
8660 
8661 /* SECTION 9 */
AMCompareStarts(VoidPtr ptr1,VoidPtr ptr2)8662 static int LIBCALLBACK AMCompareStarts(VoidPtr ptr1, VoidPtr ptr2)
8663 {
8664    AMBitty2Ptr  bit1;
8665    AMBitty2Ptr  bit2;
8666 
8667    if (ptr1 != NULL && ptr2 != NULL)
8668    {
8669       bit1 = (AMBitty2Ptr)ptr1;
8670       bit2 = (AMBitty2Ptr)ptr2;
8671       if (bit1->num2 < bit2->num2)
8672          return -1;
8673       else if (bit1->num2 > bit2->num2)
8674          return 1;
8675       else if (bit1->num3 > bit2->num3) /* compare aln lengths */
8676          return -1;
8677       else if (bit1->num3 < bit2->num3)
8678          return 1;
8679       else
8680          return 0;
8681    }
8682    return 0;
8683 }
8684 
8685 /* SECTION 9 */
8686 /***************************************************************************
8687 *
8688 *  AlnMgr2SortAlnSetByNthRowPos takes an indexed parent alignment and sorts
8689 *  all the child alignments along the row indicated. If the indicated row
8690 *  is aligned on the plus strand, the alignments are sorted from smaller
8691 *  to larger coordinates along that row; otherwise they are sorted in
8692 *  reverse order.
8693 *
8694 ***************************************************************************/
AlnMgr2SortAlnSetByNthRowPos(SeqAlignPtr sap,Int4 row)8695 NLM_EXTERN void AlnMgr2SortAlnSetByNthRowPos(SeqAlignPtr sap, Int4 row)
8696 {
8697    AMAlignIndex2Ptr  amaip;
8698    AMBitty2Ptr       bit;
8699    Int4             i;
8700    SeqAlignPtr      PNTR saparray;
8701    Uint1            strand;
8702 
8703    if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
8704       return;
8705    amaip = (AMAlignIndex2Ptr)(sap->saip);
8706    bit = (AMBitty2Ptr)MemNew((amaip->numsaps)*sizeof(AMBitty2));
8707    saparray = (SeqAlignPtr PNTR)MemNew((amaip->numsaps)*sizeof(SeqAlignPtr));
8708    for (i=0; i<amaip->numsaps; i++)
8709    {
8710       bit[i].num1 = i;
8711       AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], row, &bit[i].num2, NULL);
8712       bit[i].num3 = AlnMgr2GetAlnLength(amaip->saps[i], FALSE);
8713       strand = AlnMgr2GetNthStrand(amaip->saps[i], row);
8714       if (strand == Seq_strand_minus)
8715          bit[i].num2 = -bit[i].num2;
8716       saparray[i] = amaip->saps[i];
8717    }
8718    HeapSort(bit, amaip->numsaps, sizeof(AMBitty2), AMCompareStarts);
8719    for (i=0; i<amaip->numsaps; i++)
8720    {
8721       amaip->saps[i] = saparray[bit[i].num1];
8722    }
8723    MemFree(saparray);
8724    MemFree(bit);
8725    if (amaip->alnstyle != AM2_LITE)
8726       AlnMgr2ReIndexSeqAlign(sap);
8727 }
8728 
8729 
8730 /***************************************************************************
8731 *
8732 *  SECTION 10: Basic alignment operations
8733 *
8734 ***************************************************************************/
8735 
8736 /***************************************************************************
8737 *
8738 *  AlnMgr2MergeTwoAlignments takes two alignments, with identical rows in
8739 *  the same order (otherwise it rejects the alignments), and merges them
8740 *  into a single alignment. If there is unaligned space between the two
8741 *  alignments and this space is the same length for every row, the function
8742 *  aligns those sequences; it rejects alignments when the unaligned spaces
8743 *  are different sizes. The function returns a newly allocated alignment.
8744 *
8745 ***************************************************************************/
AlnMgr2MergeTwoAlignments(SeqAlignPtr sap1_orig,SeqAlignPtr sap2_orig)8746 NLM_EXTERN SeqAlignPtr AlnMgr2MergeTwoAlignments(SeqAlignPtr sap1_orig, SeqAlignPtr sap2_orig)
8747 {
8748    Int4         c;
8749    DenseSegPtr  dsp;
8750    DenseSegPtr  dsp1;
8751    DenseSegPtr  dsp2;
8752    DenseSegPtr  dsp_new;
8753    Int4         i;
8754    Int4         j;
8755    Int4         n1;
8756    Int4         n2;
8757    SeqAlignPtr  sap1;
8758    SeqAlignPtr  sap2;
8759    SeqAlignPtr  sap_new;
8760    SeqIdPtr     sip1;
8761    SeqIdPtr     sip2;
8762    Int4         start1;
8763    Int4         start2;
8764    Int4         stop1;
8765    Int4         stop2;
8766    Uint1        strand1;
8767    Uint1        strand2;
8768    SeqAlignPtr  tmp;
8769 
8770    if (sap1_orig == NULL || sap2_orig == NULL)
8771       return NULL;
8772    if (sap1_orig->next != NULL)
8773    {
8774       AlnMgr2IndexSeqAlign(sap1_orig);
8775       sap1 = AlnMgr2GetSubAlign(sap1_orig, 0, -1, 0, TRUE);
8776    } else
8777       sap1 = SeqAlignDup(sap1_orig);
8778    if (sap2_orig->next != NULL)
8779    {
8780       AlnMgr2IndexSeqAlign(sap2_orig);
8781       sap2 = AlnMgr2GetSubAlign(sap2_orig, 0, -1, 0, TRUE);
8782    } else
8783       sap2 = SeqAlignDup(sap2_orig);
8784    AlnMgr2IndexSingleChildSeqAlign(sap1);
8785    AlnMgr2IndexSingleChildSeqAlign(sap2);
8786    n1 = AlnMgr2GetNumRows(sap1);
8787    n2 = AlnMgr2GetNumRows(sap2);
8788    if (n1 != n2)
8789    {
8790       SeqAlignFree(sap1);
8791       SeqAlignFree(sap2);
8792       return NULL;
8793    }
8794    /* put the alignments in order by the first row */
8795    AlnMgr2GetNthSeqRangeInSA(sap1, 1, &start1, &stop1);
8796    AlnMgr2GetNthSeqRangeInSA(sap2, 1, &start2, &stop2);
8797    strand1 = AlnMgr2GetNthStrand(sap1, 1);
8798    if (strand1 == Seq_strand_minus)
8799    {
8800       if (stop2 > start1)
8801       {
8802          tmp = sap1;
8803          sap1 = sap2;
8804          sap2 = tmp;
8805       }
8806    } else
8807    {
8808       if (stop1 > start2)
8809       {
8810          tmp = sap1;
8811          sap1 = sap2;
8812          sap2 = tmp;
8813       }
8814    }
8815    dsp1 = (DenseSegPtr)(sap1->segs);
8816    dsp2 = (DenseSegPtr)(sap2->segs);
8817    sip1 = dsp1->ids;
8818    sip2 = dsp2->ids;
8819    while (sip1 != NULL && sip2 != NULL)
8820    {
8821       if (SeqIdComp(sip1, sip2) != SIC_YES)
8822       {
8823          SeqAlignFree(sap1);
8824          SeqAlignFree(sap2);
8825          return NULL;
8826       }
8827       sip1 = sip1->next;
8828       sip2 = sip2->next;
8829    }
8830    dsp = DenseSegNew();
8831    dsp->dim = n1;
8832    dsp->numseg = 1;
8833    dsp->starts = (Int4Ptr)MemNew(n1*sizeof(Int4));
8834    dsp->lens = (Int4Ptr)MemNew(sizeof(Int4));
8835    dsp->strands = (Uint1Ptr)MemNew(n1*sizeof(Int4));
8836    for (i=0; i<n1; i++)
8837    {
8838       strand1 = AlnMgr2GetNthStrand(sap1, i+1);
8839       strand2 = AlnMgr2GetNthStrand(sap2, i+1);
8840       if (strand1 != strand2)
8841       {
8842          DenseSegFree(dsp);
8843          SeqAlignFree(sap1);
8844          SeqAlignFree(sap2);
8845          return NULL;
8846       }
8847       AlnMgr2GetNthSeqRangeInSA(sap1, i+1, &start1, &stop1);
8848       AlnMgr2GetNthSeqRangeInSA(sap2, i+1, &start2, &stop2);
8849       if (strand1 == Seq_strand_minus)
8850       {
8851          dsp->starts[i] = stop2 + 1;
8852          if (i == 0)
8853             dsp->lens[0] = start2 - (stop2 + 1);
8854          else
8855          {
8856             if (start2 - (stop2 + 1) != dsp->lens[0])
8857             {
8858                DenseSegFree(dsp);
8859                SeqAlignFree(sap1);
8860                SeqAlignFree(sap2);
8861                return NULL;
8862             }
8863          }
8864       } else
8865       {
8866          dsp->starts[i] = stop1 + 1;
8867          if (i == 0)
8868             dsp->lens[0] = start2 - (stop1 + 1);
8869          else
8870          {
8871             if (start2 - (stop1 + 1) != dsp->lens[0])
8872             {
8873                DenseSegFree(dsp);
8874                SeqAlignFree(sap1);
8875                SeqAlignFree(sap2);
8876                return NULL;
8877             }
8878          }
8879       }
8880       dsp->strands[i] = strand1;
8881    }
8882    if (dsp->lens[0] == 0)
8883    {
8884       DenseSegFree(dsp);
8885       dsp = NULL;
8886    }
8887    dsp_new = DenseSegNew();
8888    dsp_new->numseg = dsp1->numseg + dsp2->numseg;
8889    if (dsp != NULL)
8890       dsp_new->numseg++;
8891    dsp_new->dim = n1;
8892    dsp_new->starts = (Int4Ptr)MemNew(dsp_new->dim*dsp_new->numseg*sizeof(Int4));
8893    dsp_new->lens = (Int4Ptr)MemNew(dsp_new->numseg*sizeof(Int4));
8894    dsp_new->strands = (Uint1Ptr)MemNew(dsp_new->dim*dsp_new->numseg*sizeof(Uint1));
8895    for (i=0; i<dsp1->numseg; i++)
8896    {
8897       for (j=0; j<n1; j++)
8898       {
8899          dsp_new->starts[i*n1 + j] = dsp1->starts[i*n1 + j];
8900          dsp_new->strands[i*n1 + j] = dsp1->strands[i*n1 + j];
8901       }
8902       dsp_new->lens[i] = dsp1->lens[i];
8903    }
8904    c = dsp1->numseg;
8905    if (dsp != NULL)
8906    {
8907       for (j=0; j<n1; j++)
8908       {
8909          dsp_new->starts[c*n1 + j] = dsp->starts[j];
8910          dsp_new->strands[c*n1 + j] = dsp->strands[j];
8911       }
8912       dsp_new->lens[c] = dsp->lens[0];
8913       c++;
8914    }
8915    for (i=0; i<dsp2->numseg; i++, c++)
8916    {
8917       for (j=0; j<n1; j++)
8918       {
8919          dsp_new->starts[c*n1 + j] = dsp2->starts[i*n1 + j];
8920          dsp_new->strands[c*n1 + j] = dsp2->strands[i*n1 + j];
8921       }
8922       dsp_new->lens[c] = dsp2->lens[i];
8923    }
8924    dsp_new->ids = SeqIdDupList(dsp1->ids);
8925    sap_new = SeqAlignNew();
8926    sap_new->segtype = SAS_DENSEG;
8927    sap_new->dim = n1;
8928    sap_new->segs = (Pointer)dsp_new;
8929    if (dsp != NULL)
8930       DenseSegFree(dsp);
8931    SeqAlignFree(sap1);
8932    SeqAlignFree(sap2);
8933    return sap_new;
8934 }
8935 
8936 /* SECTION 10 */
8937 /***************************************************************************
8938 *
8939 *  AlnMgr2ExtendToCoords takes an indexed child seqalign and blindly extends
8940 *  it to the coordinates specified on the given row. If other rows are too
8941 *  short to allow this extension, the alignment is extended as far as
8942 *  possible. If to == -1 the extension goes to the end of the sequence
8943 *  specified.
8944 *
8945 ***************************************************************************/
AlnMgr2ExtendToCoords(SeqAlignPtr sap,Int4 from,Int4 to,Int4 row)8946 NLM_EXTERN void AlnMgr2ExtendToCoords(SeqAlignPtr sap, Int4 from, Int4 to, Int4 row)
8947 {
8948    BioseqPtr    bsp;
8949    Int4         diff1;
8950    Int4         diff2;
8951    DenseSegPtr  dsp;
8952    DenseSegPtr  dsp_new;
8953    Int4         i;
8954    Int4         j;
8955    Int4         numrows;
8956    Int4         numseg;
8957    Int4         prediff1;
8958    Int4         prediff2;
8959    Int4         seg;
8960    SeqIdPtr     sip;
8961    Int4         start;
8962    Int4         stop;
8963 
8964    if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_CHILD)
8965       return;
8966    numrows = AlnMgr2GetNumRows(sap);
8967    if (row < 1 || row > numrows)
8968       return;
8969    AlnMgr2GetNthSeqRangeInSA(sap, row, &start, &stop);
8970    numseg = 0;
8971    dsp = (DenseSegPtr)(sap->segs);
8972    if (start <= from)
8973       from = start;
8974    else
8975       numseg++;
8976    diff1 = start - from;
8977    sip = AlnMgr2GetNthSeqIdPtr(sap, row);
8978    bsp = BioseqLockById(sip);
8979    if (to == -1)
8980       to = bsp->length - 1;
8981    BioseqUnlock(bsp);
8982    SeqIdFree(sip);
8983    if (stop >= to)
8984       to = stop;
8985    else
8986       numseg++;
8987    diff2 = to - stop;
8988    if (numseg == 0)
8989       return;
8990    sip = dsp->ids;
8991    prediff1 = diff1;
8992    prediff2 = diff2;
8993    for (i=0; i<numrows; i++)
8994    {
8995       bsp = BioseqLockById(sip);
8996       if (dsp->strands[i] == Seq_strand_minus)
8997       {
8998          if (dsp->starts[i]+dsp->lens[0]+diff1 > bsp->length)
8999             diff1 = bsp->length - (dsp->starts[i] + dsp->lens[0]);
9000          if (dsp->starts[(dsp->numseg-1)*dsp->dim+i] > diff2)
9001             diff2 = dsp->starts[(dsp->numseg-1)*dsp->dim+i];
9002       } else
9003       {
9004          if (dsp->starts[i] < diff1)
9005             diff1 = dsp->starts[i];
9006          if (dsp->starts[(dsp->numseg-1)*dsp->dim+i]+dsp->lens[dsp->numseg-1]+diff2 > bsp->length)
9007             diff2 = bsp->length - (dsp->starts[(dsp->numseg-1)*dsp->dim+i] + dsp->lens[dsp->numseg-1]);
9008       }
9009       sip = sip->next;
9010       BioseqUnlock(bsp);
9011    }
9012    if (diff1 == 0 && prediff1 != 0)
9013       numseg--;
9014    else if (diff1 < 0)
9015       numseg--;
9016    if (diff2 == 0 && prediff2 != 0)
9017       numseg--;
9018    else if (diff2 < 0)
9019       numseg--;
9020    if (numseg == 0)
9021       return;
9022    dsp_new = DenseSegNew();
9023    dsp_new->dim = dsp->dim;
9024    dsp_new->numseg = dsp->numseg+numseg;
9025    dsp_new->starts = (Int4Ptr)MemNew(dsp_new->dim*dsp_new->numseg*sizeof(Int4));
9026    dsp_new->lens = (Int4Ptr)MemNew(dsp_new->numseg*sizeof(Int4));
9027    dsp_new->strands = (Uint1Ptr)MemNew(dsp_new->dim*dsp_new->numseg*sizeof(Uint1));
9028    seg = 0;
9029    if (diff1 > 0)
9030    {
9031       for (j=0; j<dsp->dim; j++)
9032       {
9033          AlnMgr2GetNthSeqRangeInSA(sap, j+1, &start, &stop);
9034          if (dsp->strands[j] == Seq_strand_minus)
9035             dsp_new->starts[j] = stop+1;
9036          else
9037             dsp_new->starts[j] = start-diff1;
9038          dsp_new->strands[j] = dsp->strands[j];
9039       }
9040       dsp_new->lens[0] = diff1;
9041       seg++;
9042    }
9043    for (i=0; i<dsp->numseg; i++)
9044    {
9045       for (j=0; j<dsp->dim; j++)
9046       {
9047          dsp_new->starts[(seg)*dsp->dim+j] = dsp->starts[i*dsp->dim+j];
9048          dsp_new->strands[(seg)*dsp->dim+j] = dsp->strands[i*dsp->dim+j];
9049       }
9050       dsp_new->lens[seg] = dsp->lens[i];
9051       seg++;
9052    }
9053    if (diff2 > 0)
9054    {
9055       for (j=0; j<dsp->dim; j++)
9056       {
9057          AlnMgr2GetNthSeqRangeInSA(sap, j+1, &start, &stop);
9058          if (dsp->strands[j] == Seq_strand_minus)
9059             dsp_new->starts[seg*dsp->dim+j] = start-diff2;
9060          else
9061             dsp_new->starts[seg*dsp->dim+j] = stop+1;
9062          dsp_new->strands[seg*dsp->dim+j] = dsp->strands[j];
9063       }
9064       dsp_new->lens[seg] = diff2;
9065    }
9066    dsp_new->ids = dsp->ids;
9067    dsp->ids = NULL;
9068    DenseSegFree(dsp);
9069    sap->segs = (Pointer)dsp_new;
9070    SAIndex2Free2(sap->saip);
9071    sap->saip = NULL;
9072    AlnMgr2IndexSingleChildSeqAlign(sap);
9073 }
9074 
9075 /* SECTION 10 */
9076 /***************************************************************************
9077 *
9078 *  AlnMgr2PadConservatively extends an alignment so that the whole of
9079 *  all sequences is included. If two sequences have tails on the same
9080 *  side, they are each aligned with columns of all gaps:
9081 *
9082 *   <-new aln region->
9083 *   xxxxxxxx----------xxxxxxxxxxxxxxxxxxxx
9084 *   --------xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
9085 *
9086 *  This function returns a newly allocated alignment and doesn't change
9087 *  the original (except for indexing). If the extension was not done for
9088 *  some reason, the function returns NULL;
9089 *
9090 ***************************************************************************/
AlnMgr2PadConservatively(SeqAlignPtr sap)9091 NLM_EXTERN SeqAlignPtr AlnMgr2PadConservatively(SeqAlignPtr sap)
9092 {
9093    AMAlignIndex2Ptr  amaip;
9094    BioseqPtr         bsp;
9095    Int4              ctr1;
9096    Int4              ctr2;
9097    DenseSegPtr       dsp;
9098    DenseSegPtr       dsp_new;
9099    Int4              i;
9100    Int4              j;
9101    Int4Ptr           lenarray;
9102    Int4              n1;
9103    Int4              n2;
9104    Int4              newseg;
9105    SeqAlignPtr       sap_new;
9106    SeqIdPtr          sip;
9107    Int4              start;
9108    Int4              stop;
9109    Uint1             strand;
9110 
9111    if (sap == NULL || sap->next != NULL)
9112       return NULL;
9113    if (sap->saip == NULL)
9114       AlnMgr2IndexSeqAlign(sap);
9115    if (sap->saip->indextype == INDEX_PARENT)
9116    {
9117       amaip = (AMAlignIndex2Ptr)(sap->saip);
9118       if (amaip->alnstyle == AM2_LITE)
9119          return NULL;
9120       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
9121    } else
9122       dsp = (DenseSegPtr)(sap->segs);
9123    newseg = 0;
9124    lenarray = (Int4Ptr)MemNew(dsp->dim*sizeof(Int4));
9125    n1 = n2 = 0;
9126    for (i=0; i<dsp->dim; i++)
9127    {
9128       sip = AlnMgr2GetNthSeqIdPtr(sap, i+1);
9129       bsp = BioseqLockById(sip);
9130       lenarray[i] = bsp->length;
9131       BioseqUnlock(bsp);
9132       SeqIdFree(sip);
9133       AlnMgr2GetNthSeqRangeInSA(sap, i+1, &start, &stop);
9134       if (start > 0)
9135       {
9136          n1++;
9137          newseg++;
9138       }
9139       if (stop < lenarray[i]-1)
9140       {
9141          newseg++;
9142       }
9143    }
9144    if (newseg == 0)
9145    {
9146       MemFree(lenarray);
9147       return NULL;
9148    }
9149    dsp_new = DenseSegNew();
9150    dsp_new->numseg = dsp->numseg + newseg;
9151    dsp_new->dim = dsp->dim;
9152    dsp_new->ids = SeqIdDupList(dsp->ids);
9153    dsp_new->starts = (Int4Ptr)MemNew(dsp_new->numseg*dsp_new->dim*sizeof(Int4));
9154    dsp_new->lens = (Int4Ptr)MemNew(dsp_new->numseg*sizeof(Int4));
9155    dsp_new->strands = (Uint1Ptr)MemNew(dsp_new->numseg*dsp_new->dim*sizeof(Uint1));
9156    n2 = n1+dsp->numseg;
9157    ctr1 = 0;
9158    ctr2 = 0;
9159    for (i=0; i<dsp->dim; i++)
9160    {
9161       AlnMgr2GetNthSeqRangeInSA(sap, i+1, &start, &stop);
9162       strand = AlnMgr2GetNthStrand(sap, i+1);
9163       if (strand == Seq_strand_minus && lenarray[i]-1-stop > 0)
9164       {
9165          for (j=0; j<ctr1; j++)
9166          {
9167             dsp_new->starts[dsp->dim*j+i] = -1;
9168             dsp_new->strands[dsp->dim*j+i] = strand;
9169          }
9170          dsp_new->starts[dsp->dim*ctr1+i] = stop+1;
9171          dsp_new->lens[ctr1] = lenarray[i]-1-stop;
9172          dsp_new->strands[dsp->dim*ctr1+i] = strand;
9173          for (j=ctr1+1; j<n1; j++)
9174          {
9175             dsp_new->starts[dsp->dim*j+i] = -1;
9176             dsp_new->strands[dsp->dim*j+i] = strand;
9177          }
9178          ctr1++;
9179       } else if (strand == Seq_strand_plus && start > 0)
9180       {
9181          for (j=0; j<ctr1; j++)
9182          {
9183             dsp_new->starts[dsp->dim*j+i] = -1;
9184             dsp_new->strands[dsp->dim*j+i] = strand;
9185          }
9186          dsp_new->starts[dsp->dim*ctr1+i] = 0;
9187          dsp_new->lens[ctr1] = start;
9188          dsp_new->strands[dsp->dim*ctr1+i] = strand;
9189          for (j=ctr1+1; j<n1; j++)
9190          {
9191             dsp_new->starts[dsp->dim*j+i] = -1;
9192             dsp_new->strands[dsp->dim*j+i] = strand;
9193          }
9194          ctr1++;
9195       } else /* nothing to add on this row, just fill in with -1s */
9196       {
9197          for (j=0; j<n1; j++)
9198          {
9199             dsp_new->starts[dsp->dim*j+i] = -1;
9200             dsp_new->strands[dsp->dim*j+i] = strand;
9201          }
9202       }
9203    /* now fill in the non-extended part of the alignment (copy from original) */
9204       for (j=0; j<dsp->numseg; j++)
9205       {
9206          dsp_new->starts[dsp->dim*(j+n1)+i] = dsp->starts[dsp->dim*j+i];
9207          dsp_new->lens[j+n1] = dsp->lens[j];
9208          dsp_new->strands[dsp->dim*(j+n1)+i] = dsp->strands[dsp->dim*j+i];
9209       }
9210   /* now the other ends */
9211       if (strand == Seq_strand_minus && start > 0)
9212       {
9213          for (j=n2; j<n2+ctr2; j++)
9214          {
9215             dsp_new->starts[dsp->dim*j+i] = -1;
9216             dsp_new->strands[dsp->dim*j+i] = strand;
9217          }
9218          dsp_new->starts[dsp->dim*(ctr2+n2)+i] = 0;
9219          dsp_new->lens[ctr2+n2] = start;
9220          dsp_new->strands[dsp->dim*(ctr2+n2)+i] = strand;
9221          for (j=n2+ctr2+1; j<dsp_new->numseg; j++)
9222          {
9223             dsp_new->starts[dsp->dim*j+i] = -1;
9224             dsp_new->strands[dsp->dim*j+i] = strand;
9225          }
9226          ctr2++;
9227       } else if (strand == Seq_strand_plus && lenarray[i]-1-stop > 0)
9228       {
9229          for (j=n2; j<ctr2+n2; j++)
9230          {
9231             dsp_new->starts[dsp->dim*j+i] = -1;
9232             dsp_new->strands[dsp->dim*j+i] = strand;
9233          }
9234          dsp_new->starts[dsp->dim*(ctr2+n2)+i] = stop+1;
9235          dsp_new->lens[ctr2+n2] = lenarray[i]-1-stop;
9236          dsp_new->strands[dsp->dim*(ctr2+n2)+i] = strand;
9237          for (j=ctr2+n2+1; j<dsp_new->numseg; j++)
9238          {
9239             dsp_new->starts[dsp->dim*j+i] = -1;
9240             dsp_new->strands[dsp->dim*j+i] = strand;
9241          }
9242          ctr2++;
9243       } else /* nothing to add on this row, just fill in with -1s */
9244       {
9245          for (j=n2; j<dsp_new->numseg; j++)
9246          {
9247             dsp_new->starts[dsp->dim*j+i] = -1;
9248             dsp_new->strands[dsp->dim*j+i] = strand;
9249          }
9250       }
9251    }
9252    sap_new = SeqAlignNew();
9253    sap_new->dim = dsp->dim;
9254    sap_new->segtype = SAS_DENSEG;
9255    sap_new->segs = (Pointer)(dsp_new);
9256    MemFree(lenarray);
9257    return sap_new;
9258 }
9259 
9260 /* SECTION 10 */
9261 /***************************************************************************
9262 *
9263 *  AlnMgr2ExtractPairwiseSeqAlign takes an indexed alignment (parent or
9264 *  child, but must be fully indexed, not lite) and extracts a pairwise
9265 *  subalignment containing the two requested rows. The subalignment is
9266 *  unindexed and may have internal unaligned regions.
9267 *
9268 ***************************************************************************/
AlnMgr2ExtractPairwiseSeqAlign(SeqAlignPtr sap,Int4 n1,Int4 n2)9269 NLM_EXTERN SeqAlignPtr AlnMgr2ExtractPairwiseSeqAlign(SeqAlignPtr sap, Int4 n1, Int4 n2)
9270 {
9271    AMAlignIndex2Ptr  amaip;
9272    DenseSegPtr       dsp;
9273    DenseSegPtr       dsp_new;
9274    Int4              i;
9275    Int4              j;
9276    Int4              n;
9277    SeqAlignPtr       sap_new;
9278 
9279    if (sap == NULL || sap->saip == NULL || n1 == n2 || n1 <= 0 || n2 <= 0)
9280       return NULL;
9281    if (sap->saip->indextype == INDEX_CHILD)
9282       dsp = (DenseSegPtr)(sap->segs);
9283    else
9284    {
9285       amaip = (AMAlignIndex2Ptr)(sap->saip);
9286       dsp = (DenseSegPtr)(amaip->sharedaln->segs);
9287    }
9288    if (n1 > dsp->dim || n2 > dsp->dim)
9289       return NULL;
9290    n = 0;
9291    for (i=0; i<dsp->numseg; i++)
9292    {
9293       if (dsp->starts[dsp->dim*i+n1-1] == -1 && dsp->starts[dsp->dim*i+n2-1] == -1)
9294          n++;
9295    }
9296    if (n == dsp->numseg) /* no overlap at all */
9297       return NULL;
9298    dsp_new = DenseSegNew();
9299    dsp_new->numseg = dsp->numseg - n;
9300    dsp_new->starts = (Int4Ptr)MemNew(2*dsp_new->numseg*sizeof(Int4));
9301    dsp_new->strands = (Uint1Ptr)MemNew(2*dsp_new->numseg*sizeof(Uint1));
9302    dsp_new->lens = (Int4Ptr)MemNew(dsp_new->numseg*sizeof(Int4));
9303    dsp_new->dim = 2;
9304    dsp_new->ids = AlnMgr2GetNthSeqIdPtr(sap, n1);
9305    dsp_new->ids->next = AlnMgr2GetNthSeqIdPtr(sap, n2);
9306    j = 0;
9307    for (i=0; i<dsp->numseg; i++)
9308    {
9309       if (dsp->starts[dsp->dim*i+n1-1] > -1 || dsp->starts[dsp->dim*i+n2-1] > -1)
9310       {
9311          dsp_new->starts[2*j] = dsp->starts[dsp->dim*i+n1-1];
9312          dsp_new->starts[2*j+1] = dsp->starts[dsp->dim*i+n2-1];
9313          dsp_new->strands[2*j] = dsp->strands[n1-1];
9314          dsp_new->strands[2*j+1] = dsp->strands[n2-1];
9315          dsp_new->lens[j] = dsp->lens[i];
9316          j++;
9317       }
9318    }
9319    sap_new = SeqAlignNew();
9320    sap_new->dim = 2;
9321    sap_new->type = SAT_PARTIAL;
9322    sap_new->segtype = SAS_DENSEG;
9323    sap_new->segs = (Pointer)dsp_new;
9324    return sap_new;
9325 }
9326 
9327 /* SECTION 10 */
/* Frees a whole linked list of AMConsSet nodes together with the per-row */
/* arrays each node owns (starts, stops, strands, which). The alignment   */
/* a node points to (acp->sap) is not freed. A NULL list is a no-op.      */
static void amconssetfree(AMConsSetPtr acp)
{
   AMConsSetPtr  acp_next;

   while (acp != NULL)
   {
      acp_next = acp->next;
      MemFree(acp->starts);
      MemFree(acp->stops);
      MemFree(acp->strands);
      MemFree(acp->which); /* allocated alongside the other arrays; was leaked before */
      MemFree(acp);
      acp = acp_next;
   }
}
9342 
AlnMgr2SortForConsistent(VoidPtr ptr1,VoidPtr ptr2)9343 static int LIBCALLBACK AlnMgr2SortForConsistent(VoidPtr ptr1, VoidPtr ptr2)
9344 {
9345    AMConsSetPtr  acp1;
9346    AMConsSetPtr  acp2;
9347    SAIndex2Ptr   saip1;
9348    SAIndex2Ptr   saip2;
9349 
9350    acp1 = *((AMConsSetPtr PNTR)ptr1);
9351    acp2 = *((AMConsSetPtr PNTR)ptr2);
9352    saip1 = (SAIndex2Ptr)(acp1->sap->saip);
9353    saip2 = (SAIndex2Ptr)(acp2->sap->saip);
9354    if (saip1->score == 0)
9355       saip1->score = AlnMgr2ComputeScoreForSeqAlign(acp1->sap);
9356    if (saip2->score == 0)
9357       saip2->score = AlnMgr2ComputeScoreForSeqAlign(acp2->sap);
9358    if (saip1->score > saip2->score)
9359       return -1;
9360    else if (saip1->score < saip2->score)
9361       return 1;
9362    else
9363       return 0;
9364 }
9365 
9366 /* SECTION 10 */
9367 /***************************************************************************
9368 *
9369 *  AlnMgr2RemoveInconsistentAlnsFromSet takes an alignment that is
9370 *  indexed at least at the AM2_LITE level, and prunes the child
9371 *  alignments so that the remaining alignments form a consistent,
9372 *  nonoverlapping set. All alignments must have the same number of rows,
9373 *  and they must be the same rows (although not necessarily in the same
9374 *  order). The function uses a simple greedy algorithm to construct the
9375 *  nonoverlapping set, starting with the highest-scoring alignment.
9376 *  If fuzz is negative, the function creates the best nonoverlapping set
9377 *  by actually truncating alignments.
9378 *
9379 ***************************************************************************/
AlnMgr2RemoveInconsistentAlnsFromSet(SeqAlignPtr sap_head,Int4 fuzz)9380 NLM_EXTERN void AlnMgr2RemoveInconsistentAlnsFromSet(SeqAlignPtr sap_head, Int4 fuzz)
9381 {
9382    AMConsSetPtr  acp;
9383    AMConsSetPtr  acp_head;
9384    AMConsSetPtr  acp_prev;
9385    AMConsSetPtr  PNTR acparray;
9386    DenseSegPtr   dsp;
9387    Int4          i;
9388    Int4          j;
9389    Int4          k;
9390    Int4          lfuzz;
9391    SeqAlignPtr   newsap;
9392    Int4          numrows;
9393    Int4          numsaps;
9394    Int4          orientation;
9395    Int4          row;
9396    SAIndex2Ptr   saip;
9397    SeqAlignPtr   salp_head;
9398    SeqAlignPtr   salp_prev;
9399    SeqAlignPtr   sap;
9400    SeqAlignPtr   sapnext;
9401    Int4          score;
9402    SeqIdPtr      sip;
9403    SeqIdPtr      sip_head;
9404    Uint1         strand;
9405 
9406    lfuzz = fuzz;
9407    if (fuzz < 0)
9408       fuzz = 1;
9409    sap = (SeqAlignPtr)(sap_head->segs);
9410    if (sap->next == NULL)
9411       return;
9412    dsp = (DenseSegPtr)(sap->segs);
9413    sip_head = dsp->ids;
9414    numrows = AlnMgr2GetNumRows(sap);
9415    acp_head = NULL;
9416    strand = AlnMgr2GetNthStrand(sap, 1);
9417    numsaps = 0;
9418    while (sap != NULL)
9419    {
9420       if (AlnMgr2GetNumRows(sap) != numrows)
9421       {
9422          amconssetfree(acp_head);
9423          return;
9424       }
9425       numsaps++;
9426       acp = (AMConsSetPtr)MemNew(sizeof(AMConsSet));
9427       acp->starts = (Int4Ptr)MemNew(numrows*sizeof(Int4));
9428       acp->stops = (Int4Ptr)MemNew(numrows*sizeof(Int4));
9429       acp->strands = (Uint1Ptr)MemNew(numrows*sizeof(Uint1));
9430       acp->which = (Int4Ptr)MemNew(numrows*sizeof(Int4));
9431       acp->sap = sap;
9432       if (acp_head != NULL)
9433       {
9434          acp_prev->next = acp;
9435          acp_prev = acp;
9436       } else
9437          acp_head = acp_prev = acp;
9438       sip = sip_head;
9439       row = AlnMgr2GetFirstNForSip(sap, sip);
9440       if (row <= 0)
9441       {
9442          amconssetfree(acp_head);
9443          return;
9444       }
9445       if (acp->strands[row] != strand)
9446       {
9447          sapnext = acp->sap->next;
9448          acp->sap->next = NULL;
9449          score = ((SAIndex2Ptr)(acp->sap->saip))->score;
9450          SeqAlignListReverseStrand(acp->sap);
9451          AMAlignIndexFreeEitherIndex(acp->sap);
9452          AlnMgr2IndexSingleChildSeqAlign(acp->sap);
9453          saip = (SAIndex2Ptr)(acp->sap->saip);
9454          saip->score = score;
9455          acp->strands[row] = strand;
9456          acp->sap->next = sapnext;
9457       }
9458       for (i=0; i<numrows; i++)
9459       {
9460          acp->which[i] = row;
9461          AlnMgr2GetNthSeqRangeInSA(sap, i+1, &acp->starts[i], &acp->stops[i]);
9462          acp->strands[i] = AlnMgr2GetNthStrand(sap, i+1);
9463       }
9464       sap = sap->next;
9465    }
9466    acparray = (AMConsSetPtr PNTR)MemNew(numsaps*sizeof(AMConsSetPtr));
9467    acp = acp_head;
9468    i = 0;
9469    while (acp != NULL)
9470    {
9471       acparray[i] = acp;
9472       acp = acp->next;
9473       i++;
9474    }
9475    HeapSort(acparray, numsaps, sizeof(AMConsSetPtr), AlnMgr2SortForConsistent);
9476    /* orientation -1 means that ith is before jth in ALL rows, 1 means ith is after jth in ALL rows */
9477    for (i=0; i<numsaps; i++)
9478    {
9479       if (acparray[i]->used != -1)
9480       {
9481          for (j=i+1; j<numsaps; j++)
9482          {
9483             orientation = 0;
9484             for (k=0; acparray[j]->used != -1 && k<numrows; k++)
9485             {
9486                if (acparray[i]->strands[k] != acparray[j]->strands[k])
9487                   acparray[j]->used = -1;
9488                if (acparray[i]->starts[k] - fuzz < acparray[j]->starts[k])
9489                {
9490                   if (acparray[i]->stops[k] - fuzz < acparray[j]->starts[k])
9491                   {
9492                      if ((acparray[i]->strands[k] == Seq_strand_plus && orientation == 1) || (acparray[i]->strands[k] == Seq_strand_minus && orientation == -1))
9493                         acparray[j]->used = -1;
9494                      else if (orientation == 0)
9495                      {
9496                         if (acparray[i]->strands[k] == Seq_strand_minus)
9497                            orientation = 1;
9498                         else
9499                            orientation = -1;
9500                      }
9501                   } else
9502                   {
9503                      if (lfuzz >= 0) /* just mark it for deletion */
9504                         acparray[j]->used = -1;
9505                      else /* truncate it */
9506                      {
9507                         if (acparray[j]->stops[k] > acparray[i]->stops[k])
9508                         {
9509                            newsap = AlnMgr2GetSubAlign(acparray[j]->sap, acparray[i]->stops[k]+1, acparray[j]->stops[k], k+1, TRUE);
9510                            SeqAlignFree(acparray[j]->sap);
9511                            acparray[j]->sap = newsap;
9512                            acparray[j]->starts[k] = acparray[i]->stops[k]+1;
9513                         } else
9514                            acparray[j]->used = -1;
9515                      }
9516                   }
9517                } else if (acparray[i]->starts[k] - fuzz > acparray[j]->starts[k])
9518                {
9519                  if (acparray[i]->starts[k] + fuzz > acparray[j]->stops[k])
9520                   {
9521                      if ((acparray[i]->strands[k] == Seq_strand_plus && orientation == -1) || (acparray[i]->strands[k] == Seq_strand_minus && orientation == 1))
9522                         acparray[j]->used = -1;
9523                      else if (orientation == 0)
9524                      {
9525                         if (acparray[i]->strands[k] == Seq_strand_minus)
9526                            orientation = -1;
9527                         else
9528                            orientation = 1;
9529                      }
9530                   } else
9531                   {
9532                      if (lfuzz >= 0) /* mark for deletion */
9533                         acparray[j]->used = -1;
9534                      else /* truncate */
9535                      {
9536                         if (acparray[j]->starts[k] < acparray[i]->starts[k])
9537                         {
9538                            newsap = AlnMgr2GetSubAlign(acparray[j]->sap, acparray[j]->starts[k], acparray[i]->starts[k]-1, k+1, TRUE);
9539                            SeqAlignFree(acparray[j]->sap);
9540                            acparray[j]->sap = newsap;
9541                            AlnMgr2IndexSingleChildSeqAlign(newsap);
9542                            acparray[j]->starts[k] = acparray[i]->stops[k]+1;
9543                         } else
9544                            acparray[j]->used = -1;
9545                      }
9546                   }
9547                } else
9548                   acparray[j]->used = -1;
9549             }
9550          }
9551       }
9552    }
9553    /* now free all the unused ones, stick the rest back together, reindex, and return */
9554    salp_head = salp_prev = NULL;
9555    for (i=0; i<numsaps; i++)
9556    {
9557       if (acparray[i]->used == -1)
9558       {
9559          SeqAlignFree(acparray[i]->sap);
9560          acparray[i]->sap = NULL;
9561       } else
9562       {
9563          if (salp_head != NULL)
9564          {
9565             salp_prev->next = acparray[i]->sap;
9566             salp_prev = acparray[i]->sap;
9567             salp_prev->next = NULL;
9568          } else
9569          {
9570             salp_head = salp_prev = acparray[i]->sap;
9571             salp_prev->next = NULL;
9572          }
9573       }
9574    }
9575    amconssetfree(acp_head);
9576    MemFree(acparray);
9577    sap_head->segs = (Pointer)(salp_head);
9578    AMAlignIndex2Free2(sap_head->saip);
9579    AlnMgr2IndexLite(sap_head);
9580 }
9581 
AlnMgr2CompareByScore(VoidPtr ptr1,VoidPtr ptr2)9582 static int LIBCALLBACK AlnMgr2CompareByScore(VoidPtr ptr1, VoidPtr ptr2)
9583 {
9584    SAIndex2Ptr  saip1;
9585    SAIndex2Ptr  saip2;
9586    SeqAlignPtr  sap1;
9587    SeqAlignPtr  sap2;
9588 
9589    if (ptr1 == NULL || ptr2 == NULL)
9590       return 0;
9591    sap1 = *((SeqAlignPtr PNTR) ptr1);
9592    sap2 = *((SeqAlignPtr PNTR) ptr2);
9593    saip1 = (SAIndex2Ptr)(sap1->saip);
9594    saip2 = (SAIndex2Ptr)(sap2->saip);
9595    if (saip1->score == 0)
9596       saip1->score = AlnMgr2ComputeScoreForSeqAlign(sap1);
9597    if (saip2->score == 0)
9598       saip2->score = AlnMgr2ComputeScoreForSeqAlign(sap2);
9599    if (saip1->score > saip2->score)
9600       return -1;
9601    if (saip1->score < saip2->score)
9602       return 1;
9603    return 0;
9604 }
9605 
9606 /***************************************************************************
9607 *
9608 *  AlnMgr2FuseSet takes a set of alignments sharing all their rows and orders
9609 *  the alignments, then fuses together any adjacent alignments. If returnall
9610 *  is TRUE, all pieces are returned; if not, then only the largest piece is
9611 *  returned. This function will work best when called after
9612 *  AlnMgr2RemoveInconsistentAlnsFromSet(sap_head, -1).
9613 *
9614 ***************************************************************************/
AlnMgr2FuseSet(SeqAlignPtr sap_head,Boolean returnall)9615 NLM_EXTERN SeqAlignPtr AlnMgr2FuseSet(SeqAlignPtr sap_head, Boolean returnall)
9616 {
9617    AMAlignIndex2Ptr  amaip;
9618    DenseSegPtr       dsp_new;
9619    DenseSegPtr       dsp1;
9620    DenseSegPtr       dsp2;
9621    Boolean           found;
9622    Int4              i;
9623    Int4              n;
9624    Int4              numrows;
9625    Int4              r;
9626    SeqAlignPtr       sap_keep;
9627    SeqAlignPtr       sap_keep_head;
9628    SeqAlignPtr       sap_keep_prev;
9629    SAIndex2Ptr       saip;
9630    SeqAlignPtr       PNTR saparray;
9631    Int4              start1;
9632    Int4              start2;
9633    Int4              stop1;
9634    Int4              stop2;
9635    Uint1             strand;
9636 
9637    if (sap_head == NULL || sap_head->saip == NULL)
9638       return NULL;
9639    AlnMgr2SortAlnSetByNthRowPos(sap_head, 1);
9640    amaip = (AMAlignIndex2Ptr)(sap_head->saip);
9641    sap_keep = amaip->saps[0];
9642    sap_keep_head = sap_keep_prev = NULL;
9643    numrows = AlnMgr2GetNumRows(sap_keep);
9644    for (i=1; i<amaip->numsaps; i++)
9645    {
9646       /* check for consistency with sap_keep; fuse if possible */
9647       found = FALSE;
9648       for (n=0; !found && n<numrows; n++)
9649       {
9650          strand = AlnMgr2GetNthStrand(sap_keep, n+1);
9651          AlnMgr2GetNthSeqRangeInSA(sap_keep, n+1, &start1, &stop1);
9652          AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], n+1, &start2, &stop2);
9653          if (strand == Seq_strand_minus)
9654          {
9655             if (stop2+1 != start1)
9656                found = TRUE;
9657          } else
9658          {
9659             if (start2 != stop1+1)
9660                found = TRUE;
9661          }
9662       }
9663       if (!found) /* fuse together */
9664       {
9665          dsp1 = (DenseSegPtr)(sap_keep->segs);
9666          dsp2 = (DenseSegPtr)(amaip->saps[i]->segs);
9667          dsp_new = DenseSegNew();
9668          dsp_new->dim = dsp1->dim;
9669          dsp_new->numseg = dsp1->numseg+dsp2->numseg;
9670          dsp_new->starts = (Int4Ptr)MemNew(dsp_new->numseg*dsp_new->dim*sizeof(Int4));
9671          dsp_new->lens = (Int4Ptr)MemNew(dsp_new->numseg*sizeof(Int4));
9672          dsp_new->strands = (Uint1Ptr)MemNew(dsp_new->numseg*dsp_new->dim*sizeof(Int4));
9673          for (n=0; n<dsp_new->numseg; n++)
9674          {
9675             for (r=0; r<dsp_new->dim; r++)
9676             {
9677                if (n >= dsp1->numseg)
9678                   dsp_new->starts[r*n*r] = dsp2->starts[r*(n-dsp1->numseg)+r];
9679                else
9680                   dsp_new->starts[r*n+r] = dsp1->starts[r*n+r];
9681                dsp_new->strands[r*n*r] = dsp1->strands[r];
9682             }
9683             if (n >= dsp1->numseg)
9684                dsp_new->lens[n] = dsp2->lens[n-dsp1->numseg];
9685             else
9686                dsp_new->lens[n] = dsp1->lens[n];
9687          }
9688          SeqAlignFree(amaip->saps[i]);
9689          amaip->saps[i] = NULL;
9690       } else /* add next alignment to keepers pile */
9691       {
9692          if (sap_keep_head == NULL)
9693          {
9694             if (sap_keep != NULL)
9695             {
9696                sap_keep_head = sap_keep;
9697                sap_keep->next = amaip->saps[i];
9698                sap_keep_prev = amaip->saps[i];
9699             } else
9700                sap_keep_head = sap_keep_prev = amaip->saps[i];
9701          } else
9702          {
9703             sap_keep_prev->next = amaip->saps[i];
9704             sap_keep_prev = amaip->saps[i];
9705          }
9706       }
9707    }
9708    if (sap_keep_head == NULL || sap_keep_head->next == NULL) /* everything was fused */
9709       sap_keep_head = sap_keep;
9710    if (returnall)
9711    {
9712       sap_head->segs = (Pointer)(sap_keep_head);
9713       return sap_keep_head;
9714    }
9715    i=0;
9716    sap_keep = sap_keep_head;
9717    while (sap_keep != NULL)
9718    {
9719       sap_keep = sap_keep->next;
9720       i++;
9721    }
9722    saparray = (SeqAlignPtr PNTR)MemNew(i*sizeof(SeqAlignPtr));
9723    i = 0;
9724    sap_keep = sap_keep_head;
9725    while (sap_keep != NULL)
9726    {
9727       saip = (SAIndex2Ptr)(sap_keep->saip);
9728       saip->score = 0;
9729       saparray[i] = sap_keep;
9730       i++;
9731       sap_keep = sap_keep->next;
9732    }
9733    HeapSort(saparray, i, sizeof(SeqAlignPtr), AlnMgr2CompareByScore);
9734    sap_keep = saparray[0];
9735    for (n=1; n<i; n++)
9736    {
9737       SeqAlignFree(saparray[n]);
9738    }
9739    MemFree(saparray);
9740    return sap_keep;
9741 }
9742 
AlnMgr2FillInUnaligned(SeqAlignPtr sap)9743 NLM_EXTERN void AlnMgr2FillInUnaligned(SeqAlignPtr sap)
9744 {
9745    Int4         curr;
9746    DenseSegPtr  dsp;
9747    DenseSegPtr  dsp_new;
9748    Boolean      found;
9749    Int4         i;
9750    Int4         j;
9751    Int4         k;
9752    Int4         last;
9753    Int4         n;
9754    Int4         offset;
9755    Int4         start;
9756    Int4         stop;
9757    Uint1        strand;
9758 
9759    if (sap == NULL || (sap->saip != NULL && sap->saip->indextype != INDEX_CHILD))
9760       return;
9761    n = 0;
9762    dsp = (DenseSegPtr)(sap->segs);
9763    for (i=0; i<dsp->dim; i++)
9764    {
9765       j = 0;
9766       AlnMgr2GetNthSeqRangeInSA(sap, i, &start, &stop);
9767       strand = dsp->strands[i];
9768       last = -1;
9769       while (j<dsp->numseg-1)
9770       {
9771          if (strand == Seq_strand_minus)
9772          {
9773             if (last != -1)
9774             {
9775                found = FALSE;
9776                while (j<dsp->numseg && !found)
9777                {
9778                   if (dsp->starts[j*dsp->dim+i] != -1)
9779                   {
9780                      if (dsp->starts[j*dsp->dim+i]+dsp->lens[j] != last)
9781                         n++;
9782                      found = TRUE;
9783                   }
9784                   if (!found)
9785                      j++;
9786                }
9787             } else
9788                last = dsp->starts[j*dsp->dim+i];
9789          } else
9790          {
9791             if (last != -1)
9792             {
9793                found = FALSE;
9794                while (j<dsp->numseg && !found)
9795                {
9796                   if (dsp->starts[j*dsp->dim+i] != -1)
9797                   {
9798                      if (dsp->starts[j*dsp->dim+i]+dsp->lens[j] != last)
9799                         n++;
9800                      found = TRUE;
9801                   }
9802                   if (!found)
9803                      j++;
9804                }
9805             } else
9806             {
9807                last = dsp->starts[j*dsp->dim+i];
9808                if (last != -1)
9809                   last += dsp->lens[j];
9810             }
9811          }
9812       }
9813    }
9814    if (n == 0) /* no unaligned regions */
9815       return;
9816    dsp_new = DenseSegNew();
9817    dsp_new->numseg = dsp->numseg + n;
9818    dsp_new->dim = dsp->dim;
9819    dsp_new->starts = (Int4Ptr)MemNew(dsp_new->dim*dsp_new->numseg*sizeof(Int4));
9820    dsp_new->strands = (Uint1Ptr)MemNew(dsp_new->dim*dsp_new->numseg*sizeof(Uint1));
9821    for (i=0; i<dsp_new->numseg; i++)
9822    {
9823       for (j=0; j<dsp_new->dim; j++)
9824       {
9825          dsp_new->strands[i*dsp_new->dim+j] = dsp->strands[j];
9826       }
9827    }
9828    dsp_new->ids = SeqIdDupList(dsp->ids);
9829    dsp_new->lens = (Int4Ptr)MemNew(dsp_new->numseg*sizeof(Int4));
9830    curr = 0;
9831    for (j=0; j<dsp->numseg; j++)
9832    {
9833       for (i=0; i<dsp->dim; i++)
9834       {
9835          offset = 0;
9836          strand = dsp->strands[i];
9837          if (dsp->starts[j*dsp->dim+i] == -1)
9838             dsp_new->starts[curr*dsp_new->dim+i] = -1;
9839          else
9840          {
9841             k = j+1;
9842             found = FALSE;
9843             while (k < dsp->numseg)
9844             {
9845                if (dsp->starts[k*dsp->dim+i] != -1)
9846                {
9847                   found = TRUE;
9848                   if (strand == Seq_strand_minus)
9849                   {
9850                      if (dsp->starts[k*dsp->dim+i] + dsp->lens[k] != dsp->starts[j*dsp->dim+i])
9851                      {
9852                         dsp_new->lens[curr+offset] = dsp->starts[j*dsp->dim+i] - dsp->starts[k*dsp->dim+i] - dsp->lens[k];
9853                         dsp_new->starts[(curr+offset)*dsp->dim+i] = dsp->starts[k*dsp->dim+i] + dsp->lens[k];
9854                         offset++;
9855                      }
9856                   } else
9857                   {
9858                      if (dsp->starts[j*dsp->dim+i] + dsp->lens[j] != dsp->starts[k*dsp->dim+i])
9859                      {
9860                         dsp_new->lens[curr+offset] = dsp->starts[k*dsp->dim+i] - dsp->starts[j*dsp->dim+i] - dsp->lens[j];
9861                         dsp_new->starts[(curr+offset)*dsp->dim+i] = dsp->starts[j*dsp->dim+i] + dsp->lens[j];
9862                      }
9863                   }
9864                }
9865                k++;
9866             }
9867          }
9868       }
9869       curr = curr + 1 + offset;
9870    }
9871    DenseSegFree(dsp);
9872    sap->segs = (Pointer)(dsp_new);
9873    AMAlignIndexFreeEitherIndex(sap);
9874 }
9875 
9876 /* SECTION 11 -- functions for std-segs */
AlnMgr2GetNthSeqIdPtrStdSeg(SeqAlignPtr sap,Int4 n)9877 NLM_EXTERN SeqIdPtr AlnMgr2GetNthSeqIdPtrStdSeg(SeqAlignPtr sap, Int4 n)
9878 {
9879    SeqLocPtr  slp;
9880    StdSegPtr  ssp;
9881 
9882    if (sap == NULL || sap->segtype != SAS_STD)
9883       return NULL;
9884    ssp = (StdSegPtr)(sap->segs);
9885    slp = ssp->loc;
9886    n--;
9887    while (n > 0)
9888    {
9889       if (slp == NULL)
9890          return NULL;
9891       slp = slp->next;
9892       n--;
9893    }
9894    return (SeqIdDup(SeqLocId(slp)));
9895 }
9896 
AlignMgr2GetFirstNForStdSeg(SeqAlignPtr sap,SeqIdPtr sip)9897 NLM_EXTERN Int4 AlignMgr2GetFirstNForStdSeg(SeqAlignPtr sap, SeqIdPtr sip)
9898 {
9899    Int4       i;
9900    SeqIdPtr   sip_tmp;
9901    StdSegPtr  ssp;
9902 
9903    if (sap == NULL || sap->segtype != SAS_STD)
9904       return -1;
9905    ssp = (StdSegPtr)(sap->segs);
9906    sip_tmp = ssp->ids;
9907    i = 1;
9908    while (sip_tmp != NULL)
9909    {
9910       if (SeqIdComp(sip, sip_tmp) == SIC_YES)
9911          return i;
9912       sip_tmp = sip_tmp->next;
9913       i++;
9914    }
9915    return -1;
9916 }
9917 
AlnMgr2GetNthSeqRangeInSAStdSeg(SeqAlignPtr sap,Int4 n,Int4Ptr start,Int4Ptr stop)9918 NLM_EXTERN void AlnMgr2GetNthSeqRangeInSAStdSeg(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop)
9919 {
9920    SeqLocPtr  slp;
9921    StdSegPtr  ssp;
9922 
9923    if (start != NULL)
9924       *start = -1;
9925    if (stop != NULL)
9926       *stop = -1;
9927    if (sap == NULL || sap->segtype != SAS_STD)
9928       return;
9929    ssp = (StdSegPtr)(sap->segs);
9930    slp = ssp->loc;
9931    n--;
9932    while (n > 0)
9933    {
9934       if (slp == NULL)
9935          return;
9936       slp = slp->next;
9937       n--;
9938    }
9939    if (slp == NULL)
9940       return;
9941    if (start != NULL)
9942       *start = SeqLocStart(slp);
9943    if (stop != NULL)
9944       *stop = SeqLocStop(slp);
9945 }
9946 
9947 
9948 /***************************************************************************
9949 *
9950 *   AlnMgr2GetSeqRangeForSipInSAStdSeg  returns the smallest and largest sequence
9951 *  coordinates in in a Std-Seg seqalign for a given Sequence Id.  Also return the
9952 *  strand type.  Either start, stop or strand can be NULL to only retrieve some of them.
9953 *  If start and stop are -1, there is an error (not a std-seg), the SeqID does not participate in this
9954 *  alignment or the alignment is one big insert on that id.  Returns true if the sip was found
9955 *  in the alignment with real coordinates, i.e. *start would not be -1.  RANGE
9956 *
9957 ***************************************************************************/
AlnMgr2GetSeqRangeForSipInSAStdSeg(SeqAlignPtr sap,SeqIdPtr sip,Int4Ptr start,Int4Ptr stop,Uint1Ptr strand)9958 NLM_EXTERN Boolean AlnMgr2GetSeqRangeForSipInSAStdSeg(SeqAlignPtr sap, SeqIdPtr sip, Int4Ptr start, Int4Ptr stop, Uint1Ptr strand)
9959 {
9960     Int4        c_start, c_stop;
9961     Uint1       c_strand;
9962     StdSegPtr   ssp;
9963     Boolean     range_found = FALSE;
9964     Boolean     strands_inconsistent = FALSE;
9965 
9966     if (start) *start = -1;
9967     if (stop)  *stop  = -1;
9968     if (strand) *strand = Seq_strand_unknown;
9969 
9970     if (sap->segtype != SAS_STD)
9971         return FALSE;
9972 
9973     ssp = (StdSegPtr)(sap->segs);
9974     while (ssp) {
9975         if (AlnMgr2GetSeqRangeForSipInStdSeg(ssp, sip, &c_start, &c_stop, &c_strand, NULL) &&
9976             c_start != -1) /* skip inserts on our bioseq */
9977         {
9978              range_found = TRUE;
9979 
9980             if (start) {
9981                 if (*start == -1) {
9982                     *start = c_start;
9983                 } else {
9984                     *start = MIN(*start, c_start);
9985                 }
9986             }
9987             if (stop) {
9988                 *stop = MAX(*stop, c_stop);
9989             }
9990             if (strand && ! strands_inconsistent) {
9991             /* if strands are different each time, ignore them. */
9992                 if (*strand != Seq_strand_unknown && *strand != c_strand) {
9993                     *strand = Seq_strand_unknown;
9994                     strands_inconsistent = TRUE;
9995                 } else {
9996                     *strand = c_strand;
9997                 }
9998             }
9999         }
10000         ssp = ssp->next;
10001     }
10002     return range_found;
10003 }
10004 
10005 
10006 /***************************************************************************
10007 *
10008 *   AlnMgr2GetSeqRangeForSipInStdSeg  returns the start and stop sequence
10009 *  coordinates in a Std-Segment for a given Sequence Id.  Also return the
10010 *  strand type.  Either start, stop or strand can be NULL to only retrieve some of them.
10011 *  If start and stop are -1, the SeqID was not found in this segment.
10012 *  Returns true if the sip was found, even if it is a gap (start, stop = -1).  RANGE
10013 *
10014 ***************************************************************************/
AlnMgr2GetSeqRangeForSipInStdSeg(StdSegPtr ssp,SeqIdPtr sip,Int4Ptr start,Int4Ptr stop,Uint1Ptr strand,Uint1Ptr segType)10015 NLM_EXTERN Boolean AlnMgr2GetSeqRangeForSipInStdSeg(
10016     StdSegPtr   ssp,
10017     SeqIdPtr    sip,
10018     Int4Ptr     start,
10019     Int4Ptr     stop,
10020     Uint1Ptr    strand,
10021     Uint1Ptr    segType) /* AM_SEQ, AM_GAP, AM_INSERT */
10022 {
10023     SeqLocPtr   loc;
10024     Uint1       m_strand;
10025     Int4        m_start, m_stop, m_swap;
10026     Boolean     s_present = FALSE;
10027     Boolean     m_present = FALSE;
10028     Boolean     found_id = FALSE;
10029 
10030     for ( loc = ssp->loc;
10031           loc != NULL;
10032           loc = loc->next ) {
10033     /* One SeqLoc for each Sequence aligned by this segment. */
10034         /* find the one that matches the sip parameter. */
10035         if (SeqIdForSameBioseq(sip, SeqLocId(loc))) {
10036             m_strand = SeqLocStrand(loc);
10037             m_start  = SeqLocStart(loc);
10038             m_stop   = SeqLocStop(loc);
10039             /* Might have to reverse the order of start and stop on
10040                minus strands so that start is less than stop. */
10041             if (m_start > m_stop) {
10042               m_swap  = m_start;
10043               m_start = m_stop;
10044               m_stop = m_swap;
10045             }
10046             if (start)  *start  = m_start;
10047             if (stop)   *stop   = m_stop;
10048             if (strand) *strand = m_strand;
10049             if (m_start != -1)
10050                 m_present = TRUE;
10051 
10052             /* found our sequence in this segment. */
10053             found_id = TRUE;
10054         } else { /* a different sequence */
10055             if (SeqLocStart(loc) != -1)
10056                 s_present = TRUE;
10057         }
10058     }
10059 
10060     if (segType) {
10061         if (m_present && s_present)
10062             *segType = AM_SEQ;
10063         else if (!m_present && s_present)
10064             *segType = AM_INSERT;
10065         else if (m_present && !s_present)
10066             *segType = AM_GAP;
10067         else
10068             *segType = AM_GAP; /* start will be -1 */
10069     }
10070     return found_id;
10071 }
10072 
10073 
10074 /***************************************************************************
10075 *
10076 *   AlnMgr2GetNthStdSeg  returns the a pointer to the Nth segment of
10077 *   a standard segment alignment.  Numbering starts with 1.
10078 *   returns NULL if not n segments or is not a std-seg aligment.
10079 *   Useful to pass its return value to AlnMgr2GetSeqRangeForSipInStdSeg()
10080 *
10081 ***************************************************************************/
AlnMgr2GetNthStdSeg(SeqAlignPtr sap,Int2 n)10082 NLM_EXTERN StdSegPtr AlnMgr2GetNthStdSeg(SeqAlignPtr sap, Int2 n)
10083 {
10084     StdSegPtr   ssp;
10085 	Int2        i;
10086 
10087     if (sap == NULL || sap->segtype != SAS_STD || n < 1)
10088         return NULL;
10089 
10090     i = 1;
10091     ssp = (StdSegPtr)(sap->segs);
10092     while(ssp)
10093     {
10094         if (i == n)
10095             return ssp;
10096         ++i;
10097         ssp = ssp->next;
10098     }
10099 
10100     return NULL;
10101 }
10102 
10103 /***************************************************************************
10104 *
10105 *  AlnMgr2GetNumStdSegs returns the number of segments in a standar-seg alignment.
10106 *   returns -1 if sap is null or not a standard-seg alignment.
10107 *
10108 ***************************************************************************/
AlnMgr2GetNumStdSegs(SeqAlignPtr sap)10109 NLM_EXTERN Int4 AlnMgr2GetNumStdSegs(SeqAlignPtr sap)
10110 {
10111     Int4        seg_count = 0;
10112     StdSegPtr   ssp;
10113 
10114     if (sap == NULL || sap->segtype != SAS_STD)
10115         return -1;
10116 
10117     ssp = (StdSegPtr)(sap->segs);
10118 	while(ssp)
10119 	{
10120 		++seg_count;
10121 		ssp = ssp->next;
10122 	}
10123 	return seg_count;
10124 }
10125 
AlnMgr2GetLongestSeqLoc(SeqAlignPtr sap)10126 static SeqLocPtr AlnMgr2GetLongestSeqLoc(SeqAlignPtr sap)
10127 {
10128    Int4       longest;
10129    Int4       n;
10130    SeqLocPtr  slp;
10131    SeqLocPtr  slp_longest;
10132    StdSegPtr  ssp;
10133 
10134    if (sap == NULL || sap->segtype != SAS_STD)
10135       return NULL;
10136    longest = -1;
10137    ssp = (StdSegPtr)(sap->segs);
10138    slp = ssp->loc;
10139    while (slp != NULL)
10140    {
10141       n = SeqLocLen(slp);
10142       if (n > longest)
10143       {
10144          slp_longest = slp;
10145          longest = n;
10146       }
10147       slp = slp->next;
10148    }
10149    return slp_longest;
10150 }
10151 
10152 /***************************************************************************
10153 *
10154 *  The two mapping functions act a little differently for std-segs. The
10155 *  alignment coordinates are 1:1 linearly correlated with the longest
10156 *  seqloc in the set; the others may be significantly shorter.
10157 *  The mapping functions deal with % lengths, and map those instead of
10158 *  coordinates (which may not be linear);
10159 *
10160 ***************************************************************************/
AlnMgr2MapBioseqToSeqAlignStdSeg(SeqAlignPtr sap,Int4 n,Int4 pos)10161 NLM_EXTERN Int4 AlnMgr2MapBioseqToSeqAlignStdSeg(SeqAlignPtr sap, Int4 n, Int4 pos)
10162 {
10163    SeqLocPtr  slp;
10164    SeqLocPtr  slp_longest;
10165    StdSegPtr  ssp;
10166    Int4       start1;
10167    Int4       start2;
10168    Int4       stop1;
10169    Int4       stop2;
10170 
10171    if (sap == NULL || sap->segtype != SAS_STD)
10172       return -1;
10173    slp_longest = AlnMgr2GetLongestSeqLoc(sap);
10174    start1 = SeqLocStart(slp_longest);
10175    stop1 = SeqLocStop(slp_longest);
10176    ssp = (StdSegPtr)(sap->segs);
10177    slp = ssp->loc;
10178    n--;
10179    while (n > 0)
10180    {
10181       if (slp == NULL)
10182          return -1;
10183       n--;
10184       slp = slp->next;
10185    }
10186    if (slp == NULL)
10187       return -1;
10188    start2 = SeqLocStart(slp);
10189    stop2 = SeqLocStop(slp);
10190    if (start2 == -1) /* NULL */
10191       return -1;
10192    return (((stop1-start1)*(pos - start2))/(stop2-start2));
10193 }
10194 
AlnMgr2MapSeqAlignToBioseqStdSeg(SeqAlignPtr sap,Int4 n,Int4 pos)10195 NLM_EXTERN Int4 AlnMgr2MapSeqAlignToBioseqStdSeg(SeqAlignPtr sap, Int4 n, Int4 pos)
10196 {
10197    SeqLocPtr  slp;
10198    SeqLocPtr  slp_longest;
10199    StdSegPtr  ssp;
10200    Int4       start1;
10201    Int4       start2;
10202    Int4       stop1;
10203    Int4       stop2;
10204 
10205    if (sap == NULL || sap->segtype != SAS_STD)
10206       return -1;
10207    slp_longest = AlnMgr2GetLongestSeqLoc(sap);
10208    start1 = SeqLocStart(slp_longest);
10209    stop1 = SeqLocStop(slp_longest);
10210    ssp = (StdSegPtr)(sap->segs);
10211    slp = ssp->loc;
10212    n--;
10213    while (n > 0)
10214    {
10215       if (slp == NULL)
10216          return -1;
10217       n--;
10218       slp = slp->next;
10219    }
10220    if (slp == NULL)
10221       return -1;
10222    start2 = SeqLocStart(slp);
10223    stop2 = SeqLocStop(slp);
10224    if (start2 == -1)  /* NULL */
10225       return -1;
10226    return (start2 + ((stop2-start2)*(pos-start1))/(stop1-start1));
10227 }
10228 
AlnMgr2GetAlnLengthStdSeg(SeqAlignPtr sap)10229 NLM_EXTERN Int4 AlnMgr2GetAlnLengthStdSeg(SeqAlignPtr sap)
10230 {
10231    SeqLocPtr  slp_longest;
10232 
10233    if (sap == NULL || sap->segtype != SAS_STD)
10234       return -1;
10235    slp_longest = AlnMgr2GetLongestSeqLoc(sap);
10236    return (SeqLocLen(slp_longest));
10237 }
10238