1 /* ===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information (NCBI)
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government do not place any restriction on its use or reproduction.
12 * We would, however, appreciate having the NCBI and the author cited in
13 * any work or product based on this material.
14 *
15 * Although all reasonable efforts have been taken to ensure the accuracy
16 * and reliability of the software and data, the NLM and the U.S.
17 * Government do not and cannot warrant the performance or results that
18 * may be obtained by using this software or data. The NLM and the U.S.
19 * Government disclaim all warranties, express or implied, including
20 * warranties of performance, merchantability or fitness for any particular
21 * purpose.
22 *
23 * ===========================================================================
24 *
25 * File Name: alignmgr.c
26 *
27 * Author: Sarah Wheelan
28 *
29 * Version Creation Date: 7/99
30 *
31 * $Revision: 6.181 $
32 *
33 * File Description: SeqAlign indexing and messaging functions
34 *
35 * Modifications:
36 * --------------------------------------------------------------------------
37 * $Log: alignmgr.c,v $
38 * Revision 6.181 2012/03/30 14:17:43 choi
39 * Fixed bug in AlnMgrGetNextAlnBit so that it correctly calculates
40 * amp->from_b coordinates for minus strand sequences.
41 *
42 * Revision 6.180 2008/10/22 17:18:40 bollin
43 * Improvement to function for freeing an alignment index - if a freefunc was
44 * provided, use it.
45 *
46 * Revision 6.179 2004/05/20 19:44:28 bollin
47 * removed unused variables
48 *
49 * Revision 6.178 2001/11/09 17:22:34 wheelan
50 * fixed bug in TruncateSeqAlign
51 *
52 * Revision 6.177 2001/08/07 14:39:34 wheelan
53 * added am_cleanupsalp
54 *
55 * Revision 6.176 2001/07/10 16:44:01 wheelan
56 * added AlnMgrMakeFakeMultipleEx for AlnMgrIndexIndexedSet
57 *
58 * Revision 6.175 2001/07/10 11:12:23 wheelan
59 * added AlnMgrIndexIndexedChain
60 *
61 * Revision 6.174 2001/05/30 12:13:58 wheelan
62 * AlnMsgNew and AlnMsgReNew initialize from_m and to_m
63 *
64 * Revision 6.173 2001/04/30 17:51:58 wheelan
65 * minor bug fix
66 *
67 * Revision 6.172 2001/04/19 17:59:58 wheelan
68 * added protection against NULL strands in AlnMgrIndexSingleChildSeqAlign
69 *
70 * Revision 6.171 2001/03/21 19:59:21 hurwitz
71 * remove AlnMgrMergeNeighbors call from AlnMgrMakeMultByIntersectOnMaster
72 *
73 * Revision 6.170 2001/03/08 21:04:39 hurwitz
74 * rolled back AlnMgrMakeMultByIntersectOnMaster to rev 6.156
75 *
76 * Revision 6.169 2001/03/08 17:07:10 wheelan
77 * added AlnMgrGetParent and structure to support it
78 *
79 * Revision 6.168 2001/03/01 19:15:15 wheelan
80 * fixed bug in MapBioseqToSeqAlign
81 *
82 * Revision 6.167 2001/02/16 13:29:38 wheelan
83 * Added AMFreeAllIndexes
84 *
85 * Revision 6.166 2001/02/07 12:04:19 wheelan
86 * bug fix in AlnMgrGetNthUnalignedForNthRow
87 *
88 * Revision 6.165 2001/02/05 13:21:17 wheelan
89 * bug fix in AlnMgrGetNthUnalignedForNthRow
90 *
91 * Revision 6.164 2001/02/01 00:39:18 lewisg
92 * fix uninitialized variable bugs
93 *
94 * Revision 6.163 2001/01/29 12:29:16 wheelan
95 * fixed bug which missed residues in AlnMgrGetNthUnalignedForNthRow
96 *
97 * Revision 6.162 2001/01/25 14:05:11 wheelan
98 * fixed bug in AlnMgrSetUnalignedLengths
99 *
100 * Revision 6.161 2001/01/23 13:35:20 wheelan
101 * bug fix in AlnMgrConstructOverlaps
102 *
103 * Revision 6.160 2001/01/19 03:08:15 bauer
104 * commented-out debug printfs
105 *
106 * Revision 6.159 2001/01/18 19:09:00 wheelan
107 * added functions to better handle jagged-edged segmented master-slave alignments
108 *
109 * Revision 6.158 2001/01/12 20:58:25 wheelan
110 * backed out prev changes
111 *
112 * Revision 6.157 2001/01/12 19:00:29 wheelan
113 * changes in AlnMgrGetNthUnalignedForNthRow to avoid problems when flanking seqalign regions are NULL for that row
114 *
115 * Revision 6.156 2001/01/09 23:18:55 lewisg
116 * fix memory leaks
117 *
118 * Revision 6.155 2001/01/05 20:02:32 wheelan
119 * fixed some memory leaks
120 *
121 * Revision 6.154 2000/10/06 10:34:20 wheelan
122 * changed behavior of AlnMgrGetSubAlign
123 *
124 * Revision 6.153 2000/10/02 13:52:31 wheelan
125 * fixed memory leak in SAIndexFree
126 *
127 * Revision 6.152 2000/09/26 16:10:58 kans
128 * removed const from AlnMgrCompareSortStruct heapsort callback - error caught by Mac compiler
129 *
130 * Revision 6.151 2000/09/26 14:23:49 lewisg
131 * use AlnMgrSortbyID instead of AlnMgrSortSeqAligns
132 *
133 * Revision 6.150 2000/09/25 15:25:36 wheelan
134 * bug fixes in AlnMgrMapBioseqToSeqAlign
135 *
136 * Revision 6.149 2000/09/20 12:20:16 wheelan
137 * bug fixes in AlnMgrMakeSegmentedMasterSlave to guide better preservation of input row structure
138 *
139 * Revision 6.148 2000/09/14 19:37:13 wheelan
140 * *** empty log message ***
141 *
142 * Revision 6.147 2000/09/14 19:32:21 wheelan
143 * bug fix in AlnMgrMapBioseqToSeqAlign
144 *
145 * Revision 6.146 2000/09/14 18:29:46 wheelan
146 * fixed binary search in MapBioseqToSeqAlign, took out merge behavior of AlnMgrGetSubAlignSpecial
147 *
148 * Revision 6.145 2000/09/08 20:34:31 lewisg
149 * hacks to speed up bioseq to align coord computation
150 *
151 * Revision 6.144 2000/09/07 04:53:42 sicotte
152 * fix alignment calls, bad matrix calls, and misc alignments problems for sequence update
153 *
154 * Revision 6.142 2000/09/05 22:28:06 lewisg
155 * PLEASE DO NOT DELETE THE STARTSIZE FIELD
156 *
157 * Revision 6.141 2000/08/30 10:33:55 wheelan
158 * fixed gcc compiler warnings
159 *
160 * Revision 6.140 2000/08/29 20:12:09 lewisg
161 * speed up color by alignment
162 *
163 * Revision 6.139 2000/08/28 16:18:20 sicotte
164 * moved AlnMgrSeqAlignMergeTwoPairwiseEx AlnMgrSeqAlignMergeTwoPairwise AlnMgrSeqAlignMergePairwiseSet to actutils.c
165 *
166 * Revision 6.138 2000/08/28 13:39:00 sicotte
167 * Get around Indexing bug in AlnMgrSeqAlignMergePairwiseSet
168 *
169 * Revision 6.137 2000/08/25 19:24:32 sicotte
170 * Add many functions to deal with merging alignment to go from pairwise sets to a single global (or local) alignment
171 *
172 * Revision 6.136 2000/08/23 20:01:07 hurwitz
173 * fixed bug in AlnMgrGetMaxUnalignedLength
174 *
175 * Revision 6.135 2000/08/18 14:20:50 lewisg
176 * add startsize field to AMAlignIndex so that lnMgrCopyIndexedParentIntoSap knows how big starts is
177 *
178 * Revision 6.134 2000/08/14 14:40:58 lewisg
179 * bug fixes for mixed alignment
180 *
181 * Revision 6.133 2000/08/11 12:53:57 wheelan
182 * bug fixes in AlnMgrMakeMultipleByScoreExEx
183 *
184 * Revision 6.132 2000/08/10 19:09:37 wheelan
185 * bug fixes in AlnMgrMakeMultipleByScoreExEx
186 *
187 * Revision 6.131 2000/07/27 19:38:40 hurwitz
188 * fixes split block bug
189 *
190 * Revision 6.130 2000/07/26 17:26:25 lewisg
191 * fix code for c++ inclusion
192 *
193 * Revision 6.129 2000/07/26 16:48:48 sicotte
194 * Fix bug and Memory leaks in AlnMgrGetSubAlign wrt SeqIds
195 *
196 * Revision 6.128 2000/07/26 14:58:13 sicotte
197 * bug fixes to AlnMgrGetNextAlnBit. bug fix (overlapping fuzz) in AlnMgrMakeMultipleByScore, Added AlnMgrMakeMultipleByScoreExEx and AlnMgrRemoveInconsistentEx and AlnMgrDeleteHiddenEx to allow optional deletion of sealigns when converting indexes to seqaligns
198 *
199 * Revision 6.127 2000/07/25 18:55:53 sicotte
200 * Added AlnMgrDeleteHiddenEx and AlnMgrRemoveInconsistentFromPairwiseSetEx to make optional deleting of SeqAligns. Needed for Sequence Update
201 *
202 * Revision 6.126 2000/07/24 19:07:56 sicotte
203 * Fix Master-Slave bugs in AlnMgrMakeFakeMultiple and alignment coordinate bugs in AlnMgrGetNextAlnBit
204 *
205 * Revision 6.125 2000/07/21 21:36:20 sicotte
206 * fix bug for sequence update in sequin when the alignment was two
207 * discontinous seqaligns. Fixed AlnMgrMakeFakeMultiple.
208 *
209 * Revision 6.124 2000/07/21 21:07:43 hurwitz
210 * bug fix when deleting last block and block preceeding it has just one aligned column
211 *
212 * Revision 6.123 2000/07/20 22:27:41 hurwitz
213 * working on bug fixes
214 *
215 * Revision 6.122 2000/06/29 23:15:13 hurwitz
216 * leave single space between aligned blocks with no unaligned sequence between them, no auto-merge of adjacent aligned blocks
217 *
218 * Revision 6.121 2000/06/15 14:15:45 wheelan
219 * alignmgr.c
220 *
221 * Revision 6.120 2000/06/02 18:37:45 wheelan
222 * bug fix in am_is_consistent (for editing)
223 *
224 * Revision 6.119 2000/06/01 17:37:46 wheelan
225 * various bug fixes
226 *
227 * Revision 6.118 2000/06/01 14:18:10 wheelan
228 * added AlnMgrCheckOrdered and AlnMgrMakeRowsForOrdered
229 *
230 * Revision 6.117 2000/05/24 15:46:53 wheelan
231 * added AlnMgrRemoveInconsistentFromPairwiseSet and AlnMgrSortAlnSetByNthRowPos
232 *
233 * Revision 6.116 2000/05/23 22:00:14 hurwitz
234 * working on launch of DDE from DDV
235 *
236 * Revision 6.115 2000/05/19 17:52:07 wheelan
237 * fixed incorrect strands in AlnMgrGetSubAlign
238 *
239 * Revision 6.114 2000/05/18 20:54:32 wheelan
240 * bug fix in AlnMgrIsEditable
241 *
242 * Revision 6.113 2000/05/18 11:29:19 wheelan
243 * finished AlnMgrIsIBMable and AlnMgrIsEditable
244 *
245 * Revision 6.112 2000/05/16 17:14:46 wheelan
246 * added AlnMgrIsIBMable, AlnMgrIsEditable; made am_guess_numrows extern
247 *
248 * Revision 6.111 2000/05/15 13:12:21 wheelan
249 * fixes to AlnMgrAddBlock to allow creation of a new block in the tail of an alignment with only one block
250 *
251 * Revision 6.110 2000/05/14 22:28:32 wheelan
252 * added am_is_new_row to fix row numbering problems in IntersectOnMaster function
253 *
254 * Revision 6.109 2000/05/10 16:46:48 wheelan
255 * bug fix in IntersectByMaster
256 *
257 * Revision 6.108 2000/05/10 15:40:24 wheelan
258 * bug fixes in IntersectOnMaster
259 *
260 * Revision 6.107 2000/05/10 13:09:36 wheelan
261 * bug fix in am_is_consistent; added am_is_ok_block to check newly edited blocks
262 *
263 * Revision 6.106 2000/05/09 18:42:49 wheelan
264 * fixes for editing
265 *
266 * Revision 6.105 2000/05/09 14:23:00 wheelan
267 * added AlnMgrMakeMultipleByScoreEx
268 *
269 * Revision 6.104 2000/05/08 13:17:05 wheelan
270 * added AlnMgrGetNumAlnBlocks and AlnMgrGetNthBlockRange; fixed memory leaks
271 *
272 * Revision 6.103 2000/05/05 12:48:12 wheelan
273 * fixed crash when deleting last block of an alignment
274 *
275 * Revision 6.102 2000/05/05 11:53:39 wheelan
276 * bug fix in AlnMgrMapBioseqToSeqAlign
277 *
278 * Revision 6.101 2000/05/04 14:07:45 wheelan
279 * several changes to correctly merge blocks when edits remove an unaligned region
280 *
281 * Revision 6.100 2000/05/03 19:30:37 wheelan
282 * fixed bugs in NULL alignment handling
283 *
284 * Revision 6.99 2000/05/02 19:50:38 hurwitz
285 * fixed some bugs with launching DDE from DDV, added new alnMgr fn for positioning DDE on proper column
286 *
287 * Revision 6.98 2000/05/02 12:00:03 wheelan
288 * added SASeqDatFree and fixed more memory leaks
289 *
290 * Revision 6.97 2000/05/01 19:54:51 wheelan
291 * fixed memory leaks
292 *
293 * Revision 6.96 2000/05/01 13:58:17 wheelan
294 * fixed am_is_consistent to recognize row rearrangments
295 *
296 * Revision 6.95 2000/05/01 12:12:17 wheelan
297 * fixes in AlnMgrMapBioseqToSeqAlign
298 *
299 * Revision 6.94 2000/04/26 21:53:21 hurwitz
300 * added save function to tell AlnMgr about edits made in DDE
301 *
302 * Revision 6.93 2000/04/22 15:54:09 wheelan
303 * added AlnMgrIndexLite; several assorted bug fixes
304 *
305 * Revision 6.92 2000/04/17 17:03:33 wheelan
306 * fixes in AlnMgrNeatlyIndex and AlnMgrIntersectByMaster
307 *
308 * Revision 6.91 2000/04/10 19:35:15 wheelan
309 * added AlnMgrIsSAPNULL, bug fixes in AlnMgrMakeFakeMultiple, added ability to deal with NULL alignments, completed AlnMgrReplaceBlock and AlnMgrAddBlock
310 *
311 * Revision 6.90 2000/04/07 13:21:17 wheelan
312 * bug fixes in MapBioseqToAlnCoords and free functions
313 *
314 * Revision 6.89 2000/04/05 17:41:29 wheelan
315 * added AlnMgrAddBlock, AlnMgrReplaceBlock, and fixed AlnMgrGetSubAlignSpecial
316 *
317 * Revision 6.88 2000/04/04 13:39:14 wheelan
318 * fixed bug in mapping bioseq coords for segmented alignments
319 *
320 * Revision 6.87 2000/04/03 17:20:22 wheelan
321 * finished AlnMgrGetSubAlignSpecial, bug fix in AlnMgrGetNextAlnBit
322 *
323 * Revision 6.86 2000/04/03 12:50:31 wheelan
324 * bug fixes for partial alignments
325 *
326 * Revision 6.85 2000/03/17 14:25:24 wheelan
327 * changes to AlnMgrGetSubAlign
328 *
329 * Revision 6.84 2000/03/16 19:53:32 wheelan
330 * fixed bug which allowed all-gap columns after AlnMgrForceMasterSlave
331 *
332 * Revision 6.83 2000/03/16 15:07:15 wheelan
333 * bug fix in AlnMgrTruncateByOverlap
334 *
335 * Revision 6.82 2000/03/15 20:40:19 lewisg
336 * bug fixes for AlnMgrCarefulIndex
337 *
338 * Revision 6.81 2000/03/10 18:47:01 lewisg
339 * add show/hide
340 *
341 * Revision 6.80 2000/03/09 20:24:20 wheelan
342 * bug fixes in AlnMgrSetUnalignedLengths and IBM
343 *
344 * Revision 6.79 2000/03/07 18:32:22 wheelan
345 * miscellaneous bug fixes
346 *
347 * Revision 6.78 2000/03/03 19:58:35 wheelan
348 * added AlnMgrDupTopNByScore
349 *
350 * Revision 6.77 2000/03/02 20:00:33 wheelan
351 * bug fixes to more gracefully handle a sequence aligned with itself
352 *
353 * Revision 6.76 2000/02/29 18:02:34 wheelan
354 * added AlnMgrMergeNeighbors to get rid of unaligned regions of length 0 after intersection; bug fixes in copy functions
355 *
356 * Revision 6.75 2000/02/28 17:18:14 wheelan
357 * Added AlnMgrTossNeatRows for Cn3D
358 *
359 * Revision 6.74 2000/02/28 14:49:44 wheelan
360 * added AlnMgrSetUnalignedLengths
361 *
362 * Revision 6.73 2000/02/24 18:07:33 wheelan
363 * bug fixes in alignment truncation function
364 *
365 * Revision 6.72 2000/02/23 20:06:05 thiessen
366 * added missing pointer assignment
367 *
368 * Revision 6.71 2000/02/23 18:45:17 wheelan
369 * added AlnMgrNeatlyIndex for structure alignments, added more careful row indexing, finished AlnMgrMapBioseqToSeqAlign function
370 *
371 * Revision 6.70 2000/02/16 15:47:57 wheelan
372 * changed behavior of AlnMgrMakeMultByIntersectOnMaster
373 *
374 * Revision 6.69 2000/02/11 17:30:50 kans
375 * AlnMgrForcePairwiseContinuous moved to tools/actutils (SW)
376 *
377 * Revision 6.68 2000/02/10 19:13:13 wheelan
378 * bug fixes for IntersectOnMaster
379 *
380 * Revision 6.67 2000/02/10 15:20:17 lewisg
381 * sarah's fixes for < 0 indices
382 *
383 * Revision 6.66 2000/02/09 20:23:03 wheelan
384 * finished AlnMgrDeleteNthRow
385 *
386 * Revision 6.65 2000/02/07 16:15:50 wheelan
387 * added AlnMgrTruncateSAP and several helper functions
388 *
389 * Revision 6.64 2000/02/04 22:57:01 kans
390 * changed BioseqUnlockById to BioseqUnlock to avoid scoping problem
391 *
392 * Revision 6.63 2000/02/02 17:55:50 wheelan
393 * bug fixes
394 *
395 * Revision 6.62 2000/02/02 14:37:30 wheelan
396 * added AlnMgrGetNthAlignedSegInNthRow and AlnMgrGetNthSegmentRange to make alignment editing easier
397 *
398 * Revision 6.60 2000/02/01 13:14:24 wheelan
399 * took out debugging AsnWrite, bug fixes in AlnMgrGetNextAlnBit and GetNthUnaligned
400 *
401 * Revision 6.59 2000/01/31 21:00:53 kans
402 * changes to AlnMgrForcePairwiseContinuous and AlnMgrGetNthSeqRangeInSA to support Update Sequence with very long sequences in Sequin (SW)
403 *
404 * Revision 6.58 2000/01/31 16:08:33 wheelan
405 * added unpacking functions, and AlnMgrMakeMultByIntersectOnMaster (does not work yet)
406 *
407 * Revision 6.57 2000/01/29 14:03:15 wheelan
408 * added AlnMgrDeleteHidden and AlnMgrForceContinuous (uses bandalign) plus a couple utilities for these, plus many bug fixes
409 *
410 * Revision 6.56 2000/01/19 15:45:09 wheelan
411 * many, many bug fixes in AlnMgrGetSubAlign and AlnMgrGetNextAlnBit
412 *
413 * Revision 6.55 2000/01/14 18:50:36 wheelan
414 * fixed bug in AlnMgrGetSubAlign
415 *
416 * Revision 6.54 2000/01/12 17:43:19 wheelan
417 * added AlnMgrGetNumSegments, AlnMgrDeleteRow
418 *
419 * Revision 6.53 1999/12/02 20:31:59 lewisg
420 * put seqentries into bioseqset and fix calling convention in alignmgr.c
421 *
422 * Revision 6.52 1999/11/30 14:36:39 wheelan
423 * added AlnMgrMakeMultipleByScore; bug fixes
424 *
425 * Revision 6.51 1999/11/26 15:42:19 vakatov
426 * Fixed for the C++ and/or MSVC DLL compilation
427 *
428 * Revision 6.50 1999/11/24 11:29:52 wheelan
429 * added missing return values
430 *
431 * Revision 6.49 1999/11/18 19:30:33 wheelan
432 * added AlnMgrDeleteChildByPointer, bug fixes
433 *
434 * Revision 6.48 1999/11/03 12:47:05 wheelan
435 * added code to correctly handle internal gaps in segmented master-slave alignments
436 *
437 * Revision 6.47 1999/11/02 12:38:38 wheelan
438 * bug fixes when only one child
439 *
440 * Revision 6.46 1999/10/25 18:17:23 wheelan
441 * Added AlnMgrGetUniqueSeqs, fixed merge function to handle single child seqalign correctly
442 *
443 * Revision 6.45 1999/10/19 19:27:03 wheelan
444 * added static defines; changed behavior of AlnMgrGetNextNthSeqRange; rewrote AlnMgrMakeSegmentedMasterSlave to handle more cases
445 *
446 * Revision 6.44 1999/10/15 21:51:02 durand
447 * add AlnMgrIsSAPDiscAli()
448 *
449 * Revision 6.43 1999/10/15 18:19:05 wheelan
450 * added rudimentary ability to default to master-slave type if possible
451 *
452 * Revision 6.42 1999/10/15 13:48:47 wheelan
453 * added AlnMgrGetNthRowTail, extended capability of AlnMgrGetNthStrand
454 *
455 * Revision 6.41 1999/10/14 16:10:30 kans
456 * new includes and prototypes added
457 *
458 * Revision 6.40 1999/10/13 19:29:03 wheelan
459 * added speedup for segmented master-slave creation
460 *
461 * Revision 6.39 1999/10/07 13:37:16 wheelan
462 * added AlnMgrIndexSingleSeqAlign, which only indexes the first seqalign in a list; also added automatic computation of max length of unaligned regions for time savings
463 *
464 * Revision 6.38 1999/10/06 19:35:09 wheelan
465 * added several viewer and editor management functions; fixed many bugs in AlnMgrGetNextAlnBit
466 *
467 * Revision 6.37 1999/10/05 15:15:31 wheelan
468 * added AlnMgrGetNthUnalignedForNthRow
469 *
470 * Revision 6.36 1999/10/05 14:02:31 wheelan
471 * bug fixes in AlnMgrGetNextAlnBit
472 *
473 * Revision 6.35 1999/10/04 14:58:08 wheelan
474 * bug fixes; added AlnMgrMapBioseqToSeqAlign
475 *
476 * Revision 6.34 1999/09/24 15:04:55 lewisg
477 * AlnMgrGetNextAlnBit: amp->to_m changed when calling child
478 *
479 * Revision 6.33 1999/09/24 14:29:58 wheelan
480 * changed behavior of AlnMgrGetNextLengthBit to mimic other GetNext functions, completed functionality of AlnMgrGetSubAlign, bug fixes
481 *
482 * Revision 6.32 1999/09/23 16:03:32 wheelan
483 * Added structures and functions to support segmented master-slave alignments
484 *
485 * Revision 6.31 1999/09/22 13:19:15 wheelan
486 * made AlnMsg row_num field 1-based, added AlnMgrGetNextNthSeqRange, started adding functions to handle a segmented master-slave alignment
487 *
488 * Revision 6.30 1999/09/21 19:15:28 wheelan
489 * changed AlnMgrGetNextAlnBit to return FALSE if called once more past the end; various bug fixes; implemented part of AlnMgrGetSubAlign
490 *
491 * Revision 6.29 1999/09/20 12:12:58 wheelan
492 * added safety checks in case input seqalign has no strand or score information
493 *
494 * Revision 6.28 1999/09/20 11:58:52 wheelan
495 * modified AlnMgrGetNthSeqRange to use new row information structures
496 *
497 * Revision 6.27 1999/09/17 16:55:33 wheelan
498 * bug fixes, added AlnMgrPropagateSeqIdsBySapList to correctly associate seqids with rows
499 *
500 * Revision 6.26 1999/09/14 15:48:50 kans
501 * AlnMgrMapRowCoords returns -1 on failure at end of function
502 *
503 * Revision 6.25 1999/09/13 19:57:10 sicotte
504 * Make AlnMgrMapBsqCoord work for continous alignments
505 *
506 * Revision 6.24 1999/09/13 19:43:09 sicotte
507 * bug fixes
508 *
509 * Revision 6.23 1999/09/13 14:33:24 wheelan
510 * added support for row numbers in AlnMgrGetNextAlnBit
511 *
512 * Revision 6.22 1999/09/08 13:36:16 wheelan
513 * fixed bugs found by Patrick Durand
514 *
515 * Revision 6.21 1999/09/08 11:55:35 sicotte
516 * fix bug that was missing end segments
517 *
518 * Revision 6.20 1999/09/08 11:49:13 wheelan
519 * added capability to return length of unaligned regions
520 *
521 * Revision 6.19 1999/09/07 12:11:17 wheelan
522 * fixed bugs pointed out by Hugues
523 *
524 * Revision 6.18 1999/09/06 16:37:44 wheelan
525 * added AlnMgrGetNextLengthBit and associated function
526 *
527 * Revision 6.17 1999/09/06 15:55:55 wheelan
528 * IndexSeqAlign now makes the fake multiple if possible
529 *
530 * Revision 6.16 1999/09/06 15:52:25 wheelan
531 * added row management functions, made most functions minus-strand compliant, added smarter test for master-slave vs partial
532 *
533 * Revision 6.15 1999/09/01 20:11:56 wheelan
534 * added new merge function and the typedef for the structure it uses
535 *
536 * Revision 6.14 1999/09/01 14:40:06 wheelan
537 * added AlnMgrGetStrand, fixed bugs in GetNextAlnBit, added more cases to AlnMgrIndexSeqAlign
538 *
539 * Revision 6.13 1999/08/30 19:28:06 wheelan
540 * modified AlnMgrGetNextAlnBit to handle master-slave alignments
541 *
542 * Revision 6.12 1999/08/26 20:35:21 wheelan
543 * added parent indexing and pairwise-to-multiple functions
544 *
545 * Revision 6.11 1999/08/20 11:23:53 wheelan
546 * fixed AlnMgrGetNthSeqRange for minus strands
547 *
548 * Revision 6.10 1999/08/19 19:30:26 wheelan
549 * made case for SAT_PARTIAL in AlnMgrGetNextAlnBit
550 *
551 * Revision 6.9 1999/08/19 17:24:50 wheelan
552 * changed AMAlignIndex structure, added more api functions
553 *
554 * Revision 6.8 1999/08/12 20:56:56 vakatov
555 * [WIN32] Added missed LIBCALLBACK
556 *
557 * Revision 6.7 1999/08/12 12:41:53 wheelan
558 * added comments, and functions to index the parent
559 *
560 * Revision 6.6 1999/08/06 18:31:19 wheelan
561 * fixed compiler error
562 *
563 * Revision 6.5 1999/08/06 16:38:43 kans
564 * fixed Mac compiler complaints
565 *
566 * Revision 6.4 1999/08/06 13:44:14 wheelan
567 * added several functions; changed all function names to AlnMgr..
568 *
569 * Revision 6.3 1999/07/30 14:17:52 wheelan
570 * fixes to keep Mac compiler happy
571 *
572 * Revision 6.2 1999/07/30 14:08:37 wheelan
573 * added api functions to access indexes
574 *
575 * Revision 6.1 1999/07/29 12:56:25 wheelan
576 * initial checkin
577 *
578
579 * ==========================================================================
580 */
581
582
583
584 #include <alignmgr.h>
585 #include <needleman.h>
586 #include <stdlib.h>
587
588 /***************************************************************************
589 *
590 * static functions
591 *
592 ***************************************************************************/
593 static void heapsort_with_userdata (VoidPtr b, size_t nel, size_t width, int (LIBCALLBACK *compar)PROTO((VoidPtr, VoidPtr, VoidPtr)), VoidPtr userdata);
594 static void heapify_with_userdata(CharPtr base0, CharPtr base, CharPtr lim, CharPtr last, size_t width, int(LIBCALLBACK *compar)PROTO((VoidPtr, VoidPtr, VoidPtr)), VoidPtr userdata);
595 static void AlnMgrSetUnalignedLengths(SeqAlignPtr sap);
596 static Boolean am_get_nth_range_for_partial(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop, Int4Ptr where, BoolPtr is_aligned, Boolean unaligned);
597 static AMmsmsPtr am_sort_ammsms(AMmsmsPtr ams_head, Int4 n);
598 static AMmsmsPtr am_sort_masterams(AMmsmsPtr ams_head, Int4 n);
599 static Int4 am_get_first_rsp_for_sip(SeqIdPtr sip, AMsiplistPtr siplist);
600 static int LIBCALLBACK AMCompareAlignInfoProc(VoidPtr ptr1, VoidPtr ptr2);
601 static int LIBCALLBACK AMCompareStarts(VoidPtr ptr1, VoidPtr ptr2);
602 static Int4 AlnMgrMapSegmentCoords(SeqAlignPtr sap, Uint4 pos, Int4 row, SeqIdPtr master, Int4Ptr len);
603 static void am_set_master(SeqAlignPtr sap, SeqIdPtr sip);
604 static SeqIdPtr am_find_master(SeqAlignPtr sap);
605 static AMmsmsPtr AlnMgrFindOverlapOnMaster(SeqAlignPtr sap);
606 static void AlnMgrMergeNeighbors(SeqAlignPtr salp);
607 static Boolean am_is_new_row(SeqIdPtr sip1, SeqIdPtr sip2);
608 static Int4Ptr am_save_rowinfo(SeqAlignPtr sap, Int4 numrows);
609 static void am_set_rows(AMmsmsPtr ams, Int4Ptr rowarray, Int4 numrows);
610 static AMmsmsPtr am_create_overlap(SeqAlignPtr sap);
611 static void am_densediag_reverse(DenseDiagPtr ddp);
612 static AMmsmsPtr AlnMgrTruncateByOverlap(SeqAlignPtr sap, AMmsmsPtr ams_head);
613 static void am_compare_alignids(AMmsmsPtr ams_prev, AMmsmsPtr ams);
614 static Boolean AlnMgrCarefulIndex(SeqAlignPtr sap, AMmsmsPtr ams_head, AMmsmsPtr *ams_mhead, Boolean allinblock, Int4 numrows);
615 static Boolean am_make_null_alignment(SeqAlignPtr sap);
616 static void am_trim_master(AMmsmsPtr PNTR ams_mhead, AMmsmsPtr ams_head, Int4 numrows);
617 static Boolean am_check_gaps(SeqAlignPtr sap);
618 static void am_fix_empty_columns(SeqAlignPtr sap);
619 static Int4 am_translate_row_num (AMAlignIndexPtr amaip, Int4 n, Int4 row);
620 static Boolean am_is_consistent(SeqAlignPtr sap, SeqAlignPtr sap_new, Int4Ptr block_num);
621 static Boolean am_is_ok_block(DenseSegPtr dsp);
622 static void am_do_merge (AMAlignIndexPtr amaip, Int4 left, Int4 right);
623 static Boolean am_merge_after_edit (SeqAlignPtr sap);
624 static Boolean am_same_ids(SeqIdPtr sip1, SeqIdPtr sip2);
625 static AMmsmsPtr AlnMgrConstructOverlaps(AMmsmsPtr ams_head);
626 static Boolean AlnMgrJaggedIndex(SeqAlignPtr sap, AMmsmsPtr ams_head, AMmsmsPtr *ams_mhead, Int4 numrows);
627 static Boolean AlnMgrMakeFakeMultipleEx(SeqAlignPtr sap, Boolean forcestraightms);
628
629
630
631
632
633 /*******************************************************************
634 *
635 * all the memory allocation/deallocation functions
636 *
637 *******************************************************************/
638
SeqAlignIndexNew(void)639 NLM_EXTERN SeqAlignIndexPtr SeqAlignIndexNew(void)
640 {
641 return (SeqAlignIndexPtr)(MemNew(sizeof(SeqAlignIndex)));
642 }
643
SAIndexFreeFunc(VoidPtr index)644 static Boolean LIBCALLBACK SAIndexFreeFunc(VoidPtr index)
645 {
646 return SAIndexFree(index);
647 }
648
SAIndexNew(void)649 NLM_EXTERN SAIndexPtr SAIndexNew(void)
650 {
651 SAIndexPtr saip;
652
653 saip = (SAIndexPtr)MemNew(sizeof(SAIndex));
654 saip->master = -1;
655 saip->freefunc = (SeqAlignIndexFreeFunc)(SAIndexFreeFunc);
656 return saip;
657 }
658
SAIndexFree(VoidPtr index)659 NLM_EXTERN Boolean SAIndexFree(VoidPtr index)
660 {
661 Int4 i;
662 Boolean retval;
663 SAIndexPtr saip;
664
665 retval = FALSE;
666 if (!index)
667 return retval;
668 saip = (SAIndexPtr)index;
669 if (saip->indextype != INDEX_SEGS)
670 return retval;
671 MemFree(saip->aligncoords);
672 for (i=0; i<saip->numseqs; i++)
673 {
674 SASeqDatFree(saip->ssdp[i]);
675 }
676 MemFree(saip->ssdp);
677 MemFree(saip);
678 retval = TRUE;
679 return retval;
680 }
681
SASeqDatNew(void)682 NLM_EXTERN SASeqDatPtr SASeqDatNew(void)
683 {
684 return (SASeqDatPtr)(MemNew(sizeof(SASeqDat)));
685 }
686
SASeqDatFree(SASeqDatPtr ssdp)687 NLM_EXTERN void SASeqDatFree(SASeqDatPtr ssdp)
688 {
689 if (ssdp == NULL)
690 return;
691 if (ssdp->sect != NULL)
692 MemFree(ssdp->sect);
693 if (ssdp->unsect != NULL)
694 MemFree(ssdp->unsect);
695 MemFree(ssdp);
696 }
697
RowSourceNew(void)698 NLM_EXTERN RowSourcePtr RowSourceNew(void)
699 {
700 return (RowSourcePtr)(MemNew(sizeof(RowSource)));
701 }
702
RowSourceFree(RowSourcePtr rsp)703 NLM_EXTERN RowSourcePtr RowSourceFree(RowSourcePtr rsp)
704 {
705 if (rsp == NULL)
706 return NULL;
707 rsp->id = SeqIdSetFree(rsp->id);
708 MemFree(rsp->which_saps);
709 MemFree(rsp->num_in_sap);
710 MemFree(rsp);
711 return NULL;
712 }
713
AMAlignIndexFreeFunc(VoidPtr data)714 static Boolean LIBCALLBACK AMAlignIndexFreeFunc (VoidPtr data)
715 {
716 return AMAlignIndexFree(data);
717 }
718
719
AMAlignIndexNew(void)720 NLM_EXTERN AMAlignIndexPtr AMAlignIndexNew(void)
721 {
722 AMAlignIndexPtr amaip;
723
724 amaip = (AMAlignIndexPtr)MemNew(sizeof(AMAlignIndex));
725 amaip->freefunc = (SeqAlignIndexFreeFunc)(AMAlignIndexFreeFunc);
726 amaip->master = -2;
727 amaip->indextype = INDEX_PARENT;
728 return amaip;
729 }
730
AMAlignIndexFree(VoidPtr index)731 NLM_EXTERN Boolean AMAlignIndexFree(VoidPtr index)
732 {
733 AMAlignIndexPtr amaip;
734 Int4 i;
735 Boolean retval;
736
737 retval = FALSE;
738 amaip = (AMAlignIndexPtr)(index);
739 if (!amaip)
740 return retval;
741 if (amaip->indextype != INDEX_PARENT)
742 return retval;
743 if (amaip->mstype == AM_NEATINDEX)
744 {
745 MemFree(amaip->saps);
746 MemFree(amaip);
747 return TRUE;
748 }
749 amaip->ids = SeqIdSetFree(amaip->ids);
750 for (i=0; i<(amaip->numbsqs); i++)
751 {
752 amaip->amadp[i] = AMAlignDatFree(amaip->amadp[i]);
753 }
754 if (amaip->saps != NULL)
755 MemFree(amaip->saps);
756 if (amaip->amadp != NULL)
757 MemFree(amaip->amadp);
758 if (amaip->aligncoords != NULL)
759 MemFree(amaip->aligncoords);
760 if (amaip->lens != NULL)
761 MemFree(amaip->lens);
762 if (amaip->ulens != NULL)
763 MemFree(amaip->ulens);
764 if (amaip->starts != NULL)
765 MemFree(amaip->starts);
766 if (amaip->rowsource != NULL)
767 {
768 for (i=0; i<(amaip->numrows); i++)
769 {
770 amaip->rowsource[i] = RowSourceFree(amaip->rowsource[i]);
771 }
772 MemFree(amaip->rowsource);
773 }
774 MemFree(amaip);
775 retval = TRUE;
776 return retval;
777 }
778
AMAlignDatNew(void)779 NLM_EXTERN AMAlignDatPtr AMAlignDatNew(void)
780 {
781 return (AMAlignDatPtr)(MemNew(sizeof(AMAlignDat)));
782 }
783
AMAlignDatFree(AMAlignDatPtr amadp)784 NLM_EXTERN AMAlignDatPtr AMAlignDatFree(AMAlignDatPtr amadp)
785 {
786 if (amadp == NULL)
787 return NULL;
788 SeqIdFree(amadp->sip);
789 MemFree(amadp->saps);
790 MemFree(amadp->segments);
791 MemFree(amadp);
792 return NULL;
793 }
794
AMFreeAllIndexes(SeqAlignPtr sap)795 NLM_EXTERN void AMFreeAllIndexes(SeqAlignPtr sap)
796 {
797 SeqAlignPtr salp;
798
799 if (sap->saip->indextype == INDEX_PARENT)
800 {
801 salp = (SeqAlignPtr)(sap->segs);
802 while (salp != NULL)
803 {
804 SAIndexFree((Pointer)(salp->saip));
805 salp->saip = NULL;
806 salp = salp->next;
807 }
808 AMAlignIndexFree((Pointer)(sap->saip));
809 sap->saip = NULL;
810 } else
811 {
812 while (sap != NULL)
813 {
814 if (sap->saip != NULL)
815 {
816 if (sap->saip->freefunc != NULL)
817 {
818 (sap->saip->freefunc) (sap->saip);
819 }
820 else
821 {
822 SAIndexFree((Pointer)(sap->saip));
823 }
824 sap->saip = NULL;
825 }
826 sap = sap->next;
827 }
828 }
829 }
830
AlnMsgNew(void)831 NLM_EXTERN AlnMsgPtr AlnMsgNew(void)
832 {
833 AlnMsgPtr amp;
834
835 amp = (AlnMsgPtr)MemNew(sizeof(AlnMsg));
836 amp->to_m = -1;
837 amp->send_space = FALSE;
838 amp->row_num = -1;
839 amp->prev = -2;
840 amp->prev_sap = -2;
841 amp->place = 0;
842 amp->flag = FALSE;
843 amp->which_bsq = NULL;
844 return amp;
845 }
846
AlnMsgFree(AlnMsgPtr amp)847 NLM_EXTERN AlnMsgPtr AlnMsgFree(AlnMsgPtr amp)
848 {
849 return ((AlnMsgPtr)MemFree(amp));
850 }
851
AlnMsgReNew(AlnMsgPtr amp)852 NLM_EXTERN AlnMsgPtr AlnMsgReNew(AlnMsgPtr amp)
853 {
854 amp->from_m = 0;
855 amp->to_m = -1;
856 amp->send_space = FALSE;
857 amp->row_num = -1;
858 amp->prev = -2;
859 amp->prev_sap = -2;
860 amp->place = 0;
861 amp->flag = FALSE;
862 amp->which_bsq = NULL;
863 return amp;
864 }
865
866 /********************************************************************************
867 *
868 * AlnMgrIndexSingleSeqAlign indexes (in place) only the first seqalign or
869 * seqalign set in the chain that is passed in. It will extensively
870 * rearrange the first seqalign given.
871 *
872 ********************************************************************************/
AlnMgrIndexSingleSeqAlign(SeqAlignPtr sap)873 NLM_EXTERN Boolean AlnMgrIndexSingleSeqAlign(SeqAlignPtr sap)
874 {
875 SeqAlignPtr sap_next;
876
877 if (sap == NULL)
878 return TRUE;
879 sap_next = NULL;
880 if (sap->next)
881 sap_next = sap->next;
882 sap->next = NULL;
883 AlnMgrIndexSeqAlign(sap);
884 sap->next = sap_next;
885 if (sap->saip)
886 return TRUE;
887 else
888 return FALSE;
889 }
890
AlnMgrIndexSingleChildSeqAlign(SeqAlignPtr sap)891 NLM_EXTERN Boolean AlnMgrIndexSingleChildSeqAlign(SeqAlignPtr sap)
892 {
893 DenseSegPtr dsp;
894 Int4 i;
895 SeqAlignPtr sap_next;
896
897 if (sap == NULL)
898 return FALSE;
899 if (sap->segtype == SAS_DISC)
900 return FALSE;
901 sap_next = NULL;
902 if (sap->next)
903 sap_next = sap->next;
904 sap->next = NULL;
905 if (sap->saip != NULL)
906 {
907 if (sap->saip->indextype == INDEX_SEGS)
908 SAIndexFree(sap->saip);
909 }
910 if (sap->segtype == SAS_DENSEG)
911 AlnMgrIndexLinkedSegs(sap);
912 else if (sap->segtype == SAS_DENDIAG)
913 AlnMgrIndexSingleSeqAlign(sap);
914 dsp = (DenseSegPtr)(sap->segs);
915 if (dsp->strands == NULL)
916 {
917 dsp->strands = (Uint1Ptr)MemNew((dsp->dim*dsp->numseg)*sizeof(Uint1));
918 for (i=0; i<dsp->dim*dsp->numseg; i++)
919 {
920 dsp->strands[i] = Seq_strand_plus;
921 }
922 }
923 sap->next = sap_next;
924 if (sap->saip)
925 return TRUE;
926 else
927 return FALSE;
928 }
929
930 /********************************************************************************
931 *
932 * AlnMgrReIndexSeqAlign frees the parent indexes, indexes any child
933 * seqaligns that are not indexed (it assumes that any indexed child
934 * seqaligns are correctly indexed), and reindexes the set.
935 *
936 ********************************************************************************/
AlnMgrReIndexSeqAlign(SeqAlignPtr sap)937 NLM_EXTERN Boolean AlnMgrReIndexSeqAlign(SeqAlignPtr sap)
938 {
939 SeqAlignPtr sap_tmp;
940 SeqAlignPtr tmp_next;
941
942 if (sap == NULL)
943 return FALSE;
944 if (sap->segtype != SAS_DISC) /* we don't know what we're dealing with */
945 return FALSE;
946 if (!AMAlignIndexFree((Pointer)sap->saip))
947 return FALSE;
948 sap->saip = NULL;
949 sap_tmp = (SeqAlignPtr)sap->segs;
950 while (sap_tmp)
951 {
952 if (sap_tmp->saip == NULL)
953 {
954 tmp_next = sap_tmp->next;
955 sap_tmp->next = NULL;
956 if (!AlnMgrIndexLinkedSegs(sap_tmp))
957 return FALSE;
958 sap_tmp->next = tmp_next;
959 }
960 sap_tmp = sap_tmp->next;
961 }
962 if (!AlnMgrIndexParentSA(sap))
963 return FALSE;
964 if (!AlnMgrMakeFakeMultiple(sap))
965 return FALSE;
966 return TRUE;
967 }
968
969 /********************************************************************************
970 *
971 * AlnMgrIndexSeqAlign indexes (in place) the ENTIRE chain of seqaligns
972 * and seqalign sets passed in, and extensively rearranges the seqalign.
973 *
974 ********************************************************************************/
AlnMgrIndexSeqAlign(SeqAlignPtr sap)975 NLM_EXTERN Boolean AlnMgrIndexSeqAlign(SeqAlignPtr sap)
976 {
977 SAIndexPtr saip;
978 SeqAlignPtr salp;
979
980 if (!sap)
981 return FALSE;
982 if (sap->saip != NULL)
983 {
984 return TRUE;
985 }
986 if (!AlnMgrUnpackSeqAlign(sap))
987 return FALSE;
988 if (!AlnMgrRearrangeUnpacked(sap))
989 return FALSE;
990 if (!AlnMgrIndexLinkedSegs((SeqAlignPtr)(sap->segs)))
991 return FALSE;
992 if (!AlnMgrIndexParentSA(sap))
993 return FALSE;
994 if (!AlnMgrMakeFakeMultiple(sap))
995 return FALSE;
996 salp = (SeqAlignPtr)(sap->segs);
997 while (salp != NULL)
998 {
999 saip = (SAIndexPtr)(salp->saip);
1000 saip->parent = sap;
1001 salp = salp->next;
1002 }
1003 return TRUE;
1004 }
1005
1006 /***************************************************************************
1007 *
1008 * AlnMgrIndexIndexedChain takes a linked list of indexed seqaligns and
1009 * indexes them as a set.
1010 *
1011 ***************************************************************************/
AlnMgrIndexIndexedChain(SeqAlignPtr sap)1012 NLM_EXTERN SeqAlignPtr AlnMgrIndexIndexedChain(SeqAlignPtr sap)
1013 {
1014 SAIndexPtr saip;
1015 SeqAlignPtr sap_new;
1016
1017 if (sap == NULL || sap->saip == NULL || sap->saip->indextype == INDEX_PARENT)
1018 return NULL;
1019 sap_new = SeqAlignNew();
1020 sap_new->segtype = SAS_DISC;
1021 sap_new->segs = (Pointer)(sap);
1022 if (!AlnMgrIndexParentSA(sap_new))
1023 return NULL;
1024 if (!AlnMgrMakeFakeMultipleEx(sap_new, TRUE))
1025 return NULL;
1026 sap = (SeqAlignPtr)(sap_new->segs);
1027 while (sap != NULL)
1028 {
1029 saip = (SAIndexPtr)(sap->saip);
1030 saip->parent = sap_new;
1031 sap = sap->next;
1032 }
1033 return sap_new;
1034 }
1035
1036
1037
1038 /**********************************************************************
1039 *
1040 * AlnMgrIndexLite disassembles the input alignment, indexes all child
1041 * alignments, and then puts them in the amaip->saps array. It does
1042 * not attempt to create alignment coordinates across the whole set.
1043 * This is useful to keep sets of child alignments together (managing
1044 * BLAST hits, for example) when creating an overall alignment is
1045 * unnecessary. This alignment can be freed normally, but many
1046 * alignmgr functions will not work on the parent alignment (they
1047 * will work on the child alignments).
1048 *
1049 **********************************************************************/
AlnMgrIndexLite(SeqAlignPtr sap)1050 NLM_EXTERN Boolean AlnMgrIndexLite(SeqAlignPtr sap)
1051 {
1052 AMAlignIndexPtr amaip;
1053 Int4 i;
1054 SAIndexPtr saip;
1055 SeqAlignPtr sap_tmp;
1056
1057 if (!sap)
1058 return FALSE;
1059 if (sap->saip != NULL)
1060 {
1061 return TRUE;
1062 }
1063 if (!AlnMgrUnpackSeqAlign(sap))
1064 return FALSE;
1065 if (!AlnMgrRearrangeUnpacked(sap))
1066 return FALSE;
1067 if (!AlnMgrIndexLinkedSegs((SeqAlignPtr)(sap->segs)))
1068 return FALSE;
1069 amaip = AMAlignIndexNew();
1070 amaip->mstype = AM_LITE;
1071 i = 0;
1072 sap_tmp = (SeqAlignPtr)(sap->segs);
1073 while (sap_tmp != NULL)
1074 {
1075 sap_tmp = sap_tmp->next;
1076 i++;
1077 }
1078 amaip->saps = (SeqAlignPtr PNTR)MemNew(i*sizeof(SeqAlignPtr));
1079 amaip->numsaps = i;
1080 amaip->parent = sap;
1081 sap_tmp = (SeqAlignPtr)(sap->segs);
1082 for (i=0; i<amaip->numsaps; i++)
1083 {
1084 amaip->saps[i] = sap_tmp;
1085 saip = (SAIndexPtr)(sap_tmp->saip);
1086 saip->parent = sap;
1087 sap_tmp = sap_tmp->next;
1088 }
1089 sap->saip = (Pointer)amaip;
1090 return TRUE;
1091 }
1092
AlnMgrGetParent(SeqAlignPtr sap)1093 NLM_EXTERN SeqAlignPtr AlnMgrGetParent(SeqAlignPtr sap)
1094 {
1095 SAIndexPtr saip;
1096
1097 if (sap->saip->indextype == INDEX_PARENT)
1098 return sap;
1099 saip = (SAIndexPtr)sap->saip;
1100 return saip->parent;
1101 }
1102
1103
1104 /***************************************************************************
1105 *
1106 * AlnMgrUnpackSeqAlign rearranges any seqalign (except alignments with
1107 * more than two levels of nested discontinuous alignments) to a simple
1108 * discontinuous alignment or a linked list of alignments.
1109 *
1110 ***************************************************************************/
AlnMgrUnpackSeqAlign(SeqAlignPtr sap)1111 NLM_EXTERN Boolean AlnMgrUnpackSeqAlign(SeqAlignPtr sap)
1112 {
1113 SeqAlignPtr sap_new;
1114 SeqAlignPtr sap_next;
1115 SeqAlignPtr sap_segs;
1116 SeqAlignPtr sap_segs_head;
1117 SeqAlignPtr sap_segs_prev;
1118
1119 if (sap == NULL)
1120 return FALSE;
1121 if (sap->segtype == SAS_DISC)
1122 {
1123 sap_segs_head = (SeqAlignPtr)(sap->segs);
1124 if (sap_segs_head->segtype == SAS_DISC)
1125 {
1126 sap_segs_prev = (SeqAlignPtr)(sap_segs_head->segs);
1127 sap_segs_head->segs = NULL;
1128 sap_next = sap_segs_head->next;
1129 sap_segs_head->next = NULL;
1130 SeqAlignFree(sap_segs_head);
1131 sap_segs_head = sap_segs_prev;
1132 sap->segs = (Pointer)(sap_segs_head);
1133 while (sap_segs_prev->next)
1134 {
1135 sap_segs_prev = sap_segs_prev->next;
1136 if (sap_segs_prev->segtype == SAS_DISC)
1137 return FALSE;
1138 }
1139 sap_segs_prev->next = sap_next;
1140 sap_segs = sap_next;
1141 } else
1142 sap_segs = sap_segs_head->next;
1143 while (sap_segs)
1144 {
1145 if (sap_segs->segtype == SAS_DISC)
1146 {
1147 sap_next = sap_segs->next;
1148 sap_segs->next = NULL;
1149 sap_segs_prev->next = (SeqAlignPtr)(sap_segs->segs);
1150 sap_segs->segs = NULL;
1151 SeqAlignFree(sap_segs);
1152 while (sap_segs_prev->next)
1153 {
1154 sap_segs_prev = sap_segs_prev->next;
1155 if (sap_segs_prev->segtype == SAS_DISC)
1156 return FALSE;
1157 }
1158 sap_segs_prev->next = sap_next;
1159 sap_segs = sap_next;
1160 } else
1161 sap_segs = sap_segs->next;
1162 }
1163 } else
1164 {
1165 sap_new = SeqAlignNew();
1166 sap_new->type = SAT_GLOBAL;
1167 sap_new->segtype = sap->segtype;
1168 sap_new->dim = sap->dim;
1169 sap_new->segs = sap->segs;
1170 sap_new->master = sap->master;
1171 sap_new->bounds = sap->bounds;
1172 sap_new->next = sap->next;
1173 sap_new->score = sap->score;
1174 sap->next = NULL;
1175 sap->segtype = SAS_DISC;
1176 sap->type = 0;
1177 sap->dim = 0;
1178 sap->master = NULL;
1179 sap->bounds = NULL;
1180 sap->score = NULL;
1181 sap->segs = (Pointer)sap_new;
1182 sap_segs_prev = sap_new;
1183 sap_segs = sap_new->next;
1184 while (sap_segs)
1185 {
1186 if (sap_segs->segtype == SAS_DISC)
1187 {
1188 sap_next = sap_segs->next;
1189 sap_segs->next = NULL;
1190 sap_segs_prev->next = (SeqAlignPtr)(sap_segs->segs);
1191 sap_segs->segs = NULL;
1192 SeqAlignFree(sap_segs);
1193 while (sap_segs_prev->next)
1194 {
1195 sap_segs_prev = sap_segs_prev->next;
1196 if (sap_segs_prev->segtype == SAS_DISC)
1197 return FALSE;
1198 }
1199 sap_segs_prev->next = sap_next;
1200 sap_segs = sap_next;
1201 } else
1202 sap_segs = sap_segs->next;
1203 }
1204 }
1205 return TRUE;
1206 }
1207
1208 /***************************************************************************
1209 *
1210 * AlnMgrRearrangeUnpacked transforms all child seqaligns into dense-seg
1211 * types, requiring some rearrangement for dense-diag sets. This function
1212 * presumes that AlnMgrUnpackSeqAlign has already been called on the
1213 * alignment.
1214 *
1215 ***************************************************************************/
AlnMgrRearrangeUnpacked(SeqAlignPtr sap)1216 NLM_EXTERN Boolean AlnMgrRearrangeUnpacked(SeqAlignPtr sap)
1217 {
1218 DenseDiagPtr ddp;
1219 DenseDiagPtr ddp_prev;
1220 DenseSegPtr dsp;
1221 Int4 i;
1222 SeqAlignPtr salp;
1223 SeqAlignPtr salp_tmp;
1224 SeqAlignPtr sap_head;
1225 SeqAlignPtr sap_new;
1226 SeqAlignPtr sap_prev;
1227 StdSegPtr ssp;
1228 StdSegPtr ssp_next;
1229
1230 if (sap == NULL || sap->segtype != SAS_DISC)
1231 return FALSE;
1232 salp = (SeqAlignPtr)(sap->segs);
1233 sap_head = sap_prev = NULL;
1234 while (salp)
1235 {
1236 if (salp->segtype < 1)
1237 {
1238 return FALSE;
1239 } else if (salp->segtype == SAS_DENDIAG)
1240 {
1241 ddp = (DenseDiagPtr)salp->segs;
1242 while (ddp)
1243 {
1244 sap_new = SeqAlignNew();
1245 sap_new->type = SAT_GLOBAL;
1246 sap_new->segtype = SAS_DENSEG;
1247 sap_new->dim = ddp->dim;
1248 dsp = DenseSegNew();
1249 dsp->dim = sap_new->dim;
1250 dsp->numseg = 1;
1251 dsp->starts = ddp->starts;
1252 ddp->starts = NULL;
1253 dsp->lens = (Int4Ptr)MemNew(sizeof(Int4));
1254 dsp->lens[0] = ddp->len;
1255 ddp->len = 0;
1256 dsp->scores = ddp->scores;
1257 ddp->scores = NULL;
1258 dsp->strands = ddp->strands;
1259 ddp->strands = NULL;
1260 if (dsp->strands == NULL)
1261 {
1262 dsp->strands = (Uint1Ptr)MemNew(dsp->dim * sizeof(Uint1));
1263 for (i=0; i<dsp->dim; i++)
1264 {
1265 dsp->strands[i] = Seq_strand_plus;
1266 }
1267 }
1268 dsp->ids = SeqIdDupList(ddp->id);
1269 sap_new->segs = (Pointer)dsp;
1270 if (dsp->scores)
1271 sap_new->score = ScoreDup(dsp->scores);
1272 if (!sap_head)
1273 {
1274 sap_head = sap_prev = sap_new;
1275 } else
1276 {
1277 sap_prev->next = sap_new;
1278 sap_prev = sap_new;
1279 }
1280 ddp_prev = ddp;
1281 ddp = ddp->next;
1282 DenseDiagFree(ddp_prev);
1283 }
1284 salp_tmp = salp->next;
1285 sap_prev->next = salp_tmp;
1286 salp->next = NULL;
1287 salp->segs = NULL;
1288 SeqAlignFree(salp);
1289 salp = salp_tmp;
1290 } else if (salp->segtype == SAS_DENSEG)
1291 {
1292 if (!sap_head)
1293 sap_head = sap_prev = salp;
1294 else
1295 {
1296 sap_prev->next = salp;
1297 sap_prev = salp;
1298 }
1299 dsp = (DenseSegPtr)salp->segs;
1300 if (dsp->strands == NULL)
1301 {
1302 dsp->strands = (Uint1Ptr)MemNew((dsp->dim)*(dsp->numseg)* sizeof(Uint1));
1303 for (i=0; i<(dsp->dim)*(dsp->numseg); i++)
1304 {
1305 dsp->strands[i] = Seq_strand_plus;
1306 }
1307 }
1308 salp = salp->next;
1309 } else if (salp->segtype == SAS_STD)
1310 {
1311 sap_prev = sap_head = NULL;
1312 ssp = (StdSegPtr)salp->segs;
1313 while (ssp)
1314 {
1315 sap_new = SeqAlignNew();
1316 if (sap_head)
1317 {
1318 sap_prev->next = sap_new;
1319 sap_prev = sap_new;
1320 } else
1321 {
1322 sap_head = sap_prev = sap_new;
1323 }
1324 sap_new->segtype = SAS_STD;
1325 sap_new->type = SAT_GLOBAL;
1326 sap_new->segs = (Pointer)ssp;
1327 ssp_next = ssp->next;
1328 ssp->next = NULL;
1329 ssp = ssp_next;
1330 }
1331 salp_tmp = salp->next;
1332 salp->next = NULL;
1333 salp->segs = NULL;
1334 SeqAlignFree(salp);
1335 salp = (Pointer)sap_head;
1336 sap_prev->next = salp_tmp;
1337 salp = salp_tmp;
1338 }
1339 }
1340 sap->segs = (Pointer)sap_head;
1341 return TRUE;
1342 }
1343
1344
1345 /***************************************************************************
1346 *
1347 * AlnMgrAnythingToSeg takes any SeqAlign and does an in-place transformation
1348 * to the parent-child structure. Each dense-seg, dense-diag and std-seg
1349 * is put into its own seqalign, and the child seqaligns are linked
1350 * together in no particular order and put in the sap->segs field of the
1351 * new parent (which takes over the pointer passed in). The parent
1352 * has segtype SAS_DISC, and each child has segtype SAS_DENSEG or SAS_STD.
1353 * Each child, then, is a continuous, nonoverlapping alignment and therefore
1354 * may be indexed.
1355 *
1356 ***************************************************************************/
AlnMgrAnythingToSeg(SeqAlignPtr sap)1357 NLM_EXTERN Boolean AlnMgrAnythingToSeg (SeqAlignPtr sap)
1358 {
1359 DenseDiagPtr ddp;
1360 DenseDiagPtr ddp_prev;
1361 DenseSegPtr dsp;
1362 Int4 i;
1363 Boolean retval;
1364 SeqAlignPtr salp;
1365 SeqAlignPtr salp_tmp;
1366 SeqAlignPtr sap_head;
1367 SeqAlignPtr sap_new;
1368 SeqAlignPtr sap_prev;
1369 StdSegPtr ssp;
1370 StdSegPtr ssp_next;
1371
1372 retval = FALSE;
1373 if (!sap)
1374 return retval;
1375 sap_new = SeqAlignNew();
1376 sap_new->type = SAT_GLOBAL;
1377 sap_new->segtype = sap->segtype;
1378 sap_new->dim = sap->dim;
1379 sap_new->segs = sap->segs;
1380 sap_new->master = sap->master;
1381 sap_new->bounds = sap->bounds;
1382 sap_new->next = sap->next;
1383 sap_new->score = sap->score;
1384 sap->next = NULL;
1385 sap->segtype = SAS_DISC;
1386 sap->type = 0;
1387 sap->dim = 0;
1388 sap->master = NULL;
1389 sap->bounds = NULL;
1390 sap->score = NULL;
1391 salp = sap_new;
1392 sap_head = sap_prev = NULL;
1393 while (salp)
1394 {
1395 if (salp->segtype < 1)
1396 {
1397 return retval;
1398 } else if (salp->segtype == SAS_DENDIAG)
1399 {
1400 ddp = (DenseDiagPtr)salp->segs;
1401 while (ddp)
1402 {
1403 sap_new = SeqAlignNew();
1404 sap_new->type = SAT_GLOBAL;
1405 sap_new->segtype = SAS_DENSEG;
1406 sap_new->dim = ddp->dim;
1407 dsp = DenseSegNew();
1408 dsp->dim = sap_new->dim;
1409 dsp->numseg = 1;
1410 dsp->starts = ddp->starts;
1411 ddp->starts = NULL;
1412 dsp->lens = (Int4Ptr)MemNew(sizeof(Int4));
1413 dsp->lens[0] = ddp->len;
1414 ddp->len = 0;
1415 dsp->scores = ddp->scores;
1416 ddp->scores = NULL;
1417 dsp->strands = ddp->strands;
1418 ddp->strands = NULL;
1419 if (dsp->strands == NULL)
1420 {
1421 dsp->strands = (Uint1Ptr)MemNew(dsp->dim * sizeof(Uint1));
1422 for (i=0; i<dsp->dim; i++)
1423 {
1424 dsp->strands[i] = Seq_strand_plus;
1425 }
1426 }
1427 dsp->ids = SeqIdDupList(ddp->id);
1428 sap_new->segs = (Pointer)dsp;
1429 if (dsp->scores)
1430 sap_new->score = ScoreDup(dsp->scores);
1431 if (!sap_head)
1432 {
1433 sap_head = sap_prev = sap_new;
1434 } else
1435 {
1436 sap_prev->next = sap_new;
1437 sap_prev = sap_new;
1438 }
1439 ddp_prev = ddp;
1440 ddp = ddp->next;
1441 DenseDiagFree(ddp_prev);
1442 }
1443 salp_tmp = salp->next;
1444 sap_prev->next = salp_tmp;
1445 salp = salp_tmp;
1446 retval = TRUE;
1447 } else if (salp->segtype == SAS_DENSEG)
1448 {
1449 if (!sap_head)
1450 sap_head = sap_prev = salp;
1451 else
1452 {
1453 sap_prev->next = salp;
1454 sap_prev = salp;
1455 }
1456 dsp = (DenseSegPtr)salp->segs;
1457 if (dsp->strands == NULL)
1458 {
1459 dsp->strands = (Uint1Ptr)MemNew((dsp->dim)*(dsp->numseg)* sizeof(Uint1));
1460 for (i=0; i<(dsp->dim)*(dsp->numseg); i++)
1461 {
1462 dsp->strands[i] = Seq_strand_plus;
1463 }
1464 }
1465 salp = salp->next;
1466 retval = TRUE;
1467 } else if (salp->segtype == SAS_STD)
1468 {
1469 sap_prev = sap_head = NULL;
1470 ssp = (StdSegPtr)salp->segs;
1471 while (ssp)
1472 {
1473 sap_new = SeqAlignNew();
1474 if (sap_head)
1475 {
1476 sap_prev->next = sap_new;
1477 sap_prev = sap_new;
1478 } else
1479 {
1480 sap_head = sap_prev = sap_new;
1481 }
1482 sap_new->segtype = SAS_STD;
1483 sap_new->type = SAT_GLOBAL;
1484 sap_new->segs = (Pointer)ssp;
1485 ssp_next = ssp->next;
1486 ssp->next = NULL;
1487 ssp = ssp_next;
1488 }
1489 salp_tmp = salp->next;
1490 salp = (Pointer)sap_head;
1491 sap_prev->next = salp_tmp;
1492 salp = salp_tmp;
1493 }
1494 }
1495 sap->segs = (Pointer)sap_head;
1496 return retval;
1497 }
1498
1499
1500 /***********************************************************************
1501 *
1502 * AlnMgrIndexLinkedSegs and AlnMgrIndexParentSA create and fill in the
1503 * SASeqIndex and AMAlignIndex structures on the children and the parent,
1504 * respectively. IndexLinkedSegs is called on the sap->segs field of
1505 * the parent, so that the pointer of the first child in the list
1506 * gets passed in. AlnMgrIndexParentSA is called on the parent, and
1507 * the children must already be indexed (the function does check) in order
1508 * for it to work. AlnMgrIndexParentSA calls AlnMgrPropagateUpSeqIdPtrs
1509 * to create a list of all SeqIdPtrs present in all the children (each
1510 * is only listed once, in the order that its AMAlignDat structure occurs
1511 * in).
1512 *
1513 ***********************************************************************/
AlnMgrIndexLinkedSegs(SeqAlignPtr sap)1514 NLM_EXTERN Boolean AlnMgrIndexLinkedSegs (SeqAlignPtr sap)
1515 { /* all the Uint2's may have to be changed to Uint4's */
1516 Int4 currseq;
1517 DenseSegPtr dsp;
1518 Uint2 i;
1519 Uint4 qlen;
1520 Boolean retval;
1521 SAIndexPtr saip;
1522 SASeqDatPtr ssdp;
1523
1524 retval = FALSE;
1525 while (sap)
1526 {
1527 if (sap->segtype == SAS_DENSEG)
1528 {
1529 dsp = (DenseSegPtr)sap->segs;
1530 saip = SAIndexNew();
1531 saip->aligncoords = (Uint4Ptr)MemNew((dsp->numseg+1)*sizeof(Uint4));
1532 qlen = 0;
1533 saip->ssdp = (SASeqDatPtr PNTR)MemNew((dsp->dim+1)*sizeof(SASeqDatPtr));
1534 saip->numseqs = dsp->dim+1;
1535 for (i = 0; i<(dsp->dim); i++)
1536 {
1537 ssdp = SASeqDatNew();
1538 saip->ssdp[i] = ssdp;
1539 }
1540 for (i = 0; i<(dsp->numseg); i++)
1541 {
1542 saip->aligncoords[i] = qlen;
1543 qlen += dsp->lens[i];
1544 for (currseq = 0; currseq<(dsp->dim); currseq++)
1545 {
1546 if ((dsp->starts[dsp->dim*i+currseq]) != -1)
1547 {
1548 saip->ssdp[currseq]->numsect++;
1549 }
1550 }
1551 }
1552 for (currseq = 0; currseq<(dsp->dim); currseq++)
1553 {
1554 saip->ssdp[currseq]->sect = (Uint2Ptr)MemNew((saip->ssdp[currseq]->numsect)*sizeof(Uint2));
1555 saip->ssdp[currseq]->unsect = (Uint2Ptr)MemNew((dsp->numseg - saip->ssdp[currseq]->numsect)*sizeof(Uint2));
1556 saip->ssdp[currseq]->numsect = 0;
1557 }
1558 for (i=0; i<(dsp->numseg); i++)
1559 {
1560 for (currseq=0; currseq<(dsp->dim); currseq++)
1561 {
1562 if ((dsp->starts[dsp->dim*i+currseq]) != -1)
1563 {
1564 saip->ssdp[currseq]->sect[saip->ssdp[currseq]->numsect] = i;
1565 saip->ssdp[currseq]->numsect++;
1566 } else
1567 {
1568 saip->ssdp[currseq]->unsect[saip->ssdp[currseq]->numunsect]=i;
1569 saip->ssdp[currseq]->numunsect++;
1570 }
1571 }
1572 }
1573 saip->indextype = INDEX_SEGS;
1574 sap->saip = (SeqAlignIndexPtr)saip;
1575 }
1576 sap = sap->next;
1577 retval = TRUE;
1578 }
1579 return retval;
1580 }
1581
AlnMgrIndexParentSA(SeqAlignPtr sap)1582 NLM_EXTERN Boolean AlnMgrIndexParentSA(SeqAlignPtr sap)
1583 {
1584 AMAlignDatPtr amadp;
1585 AMAlignIndexPtr amaip;
1586 Int4 count;
1587 Boolean done;
1588 Int4 i;
1589 Int4 notfound;
1590 Int4 numsap;
1591 Boolean retval;
1592 SeqAlignPtr salp;
1593 SeqIdPtr sip;
1594
1595 retval = FALSE;
1596 if (!sap)
1597 return retval;
1598 if (sap->segtype != SAS_DISC)
1599 return retval;
1600 if (((SeqAlignPtr)(sap->segs))->saip == NULL)
1601 {
1602 if (!AlnMgrIndexLinkedSegs((SeqAlignPtr)(sap->segs)))
1603 return retval;
1604 }
1605 amaip = (AMAlignIndexPtr)sap->saip;
1606 if (amaip)
1607 sap->saip = (Pointer)AMAlignIndexFree(amaip);
1608 sap->saip = NULL;
1609 amaip = AMAlignIndexNew();
1610 count = 0;
1611 amaip->indextype = INDEX_PARENT;
1612 amaip->ids = AlnMgrPropagateUpSeqIdPtrs(sap, &count);
1613 sip = amaip->ids;
1614 amaip->numbsqs = count;
1615 amaip->amadp = (AMAlignDatPtr PNTR)MemNew((count+1)*sizeof(AMAlignDatPtr));
1616 for (count = 0; count < amaip->numbsqs; count++)
1617 {
1618 amadp = AMAlignDatNew();
1619 amaip->amadp[count] = amadp;
1620 numsap = 0;
1621 /*amadp->saps = AlnMgrSortSeqAligns((SeqAlignPtr)sap->segs, AlnMgrCompareIncreasingBySeqIdPtr, sip, &numsap);*/
1622 amadp->saps = AlnMgrSortbyID((SeqAlignPtr)sap->segs, sip, &numsap);
1623 done = FALSE;
1624 notfound = 0;
1625 for (i=0; i<numsap && !done; i++)
1626 {
1627 if (AlnMgrGetNForSip(amadp->saps[i], sip) < 0)
1628 {
1629 notfound++;
1630 } else
1631 {
1632 done = TRUE;
1633 }
1634 }
1635 amadp->numsaps = numsap - notfound;
1636 for (i=0; i<(numsap - notfound); i++)
1637 {
1638 amadp->saps[i] = amadp->saps[i+notfound];
1639 }
1640 for (i=(numsap - notfound); i<numsap; i++)
1641 {
1642 amadp->saps[i] = NULL;
1643 }
1644 amadp->sip = SeqIdDup(sip);
1645 sip = sip->next;
1646 }
1647 i = 0;
1648 salp = (SeqAlignPtr)sap->segs;
1649 while (salp)
1650 {
1651 i++;
1652 salp = salp->next;
1653 }
1654 amaip->numsaps = i;
1655 amaip->parent = sap;
1656 sap->saip = (Pointer)amaip;
1657 retval = TRUE;
1658 return retval;
1659 }
1660
AlnMgrPropagateUpSeqIdPtrs(SeqAlignPtr sap,Int4Ptr num)1661 NLM_EXTERN SeqIdPtr AlnMgrPropagateUpSeqIdPtrs(SeqAlignPtr sap, Int4Ptr num)
1662 {
1663 Int4 count;
1664 DenseSegPtr dsp;
1665 Boolean found;
1666 SeqAlignPtr salp;
1667 SeqIdPtr sip_head;
1668 SeqIdPtr sip_list;
1669 SeqIdPtr sip_tmp;
1670 SeqIdPtr sip_tmp2;
1671
1672 if (!sap)
1673 return NULL;
1674 if (sap->segtype == SAS_DISC)
1675 salp = (SeqAlignPtr)(sap->segs);
1676 else
1677 salp = sap;
1678 count = 0;
1679 sip_list = sip_head = NULL;
1680 while (salp)
1681 {
1682 dsp = (DenseSegPtr)salp->segs;
1683 sip_tmp = dsp->ids;
1684 if (!sip_list)
1685 {
1686 sip_head = sip_list = SeqIdDup(sip_tmp);
1687 sip_tmp = sip_tmp->next;
1688 count++;
1689 }
1690 while (sip_tmp)
1691 {
1692 sip_tmp2 = sip_head;
1693 found = FALSE;
1694 while (sip_tmp2 && !found)
1695 {
1696 if (SeqIdComp(sip_tmp, sip_tmp2) == SIC_YES)
1697 found = TRUE;
1698 sip_tmp2 = sip_tmp2->next;
1699 }
1700 if (!found)
1701 {
1702 sip_list->next = SeqIdDup(sip_tmp);
1703 sip_list = sip_list->next;
1704 sip_list->next = NULL;
1705 count++;
1706 }
1707 sip_tmp = sip_tmp->next;
1708 }
1709 salp = salp->next;
1710 }
1711 if (num)
1712 *num = count;
1713 return sip_head;
1714 }
1715
AlnMgrPropagateSeqIdsBySapList(AMAlignIndexPtr amaip)1716 NLM_EXTERN SeqIdPtr AlnMgrPropagateSeqIdsBySapList(AMAlignIndexPtr amaip)
1717 {
1718 DenseSegPtr dsp;
1719 Int4 i;
1720 Int4 j;
1721 SAIndexPtr saip;
1722 SeqAlignPtr salp;
1723 SeqIdPtr sip;
1724 SeqIdPtr sip_head;
1725 SeqIdPtr sip_tmp;
1726 SeqIdPtr sip_tmp2;
1727
1728 if (amaip == NULL)
1729 return NULL;
1730 if (amaip->saps == NULL)
1731 return NULL;
1732 sip_head = NULL;
1733 for (i=0; i<(amaip->alnsaps); i++)
1734 {
1735 j=1;
1736 salp = amaip->saps[i];
1737 saip = (SAIndexPtr)salp->saip;
1738 dsp = (DenseSegPtr)(salp->segs);
1739 sip_tmp = dsp->ids;
1740 while (j<saip->master)
1741 {
1742 sip_tmp = sip_tmp->next;
1743 j++;
1744 }
1745 if (sip_head == NULL)
1746 sip_head = sip = SeqIdDup(sip_tmp);
1747 sip_tmp = dsp->ids;
1748 j=0;
1749 while (sip_tmp)
1750 {
1751 j++;
1752 if (j!=saip->master)
1753 {
1754 sip_tmp2 = SeqIdDup(sip_tmp);
1755 sip->next = sip_tmp2;
1756 sip = sip->next;
1757 }
1758 sip_tmp = sip_tmp->next;
1759 }
1760 }
1761 return sip_head;
1762 }
1763
AlnMgrPropagateSeqIdsByRow(AMAlignIndexPtr amaip)1764 NLM_EXTERN SeqIdPtr AlnMgrPropagateSeqIdsByRow(AMAlignIndexPtr amaip)
1765 {
1766 Int4 i;
1767 SeqIdPtr sip;
1768 SeqIdPtr sip_head;
1769 SeqIdPtr sip_tmp;
1770
1771 if (amaip->rowsource == NULL)
1772 return NULL;
1773 sip_head = sip = SeqIdDup(amaip->rowsource[0]->id);
1774 for (i=1; i<amaip->numrows; i++)
1775 {
1776 sip_tmp = SeqIdDup(amaip->rowsource[i]->id);
1777 sip->next = sip_tmp;
1778 sip = sip->next;
1779 }
1780 return sip_head;
1781 }
1782
1783 /***************************************************************************
1784 *
1785 * AlnMgrRemoveInconsistentFromPairwiseSet is a greedy function to make
1786 * a consistent (nonoverlapping, linear) subset of alignments from a
1787 * set of pairwise alignments (often BLAST output, gapped or ungapped).
1788 * The input seqalign should either not be indexed or indexed using
1789 * AlnMgrIndexLite (just call it on the BLAST output). fuzz specifies
1790 * how much overlap, if any, is allowed between alignments that are kept (for
1791 * example, if fuzz = 5, any alignments that overlap by 5 or less are
1792 * considered consistent). If fuzz is less than 0, this will force spaces
1793 * between alignments (not sure why someone would want to do that, but
1794 * it is allowed).
1795 *
1796 * The "Ex" version also returns the Discarded SeqAligns separated into two
1797 * lists according to the reason for their rejection.
1798 *
1799 *
1800 ***************************************************************************/
AlnMgrRemoveInconsistentFromPairwiseSet(SeqAlignPtr sap,Int4 fuzz)1801 NLM_EXTERN void AlnMgrRemoveInconsistentFromPairwiseSet(SeqAlignPtr sap, Int4 fuzz) {
1802 AlnMgrRemoveInconsistentFromPairwiseSetEx(sap, fuzz,NULL,NULL,NULL);
1803 }
1804
1805
AlnMgrRemoveInconsistentFromPairwiseSetEx(SeqAlignPtr sap,Int4 fuzz,SeqAlignPtr PNTR wrong_strand,SeqAlignPtr PNTR overlaps_m,SeqAlignPtr PNTR overlaps_s)1806 NLM_EXTERN void AlnMgrRemoveInconsistentFromPairwiseSetEx(SeqAlignPtr sap, Int4 fuzz, SeqAlignPtr PNTR wrong_strand, SeqAlignPtr PNTR overlaps_m,SeqAlignPtr PNTR overlaps_s)
1807 {
1808 AMAlignInfoPtr aip_list;
1809 AMAlignIndexPtr amaip;
1810 FloatHi bit_score;
1811 Boolean conflict;
1812 FloatHi evalue;
1813 Int4 i;
1814 Int4 j;
1815 Int4 number;
1816 SAIndexPtr saip1;
1817 SAIndexPtr saip2;
1818 SeqAlignPtr salp;
1819 Int4 score;
1820 Int4 start1;
1821 Int4 start2;
1822 Int4 startm1;
1823 Int4 startm2;
1824 Int4 stop1;
1825 Int4 stop2;
1826 Int4 stopm1;
1827 Int4 stopm2;
1828 Uint1 strand;
1829 Uint1 strand_curr;
1830 Uint1 conflict_type;
1831 if(wrong_strand)
1832 *wrong_strand = NULL;
1833 if(overlaps_m)
1834 *overlaps_m=NULL;
1835 if(overlaps_s)
1836 *overlaps_s=NULL;
1837 if (sap == NULL || (sap->saip != NULL && sap->saip->indextype != INDEX_PARENT))
1838 return;
1839 if (sap->saip == NULL)
1840 {
1841 if (!AlnMgrIndexLite(sap))
1842 return;
1843 }
1844 amaip = (AMAlignIndexPtr)(sap->saip);
1845 if (amaip->numbsqs > 2)
1846 return;
1847
1848 salp = (SeqAlignPtr)(sap->segs);
1849 aip_list = (AMAlignInfoPtr)MemNew((amaip->numsaps)*sizeof(AMAlignInfo));
1850 for (i=0; i<amaip->numsaps && salp != NULL; i++)
1851 {
1852 aip_list[i].align = salp;
1853 GetScoreAndEvalue(salp, &score, &bit_score, &evalue, &number);
1854 aip_list[i].align_len = score;
1855 salp = salp->next;
1856 }
1857 HeapSort(aip_list, amaip->numsaps, sizeof(AMAlignInfo), AMCompareAlignInfoProc);
1858 saip1 = (SAIndexPtr)aip_list[0].align->saip;
1859 if (saip1 == NULL)
1860 return;
1861 strand = AlnMgrGetNthStrand(aip_list[0].align, 2);
1862 if (strand != Seq_strand_minus)
1863 strand = Seq_strand_plus;
1864 amaip->alnsaps = 0;
1865 for (i=0; i<amaip->numsaps; i++)
1866 {
1867 if ((saip1 = (SAIndexPtr)aip_list[i].align->saip) == NULL)
1868 return;
1869 AlnMgrGetNthSeqRangeInSA(aip_list[i].align, 1, &startm1, &stopm1);
1870 AlnMgrGetNthSeqRangeInSA(aip_list[i].align, 2, &start1, &stop1);
1871 strand_curr = AlnMgrGetNthStrand(aip_list[i].align, 2);
1872 if (strand_curr != Seq_strand_minus)
1873 strand_curr = Seq_strand_plus;
1874 if (strand_curr != strand) {
1875 conflict = TRUE;
1876 conflict_type = 1; /* wrong strand */
1877 } else
1878 conflict = FALSE;
1879 for (j=0; j<amaip->alnsaps && !conflict; j++)
1880 {
1881 if ((saip2 = (SAIndexPtr)(amaip->saps[j]->saip)) == NULL)
1882 return;
1883 AlnMgrGetNthSeqRangeInSA(amaip->saps[j], 1, &startm2, &stopm2);
1884 AlnMgrGetNthSeqRangeInSA(amaip->saps[j], 2, &start2, &stop2);
1885 if (startm1 < startm2)
1886 {
1887 if (stopm1 >= startm2 + fuzz) {
1888 conflict = TRUE;
1889 conflict_type = 2 ; /* overlap on master */
1890 } else if (strand == Seq_strand_minus)
1891 {
1892 if (start1 <= stop2 - fuzz) {
1893 conflict = TRUE;
1894 conflict_type = 3 ; /*overlap on query */
1895 }
1896 } else
1897 {
1898 if (stop1 >= start2 + fuzz) {
1899 conflict = TRUE;
1900 conflict_type = 3 ; /*overlap on query */
1901 }
1902 }
1903 } else if (startm1 > startm2)
1904 {
1905 if (startm1 <= stopm2 - fuzz) {
1906 conflict = TRUE;
1907 conflict_type = 2 ; /* overlap on master */
1908 } else if (strand == Seq_strand_minus)
1909 {
1910 if (stop1 >= start2 + fuzz) {
1911 conflict = TRUE;
1912 conflict_type = 3 ; /*overlap on query */
1913 }
1914 } else
1915 {
1916 if (stop2 >= start1 + fuzz) {
1917 conflict = TRUE;
1918 conflict_type = 3 ; /*overlap on query */
1919 }
1920 }
1921 } else if (startm1 == startm2) {
1922 conflict = TRUE;
1923 conflict_type = 2 ; /*overlap on master */
1924 }
1925 }
1926 if (!conflict)
1927 {
1928 amaip->saps[amaip->alnsaps] = aip_list[i].align;
1929 amaip->alnsaps++;
1930 } else {
1931 switch(conflict_type) {
1932 case 1:
1933 if(wrong_strand) {
1934 if(*wrong_strand) {
1935 aip_list[i].align->next = *wrong_strand;
1936 }
1937 *wrong_strand = aip_list[i].align;
1938 } else {
1939 SeqAlignFree(aip_list[i].align);
1940 }
1941 break;
1942 case 2:
1943 if(overlaps_m) {
1944 if(*overlaps_m) {
1945 aip_list[i].align->next = *overlaps_m;
1946 }
1947 *overlaps_m = aip_list[i].align;
1948 } else {
1949 SeqAlignFree(aip_list[i].align);
1950 }
1951 break;
1952 case 3:
1953 if(overlaps_s) {
1954 if(*overlaps_s) {
1955 aip_list[i].align->next = *overlaps_s;
1956 }
1957 *overlaps_s = aip_list[i].align;
1958 } else {
1959 SeqAlignFree(aip_list[i].align);
1960 }
1961 break;
1962 default:
1963 SeqAlignFree(aip_list[i].align);
1964 }
1965 }
1966 }
1967 MemFree(aip_list);
1968 AlnMgrDeleteHiddenEx(sap, FALSE,FALSE);
1969 {
1970 /* Revert the SeqAlign order, to reorder them by score */
1971 SeqAlignPtr salp_next,salp_last=NULL;
1972 if(wrong_strand && *wrong_strand) {
1973 salp_next = (*wrong_strand)->next;
1974 while(salp_next!=NULL) {
1975 (*wrong_strand)->next = salp_last;
1976 salp_last = *wrong_strand;
1977 *wrong_strand = salp_next;
1978 salp_next = (*wrong_strand)->next;
1979 }
1980 }
1981 if(overlaps_m && *overlaps_m) {
1982 salp_next = (*overlaps_m)->next;
1983 while(salp_next!=NULL) {
1984 (*overlaps_m)->next = salp_last;
1985 salp_last = *overlaps_m;
1986 *overlaps_m = salp_next;
1987 salp_next = (*overlaps_m)->next;
1988 }
1989 }
1990 if(overlaps_s && *overlaps_s) {
1991 salp_next = (*overlaps_s)->next;
1992 while(salp_next!=NULL) {
1993 (*overlaps_s)->next = salp_last;
1994 salp_last = *overlaps_s;
1995 *overlaps_s = salp_next;
1996 salp_next = (*overlaps_s)->next;
1997 }
1998 }
1999
2000 }
2001 }
2002
2003
AlnMgrMakeMultipleByScore(SeqAlignPtr sap)2004 NLM_EXTERN Boolean AlnMgrMakeMultipleByScore(SeqAlignPtr sap)
2005 {
2006 return (AlnMgrMakeMultipleByScoreExEx(sap, 0,NULL,NULL,NULL));
2007 }
2008
AlnMgrMakeMultipleByScoreEx(SeqAlignPtr sap,Int4 fuzz)2009 NLM_EXTERN Boolean AlnMgrMakeMultipleByScoreEx(SeqAlignPtr sap, Int4 fuzz) {
2010 return (AlnMgrMakeMultipleByScoreExEx(sap, fuzz,NULL,NULL,NULL));
2011 }
2012
2013
AlnMgrMakeMultipleByScoreExEx(SeqAlignPtr sap,Int4 fuzz,SeqAlignPtr PNTR wrong_strand,SeqAlignPtr PNTR overlaps_m,SeqAlignPtr PNTR overlaps_s)2014 NLM_EXTERN Boolean AlnMgrMakeMultipleByScoreExEx(SeqAlignPtr sap, Int4 fuzz,SeqAlignPtr PNTR wrong_strand, SeqAlignPtr PNTR overlaps_m,SeqAlignPtr PNTR overlaps_s)
2015 {
2016 AMAlignIndexPtr amaip;
2017 FloatHi bit_score;
2018 Boolean conflict;
2019 FloatHi evalue;
2020 Int4 i;
2021 Int4 j;
2022 Int4 n;
2023 Int4 number;
2024 SAIndexPtr saip1;
2025 SAIndexPtr saip2;
2026 SeqAlignPtr salp;
2027 AMAlignInfoPtr salp_list;
2028 SeqAlignPtr PNTR saparray;
2029 Int4 score;
2030 SeqIdPtr sip;
2031 Int4 start1;
2032 Int4 start2;
2033 Int4 startm1;
2034 Int4 startm2;
2035 Int4 stop1;
2036 Int4 stop2;
2037 Int4 stopm1;
2038 Int4 stopm2;
2039 Int4 fuzzstop;
2040 Uint1 strand;
2041 Uint1 strand_curr;
2042 AMTinyInfoPtr PNTR tiparray;
2043 Uint1 conflict_type;
2044
2045 if(wrong_strand)
2046 *wrong_strand = NULL;
2047 if(overlaps_m)
2048 *overlaps_m=NULL;
2049 if(overlaps_s)
2050 *overlaps_s=NULL;
2051
2052 if (sap == NULL)
2053 return FALSE;
2054 i = AlnMgrCheckAlignForParent(sap);
2055 if (i != AM_PARENT)
2056 return FALSE;
2057 amaip = (AMAlignIndexPtr)sap->saip;
2058 if (amaip == NULL)
2059 return FALSE;
2060 if (amaip->numbsqs > 2)
2061 return FALSE;
2062 if (sap->master == NULL)
2063 return FALSE;
2064 salp = (SeqAlignPtr)sap->segs;
2065 n = amaip->numsaps;
2066 if(n==1)
2067 return TRUE;
2068
2069 salp_list = Calloc(n, sizeof (AMAlignInfo));
2070 for (i=0; i<n; i++, salp=salp->next)
2071 {
2072 salp_list[i].align=salp;
2073 GetScoreAndEvalue(salp, &score, &bit_score, &evalue, &number);
2074 salp_list[i].align_len = score;
2075 }
2076 HeapSort (salp_list, n, sizeof (AMAlignInfo), AMCompareAlignInfoProc);
2077 saip1 = (SAIndexPtr)salp_list[0].align->saip;
2078 if (saip1 == NULL)
2079 return FALSE;
2080 strand = AlnMgrGetNthStrand(salp_list[0].align, 3-saip1->master);
2081 if (strand != Seq_strand_minus)
2082 strand = Seq_strand_plus;
2083 amaip->alnsaps = 0;
2084 for (i=0; i<n; i++)
2085 {
2086 if ((saip1 = (SAIndexPtr)salp_list[i].align->saip) == NULL)
2087 return FALSE;
2088 AlnMgrGetNthSeqRangeInSA(salp_list[i].align, saip1->master, &startm1, &stopm1);
2089 AlnMgrGetNthSeqRangeInSA(salp_list[i].align, 3-saip1->master, &start1, &stop1);
2090 strand_curr = AlnMgrGetNthStrand(salp_list[i].align, 3-saip1->master);
2091 if (strand_curr != Seq_strand_minus)
2092 strand_curr = Seq_strand_plus;
2093 if (strand_curr != strand) {
2094 conflict = TRUE;
2095 conflict_type = 1;
2096 } else
2097 conflict = FALSE;
2098 for (j=0; j<amaip->alnsaps && !conflict; j++)
2099 {
2100 if ((saip2 = (SAIndexPtr)(amaip->saps[j]->saip)) == NULL)
2101 return FALSE;
2102 AlnMgrGetNthSeqRangeInSA(amaip->saps[j], saip2->master, &startm2, &stopm2);
2103 AlnMgrGetNthSeqRangeInSA(amaip->saps[j], 3-saip2->master, &start2, &stop2);
2104 if (startm1 < startm2)
2105 {
2106 fuzzstop = stopm1-fuzz;
2107 if (fuzzstop<startm1)
2108 fuzzstop = startm1;
2109 if (fuzzstop >= startm2) {
2110 conflict = TRUE;
2111 conflict_type = 2; /* overlap on master */
2112 } else if (strand == Seq_strand_minus)
2113 {
2114 fuzzstop = stop2-fuzz;
2115 if (fuzzstop<start2)
2116 fuzzstop = start2;
2117 if (start1 <= fuzzstop) {
2118 conflict = TRUE;
2119 conflict_type = 3; /* overlap on subject */
2120 }
2121 } else
2122 {
2123 fuzzstop = stop1-fuzz;
2124 if (fuzzstop<start1)
2125 fuzzstop = start1;
2126 if (fuzzstop >= start2) {
2127 conflict = TRUE;
2128 conflict_type = 3; /* overlap on subject */
2129 }
2130 }
2131 } else if (startm1 > startm2)
2132 {
2133 fuzzstop = stopm2-fuzz;
2134 if (fuzzstop<startm2)
2135 fuzzstop = startm2;
2136 if (startm1 <= fuzzstop) {
2137 conflict = TRUE;
2138 conflict_type = 2; /* overlap on master */
2139 } else if (strand == Seq_strand_minus)
2140 {
2141 fuzzstop = stop1-fuzz;
2142 if (fuzzstop<start1)
2143 fuzzstop = start1;
2144 if (fuzzstop >= start2) {
2145 conflict = TRUE;
2146 conflict_type = 3; /* overlap on subject */
2147 }
2148 } else
2149 {
2150 fuzzstop = stop2-fuzz;
2151 if (fuzzstop<start2)
2152 fuzzstop = start2;
2153 if (fuzzstop >= start1) {
2154 conflict = TRUE;
2155 conflict_type = 3; /* overlap on subject */
2156 }
2157 }
2158 } else if (startm1 == startm2) {
2159 conflict = TRUE;
2160 conflict_type = 2; /* overlap on master */
2161 }
2162 }
2163 if (!conflict)
2164 {
2165 amaip->saps[amaip->alnsaps] = salp_list[i].align;
2166 amaip->alnsaps++;
2167 } else {
2168 switch(conflict_type) {
2169 case 1:
2170 if(wrong_strand) {
2171 if(*wrong_strand) {
2172 (salp_list[i].align)->next = *wrong_strand;
2173 }
2174 *wrong_strand = salp_list[i].align;
2175 } else {
2176 SeqAlignFree(salp_list[i].align);
2177 }
2178 break;
2179 case 2:
2180 if(overlaps_m) {
2181 if(*overlaps_m) {
2182 (salp_list[i].align)->next = *overlaps_m;
2183 }
2184 *overlaps_m = salp_list[i].align;
2185 } else {
2186 SeqAlignFree(salp_list[i].align);
2187 }
2188 break;
2189 case 3:
2190 if(overlaps_s) {
2191 if(*overlaps_s) {
2192 (salp_list[i].align)->next = *overlaps_s;
2193 }
2194 *overlaps_s = salp_list[i].align;
2195 } else {
2196 SeqAlignFree(salp_list[i].align);
2197 }
2198 break;
2199 default:
2200 SeqAlignFree(salp_list[i].align);
2201 }
2202 }
2203 }
2204 tiparray = (AMTinyInfoPtr PNTR)MemNew((amaip->alnsaps)*sizeof(AMTinyInfoPtr));
2205 for (i=0; i<amaip->alnsaps; i++)
2206 {
2207 saip1 = (SAIndexPtr)amaip->saps[i]->saip;
2208 AlnMgrGetNthSeqRangeInSA(amaip->saps[i], saip1->master, &start1, &stop1);
2209 tiparray[i] = (AMTinyInfoPtr)MemNew(sizeof(AMTinyInfo));
2210 tiparray[i]->start = start1;
2211 tiparray[i]->stop = stop1;
2212 tiparray[i]->numgap = saip1->master;
2213 tiparray[i]->numsap = i;
2214 }
2215 HeapSort((Pointer)tiparray, (size_t)(amaip->alnsaps), sizeof(AMTinyInfoPtr), AlnMgrCompareTips);
2216 saparray = (SeqAlignPtr PNTR)(MemNew((amaip->alnsaps)*sizeof(SeqAlignPtr)));
2217 for (i=0; i<amaip->alnsaps; i++)
2218 {
2219 saparray[i] = amaip->saps[i];
2220 }
2221 for (i=0; i<amaip->alnsaps; i++)
2222 {
2223 amaip->saps[i] = saparray[tiparray[i]->numsap];
2224 tiparray[i]->numsap = i;
2225 }
2226 MemFree(saparray);
2227 amaip->numseg = amaip->alnsaps;
2228 amaip->aligncoords = (Uint4Ptr)MemNew((amaip->alnsaps)*sizeof(Uint4));
2229 amaip->lens = (Int4Ptr)MemNew((amaip->alnsaps)*sizeof(Int4));
2230 amaip->rowsource = (RowSourcePtr PNTR)MemNew(2*sizeof(RowSourcePtr));
2231 amaip->rowsource[0] = (RowSourcePtr)MemNew(sizeof(RowSource));
2232 amaip->rowsource[0]->id = SeqIdDup(sap->master);
2233 amaip->rowsource[0]->which_saps = (Uint4Ptr)MemNew((amaip->alnsaps+1)*sizeof(Uint4));
2234 amaip->rowsource[0]->num_in_sap = (Uint4Ptr)MemNew((amaip->alnsaps+1)*sizeof(Uint4));
2235 amaip->rowsource[1] = (RowSourcePtr)MemNew(sizeof(RowSource));
2236 sip = AlnMgrGetNthSeqIdPtr(amaip->saps[0], 3-((SAIndexPtr)amaip->saps[0]->saip)->master);
2237 amaip->rowsource[1]->id = sip;
2238 amaip->rowsource[1]->which_saps = (Uint4Ptr)MemNew((amaip->alnsaps+1)*sizeof(Uint4));
2239 amaip->rowsource[1]->num_in_sap = (Uint4Ptr)MemNew((amaip->alnsaps+1)*sizeof(Uint4));
2240 for (i=0; i<amaip->alnsaps; i++)
2241 {
2242 amaip->rowsource[0]->which_saps[i] = amaip->rowsource[1]->which_saps[i] = tiparray[i]->numsap + 1;
2243 amaip->rowsource[0]->num_in_sap[i] = tiparray[i]->numgap;
2244 amaip->rowsource[1]->num_in_sap[i] = 3-tiparray[i]->numgap;
2245 amaip->lens[i] = AlnMgrGetAlnLength(amaip->saps[tiparray[i]->numsap], FALSE);
2246 if (i>0)
2247 amaip->aligncoords[i] = amaip->aligncoords[i-1] + amaip->lens[i-1];
2248 else
2249 amaip->aligncoords[i] = 0;
2250 }
2251 amaip->rowsource[0]->numsaps = amaip->rowsource[1]->numsaps = amaip->alnsaps;
2252 amaip->master = 1;
2253 amaip->numrows = 2;
2254 for (i=0; i<amaip->alnsaps; i++)
2255 {
2256 MemFree(tiparray[i]);
2257 }
2258 MemFree(tiparray);
2259 sap->segs = (Pointer)(amaip->saps[0]);
2260 for (i=1; i<amaip->alnsaps; i++)
2261 {
2262 amaip->saps[i-1]->next = amaip->saps[i];
2263 }
2264 amaip->saps[amaip->alnsaps-1]->next = NULL;
2265 /* amaip->numsaps = amaip->alnsaps; */
2266 MemFree(salp_list);
2267 sap->type = SAT_MASTERSLAVE;
2268 amaip->mstype = AM_SEGMENTED_MASTERSLAVE;
2269 AlnMgrDeleteHiddenEx(sap, FALSE,FALSE);
2270 return TRUE;
2271 }
2272
AlnMgrDupTopNByScore(SeqAlignPtr sap,Int4 n)2273 NLM_EXTERN SeqAlignPtr AlnMgrDupTopNByScore(SeqAlignPtr sap, Int4 n)
2274 {
2275 AMAlignIndexPtr amaip;
2276 FloatHi bit_score;
2277 FloatHi evalue;
2278 Int4 i;
2279 Int4 num;
2280 Int4 number;
2281 SeqAlignPtr salp;
2282 SeqAlignPtr salp_head;
2283 AMAlignInfoPtr salp_list;
2284 SeqAlignPtr salp_prev;
2285 Int4 score;
2286
2287 if (sap == NULL)
2288 return FALSE;
2289 i = AlnMgrCheckAlignForParent(sap);
2290 if (i != AM_PARENT)
2291 return FALSE;
2292 amaip = (AMAlignIndexPtr)sap->saip;
2293 if (amaip == NULL)
2294 return FALSE;
2295 salp = (SeqAlignPtr)sap->segs;
2296 num = amaip->numsaps;
2297 if (n > num || n < 0)
2298 n = num;
2299 salp_list = Calloc(num, sizeof (AMAlignInfo));
2300 for (i=0; i<num; i++, salp=salp->next)
2301 {
2302 salp_list[i].align=salp;
2303 GetScoreAndEvalue(salp, &score, &bit_score, &evalue, &number);
2304 salp_list[i].align_len = score;
2305 }
2306 HeapSort (salp_list, num, sizeof (AMAlignInfo), AMCompareAlignInfoProc);
2307 salp_head = salp_prev = SeqAlignDup(salp_list[0].align);
2308 for (i=1; i<n; i++)
2309 {
2310 salp_prev->next = SeqAlignDup(salp_list[i].align);
2311 salp_prev = salp_prev->next;
2312 }
2313 return salp_head;
2314 }
2315
AMCompareAlignInfoProc(VoidPtr ptr1,VoidPtr ptr2)2316 static int LIBCALLBACK AMCompareAlignInfoProc(VoidPtr ptr1, VoidPtr ptr2)
2317 {
2318 AMAlignInfoPtr aip_1;
2319 AMAlignInfoPtr aip_2;
2320 if (ptr1 != NULL && ptr2 != NULL)
2321 {
2322 aip_1 = (AMAlignInfoPtr) ptr1;
2323 aip_2 = (AMAlignInfoPtr) ptr2;
2324 if(aip_1->align_len > aip_2->align_len)
2325 return -1;
2326 else if(aip_1->align_len < aip_2->align_len)
2327 return 1;
2328 else
2329 return 0;
2330 }
2331 return 0;
2332 }
2333
2334 /***************************************************************************
2335 *
2336 * AlnMgrSortAlnSetByNthRowPos sorts a set of alignments so that they
2337 * are in (increasing) order along the specified row (to make sense, this
2338 * set of alignments should all have the same rows).
2339 *
2340 ***************************************************************************/
AlnMgrSortAlnSetByNthRowPos(SeqAlignPtr sap,Int4 row)2341 NLM_EXTERN void AlnMgrSortAlnSetByNthRowPos(SeqAlignPtr sap, Int4 row)
2342 {
2343 AMAlignIndexPtr amaip;
2344 AMBittyPtr bit;
2345 Int4 i;
2346 SeqAlignPtr PNTR saparray;
2347
2348 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
2349 return;
2350 amaip = (AMAlignIndexPtr)(sap->saip);
2351 bit = (AMBittyPtr)MemNew((amaip->numsaps)*sizeof(AMBitty));
2352 saparray = (SeqAlignPtr PNTR)MemNew((amaip->numsaps)*sizeof(SeqAlignPtr));
2353 for (i=0; i<amaip->numsaps; i++)
2354 {
2355 bit[i].num2 = i;
2356 AlnMgrGetNthSeqRangeInSA(amaip->saps[i], row, &bit[i].num1, NULL);
2357 saparray[i] = amaip->saps[i];
2358 }
2359 HeapSort(bit, amaip->numsaps, sizeof(AMBitty), AMCompareStarts);
2360 for (i=0; i<amaip->numsaps; i++)
2361 {
2362 amaip->saps[i] = saparray[bit[i].num2];
2363 }
2364 MemFree(saparray);
2365 MemFree(bit);
2366 }
2367
AMCompareStarts(VoidPtr ptr1,VoidPtr ptr2)2368 static int LIBCALLBACK AMCompareStarts(VoidPtr ptr1, VoidPtr ptr2)
2369 {
2370 AMBittyPtr bit1;
2371 AMBittyPtr bit2;
2372
2373 if (ptr1 != NULL && ptr2 != NULL)
2374 {
2375 bit1 = (AMBittyPtr)ptr1;
2376 bit2 = (AMBittyPtr)ptr2;
2377 if (bit1->num1 < bit2->num1)
2378 return -1;
2379 else if (bit1->num1 > bit2->num1)
2380 return 1;
2381 else
2382 return 0;
2383 }
2384 return 0;
2385 }
2386
am_print_seqalign_indexes(SeqAlignPtr sap)2387 NLM_EXTERN void am_print_seqalign_indexes(SeqAlignPtr sap)
2388 {
2389 AMAlignIndexPtr amaip;
2390 DenseSegPtr dsp;
2391 Int4 i;
2392 Int4 j;
2393 SAIndexPtr saip;
2394
2395 if (!sap)
2396 return;
2397 if (!sap->saip)
2398 return;
2399 while (sap)
2400 {
2401 if (sap->segtype == SAS_DENSEG && sap->saip)
2402 {
2403 dsp = (DenseSegPtr)sap->segs;
2404 if (sap->saip->indextype == INDEX_SEGS)
2405 saip = (SAIndexPtr)(sap->saip);
2406 printf("\naligncoords: ");
2407 for (i=0; i<(dsp->numseg); i++)
2408 {
2409 printf("%d ", saip->aligncoords[i]);
2410 }
2411 fflush(stdout);
2412 for (i=0; i<(dsp->dim); i++)
2413 {
2414 printf("\n");
2415 printf("Sequence %d:", i);
2416 for (j=0; j<(saip->ssdp[i]->numsect); j++)
2417 {
2418 printf("%d ", saip->ssdp[i]->sect[j]);
2419 }
2420 fflush(stdout);
2421 }
2422 } else if (sap->segtype == SAS_DISC && sap->saip)
2423 {
2424 if (sap->saip->indextype == INDEX_PARENT)
2425 amaip = (AMAlignIndexPtr)(sap->saip);
2426 if (sap->type == SAT_PARTIAL)
2427 printf("SAT_PARTIAL\n");
2428 else if (sap->type == SAT_MASTERSLAVE)
2429 printf("SAT_MASTERSLAVE\n");
2430 printf("Parent info:\n");
2431 printf("numbsqs = %d\n", amaip->numbsqs);
2432 printf("numsaps = %d\n", amaip->numsaps);
2433 printf("alnsaps = %d\n", amaip->alnsaps);
2434 printf("numseg = %d\n", amaip->numseg);
2435 fflush(stdout);
2436 for (i=0; i<amaip->numbsqs; i++)
2437 {
2438 printf("Sequence %d:", i);
2439 printf(" %d saps\n", amaip->amadp[i]->numsaps);
2440 fflush(stdout);
2441 }
2442 printf("Starts: ");
2443 if (sap->type == SAT_MASTERSLAVE && amaip->mstype == AM_SEGMENTED_MASTERSLAVE)
2444 {
2445 printf("Segmented\n");
2446 } else
2447 {
2448 for (i=0; i<(amaip->numseg*amaip->numsaps); i++)
2449 {
2450 printf("%d ", amaip->starts[i]);
2451 if (!fmod(i+1, amaip->numsaps))
2452 printf("\n");
2453 fflush(stdout);
2454 }
2455 }
2456 fflush(stdout);
2457 printf("\nTotal Length: %d \n", AlnMgrGetAlnLength(sap, TRUE));
2458 printf("Alignment Length: %d\n", AlnMgrGetAlnLength(sap, FALSE));
2459 if (amaip->lens)
2460 {
2461 printf("lens: ");
2462 for (i=0; i<amaip->numseg; i++)
2463 {
2464 printf("%i ", amaip->lens[i]);
2465 }
2466 printf("\n");
2467 fflush(stdout);
2468 printf("aligncoords: ");
2469 for (i=0; i<amaip->numseg; i++)
2470 {
2471 printf("%i ", amaip->aligncoords[i]);
2472 }
2473 printf("\n");
2474 fflush(stdout);
2475 }
2476 if (amaip->saps)
2477 {
2478 for (i=0; i<amaip->numbsqs; i++)
2479 {
2480 printf("Segments: ");
2481 for (j=0; j<(amaip->amadp[i]->numseg); j++)
2482 {
2483 printf("%d ", amaip->amadp[i]->segments[j]);
2484 }
2485 printf("\n");
2486 fflush(stdout);
2487 }
2488 }
2489 if (amaip->rowsource)
2490 {
2491 printf("Rowsource arrays:\n");
2492 for (i=0; i<(amaip->numrows); i++)
2493 {
2494 printf("row %d ", (i+1));
2495 for (j=0; j<(amaip->rowsource[i]->numsaps); j++)
2496 {
2497 printf("%d: %d ", amaip->rowsource[i]->which_saps[j], amaip->rowsource[i]->num_in_sap[j]);
2498 }
2499 printf("\n");
2500 }
2501 }
2502 am_print_seqalign_indexes((SeqAlignPtr)sap->segs);
2503 }
2504 sap = sap->next;
2505 }
2506 return;
2507 }
2508
2509 /*CHECK*/
AlnMgrCheckAlignForParent(SeqAlignPtr sap)2510 NLM_EXTERN Int4 AlnMgrCheckAlignForParent(SeqAlignPtr sap)
2511 {
2512 AMAlignIndexPtr amaip;
2513
2514 if (sap->segtype == SAS_DISC)
2515 {
2516 if (!sap->saip)
2517 {
2518 if (!AlnMgrIndexSeqAlign(sap))
2519 return -1;
2520 else
2521 return AM_PARENT;
2522 } else if (sap->saip->indextype == INDEX_PARENT)
2523 {
2524 amaip = (AMAlignIndexPtr)(sap->saip);
2525 if (amaip->mstype != AM_LITE)
2526 return AM_PARENT;
2527 else
2528 return -1;
2529 } else
2530 {
2531 return -1;
2532 }
2533 } else if (sap->segtype == SAS_DENSEG)
2534 {
2535 if (!sap->saip)
2536 {
2537 if (sap->segs == NULL)
2538 return -1;
2539 AlnMgrAnythingToSeg(sap);
2540 if (!AlnMgrIndexLinkedSegs((SeqAlignPtr)sap->segs))
2541 return -1;
2542 return AM_PARENT;
2543 } else if (sap->saip->indextype == INDEX_SEGS)
2544 {
2545 return AM_CHILD;
2546 } else
2547 {
2548 return -1;
2549 }
2550 }
2551 return -1;
2552 }
2553
2554 /* check to see if a seqalign contains a sip, return row */
AlnMgrContainsID(SeqAlignPtr sap,SeqId * sip)2555 static Int4 AlnMgrContainsID(SeqAlignPtr sap, SeqId *sip)
2556 {
2557 DenseSegPtr dsp;
2558 SeqId *sip_tmp;
2559 Int4 count;
2560
2561 if (!sap || !sip) return FALSE;
2562 dsp = (DenseSegPtr)sap->segs;
2563 if (!dsp) return 0;
2564
2565 for(sip_tmp = dsp->ids, count = 1; sip_tmp;
2566 sip_tmp = sip_tmp->next, count++)
2567 if (SeqIdComp(sip_tmp, sip) == SIC_YES) return count;
2568 return 0;
2569 }
2570
2571 /* used to pass in values for sorting using AlnMgrCompareSortStruct() */
2572 typedef struct _AlnMgrSortStruct
2573 {
2574 SeqId *sip;
2575 SeqAlign *sap;
2576 Int4 row, start, stop;
2577 Uint2 strand;
2578 } AlnMgrSortStruct;
2579
2580 /* compare two seqaligns by position on a particular sequence. Assumes both
2581 seqaligns contains the sequence */
AlnMgrCompareSortStruct(void * e1,void * e2)2582 static int LIBCALLBACK AlnMgrCompareSortStruct(void *e1, void *e2)
2583 {
2584 AlnMgrSortStruct *amss1, *amss2;
2585
2586 if (!e1||!e2) return 0;
2587 amss1 = (AlnMgrSortStruct *)e1;
2588 amss2 = (AlnMgrSortStruct *)e2;
2589 if(!amss1->sap || !amss2->sap) return 0;
2590
2591 if (amss1->strand == 0)
2592 amss1->strand = Seq_strand_plus;
2593 if (amss2->strand == 0)
2594 amss2->strand = Seq_strand_plus;
2595
2596 if ((amss1->strand == amss2->strand) && amss1->strand != Seq_strand_minus)
2597 {
2598 if (amss1->start < amss2->start)
2599 return -1;
2600 else if (amss2->start < amss1->start)
2601 return 1;
2602 else if (amss1->start == amss2->start)
2603 {
2604 if (amss1->stop < amss2->stop)
2605 return -1;
2606 else if (amss2->stop < amss1->stop)
2607 return 1;
2608 else
2609 return 0;
2610 }
2611 } else if ((amss1->strand == amss2->strand) && amss1->strand == Seq_strand_minus)
2612 {
2613 if (amss1->start > amss2->start)
2614 return -1;
2615 else if (amss2->start > amss1->start)
2616 return 1;
2617 else if (amss1->start == amss2->start)
2618 {
2619 if (amss1->stop < amss2->stop)
2620 return -1;
2621 else if (amss2->stop < amss1->stop)
2622 return 1;
2623 else
2624 return 0;
2625 }
2626 }
2627 return 0;
2628 }
2629
2630 /* same functionality as AlnMgrSortSeqAligns */
AlnMgrSortbyID(SeqAlignPtr sap,SeqId * sip,Int4Ptr numsap)2631 NLM_EXTERN SeqAlignPtr PNTR AlnMgrSortbyID (SeqAlignPtr sap, SeqId *sip, Int4Ptr numsap)
2632 {
2633 SeqAlignPtr PNTR head;
2634 Int4 i, j;
2635 SeqAlignPtr tmp;
2636 AlnMgrSortStruct *amss;
2637 Int4 row;
2638
2639 if(!sap || !sap || !numsap) return NULL;
2640
2641 /* count number of seqaligns and init return array */
2642 for(tmp = sap; tmp; tmp=tmp->next) (*numsap)++;
2643 head = MemNew((*numsap)*sizeof(SeqAlignPtr));
2644 amss = MemNew((*numsap)*sizeof(AlnMgrSortStruct));
2645
2646 /* separate out the seqaligns that don't contain the sip */
2647 for(tmp = sap, i = j = 0; tmp; tmp = tmp->next) {
2648 row = AlnMgrContainsID(tmp, sip);
2649 if(!row) {
2650 head[i] = tmp;
2651 i++;
2652 } else {
2653 amss[j].sip = sip;
2654 amss[j].sap = tmp;
2655 amss[j].row = row;
2656 AlnMgrGetNthSeqRangeInSA(tmp, row, &(amss[j].start), &(amss[j].stop));
2657 amss[j].strand = AlnMgrGetNthStrand(tmp, row);
2658 j++;
2659 }
2660 }
2661
2662 /* qsort(amss, j, sizeof(AlnMgrSortStruct), AlnMgrCompareSortStruct); */
2663 HeapSort((Pointer)amss, (size_t)j, sizeof(AlnMgrSortStruct), AlnMgrCompareSortStruct);
2664
2665 for(i = 0; i < j; i++) head[*numsap - j + i] = amss[i].sap;
2666
2667 MemFree(amss);
2668 return head;
2669 }
2670
2671 /***********************************************************************
2672 *
2673 * AlnMgrSortSeqAligns is a variant of the ValNodeSort function, and
2674 * calls very similar heapsort functions. It can take a comparison
2675 * function that needs userdata, so more specific sorts are possible
2676 * without defining special structures for every type of sort.
2677 *
2678 ***********************************************************************/
AlnMgrSortSeqAligns(SeqAlignPtr sap,int (LIBCALLBACK * compar)(VoidPtr,VoidPtr,VoidPtr),VoidPtr userdata,Int4Ptr numsap)2679 NLM_EXTERN SeqAlignPtr PNTR AlnMgrSortSeqAligns (SeqAlignPtr sap, int (LIBCALLBACK *compar)(VoidPtr, VoidPtr, VoidPtr), VoidPtr userdata, Int4Ptr numsap)
2680 {
2681 SeqAlignPtr PNTR head;
2682 Int4 i;
2683 Int4 num;
2684 SeqAlignPtr tmp;
2685
2686 if (!sap)
2687 return NULL;
2688 tmp = sap;
2689 num = 0;
2690 while (tmp)
2691 {
2692 num++;
2693 tmp = tmp->next;
2694 }
2695 head = MemNew(((size_t) num + 1)*sizeof(SeqAlignPtr));
2696 tmp = sap;
2697
2698 for (i = 0; i<num; i++)
2699 {
2700 head[i]=tmp;
2701 tmp = tmp->next;
2702 if (!tmp)
2703 break;
2704 }
2705 heapsort_with_userdata(head, (size_t)num, sizeof(SeqAlignPtr), compar, userdata);
2706 if (numsap)
2707 *numsap = num;
2708 return head;
2709 }
2710
/* In-place heapsort of 'nel' elements of 'width' bytes each starting at b.
 * Every comparison goes through compar(a, b, userdata).  The array is first
 * turned into a heap by sifting down each parent from the last one to the
 * root; the root is then repeatedly swapped with the last unsorted element
 * and the shortened heap repaired.  Arrays of fewer than two elements are
 * left alone. */
static void heapsort_with_userdata (VoidPtr b, size_t nel, size_t width, int (LIBCALLBACK *compar)PROTO((VoidPtr, VoidPtr, VoidPtr)), VoidPtr userdata)
{
   CharPtr  heap = (CharPtr)b;
   size_t   k;
   CharPtr  last_elem;
   CharPtr  last_parent;
   CharPtr  tail;
   char     tmp;

   if (nel < 2)
      return;

   last_parent = heap + ((nel-2)/2)*width;
   last_elem = heap + (nel-1)*width;

   /* build phase: sift down parents nel/2 .. 1 (1-based), i.e. back to the root */
   for (k = nel/2; k > 0; k--)
   {
      heapify_with_userdata(heap, heap + (k-1)*width, last_parent, last_elem, width, compar, userdata);
   }

   /* extraction phase: tail marks the slot that receives the current root */
   for (tail = last_elem; tail > heap; tail -= width)
   {
      for (k = 0; k < width; k++)
      {
         tmp = heap[k];
         heap[k] = tail[k];
         tail[k] = tmp;
      }
      last_parent = heap + ((tail-heap)/2 - width);
      /* with a single element left in the heap there is nothing to repair */
      if (tail > (heap+width))
         heapify_with_userdata(heap, heap, last_parent, tail-width, width, compar, userdata);
   }
}
2742
/* Sift-down step used by heapsort_with_userdata.  Starting at 'base', the
 * element is swapped with the child that compar prefers (the left child
 * wins ties, and is the only candidate when it is 'last') for as long as
 * it compares below that child; the walk stops once 'base' moves past
 * 'lim' or no swap is needed.  base0 is the start of the array and width
 * the element size in bytes. */
static void heapify_with_userdata(CharPtr base0, CharPtr base, CharPtr lim, CharPtr last, size_t width, int(LIBCALLBACK *compar)PROTO((VoidPtr, VoidPtr, VoidPtr)), VoidPtr userdata)
{
   CharPtr  child;
   size_t   k;
   CharPtr  pick;
   char     tmp;

   while (base <= lim)
   {
      /* byte offset of the left child is twice the parent's plus one element */
      child = base0 + 2*(base-base0) + width;
      pick = child;
      if (child != last && (*compar)(child, child+width, userdata) < 0)
         pick = child + width;

      if ((*compar)(base, pick, userdata) >= 0)
         break;

      for (k = 0; k < width; k++)
      {
         tmp = base[k];
         base[k] = pick[k];
         pick[k] = tmp;
      }
      base = pick;
   }
}
2779
2780 /*************************************************************************
2781 *
2782 * sorting comparison functions
2783 *
2784 *************************************************************************/
2785 /**********************************************************************
2786 *
2787 * AlnMgrCompareIncreasingBySeqIdPtr takes a SeqIdPtr as userdata,
2788 * and sorts the alignments in increasing order according to the
2789 * region of the bioseq indicated that is contained in the alignment.
2790 * If the bioseq is not in the alignment, the alignment will be put
2791 * first, so all alignments in which the given bioseq does not
2792 * participate occur at the beginning of the list, making it easy to
2793 * check for them and remove them.
2794 *
2795 **********************************************************************/
AlnMgrCompareIncreasingBySeqIdPtr(VoidPtr base,VoidPtr large_son,VoidPtr userdata)2796 NLM_EXTERN int LIBCALLBACK AlnMgrCompareIncreasingBySeqIdPtr (VoidPtr base, VoidPtr large_son, VoidPtr userdata)
2797 {
2798 Boolean done;
2799 DenseSegPtr dsp1;
2800 DenseSegPtr dsp2;
2801 Int4 n1;
2802 Int4 n2;
2803 SeqAlignPtr sap1;
2804 SeqAlignPtr sap2;
2805 SeqIdPtr sip;
2806 SeqIdPtr sip_tmp;
2807 Int4 start1;
2808 Int4 start2;
2809 Int4 stop1;
2810 Int4 stop2;
2811 Uint2 strand1;
2812 Uint2 strand2;
2813
2814 sap1 = *((SeqAlignPtr PNTR) base);
2815 sip = (SeqIdPtr)userdata;
2816 if (!sap1||!sip) return 0;
2817 dsp1 = (DenseSegPtr)sap1->segs;
2818 if (!dsp1) return 0;
2819 n1 = 0;
2820 done = FALSE;
2821 sip_tmp = dsp1->ids;
2822 while (sip_tmp)
2823 {
2824 n1++;
2825 if (SeqIdComp(sip_tmp, sip) == SIC_YES) {
2826 done = TRUE;
2827 break;
2828 }
2829 sip_tmp = sip_tmp->next;
2830 }
2831 if (!done) return -1;
2832 sap2 = *((SeqAlignPtr PNTR) large_son);
2833 if (!sap2) return 0;
2834 dsp2 = (DenseSegPtr)sap2->segs;
2835 if (!dsp2) return 0;
2836 n2 = 0;
2837 done = FALSE;
2838 sip_tmp = dsp2->ids;
2839 while (sip_tmp)
2840 {
2841 n2++;
2842 if (SeqIdComp(sip_tmp, sip) == SIC_YES) {
2843 done = TRUE;
2844 break;
2845 }
2846 sip_tmp = sip_tmp->next;
2847 }
2848 if (!done)
2849 return 1;
2850 AlnMgrGetNthSeqRangeInSA(sap1, n1, &start1, &stop1);
2851 AlnMgrGetNthSeqRangeInSA(sap2, n2, &start2, &stop2);
2852 strand1 = AlnMgrGetNthStrand(sap1, n1);
2853 strand2 = AlnMgrGetNthStrand(sap2, n2);
2854 if (strand1 == 0)
2855 strand1 = Seq_strand_plus;
2856 if (strand2 == 0)
2857 strand2 = Seq_strand_plus;
2858 if ((strand1 == strand2) && strand1 != Seq_strand_minus)
2859 {
2860 if (start1 < start2)
2861 return -1;
2862 else if (start2 < start1)
2863 return 1;
2864 else if (start1 == start2)
2865 {
2866 if (stop1 < stop2)
2867 return -1;
2868 else if (stop2 < stop1)
2869 return 1;
2870 else
2871 return 0;
2872 }
2873 } else if ((strand1 == strand2) && strand1 == Seq_strand_minus)
2874 {
2875 if (start1 > start2)
2876 return -1;
2877 else if (start2 > start1)
2878 return 1;
2879 else if (start1 == start2)
2880 {
2881 if (stop1 < stop2)
2882 return -1;
2883 else if (stop2 < stop1)
2884 return 1;
2885 else
2886 return 0;
2887 }
2888 }
2889 else
2890 return 0;
2891 return 0;
2892 }
2893
2894 /*********************************************************************
2895 *
2896 * AlnMgrFindFirst is crucial to the AlnMgrMakeFakeMultiple function;
2897 * it uses the sorted order of the seqaligns in each AMAlignDat
2898 * structure to guide a heapsort of all the seqaligns.
2899 *
2900 *********************************************************************/
AlnMgrFindFirst(VoidPtr base,VoidPtr large_son,VoidPtr userdata)2901 NLM_EXTERN int LIBCALLBACK AlnMgrFindFirst(VoidPtr base, VoidPtr large_son, VoidPtr userdata)
2902 {
2903 AMAlignDatPtr amadp;
2904 AMAlignIndexPtr amaip;
2905 Int4 i;
2906 SeqAlignPtr sap1;
2907 SeqAlignPtr sap2;
2908 Int4 x;
2909 Int4 y;
2910 Int4 z;
2911
2912 amaip = (AMAlignIndexPtr)userdata;
2913 if (amaip == NULL || base == NULL || large_son == NULL)
2914 return 0;
2915 sap1 = *((SeqAlignPtr PNTR) base);
2916 sap2 = *((SeqAlignPtr PNTR) large_son);
2917 if (base == large_son)
2918 return 0;
2919 x = y = -1;
2920 z = amaip->numbsqs;
2921 while (z)
2922 {
2923 amadp = amaip->amadp[(amaip->numbsqs - z)];
2924 for (i=0; i<(amadp->numsaps); i++)
2925 {
2926 if (amadp->saps[i] == sap1)
2927 x=i;
2928 else if (amadp->saps[i] == sap2)
2929 y=i;
2930 }
2931 if (x!=-1 && y!=-1)
2932 {
2933 if (x < y)
2934 return -1;
2935 else if (y < x)
2936 return 1;
2937 }
2938 z--;
2939 }
2940 return 0;
2941 }
2942
AlnMgrCompareTips(VoidPtr base,VoidPtr large_son)2943 NLM_EXTERN int LIBCALLBACK AlnMgrCompareTips(VoidPtr base, VoidPtr large_son)
2944 {
2945 AMTinyInfoPtr tip1;
2946 AMTinyInfoPtr tip2;
2947
2948 tip1 = *((AMTinyInfoPtr PNTR) base);
2949 tip2 = *((AMTinyInfoPtr PNTR) large_son);
2950 if (tip1 == NULL || tip2 == NULL)
2951 return 0;
2952 if (tip1->start < tip2->start)
2953 return -1;
2954 else if (tip1->start > tip2->start)
2955 return 1;
2956 else
2957 {
2958 if (tip1->which < tip2->which)
2959 return -1;
2960 else if (tip1->which > tip2->which)
2961 return 1;
2962 else if(tip1->stop>tip2->stop)
2963 return -1; /* put longer segments first */
2964 else if (tip1->stop<tip2->stop)
2965 return 1;
2966 else
2967 return 0;
2968 }
2969 }
2970
2971
2972 /************************************************************************
2973 *
2974 * AlnMgrGetNextLengthBit should be called iteratively on an alignment
2975 * to return the lengths of all the aligned and unaligned pieces in
2976 * the alignment. Don't change the value in r, just pass in a pointer
2977 * to an allocated Int4 set to 0 initially. The lengths of the unaligned
2978 * pieces are precomputed using AlnMgrGetMaxUnalignedLength; if no
2979 * precomputed values are found, this function is used to compute the
2980 * lengths on the fly.
2981 *
2982 ************************************************************************/
AlnMgrGetNextLengthBit(SeqAlignPtr sap,Int4Ptr length,Int4Ptr r)2983 NLM_EXTERN Boolean AlnMgrGetNextLengthBit(SeqAlignPtr sap, Int4Ptr length, Int4Ptr r)
2984 {
2985 AMAlignIndexPtr amaip;
2986 Int4 i;
2987 RowSourcePtr rsp;
2988
2989 if (sap == NULL || length == NULL || r == NULL)
2990 return FALSE;
2991 i = AlnMgrCheckAlignForParent(sap);
2992 if (i == AM_CHILD)
2993 {
2994 if (*r == 1)
2995 return FALSE;
2996 *length = AlnMgrGetAlnLength(sap, FALSE);
2997 *r = 1;
2998 return TRUE;
2999 } else if (i == AM_PARENT)
3000 {
3001 amaip = (AMAlignIndexPtr)sap->saip;
3002 if (amaip->mstype == AM_LITE)
3003 return FALSE;
3004 if (sap->type == SAT_PARTIAL)
3005 {
3006 if (*r < 0)
3007 {
3008 if ((-*r) >= amaip->numsaps)
3009 return FALSE;
3010 if (amaip->ulens == NULL)
3011 AlnMgrSetUnalignedLengths(sap);
3012 *length = -(amaip->ulens[(-*r)-1]);
3013 *r = -(*r);
3014 return TRUE;
3015 } else
3016 {
3017 if (*r >= amaip->numsaps)
3018 return FALSE;
3019 *length = AlnMgrGetAlnLength(amaip->saps[*r], FALSE);
3020 *r = -((*r)+1);
3021 return TRUE;
3022 }
3023 } else if (sap->type == SAT_MASTERSLAVE)
3024 {
3025 if (amaip->mstype == AM_MASTERSLAVE)
3026 {
3027 if (*r == 1)
3028 return FALSE;
3029 *length = amaip->aligncoords[amaip->numseg-1] + amaip->lens[amaip->numseg-1];
3030 *r = 1;
3031 return TRUE;
3032 } else if (amaip->mstype == AM_SEGMENTED_MASTERSLAVE || amaip->mstype == AM_NULL)
3033 {
3034 if (*r < 0)
3035 {
3036 if ((-*r) >= amaip->numseg)
3037 return FALSE;
3038 rsp = amaip->rowsource[amaip->master-1];
3039 if (amaip->ulens == NULL)
3040 AlnMgrSetUnalignedLengths(sap);
3041 *length = -(amaip->ulens[(-*r)-1]);
3042 *r = -(*r);
3043 return TRUE;
3044 } else
3045 {
3046 if (*r >= amaip->numseg)
3047 {
3048 if (amaip->numseg > 0)
3049 return FALSE;
3050 else
3051 {
3052 *length = -(amaip->ulens[0]);
3053 *r = -((*r)+1);
3054 return TRUE;
3055 }
3056 }
3057 rsp = amaip->rowsource[amaip->master-1];
3058 *length = AlnMgrGetAlnLength(amaip->saps[rsp->which_saps[*r]-1], FALSE);
3059 *r = -((*r)+1);
3060 return TRUE;
3061 }
3062 }
3063 }
3064 }
3065 return FALSE;
3066 }
3067
AlnMgrGetMaxUnalignedLength(SeqAlignPtr sap1,SeqAlignPtr sap2)3068 NLM_EXTERN Int4 AlnMgrGetMaxUnalignedLength(SeqAlignPtr sap1, SeqAlignPtr sap2)
3069 {
3070 Int4 max;
3071 Int4 n1, n1max;
3072 Int4 start1;
3073 Int4 start2;
3074 Int4 stop1;
3075 Int4 stop2;
3076 SeqId *sip1, *sip2;
3077
3078 if (sap1 == NULL || sap2 == NULL)
3079 return 0;
3080 max = 0;
3081 n1max = AlnMgrGetNumRows(sap1);
3082 if(n1max != AlnMgrGetNumRows(sap2)) return 0;
3083 for (n1 = 1; n1 <= n1max; n1++)
3084 {
3085 sip1 = AlnMgrGetNthSeqIdPtr(sap1, n1);
3086 sip2 = AlnMgrGetNthSeqIdPtr(sap2, n1);
3087 if(SeqIdComp(sip1, sip2) != SIC_YES) {
3088 SeqIdFree(sip1);
3089 SeqIdFree(sip2);
3090 return 0;
3091 }
3092 SeqIdFree(sip1);
3093 SeqIdFree(sip2);
3094 AlnMgrGetNthSeqRangeInSA(sap1, n1, &start1, &stop1);
3095 if (n1 >= 0)
3096 {
3097 AlnMgrGetNthSeqRangeInSA(sap2, n1, &start2, &stop2);
3098 if (start2 > stop1)
3099 {
3100 if (start2 - stop1 - 1 > max)
3101 max = start2 - stop1 - 1;
3102 } else
3103 {
3104 if (start1 - stop2 - 1 > max)
3105 max = start1 - stop2 - 1;
3106 }
3107 }
3108 }
3109 return max;
3110 }
3111
/***************************************************************************
*
*  AlnMgrSetUnalignedLengths (re)allocates and fills amaip->ulens for a
*  seqalign with a parent index.  For every pair of adjacent segments
*  (i-1, i) it looks at each row that has a child alignment in both
*  segments and stores in ulens[i-1] the largest number of residues lying
*  between the two child alignments on that row (0 if no row qualifies).
*  If the array cannot be allocated, ulens is left NULL.
*
***************************************************************************/
static void AlnMgrSetUnalignedLengths(SeqAlignPtr sap)
{
   AMAlignIndexPtr  amaip;
   Int4             i;
   Int4             l;
   Int4             max;
   Int4             n;
   Int4             nslots;
   RowSourcePtr     rsp;
   SeqAlignPtr      sap1;
   SeqAlignPtr      sap2;
   Int4             start1;
   Int4             start2;
   Int4             stop1;
   Int4             stop2;

   if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
      return;
   amaip = (AMAlignIndexPtr)(sap->saip);
   if (amaip->ulens != NULL)
      MemFree(amaip->ulens);
   /* the loop below writes ulens[0 .. numseg-2]; never allocate less than that */
   nslots = amaip->numsaps;
   if (amaip->numseg - 1 > nslots)
      nslots = amaip->numseg - 1;
   amaip->ulens = (Int4Ptr)MemNew((nslots)*sizeof(Int4));
   if (amaip->ulens == NULL)
      return;
   for (i=1; i<amaip->numseg; i++)
   {
      max = 0;
      for (n=0; n<amaip->numrows; n++)
      {
         rsp = amaip->rowsource[n];
         if (rsp != NULL)
         {
            /* which_saps entries are 1-based indexes into amaip->saps; 0 means no child here */
            if (rsp->which_saps[i-1] != 0 && rsp->which_saps[i] != 0 && rsp->which_saps[i-1] <= amaip->numsaps && rsp->which_saps[i] <= amaip->numsaps)
            {
               sap1 = amaip->saps[rsp->which_saps[i-1]-1];
               sap2 = amaip->saps[rsp->which_saps[i]-1];
               /* the range lookup may return without writing its outputs */
               start1 = stop1 = -1;
               start2 = stop2 = -1;
               AlnMgrGetNthSeqRangeInSA(sap1, rsp->num_in_sap[i-1], &start1, &stop1);
               AlnMgrGetNthSeqRangeInSA(sap2, rsp->num_in_sap[i], &start2, &stop2);
               /* on the minus strand the later segment lies at lower coordinates */
               if (rsp->strand != Seq_strand_minus)
                  l = start2 - stop1 - 1;
               else
                  l = start1 - stop2 - 1;
               if (l > max)
                  max = l;
            }
         }
      }
      amaip->ulens[i-1] = max;
   }
   return;
}
3160
3161 /*************************************************************************
3162 *
3163 * AlnMgrGetNextAlnBit takes an AlnMsgPtr, with (at the minimum) the
3164 * which_bsq field filled in to indicate which bioseq should be returned.
3165 * The function returns the segments of the bioseq which span the region
3166 * of the alignment indicated, and can return them according to either
3167 * alignment coordinates (if which_master is NULL) or a master coordinate
3168 * system (need to fill in the SeqIdPtr of the master). The function
3169 * returns TRUE if there are more segments of the bioseq to retrieve,
3170 * and FALSE if not. It uses the two binary search functions to quickly
3171 * retrieve the required data from the indexes. (NEXT)
3172 *
3173 *************************************************************************/
AlnMgrGetNextAlnBit(SeqAlignPtr sap,AlnMsgPtr amp)3174 NLM_EXTERN Boolean AlnMgrGetNextAlnBit (SeqAlignPtr sap, AlnMsgPtr amp)
3175 {
3176 AMAlignIndexPtr amaip;
3177 DenseSegPtr dsp;
3178 Int4 endoffset;
3179 Boolean found;
3180 Int4 i;
3181 Int4 len;
3182 Int4 offset;
3183 Boolean retval;
3184 Int4 rf_tmp;
3185 SAIndexPtr saip;
3186 SASeqDatPtr ssdp;
3187 Int4 start_b;
3188 Uint4 start_m;
3189 Uint4 start_tmp;
3190 Uint4 stop_m;
3191 Uint4 stop_tmp;
3192
3193 retval = FALSE;
3194 if (!sap)
3195 return retval;
3196 if (!amp)
3197 return retval;
3198 i = AlnMgrCheckAlignForParent(sap);
3199 if (i == AM_CHILD)
3200 {
3201 saip = (SAIndexPtr)sap->saip;
3202 dsp = (DenseSegPtr)sap->segs;
3203 if (!dsp)
3204 return retval;
3205 if (!amp->which_master)
3206 {
3207 if (amp->place == 1)
3208 return retval;
3209 len = AlnMgrGetAlnLength(sap, FALSE);
3210 if (amp->to_m > len-1) /* len-1 was len : HS 7/24/00 */
3211 return retval;
3212 if (amp->to_m < 0)
3213 amp->to_m = len - 1;
3214 if (amp->row_num == -1)
3215 {
3216 if (!amp->which_bsq)
3217 return retval;
3218 amp->row_num = AlnMgrGetNForSip(sap, amp->which_bsq) - 1;
3219 if (amp->row_num == -1)
3220 return retval;
3221 }
3222 if (amp->prev != -2)
3223 {
3224 start_m = amp->prev;
3225 } else
3226 {
3227 start_m = binary_search_on_uint4_list(saip->aligncoords, amp->from_m, dsp->numseg);
3228 amp->real_from = amp->from_m;
3229 }
3230 stop_m = binary_search_on_uint4_list(saip->aligncoords, amp->to_m, dsp->numseg);
3231 ssdp = saip->ssdp[amp->row_num-1];
3232 offset = amp->real_from - saip->aligncoords[start_m];
3233 start_b = binary_search_on_uint2_list(ssdp->sect, start_m, ssdp->numsect);
3234
3235 /* bug fix -- Dave & Lewis 7/20/00 */
3236 if (start_b == -1) {
3237 amp->strand = Seq_strand_plus;
3238 }
3239 /* end of fix */
3240 else {
3241 if (dsp->strands != NULL && dsp->strands[start_b*(dsp->dim)+amp->row_num-1] == Seq_strand_minus)
3242 amp->strand = Seq_strand_minus;
3243 else
3244 amp->strand = Seq_strand_plus;
3245 }
3246 if ((stop_m - start_m) > 0)
3247 {
3248 retval = TRUE;
3249 if (start_b >= 0)
3250 {
3251 if (amp->strand != Seq_strand_minus)
3252 {
3253 amp->from_b = dsp->starts[start_b*(dsp->dim)+amp->row_num-1] + offset;
3254 amp->to_b = dsp->starts[start_b*(dsp->dim)+amp->row_num-1] + dsp->lens[start_b] - 1;
3255 } else
3256 {
3257 amp->from_b = dsp->starts[start_b*(dsp->dim)+amp->row_num-1];
3258 amp->to_b = amp->from_b + dsp->lens[start_b] - 1 - offset;
3259 }
3260 amp->gap = 0;
3261 } else
3262 {
3263 amp->from_b = amp->real_from;
3264 amp->to_b = saip->aligncoords[start_m + 1] - 1;
3265 amp->gap = 1;
3266 }
3267 amp->real_from = saip->aligncoords[start_m + 1];
3268 amp->prev = start_m + 1;
3269 } else
3270 {
3271 amp->place = 1;
3272 endoffset = amp->to_m - saip->aligncoords[start_m];
3273 if (start_b >= 0)
3274 {
3275 if (amp->strand != Seq_strand_minus)
3276 {
3277 amp->from_b = dsp->starts[start_b*(dsp->dim)+amp->row_num-1] + offset;
3278 amp->to_b = dsp->starts[start_b*(dsp->dim)+amp->row_num-1] + endoffset;
3279 } else
3280 {
3281 amp->from_b = dsp->starts[start_b*(dsp->dim)+amp->row_num-1] + dsp->lens[start_b] - endoffset - 1;
3282 amp->to_b = amp->from_b + amp->to_m - amp->real_from;
3283 }
3284 amp->gap = 0;
3285 } else
3286 {
3287 amp->from_b = amp->real_from;
3288 amp->to_b = amp->to_m;
3289 amp->gap = 1;
3290 }
3291 amp->real_from = 0;
3292 amp->prev = -2;
3293 retval = TRUE;
3294 }
3295 }
3296 } else if (i == AM_PARENT)
3297 {
3298 amaip = (AMAlignIndexPtr)(sap->saip);
3299 if (amaip && (!amaip->saps || amaip->mstype == AM_LITE || amaip->mstype == AM_NEATINDEX || amaip->mstype == AM_NULL))
3300 return retval;
3301 if (amp->place == 1)
3302 return retval;
3303 if (!amp->which_bsq && amp->row_num==-1)
3304 return retval;
3305 if (sap->type == SAT_PARTIAL && amp->which_master == NULL)
3306 {
3307 len = AlnMgrGetAlnLength(sap, FALSE);
3308 if (amp->to_m < 0)
3309 amp->to_m = len-1;
3310 if (amp->to_m > len-1)
3311 return FALSE;
3312 if (amp->prev_sap != -2)
3313 {
3314 start_m = amp->prev_sap;
3315 amp->len_left = amp->to_m - amp->real_from + 1;
3316 } else
3317 {
3318 start_m = binary_search_on_uint4_list(amaip->aligncoords, amp->from_m, amaip->alnsaps);
3319 amp->real_from = amp->from_m;
3320 amp->prev_sap = start_m;
3321 amp->len_left = amp->to_m - amp->from_m + 1;
3322 }
3323 stop_m = binary_search_on_uint4_list(amaip->aligncoords, amp->to_m, amaip->alnsaps);
3324 offset = amp->real_from - amaip->aligncoords[start_m];
3325 if (amp->len_left > (amaip->lens[start_m]-offset))
3326 {
3327 endoffset = amaip->lens[start_m] - offset;
3328 } else
3329 {
3330 endoffset = amp->len_left;
3331 }
3332 stop_tmp = amp->to_m;
3333 start_tmp = amp->from_m;
3334 if ((stop_m - start_m) == 0)
3335 {
3336 amp->from_m = offset + amaip->starts[start_m];
3337 amp->to_m = amp->from_m + endoffset - 1;
3338 /* amp->prev = -2; HS 7/24/00 removed */
3339 rf_tmp = amp->real_from;
3340 AlnMgrGetNextAlnBit((amaip->saps[start_m]), amp);
3341 /* HS 7/24/00 amp->len_left = amp->len_left - (amp->to_b - amp->from_b + 1); */
3342 amp->len_left = amp->len_left - (amp->to_m - amp->from_m + 1);
3343 amp->to_m = stop_tmp;
3344 amp->from_m = start_tmp;
3345 if (amp->len_left == 0)
3346 {
3347 amp->real_from = amp->to_m + 1;
3348 amp->prev_sap = -2;
3349 amp->place = 1;
3350 amp->send_space = 1;
3351 } else
3352 {
3353 amp->real_from = rf_tmp + (amp->to_b - amp->from_b + 1);
3354 amp->place = 0;
3355 }
3356 retval = TRUE;
3357 } else
3358 {
3359 retval = TRUE;
3360 amp->from_m = offset + amaip->starts[start_m];
3361 amp->to_m = amp->from_m + endoffset - 1;
3362 AlnMgrGetNextAlnBit((amaip->saps[start_m]), amp);
3363 amp->len_left = amp->len_left - (amp->to_m - amp->from_m + 1);
3364 amp->to_m = stop_tmp;
3365 amp->real_from = amp->to_m - amp->len_left + 1;
3366 amp->from_m = start_tmp;
3367 if (amp->place == 1)
3368 {
3369 amp->prev_sap += 1;
3370 amp->send_space = TRUE;
3371 if (amp->len_left > 0)
3372 amp->place = 0;
3373 }
3374 /* bug fix -- Dave 7/21/00 */
3375 /* if (amp->len_left == 0 || amp->real_from >= amp->to_m) */
3376 if (amp->len_left == 0 || amp->real_from > amp->to_m)
3377 {
3378 amp->place = 1;
3379 retval = FALSE;
3380 amp->prev_sap = -2;
3381 }
3382 }
3383 } else if (sap->type == SAT_MASTERSLAVE && amaip->mstype == AM_MASTERSLAVE && amp->which_master == NULL)
3384 {
3385 int j;
3386 if (amp->place == 1)
3387 return retval;
3388 len = AlnMgrGetAlnLength(sap, FALSE);
3389 if (amp->to_m > len-1)
3390 return retval;
3391 if (amp->to_m < 0)
3392 amp->to_m = len-1;
3393 if (amp->row_num == -1)
3394 {
3395 if(!amp->which_bsq)
3396 return retval;
3397 else
3398 {
3399 amp->row_num = AlnMgrGetNForSip(sap,amp->which_bsq);
3400 if(amp->row_num == -1)
3401 return retval;
3402 }
3403 }
3404 if (amp->row_num == amaip->master)
3405 {
3406 amp->strand = Seq_strand_plus;
3407 if (amp->prev != -2)
3408 {
3409 amp->prev += 1;
3410 start_m = amp->prev;
3411 } else
3412 {
3413 start_m = binary_search_on_uint4_list(amaip->aligncoords, amp->from_m, amaip->numseg);
3414 amp->real_from = amp->from_m;
3415 amp->prev = start_m;
3416 }
3417 offset = amp->real_from - amaip->aligncoords[start_m];
3418 endoffset = amaip->lens[start_m] - offset - (amp->to_m - amp->real_from + 1);
3419 if (endoffset < 0 && (start_m+1) < amaip->numseg)
3420 retval = TRUE;
3421 else
3422 {
3423 retval = TRUE;
3424 amp->place = 1;
3425 amp->row_num = -1;
3426 amp->prev = -2;
3427 }
3428 j=0;
3429 found = FALSE;
3430 while (!found && j < amaip->numsaps)
3431 {
3432 if (amaip->starts[j+(amaip->numsaps)*start_m] >= 0)
3433 found = TRUE;
3434 else if (amaip->starts[j+(amaip->numsaps)*start_m] == -3)
3435 found = TRUE;
3436 else
3437 j++;
3438 }
3439 if (amaip->starts[j+(amaip->numsaps)*start_m] == -3)
3440 {
3441 found = FALSE;
3442 j=0;
3443 while (!found && j < amaip->numsaps)
3444 {
3445 if (amaip->starts[j+(amaip->numsaps)*(start_m-1)] >= 0)
3446 found = TRUE;
3447 else
3448 j++;
3449 }
3450 amp->from_b = AlnMgrMapToBsqCoords(amaip->saps[j], amaip->starts[j+(amaip->numsaps)*(start_m-1)]+offset, NULL, NULL) + amaip->lens[start_m - 1];
3451 if (endoffset >=0)
3452 amp->to_b = amp->from_b + amaip->lens[start_m] - 1 - offset - endoffset;
3453 else
3454 amp->to_b = amp->from_b + amaip->lens[start_m] - offset - 1;
3455 amp->gap = 0;
3456 amp->real_from += amp->to_b - amp->from_b + 1;
3457 } else if (j<amaip->numsaps)
3458 {
3459 amp->from_b = AlnMgrMapToBsqCoords(amaip->saps[j], amaip->starts[j+(amaip->numsaps)*start_m]+offset, NULL, NULL);
3460 if (amp->from_b >= 0)
3461 {
3462 if (endoffset >=0)
3463 amp->to_b = amp->from_b + amaip->lens[start_m] - 1 - offset - endoffset;
3464 else
3465 amp->to_b = amp->from_b + amaip->lens[start_m] - offset - 1;
3466 amp->gap = 0;
3467 } else
3468 {
3469 amp->from_b = amp->real_from;
3470 amp->gap = 1;
3471 if (endoffset >= 0)
3472 amp->to_b = amp->from_b + amaip->lens[start_m] - 1 - offset - endoffset;
3473 else
3474 amp->to_b = amp->from_b + amaip->lens[start_m] - offset - 1;
3475 }
3476 amp->real_from += amp->to_b - amp->from_b + 1;
3477 }
3478 } else
3479 {
3480 if (amp->prev != -2)
3481 {
3482 amp->prev += 1;
3483 start_m = amp->prev;
3484 } else
3485 {
3486 start_m = binary_search_on_uint4_list(amaip->aligncoords, amp->from_m, amaip->numseg);
3487 amp->real_from = amp->from_m;
3488 amp->prev = start_m;
3489 }
3490 if (amp->prev_sap == -2)
3491 amp->prev_sap=amaip->rowsource[amp->row_num-1]->which_saps[0];
3492 j = amp->prev_sap-1;
3493 amp->strand = AlnMgrGetNthStrand(amaip->saps[j], amaip->rowsource[amp->row_num-1]->num_in_sap[0]);
3494 offset = amp->real_from - amaip->aligncoords[start_m];
3495 endoffset = amaip->lens[start_m] - offset - (amp->to_m - amp->real_from + 1);
3496 if (endoffset <= 0 && (start_m + 1) < amaip->numseg)
3497 retval = TRUE;
3498 else
3499 {
3500 retval = TRUE;
3501 amp->place = 1;
3502 amp->prev = amp->prev_sap = -2;
3503 }
3504 if (amaip->starts[j+(amaip->numsaps)*start_m] < 0)
3505 amp->from_b = -1;
3506 else
3507 amp->from_b = AlnMgrMapRowCoords(amaip->saps[j], amaip->starts[j+(amaip->numsaps)*start_m]+offset, amaip->rowsource[amp->row_num-1]->num_in_sap[0], NULL);
3508 if (amp->from_b >= 0)
3509 {
3510 if (amp->strand != Seq_strand_minus)
3511 {
3512 if (endoffset >=0)
3513 amp->to_b = amp->from_b + amaip->lens[start_m] - offset - 1 - endoffset;
3514 else
3515 amp->to_b = amp->from_b + amaip->lens[start_m] - offset - 1;
3516 } else
3517 {
3518 amp->to_b = amp->from_b;
3519 if (endoffset >= 0)
3520 amp->from_b = amp->to_b - amaip->lens[start_m] + offset + 1 + endoffset;
3521 else
3522 amp->from_b = amp->to_b - amaip->lens[start_m] + offset + 1;
3523 }
3524 amp->gap = 0;
3525 } else
3526 {
3527 amp->from_b = amp->real_from;
3528 amp->gap = 1;
3529 if (endoffset >= 0)
3530 amp->to_b = amp->from_b + amaip->lens[start_m] - offset - 1 - endoffset;
3531 else
3532 amp->to_b = amp->from_b + amaip->lens[start_m] - offset - 1;
3533 }
3534 amp->real_from += amp->to_b - amp->from_b + 1;
3535 if (amp->real_from > amp->to_m)
3536 {
3537 retval = TRUE;
3538 amp->place = 1;
3539 amp->row_num = -1;
3540 amp->prev = -2;
3541 }
3542 }
3543 } else if (sap->type == SAT_MASTERSLAVE && amp->which_master)
3544 {
3545 } else if (sap->type == SAT_DIAGS && amp->which_master)
3546 {
3547 } else if (sap->type == SAT_MASTERSLAVE && amaip->mstype == AM_SEGMENTED_MASTERSLAVE && amp->which_master == NULL)
3548 {
3549 if (amp->place == 1)
3550 return retval;
3551 len = AlnMgrGetAlnLength(sap, FALSE);
3552 if (amp->to_m > len-1)
3553 return retval;
3554 if (amp->to_m < 0)
3555 amp->to_m = len-1;
3556 if (amp->row_num == -1)
3557 {
3558 if(!amp->which_bsq)
3559 return retval;
3560 else
3561 {
3562 amp->row_num = AlnMgrGetNForSip(sap,amp->which_bsq);
3563 if(amp->row_num == -1)
3564 return retval;
3565 }
3566 }
3567 if (amp->prev == -2)
3568 {
3569 start_m = binary_search_on_uint4_list(amaip->aligncoords, amp->from_m, amaip->numseg);
3570 amp->real_from = amp->from_m;
3571 amp->prev = start_m;
3572 } else
3573 start_m = amp->prev;
3574 offset = amp->real_from - amaip->aligncoords[start_m];
3575 if (offset < 0)
3576 offset = 0;
3577 if (amaip->rowsource[amp->row_num-1]->which_saps[start_m] == 0)
3578 {
3579 len = amaip->lens[start_m];
3580 amp->from_b = amaip->aligncoords[start_m]+offset;
3581 amp->gap = 2;
3582 amp->strand = Seq_strand_unknown;
3583 } else
3584 {
3585 len = 0;
3586 amp->strand = AlnMgrGetNthStrand(amaip->saps[amaip->rowsource[amp->row_num-1]->which_saps[start_m]-1], amaip->rowsource[amp->row_num-1]->num_in_sap[start_m]);
3587 amp->from_b = AlnMgrMapSegmentCoords(amaip->saps[amaip->rowsource[amp->row_num-1]->which_saps[start_m]-1], offset, amaip->rowsource[amp->row_num-1]->num_in_sap[start_m], NULL, &len);
3588 if (amp->from_b == -1)
3589 {
3590 amp->from_b = amaip->aligncoords[start_m]+offset;
3591 amp->gap = 1;
3592 } else
3593 amp->gap = 0;
3594 }
3595 endoffset = amp->to_m - (amaip->aligncoords[start_m] + len + offset -1);
3596 if (endoffset <= 0)
3597 {
3598 amp->send_space = 1;
3599 amp->place = 1;
3600 } else if (len >= amaip->lens[start_m] - offset)
3601 {
3602 amp->prev++;
3603 amp->send_space = 1;
3604 } else
3605 amp->send_space = 0;
3606 if (endoffset <= 0)
3607 amp->to_b = amp->from_b + len + endoffset -1;
3608 else
3609 {
3610 amp->to_b = amp->from_b + len - 1;
3611 amp->real_from = amp->real_from + amp->to_b - amp->from_b + 1;
3612 }
3613 if (amp->strand == Seq_strand_minus && amp->gap == 0)
3614 {
3615 offset = amp->to_b - amp->from_b;
3616 amp->to_b = amp->from_b;
3617 amp->from_b = amp->to_b - offset;
3618 }
3619 retval = TRUE;
3620 }
3621 }
3622 return retval;
3623 }
3624
binary_search_on_uint4_list(Uint4Ptr list,Uint4 pos,Uint4 listlen)3625 NLM_EXTERN Uint4 binary_search_on_uint4_list(Uint4Ptr list, Uint4 pos, Uint4 listlen)
3626 {
3627 Uint4 L;
3628 Uint4 mid;
3629 Uint4 R;
3630
3631 if (list == NULL || listlen == 0)
3632 return 0;
3633 L = 0;
3634 R = listlen - 1;
3635 while (L < R)
3636 {
3637 mid = (L+R)/2;
3638 if (list[mid + 1] <= pos)
3639 {
3640 L = mid + 1;
3641 } else
3642 {
3643 R = mid;
3644 }
3645 }
3646 return R;
3647 }
3648
binary_search_on_uint2_list(Uint2Ptr list,Uint2 ele,Uint2 listlen)3649 NLM_EXTERN Int4 binary_search_on_uint2_list(Uint2Ptr list, Uint2 ele, Uint2 listlen)
3650 {
3651 Uint2 L;
3652 Uint2 mid;
3653 Uint2 R;
3654
3655 if (list == NULL || listlen == 0)
3656 return -1;
3657 L = 0;
3658 R = listlen - 1;
3659 while (L < R)
3660 {
3661 mid = (L+R)/2;
3662 if (ele <= list[mid])
3663 {
3664 R = mid;
3665 } else
3666 {
3667 L = mid+1;
3668 }
3669 }
3670 if (ele == list[R])
3671 return list[R];
3672 else
3673 return -1;
3674 }
3675
binary_search_by_chunk(Int4Ptr list,Int4 ele,Int4 listlen,Int4 chunksize,Int4 offset)3676 NLM_EXTERN Int4 binary_search_by_chunk(Int4Ptr list, Int4 ele, Int4 listlen, Int4 chunksize, Int4 offset)
3677 {
3678 Int4 L;
3679 Int4 mid;
3680 Int4 R;
3681
3682 if (list == NULL || listlen == 0)
3683 return -1;
3684 L = 0;
3685 R = (listlen/chunksize) - 1;
3686 while (L < R)
3687 {
3688 mid = (L+R)/2;
3689 if (ele <= list[(mid)*chunksize + offset] && list[(mid)*chunksize + offset] >= 0)
3690 {
3691 R = mid;
3692 } else
3693 {
3694 L = mid + 1;
3695 }
3696 }
3697 return R;
3698 }
3699
binary_search_segment_array(SASeqDatPtr ssdp,Int4 pos,Int4 numseq,Int4 offset,DenseSegPtr dsp)3700 NLM_EXTERN Int4 binary_search_segment_array(SASeqDatPtr ssdp, Int4 pos, Int4 numseq, Int4 offset, DenseSegPtr dsp)
3701 {
3702 Int4 L;
3703 Int4 mid;
3704 Int4 R;
3705
3706 if (ssdp == NULL || numseq == 0)
3707 return -1;
3708 L = 0;
3709 R = ssdp->numsect - 1;
3710 while (L < R)
3711 {
3712 mid = (L+R)/2;
3713 if (pos <= (dsp->starts[(ssdp->sect[mid])*numseq + offset]))
3714 {
3715 R = mid;
3716 } else
3717 {
3718 L = mid+1;
3719 }
3720 }
3721 return (ssdp->sect[R]);
3722 }
3723
3724 /************************************************************************
3725 *
3726 * These are several utility functions which get needed data from the
3727 * indexes. "N" refers to row number.
3728 *
3729 ************************************************************************/
AlnMgrGetAlnLength(SeqAlignPtr sap,Boolean fill_in)3730 NLM_EXTERN Int4 AlnMgrGetAlnLength(SeqAlignPtr sap, Boolean fill_in)
3731 {
3732 AMAlignIndexPtr amaip;
3733 DenseSegPtr dsp;
3734 Int4 i;
3735 Int4 length;
3736 SAIndexPtr saip;
3737
3738 if (!sap)
3739 return 0;
3740 i = AlnMgrCheckAlignForParent(sap);
3741 if (i<0)
3742 {
3743 return 0;
3744 } else if (i == AM_CHILD)
3745 {
3746 dsp = (DenseSegPtr)sap->segs;
3747 if (!dsp)
3748 return 0;
3749 saip = (SAIndexPtr)sap->saip;
3750 return ((saip->aligncoords[dsp->numseg-1])+dsp->lens[dsp->numseg-1]);
3751 } else if (i == AM_PARENT)
3752 {
3753 amaip = (AMAlignIndexPtr)sap->saip;
3754 if (!amaip || amaip->mstype == AM_LITE)
3755 return 0;
3756 if (!amaip->saps)
3757 {
3758 if (!AlnMgrMakeFakeMultiple(sap))
3759 return 0;
3760 }
3761 if (fill_in)
3762 {
3763 if (sap->type == SAT_MASTERSLAVE && (amaip->mstype == AM_MASTERSLAVE || amaip->mstype == AM_NULL))
3764 return (amaip->lens[(amaip->numseg)-1] + amaip->aligncoords[amaip->numseg-1]);
3765 else if (sap->type == SAT_PARTIAL)
3766 {
3767 length = 0;
3768 for (i=0; i<(amaip->numsaps-1); i++)
3769 {
3770 length += AlnMgrGetMaxUnalignedLength(amaip->saps[i], amaip->saps[i+1]);
3771 }
3772 return (length + amaip->lens[(amaip->numseg)-1] + amaip->aligncoords[amaip->numseg-1]);
3773 }
3774 } else
3775 {
3776 if (amaip->mstype == AM_NULL)
3777 return 0;
3778 return (amaip->lens[(amaip->numseg)-1] + amaip->aligncoords[amaip->numseg-1]);
3779 }
3780 }
3781 return 0;
3782 }
3783
AlnMgrGetNumSeqs(SeqAlignPtr sap)3784 NLM_EXTERN Int4 AlnMgrGetNumSeqs(SeqAlignPtr sap)
3785 {
3786 AMAlignIndexPtr amaip;
3787 DenseSegPtr dsp;
3788 Int4 i;
3789
3790 if (!sap)
3791 return 0;
3792 i = AlnMgrCheckAlignForParent(sap);
3793 if (i<0)
3794 return 0;
3795 if (i == AM_CHILD)
3796 {
3797 dsp = (DenseSegPtr)sap->segs;
3798 if (!dsp)
3799 return 0;
3800 return (dsp->dim);
3801 } else if (i == AM_PARENT)
3802 {
3803 amaip = (AMAlignIndexPtr)(sap->saip);
3804 if (!amaip || amaip->mstype == AM_LITE)
3805 return 0;
3806 return (amaip->numbsqs);
3807 }
3808 return 0;
3809 }
3810
AlnMgrGetUniqueSeqs(SeqAlignPtr sap,Int4Ptr n)3811 NLM_EXTERN SeqIdPtr AlnMgrGetUniqueSeqs(SeqAlignPtr sap, Int4Ptr n)
3812 {
3813 AMAlignIndexPtr amaip;
3814 Int4 c;
3815 DenseSegPtr dsp;
3816 Boolean found;
3817 Int4 i;
3818 Int4 m;
3819 SeqIdPtr sip;
3820 SeqIdPtr sip_head;
3821 SeqIdPtr sip_prev;
3822 SeqIdPtr sip_tmp;
3823
3824 if (sap == NULL)
3825 return 0;
3826 i = AlnMgrCheckAlignForParent(sap);
3827 if (i<0)
3828 return 0;
3829 sip_head = sip_prev = NULL;
3830 if (i == AM_CHILD)
3831 {
3832 dsp = (DenseSegPtr)sap->segs;
3833 if (dsp == NULL)
3834 return 0;
3835 sip = dsp->ids;
3836 m = 0;
3837 while (sip)
3838 {
3839 sip_tmp = sip_head;
3840 found = FALSE;
3841 while (!found && sip_tmp != NULL)
3842 {
3843 if (SAM_OrderSeqID(sip, sip_tmp) == 0)
3844 found = TRUE;
3845 sip_tmp = sip_tmp->next;
3846 }
3847 if (!found)
3848 {
3849 m++;
3850 if (sip_head == NULL)
3851 {
3852 sip_head = sip_prev = SeqIdDup(sip);
3853 } else
3854 {
3855 sip_prev->next = SeqIdDup(sip);
3856 sip_prev = sip_prev->next;
3857 }
3858 }
3859 sip = sip->next;
3860 }
3861 if (n)
3862 *n = m;
3863 return sip_head;
3864 } else if (i == AM_PARENT)
3865 {
3866 amaip = (AMAlignIndexPtr)(sap->saip);
3867 if (amaip == NULL || amaip->mstype == AM_LITE)
3868 return 0;
3869 m = 0;
3870 if (amaip->alnsaps == 1)
3871 {
3872 return (AlnMgrGetUniqueSeqs((SeqAlignPtr)sap->segs, n));
3873 }
3874 for (c=0; c<amaip->numrows; c++)
3875 {
3876 sip = amaip->rowsource[c]->id;
3877 sip_tmp = sip_head;
3878 found = FALSE;
3879 while (!found && sip_tmp != NULL)
3880 {
3881 if (SAM_OrderSeqID(sip, sip_tmp) == 0)
3882 found = TRUE;
3883 sip_tmp = sip_tmp->next;
3884 }
3885 if (!found)
3886 {
3887 m++;
3888 if (sip_head == NULL)
3889 {
3890 sip_head = sip_prev = SeqIdDup(sip);
3891 } else
3892 {
3893 sip_prev->next = SeqIdDup(sip);
3894 sip_prev = sip_prev->next;
3895 }
3896 }
3897 }
3898 if (n)
3899 *n = m;
3900 return sip_head;
3901 }
3902 return NULL;
3903 }
3904
AlnMgrGetNthSeqIdPtr(SeqAlignPtr sap,Int4 n)3905 NLM_EXTERN SeqIdPtr AlnMgrGetNthSeqIdPtr(SeqAlignPtr sap, Int4 n)
3906 {
3907 AMAlignIndexPtr amaip;
3908 Int4 count;
3909 DenseSegPtr dsp;
3910 Int4 i;
3911 SeqIdPtr sip;
3912
3913 if (!sap)
3914 return NULL;
3915 i = AlnMgrCheckAlignForParent(sap);
3916 if (i<0)
3917 return NULL;
3918 else if (i == AM_CHILD)
3919 {
3920 dsp = (DenseSegPtr)sap->segs;
3921 if (!dsp)
3922 return NULL;
3923 sip = dsp->ids;
3924 count = 0;
3925 while (sip)
3926 {
3927 count++;
3928 if (count == n)
3929 return (SeqIdDup(sip));
3930 sip = sip->next;
3931 }
3932 } else if (i == AM_PARENT)
3933 {
3934 amaip = (AMAlignIndexPtr)(sap->saip);
3935 if (amaip->mstype == AM_LITE)
3936 return NULL;
3937 if (n <= amaip->numrows)
3938 {
3939 return (SeqIdDup(amaip->rowsource[n-1]->id));
3940 } else
3941 {
3942 if (AlnMgrIsSAPNULL(sap))
3943 {
3944 sip = amaip->ids;
3945 count = 1;
3946 while (sip != NULL && count < n)
3947 {
3948 sip = sip->next;
3949 count++;
3950 }
3951 return SeqIdDup(sip);
3952 } else
3953 return NULL;
3954 }
3955 }
3956 return NULL;
3957 }
3958
3959 /* (RANGE) */
AlnMgrGetNthSeqRangeInSA(SeqAlignPtr sap,Int4 n,Int4Ptr start,Int4Ptr stop)3960 NLM_EXTERN void AlnMgrGetNthSeqRangeInSA(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop)
3961 {
3962 AMAlignDatPtr amadp;
3963 AMAlignIndexPtr amaip;
3964 Uint2 beg;
3965 Int4 bsq;
3966 DenseSegPtr dsp;
3967 Uint2 end;
3968 Int4 i;
3969 Int4 j;
3970 RowSourcePtr rsp;
3971 SAIndexPtr saip;
3972 SeqIdPtr sip;
3973 Uint2 strand;
3974 Int4 tmp_beg;
3975 Int4 tmp_end;
3976 Int4 tmp_start;
3977 Int4 tmp_stop;
3978
3979 if (!sap)
3980 return;
3981 i = AlnMgrCheckAlignForParent(sap);
3982 if (i < 0)
3983 {
3984 return;
3985 } else if (i == AM_CHILD)
3986 {
3987 if (n<1)
3988 return;
3989 saip = (SAIndexPtr)(sap->saip);
3990 if(!saip)
3991 return;
3992 bsq = n-1;
3993 dsp = (DenseSegPtr)sap->segs;
3994 if (n > dsp->dim)
3995 return;
3996 if (!dsp)
3997 return;
3998 strand = dsp->strands[bsq];
3999 if (strand != Seq_strand_minus)
4000 {
4001 if (saip->ssdp[bsq]->numsect == 0)
4002 {
4003 if (start)
4004 *start = -1;
4005 if (stop)
4006 *stop = -1;
4007 return;
4008 }
4009 beg = saip->ssdp[bsq]->sect[0];
4010 if (start)
4011 *start = dsp->starts[beg*(dsp->dim)+bsq];
4012 end = saip->ssdp[bsq]->sect[(saip->ssdp[bsq]->numsect)-1];
4013 if (stop)
4014 *stop = (dsp->starts[end*(dsp->dim)+bsq] + dsp->lens[end] - 1);
4015 return;
4016 } else
4017 {
4018 if (saip->ssdp[bsq]->numsect == 0)
4019 {
4020 if (start)
4021 *start = -1;
4022 if (stop)
4023 *stop = -1;
4024 return;
4025 }
4026 beg = saip->ssdp[bsq]->sect[(saip->ssdp[bsq]->numsect)-1];
4027 if (start)
4028 *start = dsp->starts[beg*(dsp->dim)+bsq];
4029 end = saip->ssdp[bsq]->sect[0];
4030 if (stop)
4031 *stop = (dsp->starts[end*(dsp->dim)+bsq] + dsp->lens[end] - 1);
4032 return;
4033 }
4034 } else if (i == AM_PARENT)
4035 {
4036 if (n<1)
4037 return;
4038 bsq = n-1;
4039 amaip = (AMAlignIndexPtr)(sap->saip);
4040 if (amaip->mstype == AM_LITE)
4041 return;
4042 if (amaip->numseg == 0)
4043 {
4044 if (start)
4045 *start = -1;
4046 if (stop)
4047 *stop = -1;
4048 return;
4049 }
4050 if (amaip->rowsource == NULL)
4051 {
4052 amadp = amaip->amadp[bsq];
4053 sip = amaip->ids;
4054 for (j = 0; j<bsq; j++)
4055 {
4056 sip = sip->next;
4057 if (sip == NULL)
4058 return;
4059 }
4060 for (j = 0; j<(amadp->numsaps); j++)
4061 {
4062 tmp_start = tmp_stop = 0;
4063 AlnMgrGetNthSeqRangeInSA(amadp->saps[j], AlnMgrGetNForSip(amadp->saps[j], sip), &tmp_start, &tmp_stop);
4064 if (j == 0)
4065 {
4066 tmp_beg = tmp_start;
4067 tmp_end = tmp_stop;
4068 } else
4069 {
4070 if (tmp_start < tmp_beg)
4071 tmp_beg = tmp_start;
4072 if (tmp_stop > tmp_end)
4073 tmp_end = tmp_stop;
4074 }
4075 }
4076 if (start)
4077 *start = tmp_beg;
4078 if (stop)
4079 *stop = tmp_end;
4080 return;
4081 } else
4082 {
4083 sip = amaip->ids;
4084 if (n > amaip->numrows)
4085 return;
4086 rsp = (RowSourcePtr)amaip->rowsource[n-1];
4087 for (j=0; j<(rsp->numsaps); j++)
4088 {
4089 tmp_start = tmp_stop = 0;
4090 if(rsp->which_saps[j] == 0)
4091 {
4092 if (start)
4093 *start = 0;
4094 if (stop)
4095 *stop = 0;
4096 return;
4097 }
4098 AlnMgrGetNthSeqRangeInSA(amaip->saps[rsp->which_saps[j]-1], rsp->num_in_sap[j], &tmp_start, &tmp_stop);
4099
4100 if (j==0)
4101 {
4102 tmp_beg = tmp_start;
4103 tmp_end = tmp_stop;
4104 } else
4105 {
4106 if (tmp_start < tmp_beg)
4107 tmp_beg = tmp_start;
4108 if (tmp_stop > tmp_end)
4109 tmp_end = tmp_stop;
4110 }
4111 }
4112 if (start)
4113 *start = tmp_beg;
4114 if (stop)
4115 *stop = tmp_end;
4116 return;
4117 }
4118 }
4119 return;
4120 }
4121
AlnMgrGetNumSegments(SeqAlignPtr sap)4122 NLM_EXTERN Int4 AlnMgrGetNumSegments(SeqAlignPtr sap)
4123 {
4124 AMAlignIndexPtr amaip;
4125 DenseSegPtr dsp;
4126 Int4 i;
4127
4128 if (sap == NULL)
4129 return -1;
4130 i = AlnMgrCheckAlignForParent(sap);
4131 if (i == AM_CHILD)
4132 {
4133 dsp = (DenseSegPtr)(sap->segs);
4134 return (dsp->numseg);
4135 } else if (i == AM_PARENT)
4136 {
4137 amaip = (AMAlignIndexPtr)(sap->saip);
4138 return (amaip->numseg);
4139 } else
4140 return -1;
4141 }
4142
4143 /***************************************************************************
4144 *
4145 * AlnMgrGetNumAlnBlocks returns the number of separate aligned regions
4146 * in the seqalign. A return value of -1 indicates an error; a return
4147 * value of 0 indicates a NULL alignment (only one unaligned region and
4148 * no aligned regions.
4149 *
4150 ***************************************************************************/
AlnMgrGetNumAlnBlocks(SeqAlignPtr sap)4151 NLM_EXTERN Int4 AlnMgrGetNumAlnBlocks(SeqAlignPtr sap)
4152 {
4153 AMAlignIndexPtr amaip;
4154
4155 if (sap == NULL || sap->saip == NULL)
4156 return -1;
4157 if (sap->saip->indextype == INDEX_SEGS)
4158 return 1;
4159 else if (sap->saip->indextype == INDEX_PARENT)
4160 {
4161 amaip = (AMAlignIndexPtr)(sap->saip);
4162 if (amaip->mstype == AM_LITE || amaip->mstype == AM_NEATINDEX)
4163 return -1;
4164 else if (amaip->mstype == AM_NULL)
4165 return 0;
4166 else if (amaip->mstype == AM_MASTERSLAVE)
4167 return 1;
4168 else if (amaip->mstype == AM_SEGMENTED_MASTERSLAVE)
4169 {
4170 if (amaip->rowsource == NULL || amaip->master < 1 || amaip->master > amaip->numrows)
4171 return -1;
4172 return (amaip->rowsource[amaip->master-1]->numsaps);
4173 } else if (sap->type == SAT_PARTIAL)
4174 return amaip->alnsaps;
4175 } else
4176 return -1;
4177 return -1;
4178 }
4179
AlnMgrGetNthBlockRange(SeqAlignPtr sap,Int4 n,Int4Ptr from,Int4Ptr to)4180 NLM_EXTERN Boolean AlnMgrGetNthBlockRange(SeqAlignPtr sap, Int4 n, Int4Ptr from, Int4Ptr to)
4181 {
4182 AMAlignIndexPtr amaip;
4183 Int4 i;
4184 Int4 len;
4185
4186 if (sap == NULL || sap->saip == NULL)
4187 return FALSE;
4188 if (sap->saip->indextype == INDEX_SEGS)
4189 {
4190 len = AlnMgrGetAlnLength(sap, FALSE);
4191 if (from)
4192 *from = 0;
4193 if (to)
4194 *to = len - 1;
4195 return TRUE;
4196 } else if (sap->saip->indextype == INDEX_PARENT)
4197 {
4198 i = AlnMgrGetNumAlnBlocks(sap);
4199 if (n > i || i < 1)
4200 return FALSE;
4201 amaip = (AMAlignIndexPtr)(sap->saip);
4202 if (i == 1)
4203 {
4204 len = AlnMgrGetAlnLength(sap, FALSE);
4205 if (from)
4206 *from = 0;
4207 if (to)
4208 *to = len - 1;
4209 return TRUE;
4210 } else
4211 return (AlnMgrGetNthSegmentRange(sap, n, from, to));
4212 } else
4213 return FALSE;
4214 }
4215
4216
4217 /***************************************************************************
4218 *
4219 * AlnMgrGetNthAlignedSegInNthRow is similar to AlnMgrGetNextAlignBit,
4220 * but it takes an extra argument -- the number (1-based) of the segment
4221 * for which you want the alignment. Fill in the AlnMsg structure as for
4222 * AlnMgrGetNextAlignBit, but leave out the from_m and to_m; and as usual,
4223 * don't modify the AlnMsg structure in between calls, and call AlnMsgReNew
4224 * for a new segment. The from_b and to_b fields will work as in
4225 * AlnMgrGetNextAlnBit -- if there is a gap, these are alignment coordinates;
4226 * otherwise they're sequence coordinates.
4227 *
4228 ***************************************************************************/
AlnMgrGetNthAlignedSegInNthRow(SeqAlignPtr sap,AlnMsgPtr amp,Int4 segnum)4229 NLM_EXTERN Boolean AlnMgrGetNthAlignedSegInNthRow(SeqAlignPtr sap, AlnMsgPtr amp, Int4 segnum)
4230 {
4231 Int4 n;
4232 Boolean retval;
4233
4234 if (sap == NULL || amp == NULL)
4235 return FALSE;
4236 if (amp->flag == FALSE)
4237 {
4238 n = AlnMgrGetNumSegments(sap);
4239 if (segnum > n)
4240 return FALSE;
4241 if (!AlnMgrGetNthSegmentRange(sap, segnum, &->from_m, &->to_m))
4242 return FALSE;
4243 amp->flag = TRUE;
4244 }
4245 retval = AlnMgrGetNextAlnBit(sap, amp);
4246 if (retval == FALSE)
4247 amp->flag = FALSE;
4248 return retval;
4249 }
4250
4251
AlnMgrGetNthSegmentRange(SeqAlignPtr sap,Int4 n,Int4Ptr from,Int4Ptr to)4252 NLM_EXTERN Boolean AlnMgrGetNthSegmentRange(SeqAlignPtr sap, Int4 n, Int4Ptr from, Int4Ptr to)
4253 {
4254 AMAlignIndexPtr amaip;
4255 Int4 i;
4256 Boolean last;
4257 Int4 num;
4258 SAIndexPtr saip;
4259
4260 if (sap == NULL)
4261 return FALSE;
4262 i = AlnMgrCheckAlignForParent(sap);
4263 num = AlnMgrGetNumSegments(sap);
4264 if (n > num)
4265 return FALSE;
4266 last = FALSE;
4267 if (n == num)
4268 last = TRUE;
4269 if (i == AM_CHILD)
4270 {
4271 saip = (SAIndexPtr)(sap->saip);
4272 if (!last)
4273 {
4274 if (from)
4275 *from = saip->aligncoords[n-1];
4276 if (to)
4277 *to = saip->aligncoords[n] - 1;
4278 return TRUE;
4279 } else
4280 {
4281 if (from)
4282 *from = saip->aligncoords[n-1];
4283 if (to)
4284 *to = AlnMgrGetAlnLength(sap, FALSE);
4285 return TRUE;
4286 }
4287 } else if (i == AM_PARENT)
4288 {
4289 amaip = (AMAlignIndexPtr)(sap->saip);
4290 if (!last)
4291 {
4292 if (from)
4293 *from = amaip->aligncoords[n-1];
4294 if (to)
4295 *to = amaip->aligncoords[n] - 1;
4296 return TRUE;
4297 } else
4298 {
4299 if (from)
4300 *from = amaip->aligncoords[n-1];
4301 if (to)
4302 *to = amaip->aligncoords[n-1] + amaip->lens[n-1] - 1;
4303 return TRUE;
4304 }
4305 } else
4306 return FALSE;
4307 }
4308
4309
/********************************************************************************
*
*  AlnMgrGetNextNthSeqRange is called repeatedly (typically in a while loop)
*  to return the ranges of all aligned and all internal unaligned regions of
*  any row in a seqalign.  If there is an error, or if the function is called
*  past the last block, the function returns FALSE.  Set where to point to an
*  allocated integer equal to 0 on the first call and don't change it during
*  the loop.  Set the boolean unaligned to FALSE to get only the aligned
*  regions, and TRUE to get the aligned regions plus all internal unaligned
*  regions.  For unaligned regions, *is_aligned will be FALSE.
*
********************************************************************************/
AlnMgrGetNextNthSeqRange(SeqAlignPtr sap,Int4 n,Int4Ptr start,Int4Ptr stop,Int4Ptr where,BoolPtr is_aligned,Boolean unaligned)4322 NLM_EXTERN Boolean AlnMgrGetNextNthSeqRange(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop, Int4Ptr where, BoolPtr is_aligned, Boolean unaligned)
4323 {
4324 if (sap == NULL || n <= 0)
4325 return FALSE;
4326 if (sap->saip == NULL)
4327 return FALSE;
4328 if (sap->saip->indextype == INDEX_PARENT && sap->type == SAT_PARTIAL)
4329 {
4330 return (am_get_nth_range_for_partial(sap, n, start, stop, where, is_aligned, unaligned));
4331 } else
4332 {
4333 if (*where == 0)
4334 {
4335 AlnMgrGetNthSeqRangeInSA(sap, n, start, stop);
4336 *where = 1;
4337 return TRUE;
4338 } else
4339 return FALSE;
4340 }
4341 }
4342
/* Worker for AlnMgrGetNextNthSeqRange on partial parents.
 *
 * *where encodes the iteration state for row n:
 *   k >= 0    : report the aligned range of the row's k-th child block next;
 *   -(k) < 0  : report the unaligned gap between child blocks k-1 and k next.
 * After an aligned block the cursor becomes -(k+1) when gaps are requested
 * (and the parent type supports them), otherwise k+1; after a gap it becomes
 * the positive index of the following block.
 *
 * A gap with no residues is reported as start = stop = -1.
 * Returns FALSE when iteration is finished or the arguments are unusable. */
static Boolean am_get_nth_range_for_partial(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop, Int4Ptr where, BoolPtr is_aligned, Boolean unaligned)
{
   AMAlignIndexPtr  amaip;
   Int4             gap_start;
   Int4             gap_stop;
   Int4             next;
   Int4             prev;
   RowSourcePtr     rsp;
   Uint1            strand;
   Int4             tmp_where;

   if (sap == NULL || sap->saip == NULL || where == NULL)
      return FALSE;
   amaip = (AMAlignIndexPtr)sap->saip;
   if (amaip->mstype == AM_LITE)
      return FALSE;
   /* rowsource is indexed by row-1; reject rows outside the index */
   if (n < 1 || n > amaip->numrows)
      return FALSE;
   rsp = amaip->rowsource[n-1];
   tmp_where = *where;
   if (tmp_where >= 0)
   {
      if (tmp_where >= rsp->numsaps)
         return FALSE;
      if (is_aligned)
         *is_aligned = TRUE;
      AlnMgrGetNthSeqRangeInSA(amaip->saps[rsp->which_saps[tmp_where]-1], rsp->num_in_sap[tmp_where], start, stop);
      if (unaligned && (sap->type == SAT_PARTIAL || (sap->type == SAT_MASTERSLAVE && amaip->mstype == AM_SEGMENTED_MASTERSLAVE)))
         tmp_where = -(tmp_where+1);
      else
         tmp_where += 1;
   } else if (unaligned)
   {
      next = -tmp_where;   /* block after the gap  */
      prev = next - 1;     /* block before the gap */
      if (next >= rsp->numsaps)
         return FALSE;
      if (is_aligned)
         *is_aligned = FALSE;
      /* the strand must be read with the row's position inside the child
         alignment (num_in_sap), not with the parent row number */
      strand = AlnMgrGetNthStrand(amaip->saps[rsp->which_saps[next]-1], rsp->num_in_sap[next]);
      gap_start = gap_stop = 0;
      if (strand == Seq_strand_minus)
      {
         /* on the minus strand the following block lies at lower sequence
            coordinates: the gap runs from just past the end of the next
            block up to just before the start of the previous one */
         AlnMgrGetNthSeqRangeInSA(amaip->saps[rsp->which_saps[next]-1], rsp->num_in_sap[next], NULL, &gap_start);
         AlnMgrGetNthSeqRangeInSA(amaip->saps[rsp->which_saps[prev]-1], rsp->num_in_sap[prev], &gap_stop, NULL);
      } else
      {
         /* plus strand: from just past the previous block to just before
            the next one */
         AlnMgrGetNthSeqRangeInSA(amaip->saps[rsp->which_saps[prev]-1], rsp->num_in_sap[prev], NULL, &gap_start);
         AlnMgrGetNthSeqRangeInSA(amaip->saps[rsp->which_saps[next]-1], rsp->num_in_sap[next], &gap_stop, NULL);
      }
      gap_start += 1;
      gap_stop -= 1;
      /* both ends are always computed, so the emptiness test no longer
         depends on which output pointers the caller supplied */
      if (gap_start > gap_stop)
         gap_start = gap_stop = -1;
      if (start)
         *start = gap_start;
      if (stop)
         *stop = gap_stop;
      tmp_where = next;
   } else
   {
      /* negative cursor but gaps not requested: the caller changed
         'unaligned' mid-loop; stop instead of returning TRUE forever */
      return FALSE;
   }
   *where = tmp_where;
   return TRUE;
}
4412
4413 /********************************************************************************
4414 *
4415 * AlnMgrGetNthRowTail retrieves the blocks of sequence on either end of the
4416 * alignment, by row. which_tail is LEFT_TAIL to retrieve the ends which come
4417 * before alignment coordinate 0, and RIGHT_TAIL to retrieve the other ends.
4418 * The function returns TRUE if successful, FALSE for an error.
4419 *
4420 ********************************************************************************/
AlnMgrGetNthRowTail(SeqAlignPtr sap,Int4 n,Uint1 which_tail,Int4Ptr start,Int4Ptr stop,Uint1Ptr strand)4421 NLM_EXTERN Boolean AlnMgrGetNthRowTail(SeqAlignPtr sap, Int4 n, Uint1 which_tail, Int4Ptr start, Int4Ptr stop, Uint1Ptr strand)
4422 {
4423 BioseqPtr bsp;
4424 SeqIdPtr sip = NULL;
4425 Int4 tmp_start;
4426 Int4 tmp_stop;
4427 Uint1 tmp_strand;
4428
4429 if (sap == NULL || n < 1 || sap->segs == NULL)
4430 return FALSE;
4431 tmp_start = tmp_stop = -1;
4432 AlnMgrGetNthSeqRangeInSA(sap, n, &tmp_start, &tmp_stop);
4433 if (tmp_start == -1 || tmp_stop == -1)
4434 return FALSE;
4435 tmp_strand = AlnMgrGetNthStrand(sap, n);
4436 if (which_tail == LEFT_TAIL)
4437 {
4438 if (tmp_strand == Seq_strand_minus)
4439 {
4440 sip = AlnMgrGetNthSeqIdPtr(sap, n);
4441 bsp = BioseqLockById(sip);
4442 if (tmp_stop == bsp->length-1 || stop == NULL)
4443 {
4444 if (start)
4445 *start = -1;
4446 if (stop)
4447 *stop = -1;
4448 } else
4449 {
4450 if (bsp == NULL)
4451 return FALSE;
4452 if (start)
4453 *start = tmp_stop-1;
4454 if (stop)
4455 *stop = bsp->length-1;
4456 }
4457 BioseqUnlock(bsp);
4458 if (strand)
4459 *strand = tmp_strand;
4460 } else
4461 {
4462 if (tmp_start >= 1)
4463 {
4464 if (start)
4465 *start = 0;
4466 if (stop)
4467 *stop = tmp_start - 1;
4468 } else
4469 {
4470 if (start)
4471 *start = -1;
4472 if (stop)
4473 *stop = -1;
4474 }
4475 if (strand)
4476 *strand = tmp_strand;
4477 }
4478 } else if (which_tail == RIGHT_TAIL)
4479 {
4480 if (tmp_strand == Seq_strand_minus)
4481 {
4482 if (tmp_start >= 1)
4483 {
4484 if (start)
4485 *start = 0;
4486 if (stop)
4487 *stop = tmp_start - 1;
4488 } else
4489 {
4490 if (start)
4491 *start = -1;
4492 if (stop)
4493 *stop = -1;
4494 }
4495 if (strand)
4496 *strand = tmp_strand;
4497 } else
4498 {
4499 sip = AlnMgrGetNthSeqIdPtr(sap, n);
4500 bsp = BioseqLockById(sip);
4501 if (bsp == NULL)
4502 return FALSE;
4503 if (bsp->length-1 == tmp_stop)
4504 {
4505 if (start)
4506 *start = -1;
4507 if (stop)
4508 *stop = -1;
4509 } else
4510 {
4511 if (start)
4512 *start = tmp_stop + 1;
4513 if (stop)
4514 *stop = bsp->length-1;
4515 }
4516 if (strand)
4517 *strand = tmp_strand;
4518 BioseqUnlock(bsp);
4519 }
4520 } else
4521 return FALSE;
4522 SeqIdFree(sip);
4523 return TRUE;
4524 }
4525
AlnMgrGetMaxTailLength(SeqAlignPtr sap,Uint1 which_tail)4526 NLM_EXTERN Int4 AlnMgrGetMaxTailLength (SeqAlignPtr sap, Uint1 which_tail)
4527 {
4528 Int4 i;
4529 Int4 max;
4530 Int4 n;
4531 Int4 start;
4532 Int4 stop;
4533
4534 if (sap == NULL)
4535 return -1;
4536 n = AlnMgrGetNumRows(sap);
4537 max = -1;
4538 for (i=0; i<n; i++)
4539 {
4540 if (!AlnMgrGetNthRowTail (sap, i+1, which_tail, &start, &stop, NULL))
4541 return -1;
4542 if (stop - start + 1 > max)
4543 max = stop - start + 1;
4544 }
4545 return max;
4546 }
4547
AlnMgrGetNthUnalignedForNthRow(SeqAlignPtr sap,Int4 unaligned,Int4 row,Int4Ptr start,Int4Ptr stop)4548 NLM_EXTERN Boolean AlnMgrGetNthUnalignedForNthRow(SeqAlignPtr sap, Int4 unaligned, Int4 row, Int4Ptr start, Int4Ptr stop)
4549 {
4550 AMAlignIndexPtr amaip;
4551 Int4 beg;
4552 BioseqPtr bsp;
4553 Int4 end;
4554 Boolean go = TRUE;
4555 Int4 i;
4556 Int4 j;
4557 RowSourcePtr rsp;
4558 SeqIdPtr sip;
4559 Uint2 strand;
4560 Int4 tmp_start;
4561 Int4 tmp_stop;
4562
4563 i = AlnMgrCheckAlignForParent(sap);
4564 if (i == AM_PARENT)
4565 {
4566 amaip = (AMAlignIndexPtr)(sap->saip);
4567 if (amaip->numseg > 0 && row > amaip->numrows)
4568 return FALSE;
4569 if (sap->type == SAT_PARTIAL || (sap->type == SAT_MASTERSLAVE && (amaip->mstype == AM_SEGMENTED_MASTERSLAVE || amaip->mstype == AM_NULL)))
4570 {
4571 if (amaip->numseg == 0)
4572 {
4573 if (row > amaip->numbsqs)
4574 return FALSE;
4575 sip = amaip->ids;
4576 for (j=1; j<row; j++)
4577 {
4578 sip = sip->next;
4579 }
4580 bsp = BioseqLockById(sip);
4581 if (bsp == NULL)
4582 return FALSE;
4583 if (start != NULL)
4584 *start = 0;
4585 if (stop != NULL)
4586 *stop = bsp->length-1;
4587 BioseqUnlock(bsp);
4588 return TRUE;
4589 } else if (unaligned > amaip->numseg - 1)
4590 return FALSE;
4591 tmp_start = tmp_stop = 0;
4592 rsp = amaip->rowsource[row-1];
4593 if (rsp->which_saps[unaligned-1] == 0 || rsp->which_saps[unaligned] == 0)
4594 {
4595 beg = unaligned-1;
4596 while (rsp->which_saps[beg] == 0 && beg >= 0)
4597 {
4598 beg--;
4599 }
4600 if (beg<0)
4601 go = FALSE;
4602 end = unaligned;
4603 if (rsp->which_saps[end] == 0)
4604 go = FALSE;
4605 } else
4606 {
4607 beg = unaligned-1;
4608 end = unaligned;
4609 }
4610 if (go)
4611 strand = AlnMgrGetNthStrand(amaip->saps[rsp->which_saps[end]-1], row);
4612 if (start && go)
4613 {
4614 if (strand == Seq_strand_minus)
4615 {
4616 AlnMgrGetNthSeqRangeInSA(amaip->saps[rsp->which_saps[end]-1], rsp->num_in_sap[end], NULL, &tmp_start);
4617 *start = tmp_start + 1;
4618 } else
4619 {
4620 AlnMgrGetNthSeqRangeInSA(amaip->saps[rsp->which_saps[beg]-1], rsp->num_in_sap[beg], NULL, &tmp_start);
4621 *start = tmp_start + 1;
4622 }
4623 }
4624 if (stop && go)
4625 {
4626 if (strand == Seq_strand_minus)
4627 {
4628 AlnMgrGetNthSeqRangeInSA(amaip->saps[rsp->which_saps[beg]-1], rsp->num_in_sap[beg], &tmp_stop, NULL);
4629 *stop = tmp_stop - 1;
4630 } else
4631 {
4632 AlnMgrGetNthSeqRangeInSA(amaip->saps[rsp->which_saps[end]-1], rsp->num_in_sap[end], &tmp_stop, NULL);
4633 *stop = tmp_stop - 1;
4634 }
4635 }
4636 if (tmp_start + 1 > tmp_stop - 1)
4637 {
4638 if (start)
4639 *start = -1;
4640 if (stop)
4641 *stop = -1;
4642 }
4643 return TRUE;
4644 } else
4645 return FALSE;
4646 } else
4647 return FALSE;
4648 }
4649
4650
AlnMgrGetStrand(SeqAlignPtr sap,SeqIdPtr sip)4651 NLM_EXTERN Uint1 AlnMgrGetStrand(SeqAlignPtr sap, SeqIdPtr sip)
4652 {
4653 Int4 i;
4654
4655 i = AlnMgrGetNForSip(sap, sip);
4656 return (AlnMgrGetNthStrand(sap, i));
4657 }
4658
AlnMgrGetNthStrand(SeqAlignPtr sap,Int4 n)4659 NLM_EXTERN Uint1 AlnMgrGetNthStrand(SeqAlignPtr sap, Int4 n)
4660 {
4661 AMAlignIndexPtr amaip;
4662 Int4 c;
4663 DenseSegPtr dsp;
4664 Int4 m;
4665 SeqAlignPtr salp;
4666
4667 if (!sap || n < 1 || sap->segs == NULL)
4668 return 0;
4669 if (sap->segtype != SAS_DENSEG)
4670 {
4671 if (sap->saip == NULL)
4672 return 0;
4673 amaip = (AMAlignIndexPtr)sap->saip;
4674 if (n > amaip->numrows)
4675 return 0;
4676 c = 0;
4677 while (amaip->rowsource[n-1]->which_saps[c] == 0)
4678 {
4679 c++;
4680 if (c >= amaip->alnsaps)
4681 return (Seq_strand_unknown);
4682 }
4683 salp = amaip->saps[amaip->rowsource[n-1]->which_saps[c]-1];
4684 dsp = (DenseSegPtr)salp->segs;
4685 m = amaip->rowsource[n-1]->num_in_sap[0];
4686 if (m > dsp->dim)
4687 return 0;
4688 return (dsp->strands[m-1]);
4689 } else
4690 {
4691 dsp = (DenseSegPtr)sap->segs;
4692 if (!dsp)
4693 return 0;
4694 if (n==0)
4695 return 0;
4696 if (dsp->strands)
4697 return (dsp->strands[n-1]);
4698 else
4699 return (Seq_strand_plus);
4700 }
4701 }
4702
AlnMgrGetNForSip(SeqAlignPtr sap,SeqIdPtr sip)4703 NLM_EXTERN Int4 AlnMgrGetNForSip(SeqAlignPtr sap, SeqIdPtr sip)
4704 {
4705 AMAlignIndexPtr amaip;
4706 DenseSegPtr dsp;
4707 Int4 i;
4708 Int4 n;
4709 SeqIdPtr sip_tmp;
4710
4711 i = AlnMgrCheckAlignForParent(sap);
4712 if (i<0)
4713 return -1;
4714 if (i == AM_PARENT)
4715 {
4716 amaip = (AMAlignIndexPtr)(sap->saip);
4717 sip_tmp = amaip->ids;
4718 n = 0;
4719 while (sip_tmp)
4720 {
4721 n++;
4722 if (SeqIdComp(sip_tmp, sip) == SIC_YES)
4723 return n;
4724 sip_tmp = sip_tmp->next;
4725 }
4726 } else if (i == AM_CHILD)
4727 {
4728 dsp = (DenseSegPtr)(sap->segs);
4729 sip_tmp = dsp->ids;
4730 n = 0;
4731 while (sip_tmp)
4732 {
4733 n++;
4734 if (SeqIdComp(sip_tmp, sip) == SIC_YES)
4735 return n;
4736 sip_tmp = sip_tmp->next;
4737 }
4738 }
4739 return -1;
4740 }
4741
AlnMgrGetNForSap(AMAlignIndexPtr amaip,SeqAlignPtr sap)4742 NLM_EXTERN Int4 AlnMgrGetNForSap(AMAlignIndexPtr amaip, SeqAlignPtr sap)
4743 {
4744 Int4 i;
4745
4746 if (sap == NULL || amaip == NULL)
4747 return -1;
4748 if (sap->saip->indextype != INDEX_SEGS)
4749 return -1;
4750 i = 0;
4751 while (i<amaip->alnsaps)
4752 {
4753 if (amaip->saps[i] == sap)
4754 return (i+1);
4755 i++;
4756 }
4757 return -1;
4758 }
4759
4760
/********************************************************************************
*
*  AlnMgrGetAllNForSip is called in a while loop to return all the rows in
*  which a seqid appears in a given seqalign.  Use *n = 0 to start; on each
*  call that returns TRUE, *n is set to the number of the next row that
*  contains the seqid.  If the return is FALSE, either there was an error or
*  there are no (more) rows containing that seqid.
*
********************************************************************************/
AlnMgrGetAllNForSip(SeqAlignPtr sap,SeqIdPtr sip,Int4Ptr n)4770 NLM_EXTERN Boolean AlnMgrGetAllNForSip(SeqAlignPtr sap, SeqIdPtr sip, Int4Ptr n)
4771 {
4772 AMAlignIndexPtr amaip;
4773 DenseSegPtr dsp;
4774 Int4 i;
4775 SeqIdPtr sip_tmp;
4776
4777 if (sap == NULL || sip == NULL || n == NULL)
4778 return FALSE;
4779 if (sap->saip == NULL)
4780 return FALSE;
4781 if (sap->saip->indextype == INDEX_SEGS)
4782 {
4783 i = 1;
4784 dsp = (DenseSegPtr)sap->segs;
4785 sip_tmp = dsp->ids;
4786 while (i <= *n)
4787 {
4788 sip_tmp = sip_tmp->next;
4789 i++;
4790 }
4791 while (sip_tmp)
4792 {
4793 if (SeqIdComp(sip_tmp, sip) == SIC_YES)
4794 {
4795 *n = i;
4796 return TRUE;
4797 }
4798 i++;
4799 sip_tmp = sip_tmp->next;
4800 }
4801 } else if (sap->saip->indextype == INDEX_PARENT)
4802 {
4803 amaip = (AMAlignIndexPtr)sap->saip;
4804 if (amaip->mstype == AM_LITE)
4805 return FALSE;
4806 i = *n + 1;
4807 while (i <= amaip->numrows)
4808 {
4809 if (SeqIdComp(amaip->rowsource[i-1]->id, sip) == SIC_YES)
4810 {
4811 *n = i;
4812 return TRUE;
4813 }
4814 i++;
4815 }
4816 }
4817 return FALSE;
4818 }
4819
AlnMgrGetSapForSip(AMAlignIndexPtr amaip,SeqIdPtr sip,Int4 which)4820 NLM_EXTERN Int4 AlnMgrGetSapForSip(AMAlignIndexPtr amaip, SeqIdPtr sip, Int4 which)
4821 {
4822 Int4 i;
4823 Int4 j;
4824 Int4 n;
4825
4826 i = 0;
4827 for (n=0; n<(amaip->numsaps); n++)
4828 {
4829 j = AlnMgrGetNForSip(amaip->saps[n], sip);
4830 if (j != -1)
4831 {
4832 if (i==which)
4833 return n;
4834 else
4835 i++;
4836 }
4837 }
4838 return -1;
4839 }
4840
/********************************************************************************
*
*  AlnMgrMapToBsqCoords returns the bioseq coordinate for an alignment
*  position.  If master is NULL, the alignment position is taken to be from
*  a flattened alignment.  Mapping relative to a given master is not
*  implemented: with a non-NULL master the function returns -1.  A return
*  value of -1 is also used when the sequence is gapped at that position or
*  when the alignment cannot be handled.
*
********************************************************************************/
4849
AlnMgrMapToBsqCoords(SeqAlignPtr sap,Uint4 pos,SeqIdPtr sip,SeqIdPtr master)4850 NLM_EXTERN Int4 AlnMgrMapToBsqCoords(SeqAlignPtr sap, Uint4 pos, SeqIdPtr sip, SeqIdPtr master)
4851 {
4852 DenseSegPtr dsp;
4853 Int4 n;
4854 Int4 offset;
4855 SAIndexPtr saip;
4856 Int4 start;
4857
4858 if (!sap)
4859 return -1;
4860 if (sap->segtype == SAS_DENSEG)
4861 {
4862 saip = (SAIndexPtr)(sap->saip);
4863 dsp = (DenseSegPtr)(sap->segs);
4864 if (sip == NULL)
4865 n = saip->master;
4866 else
4867 n = AlnMgrGetNForSip(sap, sip);
4868 if (!master)
4869 {
4870 start = binary_search_on_uint4_list(saip->aligncoords, pos, dsp->numseg);
4871 offset = pos - saip->aligncoords[start];
4872 if (dsp->starts[(dsp->dim*start) + n - 1] == -1)
4873 return -1;
4874 else
4875 if (dsp->strands[(dsp->dim*start) + n - 1] != Seq_strand_minus)
4876 return (dsp->starts[(dsp->dim*start) + n - 1] + offset);
4877 else
4878 return (dsp->starts[(dsp->dim*start) + n - 1] + dsp->lens[start] - 1 - offset);
4879 } else
4880 {
4881 }
4882 } else if (sap->segtype == SAS_DISC)
4883 {
4884 SeqAlignPtr salp;
4885 salp = (SeqAlignPtr)sap->segs;
4886 if(salp->next==NULL)
4887 return AlnMgrMapToBsqCoords(salp, pos, sip, master);
4888 }
4889 return -1;
4890 }
4891
/* Maps alignment position pos to the bioseq coordinate of row 'row' (1-based)
 * in an INDEX_SEGS alignment, and stores in *len the number of alignment
 * positions left in the containing segment starting at pos.
 *   master : must be NULL; mapping relative to a master is not implemented.
 * *len is written even when the row is gapped at pos.
 * Returns -1 for a gap, bad arguments or an unsupported alignment. */
static Int4 AlnMgrMapSegmentCoords(SeqAlignPtr sap, Uint4 pos, Int4 row, SeqIdPtr master, Int4Ptr len)
{
   DenseSegPtr  dsp;
   Int4         idx;
   Int4         offset;
   SAIndexPtr   saip;
   Int4         seg;

   if (sap == NULL || row < 0 || len == NULL)
      return -1;
   if (sap->saip == NULL)
      return -1;
   if (sap->saip->indextype != INDEX_SEGS || master != NULL)
      return -1;
   saip = (SAIndexPtr)sap->saip;
   dsp = (DenseSegPtr)sap->segs;
   /* rows are 1-based; row 0 or row > dim would index outside starts[] */
   if (dsp == NULL || row < 1 || row > dsp->dim)
      return -1;
   seg = binary_search_on_uint4_list(saip->aligncoords, pos, dsp->numseg);
   offset = pos - saip->aligncoords[seg];
   *len = dsp->lens[seg]-offset;
   idx = (dsp->dim*seg) + row - 1;
   if (dsp->starts[idx] == -1)
      return -1;
   /* a dense-seg without a strands array is treated as all-plus */
   if (dsp->strands == NULL || dsp->strands[idx] != Seq_strand_minus)
      return (dsp->starts[idx] + offset);
   return (dsp->starts[idx] + dsp->lens[seg] - 1 - offset);
}
4925
4926
4927 /********************************************************************************
4928 *
4929 * AlnMgrMapRowCoords maps a position in a given row to the bioseq coordinate
4930 * of that row. If master is NULL, the alignment is taken to be flattened;
4931 * otherwise it is an alignment according to that master (this will change the
4932 * correspondence between row coordinates and bioseq coordinates). The return
4933 * value will be either a positive bioseq coordinate, or -1 if the bioseq is
4934 * gapped at that row position.
4935 *
4936 ********************************************************************************/
AlnMgrMapRowCoords(SeqAlignPtr sap,Uint4 pos,Int4 row,SeqIdPtr master)4937 NLM_EXTERN Int4 AlnMgrMapRowCoords(SeqAlignPtr sap, Uint4 pos, Int4 row, SeqIdPtr master)
4938 {
4939 AMAlignIndexPtr amaip;
4940 AlnMsg amp;
4941 DenseSegPtr dsp;
4942 Boolean more;
4943 Int4 offset;
4944 SAIndexPtr saip;
4945 Int4 start;
4946
4947 if (sap == NULL || row < 0)
4948 return -1;
4949 if (sap->saip == NULL)
4950 return -1;
4951 if (sap->saip->indextype == INDEX_PARENT)
4952 {
4953 amaip = (AMAlignIndexPtr) sap->saip;
4954 if (row > amaip->numrows)
4955 return -1;
4956
4957 AlnMsgReNew(&);
4958 amp.row_num = row;
4959 amp.which_master = master;
4960 amp.from_m = pos;
4961 amp.to_m = pos;
4962 more = AlnMgrGetNextAlnBit(sap, &);
4963 if (more && amp.gap == 0)
4964 start = amp.from_b;
4965 else
4966 start = -1;
4967 return start;
4968 } else if (sap->saip->indextype == INDEX_SEGS)
4969 {
4970 saip = (SAIndexPtr)sap->saip;
4971 dsp = (DenseSegPtr)sap->segs;
4972 if (master == NULL)
4973 {
4974 start = binary_search_on_uint4_list(saip->aligncoords, pos, dsp->numseg);
4975 offset = pos - saip->aligncoords[start];
4976 if (dsp->starts[(dsp->dim*start) + row - 1] == -1)
4977 return -1;
4978 else
4979 if (dsp->strands[(dsp->dim*start) + row - 1] != Seq_strand_minus)
4980 return (dsp->starts[(dsp->dim*start) + row - 1] + offset);
4981 else
4982 return (dsp->starts[(dsp->dim*start) + row - 1] + dsp->lens[start] - 1 - offset);
4983 } else
4984 {
4985 }
4986 }
4987 return -1;
4988 }
4989
4990
4991 /********************************************************************************
4992 *
4993 * AlnMgrMapBioseqToSeqAlign takes a position in bioseq coordinates in a
4994 * row and maps it to seqalign coordinates, using the given master as
4995 * the alignment master (if master is NULL the alignment is flat). A
4996 * return value of -1 indicates an error; a return value of -2 indicates
4997 * that the given bioseq coordinates are not contained in the alignment
4998 * specified.
4999 *
5000 ********************************************************************************/
AlnMgrMapBioseqToSeqAlign(SeqAlignPtr sap,Int4 pos,Int4 row_num,SeqIdPtr master)5001 NLM_EXTERN Int4 AlnMgrMapBioseqToSeqAlign(SeqAlignPtr sap, Int4 pos, Int4 row_num, SeqIdPtr master)
5002 {
5003 Int4 start, stop;
5004
5005 if (sap == NULL || row_num < 0)
5006 return -1;
5007 AlnMgrGetNthSeqRangeInSA(sap, row_num, &start, &stop);
5008 if (pos < start || pos > stop)
5009 return -2;
5010 return AlnMgrMapBioseqToSeqAlignEx(sap, pos, row_num, master, NULL);
5011 }
5012
AlnMgrMapBioseqToSeqAlignEx(SeqAlignPtr sap,Int4 pos,Int4 row_num,SeqIdPtr master,Int4 * oldj)5013 NLM_EXTERN Int4 AlnMgrMapBioseqToSeqAlignEx(SeqAlignPtr sap, Int4 pos, Int4 row_num, SeqIdPtr master,
5014 Int4 *oldj)
5015 {
5016 AMAlignIndexPtr amaip;
5017 Boolean done;
5018 DenseSegPtr dsp;
5019 Int4 i;
5020 Int4 j;
5021 Int4 k;
5022 Int2 L;
5023 Int4 m;
5024 Int4 mid;
5025 Uint1 n;
5026 Int4 offset;
5027 Int2 R;
5028 SAIndexPtr saip;
5029 Int4 seg;
5030 SASeqDatPtr ssdp;
5031
5032 if (sap == NULL || row_num < 0)
5033 return -1;
5034 i = AlnMgrCheckAlignForParent(sap);
5035 if (i == AM_CHILD)
5036 {
5037 if (master == NULL)
5038 {
5039 saip = (SAIndexPtr)sap->saip;
5040 ssdp = saip->ssdp[row_num-1];
5041 if (ssdp == NULL)
5042 return -1;
5043 dsp = (DenseSegPtr)sap->segs;
5044 L = 0;
5045 R = ssdp->numsect - 1;
5046 n = AlnMgrGetNthStrand(sap, row_num);
5047 if (n != Seq_strand_minus)
5048 {
5049 while (L < R)
5050 {
5051 mid = (L + R)/2;
5052 if (dsp->starts[dsp->dim*ssdp->sect[mid+1]+row_num-1] <= pos)
5053 L = mid+1;
5054 else
5055 R = mid;
5056 }
5057 } else
5058 {
5059 while (L < R)
5060 {
5061 mid = (L + R)/2;
5062 if (dsp->starts[dsp->dim*ssdp->sect[mid]+row_num-1] > pos)
5063 L = mid+1;
5064 else
5065 R = mid;
5066 }
5067 }
5068 seg = L;
5069 offset = pos - dsp->starts[dsp->dim*ssdp->sect[seg]+row_num-1];
5070 if (n!=Seq_strand_minus)
5071 return (saip->aligncoords[ssdp->sect[seg]] + offset);
5072 else
5073 return (saip->aligncoords[ssdp->sect[seg]] + dsp->lens[ssdp->sect[seg]] - offset -1);
5074 } else
5075 {
5076 }
5077 } else if (i == AM_PARENT)
5078 {
5079 amaip = (AMAlignIndexPtr)sap->saip;
5080 if (row_num > amaip->numrows)
5081 return -1;
5082 if (amaip->mstype != AM_SEGMENTED_MASTERSLAVE)
5083 {
5084 j = k = 0;
5085 m = -1;
5086 done = FALSE;
5087 while (!done && j < amaip->numseg)
5088 {
5089 k = AlnMgrMapRowCoords(sap, amaip->aligncoords[j], row_num, master);
5090 if (k == -1)
5091 j++;
5092 else if (k > pos)
5093 done = TRUE;
5094 else if (k <= pos)
5095 {
5096 m = j;
5097 offset = pos - k;
5098 j++;
5099 }
5100 }
5101 if (m == -1 || offset >= amaip->lens[m])
5102 return -2;
5103 n = AlnMgrGetNthStrand(sap, row_num);
5104 if (n != Seq_strand_minus)
5105 {
5106 return (amaip->aligncoords[m] + offset);
5107 } else
5108 {
5109 return (amaip->aligncoords[m] + amaip->lens[m] - 1 - offset);
5110 }
5111 } else
5112 {
5113 if(oldj) j = *oldj;
5114 else j = 0;
5115 k = 0;
5116 done = FALSE;
5117 while (!done && j < amaip->aligncoords[amaip->numseg-1]+amaip->lens[amaip->numseg-1])
5118 {
5119 k = AlnMgrMapRowCoords(sap, j, row_num, master);
5120 if (k == -1 || k < pos)
5121 j++;
5122 else if (k > pos)
5123 return -2;
5124 else if (k == pos)
5125 return j;
5126 }
5127 if (!done)
5128 return -2;
5129 }
5130 } else
5131 return -1;
5132 return -1;
5133 }
5134
5135
5136 /***********************************************************************
5137 *
5138 * AlnMgrMakeFakeMultiple calls AlnMgrCheckOverlapping to decide whether
5139 * an alignment is linear. Then, if possible, it calls AlnMgrMakeAlignCoords
5140 * to create alignment coordinates across all children contained in the
5141 * parent. (MULT)
5142 *
5143 ***********************************************************************/
AlnMgrMakeFakeMultiple(SeqAlignPtr sap)5144 NLM_EXTERN Boolean AlnMgrMakeFakeMultiple(SeqAlignPtr sap)
5145 {
5146 return AlnMgrMakeFakeMultipleEx(sap, FALSE);
5147 }
5148
AlnMgrMakeFakeMultipleEx(SeqAlignPtr sap,Boolean forcestraightms)5149 static Boolean AlnMgrMakeFakeMultipleEx(SeqAlignPtr sap, Boolean forcestraightms)
5150 {
5151 AMAlignDatPtr amadp;
5152 AMAlignIndexPtr amaip;
5153 Int4 i;
5154 Int4 j;
5155 Boolean ms;
5156 Int4 n;
5157 Boolean nogap;
5158 Boolean retval;
5159 int unaligned=0;
5160
5161 retval = FALSE;
5162 if (!sap)
5163 return retval;
5164 i = AlnMgrCheckAlignForParent(sap);
5165 if (i<0)
5166 {
5167 return retval;
5168 }
5169 if (i==AM_PARENT)
5170 {
5171 n = AlnMgrCheckOrdered(sap);
5172 if (n == AM_ORDERED)
5173 {
5174 sap->type = SAT_PARTIAL;
5175 amaip = (AMAlignIndexPtr)sap->saip;
5176 amaip->alnsaps = amaip->numsaps;
5177 amaip->startsize = (amaip->alnsaps)*(amaip->alnsaps);
5178 amaip->starts = (Int4Ptr)MemNew((amaip->alnsaps)*(amaip->alnsaps)*sizeof(Int4));
5179 amaip->lens = (Int4Ptr)MemNew((amaip->alnsaps)*sizeof(Int4));
5180 amaip->ulens = (Int4Ptr)MemNew((amaip->alnsaps)*sizeof(Int4));
5181 amaip->numseg = amaip->alnsaps;
5182 for (j=0; j<(amaip->alnsaps); j++)
5183 {
5184 amaip->lens[j] = AlnMgrGetAlnLength(amaip->saps[j], FALSE);
5185 amaip->starts[j] = 0;
5186 }
5187 AlnMgrMakeAlignCoords(sap);
5188 if (!AlnMgrMakeRowsForOrdered(sap))
5189 return FALSE;
5190 for (j=0; j<(amaip->alnsaps-1); j++)
5191 {
5192 amaip->ulens[j] = AlnMgrGetMaxUnalignedLength(amaip->saps[j], amaip->saps[j+1]);
5193 unaligned += amaip->ulens[j];
5194 }
5195 { /* HS 07/24/00 */
5196 sap->master = AlnMgrFindMaster(sap);
5197 ms = FALSE;
5198 ms = AlnMgrCheckRealMaster(sap, sap->master);
5199 if (sap->master && ms == TRUE)
5200 {
5201 sap->type = SAT_MASTERSLAVE;
5202 /* if there's more than 1 denseseg, call it segmented, DIH, 7/27/00 */
5203 if (amaip->numseg > 1) {
5204 /* if (unaligned > 0) { */
5205 amaip->mstype = AM_SEGMENTED_MASTERSLAVE;
5206 } else {
5207 amaip->mstype = AM_MASTERSLAVE;
5208 }
5209 AlnMgrSetMaster(sap, sap->master);
5210 AlnMgrMakeMasterPlus(sap);
5211 n = AlnMgrGetNForSip(sap, sap->master);
5212 sap->type = SAT_MASTERSLAVE;
5213 amaip->master = n;
5214 } else {
5215 sap->master=NULL;
5216 }
5217 }
5218 return TRUE;
5219 }
5220 n = AlnMgrCheckOverlapping(sap);
5221 nogap = am_check_gaps(sap);
5222 if (n == NO_OVERLAP)
5223 {
5224 sap->type = SAT_PARTIAL;
5225 amaip = (AMAlignIndexPtr)sap->saip;
5226 if (amaip->saps)
5227 MemFree(amaip->saps);
5228 amaip->saps = AlnMgrSortSeqAligns((SeqAlignPtr)(sap->segs), AlnMgrFindFirst, amaip, &amaip->numsaps);
5229 amaip->alnsaps = amaip->numsaps;
5230 amaip->startsize = (amaip->alnsaps)*(amaip->alnsaps);
5231 amaip->starts = (Int4Ptr)MemNew((amaip->alnsaps)*(amaip->alnsaps)*sizeof(Int4));
5232 amaip->lens = (Int4Ptr)MemNew((amaip->alnsaps)*sizeof(Int4));
5233 amaip->ulens = (Int4Ptr)MemNew((amaip->alnsaps)*sizeof(Int4));
5234 amaip->numseg = amaip->alnsaps;
5235 for (j=0; j<(amaip->alnsaps); j++)
5236 {
5237 amaip->lens[j] = AlnMgrGetAlnLength(amaip->saps[j], FALSE);
5238 amaip->starts[j] = 0;
5239 }
5240 AlnMgrMakeAlignCoords(sap);
5241 if (!AlnMgrGetRowsForPartial(sap))
5242 return retval;
5243 for (j=0; j<(amaip->alnsaps-1); j++)
5244 {
5245 amaip->ulens[j] = AlnMgrGetMaxUnalignedLength(amaip->saps[j], amaip->saps[j+1]);
5246 unaligned += amaip->ulens[j];
5247 }
5248 { /* HS */
5249 sap->master = AlnMgrFindMaster(sap);
5250 ms = FALSE;
5251 ms = AlnMgrCheckRealMaster(sap, sap->master);
5252 if (sap->master && ms == TRUE)
5253 {
5254 sap->type = SAT_MASTERSLAVE;
5255 if(unaligned>0) {
5256 amaip->mstype = AM_SEGMENTED_MASTERSLAVE;
5257 } else {
5258 amaip->mstype = AM_MASTERSLAVE;
5259 }
5260 AlnMgrSetMaster(sap, sap->master);
5261 AlnMgrMakeMasterPlus(sap);
5262 n = AlnMgrGetNForSip(sap, sap->master);
5263 sap->type = SAT_MASTERSLAVE;
5264 amaip->master = n;
5265 } else {
5266 sap->master=NULL;
5267 }
5268 }
5269 retval = TRUE;
5270 } else /*should add function to check for pairwise multiple vs. diags*/
5271 {
5272 amaip = (AMAlignIndexPtr)sap->saip;
5273 if (amaip->saps)
5274 MemFree(amaip->saps);
5275 sap->master = AlnMgrFindMaster(sap);
5276 amaip->alnsaps = amaip->numsaps;
5277 ms = FALSE;
5278 ms = AlnMgrCheckRealMaster(sap, sap->master);
5279 if (sap->master && ms == TRUE)
5280 {
5281 retval = TRUE;
5282 AlnMgrSetMaster(sap, sap->master);
5283 AlnMgrMakeMasterPlus(sap);
5284 n = AlnMgrGetNForSip(sap, sap->master);
5285 sap->type = SAT_MASTERSLAVE;
5286 amaip->master = n; /* HS 7/24/00 was ==1 */
5287 amaip->numseg = AlnMgrGetMaxSegments((SeqAlignPtr)(sap->segs));
5288 amaip->alnsaps = amaip->numsaps;
5289 amaip->lens = (Int4Ptr)MemNew((amaip->numseg)*sizeof(Int4));
5290 amadp = amaip->amadp[n-1];
5291 amaip->saps = (SeqAlignPtr PNTR)MemNew((amaip->numsaps)*sizeof(SeqAlignPtr));
5292 for (j=0; j<amaip->numsaps; j++)
5293 {
5294 amaip->saps[j] = amadp->saps[j];
5295 }
5296 if (forcestraightms || amaip->numsaps < amaip->numbsqs)
5297 {
5298 amaip->ids = SeqIdSetFree(amaip->ids);
5299 amaip->ids = AlnMgrPropagateSeqIdsBySapList(amaip);
5300 if (!AlnMgrMergeIntoMSMultByMaster(amaip, amaip->lens, &amaip->numseg))
5301 retval = FALSE;
5302 amaip->startsize = (amaip->numseg)*(amaip->numsaps);
5303 amaip->starts = (Int4Ptr)MemNew((amaip->numseg)*(amaip->numsaps)*sizeof(Int4));
5304 amaip->aligncoords = (Uint4Ptr)MemNew((amaip->numseg)*sizeof(Uint4));
5305 if (!AlnMgrFillInStarts(amadp->saps, amaip->starts, amaip->numseg, amaip->lens, amaip->numsaps, amaip->aligncoords))
5306 retval = FALSE;
5307 if (amaip->numseg > 1)
5308 amaip->numseg -= 1;
5309 if (!AlnMgrMakeMultSegments(amaip))
5310 retval = FALSE;
5311 if (!AlnMgrGetRowsForMasterSlave(sap))
5312 retval = FALSE;
5313 } else
5314 retval = FALSE;
5315 }
5316 if (retval == FALSE && sap->master != NULL)
5317 {
5318 if (AlnMgrMakeSegmentedMasterSlave(sap))
5319 {
5320 sap->type = SAT_MASTERSLAVE;
5321 amaip->ids = SeqIdSetFree(amaip->ids);
5322 amaip->ids = AlnMgrPropagateSeqIdsByRow(amaip);
5323 retval = TRUE;
5324 } else
5325 {
5326 if (AlnMgrForceMasterSlave(sap))
5327 {
5328 amaip->ids = SeqIdSetFree(amaip->ids);
5329 amaip->ids = AlnMgrPropagateSeqIdsByRow(amaip);
5330 amaip->mstype = AM_MASTERSLAVE;
5331 retval = TRUE;
5332 }
5333 }
5334 } else
5335 {
5336 amaip->mstype = AM_MASTERSLAVE;
5337 }
5338 }
5339 }
5340 return retval;
5341 }
5342
5343 /**********************************************************************
5344 *
5345 * AlnMgrNeatlyIndex is a very specialized function for structure
5346 * alignments and other alignments that have an implied row order.
5347 * It puts an index on the first seqalign in the set, and that index
5348 * only designates which seqaligns belong in which rows. The input
5349 * alignments are not changed at all, and only the first alignment is
5350 * indexed. This alignment cannot be displayed or accessed; it's only
5351 * a way to designate rows.
5352 *
5353 **********************************************************************/
AlnMgrNeatlyIndex(SeqAlignPtr sap)5354 NLM_EXTERN Boolean AlnMgrNeatlyIndex(SeqAlignPtr sap)
5355 {
5356 AMAlignIndexPtr amaip;
5357 Int4 numrows;
5358 SeqAlignPtr salp;
5359
5360 if (sap == NULL)
5361 return FALSE;
5362 if (sap->saip != NULL)
5363 {
5364 if (sap->saip->indextype == INDEX_PARENT)
5365 AMAlignIndexFree((Pointer)(sap->saip));
5366 else if (sap->saip->indextype == INDEX_SEGS)
5367 SAIndexFree((Pointer)(sap->saip));
5368 }
5369 sap->saip = NULL;
5370 if (sap->next != NULL)
5371 salp = sap;
5372 else if (sap->segtype == SAS_DISC)
5373 salp = (SeqAlignPtr)(sap->segs);
5374 else
5375 salp = sap;
5376 numrows = 0;
5377 while (salp != NULL)
5378 {
5379 numrows++;
5380 salp = salp->next;
5381 }
5382 amaip = AMAlignIndexNew();
5383 sap->saip = (SeqAlignIndexPtr)(amaip);
5384 amaip->mstype = AM_NEATINDEX;
5385 amaip->master = 1;
5386 if (sap->next != NULL)
5387 salp = sap;
5388 else if (sap->segtype == SAS_DISC)
5389 salp = (SeqAlignPtr)(sap->segs);
5390 else
5391 salp = sap;
5392 amaip->saps = (SeqAlignPtr PNTR)MemNew(numrows*sizeof(SeqAlignPtr));
5393 numrows = 0;
5394 while (salp != NULL)
5395 {
5396 amaip->saps[numrows] = salp;
5397 numrows++;
5398 salp = salp->next;
5399 }
5400 amaip->numrows = numrows;
5401 amaip->master = 1;
5402 return TRUE;
5403 }
5404
/**********************************************************************
*
*  AlnMgrMergeNeighbors walks the dense-diag list of a SAS_DENDIAG
*  seqalign and folds each diag into its predecessor whenever no row
*  shows a gap between the two.  For a minus-strand row the gap test
*  is (start of next + len of next < start of previous); for any other
*  row it is (start of previous + len of previous < start of next).
*  When two diags are merged the predecessor's length grows by the
*  length of the absorbed diag, each minus-strand row takes over the
*  absorbed diag's start, and the absorbed diag is freed.
*
*  The walk stops as soon as two neighbouring diags have different
*  dimensions.  Seqaligns of any other segtype are left untouched.
*
**********************************************************************/
static void AlnMgrMergeNeighbors(SeqAlignPtr salp)
{
   DenseDiagPtr  ddp;
   DenseDiagPtr  ddp_prev;
   Boolean       gap;
   Int4          i;

   if (salp == NULL || salp->segtype != SAS_DENDIAG)
      return;
   ddp_prev = (DenseDiagPtr)(salp->segs);
   while (ddp_prev != NULL && ddp_prev->next != NULL)
   {
      ddp = ddp_prev->next;
      if (ddp_prev->dim != ddp->dim)
         return;
      gap = FALSE;
      for (i=0; i<ddp->dim && !gap; i++)
      {
         if (ddp->strands != NULL && ddp->strands[i] == Seq_strand_minus)
         {
            if (ddp->starts[i] + ddp->len < ddp_prev->starts[i])
               gap = TRUE;
         } else if (ddp_prev->starts[i] + ddp_prev->len < ddp->starts[i])
            gap = TRUE;
      }
      if (gap)
      {
         ddp_prev = ddp;
         continue;
      }
      /* The strand has to be examined row by row: once the loop above */
      /* has finished without a gap, i equals ddp->dim and must not be */
      /* used as an index into ddp->strands any more.                  */
      if (ddp->strands != NULL)
      {
         for (i=0; i<ddp->dim; i++)
         {
            if (ddp->strands[i] == Seq_strand_minus)
               ddp_prev->starts[i] = ddp->starts[i];
         }
      }
      ddp_prev->len += ddp->len;
      ddp_prev->next = ddp->next;
      ddp->next = NULL;
      DenseDiagFree(ddp);
   }
}
5459
5460 /**********************************************************************
5461 *
5462 * AlnMgrTossNeatRows is called to create a subset of the NeatlyIndexed
5463 * alignment, only containing certain rows. The array throwarray, of
5464 * length len, contains the (1-based) numbers of the rows to be left
5465 * out. The function returns a duplicated alignment, which is not
5466 * yet indexed. This function assumes a very strict row structure --
5467 * each row is represented by a single seqalign or seqalign set, and
5468 * the first row is the master.
5469 *
5470 **********************************************************************/
AlnMgrTossNeatRows(SeqAlignPtr sap,Int4Ptr throwarray,Int4 len)5471 NLM_EXTERN SeqAlignPtr AlnMgrTossNeatRows(SeqAlignPtr sap, Int4Ptr throwarray, Int4 len)
5472 {
5473 AMAlignIndexPtr amaip;
5474 Int4 i;
5475 Int4 n;
5476 SeqAlignPtr salp;
5477 SeqAlignPtr sap_head;
5478 SeqAlignPtr sap_new;
5479 SeqAlignPtr sap_prev;
5480 Boolean toss;
5481
5482 if (sap == NULL || throwarray == NULL)
5483 return NULL;
5484 if (sap->saip == NULL)
5485 return NULL;
5486 if (sap->next != NULL)
5487 salp = sap;
5488 else if (sap->segtype == SAS_DISC)
5489 salp = (SeqAlignPtr)(sap->segs);
5490 else
5491 salp = sap;
5492 amaip = (AMAlignIndexPtr)(sap->saip);
5493 if (amaip->mstype != AM_NEATINDEX)
5494 return NULL;
5495 i = 2;
5496 sap_head = NULL;
5497 while (salp != NULL)
5498 {
5499 toss = FALSE;
5500 for (n=0; n<len; n++)
5501 {
5502 if (throwarray[n] == i)
5503 toss = TRUE;
5504 }
5505 if (toss == FALSE)
5506 {
5507 sap_new = SeqAlignDup(salp);
5508 if (sap_head != NULL)
5509 {
5510 sap_prev->next = sap_new;
5511 sap_prev = sap_new;
5512 } else
5513 sap_prev = sap_head = sap_new;
5514 }
5515 salp = salp->next;
5516 i++;
5517 }
5518 return sap_head;
5519 }
5520
5521 /***************************************************************************
5522 *
5523 * AlnMgrMakeMultByIntersectOnMaster is a specialized function that
5524 * truncates the segments of segmented master-slave alignments to
5525 * force them to line up:
5526 *
5527 * Master XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
5528 * seq1 XXXXXXXX XXXXXXXXXXXXXX
5529 * seq2 XXXXXXXXXX XXXXXXX
5530 * seq3 XXXXXXXXXXXXXXXXXXXXXXX
5531 *
5532 * becomes
5533 *
5534 * Master XXXXXX XXXXXXX
5535 * seq1 XXXXXX XXXXXXX
5536 * seq2 XXXXXX XXXXXXX
5537 * seq3 XXXXXX XXXXXXX
5538 *
5539 * This indexing is different from the other type -- the input
5540 * alignments may actually be truncated, instead of just rearranged.
5541 * If allinblock is TRUE, then the function throws away any blocks that
5542 * have a missing sequence.
5543 *
5544 ***************************************************************************/
AlnMgrMakeMultByIntersectOnMaster(SeqAlignPtr sap,Boolean allinblock)5545 NLM_EXTERN Boolean AlnMgrMakeMultByIntersectOnMaster(SeqAlignPtr sap, Boolean allinblock)
5546 {
5547 AMmsmsPtr ams;
5548 AMmsmsPtr ams_master;
5549 AMmsmsPtr ams_tmp;
5550 Int4 numrows;
5551 Int4Ptr rowarray;
5552 SeqAlignPtr salp;
5553 SeqIdPtr sip_tmp;
5554
5555 if (sap == NULL)
5556 return FALSE;
5557 if (sap->saip != NULL)
5558 {
5559 if (sap->saip->indextype == INDEX_PARENT)
5560 AMAlignIndexFree((Pointer)(sap->saip));
5561 else if (sap->saip->indextype == INDEX_SEGS)
5562 SAIndexFree((Pointer)(sap->saip));
5563 }
5564 sap->saip = NULL;
5565 if (sap->segtype == SAS_DISC)
5566 salp = (SeqAlignPtr)(sap->segs);
5567 else
5568 salp = sap;
5569 while (salp != NULL)
5570 {
5571 /* AlnMgrMergeNeighbors(salp); */ /* dih - 3/21/01 */
5572 salp = salp->next;
5573 }
5574 rowarray = NULL;
5575 if (allinblock)
5576 {
5577 numrows = am_guess_numrows(sap);
5578 rowarray = am_save_rowinfo(sap, numrows-1);
5579 } else
5580 numrows = 0;
5581 if (!AlnMgrUnpackSeqAlign(sap))
5582 return FALSE;
5583 if (!AlnMgrRearrangeUnpacked(sap))
5584 return FALSE;
5585 if (!AlnMgrIndexLinkedSegs((SeqAlignPtr)(sap->segs)))
5586 return FALSE;
5587 if (sap->master == NULL) /* if a master is already set, believe it */
5588 sip_tmp = am_find_master(sap); /* if not, find it */
5589 if (sip_tmp == NULL)
5590 return FALSE;
5591 am_set_master(sap, sip_tmp);
5592 SeqIdFree(sip_tmp);
5593 ams = AlnMgrFindOverlapOnMaster(sap);
5594 if (ams == NULL)
5595 return FALSE;
5596 if (allinblock)
5597 am_set_rows(ams, rowarray, numrows);
5598 ams_master = AlnMgrTruncateByOverlap(sap, ams);
5599 if (ams_master == NULL)
5600 return FALSE;
5601 if (!AlnMgrCarefulIndex(sap, ams, &ams_master, allinblock, numrows))
5602 return FALSE;
5603 while (ams)
5604 {
5605 ams_tmp = ams->next;
5606 ams->next = NULL;
5607 ams->sip = NULL;
5608 ams->sap = NULL;
5609 MemFree(ams);
5610 ams = ams_tmp;
5611 }
5612 while (ams_master)
5613 {
5614 ams_tmp = ams_master->next;
5615 ams_master->next = NULL;
5616 ams_master->sip = NULL;
5617 ams_master->sap = NULL;
5618 MemFree(ams_master);
5619 ams_master = ams_tmp;
5620 }
5621 if (rowarray != NULL)
5622 MemFree(rowarray);
5623 return TRUE;
5624 }
5625
am_is_new_row(SeqIdPtr sip1,SeqIdPtr sip2)5626 static Boolean am_is_new_row(SeqIdPtr sip1, SeqIdPtr sip2)
5627 {
5628 if (sip1 == NULL || sip2 == NULL)
5629 return FALSE;
5630 while (sip1 != NULL && sip2 != NULL)
5631 {
5632 if (SeqIdComp(sip1, sip2) != SIC_YES)
5633 return TRUE;
5634 sip1 = sip1->next;
5635 sip2 = sip2->next;
5636 }
5637 if (sip1 != NULL || sip2 != NULL)
5638 return TRUE;
5639 return FALSE;
5640 }
5641
am_guess_numrows(SeqAlignPtr sap)5642 NLM_EXTERN Int4 am_guess_numrows(SeqAlignPtr sap)
5643 {
5644 DenseDiagPtr ddp;
5645 Boolean disc;
5646 DenseSegPtr dsp;
5647 SeqIdPtr id_prev;
5648 Int4 n;
5649 SeqAlignPtr salp;
5650
5651 if (sap == NULL)
5652 return 0;
5653 disc = FALSE;
5654 if (sap->segtype == SAS_DISC)
5655 {
5656 disc = TRUE;
5657 salp = (SeqAlignPtr)(sap->segs);
5658 sap = sap->next;
5659 } else
5660 salp = sap;
5661 n = 0;
5662 id_prev = NULL;
5663 while (salp)
5664 {
5665 if (salp->segtype == SAS_DENSEG)
5666 {
5667 dsp = (DenseSegPtr)(salp->segs);
5668 if (am_is_new_row(dsp->ids, id_prev) || salp->next == NULL)
5669 n+= dsp->dim - 1;
5670 id_prev = dsp->ids;
5671 } else if (salp->segtype == SAS_DENDIAG)
5672 {
5673 ddp = (DenseDiagPtr)(salp->segs);
5674 n+=ddp->dim;
5675 n = n-1;
5676 id_prev = ddp->id;
5677 } else if (salp->segtype == SAS_DISC)
5678 n++;
5679 salp = salp->next;
5680 if (salp == NULL)
5681 {
5682 if (disc && sap!=NULL)
5683 {
5684 if (sap->segtype == SAS_DISC)
5685 {
5686 salp = (SeqAlignPtr)(sap->segs);
5687 sap = sap->next;
5688 }
5689 }
5690 }
5691 }
5692 return (n+1);
5693 }
5694
am_save_rowinfo(SeqAlignPtr sap,Int4 numrows)5695 static Int4Ptr am_save_rowinfo(SeqAlignPtr sap, Int4 numrows)
5696 {
5697 DenseDiagPtr ddp;
5698 Int4 dim_prev;
5699 Boolean disc;
5700 DenseSegPtr dsp;
5701 Int4 i;
5702 SeqIdPtr id_prev;
5703 Int4 n;
5704 Int4 num;
5705 Int4Ptr rowarray;
5706 SeqAlignPtr salp;
5707 SeqAlignPtr sap2;
5708
5709 if (sap == NULL)
5710 return 0;
5711 disc = FALSE;
5712 if (sap->segtype == SAS_DISC)
5713 {
5714 disc = TRUE;
5715 salp = (SeqAlignPtr)(sap->segs);
5716 sap = sap->next;
5717 } else
5718 salp = sap;
5719 n = 0;
5720 id_prev = NULL;
5721 dim_prev = 0;
5722 num = 1;
5723 rowarray = (Int4Ptr)MemNew((numrows+1)*sizeof(Int4));
5724 while (salp)
5725 {
5726 if (salp->segtype == SAS_DENSEG)
5727 {
5728 dsp = (DenseSegPtr)(salp->segs);
5729 if (am_is_new_row(dsp->ids, id_prev) || salp->next == NULL)
5730 {
5731 for (i=n; i<(n+dim_prev-1); i++)
5732 {
5733 rowarray[i] = num;
5734 }
5735 n += dim_prev-1;
5736 num = 1;
5737 } else
5738 num++;
5739 id_prev = dsp->ids;
5740 dim_prev = dsp->dim;
5741 } else if (salp->segtype == SAS_DENDIAG)
5742 {
5743 ddp = (DenseDiagPtr)(salp->segs);
5744 num = 0;
5745 while (ddp)
5746 {
5747 num++;
5748 ddp = ddp->next;
5749 }
5750 ddp = (DenseDiagPtr)(salp->segs);
5751 for (i=n; i<(n+ddp->dim-1); i++)
5752 {
5753 rowarray[i] = num;
5754 }
5755 n+=ddp->dim;
5756 id_prev = ddp->id;
5757 n = n-1;
5758 } else if (salp->segtype == SAS_DISC)
5759 {
5760 sap2 = (SeqAlignPtr)(salp->segs);
5761 num = 0;
5762 while (sap2 != NULL)
5763 {
5764 num++;
5765 sap2 = sap2->next;
5766 }
5767 sap2 = (SeqAlignPtr)(salp->segs);
5768 for (i=n; i<(n+sap2->dim-1); i++)
5769 {
5770 rowarray[i] = num;
5771 }
5772 n+=sap2->dim;
5773 n = n - 1;
5774 }
5775 salp = salp->next;
5776 if (salp == NULL)
5777 {
5778 if (disc && sap!=NULL)
5779 {
5780 if (sap->segtype == SAS_DISC)
5781 {
5782 salp = (SeqAlignPtr)(sap->segs);
5783 sap = sap->next;
5784 }
5785 }
5786 }
5787 }
5788 return rowarray;
5789 }
5790
/* am_set_rows marks overlap nodes in the list ams using the counters  */
/* in rowarray (numrows entries are read).  For each entry the current */
/* node gets stop = 1 and the list position then moves forward by that */
/* entry's value.  The walk ends early, without touching anything      */
/* else, as soon as the list runs out -- including in the middle of a  */
/* step.  Nothing happens when ams or rowarray is NULL.                */
static void am_set_rows(AMmsmsPtr ams, Int4Ptr rowarray, Int4 numrows)
{
   Int4  i;
   Int4  n;

   if (rowarray == NULL)
      return;
   for (n=0; n<numrows && ams != NULL; n++)
   {
      ams->stop = 1;
      /* a counter larger than what is left of the list must not walk */
      /* off its end                                                  */
      for (i=0; i<rowarray[n] && ams != NULL; i++)
      {
         ams = ams->next;
      }
   }
   return;
}
5810
/* am_set_master stores a copy of sip as the master id.  For a         */
/* discontinuous sap the copy goes into sap itself and into every one  */
/* of its children; for any other sap it goes into sap and into every  */
/* seqalign chained after it.  A master that is already present is     */
/* freed before it is replaced, so sip must not be one of the master   */
/* ids that get replaced.  Nothing happens when sap or sip is NULL.    */
static void am_set_master(SeqAlignPtr sap, SeqIdPtr sip)
{
   SeqAlignPtr  cur;

   if (sap == NULL || sip == NULL)
      return;
   cur = sap;
   if (sap->segtype == SAS_DISC)
   {
      if (sap->master != NULL)
         SeqIdSetFree(sap->master);
      sap->master = SeqIdDup(sip);
      cur = (SeqAlignPtr)(sap->segs);
   }
   for ( ; cur != NULL; cur = cur->next)
   {
      if (cur->master != NULL)
         SeqIdSetFree(cur->master);
      cur->master = SeqIdDup(sip);
   }
}
5834
am_find_master(SeqAlignPtr sap)5835 static SeqIdPtr am_find_master(SeqAlignPtr sap)
5836 {
5837 DenseDiagPtr ddp;
5838 Boolean done;
5839 DenseSegPtr dsp;
5840 Boolean found;
5841 Boolean here;
5842 SeqAlignPtr salp;
5843 SeqAlignPtr sap_tmp;
5844 SeqIdPtr sip;
5845 SeqIdPtr sip_head;
5846 SeqIdPtr sip_tmp;
5847
5848 if (sap == NULL)
5849 return NULL;
5850 if (sap->segtype == SAS_DISC)
5851 salp = (SeqAlignPtr)(sap->segs);
5852 else
5853 salp = sap;
5854 if (salp->segtype == SAS_DENSEG)
5855 {
5856 dsp = (DenseSegPtr)(salp->segs);
5857 sip_head = dsp->ids;
5858 } else if (salp->segtype == SAS_DENDIAG)
5859 {
5860 ddp = (DenseDiagPtr)(salp->segs);
5861 sip_head = ddp->id;
5862 }
5863 sip = sip_head;
5864 done = FALSE;
5865 while (sip && !done)
5866 {
5867 sap_tmp = salp->next;
5868 found = TRUE;
5869 while (sap_tmp != NULL && found)
5870 {
5871 if (sap_tmp->segtype == SAS_DENSEG)
5872 {
5873 dsp = (DenseSegPtr)(sap_tmp->segs);
5874 sip_tmp = dsp->ids;
5875 } else if (sap_tmp->segtype == SAS_DENDIAG)
5876 {
5877 ddp = (DenseDiagPtr)(sap_tmp->segs);
5878 sip_tmp = ddp->id;
5879 }
5880 here = FALSE;
5881 while (sip_tmp != NULL && !here)
5882 {
5883 if (SAM_OrderSeqID(sip_tmp, sip) == 0)
5884 here = TRUE;
5885 sip_tmp = sip_tmp->next;
5886 }
5887 if (!here)
5888 found = FALSE;
5889 sap_tmp = sap_tmp->next;
5890 }
5891 if (found)
5892 done = TRUE;
5893 else
5894 sip = sip->next;
5895 }
5896 if (!done)
5897 return NULL;
5898 else
5899 return (SeqIdDup(sip));
5900 }
5901
AlnMgrFindOverlapOnMaster(SeqAlignPtr sap)5902 static AMmsmsPtr AlnMgrFindOverlapOnMaster(SeqAlignPtr sap)
5903 {
5904 AMmsmsPtr ams;
5905 AMmsmsPtr ams_head;
5906 AMmsmsPtr ams_prev;
5907 SeqAlignPtr salp;
5908 SeqAlignPtr salp_tmp;
5909
5910 if (sap == NULL)
5911 return NULL;
5912 if (sap->master == NULL)
5913 return NULL;
5914 ams_head = ams_prev = NULL;
5915 while (sap)
5916 {
5917 if (sap->segtype == SAS_DISC)
5918 {
5919 salp = (SeqAlignPtr)(sap->segs);
5920 while (salp)
5921 {
5922 if (salp->segtype == SAS_DISC)
5923 {
5924 salp_tmp = (SeqAlignPtr)(sap->segs);
5925 while (salp_tmp)
5926 {
5927 ams = am_create_overlap(salp_tmp);
5928 if (ams == NULL)
5929 return NULL;
5930 if (ams_head != NULL)
5931 {
5932 ams_prev->next = ams;
5933 while (ams->next)
5934 {
5935 ams = ams->next;
5936 }
5937 ams_prev = ams;
5938 } else
5939 {
5940 ams_head = ams;
5941 while (ams->next)
5942 {
5943 ams = ams->next;
5944 }
5945 ams_prev = ams;
5946 }
5947 }
5948 } else
5949 {
5950 ams = am_create_overlap(salp);
5951 if (ams == NULL)
5952 return NULL;
5953 if (ams_head != NULL)
5954 {
5955 ams_prev->next = ams;
5956 while (ams->next)
5957 {
5958 ams = ams->next;
5959 }
5960 ams_prev = ams;
5961 } else
5962 {
5963 ams_head = ams;
5964 while (ams->next)
5965 {
5966 ams = ams->next;
5967 }
5968 ams_prev = ams;
5969 }
5970 }
5971 salp = salp->next;
5972 }
5973 } else
5974 {
5975 ams = am_create_overlap(sap);
5976 if (ams == NULL)
5977 return NULL;
5978 if (ams_head != NULL)
5979 {
5980 ams_prev->next = ams;
5981 while (ams->next)
5982 {
5983 ams = ams->next;
5984 }
5985 ams_prev = ams;
5986 } else
5987 {
5988 ams_head = ams;
5989 while (ams->next)
5990 {
5991 ams = ams->next;
5992 }
5993 ams_prev = ams;
5994 }
5995 }
5996 sap = sap->next;
5997 }
5998 return ams_head;
5999 }
6000
am_create_overlap(SeqAlignPtr sap)6001 static AMmsmsPtr am_create_overlap(SeqAlignPtr sap)
6002 {
6003 AMmsmsPtr ams;
6004 AMmsmsPtr ams_head;
6005 AMmsmsPtr ams_prev;
6006 DenseDiagPtr ddp;
6007 DenseSegPtr dsp;
6008 Boolean found;
6009 SeqIdPtr id;
6010 Int4 n;
6011 SeqAlignPtr sap_tmp;
6012 Int4 start;
6013 Int4 stop;
6014
6015 if (sap == NULL || sap->master == NULL)
6016 return NULL;
6017 ams_head = ams_prev = NULL;
6018 if (sap->segtype == SAS_DENSEG)
6019 {
6020 dsp = (DenseSegPtr)(sap->segs);
6021 id = (dsp->ids);
6022 found = FALSE;
6023 n = 0;
6024 while (id!=NULL && !found)
6025 {
6026 n++;
6027 if (SAM_OrderSeqID(id, sap->master)== 0)
6028 found = TRUE;
6029 id = id->next;
6030 }
6031 if (!found || n>dsp->dim)
6032 return NULL;
6033 if (dsp->strands != NULL)
6034 {
6035 if (dsp->strands[n-1] == Seq_strand_minus)
6036 {
6037 sap_tmp = sap->next;
6038 sap->next = NULL;
6039 SeqAlignListReverseStrand(sap);
6040 sap->next = sap_tmp;
6041 }
6042 }
6043 if (!AlnMgrIndexSingleChildSeqAlign(sap))
6044 return NULL;
6045 AlnMgrGetNthSeqRangeInSA(sap, n, &start, &stop);
6046 ams = (AMmsmsPtr)MemNew(sizeof(AMmsms));
6047 ams->sap = sap;
6048 ams->n = n;
6049 ams->sstart = start;
6050 ams->sstop = stop;
6051 ams->count = AlnMgrGetNumRows(sap);
6052 ams->count = ams->count - 1;
6053 return ams;
6054 } else if (sap->segtype == SAS_DENDIAG)
6055 {
6056 ddp = (DenseDiagPtr)(sap->segs);
6057 while (ddp)
6058 {
6059 id = ddp->id;
6060 found = FALSE;
6061 n = 0;
6062 while (id!=NULL && !found)
6063 {
6064 n++;
6065 if (SeqIdComp(id, sap->master) == SIC_YES)
6066 found = TRUE;
6067 id = id->next;
6068 }
6069 if (!found || n > ddp->dim)
6070 {
6071 while (ams_head != NULL)
6072 {
6073 ams = ams_head->next;
6074 ams_head->next = NULL;
6075 MemFree(ams_head);
6076 ams_head = ams;
6077 }
6078 return NULL;
6079 }
6080 if (ddp->strands != NULL)
6081 {
6082 if (ddp->strands[n-1] == Seq_strand_minus)
6083 am_densediag_reverse(ddp);
6084 }
6085 start = ddp->starts[n-1];
6086 stop = start + ddp->len-1;
6087 ams = (AMmsmsPtr)MemNew(sizeof(AMmsms));
6088 ams->sip = id;
6089 ams->sap = sap;
6090 ams->n = n;
6091 ams->sstart = start;
6092 ams->sstop = stop;
6093 ams->count = AlnMgrGetNumRows(sap);
6094 ams->count = ams->count - 1;
6095 if (ams_head != NULL)
6096 {
6097 ams_prev->next = ams;
6098 ams_prev = ams;
6099 } else
6100 ams_head = ams_prev = ams;
6101 ddp = ddp->next;
6102 }
6103 return ams_head;
6104 } else
6105 return NULL;
6106 }
6107
/* am_densediag_reverse flips the strand flag of every row of one      */
/* dense diag: a minus row becomes plus, and a row with any other      */
/* value becomes minus.  Only the strand array is changed.  Nothing    */
/* happens when ddp is NULL or has no strand array.                    */
static void am_densediag_reverse(DenseDiagPtr ddp)
{
   Int4  row;

   if (ddp == NULL || ddp->strands == NULL)
      return;
   for (row = 0; row < ddp->dim; row++)
   {
      ddp->strands[row] = (ddp->strands[row] == Seq_strand_minus) ? Seq_strand_plus : Seq_strand_minus;
   }
}
6122
/* AlnMgrConstructOverlaps cuts the master ranges of the overlap list  */
/* ams_head into blocks at every range boundary and truncates the      */
/* seqaligns to those blocks.                                          */
/*                                                                     */
/*  1. Every node contributes an opening endpoint (which == 1, at      */
/*     sstart) and a closing endpoint (which == 2, at sstop).  The     */
/*     endpoints are sorted with AlnMgrCompareTips, and endpoints with */
/*     the same position and kind are merged into one that counts, in  */
/*     numsap, how many it stands for.                                 */
/*  2. The endpoints are swept in order while counting the ranges that */
/*     are currently open.  Whenever at least one range is open and    */
/*     the stretch since the previous endpoint is not empty, that      */
/*     stretch becomes a master block.                                 */
/*  3. For every overlap node the first block containing its sstart is */
/*     looked up; the node's sstop is set to the block's sstop and its */
/*     seqalign is cut with AlnMgrTruncateSAP.  If a remainder was     */
/*     split off, it is detached from the seqalign chain and inserted  */
/*     as a new node right behind the current one, so that it is       */
/*     processed next.  The block remembers the seqalign and row of    */
/*     the node.                                                       */
/*                                                                     */
/* Returns the list of master blocks (NULL if ams_head is NULL or no   */
/* block was produced).                                                */
static AMmsmsPtr AlnMgrConstructOverlaps(AMmsmsPtr ams_head)
{
   AMmsmsPtr           ams;
   AMmsmsPtr           block;
   AMmsmsPtr           blocks_head;
   AMmsmsPtr           blocks_tail;
   Int4                blockstop;
   Boolean             found;
   Int4                i;
   Int4                n;
   Int4                open;
   AMmsmsPtr           remainder;
   Int4                start;
   AMTinyInfoPtr       tip;
   AMTinyInfoPtr PNTR  tiparray;
   AMTinyInfoPtr       tip_head;
   AMTinyInfoPtr       tip_prev;

   if (ams_head == NULL)
      return NULL;
   /* step 1: two endpoints per overlap node */
   n = 0;
   tip_head = NULL;
   tip_prev = NULL;
   for (ams = ams_head; ams != NULL; ams = ams->next)
   {
      tip = (AMTinyInfoPtr)MemNew(sizeof(AMTinyInfo));
      tip->start = ams->sstart;
      tip->which = 1;
      if (tip_head == NULL)
         tip_head = tip;
      else
         tip_prev->next = tip;
      tip_prev = tip;
      tip = (AMTinyInfoPtr)MemNew(sizeof(AMTinyInfo));
      tip->start = ams->sstop;
      tip->which = 2;
      tip_prev->next = tip;
      tip_prev = tip;
      n += 2;
   }
   tiparray = (AMTinyInfoPtr PNTR)MemNew(n*sizeof(AMTinyInfoPtr));
   i = 0;
   for (tip = tip_head; tip != NULL; tip = tip->next)
   {
      tiparray[i] = tip;
      i++;
   }
   HeapSort((Pointer)tiparray, (size_t)n, sizeof(AMTinyInfoPtr), AlnMgrCompareTips);
   /* relink in sorted order, merging identical endpoints */
   tip_head = tip_prev = tiparray[0];
   tip_head->numsap = 1;
   for (i=1; i<n; i++)
   {
      if (tiparray[i]->start != tip_prev->start || tiparray[i]->which != tip_prev->which)
      {
         tip_prev->next = tiparray[i];
         tiparray[i]->numsap = 1;
         tip_prev = tiparray[i];
      } else
      {
         tip_prev->numsap++;
         MemFree(tiparray[i]);
      }
   }
   tip_prev->next = NULL;
   MemFree(tiparray);
   /* step 2: sweep the endpoints and emit the blocks */
   open = 0;
   start = 0;   /* only read once an opening endpoint has set it */
   blocks_head = blocks_tail = NULL;
   for (tip = tip_head; tip != NULL; tip = tip->next)
   {
      if (open > 0)
      {
         /* a block ends just before an opening endpoint, or exactly */
         /* on a closing one                                         */
         if (tip->which == 1)
            blockstop = tip->start-1;
         else
            blockstop = tip->start;
         if (start <= blockstop)
         {
            block = (AMmsmsPtr)MemNew(sizeof(AMmsms));
            block->sstart = start;
            block->sstop = blockstop;
            if (blocks_head == NULL)
               blocks_head = block;
            else
               blocks_tail->next = block;
            blocks_tail = block;
         }
      }
      if (tip->which == 1)
      {
         start = tip->start;
         open += tip->numsap;
      } else
      {
         start = tip->start+1;
         open -= tip->numsap;
      }
   }
   while (tip_head != NULL)
   {
      tip = tip_head->next;
      MemFree(tip_head);
      tip_head = tip;
   }
   /* step 3: cut every seqalign down to the block it starts in */
   for (ams = ams_head; ams != NULL; ams = ams->next)
   {
      found = FALSE;
      for (block = blocks_head; block != NULL && !found; block = block->next)
      {
         if (ams->sstart < block->sstart || ams->sstart > block->sstop)
            continue;
         found = TRUE;
         ams->sstop = block->sstop;
         if (AlnMgrTruncateSAP(ams->sap, block->sstart, block->sstop, ams->n))
         {
            remainder = (AMmsmsPtr)MemNew(sizeof(AMmsms));
            remainder->sap = ams->sap->next;
            ams->sap->next = NULL;
            AlnMgrGetNthSeqRangeInSA(remainder->sap, ams->n, &remainder->sstart, &remainder->sstop);
            remainder->next = ams->next;
            ams->next = remainder;
            remainder->n = ams->n;
            remainder->count = ams->count;
         }
         block->sap = ams->sap;
         block->n = ams->n;
      }
   }
   return blocks_head;
}
6259
/* am_discard_overlap removes the overlap node *ams_ptr, which is not  */
/* going to be used, and frees both the node and its seqalign.  When a */
/* previous node exists (*prev_ptr), the discarded seqalign is also    */
/* unlinked from behind the previous node's seqalign.  On return       */
/* *ams_ptr is the node that followed the discarded one; *prev_ptr is  */
/* unchanged, or NULL if there was no previous node.                   */
static void am_discard_overlap(AMmsmsPtr PNTR ams_ptr, AMmsmsPtr PNTR prev_ptr)
{
   AMmsmsPtr    doomed;
   SeqAlignPtr  salp_prev;

   doomed = *ams_ptr;
   if (*prev_ptr != NULL)
   {
      salp_prev = (*prev_ptr)->sap;
      salp_prev->next = doomed->sap->next;
      (*prev_ptr)->next = doomed->next;
      doomed->next = NULL;
      doomed->sap->next = NULL;
      SeqAlignFree(doomed->sap);
      MemFree(doomed);
      *ams_ptr = (*prev_ptr)->next;
   } else
   {
      doomed->sap->next = NULL;
      SeqAlignFree(doomed->sap);
      *ams_ptr = doomed->next;
      MemFree(doomed);
      *prev_ptr = NULL;
   }
}

/* AlnMgrTruncateByOverlap finds the master blocks shared by the       */
/* overlap list ams_head and cuts the seqaligns down to them.          */
/*                                                                     */
/*  1. Every node contributes a start endpoint (which == 1) and a stop */
/*     endpoint (which == 2); the endpoints are sorted with            */
/*     AlnMgrCompareTips.                                              */
/*  2. Every start endpoint that is directly followed by a stop        */
/*     endpoint becomes a master block running from one to the other.  */
/*  3. Each overlap node is compared with the blocks in order.  If the */
/*     node begins at or before a block and reaches it, the node's     */
/*     range is set to the block, its seqalign is cut with             */
/*     AlnMgrTruncateSAP (a remainder that is split off gets a new     */
/*     node right behind the current one), and the block remembers the */
/*     seqalign and row.  If the node ends before the block begins, or */
/*     matches no block at all, it is discarded together with its      */
/*     seqalign.                                                       */
/*                                                                     */
/* Returns the list of master blocks, or NULL when sap or ams_head is  */
/* NULL, when no block exists, or when the endpoint array cannot be    */
/* set up.                                                             */
/*                                                                     */
/* NOTE(review): when the first node of ams_head is discarded it is    */
/* freed here, but ams_head is passed by value, so the caller's head   */
/* pointer is left referring to freed memory; the seqalign freed in    */
/* that case is also not unlinked from whatever points at it.  Confirm */
/* that callers cannot reach this case before relying on the list      */
/* afterwards.                                                         */
static AMmsmsPtr AlnMgrTruncateByOverlap(SeqAlignPtr sap, AMmsmsPtr ams_head)
{
   AMmsmsPtr           ams;
   AMmsmsPtr           ams_master;
   AMmsmsPtr           ams_mhead;
   AMmsmsPtr           ams_mprev;
   AMmsmsPtr           ams_new;
   AMmsmsPtr           ams_prev;
   Boolean             found;
   Int4                i;
   Int4                n;
   AMTinyInfoPtr       tip;
   AMTinyInfoPtr PNTR  tiparray;
   AMTinyInfoPtr       tip_head;
   AMTinyInfoPtr       tip_prev;

   if (sap == NULL || ams_head == NULL)
      return NULL;
   ams_mhead = ams_mprev = NULL;
   n = 0;
   tip_head = tip_prev = NULL;
   for (ams = ams_head; ams != NULL; ams = ams->next)  /* create linked list of starts and stops */
   {
      tip = (AMTinyInfoPtr)MemNew(sizeof(AMTinyInfo));
      tip->start = ams->sstart;
      tip->which = 1;
      if (tip_head != NULL)
      {
         tip_prev->next = tip;
         tip_prev = tip;
      } else
         tip_head = tip_prev = tip;
      tip = (AMTinyInfoPtr)MemNew(sizeof(AMTinyInfo));
      tip->start = ams->sstop;
      tip->which = 2;
      tip_prev->next = tip;
      tip_prev = tip;
      n+=2;
   }
   /* sort the list */
   tiparray = (AMTinyInfoPtr PNTR)MemNew(n*sizeof(AMTinyInfoPtr));
   tip = tip_head;
   i = 0;
   if (tiparray != NULL)
   {
      for (i=0; i<n && tip != NULL; i++)
      {
         tiparray[i] = tip;
         tip = tip->next;
      }
   }
   if (tiparray == NULL || i < n)
   {
      /* could not set up the array: release the endpoints and give up */
      while (tip_head != NULL)
      {
         tip = tip_head->next;
         MemFree(tip_head);
         tip_head = tip;
      }
      if (tiparray != NULL)
         MemFree(tiparray);
      return NULL;
   }
   HeapSort((Pointer)tiparray, (size_t)(n), sizeof(AMTinyInfoPtr), AlnMgrCompareTips);
   tip_head = tiparray[0];
   for (i=0; i<n-1; i++)
   {
      tiparray[i]->next = tiparray[i+1];
      tiparray[i+1]->next = NULL;
   }
   /* now look for start-stop pairs -- these are the blocks */
   for (tip = tip_head; tip->next != NULL; tip = tip->next)
   {
      if (tip->which == 1 && tip->next->which == 2)
      {
         ams_master = (AMmsmsPtr)MemNew(sizeof(AMmsms));
         ams_master->sstart = tip->start;
         ams_master->sstop = tip->next->start;
         if (ams_mhead != NULL)
         {
            ams_mprev->next = ams_master;
            ams_mprev = ams_master;
         } else
            ams_mhead = ams_mprev = ams_master;
      }
   }
   for (i=0; i<n; i++)
   {
      MemFree(tiparray[i]);
   }
   MemFree(tiparray);
   /* now compare the ams with the master blocks and truncate as needed */
   ams = ams_head;
   ams_prev = NULL;
   while (ams)
   {
      ams_master = ams_mhead;
      found = FALSE;
      while (ams_master && !found)
      {
         if (ams->sstart <= ams_master->sstart && ams->sstop >= ams_master->sstart)
         {
            found = TRUE;
            ams->sstart = ams_master->sstart;
            ams->sstop = ams_master->sstop;
            if (AlnMgrTruncateSAP(ams->sap, ams_master->sstart, ams_master->sstop, ams->n))
            {
               /* the part beyond the block is now in ams->sap->next */
               ams_new = (AMmsmsPtr)MemNew(sizeof(AMmsms));
               ams_new->sap = ams->sap->next;
               AlnMgrGetNthSeqRangeInSA(ams_new->sap, ams->n, &ams_new->sstart, &ams_new->sstop);
               ams_new->next = ams->next;
               ams_new->n = ams->n;
               ams_new->count = ams->count;
               ams->next = ams_new;
            }
            ams_master->sap = ams->sap;
            ams_master->masternum = ams->n;
            ams_prev = ams;
            ams = ams->next;
         } else if (ams->sstart <= ams_master->sstart && ams->sstop < ams_master->sstart)
         {  /* this one isn't going to get used -- free it */
            found = TRUE;
            am_discard_overlap(&ams, &ams_prev);
         }
         ams_master = ams_master->next;
      }
      if (!found)
      {
         /* no block at or after the start of this piece: drop it as well */
         am_discard_overlap(&ams, &ams_prev);
      }
   }
   return ams_mhead;
}
6421
6422 /***************************************************************************
6423 *
6424 * AlnMgrTruncateSAP truncates a given seqalign to contain only the
6425 * bioseq coordinates from start to stop on the indicated row. Anything
6426 * before those coordinates is discarded; anything remaining afterwards
6427 * is made into another seqalign and put in sap->next (the original next,
6428 * if any, is now at sap->next->next). Doesn't work on parent seqaligns.
6429 * The function returns TRUE if the orignal alignment extended past stop.
6430 *
6431 ***************************************************************************/
AlnMgrTruncateSAP(SeqAlignPtr sap,Int4 start,Int4 stop,Int4 row)6432 NLM_EXTERN Boolean AlnMgrTruncateSAP(SeqAlignPtr sap, Int4 start, Int4 stop, Int4 row)
6433 {
6434 DenseDiagPtr ddp;
6435 DenseDiagPtr ddp2;
6436 DenseSegPtr dsp;
6437 Int4 from;
6438 Int4 i;
6439 Int4 mstart;
6440 Int4 mstop;
6441 SeqAlignPtr sap1;
6442 SeqAlignPtr sap2;
6443 Int4 tmp;
6444 Int4 to;
6445
6446 if (sap == NULL || stop<start || row < 1)
6447 return FALSE;
6448 if (sap->segtype == SAS_DENSEG)
6449 {
6450 if (sap->saip == NULL)
6451 AlnMgrIndexSingleChildSeqAlign(sap);
6452 AlnMgrGetNthSeqRangeInSA(sap, row, &mstart, &mstop);
6453 if (mstart > start || mstop < stop)
6454 return FALSE;
6455 if (mstart == start)
6456 {
6457 if (mstop == stop)
6458 return FALSE;
6459 else if (mstop > stop)
6460 {
6461 from = AlnMgrMapBioseqToSeqAlign(sap, start, row, NULL);
6462 to = AlnMgrMapBioseqToSeqAlign(sap, stop, row, NULL);
6463 if (to < from)
6464 {
6465 tmp = to;
6466 to = from;
6467 from = tmp;
6468 }
6469 sap1 = AlnMgrGetSubAlign(sap, NULL, from, to);
6470 AlnMgrIndexSingleChildSeqAlign(sap1);
6471 from = AlnMgrMapBioseqToSeqAlign(sap, stop+1, row, NULL);
6472 if (from < 0)
6473 return FALSE;
6474 to = AlnMgrMapBioseqToSeqAlign(sap, mstop, row, NULL);
6475 if (to < from)
6476 {
6477 tmp = to;
6478 to = from;
6479 from = tmp;
6480 }
6481 sap2 = AlnMgrGetSubAlign(sap, NULL, from, to);
6482 sap2->next = sap->next;
6483 sap->next = sap2;
6484 dsp = sap->segs;
6485 sap->segs = (Pointer)(sap1->segs);
6486 sap1->segs = NULL;
6487 DenseSegFree(dsp);
6488 SeqAlignFree(sap1);
6489 AlnMgrIndexSingleChildSeqAlign(sap);
6490 AlnMgrIndexSingleChildSeqAlign(sap2);
6491 return TRUE;
6492 }
6493 } else if (mstart < start) /* throw away the first part */
6494 {
6495 from = AlnMgrMapBioseqToSeqAlign(sap, start, row, NULL);
6496 to = AlnMgrMapBioseqToSeqAlign(sap, stop, row, NULL);
6497 if (to < from)
6498 {
6499 tmp = to;
6500 to = from;
6501 from = tmp;
6502 }
6503 sap1 = AlnMgrGetSubAlign(sap, NULL, from, to);
6504 if (mstop == stop) /* done */
6505 {
6506 dsp = sap->segs;
6507 sap->segs = (Pointer)(sap1->segs);
6508 sap1->segs = NULL;
6509 DenseSegFree(dsp);
6510 SeqAlignFree(sap1);
6511 AlnMgrIndexSingleChildSeqAlign(sap);
6512 return TRUE;
6513 } else if (mstop > stop)
6514 {
6515 from = AlnMgrMapBioseqToSeqAlign(sap, stop+1, row, NULL);
6516 if (from < 0)
6517 return FALSE;
6518 to = AlnMgrMapBioseqToSeqAlign(sap, mstop, row, NULL);
6519 if (to < from)
6520 {
6521 tmp = to;
6522 to = from;
6523 from = tmp;
6524 }
6525 sap2 = AlnMgrGetSubAlign(sap, NULL, from, to);
6526 sap2->next = sap->next;
6527 sap->next = sap2;
6528 AlnMgrIndexSingleChildSeqAlign(sap2);
6529 dsp = sap->segs;
6530 sap->segs = (Pointer)(sap1->segs);
6531 sap1->segs = NULL;
6532 DenseSegFree(dsp);
6533 SeqAlignFree(sap1);
6534 AlnMgrIndexSingleChildSeqAlign(sap);
6535 return TRUE;
6536 }
6537 }
6538 } else if (sap->segtype == SAS_DENDIAG)
6539 {
6540 ddp = (DenseDiagPtr)(sap->segs);
6541 if (ddp->dim < row)
6542 return FALSE;
6543 mstart = ddp->starts[row-1];
6544 mstop = mstart + ddp->len - 1;
6545 if (mstart > start || mstop < stop)
6546 return FALSE;
6547 if (mstart == start)
6548 {
6549 if (mstop == stop)
6550 return FALSE;
6551 else if (mstop > stop)
6552 {
6553 ddp2 = DenseDiagNew();
6554 ddp2->dim = ddp->dim;
6555 ddp2->starts = (Int4Ptr)MemNew((ddp->dim)*sizeof(Int4));
6556 ddp2->id = SeqIdDupList(ddp->id);
6557 ddp2->strands = (Uint1Ptr)MemNew((ddp->dim)*sizeof(Uint1));
6558 ddp2->scores = ScoreDup(ddp->scores);
6559 for (i=0; i<ddp->dim; i++)
6560 {
6561 ddp2->starts[i] = ddp->starts[i] + ddp->len - (mstop - stop);
6562 ddp2->strands[i] = ddp->strands[i];
6563 }
6564 ddp2->len = mstop - stop;
6565 ddp->len = ddp->len - (mstop - stop);
6566 sap2 = SeqAlignNew();
6567 sap2->type = SAT_PARTIAL;
6568 sap2->segtype = SAS_DENSEG;
6569 sap2->segs = (Pointer)ddp2;
6570 sap2->next = sap->next;
6571 sap->next = sap2;
6572 AlnMgrIndexSingleChildSeqAlign(sap2);
6573 return TRUE;
6574 }
6575 } else if (mstart < start)
6576 {
6577 for (i=0; i<ddp->dim; i++)
6578 {
6579 ddp->starts[i] = ddp->starts[i] + start - mstart;
6580 }
6581 ddp->len = ddp->len - (start - mstart);
6582 AlnMgrIndexSingleChildSeqAlign(sap);
6583 if (mstop == stop)
6584 return FALSE;
6585 else if (mstop > stop)
6586 {
6587 ddp2 = DenseDiagNew();
6588 ddp2->dim = ddp->dim;
6589 ddp2->starts = (Int4Ptr)MemNew((ddp->dim)*sizeof(Int4));
6590 ddp2->id = SeqIdDupList(ddp->id);
6591 ddp2->strands = (Uint1Ptr)MemNew((ddp->dim)*sizeof(Uint1));
6592 ddp2->scores = ScoreDup(ddp->scores);
6593 for (i=0; i<ddp->dim; i++)
6594 {
6595 ddp2->starts[i] = ddp->starts[i] + ddp->len - (mstop - stop);
6596 ddp2->strands[i] = ddp->strands[i];
6597 }
6598 ddp2->len = mstop - stop;
6599 ddp->len = ddp->len - (mstop - stop);
6600 sap2 = SeqAlignNew();
6601 sap2->type = SAT_PARTIAL;
6602 sap2->segtype = SAS_DENSEG;
6603 sap2->segs = (Pointer)ddp2;
6604 sap2->next = sap->next;
6605 sap->next = sap2;
6606 AlnMgrIndexSingleChildSeqAlign(sap2);
6607 return TRUE;
6608 }
6609 }
6610 } else
6611 return FALSE;
6612 return FALSE;
6613 }
6614
/* am_compare_alignids checks whether every sequence id of the         */
/* seqalign of ams_prev also occurs (SeqIdComp == SIC_YES) among the   */
/* ids of the seqalign of ams.  As soon as one id is missing,          */
/* ams->stop is set to 1.  The segments of both seqaligns are read as  */
/* dense-segs.  Nothing happens when either node is NULL.              */
static void am_compare_alignids(AMmsmsPtr ams_prev, AMmsmsPtr ams)
{
   SeqIdPtr     cand;
   DenseSegPtr  dsp_cur;
   DenseSegPtr  dsp_prev;
   Boolean      found;
   SeqIdPtr     ids_cur;
   SeqIdPtr     wanted;

   if (ams_prev == NULL || ams == NULL)
      return;
   dsp_prev = (DenseSegPtr)(ams_prev->sap->segs);
   dsp_cur = (DenseSegPtr)(ams->sap->segs);
   wanted = dsp_prev->ids;
   ids_cur = dsp_cur->ids;
   for ( ; wanted != NULL; wanted = wanted->next)
   {
      found = FALSE;
      for (cand = ids_cur; cand != NULL && !found; cand = cand->next)
      {
         if (SeqIdComp(wanted, cand) == SIC_YES)
            found = TRUE;
      }
      if (!found)
      {
         ams->stop = 1;
         return;
      }
   }
}
6649
AlnMgrJaggedIndex(SeqAlignPtr sap,AMmsmsPtr ams_head,AMmsmsPtr * ams_mhead,Int4 numrows)6650 static Boolean AlnMgrJaggedIndex(SeqAlignPtr sap, AMmsmsPtr ams_head, AMmsmsPtr *ams_mhead, Int4 numrows)
6651 {
6652 AMAlignIndexPtr amaip;
6653 AMmsmsPtr ams;
6654 AMmsmsPtr ams_master;
6655 AMmsmsPtr ams_prev;
6656 Boolean found;
6657 Int4 i;
6658 Int4 j;
6659 Int4 n;
6660 Int4 numblocks;
6661 RowSourcePtr rsp;
6662 RowSourcePtr rsp_curr_head;
6663 RowSourcePtr rsp_head;
6664 RowSourcePtr rsp_prev;
6665 SeqAlignPtr PNTR saparray;
6666 Boolean usethis;
6667
6668 if (sap == NULL || ams_head == NULL || ams_mhead == NULL)
6669 return FALSE;
6670 if (!AlnMgrIndexParentSA(sap))
6671 return FALSE;
6672 AlnMgrSetMaster(sap, sap->master);
6673 ams_master = *ams_mhead;
6674 numblocks = 0;
6675 while (ams_master != NULL)
6676 {
6677 numblocks++;
6678 ams_master->count = numblocks;
6679 ams_master = ams_master->next;
6680 }
6681 ams = ams_head;
6682 j = 0;
6683 while (ams != NULL)
6684 {
6685 ams->count = j;
6686 ams = ams->next;
6687 j++;
6688 }
6689 saparray = (SeqAlignPtr PNTR)MemNew(j*sizeof(SeqAlignPtr));
6690 ams = ams_head;
6691 j = 0;
6692 while (ams != NULL)
6693 {
6694 saparray[j] = ams->sap;
6695 ams = ams->next;
6696 j++;
6697 }
6698 ams_master = *ams_mhead;
6699 rsp_head = RowSourceNew();
6700 rsp_head->which_saps = (Uint4Ptr)MemNew(numblocks*sizeof(Uint4));
6701 rsp_head->num_in_sap = (Uint4Ptr)MemNew(numblocks*sizeof(Uint4));
6702 rsp_head->id = SeqIdDup(sap->master);
6703 rsp_head->strand = Seq_strand_plus;
6704 rsp_head->numsaps = 0;
6705 while (ams_master != NULL)
6706 {
6707 found = FALSE;
6708 for (i=0; i<j && !found; i++)
6709 {
6710 if (saparray[i] == ams_master->sap)
6711 found = TRUE;
6712 }
6713 if (!found)
6714 return FALSE;
6715 rsp_head->which_saps[rsp_head->numsaps] = i;
6716 rsp_head->num_in_sap[rsp_head->numsaps] = ams_master->n;
6717 rsp_head->numsaps++;
6718 ams_master = ams_master->next;
6719 }
6720 rsp_prev = rsp_head;
6721 ams = ams_head;
6722 amaip = (AMAlignIndexPtr)(sap->saip);
6723 amaip->numsaps = amaip->alnsaps = j;
6724 amaip->saps = saparray;
6725 amaip->mstype = AM_SEGMENTED_MASTERSLAVE;
6726 amaip->numrows = 1;
6727 sap->type = SAT_MASTERSLAVE;
6728 ams_prev = ams_head = NULL;
6729 while (ams)
6730 {
6731 ams_master = *ams_mhead;
6732 n = AlnMgrGetNumRows(ams->sap);
6733 usethis = FALSE;
6734 while (!usethis && ams_master != NULL)
6735 {
6736 if (ams->sstart == ams_master->sstart && ams->sstop == ams_master->sstop)
6737 usethis = TRUE;
6738 else
6739 ams_master = ams_master->next;
6740 }
6741 if (usethis)
6742 {
6743 if (ams_prev != NULL && ams->sstart > ams_prev->sstart && ams->stop != 1)
6744 {
6745 rsp = rsp_curr_head;
6746 for (i=0; i<n; i++)
6747 {
6748 if (i+1 != ams->n)
6749 {
6750 if (rsp == NULL)
6751 return FALSE;
6752 rsp->which_saps[ams_master->count-1] = ams->count+1;
6753 rsp->num_in_sap[ams_master->count-1] = i+1;
6754 rsp = rsp->next;
6755 }
6756 }
6757 } else /* new row */
6758 {
6759 found = FALSE;
6760 for (i=0; i<n; i++)
6761 {
6762 if (i+1 != ams->n)
6763 {
6764 rsp = RowSourceNew();
6765 amaip->numrows++;
6766 rsp->which_saps = (Uint4Ptr)MemNew(numblocks*sizeof(Uint4));
6767 rsp->num_in_sap = (Uint4Ptr)MemNew(numblocks*sizeof(Uint4));
6768 rsp->id = AlnMgrGetNthSeqIdPtr(ams->sap, i+1);
6769 rsp->strand = AlnMgrGetNthStrand(ams->sap, i+1);
6770 rsp->which_saps[ams_master->count-1] = ams->count + 1;
6771 rsp->num_in_sap[ams_master->count-1] = i+1;
6772 rsp->numsaps = numblocks;
6773 rsp_prev->next = rsp;
6774 rsp_prev = rsp;
6775 if (!found)
6776 {
6777 rsp_curr_head = rsp;
6778 found = TRUE;
6779 }
6780 }
6781 }
6782 }
6783 ams_prev = ams;
6784 }
6785 ams = ams->next;
6786 }
6787 amaip->master = 1;
6788 amaip->rowsource = (RowSourcePtr PNTR)MemNew((amaip->numrows)*sizeof(RowSourcePtr));
6789 rsp = rsp_head;
6790 for (i=0; i<amaip->numrows; i++)
6791 {
6792 if (rsp == NULL)
6793 return FALSE;
6794 amaip->rowsource[i] = rsp;
6795 rsp = rsp->next;
6796 }
6797 rsp = rsp_head;
6798 amaip->numseg = rsp->numsaps;
6799 amaip->lens = (Int4Ptr)MemNew((amaip->numseg)*sizeof(Int4));
6800 amaip->aligncoords = (Uint4Ptr)MemNew((amaip->numseg+1)*sizeof(Uint4));
6801 amaip->aligncoords[0] = 0;
6802 for (i=0; i<amaip->numseg; i++)
6803 {
6804 sap = amaip->saps[rsp->which_saps[i]-1];
6805 amaip->lens[i] = AlnMgrGetAlnLength(sap, FALSE);
6806 amaip->aligncoords[i+1] = amaip->aligncoords[i] + amaip->lens[i];
6807 }
6808 amaip->parent = sap;
6809 return TRUE;
6810 }
6811
/***************************************************************************
*
*  AlnMgrCarefulIndex fills in the parent index of sap (sap->saip) as a
*  segmented master-slave alignment (AM_SEGMENTED_MASTERSLAVE), using two
*  linked lists of AMmsms nodes:
*    ams_head    - one node per child alignment
*    *ams_mhead  - one node per block on the master
*  When allinblock is TRUE the master list is first pruned with
*  am_trim_master(ams_mhead, ams_head, numrows).  If no master block is
*  left, the alignments do not overlap at all and the result of
*  am_make_null_alignment(sap) is returned.
*
*  Otherwise the master blocks are numbered 1..numblocks, the child nodes
*  0..j-1, and the master row records, for each master block, the first
*  child whose sstart/sstop equal the block's.  When allinblock is FALSE,
*  consecutive child nodes are compared with am_compare_alignids.  The
*  slave rows are then built exactly as in the loop below: a child either
*  extends the current group of rows or starts a new group.
*
*  Returns FALSE on NULL arguments, when AlnMgrIndexParentSA fails, when
*  the row list is shorter than expected, or when one of the first numseg
*  master blocks has no child assigned; TRUE otherwise.
*
***************************************************************************/
static Boolean AlnMgrCarefulIndex(SeqAlignPtr sap, AMmsmsPtr ams_head, AMmsmsPtr *ams_mhead, Boolean allinblock, Int4 numrows)
{
   AMAlignIndexPtr   amaip;
   AMmsmsPtr         ams;
   AMmsmsPtr         ams_master;
   AMmsmsPtr         ams_prev;
   SeqAlignPtr       child;
   AMmsmsPtr         currmaster;
   Boolean           found;
   Int4              i;
   Int4              j;
   Int4              n;
   Int4              numblocks;
   RowSourcePtr      rsp;
   RowSourcePtr      rsp_curr_head;
   RowSourcePtr      rsp_head;
   RowSourcePtr      rsp_prev;
   SeqAlignPtr PNTR  saparray;

   if (sap == NULL || ams_head == NULL || ams_mhead == NULL)
      return FALSE;
   if (!AlnMgrIndexParentSA(sap))
      return FALSE;
   AlnMgrSetMaster(sap, sap->master);
   if (allinblock)
      am_trim_master(ams_mhead, ams_head, numrows);
   if (*ams_mhead == NULL) /* these alignments do not overlap at all */
      return (am_make_null_alignment(sap));

   /* number the master blocks 1..numblocks */
   numblocks = 0;
   for (ams_master = *ams_mhead; ams_master != NULL; ams_master = ams_master->next)
   {
      numblocks++;
      ams_master->count = numblocks;
   }

   /* count the child nodes to size the child array */
   j = 0;
   for (ams = ams_head; ams != NULL; ams = ams->next)
   {
      j++;
   }
   saparray = (SeqAlignPtr PNTR)MemNew(j*sizeof(SeqAlignPtr));

   /* master row: slots are indexed by master block number */
   rsp_head = RowSourceNew();
   rsp_head->which_saps = (Uint4Ptr)MemNew(numblocks*sizeof(Uint4));
   rsp_head->num_in_sap = (Uint4Ptr)MemNew(numblocks*sizeof(Uint4));
   rsp_head->id = SeqIdDup(sap->master);
   rsp_head->strand = Seq_strand_plus;
   rsp_head->numsaps = 0;

   ams = ams_head;
   j = 0;
   currmaster = *ams_mhead;
   while (ams)
   {
      /* move currmaster to the first block starting at or after this
         child, restarting from the head of the list when needed */
      if (currmaster && currmaster->sstart < ams->sstart)
      {
         while (currmaster && currmaster->sstart < ams->sstart)
         {
            currmaster = currmaster->next;
         }
         if (currmaster == NULL)
            currmaster = *ams_mhead;
      } else if (currmaster && currmaster->sstart > ams->sstart)
         currmaster = *ams_mhead;
      /* first child that exactly covers a still-unassigned block */
      if (currmaster && currmaster->sstart == ams->sstart && currmaster->sstop == ams->sstop && rsp_head->numsaps < numblocks && rsp_head->which_saps[currmaster->count-1] == 0)
      {
         rsp_head->which_saps[currmaster->count-1] = j+1;  /* 1-based */
         rsp_head->num_in_sap[currmaster->count-1] = ams->n;
         rsp_head->numsaps++;
         if (rsp_head->numsaps < numblocks)
            currmaster = *ams_mhead;
      }
      saparray[j] = ams->sap;
      ams->count = j;
      j++;
      ams_prev = ams;
      ams = ams->next;
      if (!allinblock)
         am_compare_alignids(ams_prev, ams);
   }

   rsp_prev = rsp_head;
   amaip = (AMAlignIndexPtr)(sap->saip);
   amaip->numsaps = amaip->alnsaps = j;
   amaip->saps = saparray;
   amaip->mstype = AM_SEGMENTED_MASTERSLAVE;
   amaip->numrows = 1;
   sap->type = SAT_MASTERSLAVE;
   ams_prev = NULL;
   /* start as NULL so that an "extend" step with no group yet built is
      caught by the rsp == NULL check instead of reading garbage */
   rsp_curr_head = NULL;
   for (ams = ams_head; ams != NULL; ams = ams->next)
   {
      n = AlnMgrGetNumRows(ams->sap);
      /* find the master block with exactly this child's master range */
      for (ams_master = *ams_mhead; ams_master != NULL; ams_master = ams_master->next)
      {
         if (ams->sstart == ams_master->sstart && ams->sstop == ams_master->sstop)
            break;
      }
      if (ams_master == NULL)
         continue;
      if (ams_prev != NULL && ams->sstart > ams_prev->sstart && ams->stop != 1)
      {
         /* extend the rows of the current group into this block */
         rsp = rsp_curr_head;
         for (i=0; i<n; i++)
         {
            if (i+1 != ams->n)
            {
               if (rsp == NULL)
                  return FALSE;
               rsp->which_saps[ams_master->count-1] = ams->count+1;
               rsp->num_in_sap[ams_master->count-1] = i+1;
               rsp = rsp->next;
            }
         }
      } else /* new row */
      {
         found = FALSE;
         for (i=0; i<n; i++)
         {
            if (i+1 != ams->n)
            {
               rsp = RowSourceNew();
               amaip->numrows++;
               rsp->which_saps = (Uint4Ptr)MemNew(numblocks*sizeof(Uint4));
               rsp->num_in_sap = (Uint4Ptr)MemNew(numblocks*sizeof(Uint4));
               rsp->id = AlnMgrGetNthSeqIdPtr(ams->sap, i+1);
               rsp->strand = AlnMgrGetNthStrand(ams->sap, i+1);
               rsp->which_saps[ams_master->count-1] = ams->count + 1;
               rsp->num_in_sap[ams_master->count-1] = i+1;
               rsp->numsaps = numblocks;
               rsp_prev->next = rsp;
               rsp_prev = rsp;
               if (!found)  /* remember the first row of this group */
               {
                  rsp_curr_head = rsp;
                  found = TRUE;
               }
            }
         }
      }
      ams_prev = ams;
   }

   /* flatten the row list into the rowsource array */
   amaip->master = 1;
   amaip->rowsource = (RowSourcePtr PNTR)MemNew((amaip->numrows)*sizeof(RowSourcePtr));
   rsp = rsp_head;
   for (i=0; i<amaip->numrows; i++)
   {
      if (rsp == NULL)
         return FALSE;
      amaip->rowsource[i] = rsp;
      rsp = rsp->next;
   }

   /* segment lengths and cumulative alignment coordinates, per master block */
   amaip->numseg = rsp_head->numsaps;
   amaip->lens = (Int4Ptr)MemNew((amaip->numseg)*sizeof(Int4));
   amaip->aligncoords = (Uint4Ptr)MemNew((amaip->numseg+1)*sizeof(Uint4));
   amaip->aligncoords[0] = 0;
   for (i=0; i<amaip->numseg; i++)
   {
      /* a slot left at 0 was never assigned a child; indexing saps with
         0-1 would read far outside the array */
      if (rsp_head->which_saps[i] == 0)
         return FALSE;
      /* use a separate variable: sap must keep pointing at the parent */
      child = amaip->saps[rsp_head->which_saps[i]-1];
      amaip->lens[i] = AlnMgrGetAlnLength(child, FALSE);
      amaip->aligncoords[i+1] = amaip->aligncoords[i] + amaip->lens[i];
   }
   amaip->parent = sap;
   return TRUE;
}
6986
am_make_null_alignment(SeqAlignPtr sap)6987 static Boolean am_make_null_alignment(SeqAlignPtr sap)
6988 {
6989 AMAlignIndexPtr amaip;
6990 BioseqPtr bsp;
6991 Int4 count;
6992 SeqAlignPtr salp;
6993 SeqAlignPtr salp_tmp;
6994 SeqIdPtr sip;
6995
6996 amaip = (AMAlignIndexPtr)(sap->saip);
6997 amaip->numsaps = amaip->alnsaps = 0;
6998 amaip->mstype = AM_NULL;
6999 amaip->ids = AlnMgrPropagateUpSeqIdPtrs(sap, &(count));
7000 if (count <= 0)
7001 return FALSE;
7002 else
7003 amaip->numbsqs = count;
7004 amaip->numrows = 0;
7005 amaip->numseg = 0;
7006 sap->type = SAT_MASTERSLAVE;
7007 sap->dim = amaip->numrows;
7008 salp = (SeqAlignPtr)(sap->segs);
7009 sap->segs = NULL;
7010 while (salp != NULL)
7011 {
7012 salp_tmp = salp->next;
7013 salp->next = NULL;
7014 SeqAlignFree(salp);
7015 salp = salp_tmp;
7016 }
7017 amaip->ulens = (Int4Ptr)MemNew(sizeof(Int4));
7018 amaip->ulens[0] = 0;
7019 sip = amaip->ids;
7020 while (sip != NULL)
7021 {
7022 bsp = BioseqLockById(sip);
7023 if (bsp->length > amaip->ulens[0])
7024 amaip->ulens[0] = bsp->length;
7025 BioseqUnlock(bsp);
7026 sip = sip->next;
7027 }
7028 amaip->parent = sap;
7029 return TRUE;
7030 }
7031
/***************************************************************************
*
*  am_trim_master removes from the master list *ams_mhead every block
*  that is not sufficiently covered by the child nodes in ams_head.  For
*  each master block a tally starts at 1 and is increased by the count
*  field of every child node whose sstart and sstop both equal the
*  block's.  A block whose tally is below numrows is unlinked and freed
*  (its next, sap and sip fields are cleared first; the objects they
*  pointed to are not freed here).  *ams_mhead is updated when the first
*  block is removed.  Nothing is done when either list pointer is NULL or
*  numrows < 1.
*
***************************************************************************/
static void am_trim_master(AMmsmsPtr PNTR ams_mhead, AMmsmsPtr ams_head, Int4 numrows)
{
   AMmsmsPtr       block;
   AMmsmsPtr       child;
   AMmsmsPtr PNTR  link;   /* the pointer that currently leads to block */
   Int4            tally;

   if (ams_mhead == NULL || ams_head == NULL || numrows < 1)
      return;
   link = ams_mhead;
   while ((block = *link) != NULL)
   {
      tally = 1;
      for (child = ams_head; child != NULL; child = child->next)
      {
         if (block->sstart == child->sstart && block->sstop == child->sstop)
            tally += child->count;
      }
      if (tally >= numrows)
      {
         /* keep this block and move on */
         link = &(block->next);
         continue;
      }
      /* unlink the block; link now leads to its successor */
      *link = block->next;
      block->next = NULL;
      block->sap = NULL;
      block->sip = NULL;
      MemFree(block);
   }
}
7080
7081
AlnMgrMakeAlignCoords(SeqAlignPtr sap)7082 NLM_EXTERN void AlnMgrMakeAlignCoords(SeqAlignPtr sap)
7083 {
7084 AMAlignIndexPtr amaip;
7085 Int4 i;
7086 Int4 j;
7087
7088 i = AlnMgrCheckAlignForParent(sap);
7089 if (i < 0 || i==AM_CHILD)
7090 return;
7091 amaip = (AMAlignIndexPtr)(sap->saip);
7092 if (!amaip->saps)
7093 return;
7094 amaip->aligncoords = (Uint4Ptr)MemNew((amaip->alnsaps)*sizeof(Uint4));
7095 amaip->aligncoords[0] = 0;
7096 for (j=0; j<((amaip->alnsaps)-1); j++)
7097 {
7098 amaip->aligncoords[j+1] = AlnMgrGetAlnLength(amaip->saps[j], FALSE) + amaip->aligncoords[j];
7099 }
7100 return;
7101 }
7102
AlnMgrMergeIntoMSMultByMaster(AMAlignIndexPtr amaip,Int4Ptr lens,Uint4Ptr numseg)7103 NLM_EXTERN Boolean AlnMgrMergeIntoMSMultByMaster(AMAlignIndexPtr amaip, Int4Ptr lens, Uint4Ptr numseg)
7104 {
7105 Uint4 count;
7106 DenseSegPtr dsp;
7107 Int4 gap;
7108 Int4 i;
7109 Int4 j;
7110 Int4 n;
7111 Boolean retval;
7112 SAIndexPtr saip;
7113 AMTinyInfoPtr tip;
7114 AMTinyInfoPtr PNTR tiparray;
7115
7116 retval = FALSE;
7117 if (numseg == NULL)
7118 return retval;
7119 tiparray = (AMTinyInfoPtr PNTR)MemNew((*numseg+1)*sizeof(AMTinyInfoPtr));
7120 j = 0;
7121 count = 0;
7122 for (i=0; i<(amaip->alnsaps); i++)
7123 {
7124 dsp = (DenseSegPtr)(amaip->saps[i]->segs);
7125 saip = (SAIndexPtr)amaip->saps[i]->saip;
7126 gap = 0;
7127 for (n=0; n<(dsp->numseg); n++)
7128 {
7129 if (dsp->starts[n*(dsp->dim)+saip->master-1] != -1)
7130 {
7131 tip = (AMTinyInfoPtr)MemNew(sizeof(AMTinyInfo));
7132 tip->start = dsp->starts[n*(dsp->dim)+saip->master-1];
7133 tip->which = i+1;
7134 tip->numgap = gap;
7135 tiparray[j] = tip;
7136 j++;
7137 count++;
7138 gap = 0;
7139 } else
7140 {
7141 gap++;
7142 }
7143 }
7144 tip = (AMTinyInfoPtr)MemNew(sizeof(AMTinyInfo));
7145 AlnMgrGetNthSeqRangeInSA(amaip->saps[i], saip->master, NULL, &tip->start);
7146 tip->start += 1;
7147 tip->which = i+1;
7148 tip->numgap = gap;
7149 tiparray[j] = tip;
7150 j++;
7151 count++;
7152 }
7153 *numseg = count;
7154 HeapSort((Pointer)tiparray, (size_t)(*numseg), sizeof(AMTinyInfoPtr), AlnMgrCompareTips);
7155 *numseg = count-1;
7156 count = 0;
7157 for (i=0; i<=(*numseg); i++)
7158 {
7159 if (count!=0 && (tiparray[i]->start == lens[count-1]))
7160 count--;
7161 for (j=1; j<=(tiparray[i]->numgap); j++)
7162 {
7163 lens[count] = -(tiparray[i]->which);
7164 count++;
7165 }
7166 lens[count] = tiparray[i]->start;
7167 count++;
7168 }
7169 for (i=0; i<=(*numseg); i++)
7170 {
7171 MemFree(tiparray[i]);
7172 }
7173 MemFree(tiparray);
7174 *numseg = count;
7175 return TRUE;
7176 }
7177
AlnMgrMergeSegments(Int4Ptr lens,SeqAlignPtr sap,SeqIdPtr master,Int4Ptr where,Int4 which)7178 NLM_EXTERN Boolean AlnMgrMergeSegments(Int4Ptr lens, SeqAlignPtr sap, SeqIdPtr master, Int4Ptr where, Int4 which)
7179 {
7180 DenseSegPtr dsp;
7181 Boolean found;
7182 Int4 i;
7183 Int4 j;
7184 Int4 n;
7185 Int4 num;
7186 Int4 r;
7187 Boolean retval;
7188 Int4 s;
7189 SAIndexPtr saip;
7190 Int4Ptr tmp;
7191 Int4 z;
7192
7193 retval = FALSE;
7194 if (!sap || !master || !lens)
7195 return retval;
7196 if (!where)
7197 return retval;
7198 n = AlnMgrGetNForSip(sap, master);
7199 if (n<0)
7200 return retval;
7201 if (sap->segtype == SAS_DENSEG)
7202 {
7203 dsp = (DenseSegPtr)(sap->segs);
7204 if (!dsp)
7205 return retval;
7206 saip = (SAIndexPtr)(sap->saip);
7207 if (!saip)
7208 return retval;
7209 } else
7210 {
7211 return retval;
7212 }
7213 if (*where == 0)
7214 {
7215 for(j=0; j<(dsp->numseg); j++)
7216 {
7217 if (dsp->starts[(j*(dsp->dim)) + n - 1] < 0)
7218 {
7219 s = -(which);
7220 } else
7221 {
7222 s = dsp->starts[(j*(dsp->dim)) + n - 1];
7223 }
7224 lens[*where] = s;
7225 *where = *where + 1;
7226 }
7227 AlnMgrGetNthSeqRangeInSA(sap, saip->master, NULL, &lens[dsp->numseg]);
7228 lens[dsp->numseg] += 1;
7229 *where = *where + 1;
7230 } else
7231 {
7232 tmp = (Int4Ptr)MemNew((dsp->numseg+1)*sizeof(Int4));
7233 for(j=0; j<(dsp->numseg); j++)
7234 {
7235 if (dsp->starts[(j*(dsp->dim)) + n - 1] < 0)
7236 {
7237 s = -(which);
7238 } else
7239 {
7240 s = dsp->starts[(j*(dsp->dim)) + n - 1];
7241 }
7242 tmp[j] = s;
7243 }
7244 AlnMgrGetNthSeqRangeInSA(sap, saip->master, NULL, &tmp[dsp->numseg]);
7245 tmp[dsp->numseg] += 1;
7246 s = 0;
7247 for (j=0; j<=(dsp->numseg); j++)
7248 {
7249 num = 0;
7250 while (tmp[j] < 0 && num<(dsp->numseg))
7251 {
7252 num++;
7253 j++;
7254 }
7255 num++;
7256 found = FALSE;
7257 for (i=s; !found && i<*where; i++)
7258 {
7259 r = 0;
7260 if (lens[i] < 0)
7261 {
7262 } else if (tmp[j] < lens[i])
7263 {
7264 if (i>0)
7265 {
7266 while (((i-r-1)>=0) && (lens[i-r-1] < 0))
7267 {
7268 r++;
7269 }
7270 }
7271 s = i;
7272 for (z = *where-1; z >= i-r; z--)
7273 {
7274 lens[z+num] = lens[z];
7275 }
7276 for (z = num; z > 0; z--)
7277 {
7278 lens[i-r] = tmp[j-z+1];
7279 i++;
7280 }
7281 found = TRUE;
7282 *where = *where + num;
7283 } else if (tmp[j] == lens[i])
7284 {
7285 s = i;
7286 for (z = *where-1; z >= i; z--)
7287 {
7288 lens[z+num-1] = lens[z];
7289 }
7290 for (z = num-1; z > 0; z--)
7291 {
7292 lens[i] = tmp[j-z];
7293 i++;
7294 }
7295 found = TRUE;
7296 *where = *where + num - 1;
7297 }
7298 }
7299 if (!found)
7300 {
7301 s = *where;
7302 for (z = *where+num-1; z >= *where; z--)
7303 {
7304 lens[z+num] = lens[z];
7305 }
7306 for (z = num-1; z >= 0; z--)
7307 {
7308 lens[i] = tmp[j-z];
7309 i++;
7310 }
7311 found = TRUE;
7312 *where = *where + num;
7313 }
7314 }
7315 MemFree(tmp);
7316 }
7317 retval = TRUE;
7318 return retval;
7319 }
7320
7321
AlnMgrFillInStarts(SeqAlignPtr PNTR saparray,Int4Ptr starts,Int4 numseg,Int4Ptr lens,Int4 numsaps,Uint4Ptr aligncoords)7322 NLM_EXTERN Boolean AlnMgrFillInStarts(SeqAlignPtr PNTR saparray, Int4Ptr starts, Int4 numseg, Int4Ptr lens, Int4 numsaps, Uint4Ptr aligncoords)
7323 {
7324 Int4Ptr alnlen;
7325 Boolean done;
7326 Int4 gap_pos;
7327 Int4 i;
7328 Int4 j;
7329 Int4 length;
7330 Boolean retval;
7331
7332 retval = FALSE;
7333 for (i=0; i<numsaps; i++)
7334 {
7335 gap_pos = 0;
7336 for (j=0; j<numseg; j++)
7337 {
7338 if(lens[j] >= 0)
7339 {
7340 starts[(numsaps*j)+i] = AlnMgrGetStartFromMaster(saparray[i], lens[j]);
7341 } else
7342 {
7343 if (lens[j] == -(i+1))
7344 {
7345 starts[(numsaps*j)+i] = AlnMgrGetMasterGapStartForSeg(saparray[i], gap_pos, &aligncoords[j]);
7346 gap_pos += 1;
7347 } else
7348 {
7349 starts[(numsaps*j)+i] = -1;
7350 }
7351 }
7352 }
7353 }
7354 if (!AlnMgrReconcileGaps(lens, aligncoords, numseg))
7355 return retval;
7356 alnlen = (Int4Ptr)MemNew(numsaps*sizeof(Int4));
7357 for (i=0; i<numsaps; i++)
7358 {
7359 alnlen[i] = AlnMgrGetAlnLength(saparray[i], FALSE);
7360 }
7361 for (i=0; i<numsaps; i++)
7362 {
7363 length = 0;
7364 done = FALSE;
7365 for (j=0; j<numseg; j++)
7366 {
7367 if (starts[(numsaps*j)+i] == -2)
7368 {
7369 if (length > 0)
7370 {
7371 if (lens[j]+length-1 < alnlen[i])
7372 {
7373 starts[(numsaps*j)+i] = length;
7374 length += lens[j];
7375 } else
7376 {
7377 done = TRUE;
7378 }
7379 }
7380 } else if (starts[(numsaps*j)+i] == -1)
7381 {
7382 if (length == 0)
7383 starts[(numsaps*j)+i] = -2;
7384 else if (done)
7385 starts[(numsaps*j)+i] = -2;
7386 } else
7387 {
7388 length = starts[(numsaps*j)+i] + lens[j];
7389 }
7390 }
7391 }
7392 j = 0;
7393 numseg -= 1;
7394 done = FALSE;
7395 if (numseg != 0)
7396 done = FALSE;
7397 else
7398 done = TRUE;
7399 for (i=(numsaps*(numseg-1)); (!done && i<(numsaps*numseg)); i++)
7400 {
7401 if (starts[i] >= 0)
7402 {
7403 done = TRUE;
7404 lens[numseg-1] = alnlen[j]-starts[i];
7405 }
7406 else
7407 j++;
7408 }
7409 MemFree(alnlen);
7410 retval = TRUE;
7411 return retval;
7412 }
7413
AlnMgrGetStartFromMaster(SeqAlignPtr sap,Int4 pos)7414 NLM_EXTERN Int4 AlnMgrGetStartFromMaster(SeqAlignPtr sap, Int4 pos)
7415 {
7416 DenseSegPtr dsp;
7417 SAIndexPtr saip;
7418 Int4 start;
7419
7420 saip = (SAIndexPtr)(sap->saip);
7421 dsp = (DenseSegPtr)(sap->segs);
7422 start = binary_search_segment_array(saip->ssdp[saip->master-1], pos, dsp->dim, saip->master - 1, (DenseSegPtr)sap->segs);
7423 if (dsp->starts[(start*dsp->dim)+saip->master-1] != pos)
7424 {
7425 return -2;
7426 } else
7427 {
7428 return (saip->aligncoords[start]);
7429 }
7430 }
7431
AlnMgrGetMasterGapStartForSeg(SeqAlignPtr sap,Int4 which_gap,Uint4Ptr aligncoord)7432 NLM_EXTERN Uint4 AlnMgrGetMasterGapStartForSeg(SeqAlignPtr sap, Int4 which_gap, Uint4Ptr aligncoord)
7433 {
7434 DenseSegPtr dsp;
7435 SAIndexPtr saip;
7436
7437 saip = (SAIndexPtr)(sap->saip);
7438 dsp = (DenseSegPtr)(sap->segs);
7439 if (which_gap >= saip->ssdp[saip->master-1]->numunsect)
7440 {
7441 if (aligncoord)
7442 *aligncoord = dsp->lens[dsp->numseg-1];
7443 return saip->aligncoords[dsp->numseg-1];
7444 }
7445 if (aligncoord)
7446 *aligncoord = dsp->lens[saip->ssdp[saip->master-1]->unsect[which_gap]];
7447 return saip->aligncoords[saip->ssdp[saip->master-1]->unsect[which_gap]];
7448 }
7449
7450
AlnMgrReconcileGaps(Int4Ptr lens,Uint4Ptr aligncoords,Int4 num)7451 NLM_EXTERN Boolean AlnMgrReconcileGaps(Int4Ptr lens, Uint4Ptr aligncoords, Int4 num)
7452 {
7453 Int4 i;
7454 Int4 j;
7455 Int4 r;
7456
7457 for (i=0; i<num; i++)
7458 {
7459 if (lens[i] < 0)
7460 {
7461 r = 1;
7462 while (lens[i+r] < 0)
7463 {
7464 r++;
7465 }
7466 lens[i] = lens[i+r];
7467 for (j=i+1; j<num; j++)
7468 {
7469 if (lens[j] >= 0)
7470 lens[j] = lens[j] + aligncoords[i];
7471 }
7472 }
7473 }
7474 for (i=0; i<num; i++)
7475 {
7476 aligncoords[i] = lens[i] - lens[0];
7477 }
7478 for (i=0; i<num-1; i++)
7479 {
7480 lens[i] = lens[i+1] - lens[i];
7481 }
7482 return TRUE;
7483 }
7484
AlnMgrMakeMultSegments(AMAlignIndexPtr amaip)7485 NLM_EXTERN Boolean AlnMgrMakeMultSegments(AMAlignIndexPtr amaip)
7486 {
7487 Int4 i;
7488 Int4 j;
7489 Uint2 n;
7490 Boolean retval;
7491 Uint2Ptr segments;
7492 Uint2Ptr tmp;
7493
7494 retval = FALSE;
7495 tmp = (Uint2Ptr)MemNew((amaip->numseg)*sizeof(Uint2));
7496 for (i=0; i<amaip->numsaps; i++)
7497 {
7498 n = 0;
7499 for (j=0; j<amaip->numseg; j++)
7500 {
7501 if (amaip->starts[((amaip->numsaps)*j)+i] >= 0)
7502 {
7503 tmp[n] = j;
7504 n++;
7505 }
7506 }
7507 segments = (Uint2Ptr)MemNew(n*sizeof(Uint2));
7508 for (j=0; j<n; j++)
7509 {
7510 segments[j] = tmp[j];
7511 }
7512 if (!amaip->amadp[i])
7513 return retval;
7514 amaip->amadp[i]->segments = segments;
7515 amaip->amadp[i]->numseg = n;
7516 amaip->amadp[i]->numseg = n;
7517 }
7518 MemFree(tmp);
7519 retval = TRUE;
7520 return retval;
7521 }
7522
AlnMgrCheckOrdered(SeqAlignPtr sap)7523 NLM_EXTERN Int4 AlnMgrCheckOrdered(SeqAlignPtr sap)
7524 {
7525 AMAlignIndexPtr amaip;
7526 Int4 dim;
7527 DenseSegPtr dsp;
7528 Int4 i;
7529 Int4 n;
7530 SeqAlignPtr salp;
7531 SeqIdPtr sip;
7532 SeqIdPtr sip_prev;
7533 SeqIdPtr sip_tmp;
7534 Int4 start1;
7535 Int4 start2;
7536 Int4 stop1;
7537 Int4 stop2;
7538 Uint1Ptr strands;
7539
7540 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
7541 return CHECK_ERROR;
7542 amaip = (AMAlignIndexPtr)(sap->saip);
7543 salp = (SeqAlignPtr)(sap->segs);
7544 if (salp->next == NULL) /* only one child alignment */
7545 return AM_NOTORDERED;
7546 sip_prev = NULL;
7547 dim = -1;
7548 n = 0;
7549 strands = NULL;
7550 while (salp)
7551 {
7552 n++;
7553 dsp = (DenseSegPtr)(salp->segs);
7554 if (dim != -1)
7555 {
7556 if (dsp->dim != dim)
7557 return AM_NOTORDERED;
7558 } else
7559 dim = dsp->dim;
7560 sip = dsp->ids;
7561 if (sip_prev)
7562 {
7563 sip_tmp = sip_prev;
7564 while (sip && sip_tmp)
7565 {
7566 if (SeqIdComp(sip_tmp, sip) != SIC_YES)
7567 return AM_NOTORDERED;
7568 sip = sip->next;
7569 sip_tmp = sip_tmp->next;
7570 }
7571 if (sip || sip_tmp)
7572 return AM_NOTORDERED;
7573 } else
7574 sip_prev = sip;
7575 if (strands)
7576 {
7577 if (dsp->strands)
7578 {
7579 for (i=0; i<dsp->dim; i++)
7580 {
7581 if ((dsp->strands[i] == Seq_strand_minus && strands[i] != Seq_strand_minus) || (strands[i] == Seq_strand_minus && dsp->strands[i] != Seq_strand_minus))
7582 return AM_NOTORDERED;
7583 }
7584 }
7585 } else
7586 strands = dsp->strands;
7587 salp = salp->next;
7588 }
7589 if (amaip->saps)
7590 MemFree(amaip->saps);
7591 amaip->saps = (SeqAlignPtr PNTR)MemNew(n*sizeof(SeqAlignPtr));
7592 amaip->numrows = dim;
7593 salp = (SeqAlignPtr)(sap->segs);
7594 n = 0;
7595 while (salp)
7596 {
7597 amaip->saps[n] = salp;
7598 salp = salp->next;
7599 n++;
7600 }
7601 amaip->numsaps = n;
7602 AlnMgrSortAlnSetByNthRowPos(sap, 1);
7603 for (n=0; n<amaip->numsaps-1; n++)
7604 {
7605 for (i=0; i<dim; i++)
7606 {
7607 AlnMgrGetNthSeqRangeInSA(amaip->saps[n], i+1, &start1, &stop1);
7608 AlnMgrGetNthSeqRangeInSA(amaip->saps[n+1], i+1, &start2, &stop2);
7609 if (AlnMgrGetNthStrand(amaip->saps[n], i+1) == Seq_strand_minus)
7610 {
7611 if (start1 <= stop2)
7612 return AM_NOTORDERED;
7613 } else
7614 {
7615 if (stop1 >= start2)
7616 return AM_NOTORDERED;
7617 }
7618 }
7619 }
7620 return AM_ORDERED;
7621 }
7622
AlnMgrCheckOverlapping(SeqAlignPtr sap)7623 NLM_EXTERN Int4 AlnMgrCheckOverlapping(SeqAlignPtr sap)
7624 {
7625 AMAlignDatPtr amadp;
7626 AMAlignIndexPtr amaip;
7627 Int4 end;
7628 Int4 c;
7629 Int4 i;
7630 Int4 j;
7631 Int4 n;
7632 Int4 prevstrand;
7633 SeqIdPtr sip;
7634 Int4 start;
7635 Int4 stop;
7636 Uint2 strand;
7637
7638 i = AlnMgrCheckAlignForParent(sap);
7639 if (i<0)
7640 return CHECK_ERROR;
7641 else if (i==AM_PARENT)
7642 {
7643 amaip = (AMAlignIndexPtr)sap->saip;
7644 if (amaip->numsaps == 1)
7645 return 1;
7646 sip = amaip->ids;
7647 for (j=0; j<(amaip->numbsqs); j++)
7648 {
7649 end = -1;
7650 amadp = amaip->amadp[j];
7651 prevstrand = -1;
7652 for (c=0; c<(amadp->numsaps); c++)
7653 {
7654 n = AlnMgrGetNForSip(amadp->saps[c], sip);
7655 strand = AlnMgrGetNthStrand(amadp->saps[c], n);
7656 if (strand == 0)
7657 {
7658 if (prevstrand != -1)
7659 strand = prevstrand;
7660 } else if (prevstrand != -1)
7661 {
7662 if (strand != prevstrand)
7663 return j;
7664 } else
7665 prevstrand = strand;
7666 AlnMgrGetNthSeqRangeInSA(amadp->saps[c], n, &start, &stop);
7667 if (strand != Seq_strand_minus)
7668 {
7669 if (start <= end && end != -1)
7670 return j;
7671 else
7672 end = stop;
7673 } else
7674 {
7675 if (end != -1 && stop >= end)
7676 return j;
7677 else
7678 end = start;
7679 }
7680 }
7681 sip = sip->next;
7682 }
7683 } else if (i==AM_CHILD)
7684 {
7685 return NO_OVERLAP;
7686 }
7687 return NO_OVERLAP;
7688 }
7689
am_check_gaps(SeqAlignPtr sap)7690 static Boolean am_check_gaps(SeqAlignPtr sap)
7691 {
7692 DenseSegPtr dsp;
7693 SeqAlignPtr salp;
7694
7695 salp = (SeqAlignPtr)(sap->segs);
7696 while (salp)
7697 {
7698 dsp = (DenseSegPtr)(salp->segs);
7699 if (dsp->dim > 1)
7700 return FALSE;
7701 salp = salp->next;
7702 }
7703 return TRUE;
7704 }
7705
7706 /*****************************************************************************
7707 *
7708 * AlnMgrGetMaxSegments simply adds up the number of segments for each
7709 * SeqAlign in a linked list, to get the maximum number of segments
7710 * for the merge of the list (for memory allocation in AlnMgrMakeFakeMultiple).
7711 *
7712 ******************************************************************************/
7713
AlnMgrGetMaxSegments(SeqAlignPtr sap)7714 NLM_EXTERN Int4 AlnMgrGetMaxSegments(SeqAlignPtr sap)
7715 {
7716 DenseSegPtr dsp;
7717 Int4 ernie; /* the running total, also a happy hamster */
7718
7719 ernie = 0;
7720 while (sap)
7721 {
7722 if (sap->segtype == SAS_DENSEG)
7723 {
7724 dsp = (DenseSegPtr)(sap->segs);
7725 ernie += dsp->numseg;
7726 } else if (sap->segtype == SAS_STD)
7727 {
7728 ernie += 1;
7729 } else
7730 return 0;
7731 sap = sap->next;
7732 ernie += 1;
7733 }
7734 return ernie;
7735 }
7736
7737 /*******************************************************************************
7738 *
7739 * Row Management functions:
7740 *
7741 *******************************************************************************/
AlnMgrGetNumRows(SeqAlignPtr sap)7742 NLM_EXTERN Int4 AlnMgrGetNumRows(SeqAlignPtr sap)
7743 {
7744 AMAlignIndexPtr amaip;
7745 DenseSegPtr dsp;
7746
7747 if (sap == NULL || sap->saip == NULL)
7748 return -1;
7749 if (sap->saip->indextype == INDEX_SEGS)
7750 {
7751 dsp = (DenseSegPtr)sap->segs;
7752 if (dsp == NULL)
7753 return -1;
7754 return (dsp->dim);
7755 } else if (sap->saip->indextype == INDEX_PARENT)
7756 {
7757 if ((amaip = (AMAlignIndexPtr)sap->saip) == NULL)
7758 return -1;
7759 if (amaip->numseg == 0)
7760 return (amaip->numbsqs);
7761 if (amaip->numrows)
7762 return (amaip->numrows);
7763 }
7764 return 0;
7765 }
7766
AlnMgrGetMaxRowsForParentPartial(SeqAlignPtr sap)7767 NLM_EXTERN Int4 AlnMgrGetMaxRowsForParentPartial(SeqAlignPtr sap)
7768 {
7769 AMAlignIndexPtr amaip;
7770 Int4 i;
7771 Int4 j;
7772 Int4 max;
7773
7774 if (sap == NULL || sap->saip == NULL)
7775 return -1;
7776 max = -1;
7777 if (sap->saip->indextype == INDEX_PARENT)
7778 {
7779 amaip = (AMAlignIndexPtr)sap->saip;
7780 for (i=0; i<(amaip->alnsaps); i++)
7781 {
7782 j = AlnMgrGetNumRows(amaip->saps[i]);
7783 if (j==-1)
7784 return -1;
7785 if (j>max)
7786 max = j;
7787 }
7788 }
7789 return max;
7790 }
7791
AlnMgrMakeRowsForOrdered(SeqAlignPtr sap)7792 NLM_EXTERN Boolean AlnMgrMakeRowsForOrdered(SeqAlignPtr sap)
7793 {
7794 AMAlignIndexPtr amaip;
7795 DenseSegPtr dsp;
7796 Int4 i;
7797 SeqIdPtr id;
7798 Int4 n;
7799 RowSourcePtr PNTR rowsource;
7800 RowSourcePtr rsp;
7801
7802 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
7803 return FALSE;
7804 amaip = (AMAlignIndexPtr)(sap->saip);
7805 rowsource = (RowSourcePtr PNTR)MemNew((amaip->numrows)*sizeof(RowSourcePtr));
7806 dsp = (DenseSegPtr)(((SeqAlignPtr)(sap->segs))->segs);
7807 id = dsp->ids;
7808 for (i=0; i<amaip->numrows; i++)
7809 {
7810 rsp = RowSourceNew();
7811 rsp->id = SeqIdDup(id);
7812 id = id->next;
7813 rsp->which_saps = (Uint4Ptr)MemNew((amaip->numsaps)*sizeof(Uint4));
7814 rsp->num_in_sap = (Uint4Ptr)MemNew((amaip->numsaps)*sizeof(Uint4));
7815 rsp->numsaps = amaip->numsaps;
7816 if (dsp->strands)
7817 rsp->strand = dsp->strands[i];
7818 for (n=0; n<amaip->numsaps; n++)
7819 {
7820 rsp->which_saps[n] = n+1;
7821 rsp->num_in_sap[n] = i+1;
7822 }
7823 rowsource[i] = rsp;
7824 }
7825 amaip->rowsource = rowsource;
7826 amaip->master = -2;
7827 return TRUE;
7828 }
7829
AlnMgrGetRowsForPartial(SeqAlignPtr sap)7830 NLM_EXTERN Boolean AlnMgrGetRowsForPartial(SeqAlignPtr sap)
7831 {
7832 AMAlignIndexPtr amaip;
7833 Int4 curr;
7834 DenseSegPtr dsp;
7835 Boolean found;
7836 Int4 i;
7837 Int4 j;
7838 Int4 k;
7839 Boolean retval;
7840 RowSourcePtr PNTR rowsource;
7841 RowSourcePtr rsp;
7842 SeqAlignPtr salp;
7843 SeqIdPtr sip;
7844
7845 retval = FALSE;
7846 if (sap == NULL || sap->saip == NULL)
7847 return retval;
7848 if (sap->saip->indextype != INDEX_PARENT)
7849 return retval;
7850 if (sap->type != SAT_PARTIAL)
7851 return retval;
7852 amaip = (AMAlignIndexPtr)sap->saip;
7853 i = AlnMgrGetMaxRowsForParentPartial(sap);
7854 if (i < 0)
7855 return retval;
7856 else
7857 amaip->numrows = i;
7858 rowsource = (RowSourcePtr PNTR)MemNew((amaip->numrows)*sizeof(RowSourcePtr));
7859 curr = -1;
7860 for (i=0; i<(amaip->alnsaps); i++)
7861 {
7862 salp = amaip->saps[i];
7863 dsp = (DenseSegPtr)salp->segs;
7864 sip = dsp->ids;
7865 for (j=0; j<(dsp->dim); j++)
7866 {
7867 found = FALSE;
7868 k = 0;
7869 while (!found && k <= curr)
7870 {
7871 if (SeqIdComp(sip, rowsource[k]->id) == SIC_YES)
7872 found = TRUE;
7873 else
7874 k++;
7875 }
7876 if (!found)
7877 {
7878 curr++;
7879 rsp = RowSourceNew();
7880 rsp->which_saps = (Uint4Ptr)MemNew((amaip->alnsaps)*sizeof(Uint4));
7881 rsp->num_in_sap = (Uint4Ptr)MemNew((amaip->alnsaps)*sizeof(Uint4));
7882 rowsource[curr] = rsp;
7883 rsp->id = SeqIdDup(sip);
7884 rsp->which_saps[rsp->numsaps] = i+1;
7885 rsp->num_in_sap[rsp->numsaps] = AlnMgrGetNForSip(salp, sip);
7886 (rsp->numsaps)++;
7887 } else
7888 {
7889 rowsource[k]->which_saps[rowsource[k]->numsaps] = i+1;
7890 rowsource[k]->num_in_sap[rowsource[k]->numsaps] = AlnMgrGetNForSip(salp, sip);
7891 (rowsource[k]->numsaps)++;
7892 }
7893 sip = sip->next;
7894 }
7895 }
7896 amaip->numrows = curr+1;
7897 amaip->rowsource = rowsource;
7898 amaip->master = -2;
7899 return TRUE;
7900 }
7901
AlnMgrGetRowsForMasterSlave(SeqAlignPtr sap)7902 NLM_EXTERN Boolean AlnMgrGetRowsForMasterSlave(SeqAlignPtr sap)
7903 {
7904 AMAlignIndexPtr amaip;
7905 DenseSegPtr dsp;
7906 Int4 i;
7907 Int4 j;
7908 Int4 k;
7909 Boolean retval;
7910 RowSourcePtr PNTR rowsource;
7911 RowSourcePtr rsp;
7912 SAIndexPtr saip;
7913 SeqAlignPtr salp;
7914 SeqIdPtr sip;
7915
7916 retval = FALSE;
7917 if (sap == NULL || sap->saip == NULL)
7918 return retval;
7919 if (sap->saip->indextype != INDEX_PARENT)
7920 return retval;
7921 if (sap->type != SAT_MASTERSLAVE)
7922 return retval;
7923 amaip = (AMAlignIndexPtr)sap->saip;
7924 i = 1;
7925 salp = (SeqAlignPtr)sap->segs;
7926 while (salp)
7927 {
7928 j = AlnMgrGetNumRows(salp);
7929 if (j < 0)
7930 return retval;
7931 else
7932 i += (j-1); /*don't count the master over and over*/
7933 salp = salp->next;
7934 }
7935 rowsource = (RowSourcePtr PNTR)MemNew((i+1)*sizeof(RowSourcePtr));
7936 rsp = RowSourceNew();
7937 rsp->id = SeqIdDup(sap->master);
7938 rsp->which_saps = (Uint4Ptr)MemNew((amaip->alnsaps)*sizeof(Uint4));
7939 rsp->num_in_sap = (Uint4Ptr)MemNew((amaip->alnsaps)*sizeof(Uint4));
7940 rsp->numsaps = amaip->alnsaps;
7941 rowsource[0] = rsp;
7942 amaip->numrows = 1;
7943 for (j=0; j<(amaip->alnsaps); j++)
7944 {
7945 salp = amaip->saps[j];
7946 saip = (SAIndexPtr)salp->saip;
7947 dsp = (DenseSegPtr)(salp->segs);
7948 sip = dsp->ids;
7949 k=1;
7950 while (sip)
7951 {
7952 if (k != saip->master)
7953 {
7954 rsp = RowSourceNew();
7955 rsp->id = SeqIdDup(sip);
7956 rsp->which_saps = (Uint4Ptr)MemNew(sizeof(Uint4));
7957 rsp->num_in_sap = (Uint4Ptr)MemNew(sizeof(Uint4));
7958 rsp->numsaps = 1;
7959 rsp->which_saps[0] = j+1;
7960 rsp->num_in_sap[0] = k;
7961 rowsource[amaip->numrows] = rsp;
7962 amaip->numrows++;
7963 } else
7964 {
7965 rowsource[0]->which_saps[j] = j+1;
7966 rowsource[0]->num_in_sap[j] = k;
7967 amaip->master = 1;
7968 }
7969 k++;
7970 sip = sip->next;
7971 }
7972 }
7973 amaip->rowsource = rowsource;
7974 return TRUE;
7975 }
7976
7977
7978 /*******************************************************************************
7979 *
7980 * AlnMgrFindMaster returns the (duplicated) SeqIdPtr of the first bioseq
7981 * that is present in every child alignment, unless the sap->master field
7982 * is set in the child alignments, in which case that SeqIdPtr is returned
7983 * (if it's the same in all children).
7984 *
7985 *******************************************************************************/
7986
AlnMgrFindMaster(SeqAlignPtr sap)7987 NLM_EXTERN SeqIdPtr AlnMgrFindMaster(SeqAlignPtr sap)
7988 {
7989 AMAlignDatPtr amadp;
7990 AMAlignIndexPtr amaip;
7991 Boolean done;
7992 Int4 i;
7993 SeqAlignPtr salp;
7994 SeqIdPtr sip;
7995
7996 i = AlnMgrCheckAlignForParent(sap);
7997 if (i<0)
7998 return NULL;
7999 else if (i==AM_CHILD)
8000 {
8001 return SeqIdDup(sap->master);
8002 } else if (i==AM_PARENT)
8003 {
8004 salp = (SeqAlignPtr)(sap->segs);
8005 sip = NULL;
8006 done = FALSE;
8007 while (salp && !done)
8008 {
8009 if (salp->master)
8010 return (SeqIdDup(salp->master));
8011 salp = salp->next;
8012 }
8013 amaip = (AMAlignIndexPtr)(sap->saip);
8014 sip = amaip->ids;
8015 for (i=0; i<(amaip->numbsqs); i++)
8016 {
8017 amadp = amaip->amadp[i];
8018 if (!amadp || !sip)
8019 return NULL;
8020 else
8021 {
8022 if (amadp->numsaps == amaip->numsaps)
8023 return (SeqIdDup(sip));
8024 }
8025 sip = sip->next;
8026 }
8027 return NULL;
8028 }
8029 return NULL;
8030 }
8031
8032
8033 /*******************************************************************************
8034 *
8035 * AlnMgrCheckRealMaster makes sure that the master seqid given appears
8036 * once and only once in each seqalign in the set if a parent is given,
8037 * or once and only one in the seqalign if a child is given.
8038 *
8039 *******************************************************************************/
AlnMgrCheckRealMaster(SeqAlignPtr sap,SeqIdPtr master)8040 NLM_EXTERN Boolean AlnMgrCheckRealMaster(SeqAlignPtr sap, SeqIdPtr master)
8041 {
8042 DenseSegPtr dsp;
8043 Int4 i;
8044 Boolean retval;
8045 SeqAlignPtr salp;
8046 SeqIdPtr sip;
8047
8048 retval = FALSE;
8049 if (!sap || !master)
8050 return retval;
8051 if (sap->segtype == SAS_DISC)
8052 {
8053 salp = (SeqAlignPtr)sap->segs;
8054 while (salp)
8055 {
8056 dsp = (DenseSegPtr)salp->segs;
8057 sip = dsp->ids;
8058 i = 0;
8059 while (sip)
8060 {
8061 if (SeqIdComp(sip, master) == SIC_YES)
8062 {
8063 i++;
8064 if (i > 1)
8065 return retval;
8066 }
8067 sip = sip->next;
8068 }
8069 salp = salp->next;
8070 }
8071 } else if (sap->segtype == SAS_DENSEG)
8072 {
8073 dsp = (DenseSegPtr)sap->segs;
8074 sip = dsp->ids;
8075 i = 0;
8076 while (sip)
8077 {
8078 if (SeqIdComp(sip, master) == SIC_YES)
8079 {
8080 i++;
8081 if (i > 1)
8082 return retval;
8083 }
8084 sip = sip->next;
8085 }
8086 }
8087 return TRUE;
8088 }
8089
AlnMgrMakeSegmentedMasterSlave(SeqAlignPtr sap)8090 NLM_EXTERN Boolean AlnMgrMakeSegmentedMasterSlave(SeqAlignPtr sap)
8091 {
8092 AMAlignIndexPtr amaip;
8093 AMmsmsPtr ams;
8094 AMmsmsPtr PNTR amsarray;
8095 AMmsmsPtr ams_head;
8096 AMmsmsPtr ams_master;
8097 AMmsmsPtr ams_mtmp;
8098 AMmsmsPtr ams_tmp;
8099 Int4 c;
8100 Boolean done;
8101 DenseSegPtr dsp;
8102 Boolean found;
8103 Int4 i;
8104 Int4 j;
8105 Int4 n;
8106 Int4 max;
8107 Boolean ok;
8108 RowSourcePtr rsp;
8109 Int4 rspnum;
8110 SAIndexPtr saip;
8111 SeqAlignPtr salp;
8112 SeqIdPtr sip;
8113 AMsiplistPtr siplist;
8114 AMsiplistPtr siplist_new;
8115 AMsiplistPtr siplist_tmp;
8116 Int4 sstart;
8117 Int4 sstop;
8118 Int4 start;
8119 Int4 stop;
8120 Int4Ptr tmparray;
8121
8122 if (sap == NULL)
8123 return FALSE;
8124 amaip = (AMAlignIndexPtr)sap->saip;
8125 if (amaip == NULL)
8126 return FALSE;
8127 if (amaip->master < 0)
8128 return FALSE;
8129 ams_head = NULL;
8130 n = 0;
8131 salp = (SeqAlignPtr)(sap->segs);
8132 for (i=0; i<(amaip->numsaps); i++)
8133 {
8134 amaip->saps[i] = salp;
8135 saip = (SAIndexPtr)(salp->saip);
8136 if (saip->master < 0)
8137 return FALSE;
8138 AlnMgrGetNthSeqRangeInSA(salp, saip->master, &start, &stop);
8139 dsp = (DenseSegPtr)salp->segs;
8140 sip = dsp->ids;
8141 j = 1;
8142 while (sip != NULL)
8143 {
8144 if (j != saip->master)
8145 {
8146 n++;
8147 ams = (AMmsmsPtr)MemNew(sizeof(AMmsms));
8148 ams->start = start;
8149 ams->stop = stop;
8150 ams->sap = salp;
8151 ams->nsap = i+1;
8152 ams->sip = sip;
8153 ams->n = j;
8154 ams->j = i;
8155 AlnMgrGetNthSeqRangeInSA(salp, j, &sstart, &sstop);
8156 ams->sstart = sstart;
8157 ams->sstop = sstop;
8158 ams->strand = AlnMgrGetNthStrand(salp, j);
8159 if (ams_head == NULL)
8160 {
8161 ams_head = ams_tmp = ams;
8162 } else
8163 {
8164 ams_tmp->next = ams;
8165 ams_tmp = ams;
8166 }
8167 }
8168 sip = sip->next;
8169 j++;
8170 }
8171 salp = salp->next;
8172 }
8173 ams_head = am_sort_ammsms(ams_head, n);
8174 ams_master = NULL;
8175 ams = ams_head;
8176 n = 0;
8177 while (ams)
8178 {
8179 if (ams_master)
8180 {
8181 ams_mtmp = ams_master;
8182 found = FALSE;
8183 while (!found && ams_mtmp)
8184 {
8185 if (ams->start == ams_mtmp->start && ams->stop == ams_mtmp->stop)
8186 {
8187 found = TRUE;
8188 ams->masternum = ams_mtmp->masternum;
8189 ams_mtmp->count++;
8190 }
8191 else
8192 ams_mtmp = ams_mtmp->next;
8193 }
8194 if (!found)
8195 {
8196 n++;
8197 ams_tmp = (AMmsmsPtr)MemNew(sizeof(AMmsms));
8198 ams_tmp->start = ams->start;
8199 ams_tmp->stop = ams->stop;
8200 ams_tmp->sap = ams->sap;
8201 ams_tmp->nsap = ams->nsap;
8202 ams_tmp->sip = sap->master;
8203 ams_tmp->count = 1;
8204 ams_tmp->masternum = ams->masternum = n;
8205 saip = (SAIndexPtr)(ams->sap->saip);
8206 ams_tmp->n = saip->master;
8207 ams_tmp->next = ams_master;
8208 ams_tmp->j = ams->n;
8209 ams_master = ams_tmp;
8210 }
8211 } else
8212 {
8213 n++;
8214 ams_tmp = (AMmsmsPtr)MemNew(sizeof(AMmsms));
8215 ams_tmp->start = ams->start;
8216 ams_tmp->stop = ams->stop;
8217 ams_tmp->sap = ams->sap;
8218 ams_tmp->nsap = ams->nsap;
8219 ams_tmp->sip = sap->master;
8220 ams_tmp->count = 1;
8221 ams_tmp->masternum = ams->masternum = n;
8222 saip = (SAIndexPtr)(ams->sap->saip);
8223 ams_tmp->n = saip->master;
8224 ams_tmp->j = ams->n;
8225 ams_master = ams_tmp;
8226 }
8227 ams = ams->next;
8228 }
8229 ams_master = am_sort_masterams(ams_master, n);
8230 max = c = 0;
8231 ams = ams_master;
8232 ams_tmp = NULL;
8233 amsarray = (AMmsmsPtr PNTR)MemNew((n+1)*sizeof(AMmsmsPtr));
8234 while (ams)
8235 {
8236 amsarray[c] = ams;
8237 if (ams_tmp)
8238 {
8239 if (ams->start <= ams_tmp->stop)
8240 {
8241 MemFree(amsarray);
8242 return FALSE; /* add code here to compress all lines??? */
8243 }
8244 }
8245 max += ams->count;
8246 c++;
8247 ams_tmp = ams;
8248 ams = ams->next;
8249 }
8250 amaip->mstype = AM_SEGMENTED_MASTERSLAVE;
8251 amaip->rowsource = (RowSourcePtr PNTR)MemNew((max+1)*sizeof(RowSourcePtr));
8252 if (amaip->aligncoords)
8253 MemFree(amaip->aligncoords);
8254 amaip->aligncoords = (Uint4Ptr)MemNew((c+1)*sizeof(Uint4));
8255 amaip->lens = (Int4Ptr)MemNew((c+1)*sizeof(Int4));
8256 amaip->numseg = c;
8257 tmparray = (Int4Ptr)MemNew((c+1)*sizeof(Int4));
8258 ams = ams_master;
8259 for (j=0; ams && j < c; j++)
8260 {
8261 amaip->lens[j] = AlnMgrGetAlnLength(ams->sap, FALSE);
8262 amaip->aligncoords[j+1] = amaip->aligncoords[j] + amaip->lens[j];
8263 tmparray[ams->masternum] = j;
8264 ams = ams->next;
8265 }
8266 rsp = RowSourceNew();
8267 rsp->id = SeqIdDup(ams_master->sip);
8268 rsp->which_saps = (Uint4Ptr)MemNew(c*sizeof(Uint4));
8269 rsp->num_in_sap = (Uint4Ptr)MemNew(c*sizeof(Uint4));
8270 rsp->numsaps = 0;
8271 ams = ams_master;
8272 while (ams)
8273 {
8274 rsp->which_saps[rsp->numsaps] = ams->nsap;
8275 rsp->num_in_sap[rsp->numsaps] = ams->n;
8276 rsp->numsaps++;
8277 ams = ams->next;
8278 }
8279 amaip->rowsource[0] = rsp;
8280 amaip->numrows = 1;
8281 siplist = (AMsiplistPtr)MemNew(sizeof(AMsiplist));
8282 siplist->sip = rsp->id;
8283 siplist->first_row = 0;
8284 siplist_tmp = siplist;
8285 ams = ams_head;
8286 rsp = RowSourceNew();
8287 rsp->which_saps = (Uint4Ptr)MemNew(c*sizeof(Uint4));
8288 rsp->num_in_sap = (Uint4Ptr)MemNew(c*sizeof(Uint4));
8289 amaip->rowsource[amaip->numrows] = rsp;
8290 amaip->numrows++;
8291 while (ams && amaip->numrows <= max)
8292 {
8293 if (rsp->id == NULL) /* new rsp */
8294 {
8295 rsp->id = SeqIdDup(ams->sip);
8296 rsp->strand = ams->strand;
8297 rsp->which_saps[tmparray[ams->masternum]] = ams->nsap;
8298 rsp->num_in_sap[tmparray[ams->masternum]] = ams->n;
8299 rsp->numsaps = c;
8300 rspnum = am_get_first_rsp_for_sip(ams->sip, siplist);
8301 if (rspnum == -1) /* need to add to seqid list */
8302 {
8303 siplist_new = (AMsiplistPtr)MemNew(sizeof(AMsiplist));
8304 siplist_new->sip = ams->sip;
8305 siplist_new->first_row = amaip->numrows-1;
8306 siplist_tmp->next = siplist_new;
8307 siplist_tmp = siplist_new;
8308 }
8309 } else /* some fields already filled -- check for conflicts or new row */
8310 {
8311 n = SeqIdComp(rsp->id, ams->sip);
8312 if (n == SIC_YES && ams->strand == rsp->strand) /* could be same row -- check for conflicts */
8313 {
8314 ok = FALSE;
8315 if (rsp->which_saps[tmparray[ams->masternum]] == 0) /* put in same row */
8316 {
8317 done = FALSE;
8318 i = 0;
8319 while (!done && i<c)
8320 {
8321 if (rsp->which_saps[i] != 0)
8322 done = TRUE;
8323 else
8324 i++;
8325 }
8326 if (done)
8327 {
8328 AlnMgrGetNthSeqRangeInSA(amaip->saps[rsp->which_saps[i] - 1], rsp->num_in_sap[i], &start, &stop);
8329 if (ams->strand == Seq_strand_minus)
8330 {
8331 if (tmparray[ams->masternum] < i)
8332 {
8333 if (stop >= ams->sstart)
8334 ok = FALSE;
8335 else
8336 ok = TRUE;
8337 } else
8338 {
8339 if (start <= ams->sstop)
8340 ok = FALSE;
8341 else
8342 ok = TRUE;
8343 }
8344 } else
8345 {
8346 if (tmparray[ams->masternum] < i)
8347 {
8348 if (start <= ams->sstop)
8349 ok = FALSE;
8350 else
8351 ok = TRUE;
8352 } else
8353 {
8354 if (stop >= ams->sstart)
8355 ok = FALSE;
8356 else
8357 ok = TRUE;
8358 }
8359 }
8360 }
8361 }
8362 if (ok)
8363 {
8364 rsp->which_saps[tmparray[ams->masternum]] = ams->nsap;
8365 rsp->num_in_sap[tmparray[ams->masternum]] = ams->n;
8366 rsp->numsaps=c;
8367 } else
8368 {
8369 rspnum = am_get_first_rsp_for_sip(ams->sip, siplist);
8370 if (rspnum == -1) /* make a new row */
8371 {
8372 rsp = RowSourceNew();
8373 rsp->strand = ams->strand;
8374 rsp->which_saps = (Uint4Ptr)MemNew(c*sizeof(Uint4));
8375 rsp->num_in_sap = (Uint4Ptr)MemNew(c*sizeof(Uint4));
8376 amaip->rowsource[amaip->numrows] = rsp;
8377 amaip->numrows++;
8378 rsp->id = SeqIdDup(ams->sip);
8379 rsp->which_saps[tmparray[ams->masternum]] = ams->nsap;
8380 rsp->num_in_sap[tmparray[ams->masternum]] = ams->n;
8381 rsp->numsaps = c;
8382 siplist_new = (AMsiplistPtr)MemNew(sizeof(AMsiplist));
8383 siplist_new->sip = ams->sip;
8384 siplist_new->first_row = amaip->numrows-1;
8385 siplist_tmp->next = siplist_new;
8386 siplist_tmp = siplist_new;
8387 } else
8388 {
8389 done = FALSE;
8390 while (rspnum < amaip->numrows && !done && SAM_OrderSeqID(ams->sip, amaip->rowsource[rspnum]->id) == 0)
8391 {
8392 rsp = amaip->rowsource[rspnum];
8393 if (rsp->which_saps[tmparray[ams->masternum]] == 0) /* fits here */
8394 {
8395 done = TRUE;
8396 found = FALSE;
8397 i = 0;
8398 while (!found && i<c)
8399 {
8400 if (rsp->which_saps[i] != 0)
8401 found = TRUE;
8402 else
8403 i++;
8404 }
8405 if (found)
8406 {
8407 AlnMgrGetNthSeqRangeInSA(amaip->saps[rsp->which_saps[i] - 1], rsp->num_in_sap[i], &start, &stop);
8408 if (ams->strand == Seq_strand_minus)
8409 {
8410 if (tmparray[ams->masternum] < i)
8411 {
8412 if (stop >= ams->sstart)
8413 ok = FALSE;
8414 else
8415 ok = TRUE;
8416 } else
8417 {
8418 if (start <= ams->sstop)
8419 ok = FALSE;
8420 else
8421 ok = TRUE;
8422 }
8423 } else
8424 {
8425 if (tmparray[ams->masternum] < i)
8426 {
8427 if (start <= ams->sstop)
8428 ok = FALSE;
8429 else
8430 ok = TRUE;
8431 } else
8432 {
8433 if (stop >= ams->sstart)
8434 ok = FALSE;
8435 else
8436 ok = TRUE;
8437 }
8438 }
8439 }
8440 if (ok && found)
8441 {
8442 rsp->which_saps[tmparray[ams->masternum]] = ams->nsap;
8443 rsp->num_in_sap[tmparray[ams->masternum]] = ams->n;
8444 rsp->numsaps = c;
8445 } else
8446 {
8447 rsp = RowSourceNew();
8448 rsp->which_saps = (Uint4Ptr)MemNew(c*sizeof(Uint4));
8449 rsp->num_in_sap = (Uint4Ptr)MemNew(c*sizeof(Uint4));
8450 rsp->strand = ams->strand;
8451 amaip->rowsource[amaip->numrows] = rsp;
8452 amaip->numrows++;
8453 rsp->id = SeqIdDup(ams->sip);
8454 rsp->which_saps[tmparray[ams->masternum]] = ams->nsap;
8455 rsp->num_in_sap[tmparray[ams->masternum]] = ams->n;
8456 rsp->numsaps = c;
8457 }
8458 }
8459 rspnum++;
8460 }
8461 if (!done) /* didn't fit */
8462 {
8463 rsp = RowSourceNew();
8464 rsp->which_saps = (Uint4Ptr)MemNew(c*sizeof(Uint4));
8465 rsp->num_in_sap = (Uint4Ptr)MemNew(c*sizeof(Uint4));
8466 amaip->rowsource[amaip->numrows] = rsp;
8467 amaip->numrows++;
8468 rsp->id = SeqIdDup(ams->sip);
8469 rsp->which_saps[tmparray[ams->masternum]] = ams->nsap;
8470 rsp->num_in_sap[tmparray[ams->masternum]] = ams->n;
8471 rsp->numsaps=c;
8472 rsp->strand = ams->strand;
8473 }
8474 }
8475 }
8476 } else /* make a new row */
8477 {
8478 rsp = RowSourceNew();
8479 rsp->which_saps = (Uint4Ptr)MemNew(c*sizeof(Uint4));
8480 rsp->num_in_sap = (Uint4Ptr)MemNew(c*sizeof(Uint4));
8481 amaip->rowsource[amaip->numrows] = rsp;
8482 amaip->numrows++;
8483 rsp->id = SeqIdDup(ams->sip);
8484 rsp->strand = ams->strand;
8485 rsp->which_saps[tmparray[ams->masternum]] = ams->nsap;
8486 rsp->num_in_sap[tmparray[ams->masternum]] = ams->n;
8487 rsp->numsaps=c;
8488 siplist_new = (AMsiplistPtr)MemNew(sizeof(AMsiplist));
8489 siplist_new->sip = ams->sip;
8490 siplist_new->first_row = amaip->numrows-1;
8491 siplist_tmp->next = siplist_new;
8492 siplist_tmp = siplist_new;
8493 }
8494 }
8495 ams = ams->next;
8496 }
8497 siplist_tmp = siplist;
8498 while (siplist_tmp)
8499 {
8500 siplist_new = siplist_tmp->next;
8501 siplist_tmp->sip = NULL;
8502 siplist_tmp->next = NULL;
8503 MemFree(siplist_tmp);
8504 siplist_tmp = siplist_new;
8505 }
8506 ams = ams_master;
8507 while (ams)
8508 {
8509 ams_tmp = ams->next;
8510 ams->sap = NULL;
8511 ams->sip = NULL;
8512 ams->next = NULL;
8513 MemFree(ams);
8514 ams = ams_tmp;
8515 }
8516 ams = ams_head;
8517 while (ams)
8518 {
8519 ams_tmp = ams->next;
8520 ams->sap = NULL;
8521 ams->sip = NULL;
8522 ams->next = NULL;
8523 MemFree(ams);
8524 ams = ams_tmp;
8525 }
8526 MemFree(amsarray);
8527 MemFree(tmparray);
8528 amaip->startsize = amaip->numseg;
8529 amaip->starts = (Int4Ptr)MemNew(amaip->numseg*sizeof(Int4));
8530 return TRUE;
8531 }
8532
am_get_first_rsp_for_sip(SeqIdPtr sip,AMsiplistPtr siplist)8533 static Int4 am_get_first_rsp_for_sip(SeqIdPtr sip, AMsiplistPtr siplist)
8534 {
8535 AMsiplistPtr siplist_tmp;
8536
8537 if (sip == NULL || siplist == NULL)
8538 return -1;
8539 siplist_tmp = siplist;
8540 while (siplist_tmp)
8541 {
8542 if (SeqIdComp(sip, siplist_tmp->sip) == SIC_YES)
8543 {
8544 return (siplist_tmp->first_row);
8545 }
8546 siplist_tmp = siplist_tmp->next;
8547 }
8548 return -1;
8549 }
8550
/* Sorts a linked list of n AMmsms entries with AlnMgrCompareAMS and    */
/* returns the new head.  Returns NULL for an empty list or n == 0, and */
/* the list untouched when n == 1.  The nodes are gathered into a       */
/* temporary array, heap-sorted, and then chained again in array order. */
static AMmsmsPtr am_sort_ammsms(AMmsmsPtr ams_head, Int4 n)
{
   AMmsmsPtr PNTR  sorted;
   AMmsmsPtr       node;
   AMmsmsPtr       head;
   AMmsmsPtr       tail;
   Int4            k;

   if (ams_head == NULL || n == 0)
      return NULL;
   if (n == 1)
      return ams_head;
   sorted = (AMmsmsPtr PNTR)MemNew((n+1)*sizeof(AMmsmsPtr));
   k = 0;
   node = ams_head;
   while (node != NULL && k < n)
   {
      sorted[k] = node;
      node = node->next;
      k++;
   }
   HeapSort((Pointer)sorted, (size_t)(n), sizeof(AMmsmsPtr), AlnMgrCompareAMS);
   head = tail = NULL;
   for (k = 0; k < n; k++)
   {
      node = sorted[k];
      if (head == NULL)
         head = node;
      else
         tail->next = node;
      tail = node;
      tail->next = NULL;
   }
   MemFree(sorted);
   return head;
}
8587
AlnMgrCompareAMS(VoidPtr base,VoidPtr large_son)8588 NLM_EXTERN int LIBCALLBACK AlnMgrCompareAMS(VoidPtr base, VoidPtr large_son)
8589 {
8590 AMmsmsPtr ams1;
8591 AMmsmsPtr ams2;
8592 Int4 r;
8593
8594 ams1 = *((AMmsmsPtr PNTR) base);
8595 ams2 = *((AMmsmsPtr PNTR) large_son);
8596 if (ams1 == NULL || ams2 == NULL)
8597 return 0;
8598 r = SAM_OrderSeqID(ams1->sip, ams2->sip);
8599 if (r == 0)
8600 {
8601 if (ams1->sstart < ams2->sstart)
8602 return -1;
8603 else
8604 return 1;
8605 } else
8606 return r;
8607 }
8608
/* Sorts a linked list of n master AMmsms entries with                   */
/* AlnMgrCompareMasterAMS and returns the new head.  Returns NULL for an */
/* empty list or n == 0, and the list untouched when n == 1.  The        */
/* temporary pointer array used for sorting is released before           */
/* returning (it used to be leaked on every call).                       */
static AMmsmsPtr am_sort_masterams(AMmsmsPtr ams_head, Int4 n)
{
   AMmsmsPtr       ams;
   AMmsmsPtr       ams_tmp;
   AMmsmsPtr PNTR  ams_array;
   Int4            i;

   if (ams_head == NULL || n == 0)
      return NULL;
   if (n == 1)
      return ams_head;
   ams_array = (AMmsmsPtr PNTR)MemNew((n+1)*sizeof(AMmsmsPtr));
   ams = ams_head;
   for (i=0; ams!=NULL && i<n; i++)
   {
      ams_array[i] = ams;
      ams = ams->next;
   }
   HeapSort((Pointer)ams_array, (size_t)(n), sizeof(AMmsmsPtr), AlnMgrCompareMasterAMS);
   /* rebuild the chain in sorted order */
   ams_tmp = NULL;
   for (i=0; i<n; i++)
   {
      if (ams_tmp != NULL)
      {
         ams->next = ams_array[i];
         ams = ams->next;
         ams->next = NULL;
      } else
      {
         ams_tmp = ams = ams_array[i];
         ams_tmp->next = NULL;
      }
   }
   MemFree(ams_array);
   return ams_tmp;
}
8644
AlnMgrCompareMasterAMS(VoidPtr base,VoidPtr large_son)8645 NLM_EXTERN int LIBCALLBACK AlnMgrCompareMasterAMS(VoidPtr base, VoidPtr large_son)
8646 {
8647 AMmsmsPtr ams1;
8648 AMmsmsPtr ams2;
8649
8650 ams1 = *((AMmsmsPtr PNTR) base);
8651 ams2 = *((AMmsmsPtr PNTR) large_son);
8652 if (ams1 == NULL || ams2 == NULL)
8653 return 0;
8654 if (ams1->start < ams2->start)
8655 return -1;
8656 else if (ams1->start > ams2->start)
8657 return 1;
8658 else if (ams1->stop < ams2->stop)
8659 return -1;
8660 else if (ams1->j < ams2->j)
8661 return -1;
8662 else
8663 return 1;
8664 }
8665
8666
AlnMgrSetMaster(SeqAlignPtr sap,SeqIdPtr master)8667 NLM_EXTERN void AlnMgrSetMaster(SeqAlignPtr sap, SeqIdPtr master)
8668 {
8669 SAIndexPtr saip;
8670 SeqAlignPtr salp;
8671
8672 if (sap->segtype != SAS_DISC || !master)
8673 return;
8674 sap->master = SeqIdDup(master);
8675 salp = (SeqAlignPtr)(sap->segs);
8676 while (salp)
8677 {
8678 if (!salp->saip)
8679 return;
8680 if (salp->master != NULL)
8681 SeqIdFree(salp->master);
8682 salp->master = SeqIdDup(master);
8683 if (salp->saip != NULL)
8684 {
8685 saip = (SAIndexPtr)(salp->saip);
8686 saip->master = AlnMgrGetNForSip(salp, master);
8687 }
8688 salp = salp->next;
8689 }
8690 return;
8691 }
8692
AlnMgrMakeMasterPlus(SeqAlignPtr sap)8693 NLM_EXTERN void AlnMgrMakeMasterPlus(SeqAlignPtr sap)
8694 {
8695 DenseSegPtr dsp;
8696 Int4 i;
8697 Int4 master;
8698 SAIndexPtr saip;
8699 SeqAlignPtr sap_tmp;
8700
8701 i = AlnMgrCheckAlignForParent(sap);
8702 if (i==AM_CHILD)
8703 {
8704 saip = (SAIndexPtr)(sap->saip);
8705 if (saip->master < 0)
8706 return;
8707 else
8708 master = saip->master;
8709 dsp = (DenseSegPtr)(sap->segs);
8710 if (dsp->strands[saip->master-1] == Seq_strand_minus)
8711 {
8712 sap_tmp = sap;
8713 sap = sap->next;
8714 sap_tmp->next = NULL;
8715 sap_tmp = SeqAlignListReverseStrand(sap_tmp);
8716 if (!AlnMgrIndexSingleChildSeqAlign(sap_tmp))
8717 return;
8718 saip = (SAIndexPtr)(sap_tmp->saip);
8719 saip->master = master;
8720 sap_tmp->next = sap;
8721 sap = sap_tmp;
8722 }
8723 } else if (i==AM_PARENT)
8724 {
8725 sap_tmp = (SeqAlignPtr)(sap->segs);
8726 while (sap_tmp)
8727 {
8728 AlnMgrMakeMasterPlus(sap_tmp);
8729 sap_tmp = sap_tmp->next;
8730 }
8731 }
8732 return;
8733 }
8734
AlnMgrForceMasterSlave(SeqAlignPtr sap)8735 NLM_EXTERN Boolean AlnMgrForceMasterSlave(SeqAlignPtr sap)
8736 {
8737 AMAlignIndexPtr amaip;
8738 AMAlignDatPtr amadp;
8739 Int4 j;
8740 Int4 n;
8741
8742 if (sap == NULL || sap->master == NULL || sap->saip == NULL)
8743 return FALSE;
8744 amaip = (AMAlignIndexPtr)sap->saip;
8745 n = AlnMgrGetNForSip(sap, sap->master);
8746 if (n < 1)
8747 return FALSE;
8748 amadp = amaip->amadp[n-1];
8749 if (amaip->saps == NULL)
8750 {
8751 amaip->saps = (SeqAlignPtr PNTR)MemNew((amaip->numsaps)*sizeof(SeqAlignPtr));
8752 for (j=0; j<amaip->numsaps; j++)
8753 {
8754 amaip->saps[j] = amadp->saps[j];
8755 }
8756 n = AlnMgrGetMaxSegments((SeqAlignPtr)(sap->segs));
8757 amaip->lens = (Int4Ptr)MemNew(n*sizeof(Int4));
8758 AlnMgrSetMaster(sap, sap->master);
8759 amaip->numseg = n;
8760 }
8761 if (!AlnMgrMergeIntoMSMultByMaster(amaip, amaip->lens, &amaip->numseg))
8762 return FALSE;
8763 amaip->startsize = (amaip->numseg)*(amaip->numsaps);
8764 amaip->starts = (Int4Ptr)MemNew((amaip->numseg)*(amaip->numsaps)*sizeof(Int4));
8765 amaip->aligncoords = (Uint4Ptr)MemNew((amaip->numseg)*sizeof(Uint4));
8766 if (!AlnMgrFillInStarts(amadp->saps, amaip->starts, amaip->numseg, amaip->lens, amaip->numsaps, amaip->aligncoords))
8767 return FALSE;
8768 if (amaip->numseg > 1)
8769 amaip->numseg -= 1;
8770 sap->type = SAT_MASTERSLAVE;
8771 if (!AlnMgrGetRowsForMasterSlave(sap))
8772 return FALSE;
8773 am_fix_empty_columns(sap);
8774 return TRUE;
8775 }
8776
/* For an indexed parent alignment, looks at every segment of the        */
/* starts array (numsaps entries per segment) and, when none of the      */
/* entries of a segment is >= 0, overwrites all of them with -3.         */
static void am_fix_empty_columns(SeqAlignPtr sap)
{
   AMAlignIndexPtr  amaip;
   Boolean          has_data;
   Int4             seg;
   Int4             col;
   Int4             base;

   if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
      return;
   amaip = (AMAlignIndexPtr)sap->saip;
   for (seg = 0; seg < amaip->numseg; seg++)
   {
      base = seg*amaip->numsaps;
      has_data = FALSE;
      for (col = 0; col < amaip->numsaps && !has_data; col++)
      {
         if (amaip->starts[base+col] >= 0)
            has_data = TRUE;
      }
      if (has_data)
         continue;
      for (col = 0; col < amaip->numsaps; col++)
         amaip->starts[base+col] = -3;
   }
}
8804
8805 /* a little kludge function to tide us over until the new */
8806 /* alignment manager arrives. */
8807
am_cleanupsalp(SeqAlignPtr salp)8808 static SeqAlignPtr am_cleanupsalp(SeqAlignPtr salp)
8809 {
8810 Int4 badseg;
8811 DenseSegPtr dsp;
8812 DenseSegPtr dsp_new;
8813 Boolean found;
8814 Int4 i;
8815 Int4 j;
8816 Int4 n;
8817
8818 dsp = (DenseSegPtr)(salp->segs);
8819 badseg = 0;
8820 for (i=0; i<dsp->numseg; i++)
8821 {
8822 found = FALSE;
8823 for (j=0; found==FALSE && j<dsp->dim; j++)
8824 {
8825 if (dsp->starts[dsp->dim*i + j] != -1)
8826 found = TRUE;
8827 }
8828 if (!found)
8829 badseg++;
8830 }
8831 if (badseg == 0)
8832 return salp;
8833 dsp_new = DenseSegNew();
8834 dsp_new->numseg = dsp->numseg-badseg;
8835 dsp_new->dim = dsp->dim;
8836 dsp_new->ids = dsp->ids;
8837 dsp->ids = NULL;
8838 dsp_new->lens = (Int4Ptr)MemNew(dsp_new->numseg*sizeof(Int4));
8839 dsp_new->starts = (Int4Ptr)MemNew(dsp_new->numseg*dsp_new->dim*sizeof(Int4));
8840 dsp_new->strands = (Uint1Ptr)MemNew(dsp_new->numseg*dsp_new->dim*sizeof(Int4));
8841 n = 0;
8842 for (i=0; i<dsp->numseg; i++)
8843 {
8844 found = FALSE;
8845 for (j=0; found==FALSE && j<dsp->dim; j++)
8846 {
8847 if (dsp->starts[dsp->dim*i + j] != -1)
8848 found = TRUE;
8849 }
8850 if (found)
8851 {
8852 for (j=0; j<dsp->dim; j++)
8853 {
8854 dsp_new->starts[dsp->dim*n+j] = dsp->starts[dsp->dim*i+j];
8855 dsp_new->strands[dsp->dim*n+j] = dsp->strands[dsp->dim*i+j];
8856 }
8857 dsp_new->lens[n] = dsp->lens[i];
8858 n++;
8859 }
8860 }
8861 DenseSegFree(dsp);
8862 salp->segs = (Pointer)dsp_new;
8863 return salp;
8864 }
8865
8866 /***************************************************************************
8867 *
8868 * AlnMgrGetSubAlign returns a flattened multiple or pairwise alignment
8869 * corresponding to the indexed input alignment. To get the entire
8870 * alignment, set from = 0 and to = -1. (SUBALIGN)
8871 *
8872 ***************************************************************************/
AlnMgrGetSubAlign(SeqAlignPtr sap,SeqIdPtr which_master,Int4 from,Int4 to)8873 NLM_EXTERN SeqAlignPtr AlnMgrGetSubAlign(SeqAlignPtr sap, SeqIdPtr which_master, Int4 from, Int4 to)
8874 {
8875 AMAlignIndexPtr amaip;
8876 AlnMsgPtr amp;
8877 Int4 c;
8878 DenseSegPtr dsp;
8879 DenseSegPtr dsp_orig;
8880 Int4 i;
8881 Int4 j;
8882 Boolean more;
8883 Uint4 n;
8884 Int4 numaln;
8885 SeqAlignPtr salp;
8886 SeqAlignPtr salp_head;
8887 SeqAlignPtr salp_prev;
8888 SeqIdPtr sip;
8889 SeqIdPtr sip_curr,sip_next;
8890 SeqIdPtr sip_prev;
8891 Int4Ptr trackarray;
8892
8893 i = AlnMgrCheckAlignForParent(sap);
8894 if (i == AM_CHILD)
8895 {
8896 salp = SeqAlignNew();
8897 amp = AlnMsgNew();
8898 amp->row_num = 1;
8899 amp->which_master = which_master;
8900 amp->from_m = from;
8901 amp->to_m = to;
8902 dsp_orig = (DenseSegPtr)(sap->segs);
8903 dsp = DenseSegNew();
8904 dsp->ids = SeqIdDupList(dsp_orig->ids);
8905 while ((Boolean) (more = AlnMgrGetNextAlnBit(sap, amp)))
8906 {
8907 dsp->numseg++; /* what to do if a row has all gaps?? */
8908 }
8909 dsp->dim = dsp_orig->dim;
8910 dsp->starts = (Int4Ptr)MemNew((dsp->numseg*dsp_orig->dim)*sizeof(Int4));
8911 dsp->strands = (Uint1Ptr)MemNew((dsp->numseg*dsp_orig->dim)*sizeof(Int4));
8912 dsp->lens = (Int4Ptr)MemNew((dsp->numseg)*sizeof(Int4));
8913 for (j=0; j<dsp_orig->dim; j++)
8914 {
8915 amp = AlnMsgReNew(amp);
8916 amp->row_num = j+1;
8917 amp->which_master = which_master;
8918 amp->from_m = from;
8919 amp->to_m = to;
8920 c = 0;
8921 while ((Boolean) (more = AlnMgrGetNextAlnBit(sap, amp)))
8922 {
8923 dsp->lens[c] = amp->to_b - amp->from_b + 1;
8924 if (amp->gap == 0)
8925 dsp->starts[(c*dsp->dim)+j] = amp->from_b;
8926 else
8927 dsp->starts[(c*dsp->dim)+j] = -1;
8928 c++;
8929 }
8930 }
8931 for (c=0; c<(dsp->dim*dsp->numseg); c++)
8932 {
8933 dsp->strands[c] = dsp_orig->strands[c];
8934 }
8935 salp->type = SAT_PARTIAL;
8936 salp->segtype = SAS_DENSEG;
8937 salp->segs = (Pointer)dsp;
8938 AlnMsgFree(amp);
8939 salp = am_cleanupsalp(salp);
8940 return salp;
8941 } else if (i == AM_PARENT)
8942 {
8943 amaip = (AMAlignIndexPtr)(sap->saip);
8944 if (amaip == NULL)
8945 return NULL;
8946 if (sap->type == SAT_MASTERSLAVE && amaip->mstype == AM_MASTERSLAVE)
8947 {
8948 salp = SeqAlignNew();
8949 salp->type = SAT_MASTERSLAVE;
8950 salp->segtype = SAS_DENSEG;
8951 salp->dim = amaip->numrows;
8952 dsp = DenseSegNew();
8953 dsp->dim = amaip->numrows;
8954 dsp->numseg = amaip->numseg;
8955 dsp->starts = (Int4Ptr)MemNew((amaip->numseg+1)*(amaip->numrows)*sizeof(Int4));
8956 dsp->lens = (Int4Ptr)MemNew((amaip->numseg+1)* sizeof(Int4));
8957 dsp->strands = (Uint1Ptr)MemNew((amaip->numseg+1)*(amaip->numrows)*sizeof(Uint1));
8958 sip_curr = NULL;
8959 for (j=0; j<amaip->numrows; j++)
8960 {
8961 sip = AlnMgrGetNthSeqIdPtr(sap, j+1);
8962 if (sip_curr != NULL)
8963 {
8964 sip_prev->next = sip;
8965 sip_prev = sip;
8966 } else
8967 sip_curr = sip_prev = sip;
8968 }
8969 dsp->ids = SeqIdDupList(sip_curr);
8970 amp = AlnMsgNew();
8971 for (j=0; j<(amaip->numrows); j++)
8972 {
8973 if (j == amaip->master - 1)
8974 salp->master = SeqIdDup(sip_curr);
8975 sip_next = sip_curr->next;
8976 SeqIdFree(sip_curr);
8977 sip_curr = sip_next;
8978 amp->which_master = which_master;
8979 amp->from_m = from;
8980 amp->to_m = to;
8981 amp->row_num = j + 1;
8982 more = TRUE;
8983 n = 0;
8984 while (more)
8985 {
8986 more = AlnMgrGetNextAlnBit(sap, amp);
8987 if (amp->gap == 0)
8988 {
8989 dsp->starts[n*(dsp->dim) + j] = amp->from_b;
8990 } else
8991 {
8992 dsp->starts[n*(dsp->dim) + j] = -1;
8993 }
8994 if (j == 0)
8995 dsp->lens[n] = amp->to_b - amp->from_b + 1;
8996 dsp->strands[n*(dsp->dim) + j] = amp->strand;
8997 n++;
8998 }
8999 amp = AlnMsgReNew(amp);
9000 }
9001 AlnMsgFree(amp);
9002 salp->segs = (Pointer)dsp;
9003 salp = am_cleanupsalp(salp);
9004 return salp;
9005 } else if (sap->type == SAT_PARTIAL || (sap->type == SAT_MASTERSLAVE && amaip->mstype == AM_SEGMENTED_MASTERSLAVE))
9006 {
9007 amp = AlnMsgNew();
9008 amp->which_master = which_master;
9009 amp->from_m = from;
9010 amp->to_m = to;
9011 amp->row_num = 1;
9012 trackarray = (Int4Ptr)MemNew((amaip->numseg+1)*sizeof(Int4));
9013 numaln = 0;
9014 while ((Boolean) (more = AlnMgrGetNextAlnBit(sap, amp)))
9015 {
9016 if (amp->send_space)
9017 {
9018 numaln++;
9019 amp->send_space = FALSE;
9020 } else
9021 trackarray[numaln]++;
9022 }
9023 salp_head = NULL;
9024 sip_curr = NULL;
9025 for (j=0; j<amaip->numrows; j++)
9026 {
9027 sip = AlnMgrGetNthSeqIdPtr(sap, j+1);
9028 if (sip_curr != NULL)
9029 {
9030 sip_prev->next = sip;
9031 sip_prev = sip;
9032 } else
9033 sip_curr = sip_prev = sip;
9034 }
9035 for (j=0; j<numaln; j++)
9036 {
9037 salp = SeqAlignNew();
9038 if (salp_head != NULL)
9039 {
9040 salp_prev->next = salp;
9041 salp_prev = salp;
9042 } else
9043 salp_prev = salp_head = salp;
9044 salp->type = SAT_PARTIAL;
9045 salp->segtype = SAS_DENSEG;
9046 salp->dim = amaip->numrows;
9047 dsp = DenseSegNew();
9048 dsp->dim = amaip->numrows;
9049 dsp->numseg = trackarray[j]+1;
9050 dsp->starts = (Int4Ptr)MemNew((dsp->dim)*(trackarray[j]+1)*sizeof(Int4));
9051 dsp->lens = (Int4Ptr)MemNew((dsp->dim)*(trackarray[j]+1)*sizeof(Int4));
9052 dsp->strands = (Uint1Ptr)MemNew((dsp->dim)*(trackarray[j]+1)*sizeof(Uint1));
9053 dsp->ids = SeqIdDupList(sip_curr);
9054 salp->segs = (Pointer)dsp;
9055 }
9056 amp = AlnMsgReNew(amp);
9057 for (j=0; j<(amaip->numrows); j++)
9058 {
9059 salp = salp_head;
9060 dsp = (Pointer)(salp->segs);
9061 if (j == amaip->master - 1)
9062 salp->master = SeqIdDup(sip_curr);
9063 sip_next = sip_curr->next;
9064 SeqIdFree(sip_curr);
9065 sip_curr = sip_next;
9066 amp->which_master = which_master;
9067 amp->from_m = from;
9068 amp->to_m = to;
9069 amp->row_num = j + 1;
9070 more = TRUE;
9071 n = 0;
9072 while ((more = AlnMgrGetNextAlnBit(sap, amp)) == TRUE)
9073 {
9074 if (amp->gap == 0)
9075 {
9076 dsp->starts[n*(dsp->dim) + j] = amp->from_b;
9077 } else
9078 {
9079 dsp->starts[n*(dsp->dim) + j] = -1;
9080 }
9081 if (j == 0)
9082 dsp->lens[n] = amp->to_b - amp->from_b + 1;
9083 dsp->strands[n*(dsp->dim) + j] = amp->strand;
9084 n++;
9085 if (amp->send_space == TRUE && amp->place != 1)
9086 {
9087 salp = salp->next;
9088 dsp = (DenseSegPtr)(salp->segs);
9089 amp->send_space = FALSE;
9090 n=0;
9091 }
9092 }
9093 amp = AlnMsgReNew(amp);
9094 }
9095 MemFree(trackarray);
9096 AlnMsgFree(amp);
9097 salp = am_cleanupsalp(salp);
9098 return salp_head;
9099 } else if (sap->type == SAT_DIAGS)
9100 {
9101 salp = SeqAlignDup(sap);
9102 return salp;
9103 }
9104 }
9105 return NULL;
9106 }
9107
AlnMgrGetSubAlignSpecial(SeqAlignPtr sap,Int4 master,Int4 from,Int4 to)9108 NLM_EXTERN SeqAlignPtr AlnMgrGetSubAlignSpecial(SeqAlignPtr sap, Int4 master, Int4 from, Int4 to)
9109 {
9110 AMAlignIndexPtr amaip;
9111 AlnMsgPtr amp1;
9112 AlnMsgPtr amp2;
9113 DenseDiagPtr ddp_head;
9114 DenseDiagPtr ddp_new;
9115 DenseDiagPtr ddp_prev;
9116 Int4 i;
9117 Boolean more1;
9118 Boolean more2;
9119 SeqAlignPtr sap_head;
9120 SeqAlignPtr sap_new;
9121 SeqAlignPtr sap_prev;
9122
9123 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
9124 return NULL;
9125 amaip = (AMAlignIndexPtr)(sap->saip);
9126 if (master > amaip->numrows)
9127 return NULL;
9128 amp1 = AlnMsgNew();
9129 amp2 = AlnMsgNew();
9130 sap_head = sap_prev = NULL;
9131 for (i=0; i<amaip->numrows; i++)
9132 {
9133 if ((i+1)!=master)
9134 {
9135 sap_new = SeqAlignNew();
9136 sap_new->type = SAT_PARTIAL;
9137 sap_new->segtype = SAS_DENDIAG;
9138 sap_new->dim = 2;
9139 amp1 = AlnMsgReNew(amp1);
9140 amp2 = AlnMsgReNew(amp2);
9141 amp1->row_num = master;
9142 amp2->row_num = i+1;
9143 amp1->from_m = amp2->from_m = from;
9144 amp1->to_m = amp2->to_m = to;
9145 ddp_head = ddp_prev = NULL;
9146 while ((Boolean) (more1 = AlnMgrGetNextAlnBit(sap, amp1)) && (Boolean)(more2 = AlnMgrGetNextAlnBit(sap, amp2)))
9147 {
9148 if (amp1->gap == 0 && amp2->gap == 0)
9149 {
9150 ddp_new = DenseDiagNew();
9151 ddp_new->dim = 2;
9152 ddp_new->starts = (Int4Ptr)MemNew(2*sizeof(Int4));
9153 ddp_new->len = amp1->to_b - amp1->from_b + 1;
9154 ddp_new->starts[0] = amp1->from_b;
9155 ddp_new->starts[1] = amp2->from_b;
9156 ddp_new->strands = (Uint1Ptr)MemNew(2*sizeof(Uint1));
9157 ddp_new->strands[0] = amp1->strand;
9158 ddp_new->strands[1] = amp2->strand;
9159 ddp_new->id = AlnMgrGetNthSeqIdPtr(sap, master);
9160 ddp_new->id->next = AlnMgrGetNthSeqIdPtr(sap, i+1);
9161 if (ddp_head != NULL)
9162 {
9163 ddp_prev->next = ddp_new;
9164 ddp_prev = ddp_new;
9165 } else
9166 ddp_head = ddp_prev = ddp_new;
9167 }
9168 }
9169 sap_new->segs = (Pointer)ddp_head;
9170 /* AlnMgrMergeNeighbors(sap_new); */
9171 if (sap_head != NULL)
9172 {
9173 sap_prev->next = sap_new;
9174 sap_prev = sap_new;
9175 } else
9176 sap_head = sap_prev = sap_new;
9177 }
9178 }
9179 amp1 = AlnMsgFree(amp1);
9180 amp2 = AlnMsgFree(amp2);
9181 return sap_head;
9182 }
9183
9184
9185 /********************************************************************************
9186 *
9187 * viewer and editor management functions
9188 *
9189 ********************************************************************************/
9190
AlnMgrCopyIndexedParentSeqAlign(SeqAlignPtr sap)9191 NLM_EXTERN SeqAlignPtr AlnMgrCopyIndexedParentSeqAlign(SeqAlignPtr sap)
9192 {
9193 AMAlignIndexPtr amaip;
9194 AMAlignIndexPtr amaip_new;
9195 Boolean found;
9196 Int4 i;
9197 Int4Ptr orderarray;
9198 Int4 r;
9199 SeqAlignPtr sap_new;
9200 SeqAlignPtr sap_tmp;
9201 SeqAlignPtr seg_head;
9202 SeqAlignPtr seg_new;
9203 SeqAlignPtr seg_prev;
9204 SeqAlignPtr seg_tmp;
9205
9206 if (sap->saip == NULL)
9207 return NULL;
9208 if (sap->saip->indextype != INDEX_PARENT)
9209 return NULL;
9210 amaip = (AMAlignIndexPtr)sap->saip;
9211 amaip_new = AMAlignIndexNew();
9212 sap_new = SeqAlignDup(sap);
9213 sap_new->saip = (SeqAlignIndexPtr)amaip_new;
9214 amaip_new->indextype = amaip->indextype;
9215 amaip_new->freefunc = amaip->freefunc;
9216 amaip_new->mstype = amaip->mstype;
9217 amaip_new->aligncoords = (Uint4Ptr)MemNew((amaip->numseg+1)*sizeof(Uint4));
9218 amaip_new->numseg = amaip->numseg;
9219 amaip_new->lens = (Int4Ptr)MemNew((amaip->numseg+1)*sizeof(Int4));
9220 for (i=0; i<amaip->numseg; i++)
9221 {
9222 amaip_new->aligncoords[i] = amaip->aligncoords[i];
9223 amaip_new->lens[i] = amaip->lens[i];
9224 }
9225 amaip_new->starts = (Int4Ptr)MemNew(amaip->startsize*sizeof(Int4));
9226 amaip_new->startsize = amaip->startsize;
9227 MemCpy(amaip_new->starts, amaip->starts, amaip->startsize*sizeof(Int4));
9228 amaip_new->alnsaps = amaip->alnsaps;
9229 amaip_new->numsaps = amaip->numsaps;
9230 amaip_new->ids = SeqIdDupList(amaip->ids);
9231 amaip_new->numbsqs = amaip->numbsqs;
9232 amaip_new->rowsource = (RowSourcePtr PNTR)MemNew((amaip->numrows+1)*sizeof(RowSourcePtr));
9233 for (i=0; i<amaip->numrows; i++)
9234 {
9235 amaip_new->rowsource[i] = AlnMgrCopyRowSource(amaip->rowsource[i]);
9236 }
9237 amaip_new->numrows = amaip->numrows;
9238 amaip_new->master = amaip->master;
9239 seg_head = NULL;
9240 sap_tmp = (SeqAlignPtr)sap->segs;
9241 while (sap_tmp != NULL)
9242 {
9243 seg_new = SeqAlignDup(sap_tmp);
9244 if (seg_head != NULL)
9245 {
9246 seg_prev->next = seg_new;
9247 seg_prev = seg_new;
9248 } else
9249 seg_head = seg_prev = seg_new;
9250 sap_tmp = sap_tmp->next;
9251 }
9252 sap_new->segs = (Pointer)seg_head;
9253 i = 0;
9254 orderarray = (Int4Ptr)MemNew((amaip->numsaps)*sizeof(Int4));
9255 seg_new = seg_head;
9256 sap_tmp = (SeqAlignPtr)sap->segs;
9257 while (sap_tmp != NULL && seg_new != NULL)
9258 {
9259 seg_new->saip = AlnMgrCopyIndexesForChildSeqAlign(sap_tmp);
9260 found = FALSE;
9261 r = 0;
9262 while (!found && r < amaip->numsaps)
9263 {
9264 if (sap_tmp == amaip->saps[r])
9265 {
9266 orderarray[i] = r;
9267 found = TRUE;
9268 }
9269 r++;
9270 }
9271 i++;
9272 seg_new = seg_new->next;
9273 sap_tmp = sap_tmp->next;
9274 }
9275 amaip_new->saps = (SeqAlignPtr PNTR)MemNew((amaip->numsaps+1)*sizeof(SeqAlignPtr));
9276 seg_tmp = (SeqAlignPtr)sap_new->segs;
9277 i = 0;
9278 while (seg_tmp)
9279 {
9280 amaip_new->saps[orderarray[i]] = seg_tmp;
9281 i++;
9282 seg_tmp = seg_tmp->next;
9283 }
9284 sap_tmp = (SeqAlignPtr)sap->segs;
9285 amaip_new->amadp = (AMAlignDatPtr PNTR)MemNew((amaip->numbsqs+1)*sizeof(AMAlignDatPtr));
9286 seg_head = (SeqAlignPtr)sap_new->segs;
9287 for (i=0; i<amaip->numbsqs; i++)
9288 {
9289 amaip_new->amadp[i] = AlnMgrCopyamadp(amaip->amadp[i], sap_tmp, seg_head);
9290 }
9291 MemFree(orderarray);
9292 return sap_new;
9293 }
9294
AlnMgrCopyRowSource(RowSourcePtr rsp)9295 NLM_EXTERN RowSourcePtr AlnMgrCopyRowSource(RowSourcePtr rsp)
9296 {
9297 Int4 i;
9298 RowSourcePtr rsp_new;
9299
9300 rsp_new = RowSourceNew();
9301 rsp_new->id = SeqIdDup(rsp->id);
9302 rsp_new->which_saps = (Uint4Ptr)MemNew((rsp->numsaps+1)*sizeof(Uint4));
9303 rsp_new->num_in_sap = (Uint4Ptr)MemNew((rsp->numsaps+1)*sizeof(Uint4));
9304 for (i=0; i<rsp->numsaps; i++)
9305 {
9306 rsp_new->which_saps[i] = rsp->which_saps[i];
9307 rsp_new->num_in_sap[i] = rsp->num_in_sap[i];
9308 }
9309 rsp_new->numsaps = rsp->numsaps;
9310 return rsp_new;
9311 }
9312
AlnMgrCopyamadp(AMAlignDatPtr amadp,SeqAlignPtr sap_tmp,SeqAlignPtr seg_head)9313 NLM_EXTERN AMAlignDatPtr AlnMgrCopyamadp(AMAlignDatPtr amadp, SeqAlignPtr sap_tmp, SeqAlignPtr seg_head)
9314 {
9315 AMAlignDatPtr amadp_new;
9316 Boolean found;
9317 Int4 i;
9318 Int4 j;
9319 Int4Ptr orderarray;
9320 SeqAlignPtr sap_old;
9321 SeqAlignPtr sap_new;
9322
9323 if (sap_tmp == NULL || amadp == NULL || seg_head == NULL)
9324 return NULL;
9325 amadp_new = AMAlignDatNew();
9326 amadp_new->sip = SeqIdDup(amadp->sip);
9327 amadp_new->numsaps = amadp->numsaps;
9328 amadp_new->saps = (SeqAlignPtr PNTR)MemNew((amadp->numsaps+1)*sizeof(SeqAlignPtr));
9329 orderarray = (Int4Ptr)MemNew((amadp->numsaps+1)*sizeof(Int4));
9330 sap_old = sap_tmp;
9331 j = 0;
9332 while (sap_old)
9333 {
9334 i=0;
9335 found = FALSE;
9336 while (!found && i<amadp->numsaps)
9337 {
9338 if (sap_old == amadp->saps[i])
9339 {
9340 orderarray[i] = j;
9341 found = TRUE;
9342 }
9343 i++;
9344 }
9345 sap_old = sap_old->next;
9346 j++;
9347 }
9348 for (i=0; i<amadp->numsaps; i++)
9349 {
9350 sap_new = seg_head;
9351 j=0;
9352 while (j<orderarray[i])
9353 {
9354 sap_new = sap_new->next;
9355 j++;
9356 }
9357 amadp_new->saps[i] = sap_new;
9358 }
9359 amadp_new->segments = (Uint2Ptr)MemNew((amadp->numseg+1)*sizeof(Uint2));
9360 for (i=0; i<amadp->numseg; i++)
9361 {
9362 amadp_new->segments[i] = amadp->segments[i];
9363 }
9364 amadp_new->numseg = amadp->numseg;
9365 MemFree(orderarray);
9366 return amadp_new;
9367 }
9368
AlnMgrCopyIndexesForChildSeqAlign(SeqAlignPtr sap)9369 NLM_EXTERN SeqAlignIndexPtr AlnMgrCopyIndexesForChildSeqAlign(SeqAlignPtr sap)
9370 {
9371 DenseSegPtr dsp;
9372 Int4 i;
9373 SAIndexPtr saip;
9374 SAIndexPtr saip_new;
9375
9376 if (sap == NULL || sap->saip == NULL)
9377 return NULL;
9378 dsp = (DenseSegPtr)sap->segs;
9379 saip = (SAIndexPtr)sap->saip;
9380 saip_new = SAIndexNew();
9381 saip_new->indextype = saip->indextype;
9382 saip_new->freefunc = saip->freefunc;
9383 saip_new->master = saip->master;
9384 saip_new->aligncoords = (Uint4Ptr)MemNew((dsp->numseg + 1)*sizeof(Uint4));
9385 for (i=0; i<dsp->numseg; i++)
9386 {
9387 saip_new->aligncoords[i] = saip->aligncoords[i];
9388 }
9389 saip_new->ssdp = (SASeqDatPtr PNTR)MemNew((dsp->dim+1)*sizeof(SASeqDatPtr));
9390 for (i=0; i<dsp->dim; i++)
9391 {
9392 saip_new->ssdp[i] = AlnMgrCopySASeqDat(saip->ssdp[i]);
9393 }
9394 saip_new->numseqs = dsp->dim;
9395 return (SeqAlignIndexPtr)saip_new;
9396 }
9397
AlnMgrCopySASeqDat(SASeqDatPtr ssdp)9398 NLM_EXTERN SASeqDatPtr AlnMgrCopySASeqDat(SASeqDatPtr ssdp)
9399 {
9400 Int4 i;
9401 SASeqDatPtr ssdp_new;
9402
9403 if (ssdp == NULL)
9404 return NULL;
9405 ssdp_new = SASeqDatNew();
9406 ssdp_new->numsect = ssdp->numsect;
9407 ssdp_new->numunsect = ssdp->numunsect;
9408 ssdp_new->sect = (Uint2Ptr)MemNew((ssdp->numsect+1)*sizeof(Uint2));
9409 for (i=0; i<ssdp->numsect; i++)
9410 {
9411 ssdp_new->sect[i] = ssdp->sect[i];
9412 }
9413 ssdp_new->unsect = (Uint2Ptr)MemNew((ssdp->numunsect+1)*sizeof(Uint2));
9414 for (i=0; i<ssdp->numunsect; i++)
9415 {
9416 ssdp_new->unsect[i] = ssdp->unsect[i];
9417 }
9418 return ssdp_new;
9419 }
9420
AlnMgrCopyAndIndexSingleAlignment(SeqAlignPtr sap)9421 NLM_EXTERN SeqAlignPtr AlnMgrCopyAndIndexSingleAlignment(SeqAlignPtr sap)
9422 {
9423 SeqAlignPtr sap_new;
9424
9425 if (sap == NULL)
9426 return NULL;
9427 sap_new = SeqAlignDup(sap);
9428 sap_new->type = SAT_MASTERSLAVE;
9429 sap_new->saip = AlnMgrCopyIndexesForChildSeqAlign(sap);
9430 return sap_new;
9431 }
9432
AlnMgrCopyIndexedParentIntoSap(SeqAlignPtr sap,SeqAlignPtr target)9433 NLM_EXTERN Boolean AlnMgrCopyIndexedParentIntoSap(SeqAlignPtr sap, SeqAlignPtr target)
9434 {
9435 AMAlignIndexPtr amaip;
9436 AMAlignIndexPtr amaip_new;
9437 DenseDiagPtr ddp;
9438 DenseDiagPtr ddp_next;
9439 DenseSegPtr dsp;
9440 Boolean found;
9441 Int4 i;
9442 Int4Ptr orderarray;
9443 Int4 r;
9444 SeqAlignPtr salp;
9445 SeqAlignPtr sap_tmp;
9446 SeqAlignPtr seg_head;
9447 SeqAlignPtr seg_new;
9448 SeqAlignPtr seg_prev;
9449 SeqAlignPtr seg_tmp;
9450
9451 if (sap->saip == NULL || target == NULL)
9452 return FALSE;
9453 if (sap->saip->indextype != INDEX_PARENT)
9454 return FALSE;
9455 AMAlignIndexFree((Pointer)target->saip);
9456 target->saip = NULL;
9457 amaip = (AMAlignIndexPtr)sap->saip;
9458 amaip_new = AMAlignIndexNew();
9459 target->type = sap->type;
9460 target->dim = sap->dim;
9461 target->score = ScoreSetFree(target->score);
9462 target->score = ScoreDup(sap->score);
9463 target->master = SeqIdFree(target->master);
9464 target->master = SeqIdDup(sap->master);
9465 SeqAlignSetFree(target->next);
9466 target->next = NULL;
9467 if (target->segtype == SAS_DISC)
9468 {
9469 salp = (SeqAlignPtr)(target->segs);
9470 SeqAlignSetFree(salp);
9471 } else if (target->segtype == SAS_DENSEG)
9472 {
9473 dsp = (DenseSegPtr)(target->segs);
9474 DenseSegFree(dsp);
9475 } else if (target->segtype == SAS_DENDIAG)
9476 {
9477 ddp = (DenseDiagPtr)(target->segs);
9478 while (ddp != NULL)
9479 {
9480 ddp_next = ddp->next;
9481 ddp->next = NULL;
9482 DenseDiagFree(ddp);
9483 ddp = ddp_next;
9484 }
9485 }
9486 target->segs = NULL;
9487 target->segtype = sap->segtype;
9488 target->saip = (SeqAlignIndexPtr)amaip_new;
9489 amaip_new->indextype = amaip->indextype;
9490 amaip_new->freefunc = amaip->freefunc;
9491 amaip_new->mstype = amaip->mstype;
9492 amaip_new->aligncoords = (Uint4Ptr)MemNew((amaip->numseg+1)*sizeof(Uint4));
9493 amaip_new->numseg = amaip->numseg;
9494 amaip_new->lens = (Int4Ptr)MemNew((amaip->numseg+1)*sizeof(Int4));
9495 for (i=0; i<amaip->numseg; i++)
9496 {
9497 amaip_new->aligncoords[i] = amaip->aligncoords[i];
9498 amaip_new->lens[i] = amaip->lens[i];
9499 }
9500 if (amaip->numseg == 0)
9501 {
9502 amaip_new->ulens = (Int4Ptr)MemNew(sizeof(Int4));
9503 amaip_new->ulens[0] = amaip->ulens[0];
9504 } else if(amaip->ulens != NULL) {
9505 amaip_new->ulens = (Int4Ptr)MemNew(sizeof(Int4)*amaip->numseg);
9506 MemCopy(amaip_new->ulens, amaip->ulens, sizeof(Int4)*amaip->numseg);
9507 }
9508
9509 if (amaip->starts != NULL)
9510 {
9511 amaip_new->starts = (Int4Ptr)MemNew(amaip->startsize*sizeof(Int4));
9512 amaip_new->startsize = amaip->startsize;
9513 MemCpy(amaip_new->starts, amaip->starts, amaip->startsize*sizeof(Int4));
9514 }
9515 amaip_new->alnsaps = amaip->alnsaps;
9516 amaip_new->numsaps = amaip->numsaps;
9517 amaip_new->ids = SeqIdDupList(amaip->ids);
9518 amaip_new->numbsqs = amaip->numbsqs;
9519 amaip_new->rowsource = (RowSourcePtr PNTR)MemNew((amaip->numrows+1)*sizeof(RowSourcePtr));
9520 for (i=0; i<amaip->numrows; i++)
9521 {
9522 amaip_new->rowsource[i] = AlnMgrCopyRowSource(amaip->rowsource[i]);
9523 }
9524 amaip_new->numrows = amaip->numrows;
9525 amaip_new->master = amaip->master;
9526 sap_tmp = (SeqAlignPtr)sap->segs;
9527 seg_head = NULL;
9528 while (sap_tmp)
9529 {
9530 seg_new = SeqAlignDup(sap_tmp);
9531 sap_tmp = sap_tmp->next;
9532 if (seg_head != NULL)
9533 {
9534 seg_prev->next = seg_new;
9535 seg_prev = seg_new;
9536 } else
9537 seg_head = seg_prev = seg_new;
9538 }
9539 sap_tmp = (SeqAlignPtr)sap->segs;
9540 i = 0;
9541 orderarray = (Int4Ptr)MemNew((amaip->numsaps)*sizeof(Int4));
9542 target->segs = (SeqAlignPtr)seg_head;
9543 seg_new = seg_head;
9544 seg_head = NULL;
9545 while (sap_tmp && seg_new)
9546 {
9547 seg_new->saip = AlnMgrCopyIndexesForChildSeqAlign(sap_tmp);
9548 found = FALSE;
9549 r = 0;
9550 while (!found && r < amaip->numsaps)
9551 {
9552 if (sap_tmp == amaip->saps[r])
9553 {
9554 orderarray[i] = r;
9555 found = TRUE;
9556 }
9557 r++;
9558 }
9559 i++;
9560 seg_new = seg_new->next;
9561 sap_tmp = sap_tmp->next;
9562 }
9563 amaip_new->saps = (SeqAlignPtr PNTR)MemNew((amaip->numsaps+1)*sizeof(SeqAlignPtr));
9564 seg_tmp = (SeqAlignPtr)target->segs;
9565 i = 0;
9566 while (seg_tmp)
9567 {
9568 amaip_new->saps[orderarray[i]] = seg_tmp;
9569 i++;
9570 seg_tmp = seg_tmp->next;
9571 }
9572 sap_tmp = (SeqAlignPtr)sap->segs;
9573 amaip_new->amadp = (AMAlignDatPtr PNTR)MemNew((amaip->numbsqs+1)*sizeof(AMAlignDatPtr));
9574 seg_head = (SeqAlignPtr)target->segs;
9575 for (i=0; i<amaip->numbsqs; i++)
9576 {
9577 amaip_new->amadp[i] = AlnMgrCopyamadp(amaip->amadp[i], sap_tmp, seg_head);
9578 }
9579 MemFree(orderarray);
9580 return TRUE;
9581 }
9582
9583
9584 /**********************************************************************
9585 *
9586 * AlnMgrDeleteChildByPointer removes the specified child seqalign
9587 * from the set. Note that this function does not reindex the seqalign;
9588 * the calling function must do that if the return is TRUE (use
9589 * AlnMgrReIndexSeqAlign).
9590 *
9591 **********************************************************************/
AlnMgrDeleteChildByPointer(SeqAlignPtr parent,SeqAlignPtr child)9592 NLM_EXTERN Boolean AlnMgrDeleteChildByPointer(SeqAlignPtr parent, SeqAlignPtr child)
9593 {
9594 Boolean found;
9595 Int4 i;
9596 SeqAlignPtr salp;
9597 SeqAlignPtr salp_head;
9598 SeqAlignPtr salp_prev;
9599
9600 if (parent == NULL || child == NULL)
9601 return FALSE;
9602 i = AlnMgrCheckAlignForParent(parent);
9603 if (i != INDEX_PARENT)
9604 return FALSE;
9605 salp_head = salp_prev = NULL;
9606 salp = (SeqAlignPtr)(parent->segs);
9607 found = FALSE;
9608 while (salp && !found)
9609 {
9610 if (salp == child)
9611 found = TRUE;
9612 else
9613 {
9614 if (salp_head)
9615 salp_prev = salp;
9616 else
9617 salp_head = salp_prev = salp;
9618 salp = salp->next;
9619 }
9620 }
9621 if (!found)
9622 return FALSE;
9623 if (salp_head != NULL)
9624 {
9625 salp_prev->next = salp->next;
9626 salp->next = NULL;
9627 SeqAlignFree(salp);
9628 } else
9629 {
9630 salp_head = salp->next;
9631 salp->next = NULL;
9632 SeqAlignFree(salp);
9633 }
9634 parent->segs = (Pointer)salp_head;
9635 return TRUE;
9636 }
9637
/* Frees every node of an AMBitty list, starting at abp and following ->next. */
/* Always returns NULL so that the caller can reset its pointer in one step.  */
static AMBittyPtr AlnMgrFreeBitty(AMBittyPtr abp)
{
   AMBitty *doomed;

   for (doomed = abp; doomed != NULL; doomed = abp)
   {
      abp = doomed->next;   /* save the successor before the node goes away */
      MemFree(doomed);
   }
   return NULL;
}
9648
9649 /**********************************************************************
9650 *
9651 * AlnMgrDeleteNthRow deletes the specified row from either a parent
9652 * or a child seqalign. Note that with a parent seqalign, this may
9653 * result in the deletion of a child seqalign. The function does NOT
9654 * reindex the alignment; the calling function must do that upon a
9655 * TRUE return (use AlnMgrReIndexSeqAlign).
9656 *
9657 **********************************************************************/
AlnMgrDeleteNthRow(SeqAlignPtr sap,Int4 row)9658 NLM_EXTERN Boolean AlnMgrDeleteNthRow(SeqAlignPtr sap, Int4 row)
9659 {
9660 AMBittyPtr abp;
9661 AMBittyPtr abp_head;
9662 AMBittyPtr abp_next;
9663 AMBittyPtr abp_prev;
9664 AMAlignIndexPtr amaip;
9665 DenseSegPtr dsp;
9666 DenseSegPtr dsp_new;
9667 Boolean empty;
9668 Boolean found;
9669 Int4 i;
9670 SeqIdPtr id;
9671 Int4 j;
9672 Int4 n1;
9673 Int4 n2;
9674 Int4 numsame;
9675 Int4 r;
9676 RowSourcePtr rsp;
9677 SAIndexPtr saip;
9678 Boolean same;
9679 SeqIdPtr sip;
9680 SeqIdPtr sip_head;
9681 SeqIdPtr sip_prev;
9682
9683 if (sap == NULL || row < 1)
9684 return FALSE;
9685 abp_head = NULL;
9686 i = AlnMgrCheckAlignForParent(sap);
9687 if (i == AM_PARENT)
9688 {
9689 amaip = (AMAlignIndexPtr)sap->saip;
9690 if (amaip == NULL)
9691 return FALSE;
9692 if (row > amaip->numrows)
9693 return FALSE;
9694 rsp = amaip->rowsource[row-1];
9695 for (i=0; i<rsp->numsaps; i++)
9696 {
9697 if (rsp->which_saps[i] != 0)
9698 {
9699 abp = (AMBittyPtr)MemNew(sizeof(AMBitty));
9700 abp->num1 = rsp->which_saps[i]-1;
9701 abp->num2 = rsp->num_in_sap[i];
9702 if (abp_head != NULL)
9703 {
9704 abp_prev->next = abp;
9705 abp_prev = abp;
9706 } else
9707 abp_head = abp_prev = abp;
9708 }
9709 }
9710 abp = abp_head;
9711 abp_prev = NULL;
9712 while (abp != NULL)
9713 {
9714 abp_next = abp->next;
9715 found = FALSE;
9716 while (abp_next != NULL && !found)
9717 {
9718 if (abp_next->num1 == abp->num1)
9719 found = TRUE;
9720 abp_next = abp_next->next;
9721 }
9722 if (found)
9723 {
9724 if (abp_prev != NULL)
9725 {
9726 abp_prev->next = abp->next;
9727 abp->next = NULL;
9728 MemFree(abp);
9729 abp = abp_prev->next;
9730 } else
9731 {
9732 abp_head = abp->next;
9733 abp->next = NULL;
9734 MemFree(abp);
9735 abp = abp_head;
9736 }
9737 } else
9738 abp = abp->next;
9739 }
9740 abp = abp_head;
9741 while (abp != NULL)
9742 {
9743 if (AlnMgrGetNumRows(amaip->saps[abp->num1]) > 2)
9744 {
9745 if (!AlnMgrDeleteNthRow(amaip->saps[abp->num1], abp->num2)) {
9746 abp = AlnMgrFreeBitty(abp);
9747 return FALSE;
9748 }
9749 } else
9750 {
9751 if (!AlnMgrDeleteChildByPointer(sap, amaip->saps[abp->num1])) {
9752 abp = AlnMgrFreeBitty(abp);
9753 return FALSE;
9754 }
9755 }
9756 abp = abp->next;
9757 }
9758 abp = AlnMgrFreeBitty(abp);
9759 return TRUE;
9760 } else if (i == AM_CHILD)
9761 {
9762 saip = (SAIndexPtr)(sap->saip);
9763 dsp = (DenseSegPtr)(sap->segs);
9764 if (dsp->dim <=2 || row > dsp->dim)
9765 return FALSE;
9766 for (i=0; i<=dsp->numseg-1; i++)
9767 {
9768 same = TRUE;
9769 j = 0;
9770 empty = TRUE;
9771 while (j<dsp->dim)
9772 {
9773 if (j!=row-1)
9774 {
9775 n1 = binary_search_on_uint2_list(saip->ssdp[j]->sect, i, saip->ssdp[j]->numsect);
9776 if (i != dsp->numseg-1)
9777 n2 = binary_search_on_uint2_list(saip->ssdp[j]->sect, i+1, saip->ssdp[j]->numsect);
9778 else
9779 n2 = -2;
9780 if (n1 != -1)
9781 {
9782 empty = FALSE;
9783 n1 = 1;
9784 }
9785 if (n2 != -1)
9786 n2 = 1;
9787 if ((n1 != n2) || (dsp->numseg == 1))
9788 same = FALSE;
9789 }
9790 j++;
9791 }
9792 if (empty)
9793 {
9794 abp = (AMBittyPtr)MemNew(sizeof(AMBitty));
9795 abp->num1 = i;
9796 abp->num2 = 1;
9797 if (abp_head != NULL)
9798 {
9799 abp_prev->next = abp;
9800 abp_prev = abp;
9801 } else
9802 abp_head = abp_prev = abp;
9803 } else if (same)
9804 {
9805 abp = (AMBittyPtr)MemNew(sizeof(AMBitty));
9806 abp->num1 = i;
9807 if (abp_head != NULL)
9808 {
9809 abp_prev->next = abp;
9810 abp_prev = abp;
9811 } else
9812 abp_head = abp_prev = abp;
9813 }
9814 }
9815 abp = abp_head;
9816 numsame = 0;
9817 empty = FALSE;
9818 while (abp != NULL)
9819 {
9820 numsame++;
9821 abp = abp->next;
9822 }
9823 dsp_new = DenseSegNew();
9824 dsp_new->numseg = dsp->numseg - numsame;
9825 dsp_new->dim = dsp->dim - 1;
9826 dsp_new->starts = (Int4Ptr)MemNew((dsp_new->numseg)*(dsp_new->dim)*sizeof(Int4));
9827 dsp_new->strands = (Uint1Ptr)MemNew((dsp_new->numseg)*(dsp_new->dim)*sizeof(Uint1));
9828 dsp_new->lens = (Int4Ptr)MemNew((dsp_new->numseg)*sizeof(Int4));
9829 abp = abp_head;
9830 j = i = 0;
9831 while (i<dsp->numseg)
9832 {
9833 n1 = 0;
9834 empty = FALSE;
9835 if (abp != NULL)
9836 {
9837 if (abp->num1 == i && abp->num2 == 1)
9838 {
9839 empty = TRUE;
9840 abp = abp->next;
9841 } else if (abp->num1 == i)
9842 {
9843 n1 = 1;
9844 while (abp->next != NULL && abp->next->num1 == abp->num1 + 1)
9845 {
9846 n1++;
9847 abp = abp->next;
9848 }
9849 abp = abp->next;
9850 }
9851 }
9852 if (!empty)
9853 {
9854 n2 = 0;
9855 for (r=0; r<dsp->dim; r++)
9856 {
9857 if (r != row - 1)
9858 {
9859 dsp_new->starts[j*(dsp_new->dim) + n2] = dsp->starts[i*(dsp->dim) + r];
9860 dsp_new->strands[j*(dsp_new->dim) + n2] = dsp->strands[i*(dsp->dim) + r];
9861 n2++;
9862 }
9863 }
9864 for (r=0; r<=n1; r++)
9865 {
9866 dsp_new->lens[j] += dsp->lens[i+r];
9867 }
9868 j++;
9869 }
9870 i = i + n1 + 1;
9871 }
9872 id = dsp->ids;
9873 sip_head = NULL;
9874 for (j=0; j<dsp->dim; j++)
9875 {
9876 if (j != row-1)
9877 {
9878 sip = SeqIdDup(id);
9879 if (sip_head != NULL)
9880 {
9881 sip_prev->next = sip;
9882 sip_prev = sip;
9883 } else
9884 sip_head = sip_prev = sip;
9885 }
9886 id = id->next;
9887 }
9888 dsp_new->ids = sip_head;
9889 DenseSegFree(dsp);
9890 sap->segs = (Pointer)dsp_new;
9891 sap->dim = dsp_new->dim;
9892 SAIndexFree(sap->saip);
9893 sap->saip = NULL;
9894 abp = abp_head;
9895 abp_head = AlnMgrFreeBitty(abp_head);
9896 return TRUE;
9897 }
9898 return FALSE;
9899 }
9900
9901 /*
9902 AlnMgrDeleteHiddenEx
9903
9904 Reads the Index, and updates the ->segs SeqAligns according
9905 to the content of the index. If DeleteSalp then either
9906 Frees unused SeqAligns.. OR
9907 deletes them from the Object Manager.
9908 Be careful to have saved the SeqAligns PNTRS before calling this function with
9909 DeleteSalp=FALSE.. otherwise will have dangling pointers.
9910
9911 AlnMgrDeleteHidden calls the EX function with DeleteSalp=TRUE;
9912
9913 */
9914
9915
AlnMgrDeleteHiddenEx(SeqAlignPtr sap,Boolean UseOM,Boolean DeleteSalp)9916 NLM_EXTERN void AlnMgrDeleteHiddenEx(SeqAlignPtr sap, Boolean UseOM, Boolean DeleteSalp)
9917 {
9918 AMAlignIndexPtr amaip;
9919 AMAlnKeeperPtr PNTR amkp;
9920 Int4 i;
9921 Int4 j;
9922 SeqAlignPtr salp;
9923 SeqAlignPtr salp_next;
9924
9925 if (sap == NULL || sap->saip == NULL)
9926 return;
9927 if (sap->saip->indextype != INDEX_PARENT)
9928 return;
9929 amaip = (AMAlignIndexPtr)(sap->saip);
9930 if (amaip->numsaps == amaip->alnsaps)
9931 return;
9932 amkp = (AMAlnKeeperPtr PNTR)MemNew((amaip->numsaps)*sizeof(AMAlnKeeperPtr));
9933 salp = (SeqAlignPtr)(sap->segs);
9934 i = 0;
9935 while (salp)
9936 {
9937 salp_next = salp->next;
9938 salp->next = NULL;
9939 amkp[i] = (AMAlnKeeperPtr)MemNew(sizeof(AMAlnKeeper));
9940 amkp[i]->align = salp;
9941 amkp[i]->am_delete = TRUE;
9942 salp = salp_next;
9943 i++;
9944 }
9945 amaip->numsaps = i; /* should be equal already! */
9946 for (i=0; i<amaip->numsaps; i++)
9947 {
9948 j = 0;
9949 while (j<amaip->alnsaps && (amkp[i]->am_delete))
9950 {
9951 if (amaip->saps[j] == amkp[i]->align)
9952 amkp[i]->am_delete = FALSE;
9953 j++;
9954 }
9955 }
9956 for (i=0; i<amaip->numsaps; i++)
9957 {
9958 if (amkp[i]->am_delete)
9959 {
9960 if(DeleteSalp) {
9961 if (UseOM)
9962 ObjMgrDelete(OBJ_SEQALIGN, (Pointer)(amkp[i]->align));
9963 else
9964 SeqAlignFree(amkp[i]->align);
9965 }
9966 }
9967 }
9968 for (i=0; i<amaip->alnsaps-1; i++)
9969 {
9970 amaip->saps[i]->next = amaip->saps[i+1];
9971 }
9972 for (i=0; i<amaip->numsaps; i++)
9973 {
9974 MemFree(amkp[i]);
9975 }
9976 MemFree(amkp);
9977 amaip->numsaps = amaip->alnsaps;
9978 sap->segs = (Pointer)amaip->saps[0];
9979 AlnMgrReIndexSeqAlign(sap);
9980 return;
9981 }
9982
AlnMgrDeleteHidden(SeqAlignPtr sap,Boolean UseOM)9983 NLM_EXTERN void AlnMgrDeleteHidden(SeqAlignPtr sap, Boolean UseOM) {
9984 AlnMgrDeleteHiddenEx(sap, UseOM, TRUE);
9985 return;
9986 }
9987
9988
/* am_translate_row_num looks up, in the row source of the given 1-based  */
/* parent row, the num_in_sap entry for position n (0-based).  Returns 0  */
/* if the index is NULL, the row is outside 1..numrows, the row has no    */
/* row source, or n is outside 0..numsaps-1.                              */
static Int4 am_translate_row_num (AMAlignIndexPtr amaip, Int4 n, Int4 row)
{
   RowSourcePtr  rsp;

   if (amaip == NULL || amaip->rowsource == NULL)
      return 0;
   /* row-1 indexes rowsource[], so negative and too-large rows are rejected too */
   if (row < 1 || row > amaip->numrows)
      return 0;
   rsp = amaip->rowsource[row-1];
   if (rsp == NULL || rsp->num_in_sap == NULL)
      return 0;
   if (n < 0 || n >= rsp->numsaps)
      return 0;
   return (rsp->num_in_sap[n]);
}
10000
am_is_consistent(SeqAlignPtr sap,SeqAlignPtr sap_new,Int4Ptr block_num)10001 static Boolean am_is_consistent(SeqAlignPtr sap, SeqAlignPtr sap_new, Int4Ptr block_num)
10002 {
10003 AMAlignIndexPtr amaip;
10004 Boolean done;
10005 DenseSegPtr dsp;
10006 DenseSegPtr dsp_new;
10007 Int4 i;
10008 Int4 j;
10009 Int4 n;
10010 Int4 m_start;
10011 Int4 m_stop;
10012 SeqAlignPtr sap_tmp;
10013 SeqIdPtr sip1;
10014 SeqIdPtr sip2;
10015 Uint1 strand1;
10016 Uint1 strand2;
10017 Boolean switched;
10018 Int4 u_start;
10019 Int4 u_stop;
10020
10021 if (block_num == NULL)
10022 return FALSE;
10023 amaip = (AMAlignIndexPtr)(sap->saip);
10024 dsp = (DenseSegPtr)(sap_new->segs);
10025 sip2 = dsp->ids;
10026 for (i=0; i<amaip->numrows; i++)
10027 {
10028 sip1 = AlnMgrGetNthSeqIdPtr(sap, i+1);
10029 if (SAM_OrderSeqID(sip1, sip2) != 0)
10030 return FALSE;
10031 SeqIdFree(sip1);
10032 sip2 = sip2->next;
10033 }
10034 for (i=0; i<amaip->numrows; i++)
10035 {
10036 strand1 = AlnMgrGetNthStrand(sap, i+1);
10037 strand2 = dsp->strands[i];
10038 if ((strand1 == Seq_strand_minus && strand2 != Seq_strand_minus) || (strand2 == Seq_strand_minus && strand1 != Seq_strand_minus))
10039 return FALSE;
10040 }
10041 switched = FALSE;
10042 for (i=0; i<amaip->numrows && !switched; i++)
10043 {
10044 j = am_translate_row_num (amaip, 0, i+1);
10045 if (j!= i+1)
10046 switched = TRUE;
10047 }
10048 if (switched)
10049 {
10050 dsp_new = DenseSegNew();
10051 dsp_new->dim = dsp->dim;
10052 dsp_new->numseg = dsp->numseg;
10053 dsp_new->starts = (Int4Ptr)MemNew((dsp->numseg)*(dsp->dim)*sizeof(Int4));
10054 dsp_new->strands = (Uint1Ptr)MemNew((dsp->numseg)*(dsp->dim)*(sizeof(Uint1)));
10055 dsp_new->lens = (Int4Ptr)MemNew(dsp->numseg*sizeof(Int4));
10056 for (i=0; i<amaip->numrows; i++)
10057 {
10058 j = am_translate_row_num (amaip, 0, i+1);
10059 for (n=0; n<dsp->numseg; n++)
10060 {
10061 dsp_new->lens[n] = dsp->lens[n];
10062 dsp_new->starts[(dsp->dim)*n + i] = dsp->starts[(dsp->dim)*n + j - 1];
10063 dsp_new->strands[(dsp->dim)*n + i] = dsp->strands[(dsp->dim)*n + j - 1];
10064 }
10065 }
10066 dsp_new->scores = dsp->scores;
10067 dsp->scores = NULL;
10068 dsp_new->ids = SeqIdDupList(((DenseSegPtr)(amaip->saps[0]->segs))->ids);;
10069 DenseSegFree(dsp);
10070 dsp = dsp_new;
10071 sap_new->segs = (Pointer)dsp;
10072 SAIndexFree(sap_new->saip);
10073 sap_new->saip = NULL;
10074 AlnMgrIndexSingleChildSeqAlign(sap_new);
10075 }
10076 AlnMgrGetNthSeqRangeInSA(sap_new, 1, &m_start, &m_stop);
10077 if (*block_num < 0) /* adding a block -- make sure it doesn't conflict */
10078 { /* and that all the new pieces fit the same way into the rows */
10079 AlnMgrGetNthSeqRangeInSA(sap, 1, &u_start, &u_stop);
10080 if (m_stop < u_start) /* in left tail */
10081 {
10082 for (i=2; i<=amaip->numrows; i++)
10083 {
10084 AlnMgrGetNthSeqRangeInSA(sap_new, i, &m_start, &m_stop);
10085 AlnMgrGetNthRowTail(sap, i, LEFT_TAIL, &u_start, &u_stop, NULL);
10086 if (m_start < u_start && m_stop > u_stop)
10087 return FALSE;
10088 }
10089 } else if (m_start > u_stop) /* in right tail */
10090 {
10091 for (i=2; i<=amaip->numrows; i++)
10092 {
10093 AlnMgrGetNthSeqRangeInSA(sap_new, i, &m_start, &m_stop);
10094 AlnMgrGetNthRowTail(sap, i, RIGHT_TAIL, &u_start, &u_stop, NULL);
10095 if (m_start < u_start && m_stop > u_stop)
10096 return FALSE;
10097 }
10098 } else /* in the middle somewhere */
10099 {
10100 done = FALSE;
10101 for (i=1; i<amaip->numsaps && !done; i++)
10102 {
10103 if (!AlnMgrGetNthUnalignedForNthRow(sap, i, 1, &u_start, &u_stop))
10104 return FALSE;
10105 if (m_start >= u_start && m_stop <= u_stop)
10106 {
10107 n = i;
10108 done = TRUE;
10109 }
10110 }
10111 for (i=2; i<=amaip->numrows; i++)
10112 {
10113 AlnMgrGetNthSeqRangeInSA(sap_new, i, &m_start, &m_stop);
10114 AlnMgrGetNthUnalignedForNthRow(sap, n, i, &u_start, &u_stop);
10115 if (m_start < u_start && m_stop > u_stop)
10116 return FALSE;
10117 }
10118 }
10119 } else /* replacing a block -- make sure it only conflicts with replaced */
10120 { /* block and that the pieces fit correctly into the rows */
10121 if (*block_num == 0) /* no block number specified -- take a guess */
10122 {
10123 done = FALSE;
10124 for (i=0; !done && i<amaip->numsaps; i++)
10125 {
10126 sap_tmp = amaip->saps[i];
10127 AlnMgrGetNthSeqRangeInSA(sap_tmp, 1, &u_start, &u_stop);
10128 if (m_start <= u_start)
10129 {
10130 if (m_stop >= u_start)
10131 {
10132 done = TRUE;
10133 n = i+1;
10134 }
10135 } else if (m_start > u_start && m_start <= u_stop)
10136 {
10137 done = TRUE;
10138 n = i+1;
10139 }
10140 }
10141 if (!done)
10142 return FALSE;
10143 else
10144 *block_num = n;
10145 } else
10146 n = *block_num;
10147 if (n == 1 && amaip->numsaps <=1)
10148 return TRUE;
10149 if (n == 1 && amaip->numsaps > 1) /* first block */
10150 {
10151 sap_tmp = amaip->saps[1];
10152 for (i=2; i<=amaip->numrows; i++)
10153 {
10154 strand1 = AlnMgrGetNthStrand(sap_new, i);
10155 AlnMgrGetNthSeqRangeInSA(sap_new, i, &m_start, &m_stop);
10156 AlnMgrGetNthSeqRangeInSA(sap_tmp, i, &u_start, &u_stop);
10157 if (strand1 != Seq_strand_minus)
10158 {
10159 if (m_stop >= u_start)
10160 return FALSE;
10161 } else
10162 {
10163 if (m_start <= u_stop)
10164 return FALSE;
10165 }
10166 }
10167 } else if (n == amaip->numsaps) /* last block */
10168 {
10169 sap_tmp = amaip->saps[amaip->numsaps-2];
10170 for (i=2; i<=amaip->numrows; i++)
10171 {
10172 strand1 = AlnMgrGetNthStrand(sap_new, i);
10173 AlnMgrGetNthSeqRangeInSA(sap_new, i, &m_start, &m_stop);
10174 AlnMgrGetNthSeqRangeInSA(sap_tmp, i, &u_start, &u_stop);
10175 if (strand1 != Seq_strand_minus)
10176 {
10177 if (m_start <= u_stop)
10178 return FALSE;
10179 } else
10180 {
10181 if (m_stop >= u_start)
10182 return FALSE;
10183 }
10184 }
10185 } else /* one in the middle */
10186 {
10187 for (i=2; i<=amaip->numrows; i++)
10188 {
10189 sap_tmp = amaip->saps[n-2];
10190 strand1 = AlnMgrGetNthStrand(sap_new, i);
10191 AlnMgrGetNthSeqRangeInSA(sap_new, i, &m_start, &m_stop);
10192 AlnMgrGetNthSeqRangeInSA(sap_tmp, i, &u_start, &u_stop);
10193 if (strand1 != Seq_strand_minus)
10194 {
10195 if (m_start <= u_stop)
10196 return FALSE;
10197 } else
10198 {
10199 if (m_stop >= u_start)
10200 return FALSE;
10201 }
10202 sap_tmp = amaip->saps[n];
10203 AlnMgrGetNthSeqRangeInSA(sap_tmp, i, &u_start, &u_stop);
10204 if (strand1 != Seq_strand_minus)
10205 {
10206 if (m_stop >= u_start)
10207 return FALSE;
10208 } else
10209 {
10210 if (m_start <= u_stop)
10211 return FALSE;
10212 }
10213 }
10214 }
10215 }
10216 return TRUE;
10217 }
10218
am_is_ok_block(DenseSegPtr dsp)10219 static Boolean am_is_ok_block(DenseSegPtr dsp)
10220 {
10221 Boolean allgap;
10222 Int4 i;
10223 Int4 j;
10224 Int4Ptr prev;
10225 SeqIdPtr sip;
10226 Uint1Ptr strands;
10227
10228 if (dsp == NULL)
10229 return TRUE;
10230 sip = dsp->ids;
10231 for (i=0; i<dsp->dim; i++)
10232 {
10233 if (sip == NULL)
10234 return FALSE;
10235 sip = sip->next;
10236 }
10237 prev = (Int4Ptr)MemNew((dsp->dim)*sizeof(Int4));
10238 strands = (Uint1Ptr)MemNew((dsp->dim)*sizeof(Uint1));
10239 for (i=0; i<dsp->dim; i++)
10240 {
10241 prev[i] = -2;
10242 if (dsp->strands != NULL)
10243 strands[i] = dsp->strands[i];
10244 else
10245 strands[i] = Seq_strand_plus;
10246 }
10247 for (i=0; i<dsp->numseg; i++)
10248 {
10249 allgap = TRUE;
10250 for (j=0; j<dsp->dim; j++)
10251 {
10252 if (dsp->strands != NULL)
10253 {
10254 if ((dsp->strands[(dsp->dim)*i + j] == Seq_strand_minus && strands[j] != Seq_strand_minus) || (strands[j] == Seq_strand_minus && dsp->strands[(dsp->dim)*i + j] != Seq_strand_minus))
10255 {
10256 MemFree(prev);
10257 MemFree(strands);
10258 return FALSE;
10259 }
10260 }
10261 if (dsp->starts[(dsp->dim)*i + j] != -1)
10262 {
10263 allgap = FALSE;
10264 if (prev[j] == -2)
10265 {
10266 if (strands[i] != Seq_strand_minus)
10267 prev[j] = dsp->starts[(dsp->dim)*i + j] + dsp->lens[i];
10268 else
10269 prev[j] = dsp->starts[(dsp->dim)*i + j];
10270 } else
10271 {
10272 if (strands[i] != Seq_strand_minus)
10273 {
10274 if (dsp->starts[(dsp->dim)*i + j] != prev[j])
10275 {
10276 MemFree(prev);
10277 MemFree(strands);
10278 return FALSE;
10279 }
10280 prev[j] = dsp->starts[(dsp->dim)*i + j] + dsp->lens[i];
10281 } else
10282 {
10283 if (dsp->starts[(dsp->dim)*i + j] + dsp->lens[i] != prev[j])
10284 {
10285 MemFree(prev);
10286 MemFree(strands);
10287 return FALSE;
10288 }
10289 prev[j] = dsp->starts[(dsp->dim)*i + j];
10290 }
10291 }
10292 }
10293 }
10294 if (allgap)
10295 {
10296 MemFree(prev);
10297 MemFree(strands);
10298 return FALSE;
10299 }
10300 }
10301 for (i=0; i<dsp->dim; i++)
10302 {
10303 if (prev[i] == -2) /*row with all gaps*/
10304 {
10305 MemFree(prev);
10306 MemFree(strands);
10307 return FALSE;
10308 }
10309 }
10310 MemFree(prev);
10311 MemFree(strands);
10312 return TRUE;
10313 }
10314
/***************************************************************************
*
*  Both AlnMgrReplaceBlock and AlnMgrAddBlock require a flattened multiple
*  alignment plus a dense-seg structure (with the same number of rows
*  as the multiple alignment).  Discontinuous multiple alignments are fine,
*  and gapped alignments can also be edited; multiple pairwise alignments
*  cannot be edited (first call AlnMgrGetSubAlign to flatten the alignment,
*  or only edit one of the alignments in the set).  If the addition,
*  removal, or replacement of a block causes an unaligned region to
*  disappear, the functions will merge adjacent blocks to get rid of
*  unaligned regions of length 0.
*
***************************************************************************/
AlnMgrReplaceBlock(SeqAlignPtr sap,DenseSegPtr new_block,Int4 block_num)10328 NLM_EXTERN Boolean AlnMgrReplaceBlock(SeqAlignPtr sap, DenseSegPtr new_block, Int4 block_num)
10329 {
10330 AMAlignIndexPtr amaip;
10331 Int4 i;
10332 SeqAlignPtr sap_new;
10333 SeqAlignPtr sap_head;
10334 SeqAlignPtr sap_tmp;
10335
10336 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
10337 return FALSE;
10338 if (!(am_is_ok_block(new_block)))
10339 return FALSE;
10340 amaip = (AMAlignIndexPtr)(sap->saip);
10341 if (sap->type != SAT_PARTIAL)
10342 {
10343 if (sap->type == SAT_MASTERSLAVE && (amaip->mstype != AM_MASTERSLAVE && amaip->mstype != AM_SEGMENTED_MASTERSLAVE && amaip->mstype != AM_NULL))
10344 return FALSE;
10345 else if (sap->type != SAT_MASTERSLAVE)
10346 return FALSE;
10347 }
10348 if (amaip->numseg < block_num)
10349 {
10350 if (amaip->numseg == 0) /* making a block in a NULL alignment */
10351 {
10352 sap_new = SeqAlignNew();
10353 sap_new->segtype = SAS_DENSEG;
10354 sap_new->segs = (Pointer)(new_block);
10355 sap_new->dim = new_block->dim;
10356 sap->segs = (Pointer)(sap_new);
10357 return (AlnMgrReIndexSeqAlign(sap));
10358 } else
10359 return FALSE;
10360 }
10361 if (new_block != NULL) /* replacing a block -- check for merge */
10362 {
10363 if (amaip->numrows != new_block->dim)
10364 return FALSE;
10365 sap_new = SeqAlignNew();
10366 sap_new->segtype = SAS_DENSEG;
10367 sap_new->segs = (Pointer)(new_block);
10368 sap_new->dim = new_block->dim;
10369 AlnMgrIndexSingleChildSeqAlign(sap_new);
10370 if (!am_is_consistent(sap, sap_new, &block_num))
10371 {
10372 sap_new->segs = NULL;
10373 SeqAlignFree(sap_new);
10374 return FALSE;
10375 }
10376 sap_tmp = amaip->saps[block_num - 1];
10377 amaip->saps[block_num - 1] = sap_new;
10378 sap_tmp->next = NULL;
10379 SeqAlignFree(sap_tmp);
10380 for (i=0; i<amaip->numsaps-1; i++)
10381 {
10382 sap_tmp = amaip->saps[i];
10383 sap_tmp->next = amaip->saps[i+1];
10384 sap_tmp->next->next = NULL;
10385 }
10386 sap->segs = (Pointer)(amaip->saps[0]);
10387 if (!AlnMgrReIndexSeqAlign(sap))
10388 return FALSE;
10389 /* no auto-merge */
10390 /* return (am_merge_after_edit(sap)); */
10391 return TRUE;
10392 } else /* delete block */
10393 {
10394 if (block_num == 0 || block_num > amaip->numsaps)
10395 return FALSE;
10396 sap_tmp = amaip->saps[block_num - 1];
10397 sap_tmp->next = NULL;
10398 SeqAlignFree(sap_tmp);
10399 if (block_num == 1)
10400 {
10401 sap_head = amaip->saps[1];
10402 for (i=1; i<amaip->numsaps-1; i++)
10403 {
10404 amaip->saps[i]->next = amaip->saps[i+1];
10405 amaip->saps[i+1]->next = NULL;
10406 }
10407 } else
10408 {
10409 sap_head = amaip->saps[0];
10410 for (i=0; i<block_num-1; i++)
10411 {
10412 sap_tmp = amaip->saps[i];
10413 sap_tmp->next = amaip->saps[i+1];
10414 sap_tmp->next->next = NULL;
10415 }
10416
10417 /* bug fix -- Dave & Lewis 7/20/00 */
10418 if (block_num == amaip->numsaps)
10419 sap_tmp->next = NULL;
10420 /* end of fix */
10421 else {
10422 sap_tmp->next = amaip->saps[block_num];
10423 }
10424 if (block_num < amaip->numsaps)
10425 sap_tmp->next->next = NULL;
10426 for (i=block_num; i<amaip->numsaps-1; i++)
10427 {
10428 sap_tmp = amaip->saps[i];
10429 sap_tmp->next = amaip->saps[i+1];
10430 sap_tmp->next->next = NULL;
10431 }
10432 }
10433 sap->segs = (Pointer)(sap_head);
10434 return (AlnMgrReIndexSeqAlign(sap));
10435 }
10436 }
10437
AlnMgrAddBlock(SeqAlignPtr sap,DenseSegPtr new_block)10438 NLM_EXTERN Boolean AlnMgrAddBlock(SeqAlignPtr sap, DenseSegPtr new_block)
10439 {
10440 AMAlignIndexPtr amaip;
10441 Int4 i;
10442 SeqAlignPtr sap_new;
10443 SeqAlignPtr sap_tmp;
10444
10445 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT || new_block == NULL)
10446 return FALSE;
10447 if (!(am_is_ok_block(new_block)))
10448 return FALSE;
10449 amaip = (AMAlignIndexPtr)(sap->saip);
10450 if (sap->type != SAT_PARTIAL)
10451 {
10452 if (sap->type == SAT_MASTERSLAVE && (amaip->mstype != AM_MASTERSLAVE && amaip->mstype != AM_SEGMENTED_MASTERSLAVE && amaip->mstype != AM_NULL))
10453 return FALSE;
10454 else if (sap->type != SAT_MASTERSLAVE)
10455 return FALSE;
10456 }
10457 sap_new = SeqAlignNew();
10458 sap_new->segtype = SAS_DENSEG;
10459 sap_new->segs = (Pointer)(new_block);
10460 sap_new->dim = new_block->dim;
10461 AlnMgrIndexSingleChildSeqAlign(sap_new);
10462 i = -1;
10463 if (!am_is_consistent(sap, sap_new, &i))
10464 {
10465 sap_new->segs = NULL;
10466 SeqAlignFree(sap_new);
10467 return FALSE;
10468 }
10469 sap_tmp = (SeqAlignPtr)(sap->segs);
10470 sap_new->next = sap_tmp;
10471 sap->segs = (Pointer)(sap_new);
10472 if (!AlnMgrReIndexSeqAlign(sap))
10473 return FALSE;
10474 /* no auto-merge */
10475 /* return (am_merge_after_edit(sap)); */
10476 return TRUE;
10477 }
10478
/***************************************************************************
*
*  am_do_merge concatenates the dense-segs of amaip->saps[left] and
*  amaip->saps[right] into one new dense-seg alignment.  When the last
*  segment of the left block and the first segment of the right block
*  have gaps in exactly the same rows, the two segments are fused into
*  one (lengths added; minus-strand rows take the right block's start).
*  Both source alignments are freed; the new alignment is stored in
*  saps[left] when left == 0 and in saps[right] otherwise, and the other
*  slot is set to NULL.  The id list of the left block is handed over to
*  the merged dense-seg.  Nothing is done when the two blocks have
*  different dimensions.
*
***************************************************************************/
static void am_do_merge (AMAlignIndexPtr amaip, Int4 left, Int4 right)
{
   Boolean      consistent;
   Int4         dim;
   DenseSegPtr  dsp;
   DenseSegPtr  dsp1;
   DenseSegPtr  dsp2;
   Int4         i;
   Int4         last1;
   Int4         n;
   Int4         n1;
   SeqAlignPtr  sap_new;
   Int4         startseg;
   Int4         total;

   dsp1 = (DenseSegPtr)(amaip->saps[left]->segs);
   dsp2 = (DenseSegPtr)(amaip->saps[right]->segs);
   if (dsp1->dim != dsp2->dim)
      return;
   dim = dsp1->dim;
   total = dsp1->numseg + dsp2->numseg;
   dsp = DenseSegNew();
   dsp->dim = dim;
   dsp->starts = (Int4Ptr)MemNew(dim*total*sizeof(Int4));
   dsp->lens = (Int4Ptr)MemNew(total*sizeof(Int4));
   if (dsp1->strands != NULL || dsp2->strands != NULL)
      dsp->strands = (Uint1Ptr)MemNew(dim*total*sizeof(Uint1));
   dsp->ids = dsp1->ids;
   dsp1->ids = NULL;
   /* left block goes in unchanged; the arrays hold dim rows, so dim is */
   /* the loop bound                                                    */
   for (n=0; n<dsp1->numseg; n++)
   {
      dsp->lens[n] = dsp1->lens[n];
      for (i=0; i<dim; i++)
      {
         dsp->starts[dim*n + i] = dsp1->starts[dim*n + i];
         if (dsp->strands != NULL)
            dsp->strands[dim*n + i] = (dsp1->strands != NULL ? dsp1->strands[dim*n + i] : Seq_strand_plus);
      }
   }
   /* the boundary segments can be fused only when every row is either */
   /* gapped in both of them or ungapped in both of them               */
   consistent = (Boolean)(dsp1->numseg > 0 && dsp2->numseg > 0);
   last1 = dim*(dsp1->numseg-1);
   for (i=0; i<dim && consistent; i++)
   {
      if ((dsp1->starts[last1 + i] == -1) != (dsp2->starts[i] == -1))
         consistent = FALSE;
   }
   startseg = 0;
   if (consistent)
   {
      startseg = 1;
      for (i=0; i<dim; i++)
      {
         if (dsp->strands != NULL && dsp->strands[i] == Seq_strand_minus)
            dsp->starts[last1 + i] = dsp2->starts[i];
      }
      dsp->lens[dsp1->numseg-1] += dsp2->lens[0];
   }
   /* remaining right-block segments follow directly after the left ones */
   for (n=startseg; n<dsp2->numseg; n++)
   {
      n1 = dsp1->numseg + n - startseg;
      dsp->lens[n1] = dsp2->lens[n];
      for (i=0; i<dim; i++)
      {
         dsp->starts[dim*n1 + i] = dsp2->starts[dim*n + i];
         if (dsp->strands != NULL)
            dsp->strands[dim*n1 + i] = (dsp2->strands != NULL ? dsp2->strands[dim*n + i] : Seq_strand_plus);
      }
   }
   dsp->numseg = total - startseg;
   sap_new = SeqAlignNew();
   sap_new->type = SAT_PARTIAL;
   sap_new->segtype = SAS_DENSEG;
   sap_new->dim = dsp->dim;
   sap_new->segs = (Pointer)(dsp);
   AlnMgrIndexSingleChildSeqAlign(sap_new);
   amaip->saps[left]->next = NULL;
   amaip->saps[right]->next = NULL;
   SeqAlignFree(amaip->saps[left]);
   SeqAlignFree(amaip->saps[right]);
   if (left == 0)
   {
      amaip->saps[left] = sap_new;
      amaip->saps[right] = NULL;
   } else
   {
      amaip->saps[right] = sap_new;
      amaip->saps[left] = NULL;
   }
}
10571
am_merge_after_edit(SeqAlignPtr sap)10572 static Boolean am_merge_after_edit (SeqAlignPtr sap)
10573 {
10574 AMAlignIndexPtr amaip;
10575 Boolean found;
10576 Int4 i;
10577 Int4 j;
10578 RowSourcePtr rsp;
10579 SeqAlignPtr sap_prev;
10580
10581 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
10582 return FALSE;
10583 AlnMgrSetUnalignedLengths(sap);
10584 amaip = (AMAlignIndexPtr)(sap->saip);
10585 found = FALSE;
10586 rsp = amaip->rowsource[0];
10587 for (i=1; i<amaip->numsaps && !found; i++)
10588 {
10589 if (amaip->ulens[i-1] == 0)
10590 {
10591 found = TRUE;
10592 am_do_merge (amaip, rsp->which_saps[i-1]-1, rsp->which_saps[i]-1);
10593 sap_prev = amaip->saps[0];
10594 for (j=1; j<amaip->numsaps; j++)
10595 {
10596 if (amaip->saps[j] != NULL)
10597 {
10598 sap_prev->next = amaip->saps[j];
10599 sap_prev = amaip->saps[j];
10600 }
10601 }
10602 sap->segs = (Pointer)(amaip->saps[0]);
10603 }
10604 }
10605 if (!found)
10606 return TRUE;
10607 if (!AlnMgrReIndexSeqAlign(sap))
10608 return FALSE;
10609 return (am_merge_after_edit(sap));
10610 }
10611
10612 /*******************************************************************************
10613
10614 Function : AlnMgrIsSAPDiscAli()
10615
10616 Purpose : check if a SeqAlign is discontinuous
10617
10618 Parameters : SeqAlignPtr
10619
10620 Return value : TRUE if discontinous, FALSE otherwise
10621
10622 *******************************************************************************/
AlnMgrIsSAPDiscAli(SeqAlignPtr sap)10623 NLM_EXTERN Boolean AlnMgrIsSAPDiscAli(SeqAlignPtr sap)
10624 {
10625 AMAlignIndexPtr amaip;
10626 Boolean bRet=FALSE;
10627
10628 if (!sap || !sap->saip) return(bRet);
10629
10630 if (sap->saip->indextype == INDEX_PARENT){
10631 amaip = (AMAlignIndexPtr)sap->saip;
10632 if (sap->type == SAT_PARTIAL || (sap->type == SAT_MASTERSLAVE &&
10633 amaip->mstype == AM_SEGMENTED_MASTERSLAVE)){
10634 bRet=TRUE;
10635 }
10636 }
10637
10638 return(bRet);
10639 }
10640
AlnMgrIsSAPNULL(SeqAlignPtr sap)10641 NLM_EXTERN Boolean AlnMgrIsSAPNULL(SeqAlignPtr sap)
10642 {
10643 AMAlignIndexPtr amaip;
10644
10645 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
10646 return FALSE;
10647 amaip = (AMAlignIndexPtr)(sap->saip);
10648 if (amaip->mstype == AM_NULL)
10649 return TRUE;
10650 if (sap->segs == NULL)
10651 return TRUE;
10652 return FALSE;
10653 }
10654
am_compare_diags(VoidPtr ptr1,VoidPtr ptr2)10655 static int LIBCALLBACK am_compare_diags(VoidPtr ptr1, VoidPtr ptr2)
10656 {
10657 DenseDiagPtr ddp1;
10658 DenseDiagPtr ddp2;
10659
10660 ddp1 = *((DenseDiagPtr PNTR) ptr1);
10661 ddp2 = *((DenseDiagPtr PNTR) ptr2);
10662 if (ddp1 == NULL || ddp2 == NULL)
10663 return 0;
10664 if (ddp1->starts[0] < ddp2->starts[0])
10665 return -1;
10666 else if (ddp1->starts[0] > ddp2->starts[0])
10667 return 1;
10668 else
10669 return 0;
10670 }
10671
AlnMgrIsIBMable(SeqAlignPtr sap)10672 NLM_EXTERN Int4 AlnMgrIsIBMable(SeqAlignPtr sap)
10673 {
10674 Boolean changed;
10675 Int4 dim;
10676 DenseDiagPtr ddp;
10677 DenseDiagPtr PNTR ddparray;
10678 DenseSegPtr dsp;
10679 Int4 i;
10680 Int4 j;
10681 SeqIdPtr ids;
10682 SeqAlignPtr salp;
10683
10684 if (sap == NULL)
10685 return AM_ERROR;
10686 if (sap->segtype == SAS_DISC)
10687 salp = (SeqAlignPtr)(sap->segs);
10688 else
10689 salp = sap;
10690 changed = FALSE;
10691 while (salp != NULL)
10692 {
10693 if (salp->segtype == SAS_DISC)
10694 return AM_NOIBM;
10695 else if (salp->segtype == SAS_DENSEG)
10696 {
10697 dsp = (DenseSegPtr)(salp->segs);
10698 if (dsp->numseg > 1)
10699 return AM_NOIBM;
10700 } else if (salp->segtype == SAS_DENDIAG)
10701 {
10702 ddp = (DenseDiagPtr)(salp->segs);
10703 dim = ddp->dim;
10704 ids = ddp->id;
10705 i = 0;
10706 while (ddp != NULL)
10707 {
10708 if (ddp->dim != dim)
10709 return AM_NOIBM;
10710 if (am_is_new_row(ddp->id, ids))
10711 return AM_NOIBM;
10712 i++;
10713 ddp = ddp->next;
10714 }
10715 ddparray = (DenseDiagPtr PNTR)MemNew(i*sizeof(DenseDiagPtr));
10716 ddp = (DenseDiagPtr)(salp->segs);
10717 for (j=0; j<i && ddp!=NULL; j++)
10718 {
10719 ddparray[j] = ddp;
10720 ddp = ddp->next;
10721 }
10722 HeapSort (ddparray, i, sizeof(DenseDiagPtr), am_compare_diags);
10723 for (j=0; j<i-1; j++)
10724 {
10725 if (ddparray[j]->starts[0]+ddparray[j]->len > ddparray[j+1]->starts[0])
10726 return AM_NOIBM;
10727 if (ddparray[j]->next != ddparray[j+1])
10728 changed = TRUE;
10729 ddparray[j]->next = ddparray[j+1];
10730 }
10731 ddparray[i-1]->next = NULL;
10732 salp->segs = (Pointer)ddparray[0];
10733 } else
10734 return AM_ERROR;
10735 salp = salp->next;
10736 }
10737 if (changed)
10738 return AM_IBMCHANGE;
10739 else
10740 return AM_IBMNOCHANGE;
10741 }
10742
am_same_ids(SeqIdPtr sip1,SeqIdPtr sip2)10743 static Boolean am_same_ids(SeqIdPtr sip1, SeqIdPtr sip2)
10744 {
10745 Boolean found;
10746 SeqIdPtr sip_tmp;
10747
10748 while (sip2 != NULL)
10749 {
10750 sip_tmp = sip1;
10751 found = FALSE;
10752 while (sip_tmp != NULL && !found)
10753 {
10754 if (SeqIdComp(sip_tmp, sip2) == SIC_YES)
10755 found = TRUE;
10756 else
10757 sip_tmp = sip_tmp->next;
10758 }
10759 if (!found)
10760 return FALSE;
10761 sip2 = sip2->next;
10762 }
10763 return TRUE;
10764 }
10765
AlnMgrIsEditable(SeqAlignPtr sap)10766 NLM_EXTERN Int4 AlnMgrIsEditable(SeqAlignPtr sap)
10767 {
10768 DenseDiagPtr ddp;
10769 DenseSegPtr dsp;
10770 Boolean gapped;
10771 SeqIdPtr id_prev;
10772 SeqAlignPtr salp;
10773
10774 if (sap == NULL)
10775 return AM_ERROR;
10776 if (sap->segtype == SAS_DISC)
10777 salp = (SeqAlignPtr)(sap->segs);
10778 else
10779 salp = sap;
10780 id_prev = NULL;
10781 gapped = FALSE;
10782 while (salp != NULL)
10783 {
10784 if (salp->segtype == SAS_DISC)
10785 return AM_ERROR;
10786 else if (salp->segtype == SAS_DENSEG)
10787 {
10788 dsp = (DenseSegPtr)(salp->segs);
10789 if (dsp->numseg > 1)
10790 gapped = TRUE;
10791 if (am_same_ids(dsp->ids, id_prev))
10792 return AM_NOEDIT;
10793 id_prev = dsp->ids;
10794 } else if (salp->segtype == SAS_DENDIAG)
10795 {
10796 ddp = (DenseDiagPtr)(salp->segs);
10797 while (ddp != NULL)
10798 {
10799 if (am_same_ids(ddp->id, id_prev))
10800 return AM_NOEDIT;
10801 id_prev = ddp->id;
10802 ddp = ddp->next;
10803 }
10804 }
10805 salp = salp->next;
10806 }
10807 if (gapped)
10808 return AM_EDITGAPS;
10809 else
10810 return AM_EDITNOGAP;
10811 }
10812
10813 /*
10814 Function to map the Bioseq Coordinates of one sequence to the
10815 bioseq coordinate of the base that aligns with it in the other sequence.
10816 a return value of -2 indicates that the bioseq coordinate is not in the alignment,
10817 while a return value of -1 indicates that the coordinate maps to a gap.
10818 if(GetNextNonGap==TRUE) .. then function will try to find the next aligned
10819 base in the alignment after the gap. In this case the position of the next base
10820 after the gap is in PostGap.. unless it is and end-gap.. in which case PostGap==-1.
10821 .. For "-" strand, this would thus return the next aligned block.. thus
10822 the previous base in the subject sequence.
10823
10824 */
AlnMgrMapBioseqToBioseq(SeqAlignPtr salp,Int4 pos,Int4 source_row,Int4 target_row,Boolean GetNextNonGap,Int4Ptr PostGap)10825 NLM_EXTERN Int4 AlnMgrMapBioseqToBioseq(SeqAlignPtr salp,Int4 pos,Int4 source_row,Int4 target_row,Boolean GetNextNonGap,Int4Ptr PostGap) {
10826 Int4 aln_coord,pos_target;
10827 AlnMsgPtr amp1;
10828 Boolean status;
10829 if(!salp)
10830 return -1;
10831 aln_coord = AlnMgrMapBioseqToSeqAlign(salp, pos, source_row, NULL);
10832 if(aln_coord>=0) { /*
10833 If that coord exist in SeqAlign
10834 even gaps are alignment coordinates.
10835 */
10836 pos_target = AlnMgrMapRowCoords(salp, aln_coord, target_row, NULL);
10837 if(pos_target>=0 || !GetNextNonGap)
10838 return pos_target;
10839 else {
10840 Uint1 gap;
10841 amp1 = AlnMsgNew();
10842 amp1->from_m = aln_coord;
10843 amp1->to_m = -1;
10844 amp1->row_num = target_row;
10845 amp1->which_master = 0; /* align coordinates */
10846 status = AlnMgrGetNextAlnBit(salp, amp1);
10847 gap = amp1->gap;
10848 /*
10849 Search for 1st aligned base after gap
10850 */
10851
10852 while(status && amp1->gap) { /* Search for 1st non-gap bit */
10853 AlnMgrGetNextAlnBit(salp, amp1);
10854 }
10855 if(amp1->gap) {
10856 /*
10857 XXX Should only happen for alignment with end-gaps of
10858 if there is a bug in AlnMgrMapBioseqToSeqAlign
10859 */
10860 *PostGap = -1;
10861 Free(amp1);
10862 return -1;
10863 } else {
10864 *PostGap = amp1->from_b;
10865 Free(amp1);
10866 return -1;
10867 }
10868 }
10869 } else
10870 return aln_coord;
10871 }
10872 /*
10873 convert a global densediag alignment into a single denseseg
10874 no error checking done.
10875 */
DenseDiagToGlobalDenseSeg(DenseDiagPtr ddp_head)10876 NLM_EXTERN DenseSegPtr DenseDiagToGlobalDenseSeg(DenseDiagPtr ddp_head) {
10877 DenseDiagPtr ddp;
10878 DenseSegPtr dsp;
10879 Int4 numseg=0,dim=0,i,j;
10880
10881 if(!ddp_head)
10882 return NULL;
10883
10884 ddp = ddp_head;
10885 numseg=0;
10886 while (ddp) {
10887 numseg++;
10888 ddp=ddp->next;
10889 }
10890
10891 ddp = ddp_head;
10892 i=0;
10893 dim = ddp->dim;
10894 dsp = DenseSegNew();
10895 dsp->dim = dim;
10896 dsp->numseg = numseg;
10897 dsp->starts = MemNew(dim*numseg*sizeof(Int4));
10898 dsp->lens = MemNew(numseg*sizeof(Int4));
10899 dsp->strands = MemNew(dim*numseg*sizeof(Uint1));
10900 dsp->ids = SeqIdDupList(ddp->id);
10901 while (ddp) {
10902 for(j=0;j<dim;j++)
10903 dsp->starts[j+i*dim] = ddp->starts[j];
10904 for(j=0;j<dim;j++)
10905 dsp->strands[j+i*dim] = ddp->strands[j];
10906 dsp->lens[i] = ddp->len;
10907 ddp = ddp->next;
10908 i++;
10909 }
10910 return dsp;
10911 }
10912 /*
10913 Make a DenseDiag Linked List out of an Interval on a seqalign
10914 (interval specified in alignment coordinates )
10915 */
10916
AlnMgrSeqAlignToDDP(SeqAlignPtr sap,Int4 aln_cut_from,Int4 aln_cut_to,Int4Ptr numseg_ptr)10917 NLM_EXTERN DenseDiagPtr AlnMgrSeqAlignToDDP(SeqAlignPtr sap,Int4 aln_cut_from,Int4 aln_cut_to,Int4Ptr numseg_ptr) {
10918 AlnMsgPtr amp1;
10919 Int4 numseg=0,curr_m,from_q,to_q,len,i;
10920 DenseDiagPtr ddp,ddp_head=NULL,ddp_last=NULL;
10921 SeqAlignPtr salp;
10922 Boolean gap,status;
10923
10924 i = AlnMgrCheckAlignForParent(sap);
10925 if (i == AM_PARENT) {
10926 salp = (SeqAlignPtr) sap->segs;
10927 } else {
10928 salp = sap;
10929 }
10930 if((i != AM_PARENT || !salp->next) && salp->segtype == SAS_DENSEG) {
10931 Int4 dim,j;
10932 DenseSegPtr dsp;
10933 Int4 alnlen=0,newlen;
10934 dsp = (DenseSegPtr) salp->segs;
10935 numseg = dsp->numseg;
10936 dim = dsp->dim;
10937 i=0;
10938 while(i<numseg) {
10939 len = dsp->lens[i];
10940 newlen = alnlen+len;
10941 if(newlen > aln_cut_from && alnlen<=aln_cut_to) {
10942 Int4 beglen,endlen;
10943 ddp =DenseDiagNew();
10944 ddp->starts = MemNew(dim*sizeof(Int4));
10945 ddp->strands = MemNew(dim*sizeof(Uint1));
10946 for(j=0;j<dim;j++)
10947 ddp->strands[j]=dsp->strands[j+i*dim];
10948 if(alnlen<aln_cut_from && newlen>aln_cut_from ) {
10949 beglen = aln_cut_from-alnlen;
10950 } else
10951 beglen = 0;
10952 if(alnlen<=aln_cut_to && newlen>aln_cut_to ) {
10953 endlen = newlen-aln_cut_to-1;
10954 } else
10955 endlen =0;
10956 ddp->len = len -beglen-endlen;
10957 for(j=0;j<dim;j++) {
10958 if(dsp->starts[j+i*dim]!=-1) {
10959 if(dsp->strands[j]!=Seq_strand_minus)
10960 ddp->starts[j]=dsp->starts[j+i*dim]+beglen;
10961 else
10962 ddp->starts[j]=dsp->starts[j+i*dim]-endlen;
10963 } else
10964 ddp->starts[j]=-1;
10965 }
10966 ddp->id = SeqIdDupList(SeqAlignId(salp,0));
10967 ddp->dim=dim;
10968 if(ddp_head) {
10969 ddp_last->next = ddp;
10970 } else {
10971 ddp_head = ddp;
10972 }
10973 ddp_last = ddp;
10974 alnlen=newlen;
10975 }
10976 i++;
10977 }
10978 } else if ((i != AM_PARENT || !salp->next) && salp->segtype == SAS_DENDIAG) {
10979 Int4 dim,j;
10980 Int4 alnlen=0,newlen;
10981 DenseDiagPtr ddp0 = (DenseDiagPtr)salp->segs;
10982 i=0;
10983 while(ddp0) {
10984 len = ddp0->len;
10985 newlen = alnlen+len;
10986 dim = ddp0->dim;
10987 if(newlen > aln_cut_from && alnlen<=aln_cut_to) {
10988 Int4 beglen,endlen;
10989 ddp =DenseDiagNew();
10990 ddp->starts = MemNew(dim*sizeof(Int4));
10991 ddp->strands = MemNew(dim*sizeof(Uint1));
10992 dim = ddp0->dim;
10993 len = ddp0->len;
10994 for(j=0;j<dim;j++)
10995 ddp->strands[j]=ddp0->strands[j];
10996
10997 if(alnlen<aln_cut_from && newlen>aln_cut_from ) {
10998 beglen = aln_cut_from-alnlen;
10999 } else
11000 beglen = 0;
11001 if(alnlen<=aln_cut_to && newlen>aln_cut_to ) {
11002 endlen = newlen-aln_cut_to-1;
11003 } else
11004 endlen =0;
11005 ddp->len = len -beglen-endlen;
11006 for(j=0;j<dim;j++) {
11007 if(ddp0->starts[j]!=-1) {
11008 if(ddp0->strands[j]!=Seq_strand_minus)
11009 ddp->starts[j]=ddp0->starts[j]+beglen;
11010 else
11011 ddp->starts[j]=ddp0->starts[j]-endlen;
11012 } else
11013 ddp->starts[j]=-1;
11014 }
11015
11016 ddp->id = SeqIdDupList(SeqAlignId(salp,0));
11017 ddp->dim=dim;
11018 if(ddp_head) {
11019 ddp_last->next = ddp;
11020 } else {
11021 ddp_head = ddp;
11022 }
11023 ddp_last = ddp;
11024 ddp0=ddp0->next;
11025 }
11026 }
11027 } else {
11028 /* XXX
11029 this will NOT work for SeqAligns with end gaps due to
11030 a bug in AlnMgrGetNextAlnBit
11031 */
11032 amp1 = AlnMsgNew();
11033 amp1->from_m = aln_cut_from;
11034 amp1->to_m = aln_cut_to;
11035 amp1->row_num = 1;
11036 amp1->which_master = 0;
11037 curr_m = 0;
11038 status = TRUE;
11039 while(status) {
11040 status = AlnMgrGetNextAlnBit(salp, amp1);
11041 gap = amp1->gap;
11042 from_q = amp1->from_b;
11043 to_q = amp1->to_b;
11044 len = to_q-from_q; /* Either sequence of gap length */
11045 numseg++;
11046 ddp = DenseDiagNew();
11047 ddp->len = len;
11048 ddp->starts = MemNew(2*sizeof(Int4));
11049 ddp->strands = MemNew(2*sizeof(Uint1));
11050 ddp->strands[0]=amp1->strand;
11051 ddp->id = SeqIdDupList(SeqAlignId(salp,0));
11052 ddp->dim=2;
11053 if(!gap)
11054 ddp->starts[0]=from_q;
11055 else
11056 ddp->starts[0]=-1;
11057 if(ddp_head) {
11058 ddp_last->next = ddp;
11059 } else {
11060 ddp_head = ddp;
11061 }
11062 ddp_last = ddp;
11063 curr_m+=len;
11064 }
11065 Free(amp1);
11066 amp1 = AlnMsgNew();
11067 amp1->from_m = aln_cut_from;
11068 amp1->to_m = aln_cut_to;
11069 amp1->row_num = 2;
11070 amp1->which_master = 0;
11071 status = TRUE;
11072 ddp=ddp_head;
11073 while(status) {
11074 status = AlnMgrGetNextAlnBit(salp, amp1);
11075 gap = amp1->gap;
11076 if(!gap)
11077 ddp->starts[1]=amp1->from_b;
11078 else
11079 ddp->starts[1]=-1;
11080 ddp->strands[1]=amp1->strand;
11081 ddp=ddp->next;
11082 }
11083 Free(amp1);
11084 }
11085
11086 if(numseg_ptr)
11087 *numseg_ptr = numseg;
11088 return ddp_head;
11089 }
11090
11091 /*
11092 Merge 3 SeqAligns.
11093 the master_cut_pos1 = position where the salp_merging SeqAlign starts
11094 the master_cut_pos2 = position where the salp_merging SeqAlign ends
11095
11096 Take 3 SeqAligns and makes a single denseseg seqalign.
11097 If the end-seqaligns are PURE gaps they do not need to be indexed.
11098 (as the alignment manager doesn't yet deal well with purely gaps alignments,
11099 a bypass has been coded.. but the last end-gap seqalign has
11100 to be of the correct length.. as no check will be performed)
11101
11102 */
11103
AlnMgrMerge3OverlappingSeqAligns(SeqAlignPtr salp1,SeqAlignPtr salp_merging,SeqAlignPtr salp2,Int4 master_cut_pos1,Int4 master_cut_pos2)11104 NLM_EXTERN SeqAlignPtr AlnMgrMerge3OverlappingSeqAligns(SeqAlignPtr salp1,SeqAlignPtr salp_merging,SeqAlignPtr salp2,Int4 master_cut_pos1, Int4 master_cut_pos2) {
11105 Int4 i,aln_cut_pos1,aln_cut_pos2;
11106 Int4 numseg=0,this_numseg;
11107 DenseDiagPtr ddp,ddp_head=NULL,ddp_last,ddp_next;
11108 DenseSegPtr dsp;
11109 Boolean sstrand_plus,qstrand_plus;
11110 SeqAlignPtr sap=NULL;
11111
11112
11113 if(!salp1)
11114 return NULL;
11115 if(salp1) {
11116 if(salp1->segtype == SAS_DENSEG && (dsp=(DenseSegPtr) salp1->segs)!=NULL && dsp->numseg==1 &&( dsp->starts[0]==-1 || dsp->starts[1]==-1 )) {
11117 ddp_head = DenseDiagNew();
11118 ddp_head->dim = dsp->dim;
11119 ddp_head->starts = (Int4Ptr)MemNew((dsp->dim)*sizeof(Int4));
11120 ddp_head->id = SeqIdDupList(dsp->ids);
11121 ddp_head->strands = (Uint1Ptr)MemNew((dsp->dim)*sizeof(Uint1));
11122 ddp_head->scores = ScoreDup(dsp->scores);
11123 for (i=0; i<dsp->dim; i++)
11124 {
11125 ddp_head->starts[i] = dsp->starts[i];
11126 ddp_head->strands[i] = dsp->strands[i];
11127 }
11128 ddp_head->len = dsp->lens[0];
11129 /* XXX HS Risky.. bug potential.
11130 trust that beginning-gap-seqalign is correctly computed..
11131 so don't need to trim according to master_cut_pos1
11132 */
11133 numseg=1;
11134 } else {
11135 aln_cut_pos1 = AlnMgrMapBioseqToSeqAlign(salp1,master_cut_pos1,1, NULL);
11136 ddp_head = AlnMgrSeqAlignToDDP(salp1,0,aln_cut_pos1,&numseg);
11137 }
11138 ddp_last=ddp_head;
11139 while(ddp_last!=NULL && ddp_last->next!=NULL) {
11140 ddp_last = ddp_last->next;
11141 }
11142 }
11143 if(salp_merging) {
11144 ddp = AlnMgrSeqAlignToDDP(salp_merging,0,AlnMgrGetAlnLength(salp_merging,FALSE)-1,&this_numseg);
11145
11146 numseg+=this_numseg;
11147 if(ddp_last) {
11148 ddp_last->next = ddp;
11149 } else {
11150 ddp_head = ddp;
11151 }
11152 while(ddp_last!=NULL && ddp_last->next!=NULL) {
11153 ddp_last = ddp_last->next;
11154 }
11155
11156 }
11157 if(salp2) {
11158 if(salp2->segtype == SAS_DENSEG && (dsp=(DenseSegPtr) salp2->segs)!=NULL && dsp->numseg==1 &&( dsp->starts[0]==-1 || dsp->starts[1]==-1 )) {
11159 ddp = DenseDiagNew();
11160 ddp->dim = dsp->dim;
11161 ddp->starts = (Int4Ptr)MemNew((dsp->dim)*sizeof(Int4));
11162 ddp->id = SeqIdDupList(dsp->ids);
11163 ddp->strands = (Uint1Ptr)MemNew((dsp->dim)*sizeof(Uint1));
11164 ddp->scores = ScoreDup(dsp->scores);
11165 for (i=0; i<dsp->dim; i++)
11166 {
11167 ddp->starts[i] = dsp->starts[i];
11168 ddp->strands[i] = dsp->strands[i];
11169 }
11170 ddp->len = dsp->lens[0];
11171 /* XXX HS Risky.. bug potential.
11172 trust that end-gap-seqalign is correctly computed..
11173 so don't need to trim according to master_cut_pos1
11174 */
11175 numseg++;
11176 } else {
11177
11178 aln_cut_pos2 = AlnMgrMapBioseqToSeqAlign(salp2,master_cut_pos2,1, NULL);
11179 this_numseg=0;
11180 ddp= AlnMgrSeqAlignToDDP(salp2,aln_cut_pos2,AlnMgrGetAlnLength(salp2,FALSE)-1,&this_numseg);
11181 numseg += this_numseg;
11182 }
11183
11184 if(ddp_last) {
11185 ddp_last->next = ddp;
11186 } else {
11187 ddp_head = ddp;
11188 }
11189 }
11190 ddp = ddp_head;
11191 if(ddp) {
11192 qstrand_plus = (ddp->strands[0]!=Seq_strand_minus);
11193 sstrand_plus = (ddp->strands[1]!=Seq_strand_minus);
11194 }
11195 /* Merge consecutive ddp of the same type (e.g. full, gap-subject,query-gap)
11196 Assuming that Consecutive blocks represent a GLOBAL alignment.
11197 Except that Gaps may be missing.
11198
11199 If did error correction...
11200 By Scanning "left-to-right", insure that only have to worry about
11201 the interval connecting two consecutived DenseDiags..
11202 the "beginning/start" of the first block is always guaranteed to be OK.
11203 */
11204 while(ddp!=NULL && ddp->next!=NULL) {
11205 if(ddp->starts[0]==-1) {
11206 if(ddp->next->starts[0]==-1) {
11207 /* Merge two consecutive query gaps */
11208 if(!sstrand_plus) {
11209 ddp->starts[1]=ddp->next->starts[1];
11210 ddp->len = ddp->next->starts[1]-ddp->starts[1]+ddp->len;
11211 } else
11212 ddp->len = ddp->next->starts[1]-ddp->starts[1]+ddp->next->len;
11213 ddp_next = ddp->next;
11214 ddp->next = ddp->next->next;
11215 DenseDiagFree(ddp_next);
11216 numseg--;
11217 } else if (ddp->next->starts[1]==-1) {
11218 /* alternating gaps */
11219 } else {
11220 /* query Gap followed by block */
11221 }
11222 } else if (ddp->starts[1]==-1) {
11223 if(ddp->next->starts[0]==-1) {
11224 /* alternating gaps */
11225 } else if (ddp->next->starts[1]==-1) {
11226 /* Merge two consecutive subject gaps */
11227 ddp->len = ddp->next->starts[0]-ddp->starts[0]+ddp->next->len;
11228 if(!qstrand_plus) {
11229 ddp->starts[0]=ddp->next->starts[0];
11230 ddp->len = ddp->len + ddp->next->starts[0] - ddp->starts[0];
11231 }
11232 ddp_next = ddp->next;
11233 ddp->next = ddp->next->next;
11234 DenseDiagFree(ddp_next);
11235 numseg--;
11236 } else {
11237 /* gap in subject followed by whole block */
11238 }
11239
11240 } else {/* whole block, no gaps in first block */
11241 if(ddp->next->starts[0]==-1) {
11242
11243 } else if (ddp->next->starts[1]==-1) {
11244
11245 } else {
11246 if(ddp->starts[0]-ddp->starts[1]!=ddp->next->starts[0]-ddp->next->starts[1]) {
11247 ErrPostEx(SEV_ERROR,0,0,"Bug in Alignment Merging: Consecutive blocks incompatible \n");
11248 } else {
11249 ddp->len = ddp->len+ddp->next->len;
11250 if(!qstrand_plus) {
11251 ddp->starts[0]=ddp->next->starts[0];
11252 }
11253 if(!sstrand_plus) {
11254 ddp->starts[1]=ddp->next->starts[1];
11255 }
11256 ddp_next = ddp->next;
11257 ddp->next = ddp_next->next;
11258 DenseDiagFree(ddp_next);
11259 numseg--;
11260 }
11261 }
11262 }
11263 ddp = ddp->next;
11264 }
11265
11266 if(ddp_head) {
11267 dsp = DenseDiagToGlobalDenseSeg(ddp_head);
11268 sap = SeqAlignNew();
11269 sap->type = SAT_PARTIAL;
11270 sap->segtype = SAS_DENSEG;
11271 sap->dim = dsp->dim;
11272 sap->segs = (Pointer)dsp;
11273 if (dsp->scores)
11274 sap->score = ScoreDup(dsp->scores);
11275 while(ddp_head) {
11276 ddp = ddp_head;
11277 ddp_head = ddp_head->next;
11278 DenseDiagFree(ddp);
11279 }
11280 }
11281 return sap;
11282 }
11283