1 static char const rcsid[] = "$Id: actutils.c,v 6.56 2008/10/22 17:19:22 bollin Exp $";
2 
3 /* ===========================================================================
4 *
5 *                            PUBLIC DOMAIN NOTICE
6 *            National Center for Biotechnology Information (NCBI)
7 *
8 *  This software/database is a "United States Government Work" under the
9 *  terms of the United States Copyright Act.  It was written as part of
10 *  the author's official duties as a United States Government employee and
11 *  thus cannot be copyrighted.  This software/database is freely available
12 *  to the public for use. The National Library of Medicine and the U.S.
13 *  Government do not place any restriction on its use or reproduction.
14 *  We would, however, appreciate having the NCBI and the author cited in
15 *  any work or product based on this material.
16 *
17 *  Although all reasonable efforts have been taken to ensure the accuracy
18 *  and reliability of the software and data, the NLM and the U.S.
19 *  Government do not and cannot warrant the performance or results that
20 *  may be obtained by using this software or data. The NLM and the U.S.
21 *  Government disclaim all warranties, express or implied, including
22 *  warranties of performance, merchantability or fitness for any particular
23 *  purpose.
24 *
25 * ===========================================================================
26 *
27 * File Name:  actutils.c
28 *
29 * Author:  Sarah Wheelan
30 *
31 * Version Creation Date:   2/00
32 *
33 * $Revision: 6.56 $
34 *
35 * File Description: utility functions for alignments
36 *
37 * Modifications:
38 * --------------------------------------------------------------------------
39 * $Log: actutils.c,v $
40 * Revision 6.56  2008/10/22 17:19:22  bollin
41 * Bug fix for truncating alignments when removing inconsistent alignments from
42 * a set.
43 *
44 * Revision 6.55  2008/05/31 18:07:15  kans
45 * TextFsa uses Int4 states
46 *
47 * Revision 6.54  2008/05/08 20:10:53  bollin
48 * Use Post instead of Message to communicate error for not finding alignment
49 * on reverse strand.
50 *
51 * Revision 6.53  2008/05/06 19:13:42  bollin
52 * When cleaning up alignments before converting a list to a global, if one
53 * is removed, the next one needs to be compared to the previous alignment,
54 * so the index in the loop needs to be reduced.
55 *
56 * Revision 6.52  2008/05/02 18:47:50  bollin
57 * Commented out diagnostic function
58 *
59 * Revision 6.51  2008/05/02 18:02:39  bollin
60 * Fixed bug in ACT_CleanUpAlignments - if an alignment span is completely
61 * contained in another alignment, remove the contained alignment.
62 * Also added a flag to Sqn_GlobalAlign2SeqEx for whether or not the alignment
63 * should be expanded to the ends of the sequence.
64 *
65 * Revision 6.50  2008/04/16 18:46:22  bollin
66 * Corrected bug in functions for creating global alignment from local alignments.
67 *
68 * Revision 6.49  2006/01/04 18:52:34  bollin
69 * changes to ACT_FindPiece to avoid memory leak
70 * changes to ACT_RemoveInconsistentAlnsFromSet to avoid global alignments that
71 * conflict by transposing elements
72 * changes to GetOldBlastAlignmentPiece to use a gap_open value that will not
73 * cause BLAST setup to fail
74 *
75 * Revision 6.48  2006/01/03 15:47:25  bollin
76 * allow alignment callback for ACT_FindPiece so that it can use the new BLAST
77 * library
78 *
79 * Revision 6.47  2005/12/19 14:32:54  bollin
80 * attempt to resolve conflicts between overlapping local alignments when
81 * constructing a global alignment - this resolved problems encountered when
82 * there are tandem repeats.
83 *
84 * Revision 6.46  2005/09/15 13:54:56  bollin
85 * don't create new entityID if bioseq already has one
86 *
87 * Revision 6.45  2005/04/20 19:17:38  lavr
88 * +<assert.h>
89 *
90 * Revision 6.44  2005/01/10 14:53:54  bollin
91 * the strand argument in SeqLocCopyRegion is confusing - rather than specifying
92 * the strand onto which the features should be placed, Seq_strand_minus
93 * indicates that the features should be reverse-complemented, while
94 * Seq_strand_plus indicates that the features should remain on their original
95 * strand.
96 *
97 * Revision 6.43  2004/11/22 16:45:24  bollin
98 * created global alignment function with callback method to allow use of new
99 * BLAST library
100 *
101 * Revision 6.42  2004/11/12 20:20:05  bollin
102 * removed unused variables
103 *
104 * Revision 6.41  2004/11/12 17:28:53  kans
105 * reverting for now - tools library cannot depend upon new blast libraries - will need to move to better place later
106 *
107 * Revision 6.40  2004/11/12 14:03:16  bollin
108 * use new BLAST code instead of old BLAST code in Sqn_GlobalAlign2Seq
109 *
110 * Revision 6.39  2004/11/02 18:53:39  bollin
111 * made act_get_eval available to other libraries
112 *
113 * Revision 6.38  2004/10/27 18:36:15  bollin
114 * make function external instead of replicating it elsewhere
115 *
116 * Revision 6.37  2004/07/30 13:36:47  bollin
117 * created separate function for reversing features on a sequence, changed code
118 * to swap plus to minus and minus to plus instead of moving all features to
119 * minus strand
120 *
121 * Revision 6.36  2004/02/11 20:51:48  bollin
122 * use error log instead of popup message to indicate that  BLAST found no sequence similarity
123 *
124 * Revision 6.35  2004/01/02 18:11:28  kans
125 * Sqn_GlobalAlign2Seq flips code break, anticodon when reverse complementing sequence
126 *
127 * Revision 6.34  2003/06/30 15:01:29  whlavina
128 * Correct minus strand handling in CreaeContinuousAln functions; previous
129 * code could corrupt alignments (stop2-start1>1 would imply len<-2 if
130 * ExtendAlnRight ever gets called).
131 *
132 * Revision 6.33  2003/05/30 17:25:35  coulouri
133 * add rcsid
134 *
135 * Revision 6.32  2002/12/19 14:02:28  johnson
136 * change c++-style comments to c-style
137 *
138 * Revision 6.31  2002/11/20 16:00:03  johnson
139 * Revamped CpG routines; now equivalent to "strict" human CpG mapviewer track
140 *
141 * Revision 6.30  2002/06/11 15:25:12  johnson
142 * two minor bug fixes to CpG functions
143 *
144 * Revision 6.29  2002/04/02 21:45:32  wheelan
145 * added CpG finding functions written by Philip Johnson
146 *
147 * Revision 6.28  2002/03/27 17:35:24  todorov
148 * 1) AMAlignIndexFreeEitherIndex instead of AMFreeAllIndexes
149 * 2) recreated ACT_MakeProfileFromSA
150 *
151 * Revision 6.27  2002/03/27 14:40:41  kans
152 * #include <alignmgr.h>
153 *
154 * Revision 6.26  2002/03/26 19:56:21  todorov
155 * alignmgr to alignmgr2 transition
156 *
157 * Revision 6.25  2001/12/28 21:22:32  wheelan
158 * bug fix in Sqn_GlobalAlign2Seqs
159 *
160 * Revision 6.24  2001/09/04 13:47:15  wheelan
161 * made several functions extern
162 *
163 * Revision 6.23  2001/07/13 14:17:57  wheelan
164 * moved Sqn_GlobalAlign2Seq and associated functions here
165 *
166 * Revision 6.22  2001/05/22 12:01:07  wheelan
167 * changes to avoid overflow on alpha platform
168 *
169 * Revision 6.21  2001/03/26 16:45:57  wheelan
170 * fixed uninitialized variables
171 *
172 * Revision 6.20  2001/01/09 23:18:55  lewisg
173 * fix memory leaks
174 *
175 * Revision 6.19  2000/10/23 18:43:30  wheelan
176 * minor bug fix
177 *
178 * Revision 6.18  2000/09/07 04:53:40  sicotte
179 * fix alignment calls, bad matrix calls, and misc alignments problems for sequence update
180 *
181 * Revision 6.16  2000/08/28 16:19:11  sicotte
182 * moved AlnMgrSeqAlignMergeTwoPairwiseEx AlnMgrSeqAlignMergeTwoPairwise AlnMgrSeqAlignMergePairwiseSet to actutils.c from alignmgr.c
183 *
184 * Revision 6.15  2000/07/21 21:41:04  sicotte
185 * fix bug in AlnMgrForcePairwiseContinuousEx, that was inserting two copies of all the seqaligns when trying to realign both ends
186 *
187 * Revision 6.14  2000/05/05 11:53:10  wheelan
188 * bug fixes in ACT_MakeProfileFromSA
189 *
190 * Revision 6.13  2000/05/04 16:45:19  wheelan
191 * changes to profile builder to accomodate IBMed BLAST results
192 *
193 * Revision 6.12  2000/05/03 19:59:42  wheelan
194 * added fix for NULL alignments
195 *
196 * Revision 6.11  2000/05/02 12:00:21  wheelan
197 * fixed memory leaks
198 *
199 * Revision 6.10  2000/05/01 19:54:26  wheelan
200 * fixed memory leak
201 *
202 * Revision 6.9  2000/04/22 15:54:57  wheelan
203 * bug fixes in profile maker
204 *
205 * Revision 6.8  2000/04/18 13:57:14  wheelan
206 * added AlnMgrForcePairwiseContinuousEx
207 *
208 * Revision 6.7  2000/04/11 14:50:28  wheelan
209 * bug fix in AlnMgrForceContinuous
210 *
211 * Revision 6.6  2000/03/16 12:53:45  wheelan
212 * bug fix in AlnMgrForceContinuous
213 *
214 * Revision 6.5  2000/03/14 11:25:47  wheelan
215 * added ACT_ProfileFree functions
216 *
217 * Revision 6.4  2000/03/10 16:57:37  wheelan
218 * fixed AlnMgrForceContinuous
219 *
220 * Revision 6.3  2000/03/07 17:55:02  wheelan
221 * bug fixes in AlnMgrForcePairwiseContinuous
222 *
223 * Revision 6.2  2000/03/02 21:11:06  lewisg
224 * use bandalign for import sequence, make standalone ddv use viewmgr, make dialogs modal, send color update
225 *
226 * Revision 6.1  2000/02/11 17:31:44  kans
227 * initial checkin of functions depending upon blast/bandalign (SW)
228 *
229 * ==========================================================================
230 */
231 
232 #include <actutils.h>
233 #include <assert.h>
234 #include <viewmgr.h>
235 #include <alignmgr.h>
236 #include <salptool.h>
237 
238 static void StateTableSearch (TextFsaPtr tbl, CharPtr txt, Int4Ptr state, Int4 pos, ACT_sitelistPtr PNTR asp_prev, ACT_sitelistPtr PNTR asp_head);
239 static Boolean am_isa_gap(Int4 start, Int4 prevstop, Uint1 strand);
240 static void am_fix_strand(SeqAlignPtr sap, Uint1 strand1, Uint1 strand2);
241 
242 #define CG_MINLEN 500 /*minimum length for CpG island; should be equivalent to mapviewer track setting! */
243 
244 /*-----------------------------------------------------------------------------
245   PRE : locked nucleotide bioseq ptr
246   POST: entire sequence loaded into memory in iupacna format
247 -----------------------------------------------------------------------------*/
LoadSequence(BioseqPtr bsp)248 static CharPtr LoadSequence(BioseqPtr bsp)
249 {
250     SeqPortPtr spp;
251     CharPtr buf;
252     Int4 readCount;
253 
254     buf = (CharPtr) MemNew(bsp->length * sizeof(Char));
255     if (!buf) {
256         ErrPostStr(SEV_ERROR, 0,0, "Error allocating memory for sequence!");
257         return NULL;
258     }
259 
260     spp = SeqPortNew(bsp, 0, bsp->length-1, Seq_strand_unknown,
261                      Seq_code_iupacna);
262 
263     readCount = 0;
264     while (readCount < bsp->length) {
265         readCount += SeqPortRead(spp, (UcharPtr) buf+readCount, 25000);
266         assert(readCount != 0);
267     }
268     SeqPortFree(spp);
269 
270     return buf;
271 }
272 
273 /*-----------------------------------------------------------------------------
274   PRE : sequence, position to be added, cgIsland information
275   POST: cgIsle adjusted according to the nucleotide at position i & i-1
276 -----------------------------------------------------------------------------*/
AddPosition(CharPtr seq,Int4 i,ACT_CGInfoPtr cgIsle)277 static void AddPosition(CharPtr seq, Int4 i, ACT_CGInfoPtr cgIsle)
278 {
279     switch(seq[i]) {
280     case 'A': cgIsle->a++; break;
281     case 'C': cgIsle->c++; break;
282     case 'G': cgIsle->g++;
283         if (i > 0 && seq[i-1] == 'C')
284             cgIsle->cg++;
285         break;
286     case 'T': cgIsle->t++; break;
287     case 'N': cgIsle->n++; break;
288     }
289 }
290 
291 /*-----------------------------------------------------------------------------
292   PRE : sequence, position to be removed, cgIsland information
293   POST: cgIsle adjusted according to the nucleotide at position i & i-1
294 -----------------------------------------------------------------------------*/
RemovePosition(CharPtr seq,Int4 i,ACT_CGInfoPtr cgIsle)295 static void RemovePosition(CharPtr seq, Int4 i, ACT_CGInfoPtr cgIsle)
296 {
297     switch(seq[i]) {
298     case 'A': cgIsle->a--; break;
299     case 'C': cgIsle->c--; break;
300     case 'G': cgIsle->g--;
301         if (i > 0 && seq[i-1] == 'C')
302             cgIsle->cg--;
303         break;
304     case 'T': cgIsle->t--; break;
305     case 'N': cgIsle->n--; break;
306     }
307 }
308 
309 /*-----------------------------------------------------------------------------
310   PRE : sequence, length of sequence, (ptr to) cgIsle structure with to &
311   from fields filled in
312   POST: cgInfo filled with stats (#a's, c's, etc.) for window (to ->|
313   from)
314 -----------------------------------------------------------------------------*/
CalcWindowStats(CharPtr seq,ACT_CGInfoPtr cgIsle)315 static void CalcWindowStats(CharPtr seq, ACT_CGInfoPtr cgIsle)
316 {
317     Int4 i;
318 
319     cgIsle->a = cgIsle->t = cgIsle->g = cgIsle->c = cgIsle->cg = 0;
320     cgIsle->length = cgIsle->to - cgIsle->from + 1;
321 
322     for (i = cgIsle->from; i <= cgIsle->to; i++)
323         AddPosition(seq, i, cgIsle);
324 }
325 
326 /*-----------------------------------------------------------------------------
327   PRE : island structure filled
328   POST: whether or not we consider this to be a CpG island
329 -----------------------------------------------------------------------------*/
IsIsland(ACT_CGInfoPtr isle)330 static Boolean IsIsland(ACT_CGInfoPtr isle)
331 {
332     Uint4 len = isle->to-isle->from+1;
333 
334     return (100*(isle->c + isle->g) > 50*len &&
335             100* isle->cg*len > 60* isle->c*isle->g);
336 }
337 
338 
339 /*-----------------------------------------------------------------------------
340   PRE : sequence, length of sequence, win->from field filled in
341   POST: whether or not we found a window further down the sequence that
342   meets the mimimum criteria; if so, 'win' is set to that window
343 -----------------------------------------------------------------------------*/
SlideToHit(CharPtr seq,Int4 seqLength,ACT_CGInfoPtr win)344 static Boolean SlideToHit(CharPtr seq, Int4 seqLength, ACT_CGInfoPtr win)
345 {
346     Boolean inIsland, done;
347 
348     win->to = win->from + CG_WINDOWSIZE - 1;
349 
350     if (win->to >= seqLength)
351         return FALSE;
352 
353     CalcWindowStats(seq, win);
354 
355     inIsland = FALSE;
356     done = FALSE;
357 
358     while (win->to < seqLength && !IsIsland(win)) {
359         /* remove 1 nt from left side */
360         RemovePosition(seq, win->from, win);
361 
362         /* advance */
363         ++win->from;
364         ++win->to;
365 
366         if (win->to < seqLength) {
367             /* add 1 nt onto right side */
368             AddPosition(seq, win->to, win);
369         }
370     }
371 
372     return IsIsland(win);
373 }
374 
375 /*-----------------------------------------------------------------------------
376   PRE : sequence, length of sequence, window that meets the GC & CpG criteria
377   POST: whether or not the island can be extended to reach at least the
378   minimum length; if so, isle is set to that window
379 -----------------------------------------------------------------------------*/
ExtendHit(CharPtr seq,Int4 seqLength,ACT_CGInfoPtr isle)380 static Boolean ExtendHit(CharPtr seq, Int4 seqLength, ACT_CGInfoPtr isle)
381 {
382     ACT_CGInfoPtr win = (ACT_CGInfoPtr) MemNew(sizeof(ACT_CGInfo));
383     memcpy(win, isle, sizeof(ACT_CGInfo));
384 
385     /* jump by 200bp increments */
386     while (win->to + CG_WINDOWSIZE < seqLength && IsIsland(win)) {
387         win->from += CG_WINDOWSIZE;
388         win->to += CG_WINDOWSIZE;
389         CalcWindowStats(seq, win);
390     }
391 
392     /* if we overshot, slide back by 1bp increments */
393     while (!IsIsland(win)) {
394         RemovePosition(seq, win->to, win);
395         --win->from;
396         --win->to;
397         AddPosition(seq, win->from, win);
398     }
399 
400     /* trim ends of entire island until we're above criteria again */
401     isle->to = win->to;
402     CalcWindowStats(seq, isle);
403     while(!IsIsland(isle) && (isle->from < isle->to)) {
404         RemovePosition(seq, isle->to, isle);
405         RemovePosition(seq, isle->from, isle);
406         --isle->to;
407         ++isle->from;
408     }
409 
410     free(win);
411 
412     if (isle->from >= isle->to) {/* in case we trimmed to nothing */
413         isle->to = isle->from;
414         return FALSE;
415     }
416     return (isle->to - isle->from + 1 > CG_MINLEN);
417 }
418 
419 /*-----------------------------------------------------------------------------
420   PRE : locked nucleotide bioseq ptr
421   POST: linked list of CpG islands (if any).  Uses algorithm from Takai and
422   Jones, Proc Natl Acad Sci U S A 2002 Mar 19;99(6):3740-5.
423   Cutoffs used in the "strict" human CpG map viewer track as of 11/20/02:
424   CG_WINDOWSIZE=200, CG_MINLEN=500, CpG obs/exp >= 0.6, G+C >= 0.5
425 -----------------------------------------------------------------------------*/
ACT_FindCpG(BioseqPtr bsp)426 NLM_EXTERN ACT_CGInfoPtr ACT_FindCpG(BioseqPtr bsp)
427 {
428     CharPtr seq;
429     ACT_CGInfoPtr cgHead, isle;
430 
431     if (bsp == NULL || bsp->length < CG_WINDOWSIZE)
432         return NULL;
433     if (bsp->mol == Seq_mol_aa) {
434         Message(SEV_WARNING, "Must use nucleotide sequence\n");
435         return NULL;
436     }
437 
438     if (!(seq = LoadSequence(bsp)))
439         return NULL;/* error message displayed in LoadSequence */
440 
441 
442     cgHead = NULL;
443     isle = (ACT_CGInfoPtr) MemNew(sizeof(ACT_CGInfo));
444 
445     isle->from = 0;
446 
447     while (SlideToHit(seq, bsp->length, isle)) {
448         if (ExtendHit(seq, bsp->length, isle)) {
449             isle->next = cgHead;
450             cgHead = isle;
451             isle = (ACT_CGInfoPtr) MemNew(sizeof(ACT_CGInfo));
452             isle->to = cgHead->to;
453         }
454         isle->from = isle->to + 1;
455     }
456 
457     free(isle);
458     free(seq);
459     return cgHead;
460 }
461 
462 /*NLM_EXTERN ACT_CGInfoPtr ACT_FindCpG(BioseqPtr bsp)
463 {
464    ACT_CGInfoPtr    acg;
465    ACT_CGInfoPtr    acg_head;
466    ACT_CGInfoPtr    acg_prev;
467    ACT_sitelistPtr  asp;
468    ACT_sitelistPtr  asp_head;
469    ACT_sitelistPtr  asp_prev;
470    ACT_sitelistPtr  asp_tmp;
471    Uint1Ptr         buf;
472    CharPtr          c;
473    Int2             j;
474    Int4             max_len;
475    Int4             offset;
476    Int4             pos;
477    Uint1            prev;
478    Uint1            r;
479    Uint1            res1;
480    Uint1            res2;
481    Uint1            residue;
482    Int4             start;
483    Int4             state;
484    Int4             state_r;
485    Int4             state_test;
486    TextFsaPtr       tbl;
487    Int4             x;
488    FloatHi          y;
489 
490    if (bsp == NULL)
491       return NULL;
492    if (bsp->mol == Seq_mol_aa)
493    {
494       Message(SEV_WARNING, "Must use nucleotide sequence\n");
495       return NULL;
496    }
497    if (bsp->length < MAX_LEN)
498       max_len = bsp->length+1;
499    else
500       max_len = MAX_LEN;
501    buf = (Uint1Ptr)MemNew((max_len)*sizeof(Uint1));
502    state = 0;
503    prev = 0;
504    pos = 0;
505    state_r = 0;
506    asp_prev = asp_head = NULL;
507    tbl = TextFsaNew();
508    if (tbl == NULL)
509       return NULL;
510    TextFsaAdd(tbl, "CCCGGG");
511    TextFsaAdd(tbl, "CCGCGG");
512    state_test = 0;
513    acg = (ACT_CGInfoPtr)MemNew(sizeof(ACT_CGInfo));
514    acg_head = NULL;
515    offset = 0;
516    while ((residue = ACT_GetResidue(pos, buf, &offset, bsp)) != 0)
517    {
518       if (residue == 65)
519          c = "A";
520       else if (residue == 67)
521          c = "C";
522       else if (residue == 71)
523          c = "G";
524       else if (residue == 84)
525          c = "T";
526       else
527          c = "N";
528       StateTableSearch(tbl, c, &state_test, pos, &asp_prev, &asp_head);
529       acg->length++;
530       acg->to = pos;
531       pos++;
532       if (residue == 65)
533          acg->a++;
534       else if (residue == 67)
535          acg->c++;
536       else if (residue == 71)
537       {
538          if (prev == 67)
539             acg->cg++;
540          acg->g++;
541       } else if (residue == 84)
542          acg->t++;
543       else
544          acg->n++;
545       prev = residue;
546       if (acg->length >= 200)
547       {
548          if (state == 0)
549          {
550             if (100*(acg->cg)*(acg->length) > 60*(acg->c)*(acg->g) && 10*(acg->c+acg->g) > 6*(acg->length))
551             {
552                state = 1;
553             } else
554             {
555                res1 = ACT_GetResidue(acg->from, buf, &offset, bsp);
556                if (res1 == 67)
557                {
558                   res2 = ACT_GetResidue(acg->from+1, buf, &offset, bsp);
559                   if (res2 == 71)
560                      acg->cg--;
561                }
562                if (res1 == 65)
563                   acg->a--;
564                else if (res1 == 67)
565                   acg->c--;
566                else if (res1 == 71)
567                   acg->g--;
568                else if (res1 == 84)
569                   acg->t--;
570                else
571                   acg->n--;
572                acg->from++;
573                acg->length--;
574             }
575          } else if (state == 1)
576          {
577             if (100*(acg->cg)*(acg->length) <= 60*(acg->c)*(acg->g) || 10*(acg->c+acg->g) < 6*(acg->length))
578             {
579                state = 0;
580                if (acg_head)
581                {
582                   acg_prev->next = acg;
583                   acg_prev = acg;
584                } else
585                   acg_head = acg_prev = acg;
586                j=0;
587                if (acg->from - 2000 < 0)
588                   start = 0;
589                else
590                   start = acg->from - 2000;
591                r = 1;
592                acg->sequence = (CharPtr)MemNew(20000*sizeof(Char));
593                for (x=start; x<(acg->to+2000) && r > 0; x++, j++)
594                {
595                   r = ACT_GetResidue(x, buf, &offset, bsp);
596                   if (r == 65)
597                      acg->sequence[j] = 'A';
598                   else if (r == 67)
599                      acg->sequence[j] = 'C';
600                   else if (r == 71)
601                      acg->sequence[j] = 'G';
602                   else if (r == 84)
603                      acg->sequence[j] = 'T';
604                   else
605                      acg->sequence[j] = 'N';
606                }
607                y = (FloatHi)((acg->cg)*(acg->length))/(FloatHi)((acg->c)*(acg->g));
608                printf("Coordinates: %d to %d CpG: %d to %d conf: %f\n%s\n", start, x, acg->from, acg->to, y, acg->sequence);
609                MemFree(acg->sequence);
610                acg = (ACT_CGInfoPtr)MemNew(sizeof(ACT_CGInfo));
611                acg->from = pos;
612                acg->length = 1;
613                if (residue == 65)
614                   acg->a++;
615                else if (residue == 67)
616                   acg->c++;
617                else if (residue == 71)
618                   acg->g++;
619                else if (residue == 84)
620                   acg->t++;
621                else
622                   acg->n++;
623             } else if (acg->length > 1000)
624             {
625                res1 = ACT_GetResidue(acg->from, buf, &offset, bsp);
626                if (res1 == 65 || res1 == 84)
627                {
628                   acg->from++;
629                   acg->length--;
630                   if (res1 == 65)
631                      acg->a--;
632                   else if (res1 == 84)
633                      acg->t--;
634                }
635             }
636          }
637       }
638    }
639     check for restriction sites in potential islands found
640    acg = acg_head;
641    return acg_head;
642    acg_prev = NULL;  Statement not reached...
643    while (acg)
644    {
645       asp_prev = NULL;
646       asp = asp_head;
647       while (asp)
648       {
649          if (asp->start >= acg->from && asp->start < acg->to - 9)
650          {
651             if (asp_prev)
652             {
653                asp_prev->next = asp->next;
654                asp_tmp = asp->next;
655             } else
656                asp_head = asp_tmp = asp->next;
657             asp->next = acg->asp;
658             acg->asp = asp;
659             asp = asp_tmp;
660          } else
661          {
662             asp_prev = asp;
663             asp = asp->next;
664          }
665       }
666       if (acg->asp == NULL)
667       {
668          if (acg_prev != NULL)
669          {
670             acg_prev->next = acg->next;
671             acg->next = NULL;
672             MemFree(acg);
673             acg = acg_prev->next;
674          } else
675          {
676             acg_head = acg->next;
677             acg->next = NULL;
678             MemFree(acg);
679             acg = acg_head;
680          }
681       } else
682       {
683          if (acg->asp->next == NULL)
684          {
685             if (acg_prev != NULL)
686             {
687                acg_prev->next = acg->next;
688                acg->next = NULL;
689                MemFree(acg);
690                acg = acg_prev->next;
691             } else
692             {
693                acg_head = acg->next;
694                acg->next = NULL;
695                MemFree(acg);
696                acg = acg_head;
697             }
698          } else
699          {
700             acg_prev = acg;
701             acg = acg->next;
702          }
703       }
704    }
705    MemFree(buf);
706    return acg_head;
707 }*/
708 
ACT_GetResidue(Int4 pos,Uint1Ptr buf,Int4Ptr offset,BioseqPtr bsp)709 NLM_EXTERN Uint1 ACT_GetResidue(Int4 pos, Uint1Ptr buf, Int4Ptr offset, BioseqPtr bsp)
710 {
711    Int4        bufsize;
712    SeqPortPtr  spp;
713 
714    if (offset == NULL)
715       return 0;
716    if (pos > bsp->length - 1)
717       return 0;
718    if (buf[0] == 0 || pos > (*offset + MAX_LEN - 1) || pos < *offset)
719    {
720       if (pos > *offset + MAX_LEN - 1)
721          *offset = *offset + MAX_LEN;
722       else if (pos < *offset)
723          *offset = pos;
724       else
725          *offset = 0;
726       if (bsp->length < MAX_LEN)
727       {
728          bufsize = bsp->length;
729          spp = SeqPortNew(bsp, 0, -1, 0, Seq_code_iupacna);
730       } else if (bsp->length < *offset + MAX_LEN-1)
731       {
732          bufsize = bsp->length - *offset + 1;
733          spp = SeqPortNew(bsp, *offset, bsp->length-1, 0, Seq_code_iupacna);
734       } else
735       {
736          bufsize = MAX_LEN;
737          spp = SeqPortNew(bsp, *offset, *offset+MAX_LEN-1, 0, Seq_code_iupacna);
738       }
739       if (spp == NULL)
740       {
741          Message(SEV_WARNING, "Couldn't create SeqPort\n");
742          return 0;
743       }
744       SeqPortRead(spp, buf, bufsize);
745    }
746    return (buf[pos-(*offset)]);
747 }
748 
StateTableSearch(TextFsaPtr tbl,CharPtr txt,Int4Ptr state,Int4 pos,ACT_sitelistPtr PNTR asp_prev,ACT_sitelistPtr PNTR asp_head)749 static void StateTableSearch (TextFsaPtr tbl, CharPtr txt, Int4Ptr state, Int4 pos, ACT_sitelistPtr PNTR asp_prev, ACT_sitelistPtr PNTR asp_head)
750 {
751    ACT_sitelistPtr  asp;
752    Char             ch;
753    ValNodePtr       matches;
754    CharPtr          ptr;
755    ValNodePtr       vnp;
756 
757    if (tbl == NULL || txt == NULL) return;
758 
759    for (ptr = txt, ch = *ptr; ch != '\0'; ptr++, ch = *ptr) {
760       *state = TextFsaNext (tbl, *state, ch, &matches);
761       for (vnp = matches; vnp != NULL; vnp = vnp->next) {
762          asp = (ACT_sitelistPtr)MemNew(sizeof(ACT_sitelist));
763          if (asp_prev)
764          {
765             if (*asp_prev)
766             {
767                (*asp_prev)->next = asp;
768                *asp_prev = asp;
769             } else
770                *asp_prev = *asp_head = asp;
771             asp->name = (CharPtr) vnp->data.ptrvalue;
772             asp->start = pos;
773          }
774       }
775    }
776 }
777 
ACT_ProfileNew(Boolean nuc)778 NLM_EXTERN ACTProfilePtr ACT_ProfileNew(Boolean nuc)
779 {
780    ACTProfilePtr  app;
781    FloatHiPtr     PNTR freq;
782 
783    app = (ACTProfilePtr)MemNew(sizeof(ACTProfile));
784    if (nuc)
785    {
786       freq = (FloatHiPtr PNTR)MemNew(ACT_NUCLEN*sizeof(FloatHiPtr));
787       app->freq = freq;
788       app->nuc = TRUE;
789    } else
790    {
791       freq = (FloatHiPtr PNTR)MemNew(ACT_PROTLEN*sizeof(FloatHiPtr));
792       app->freq = freq;
793       app->nuc = FALSE;
794    }
795    return app;
796 }
797 
798 /***************************************************************************
799 *
800 *  ACT_ProfileFree frees a single profile; ACT_ProfileSetFree frees an
801 *  entire linked list of profiles.
802 *
803 ***************************************************************************/
ACT_ProfileFree(ACTProfilePtr app)804 NLM_EXTERN ACTProfilePtr ACT_ProfileFree(ACTProfilePtr app)
805 {
806    Int4  i;
807    Int4  j;
808 
809    if (app == NULL)
810       return NULL;
811    if (app->nuc)
812       j = ACT_NUCLEN;
813    else
814       j = ACT_PROTLEN;
815    for (i=0; i<j; i++)
816    {
817       MemFree(app->freq[i]);
818    }
819    MemFree(app->freq);
820    app->next = NULL;
821    MemFree(app);
822    return NULL;
823 }
824 
ACT_ProfileSetFree(ACTProfilePtr app)825 NLM_EXTERN ACTProfilePtr ACT_ProfileSetFree(ACTProfilePtr app)
826 {
827    ACTProfilePtr  app_next;
828 
829    while (app != NULL)
830    {
831       app_next = app->next;
832       app->next = NULL;
833       ACT_ProfileFree(app);
834       app = app_next;
835    }
836    return NULL;
837 }
838 
ACT_BuildProfile(SeqLocPtr slp,ACTProfilePtr PNTR app,Int4Ptr count,Int4 length)839 NLM_EXTERN void ACT_BuildProfile(SeqLocPtr slp, ACTProfilePtr PNTR app, Int4Ptr count, Int4 length)
840 {
841    Int4        i;
842    Int4        len;
843    Uint1       res;
844    SeqPortPtr  spp;
845 
846    if (app == NULL)
847       return;
848    if (slp == NULL)
849    {
850       *count = *count+length;
851       if ((*app)->len <= *count)
852       {
853          *count = 0;
854          *app = (*app)->next;
855       }
856       return;
857    }
858    len = SeqLocLen(slp);
859    if (len <= 0)
860       return;
861    if ((*app)->len == 0)
862    {
863       (*app)->len = len;
864       if ((*app)->nuc)
865       {
866          for (i=0; i<ACT_NUCLEN; i++)
867          {
868             (*app)->freq[i] = (FloatHiPtr)MemNew((*app)->len*sizeof(FloatHi));
869          }
870       } else
871       {
872          for (i=0; i<ACT_PROTLEN; i++)
873          {
874             (*app)->freq[i] = (FloatHiPtr)MemNew((*app)->len*sizeof(FloatHi));
875          }
876       }
877    } else
878    {
879       if (len > (*app)->len) /* seqloc is longer than the */
880          return;          /* existing profile -- don't add it     */
881    }
882    if ((*app)->nuc)
883       spp = SeqPortNewByLoc(slp, Seq_code_ncbi4na);
884    else
885       spp = SeqPortNewByLoc(slp, Seq_code_ncbistdaa);
886    if (spp == NULL)
887       return;
888    if (*count == 0)
889      (*app)->numseq++;
890    i=0;
891    if ((*app)->nuc == FALSE)
892    {
893       while ((res = SeqPortGetResidue(spp)) != SEQPORT_EOF && i+*count<((*app)->len))
894       {
895          (*app)->freq[res][i+*count]++;
896          i++;
897       }
898    } else
899    {
900       while ((res = SeqPortGetResidue(spp)) != SEQPORT_EOF && i+*count<((*app)->len))
901       {
902          if (res == 1)
903          {
904             (*app)->freq[0][i+*count]++;
905          } else if (res == 2)
906          {
907             (*app)->freq[1][i+*count]++;
908          } else if (res == 4)
909          {
910             (*app)->freq[2][i+*count]++;
911          } else if (res == 8)
912          {
913             (*app)->freq[3][i+*count]++;
914          } else
915          {
916             (*app)->freq[4][i+*count]++;
917          }
918          i++;
919       }
920    }
921    SeqPortFree(spp);
922    if (len+*count == (*app)->len)
923    {
924       *app = (*app)->next;
925       *count = 0;
926    } else
927       *count = *count + len;
928    return;
929 }
930 
ACT_ScoreProfile(BioseqPtr bsp,Int4 pos,Uint1 strand,ACTProfilePtr app)931 NLM_EXTERN FloatHi ACT_ScoreProfile(BioseqPtr bsp, Int4 pos, Uint1 strand, ACTProfilePtr app)
932 {
933    Int4        i;
934    Uint1       res;
935    FloatHi     retval;
936    SeqPortPtr  spp;
937 
938    retval = 0;
939    if (bsp == NULL || app == NULL || pos < 0)
940       return retval;
941    if (pos + app->len-1 >= bsp->length)
942       return retval;
943    if (ISA_na(bsp->mol))
944    {
945       spp = SeqPortNew(bsp, pos, pos+app->len-1, strand, Seq_code_ncbi4na);
946       if (spp == NULL)
947          return retval;
948       i = 0;
949       while ((res = SeqPortGetResidue(spp)) != SEQPORT_EOF && i<app->len)
950       {
951          if (res == 1)
952          {
953             retval += app->freq[0][i];
954          } else if (res == 2)
955          {
956             retval += app->freq[1][i];
957          } else if (res == 4)
958          {
959             retval += app->freq[2][i];
960          } else if (res == 8)
961          {
962             retval += app->freq[3][i];
963          } else
964          {
965             retval += app->freq[4][i];
966          }
967          i++;
968       }
969       retval = retval / app->len;
970       return retval;
971    } else
972    {
973       spp = SeqPortNew(bsp, pos, pos+app->len-1, strand, Seq_code_ncbistdaa);
974       if (spp == NULL)
975          return retval;
976       i = 0;
977       while ((res = SeqPortGetResidue(spp)) != SEQPORT_EOF && i<app->len)
978       {
979          retval += app->freq[res][i];
980          i++;
981       }
982       retval = retval / app->len;
983       return retval;
984    }
985 }
986 
ACT_EstimateConfidence(ACTProfilePtr app)987 NLM_EXTERN void ACT_EstimateConfidence(ACTProfilePtr app)
988 {
989    FloatHi  conf;
990    Int4     i;
991    Int4     j;
992    Int4     max;
993    Int4     numres;
994 
995    if (app == NULL)
996       return;
997    if (app->nuc)
998       numres = ACT_NUCLEN;
999    else
1000       numres = ACT_PROTLEN;
1001    conf = 1;
1002    while (app)
1003    {
1004       for (i=0; i<app->len; i++)
1005       {
1006          max = 0;
1007          for (j=0; j<numres; j++)
1008          {
1009             if (app->freq[j][i] > max)
1010                max = app->freq[j][i];
1011          }
1012          if (max > 0)
1013             conf = conf*max;
1014          if (conf > INT4_MAX)
1015             conf = INT4_MAX;
1016       }
1017       app->confidence = conf;
1018       app = app->next;
1019    }
1020    return;
1021 }
1022 
ACT_SortProfilesByConfidence(ACTProfilePtr app)1023 NLM_EXTERN ACTProfilePtr ACT_SortProfilesByConfidence(ACTProfilePtr app)
1024 {
1025    ACTProfilePtr  app_head;
1026    ACTProfilePtr  PNTR array;
1027    Int4           count;
1028    Int4           i;
1029 
1030    if (app == NULL)
1031       return NULL;
1032    app_head = app;
1033    count = 0;
1034    while (app != NULL)
1035    {
1036       count++;
1037       app = app->next;
1038    }
1039    array = (ACTProfilePtr PNTR)MemNew(count*sizeof(ACTProfilePtr));
1040    app = app_head;
1041    count = 0;
1042    while (app != NULL)
1043    {
1044       array[count] = app;
1045       count++;
1046       app = app->next;
1047    }
1048    HeapSort((Pointer)array, (size_t)count, sizeof(ACTProfilePtr), ACT_CompareProfileConfidence);
1049    app_head = app = array[0];
1050    for (i=1; i<count; i++)
1051    {
1052       app->next = array[i];
1053       app = app->next;
1054    }
1055    return app_head;
1056 }
1057 
ACT_CompareProfileConfidence(VoidPtr base,VoidPtr large_son)1058 NLM_EXTERN int LIBCALLBACK ACT_CompareProfileConfidence(VoidPtr base, VoidPtr large_son)
1059 {
1060    ACTProfilePtr  app1;
1061    ACTProfilePtr  app2;
1062 
1063    app1 = *((ACTProfilePtr PNTR) base);
1064    app2 = *((ACTProfilePtr PNTR) large_son);
1065    if (app1 == NULL || app2 == NULL)
1066       return 0;
1067    if (app1->confidence > app2->confidence)
1068       return -1;
1069    else if (app1->confidence < app2->confidence)
1070       return 1;
1071    else
1072       return 0;
1073 }
1074 
ACT_MakeProfileFromSA(SeqAlignPtr sap)1075 NLM_EXTERN ACTProfilePtr ACT_MakeProfileFromSA(SeqAlignPtr sap)
1076 {
1077    AMAlignIndex2Ptr  amaip;
1078    AlnMsg2Ptr        amp;
1079    ACTProfilePtr    app;
1080    ACTProfilePtr    app_head;
1081    ACTProfilePtr    app_prev;
1082    BioseqPtr        bsp;
1083    Int4             count;
1084    Int4             i;
1085    Int4             j;
1086    Boolean          more;
1087    Boolean          nuc;
1088    Int4             numrows;
1089    SeqIdPtr         sip;
1090    SeqLocPtr        slp;
1091 
1092    if (sap == NULL)
1093       return NULL;
1094    if (sap->saip == NULL)
1095       return NULL;
1096    if (sap->saip->indextype == INDEX_PARENT)
1097    {
1098       amaip = (AMAlignIndex2Ptr)(sap->saip);
1099       if (amaip->alnstyle == AM2_LITE)
1100          return NULL;
1101    }
1102    sip = AlnMgr2GetNthSeqIdPtr(sap, 1);
1103    bsp = BioseqLockById(sip);
1104    if (bsp == NULL)
1105       return NULL;
1106    if (ISA_na(bsp->mol))
1107       nuc = TRUE;
1108    else
1109       nuc = FALSE;
1110    BioseqUnlockById(sip);
1111    sip = SeqIdFree(sip);
1112    amp = AlnMsgNew2();
1113    amp->to_aln = -1;
1114    amp->row_num = 1;
1115    app_head = NULL;
1116 /*
1117    if (sap->saip->indextype == INDEX_PARENT)
1118    {
1119       for (i=0; i<amaip->numseg; i++)
1120       {
1121          app = ACT_ProfileNew(nuc);
1122          app->len = amaip->lens[i];
1123          if (nuc)
1124          {
1125             for (j=0; j<ACT_NUCLEN; j++)
1126             {
1127                app->freq[j] = (FloatHiPtr)MemNew(app->len*sizeof(FloatHi));
1128             }
1129          } else
1130          {
1131             for (j=0; j<ACT_PROTLEN; j++)
1132             {
1133                app->freq[j] = (FloatHiPtr)MemNew(app->len*sizeof(FloatHi));
1134             }
1135          }
1136          if (app_head != NULL)
1137          {
1138             app_prev->next = app;
1139             app_prev = app;
1140          } else
1141             app_head = app_prev = app;
1142       }
1143    } else
1144    {
1145 */
1146       while ((Boolean) (more = AlnMgr2GetNextAlnBit(sap, amp)))
1147       {
1148          app = ACT_ProfileNew(nuc);
1149          app->len = amp->to_row - amp->from_row + 1;;
1150          if (nuc)
1151          {
1152             for (j=0; j<ACT_NUCLEN; j++)
1153             {
1154                app->freq[j] = (FloatHiPtr)MemNew(app->len*sizeof(FloatHi));
1155             }
1156          } else
1157          {
1158             for (j=0; j<ACT_PROTLEN; j++)
1159             {
1160                app->freq[j] = (FloatHiPtr)MemNew(app->len*sizeof(FloatHi));
1161             }
1162          }
1163          if (app_head != NULL)
1164          {
1165             app_prev->next = app;
1166             app_prev = app;
1167          } else
1168             app_head = app_prev = app;
1169       }
1170 /*
1171    }
1172 */
1173    numrows = AlnMgr2GetNumRows(sap);
1174    for (i=1; i<=numrows; i++)
1175    {
1176       AlnMsgReNew2(amp);
1177       amp->to_aln = -1;
1178       amp->row_num = i;
1179       app = app_head;
1180 
1181       sip = AlnMgr2GetNthSeqIdPtr(sap, i);
1182       bsp = BioseqLockById(sip);
1183       count = 0;
1184       while ((Boolean) (more = AlnMgr2GetNextAlnBit(sap, amp)) && app != NULL)
1185       {
1186          if (amp->type == AM_SEQ && bsp != NULL)
1187          {
1188             slp = SeqLocIntNew(amp->from_row, amp->to_row, amp->strand, sip);
1189             ACT_BuildProfile(slp, &app, &count, 0);
1190             SeqLocFree(slp);
1191          } else if (amp->type == AM_GAP)
1192             ACT_BuildProfile(NULL, &app, &count, (amp->to_row - amp->from_row + 1));
1193       }
1194       BioseqUnlockById(sip);
1195       sip = SeqIdFree(sip);
1196    }
1197    ACT_EstimateConfidence(app_head);
1198    AlnMsgFree2(amp);
1199    return app_head;
1200 }
1201 
ACT_SortAndTruncate(ACT_TopScorePtr PNTR ats)1202 NLM_EXTERN ACT_TopScorePtr PNTR ACT_SortAndTruncate(ACT_TopScorePtr PNTR ats)
1203 {
1204    return NULL;
1205 }
1206 
ACT_PlaceByScore(ACT_PlaceBoundsPtr abp)1207 NLM_EXTERN ACT_PositionPtr ACT_PlaceByScore(ACT_PlaceBoundsPtr abp)
1208 {
1209    Int4             i;
1210    FloatHi          score;
1211 
1212    score = ACT_CalcScore(abp);
1213    if (score > abp->apos->score)
1214    {
1215       abp->apos->score = score;
1216       for (i=0; i<abp->nprof; i++)
1217       {
1218          abp->apos->posarray[i] = abp->boundarray[i];
1219       }
1220    }
1221    while (abp->currpos[abp->currprof] < abp->numats[abp->currprof] - 1)
1222    {
1223       abp->currpos[abp->currprof]++;
1224       abp->currats[abp->currprof] = abp->currats[abp->currprof]->next;
1225       abp->boundarray[abp->currprof] = abp->currats[abp->currprof]->pos;
1226       score = ACT_CalcScore(abp);
1227       if (score > abp->apos->score)
1228       {
1229          abp->apos->score = score;
1230          for (i=0; i<abp->nprof; i++)
1231          {
1232             abp->apos->posarray[i] = abp->boundarray[i];
1233          }
1234       }
1235    }
1236    while(abp->currpos[abp->currprof] >= abp->numats[abp->currprof]-1 && abp->currprof >= 0)
1237    {
1238       abp->currprof--;
1239    }
1240    if (abp->currprof < 0)
1241       return (abp->apos);
1242    for (i=abp->currprof+1; i<abp->nprof; i++)
1243    {
1244       abp->currpos[i] = 0;
1245       abp->currats[i] = abp->ats[i];
1246       abp->boundarray[i] = abp->currats[i]->pos;
1247       while (abp->boundarray[i] <= abp->boundarray[i-1])
1248       {
1249          if (abp->currpos[abp->currprof] >= abp->numats[abp->currprof]-1)
1250             return (abp->apos);
1251          abp->currpos[i]+=1;
1252          abp->currats[i] = abp->currats[i]->next;
1253          abp->boundarray[i] = abp->currats[i]->pos;
1254       }
1255    }
1256    abp->currpos[abp->currprof]++;
1257    abp->currats[abp->currprof] = abp->currats[abp->currprof]->next;
1258    abp->boundarray[abp->currprof] = abp->currats[abp->currprof]->pos;
1259    abp->currprof = abp->nprof-1;
1260    abp->apos = ACT_PlaceByScore(abp);
1261    return (abp->apos);
1262 }
1263 
ACT_CalcScore(ACT_PlaceBoundsPtr abp)1264 NLM_EXTERN FloatHi ACT_CalcScore(ACT_PlaceBoundsPtr abp)
1265 {
1266    ACTProfilePtr    app;
1267    ACT_TopScorePtr  ats;
1268    Int4             i;
1269    Int4             j;
1270    FloatHi          score;
1271 
1272    app = abp->app;
1273    score = 0;
1274    for (i=1; i<abp->nprof; i++)
1275    {
1276       if (app == NULL)
1277          return 0;
1278       if (abp->boundarray[i-1] + app->len >= abp->boundarray[i])
1279          return 0;
1280       j=0;
1281       ats = abp->ats[i-1];
1282       while (j<abp->currpos[i-1])
1283       {
1284          if (ats == NULL)
1285             return 0;
1286          ats = ats->next;
1287       }
1288       score += app->confidence*ats->score;
1289       app = app->next;
1290    }
1291    return score;
1292 }
1293 
ACT_FindPeakScores(FloatHiPtr scorearray,Int4 len)1294 NLM_EXTERN ACT_TopScorePtr ACT_FindPeakScores(FloatHiPtr scorearray, Int4 len)
1295 {
1296    ACT_TopScorePtr  ats;
1297    ACT_TopScorePtr  ats_head;
1298    ACT_TopScorePtr  ats_new;
1299    ACT_TopScorePtr  ats_newhead;
1300    ACT_TopScorePtr  ats_newprev;
1301    ACT_TopScorePtr  ats_prev;
1302    FloatHi          diff;
1303    FloatHi          diff_prev;
1304    Int4             i;
1305 
1306    if (scorearray == NULL)
1307       return NULL;
1308    diff = diff_prev = 0;
1309    diff_prev = scorearray[1] - scorearray[0];
1310    ats_head = NULL;
1311    for (i=1; i<len-1; i++)
1312    {
1313       diff = scorearray[i+1]-scorearray[i];
1314       if (diff < 0 && diff_prev >= 0) /* peak */
1315       {
1316          ats = (ACT_TopScorePtr)MemNew(sizeof(ACT_TopScore));
1317          ats->score = scorearray[i-1];
1318          ats->pos = i-1;
1319          if (ats_head != NULL)
1320          {
1321             ats_prev->next = ats;
1322             ats_prev = ats;
1323          } else
1324             ats_head = ats_prev = ats;
1325       }
1326       diff_prev = diff;
1327    }
1328    ats = ats_prev = ats_head;
1329    ats = ats->next;
1330    ats_newhead = NULL;
1331    diff_prev = 0;
1332    while (ats)
1333    {
1334       diff = ats->score - ats_prev->score;
1335       if (diff < 0 && diff_prev >= 0)
1336       {
1337          ats_new = (ACT_TopScorePtr)MemNew(sizeof(ACT_TopScore));
1338          ats_new->score = ats_prev->score;
1339          ats_new->pos = ats_prev->pos;
1340          if (ats_newhead != NULL)
1341          {
1342             ats_newprev->next = ats_new;
1343             ats_newprev = ats_new;
1344          } else
1345             ats_newhead = ats_newprev = ats_new;
1346       }
1347       diff_prev = diff;
1348       ats_prev = ats;
1349       ats = ats->next;
1350    }
1351    ats_prev = ats_head;
1352    while (ats_prev)
1353    {
1354       ats = ats_prev->next;
1355       ats_prev->next = NULL;
1356       MemFree(ats_prev);
1357       ats_prev = ats;
1358    }
1359    return ats_newhead;
1360 }
1361 
act_get_eval(Int4 exp)1362 NLM_EXTERN FloatHi act_get_eval(Int4 exp)
1363 {
1364   FloatHi eval;
1365   Int4 i;
1366 
1367   eval = 1;
1368   for (i=1; i<=exp; i++)
1369   {
1370      eval = eval/10;
1371   }
1372   return eval;
1373 }
1374 
am_isa_gap(Int4 start,Int4 prevstop,Uint1 strand)1375 static Boolean am_isa_gap(Int4 start, Int4 prevstop, Uint1 strand)
1376 {
1377    if (strand != Seq_strand_minus)
1378    {
1379       if (start > prevstop+1)
1380          return TRUE;
1381       else
1382          return FALSE;
1383    } else
1384    {
1385       if (prevstop > start+1)
1386          return TRUE;
1387       else
1388          return FALSE;
1389    }
1390 }
1391 
am_fix_strand(SeqAlignPtr sap,Uint1 strand1,Uint1 strand2)1392 static void am_fix_strand(SeqAlignPtr sap, Uint1 strand1, Uint1 strand2)
1393 {
1394    DenseSegPtr  dsp;
1395    Int4         i;
1396 
1397    if (sap == NULL || strand1 == 0 || strand2 == 0)
1398       return;
1399    if (sap->segtype != SAS_DENSEG)
1400       return;
1401    dsp = (DenseSegPtr)(sap->segs);
1402    if (dsp->dim != 2)
1403       return;
1404    for (i=0; i<dsp->numseg; i++)
1405    {
1406       dsp->strands[i*2] = strand1;
1407       dsp->strands[(i*2) + 1] = strand2;
1408    }
1409    return;
1410 }
1411 
1412 typedef struct sq_spin {
1413    Int4  n1;
1414    Int4  n2;
1415    Int4  n3;
1416    Int4  n4;
1417    Int4  n5;
1418 } SQN_n, PNTR SQN_nPtr;
1419 
1420 #define SQN_LEFT    1
1421 #define SQN_RIGHT   2
1422 #define SQN_MIDDLE  3
1423 
1424 #define SQN_MAXGAP  4
1425 
1426 #define SQN_WINDOW  30 /* window in which to search for missing pieces */
1427 
ACT_CompareSpins(VoidPtr ptr1,VoidPtr ptr2)1428 static int LIBCALLBACK ACT_CompareSpins (VoidPtr ptr1, VoidPtr ptr2)
1429 {
1430    SQN_nPtr  spin1;
1431    SQN_nPtr  spin2;
1432 
1433    spin1 = *((SQN_nPtr PNTR) ptr1);
1434    spin2 = *((SQN_nPtr PNTR) ptr2);
1435    if (spin1 == NULL || spin2 == NULL)
1436       return 0;
1437    if (spin1->n3 > spin2->n3)
1438       return -1;
1439    if (spin1->n3 < spin2->n3)
1440       return 1;
1441    if (spin1->n2 < spin2->n2)
1442       return -1;
1443    if (spin1->n2 > spin2->n2)
1444       return 1;
1445    return 0;
1446 }
1447 
1448 
1449 /* spin structure: n1 = which alignment, n2 = start on first row, n3 =
1450    alignment length on 1st row, n4 = start on 2nd row, n5 = 2nd strand */
SetSpinValuesForSeqAlign(SeqAlignPtr salp,SQN_nPtr spin,Int4 list_pos,Int4 first_row)1451 static void SetSpinValuesForSeqAlign (SeqAlignPtr salp, SQN_nPtr spin, Int4 list_pos, Int4 first_row)
1452 {
1453    Int4             start;
1454    Int4             stop;
1455    Int4             strand;
1456 
1457    if (salp == NULL || spin == NULL || first_row > 2)
1458    {
1459       return;
1460    }
1461 
1462    spin->n1 = list_pos;
1463    AlnMgr2GetNthSeqRangeInSA(salp, first_row, &start, &stop);
1464    spin->n3 = stop - start;
1465    spin->n2 = start;
1466    AlnMgr2GetNthSeqRangeInSA(salp, 3-first_row, &start, &stop);
1467    spin->n4 = start;
1468    strand = AlnMgr2GetNthStrand(salp, 3-first_row);
1469    if (strand == Seq_strand_minus)
1470       spin->n5 = -1;
1471    else
1472       spin->n5 = 1;
1473 
1474 }
1475 
1476 
1477 static Boolean
ResolveSpinConflict(SQN_nPtr spin1,SQN_nPtr spin2,SeqAlignPtr salp,Int4 first_row)1478 ResolveSpinConflict
1479 (SQN_nPtr    spin1,
1480  SQN_nPtr    spin2,
1481  SeqAlignPtr salp,
1482  Int4        first_row)
1483 {
1484    Boolean row1_conflict = FALSE, row2_conflict = FALSE;
1485    Int4    left_aln_overlap = 0;
1486    Int4    right_aln_overlap = 0;
1487    Int4    spin1_row1_start, spin1_row1_stop;
1488    Int4    spin2_row1_start, spin2_row1_stop;
1489    Int4    spin1_row2_start, spin1_row2_stop;
1490    Int4    spin2_row2_start, spin2_row2_stop;
1491    Int4    aln_len;
1492    Int4    strand;
1493    Boolean resolved = FALSE;
1494    SeqAlignPtr salp_next;
1495 
1496    if (spin1 == NULL || spin2 == NULL || salp == NULL)
1497    {
1498       return FALSE;
1499    }
1500 
1501    spin1_row1_start = spin1->n2;
1502    spin1_row1_stop = spin1->n2 + spin1->n3 - 1;
1503 
1504    spin2_row1_start = spin2->n2;
1505    spin2_row1_stop = spin2->n2 + spin2->n3 - 1;
1506 
1507    spin1_row2_start = spin1->n4;
1508    spin1_row2_stop = spin1->n4 + spin1->n3 - 1;
1509 
1510    spin2_row2_start = spin2->n4;
1511    spin2_row2_stop = spin2->n4 + spin2->n3 - 1;
1512 
1513    strand = spin1->n5;
1514 
1515    aln_len = SeqAlignLength (salp);
1516 
1517    /* NOTE - the spins were previously sorted, so it is impossible for
1518     * spin2 to be longer than spin1
1519     * Also, row 1 is required to be on the plus strand for both spins.
1520     * We want to find the amount by which the alignment should be truncated,
1521     * either on the left or on the right.  If the alignment needs to be truncated
1522     * on both sides, there's probably something wrong.
1523     * The alignment should not overlap on the first row or the second row.
1524     */
1525 
1526    /* make sure we aren't aligning out of order */
1527    if (strand == 1)
1528    {
1529      if ((spin1_row1_start > spin2_row1_start && spin1_row2_start < spin2_row2_start)
1530          || (spin1_row1_start < spin2_row1_start && spin1_row2_start > spin2_row2_start))
1531      {
1532         return FALSE;
1533      }
1534    }
1535    else
1536    {
1537      if ((spin1_row1_start > spin2_row1_start && spin1_row2_start > spin2_row2_start)
1538          || (spin1_row1_start < spin2_row1_start && spin1_row2_start < spin2_row2_start))
1539      {
1540         return FALSE;
1541      }
1542    }
1543 
1544    /* check first for overlap on first row */
1545    /* see if second is contained in first */
1546    if (spin2_row1_start >= spin1_row1_start
1547        && spin2_row1_stop <= spin1_row1_stop)
1548    {
1549       /*
1550        * spin1 row 1: <---------->
1551        * spin2 row 1:    <----->
1552        */
1553       /* spin2 is contained in spin1 - no overlap could be removed */
1554       return FALSE;
1555    }
1556    /* look for overlap on right */
1557    if (spin2_row1_stop > spin1_row1_start
1558        && spin2_row1_start < spin1_row1_start)
1559    {
1560       /*
1561        * spin1 row 1:    <----->
1562        * spin2 row 1: <----->
1563        */
1564       right_aln_overlap = aln_len - AlnMgr2MapBioseqToSeqAlign (salp, spin1_row1_start, 1);
1565    }
1566    /* look for overlap on left */
1567    if (spin2_row1_start < spin1_row1_stop
1568        && spin2_row1_stop > spin1_row1_stop)
1569    {
1570       /*
1571        * spin1 row 1: <----->
1572        * spin2 row 1:    <----->
1573        */
1574       left_aln_overlap = AlnMgr2MapBioseqToSeqAlign (salp, spin1_row1_stop, 1);
1575    }
1576 
1577    /* check for overlap on second row */
1578    /* see if second is contained in first */
1579    if (spin2_row2_start >= spin1_row2_start
1580        && spin2_row2_stop <= spin1_row2_stop)
1581    {
1582       /*
1583        * spin1 row 2: <---------->
1584        * spin2 row 2:    <----->
1585        */
1586       /* spin2 is contained in spin1 - no overlap could be removed */
1587       return FALSE;
1588    }
1589 
1590    /* look for overlap on right */
1591    if (spin2_row2_start < spin1_row2_start
1592        && spin2_row2_stop > spin1_row2_start)
1593    {
1594       /*
1595        * spin1 row 2:    <----->
1596        * spin2 row 2: <----->
1597        */
1598       if (strand == 1)
1599       {
1600         right_aln_overlap = MAX (right_aln_overlap,
1601                                  aln_len - AlnMgr2MapBioseqToSeqAlign (salp, spin1_row2_start, 1));
1602       }
1603       else
1604       {
1605         left_aln_overlap = MAX (left_aln_overlap, aln_len - AlnMgr2MapBioseqToSeqAlign (salp, spin1_row2_start, 1));
1606       }
1607    }
1608    /* look for overlap on left */
1609    if (spin2_row2_start < spin1_row2_stop
1610        && spin2_row2_stop > spin1_row2_stop)
1611    {
1612       /*
1613        * spin1 row 2: <----->
1614        * spin2 row 2:    <----->
1615        */
1616       if (strand == 1)
1617       {
1618          left_aln_overlap = MAX (left_aln_overlap, AlnMgr2MapBioseqToSeqAlign (salp, spin1_row2_stop, 1));
1619       }
1620       else
1621       {
1622         right_aln_overlap = MAX (right_aln_overlap, AlnMgr2MapBioseqToSeqAlign (salp, spin1_row2_stop, 1));
1623       }
1624    }
1625 
1626    /* NOTE - we only want to truncate/reindex the current alignment, not any that follow it in the chain.
1627     * For one thing, we might have already deleted the ones that come next.
1628     */
1629    salp_next = salp->next;
1630    salp->next = NULL;
1631    if (left_aln_overlap > 0 && right_aln_overlap == 0)
1632    {
1633       if (TruncateAlignment (salp, left_aln_overlap, TRUE))
1634       {
1635          SetSpinValuesForSeqAlign (salp, spin2, spin2->n1, first_row);
1636          resolved = TRUE;
1637       }
1638    }
1639    else if (left_aln_overlap == 0 && right_aln_overlap > 0)
1640    {
1641       if (TruncateAlignment (salp, right_aln_overlap, FALSE))
1642       {
1643          SetSpinValuesForSeqAlign (salp, spin2, spin2->n1, first_row);
1644          resolved = TRUE;
1645       }
1646    }
1647    /* restore chain */
1648    salp->next = salp_next;
1649 
1650    return resolved;
1651 }
1652 
1653 
SpinsConflict(SQN_nPtr spin1,SQN_nPtr spin2,Int4 fuzz,Int4 strand)1654 static Boolean SpinsConflict (SQN_nPtr spin1, SQN_nPtr spin2, Int4 fuzz, Int4 strand)
1655 {
1656   Boolean conflict = FALSE;
1657 
1658   if (spin1 == NULL || spin2 == NULL) return FALSE;
1659 
1660   /* check first for conflict on first row */
1661   /* right of i greater than left of curr */
1662   if (spin1->n2 + spin1->n3 - 1 > spin2->n2 + fuzz)
1663   {
1664     /* left of i less than left of curr */
1665     if (spin1->n2 < spin2->n2) {
1666         /* i overlaps curr on the left */
1667         /* curr:     [-----------) */
1668         /* i:    (------]          */
1669         conflict = TRUE;
1670     }
1671   }
1672   /* left of i less than right of curr - fuzz */
1673   if (spin1->n2 < spin2->n2 + spin2->n3 - 1 - fuzz)
1674   {
1675     /* right of i greater than right of curr */
1676     if (spin1->n2 + spin1->n3 - 1 > spin2->n2 + spin2->n3 - 1) {
1677         /* i overlaps curr on the right */
1678         /* curr: (-----------]      */
1679         /* i:            [------)   */
1680         conflict = TRUE;
1681     }
1682   }
1683   /* left of i greater than or equal to left of curr */
1684   if (spin1->n2 >= spin2->n2)
1685   {
1686     /* right of i less than or equal to right of curr */
1687     if (spin1->n2 + spin1->n3 - 1 <= spin2->n2 + spin2->n3 - 1) {
1688         /* i is contained in curr */
1689         conflict = TRUE;
1690     }
1691   }
1692   /* then check for conflict and consistency on second row */
1693   /* right of i less than left of curr + fuzz */
1694   if (spin1->n2 + spin1->n3 - 1 < spin2->n2 + fuzz)
1695   {
1696     if (strand == 1)
1697     {
1698         /* right of i on row 2 greater than left of curr on row 2 */
1699         if (spin1->n4 + spin1->n3 - 1 > spin2->n4 + fuzz) {
1700           /* i:     (-------] */
1701           /* curr:     [------) */
1702           conflict = TRUE;
1703         }
1704     } else if (strand == -1)
1705     {
1706         if (spin2->n4 + spin2->n3 - 1 - fuzz > spin1->n4)
1707           conflict = TRUE;
1708     }
1709   }
1710   /* right of i greater than or equal to left of curr + fuzz */
1711   else
1712   {
1713     if (strand == 1)
1714     {
1715         /* left of i on row 2 less than right of curr - fuzz on row 2 */
1716         if (spin1->n4 < spin2->n4 + spin2->n3 - fuzz)
1717           conflict = TRUE;
1718     } else if (strand == -1)
1719     {
1720         if (spin1->n4 + spin1->n3 - 1 - fuzz > spin2->n4)
1721           conflict = TRUE;
1722     }
1723   }
1724 
1725   /* make sure we aren't taking pieces out of order */
1726   if (strand == 1)
1727   {
1728     if ((spin1->n2 > spin2->n2 && spin1->n4 < spin2->n4)
1729         || (spin1->n2 < spin2->n2 && spin1->n4 > spin2->n4))
1730     {
1731       conflict = TRUE;
1732     }
1733   }
1734   else
1735   {
1736     if ((spin1->n2 > spin2->n2 && spin1->n4 > spin2->n4)
1737         || (spin1->n2 < spin2->n2 && spin1->n4 < spin2->n4))
1738     {
1739       conflict = TRUE;
1740     }
1741   }
1742   return conflict;
1743 }
1744 
1745 
ACT_RemoveInconsistentAlnsFromSet(SeqAlignPtr sap,Int4 fuzz,Int4 n)1746 extern void ACT_RemoveInconsistentAlnsFromSet (SeqAlignPtr sap, Int4 fuzz, Int4 n)
1747 {
1748    AMAlignIndex2Ptr amaip;
1749    Boolean          conflict;
1750    Int4             curr;
1751    Int4             i;
1752    Int4             indextype;
1753    Int4             k;
1754    SeqAlignPtr      salp;
1755    SeqAlignPtr      salp_head;
1756    SeqAlignPtr      salp_prev;
1757    SQN_nPtr         PNTR spin;
1758    Int4             strand;
1759 
1760    if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
1761       return;
1762    if (n > 2)
1763       return;
1764    amaip = (AMAlignIndex2Ptr)(sap->saip);
1765    indextype = amaip->alnstyle;
1766    /* make sure that everything is on the plus strand of the nth sequence */
1767    for (i=0; i<amaip->numsaps; i++)
1768    {
1769       salp = amaip->saps[i];
1770       strand = AlnMgr2GetNthStrand(salp, n);
1771       if (strand == Seq_strand_minus)
1772       {
1773          SAIndex2Free2(salp->saip);
1774          salp->saip = NULL;
1775          salp->next = NULL;
1776          SeqAlignListReverseStrand(salp);
1777          AlnMgr2IndexSingleChildSeqAlign(salp);
1778       }
1779    }
1780    /* spin structure: n1 = which alignment, n2 = start on first row, n3 =
1781       alignment length on 1st row, n4 = start on 2nd row, n5 = 2nd strand */
1782    spin = (SQN_nPtr PNTR)MemNew((amaip->numsaps)*sizeof(SQN_nPtr));
1783    for (i=0; i<amaip->numsaps; i++)
1784    {
1785       spin[i] = (SQN_nPtr)MemNew(sizeof(SQN_n));
1786       SetSpinValuesForSeqAlign (amaip->saps[i], spin[i], i, n);
1787    }
1788    HeapSort((Pointer)spin, (size_t)(amaip->numsaps), sizeof(SQN_nPtr), ACT_CompareSpins);
1789    strand = spin[0]->n5;
1790    for (i=1; i<amaip->numsaps; i++)
1791    {
1792       if (spin[i]->n5 != strand)
1793       {
1794          k = spin[i]->n1;
1795          salp = amaip->saps[k];
1796          salp->next = NULL;
1797          SeqAlignFree(salp);
1798          amaip->saps[k] = NULL;
1799          spin[i]->n1 = -1;
1800          /* fix links for previous alignments */
1801          while (k > 0 && amaip->saps[k - 1] == NULL) {
1802            k--;
1803          }
1804          if (k > 0)
1805          {
1806             if (k < amaip->numsaps - 1)
1807             {
1808               amaip->saps[k - 1]->next = amaip->saps[k + 1];
1809             }
1810             else
1811             {
1812               amaip->saps[k - 1]->next = NULL;
1813             }
1814          }
1815       }
1816    }
1817    for (curr=0; curr<amaip->numsaps; curr++)
1818    {
1819       if (spin[curr]->n1 != -1)
1820       {
1821          for (i=curr+1; i<amaip->numsaps; i++)
1822          {
1823             if (spin[i]->n1 != -1)
1824             {
1825                conflict = FALSE;
1826 
1827                /* check first for conflict on first row */
1828                /* right of i greater than left of curr */
1829                if (spin[i]->n2 + spin[i]->n3 - 1 > spin[curr]->n2 + fuzz)
1830                {
1831                   /* left of i less than left of curr */
1832                   if (spin[i]->n2 < spin[curr]->n2) {
1833                      /* i overlaps curr on the left */
1834                      /* curr:     [-----------) */
1835                      /* i:    (------]          */
1836                      conflict = TRUE;
1837                   }
1838                }
1839                /* left of i less than right of curr - fuzz */
1840                if (spin[i]->n2 < spin[curr]->n2 + spin[curr]->n3 - 1 - fuzz)
1841                {
1842                   /* right of i greater than right of curr */
1843                   if (spin[i]->n2 + spin[i]->n3 - 1 > spin[curr]->n2 + spin[curr]->n3 - 1) {
1844                      /* i overlaps curr on the right */
1845                      /* curr: (-----------]      */
1846                      /* i:            [------)   */
1847                      conflict = TRUE;
1848                   }
1849                }
1850                /* left of i greater than or equal to left of curr */
1851                if (spin[i]->n2 >= spin[curr]->n2)
1852                {
1853                   /* right of i less than or equal to right of curr */
1854                   if (spin[i]->n2 + spin[i]->n3 - 1 <= spin[curr]->n2 + spin[curr]->n3 - 1) {
1855                      /* i is contained in curr */
1856                      conflict = TRUE;
1857                   }
1858                }
1859                /* then check for conflict and consistency on second row */
1860                /* right of i less than left of curr + fuzz */
1861                if (spin[i]->n2 + spin[i]->n3 - 1 < spin[curr]->n2 + fuzz)
1862                {
1863                   if (strand == 1)
1864                   {
1865                      /* right of i on row 2 greater than left of curr on row 2 */
1866                      if (spin[i]->n4 + spin[i]->n3 - 1 > spin[curr]->n4 + fuzz) {
1867                         /* i:     (-------] */
1868                         /* curr:     [------) */
1869                         conflict = TRUE;
1870                      }
1871                   } else if (strand == -1)
1872                   {
1873                      if (spin[curr]->n4 + spin[curr]->n3 - 1 - fuzz > spin[i]->n4)
1874                         conflict = TRUE;
1875                   }
1876                }
1877                /* right of i greater than or equal to left of curr + fuzz */
1878                else
1879                {
1880                   if (strand == 1)
1881                   {
1882                      /* left of i on row 2 less than right of curr - fuzz on row 2 */
1883                      if (spin[i]->n4 < spin[curr]->n4 + spin[curr]->n3 - fuzz)
1884                         conflict = TRUE;
1885                   } else if (strand == -1)
1886                   {
1887                      if (spin[i]->n4 + spin[i]->n3 - 1 - fuzz > spin[curr]->n4)
1888                         conflict = TRUE;
1889                   }
1890                }
1891 
1892                /* make sure we aren't taking pieces out of order */
1893                if (strand == 1)
1894                {
1895                   if ((spin[i]->n2 > spin[curr]->n2 && spin[i]->n4 < spin[curr]->n4)
1896                       || (spin[i]->n2 < spin[curr]->n2 && spin[i]->n4 > spin[curr]->n4))
1897                   {
1898                     conflict = TRUE;
1899                   }
1900                }
1901                else
1902                {
1903                   if ((spin[i]->n2 > spin[curr]->n2 && spin[i]->n4 > spin[curr]->n4)
1904                       || (spin[i]->n2 < spin[curr]->n2 && spin[i]->n4 < spin[curr]->n4))
1905                   {
1906                     conflict = TRUE;
1907                   }
1908                }
1909 
1910                if (conflict && ResolveSpinConflict (spin[curr], spin[i],
1911                                                     amaip->saps[spin[i]->n1],
1912                                                     n))
1913                {
1914                   conflict = FALSE;
1915                }
1916 
1917                if (conflict)
1918                {
1919                   salp = amaip->saps[spin[i]->n1];
1920                   salp->next = NULL;
1921                   SeqAlignFree(salp);
1922                   amaip->saps[spin[i]->n1] = NULL;
1923                   spin[i]->n1 = -1;
1924                }
1925             }
1926          }
1927       }
1928    }
1929    salp_head = salp_prev = NULL;
1930    for (i=0; i<amaip->numsaps; i++)
1931    {
1932       MemFree(spin[i]);
1933       if (amaip->saps[i] != NULL)
1934       {
1935          amaip->saps[i]->next = NULL;
1936          if (salp_prev != NULL)
1937          {
1938             salp_prev->next = amaip->saps[i];
1939             salp_prev = salp_prev->next;
1940          } else
1941             salp_head = salp_prev = amaip->saps[i];
1942       }
1943    }
1944    sap->segs = (Pointer)(salp_head);
1945    if (indextype == AM2_LITE)
1946    {
1947       AMAlignIndex2Free2(sap->saip);
1948       sap->saip = NULL;
1949       AlnMgr2IndexLite(sap);
1950    } else
1951       AlnMgr2ReIndexSeqAlign(sap);
1952    MemFree(spin);
1953 }
1954 
ACT_GetNthSeqRangeInSASet(SeqAlignPtr sap,Int4 n,Int4Ptr start,Int4Ptr stop)1955 NLM_EXTERN void ACT_GetNthSeqRangeInSASet(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop)
1956 {
1957    SeqAlignPtr  salp;
1958    Int4         start_tmp;
1959    Int4         stop_tmp;
1960    Int4         tmp1;
1961    Int4         tmp2;
1962 
1963    if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
1964       return;
1965    salp = (SeqAlignPtr)(sap->segs);
1966    start_tmp = stop_tmp = -1;
1967    while (salp != NULL)
1968    {
1969       if (n > salp->dim)
1970       {
1971          if (start)
1972             *start = -1;
1973          if (stop)
1974             *stop = -1;
1975          return;
1976       }
1977       AlnMgr2GetNthSeqRangeInSA(salp, n, &tmp1, &tmp2);
1978       if (tmp1 < start_tmp || start_tmp == -1)
1979          start_tmp = tmp1;
1980       if (tmp2 > stop_tmp)
1981          stop_tmp = tmp2;
1982       salp = salp->next;
1983    }
1984    if (start)
1985       *start = start_tmp;
1986    if (stop)
1987       *stop = stop_tmp;
1988 }
1989 
ACT_FindBestAlnByDotPlot(SeqLocPtr slp1,SeqLocPtr slp2)1990 static SeqAlignPtr ACT_FindBestAlnByDotPlot(SeqLocPtr slp1, SeqLocPtr slp2)
1991 {
1992    DOTDiagPtr      ddp;
1993    DenseSegPtr     dsp;
1994    Int4            i;
1995    DOTMainDataPtr  mip;
1996    SeqAlignPtr     sap;
1997    SeqAlignPtr     sap_head;
1998    SeqAlignPtr     sap_prev;
1999    ScorePtr        scp;
2000    Int4            start1;
2001    Int4            start2;
2002    Uint1           strand;
2003 
2004    ErrSetMessageLevel(SEV_MAX);
2005    mip = DOT_CreateAndStorebyLoc (slp1, slp2, 6, 10);
2006    ErrSetMessageLevel(SEV_WARNING);
2007    sap = sap_head = sap_prev = NULL;
2008    if (mip == NULL || mip->hitlist == NULL)
2009       return NULL;
2010    i = 0;
2011    ddp = mip->hitlist[i];
2012    start1 = SeqLocStart(slp1);
2013    start2 = SeqLocStart(slp2);
2014    strand = SeqLocStrand(slp2);
2015    /* copy each ddp (a single ungapped alignment) into a one-segment dense-seg alignment */
2016    while (ddp != NULL && i < mip->index)
2017    {
2018       ddp = mip->hitlist[i];
2019       i++;
2020       sap = SeqAlignNew();
2021       dsp = DenseSegNew();
2022       sap->type = SAT_PARTIAL;
2023       sap->segtype = SAS_DENSEG;
2024       sap->dim = 2;
2025       dsp->dim = 2;
2026       dsp->numseg = 1;
2027       dsp->ids = SeqIdDup(SeqLocId(slp1));
2028       dsp->ids->next = SeqIdDup(SeqLocId(slp2));
2029       dsp->strands = (Uint1Ptr)MemNew(2*sizeof(Uint1));
2030       dsp->strands[0] = SeqLocStrand(slp1);
2031       dsp->strands[1] = SeqLocStrand(slp2);
2032       dsp->starts = (Int4Ptr)MemNew(2*sizeof(Int4));
2033       dsp->lens = (Int4Ptr)MemNew(sizeof(Int4));
2034       dsp->starts[0] = ddp->q_start;
2035       if (dsp->strands[1] == Seq_strand_minus)
2036          dsp->starts[1] = ddp->s_start - ddp->length + 1;
2037       else
2038          dsp->starts[1] = ddp->s_start;
2039       if (ddp->length > SeqLocLen(slp2))
2040          dsp->lens[0] = SeqLocLen(slp2);
2041       else
2042          dsp->lens[0] = ddp->length - 1;
2043       scp = ScoreNew();
2044       scp->id = ObjectIdNew();
2045       scp->id->str = StringSave("score");
2046       scp->choice = 1;
2047       scp->value.intvalue = ddp->score;
2048       dsp->scores = scp;
2049       sap->segs = (Pointer)(dsp);
2050       if (sap_head != NULL)
2051       {
2052          sap_prev->next = sap;
2053          sap_prev = sap;
2054       } else
2055          sap_head = sap_prev = sap;
2056    }
2057    if (sap_head == NULL)
2058       return NULL;
2059    AlnMgr2IndexLite(sap_head);
2060    ACT_RemoveInconsistentAlnsFromSet(sap_head, 6, 1);
2061    sap = (SeqAlignPtr)(sap_head->segs);
2062    sap_head->segs = NULL;
2063    SeqAlignFree(sap_head);
2064    MemFree(mip->matrix);
2065    MemFree(mip->qseq);
2066    MemFree(mip->sseq);
2067    MemFree(mip->qname);
2068    MemFree(mip->sname);
2069    i = 0;
2070    while (ddp != NULL && i < mip->index)
2071    {
2072       ddp = mip->hitlist[i];
2073       MemFree(ddp);
2074       i++;
2075    }
2076    MemFree(mip->hitlist);
2077    return sap;
2078 }
2079 
2080 NLM_EXTERN SeqAlignPtr
ACT_FindPiece(BioseqPtr bsp1,BioseqPtr bsp2,Int4 start1,Int4 stop1,Int4 start2,Int4 stop2,Uint1 strand,Int4 which_side,GetAlignmentPieceFunc aln_piece_func)2081 ACT_FindPiece
2082 (BioseqPtr             bsp1,
2083  BioseqPtr             bsp2,
2084  Int4                  start1,
2085  Int4                  stop1,
2086  Int4                  start2,
2087  Int4                  stop2,
2088  Uint1                 strand,
2089  Int4                  which_side,
2090  GetAlignmentPieceFunc aln_piece_func)
2091 {
2092    AMAlignIndex2Ptr      amaip;
2093    DenseSegPtr          dsp;
2094    Int4                 i;
2095    Int4                 nstart1;
2096    Int4                 nstart2;
2097    Int4                 nstop1;
2098    Int4                 nstop2;
2099    SeqAlignPtr          sap;
2100    SeqAlignPtr          sap_head;
2101    SeqAlignPtr          sap_new;
2102    SeqAlignPtr          sap_prev = NULL;
2103    SeqLocPtr            slp1;
2104    SeqLocPtr            slp2;
2105 
2106    if (stop1 - start1 < 7 || stop2 - start2 < 7) /* can't do these by BLAST -- wordsize can't go that small */
2107       return NULL;
2108 
2109    if (aln_piece_func == NULL)
2110    {
2111       return NULL;
2112    }
2113    slp1 = SeqLocIntNew(start1, stop1, Seq_strand_plus, bsp1->id);
2114    slp2 = SeqLocIntNew(start2, stop2, strand, bsp2->id);
2115    sap = aln_piece_func (slp1, slp2);
2116 
2117    SeqLocFree(slp1);
2118    SeqLocFree(slp2);
2119 
2120    if (sap == NULL)
2121    {
2122       /* no real alignment, so create a "dummy" alignment that lines up at
2123        * the requested side (left, right, or middle)
2124        */
2125       sap = SeqAlignNew();
2126       dsp = DenseSegNew();
2127       dsp->numseg = 1;
2128       dsp->starts = (Int4Ptr)MemNew(2*sizeof(Int4));
2129       dsp->lens = (Int4Ptr)MemNew(sizeof(Int4));
2130       dsp->strands = (Uint1Ptr)MemNew(2*sizeof(Uint1));
2131       dsp->dim = 2;
2132       dsp->ids = SeqIdDup(bsp1->id);
2133       dsp->ids->next = SeqIdDup(bsp2->id);
2134       dsp->lens[0] = MIN(stop1-start1+1, stop2-start2+1);
2135       dsp->strands[0] = dsp->strands[1] = Seq_strand_plus;
2136       if (which_side == SQN_LEFT || which_side == SQN_MIDDLE)
2137       {
2138          dsp->starts[0] = stop1 - dsp->lens[0] + 1;
2139          dsp->starts[1] = stop2 - dsp->lens[0] + 1;
2140       } else if (which_side == SQN_RIGHT)
2141       {
2142          dsp->starts[0] = start1;
2143          dsp->starts[1] = start2;
2144       }
2145       sap->segs = (Pointer)dsp;
2146       sap->segtype = SAS_DENSEG;
2147    }
2148    else if (sap->next != NULL)
2149    {
2150       /* combine the alignments returned (if there are more than one) into
2151        * a mini global alignment to cover the requested interval
2152        */
2153       AlnMgr2IndexLite(sap);
2154       ACT_RemoveInconsistentAlnsFromSet(sap, 20, 1);
2155       amaip = (AMAlignIndex2Ptr)(sap->saip);
2156       AlnMgr2SortAlnSetByNthRowPos(sap, 1);
2157       ACT_GetNthSeqRangeInSASet(sap, 1, &nstart1, &nstop1);
2158       ACT_GetNthSeqRangeInSASet(sap, 2, &nstart2, &nstop2);
2159       strand = AlnMgr2GetNthStrand(amaip->saps[0], 2);
2160       sap_head = NULL;
2161       if (strand != Seq_strand_minus)
2162       {
2163          if (nstart1 > start1+20 && nstart2 > start2+20)
2164          {
2165             slp1 = SeqLocIntNew(start1, nstart1, Seq_strand_plus, bsp1->id);
2166             slp2 = SeqLocIntNew(start2, nstart2, strand, bsp2->id);
2167             sap_head = ACT_FindBestAlnByDotPlot(slp1, slp2);
2168             SeqLocFree(slp1);
2169             SeqLocFree(slp2);
2170          }
2171       }
2172       else
2173       {
2174          if (nstart1 > start1+20 && nstop2 < stop2 - 20)
2175          {
2176             slp1 = SeqLocIntNew(start1, nstart1, Seq_strand_plus, bsp1->id);
2177             slp2 = SeqLocIntNew(nstop2, stop2, strand, bsp2->id);
2178             sap_head = ACT_FindBestAlnByDotPlot(slp1, slp2);
2179             SeqLocFree(slp1);
2180             SeqLocFree(slp2);
2181          }
2182       }
2183       for (i=0; i<amaip->numsaps-1; i++)
2184       {
2185          AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], 1, NULL, &nstart1);
2186          AlnMgr2GetNthSeqRangeInSA(amaip->saps[i+1], 1, &nstop1, NULL);
2187          if (strand != Seq_strand_minus)
2188          {
2189             AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], 2, NULL, &nstart2);
2190             AlnMgr2GetNthSeqRangeInSA(amaip->saps[i+1], 2, &nstop2, NULL);
2191          }
2192          else
2193          {
2194             AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], 2, &nstop2, NULL);
2195             AlnMgr2GetNthSeqRangeInSA(amaip->saps[i+1], 2, NULL, &nstart2);
2196          }
2197       }
2198       ACT_GetNthSeqRangeInSASet(sap, 1, &nstart1, &nstop1);
2199       ACT_GetNthSeqRangeInSASet(sap, 2, &nstart2, &nstop2);
2200       sap_prev = sap_head;
2201       if (sap_prev != NULL)
2202       {
2203          while (sap_prev->next != NULL)
2204          {
2205             sap_prev = sap_prev->next;
2206          }
2207       }
2208       if (strand != Seq_strand_minus)
2209       {
2210          if (nstop1 < stop1-20 && nstop2 < stop2-20)  /* missing piece at the end */
2211          {
2212             slp1 = SeqLocIntNew(nstop1, stop1, Seq_strand_plus, bsp1->id);
2213             slp2 = SeqLocIntNew(nstop2, stop2, strand, bsp2->id);
2214             sap_new = ACT_FindBestAlnByDotPlot(slp1, slp2);
2215             SeqLocFree(slp1);
2216             SeqLocFree(slp2);
2217             if (sap_new != NULL)
2218             {
2219                if (sap_head != NULL)
2220                {
2221                   sap_prev->next = sap_new;
2222                   sap_prev = sap_new;
2223                }
2224                else
2225                  sap_head = sap_prev = sap_new;
2226             }
2227          }
2228       }
2229       else
2230       {
2231          if (nstop1 < stop1-20 && nstart2 > start2 + 20)
2232          {
2233             slp1 = SeqLocIntNew(nstop1, stop1, Seq_strand_plus, bsp1->id);
2234             slp2 = SeqLocIntNew(start2, nstart2, strand, bsp2->id);
2235             sap_new = ACT_FindBestAlnByDotPlot(slp1, slp2);
2236             SeqLocFree(slp1);
2237             SeqLocFree(slp2);
2238             if (sap_new != NULL)
2239             {
2240                if (sap_head != NULL)
2241                {
2242                   sap_prev->next = sap_new;
2243                   sap_prev = sap_new;
2244                }
2245                else
2246                   sap_head = sap_prev = sap_new;
2247             }
2248          }
2249       }
2250       sap_new = (SeqAlignPtr)(sap->segs);
2251       while (sap_new->next != NULL)
2252       {
2253          sap_new = sap_new->next;
2254       }
2255       sap_new->next = sap_head;
2256       sap_head = (SeqAlignPtr)(sap->segs);
2257       sap->segs = NULL;
2258       SeqAlignFree(sap);
2259       sap = sap_head;
2260    }
2261    return sap;
2262 }
2263 
ACT_ExtendAlnRight(SeqAlignPtr sap,Int4 which_row,Int4 start,Int4 stop)2264 static void ACT_ExtendAlnRight(SeqAlignPtr sap, Int4 which_row, Int4 start, Int4 stop)
2265 {
2266    DenseSegPtr  dsp;
2267    Int4         i;
2268    Int4Ptr      lens;
2269    Int4Ptr      starts;
2270    Uint1Ptr     strands;
2271 
2272    if (sap == NULL)
2273       return;
2274    if (which_row > 2)
2275       return;
2276    dsp = (DenseSegPtr)(sap->segs);
2277    if (dsp->starts[2*(dsp->numseg-1) + which_row - 1] == -1 || dsp->starts[2*(dsp->numseg-1) + (2-which_row)] != -1)
2278    {
2279       starts = (Int4Ptr)MemNew((dsp->numseg+1)*2*sizeof(Int4));
2280       strands = (Uint1Ptr)MemNew((dsp->numseg+1)*2*sizeof(Uint1));
2281       lens = (Int4Ptr)MemNew((dsp->numseg+1)*sizeof(Int4));
2282       for (i=0; i<dsp->numseg; i++)
2283       {
2284          lens[i] = dsp->lens[i];
2285       }
2286       for (i=0; i<=(dsp->dim)*(dsp->numseg-1)+1; i++)
2287       {
2288          starts[i] = dsp->starts[i];
2289          strands[i] = dsp->strands[i];
2290       }
2291       lens[dsp->numseg] = stop - start + 1;
2292       if (dsp->strands[which_row-1] != Seq_strand_minus)
2293          starts[(dsp->dim)*(dsp->numseg) + which_row - 1] = start;
2294       else
2295          starts[(dsp->dim)*(dsp->numseg) + which_row - 1] = stop;
2296       starts[(dsp->dim)*(dsp->numseg) + (2-which_row)] = -1;
2297       strands[(dsp->dim)*(dsp->numseg) + which_row - 1] = dsp->strands[which_row-1];
2298       strands[(dsp->dim)*(dsp->numseg) + (2-which_row)] = dsp->strands[2-which_row];
2299       MemFree(dsp->starts);
2300       MemFree(dsp->lens);
2301       MemFree(dsp->strands);
2302       dsp->numseg++;
2303       dsp->starts = starts;
2304       dsp->strands = strands;
2305       dsp->lens = lens;
2306    } else
2307    {
2308       dsp->lens[dsp->numseg-1] += stop - start + 1;
2309       if (dsp->strands[which_row-1] == Seq_strand_minus)
2310          dsp->starts[(dsp->dim)*(dsp->numseg-1) + which_row - 1] = stop;
2311    }
2312    SAIndex2Free2(sap->saip);
2313    sap->saip = NULL;
2314    AlnMgr2IndexSingleChildSeqAlign(sap);
2315 }
2316 
ACT_CreateContinuousAln(SeqAlignPtr PNTR saps,Int4 numsaps)2317 static SeqAlignPtr ACT_CreateContinuousAln(SeqAlignPtr PNTR saps, Int4 numsaps)
2318 {
2319    DenseSegPtr  dsp;
2320    DenseSegPtr  dsp_tmp;
2321    Int4         i;
2322    Int4         j;
2323    Int4         n1;
2324    Int4         n2;
2325    Int4         numseg;
2326    SeqAlignPtr  salp;
2327    Int4         start1;
2328    Int4         start2;
2329    Int4         stop1;
2330    Int4         stop2;
2331    Uint1        strand;
2332 
2333    for (i=0; i<numsaps-1; i++)
2334    {
2335       AlnMgr2GetNthSeqRangeInSA(saps[i], 1, &start1, &stop1);
2336       AlnMgr2GetNthSeqRangeInSA(saps[i+1], 1, &start2, &stop2);
2337       if (start2 - stop1 > 1)
2338          ACT_ExtendAlnRight(saps[i], 1, stop1+1, start2-1);
2339       AlnMgr2GetNthSeqRangeInSA(saps[i], 2, &start1, &stop1);
2340       AlnMgr2GetNthSeqRangeInSA(saps[i+1], 2, &start2, &stop2);
2341       strand = AlnMgr2GetNthStrand(saps[i], 2);
2342       if (strand == Seq_strand_minus)
2343       {
2344          if (start1 - stop2 > 1)
2345             ACT_ExtendAlnRight(saps[i], 2, stop2+1, start1-1);
2346       } else
2347       {
2348          if (start2 - stop1 > 1)
2349             ACT_ExtendAlnRight(saps[i], 2, stop1+1, start2-1);
2350       }
2351    }
2352    numseg = 0;
2353    for (i=0; i<numsaps; i++)
2354    {
2355       dsp_tmp = (DenseSegPtr)(saps[i]->segs);
2356       numseg += dsp_tmp->numseg;
2357    }
2358    dsp = DenseSegNew();
2359    dsp->dim = 2;
2360    dsp->numseg = numseg;
2361    dsp->starts = (Int4Ptr)MemNew(2*numseg*sizeof(Int4));
2362    dsp->lens = (Int4Ptr)MemNew(numseg*sizeof(Int4));
2363    dsp->strands = (Uint1Ptr)MemNew(2*numseg*sizeof(Uint1));
2364    n1 = n2 = 0;
2365    for (i=0; i<numsaps; i++)
2366    {
2367       dsp_tmp = (DenseSegPtr)(saps[i]->segs);
2368       if (dsp->ids == NULL)
2369          dsp->ids = SeqIdDupList(dsp_tmp->ids);
2370       for (j=0; j<2*dsp_tmp->numseg; j++)
2371       {
2372          dsp->starts[n1+j] = dsp_tmp->starts[j];
2373          dsp->strands[n1+j] = dsp_tmp->strands[j];
2374       }
2375       for (j=0; j<dsp_tmp->numseg; j++)
2376       {
2377          dsp->lens[n2+j] = dsp_tmp->lens[j];
2378       }
2379       n1 += 2*dsp_tmp->numseg;
2380       n2 += dsp_tmp->numseg;
2381    }
2382    salp = SeqAlignNew();
2383    salp->type = SAT_PARTIAL;
2384    salp->segtype = SAS_DENSEG;
2385    salp->dim = 2;
2386    salp->segs = (Pointer)(dsp);
2387    AlnMgr2IndexSingleChildSeqAlign(salp);
2388    return salp;
2389 }
2390 
2391 
DeleteAmaipSapI(AMAlignIndex2Ptr amaip,Int4 i)2392 static void DeleteAmaipSapI (AMAlignIndex2Ptr  amaip, Int4 i)
2393 {
2394   Int4 j;
2395   if (amaip == NULL || i >= amaip->numsaps) {
2396     return;
2397   }
2398 
2399   amaip->saps[i] = SeqAlignFree (amaip->saps[i]);
2400   for (j = i + 1; j < amaip->numsaps; j++) {
2401     amaip->saps[j - 1] = amaip->saps[j];
2402   }
2403   amaip->numsaps --;
2404 }
2405 
2406 
ACT_CleanUpAlignments(SeqAlignPtr sap,Int4 len1,Int4 len2)2407 NLM_EXTERN SeqAlignPtr ACT_CleanUpAlignments(SeqAlignPtr sap, Int4 len1, Int4 len2)
2408 {
2409    AMAlignIndex2Ptr  amaip;
2410    Int4             diff;
2411    DenseSegPtr      dsp;
2412    Int4             i;
2413    Int4             numseg;
2414    Int4             start1;
2415    Int4             start2;
2416    Int4             stop1;
2417    Int4             stop2;
2418    Uint1            strand;
2419    Int4             tmp;
2420 
2421    if (sap == NULL)
2422       return NULL;
2423    AlnMgr2SortAlnSetByNthRowPos(sap, 1);
2424    amaip = (AMAlignIndex2Ptr)(sap->saip);
2425    strand = AlnMgr2GetNthStrand(amaip->saps[0], 2);
2426    numseg = 0;
2427    AlnMgr2GetNthSeqRangeInSA(amaip->saps[0], 1, &start1, NULL);
2428    AlnMgr2GetNthSeqRangeInSA(amaip->saps[0], 2, &start2, &stop2);
2429    if (strand != Seq_strand_minus)
2430       diff = start2;
2431    else
2432       diff = len2 - stop2;
2433    if (start1 > 0 && diff > 0)
2434       numseg += 2;
2435    for (i=0; i<amaip->numsaps-1; i++)
2436    {
2437       dsp = (DenseSegPtr)(amaip->saps[i]->segs);
2438       numseg += dsp->numseg;
2439       AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], 1, NULL, &start1);
2440       AlnMgr2GetNthSeqRangeInSA(amaip->saps[i+1], 1, &stop1, &tmp);
2441       if (stop1 < start1+1)
2442          AlnMgr2TruncateSAP(amaip->saps[i+1], start1+1, tmp, 1);
2443       if (strand != Seq_strand_minus)
2444       {
2445          AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], 2, NULL, &start2);
2446          AlnMgr2GetNthSeqRangeInSA(amaip->saps[i+1], 2, &stop2, &tmp);
2447          if (stop2 < start2+1) {
2448             if (tmp <= start2) {
2449                /* alignment is contained in parent, remove completely */
2450                DeleteAmaipSapI (amaip, i + 1);
2451                /* need to compare this with the one after it */
2452                i--;
2453                continue;
2454             } else {
2455               AlnMgr2TruncateSAP(amaip->saps[i+1], start2+1, tmp, 2);
2456             }
2457          }
2458       } else
2459       {
2460          AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], 2, &stop2, &tmp);
2461          AlnMgr2GetNthSeqRangeInSA(amaip->saps[i+1], 2, NULL, &start2);
2462          if (stop2 < start2 + 1) {
2463             if (tmp <= start2) {
2464                /* alignment is contained in parent, remove completely */
2465                DeleteAmaipSapI (amaip, i);
2466                /* need to compare this with the one after it */
2467                i--;
2468                continue;
2469             } else {
2470                AlnMgr2TruncateSAP(amaip->saps[i], start2+1, tmp, 2);
2471             }
2472          }
2473       }
2474       AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], 1, NULL, &start1);
2475       AlnMgr2GetNthSeqRangeInSA(amaip->saps[i+1], 1, &stop1, &tmp);
2476       if (strand != Seq_strand_minus)
2477       {
2478          AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], 2, NULL, &start2);
2479          AlnMgr2GetNthSeqRangeInSA(amaip->saps[i+1], 2, &stop2, &tmp);
2480       } else
2481       {
2482          AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], 2, &stop2, &tmp);
2483          AlnMgr2GetNthSeqRangeInSA(amaip->saps[i+1], 2, NULL, &start2);
2484       }
2485       if (stop1 > start1+1)
2486          numseg++;
2487       if (stop2 > start2+1)
2488          numseg++;
2489    }
2490    dsp = (DenseSegPtr)(amaip->saps[amaip->numsaps-1]->segs);
2491    numseg += dsp->numseg;
2492    AlnMgr2GetNthSeqRangeInSA(amaip->saps[amaip->numsaps-1], 1, NULL, &stop1);
2493    AlnMgr2GetNthSeqRangeInSA(amaip->saps[amaip->numsaps-1], 2, &start2, &stop2);
2494    if (strand != Seq_strand_minus)
2495       diff = len2 - stop2;
2496    else
2497       diff = start2;
2498    if (stop1 < len1 && diff > 0)
2499       numseg += 2;
2500    AlnMgr2SortAlnSetByNthRowPos(sap, 1);
2501    return (ACT_CreateContinuousAln(amaip->saps, amaip->numsaps));
2502 }
2503 
SQN_AddToAln(SeqAlignPtr sap,Int4 offset,Int2 which_end,Uint1 strand)2504 static void SQN_AddToAln(SeqAlignPtr sap, Int4 offset, Int2 which_end, Uint1 strand)
2505 {
2506    DenseSegPtr  dsp;
2507    Int4Ptr      lens;
2508    Int4         i;
2509    Int4         j;
2510    Int4Ptr      starts;
2511    Uint1Ptr     strands;
2512 
2513    if (sap == NULL || offset == 0)
2514       return;
2515    dsp = (DenseSegPtr)(sap->segs);
2516    if (which_end == SQN_LEFT)
2517    {
2518       if (dsp->starts[0] != -1 && dsp->starts[1] != -1) /* neither sequence is gapped */
2519       {
2520          dsp->starts[0] -= offset;
2521          if (strand != Seq_strand_minus)
2522             dsp->starts[1] -= offset;
2523          dsp->lens[0] += offset;
2524       } else /* one of the sequences is gapped -> add a new segment */
2525       {
2526          starts = (Int4Ptr)MemNew(2*(dsp->numseg+1)*sizeof(Int4));
2527          lens = (Int4Ptr)MemNew((dsp->numseg+1)*sizeof(Int4));
2528          strands = (Uint1Ptr)MemNew(2*(dsp->numseg+1)*sizeof(Uint1));
2529          AlnMgr2GetNthSeqRangeInSA(sap, 1, &i, &j);
2530          starts[0] = i - offset;
2531          AlnMgr2GetNthSeqRangeInSA(sap, 1, &i, &j);
2532          if (strand == Seq_strand_minus)
2533             starts[1] = j + 1;
2534          else
2535             starts[1] = i - offset;
2536          lens[0] = offset;
2537          strands[0] = Seq_strand_plus;
2538          strands[1] = strand;
2539          for (i=0; i<dsp->numseg; i++)
2540          {
2541            starts[i+1] = dsp->starts[i];
2542            starts[2*(i+1)] = dsp->starts[2*i];
2543            lens[i+1] = dsp->lens[i];
2544            strands[i+1] = dsp->strands[i];
2545            strands[2*(i+1)] = dsp->strands[2*i];
2546          }
2547          dsp->numseg++;
2548          MemFree(dsp->starts);
2549          MemFree(dsp->lens);
2550          MemFree(dsp->strands);
2551          dsp->starts = starts;
2552          dsp->lens = lens;
2553          dsp->strands = strands;
2554       }
2555    } else if (which_end == SQN_RIGHT)
2556    {
2557       if (dsp->starts[2*(dsp->numseg-1)] != -1 && dsp->starts[2*(dsp->numseg-1)+1] != -1)
2558       {
2559          dsp->lens[dsp->numseg-1] += offset;
2560          if (strand == Seq_strand_minus)
2561             dsp->starts[2*(dsp->numseg-1)+1] -= offset;
2562       } else /* one of the sequences is gapped -> add a new segment */
2563       {
2564          starts = (Int4Ptr)MemNew(2*(dsp->numseg+1)*sizeof(Int4));
2565          lens = (Int4Ptr)MemNew((dsp->numseg+1)*sizeof(Int4));
2566          strands = (Uint1Ptr)MemNew(2*(dsp->numseg+1)*sizeof(Uint1));
2567          AlnMgr2GetNthSeqRangeInSA(sap, 1, &i, &j);
2568          starts[2*(dsp->numseg)-1] = i+1;
2569          AlnMgr2GetNthSeqRangeInSA(sap, 2, &i, &j);
2570          if (strand == Seq_strand_minus)
2571            starts[2*(dsp->numseg)] = i - offset;
2572          else
2573            starts[2*(dsp->numseg)] = j + 1;
2574          lens[dsp->numseg] = offset;
2575          strands[2*(dsp->numseg)-1] = Seq_strand_plus;
2576          strands[2*(dsp->numseg)] = strand;
2577          for (i=0; i<dsp->numseg; i++)
2578          {
2579             starts[i] = dsp->starts[i];
2580             starts[2*i] = dsp->starts[2*i];
2581             lens[i] = dsp->lens[i];
2582             strands[i] = dsp->strands[i];
2583             strands[2*i] = dsp->strands[2*i];
2584          }
2585          dsp->numseg++;
2586          MemFree(dsp->starts);
2587          MemFree(dsp->lens);
2588          MemFree(dsp->strands);
2589          dsp->starts = starts;
2590          dsp->lens = lens;
2591          dsp->strands = strands;
2592       }
2593    }
2594    /* free the old index and reindex the alignment */
2595    SAIndex2Free2(sap->saip);
2596    sap->saip = NULL;
2597    AlnMgr2IndexSingleChildSeqAlign(sap);
2598 }
2599 
SQN_ExtendAlnAlg(SeqAlignPtr sap,Int4 ovl,Int4 which_side,Uint1 strand)2600 NLM_EXTERN void SQN_ExtendAlnAlg(SeqAlignPtr sap, Int4 ovl, Int4 which_side, Uint1 strand)
2601 {
2602    BioseqPtr    bsp1;
2603    BioseqPtr    bsp2;
2604    Uint1        buf1[20];
2605    Uint1        buf2[20];
2606    DenseSegPtr  dsp;
2607    DenseSegPtr  dsp_new;
2608    Int4         gap;
2609    Int4         i;
2610    Int4         j;
2611    Boolean      mismatch;
2612    SeqIdPtr     sip1;
2613    SeqIdPtr     sip2;
2614    SeqPortPtr   spp;
2615    Int4         start1;
2616    Int4         start2;
2617    Int4         stop1;
2618    Int4         stop2;
2619 
2620    if (sap == NULL || ovl == 0)
2621       return;
2622    AlnMgr2GetNthSeqRangeInSA(sap, 1, &start1, &stop1);
2623    AlnMgr2GetNthSeqRangeInSA(sap, 2, &start2, &stop2);
2624    sip1 = AlnMgr2GetNthSeqIdPtr(sap, 1);
2625    sip2 = AlnMgr2GetNthSeqIdPtr(sap, 2);
2626    bsp1 = BioseqLockById(sip1);
2627    bsp2 = BioseqLockById(sip2);
2628    if (which_side == SQN_LEFT && (start1<ovl+SQN_MAXGAP || start2<ovl+SQN_MAXGAP))
2629    {
2630       if (start1 < ovl)
2631          ovl = start1;
2632       if (start2 < ovl)
2633          ovl = start2;
2634       if (ovl <= 0) {
2635          SeqIdFree(sip1);
2636          SeqIdFree(sip2);
2637          BioseqUnlock(bsp1);
2638          BioseqUnlock(bsp2);
2639          return;
2640       }
2641       dsp = (DenseSegPtr)(sap->segs);
2642       dsp_new = DenseSegNew();
2643       dsp_new->dim = 2;
2644       dsp_new->numseg = dsp->numseg + 1;
2645       dsp_new->starts = (Int4Ptr)MemNew(2*(dsp_new->numseg)*sizeof(Int4));
2646       dsp_new->lens = (Int4Ptr)MemNew((dsp_new->numseg)*sizeof(Int4));
2647       dsp_new->strands = (Uint1Ptr)MemNew(2*(dsp_new->numseg)*sizeof(Int4));
2648       dsp_new->ids = dsp->ids;
2649       dsp->ids = NULL;
2650       dsp_new->starts[0] = start1-ovl;
2651       dsp_new->starts[1] = start2-ovl;
2652       dsp_new->lens[0] = ovl;
2653       dsp_new->strands[0] = dsp_new->strands[1] = Seq_strand_plus;
2654       for (i=1; i<dsp_new->numseg; i++)
2655       {
2656          dsp_new->lens[i] = dsp->lens[i-1];
2657          dsp_new->starts[i*dsp->dim] = dsp->starts[(i-1)*dsp->dim];
2658          dsp_new->starts[i*dsp->dim+1] = dsp->starts[(i-1)*dsp->dim+1];
2659          dsp_new->strands[i*dsp->dim] = dsp_new->strands[i*dsp->dim+1] = Seq_strand_plus;
2660       }
2661       DenseSegFree(dsp);
2662       sap->segs = (Pointer)(dsp_new);
2663       SAIndex2Free2(sap->saip);
2664       sap->saip = NULL;
2665       AlnMgr2IndexSingleChildSeqAlign(sap);
2666       SeqIdFree(sip1);
2667       SeqIdFree(sip2);
2668       BioseqUnlock(bsp1);
2669       BioseqUnlock(bsp2);
2670       return;
2671    } else if (which_side == SQN_RIGHT && (bsp1->length-1-stop1 < ovl+SQN_MAXGAP || bsp2->length-1-stop2<ovl+SQN_MAXGAP))
2672    {
2673       if (bsp1->length-1-stop1<ovl)
2674          ovl = bsp1->length-1-stop1;
2675       if (bsp2->length-1-stop2<ovl)
2676          ovl = bsp2->length-1-stop2;
2677       if (ovl <= 0) {
2678          SeqIdFree(sip1);
2679          SeqIdFree(sip2);
2680          BioseqUnlock(bsp1);
2681          BioseqUnlock(bsp2);
2682          return;
2683       }
2684       dsp = (DenseSegPtr)(sap->segs);
2685       dsp_new = DenseSegNew();
2686       dsp_new->dim = 2;
2687       dsp_new->numseg = dsp->numseg + 1;
2688       dsp_new->starts = (Int4Ptr)MemNew(2*(dsp_new->numseg)*sizeof(Int4));
2689       dsp_new->lens = (Int4Ptr)MemNew((dsp_new->numseg)*sizeof(Int4));
2690       dsp_new->strands = (Uint1Ptr)MemNew(2*(dsp_new->numseg)*sizeof(Int4));
2691       dsp_new->ids = dsp->ids;
2692       dsp->ids = NULL;
2693       for (i=0; i<dsp->numseg; i++)
2694       {
2695          dsp_new->lens[i] = dsp->lens[i];
2696          dsp_new->starts[i*dsp->dim] = dsp->starts[i*dsp->dim];
2697          dsp_new->starts[i*dsp->dim+1] = dsp->starts[i*dsp->dim+1];
2698          dsp_new->strands[i*dsp->dim] = dsp_new->strands[i*dsp->dim+1] = Seq_strand_plus;
2699       }
2700       dsp_new->lens[dsp_new->numseg-1] = ovl;
2701       dsp_new->starts[(dsp_new->numseg-1)*2] = stop1+1;
2702       dsp_new->starts[(dsp_new->numseg-1)*2+1] = stop2+1;
2703       dsp_new->strands[(dsp_new->numseg-1)*2] = dsp_new->strands[(dsp_new->numseg-1)*2+1] = Seq_strand_plus;
2704       DenseSegFree(dsp);
2705       sap->segs = (Pointer)dsp_new;
2706       SAIndex2Free2(sap->saip);
2707       sap->saip = NULL;
2708       AlnMgr2IndexSingleChildSeqAlign(sap);
2709       SeqIdFree(sip1);
2710       SeqIdFree(sip2);
2711       BioseqUnlock(bsp1);
2712       BioseqUnlock(bsp2);
2713       return;
2714    }
2715    if (which_side == SQN_LEFT)
2716    {
2717       spp = SeqPortNew(bsp1, MAX(0, start1-(SQN_MAXGAP+ovl)), start1-1, Seq_strand_plus, Seq_code_ncbi4na);
2718       SeqPortRead(spp, buf1, 20);
2719       SeqPortFree(spp);
2720       if (strand == Seq_strand_minus)
2721       {
2722          spp = SeqPortNew(bsp2, stop2+1, stop2 + 1 + ovl, strand, Seq_code_ncbi4na);
2723          SeqPortRead(spp, buf2, 20);
2724          SeqPortFree(spp);
2725       } else
2726       {
2727          spp = SeqPortNew(bsp2, start2-1-ovl, start2-1, strand, Seq_code_ncbi4na);
2728          SeqPortRead(spp, buf2, 20);
2729          SeqPortFree(spp);
2730       }
2731       gap = -1;
2732       for (i=0; i<SQN_MAXGAP; i++)
2733       {
2734          mismatch = FALSE;
2735          for (j=0; j<ovl; j++)
2736          {
2737             if (buf2[j] != buf1[i+j])
2738                mismatch = TRUE;
2739          }
2740          if (mismatch == FALSE)
2741             gap = SQN_MAXGAP-i;
2742       }
2743       if (gap > 0)
2744       {
2745          dsp = (DenseSegPtr)(sap->segs);
2746          dsp_new = DenseSegNew();
2747          dsp_new->dim = 2;
2748          dsp_new->numseg = dsp->numseg+2;
2749          dsp_new->ids = dsp->ids;
2750          dsp->ids = NULL;
2751          dsp_new->starts = (Int4Ptr)MemNew((dsp_new->numseg)*2*sizeof(Int4));
2752          dsp_new->lens = (Int4Ptr)MemNew((dsp_new->numseg)*sizeof(Int4));
2753          dsp_new->strands = (Uint1Ptr)MemNew((dsp_new->numseg)*2*sizeof(Uint1));
2754          for (i=2; i<dsp_new->numseg; i++)
2755          {
2756             dsp_new->starts[i*2] = dsp->starts[(i-2)*2];
2757             dsp_new->starts[i*2+1] = dsp->starts[(i-2)*2+1];
2758             dsp_new->strands[i+2] = dsp->strands[(i-2)*2];
2759             dsp_new->strands[i*2+1] = dsp->strands[(i-2)*2+1];
2760             dsp_new->lens[i] = dsp->lens[i-2];
2761          }
2762          dsp_new->starts[0] = dsp->starts[0] - gap - ovl;
2763          dsp_new->starts[2] = dsp_new->starts[0] + ovl;
2764          dsp_new->starts[3] = -1;
2765          dsp_new->strands[0] = dsp_new->strands[2] = dsp->strands[0];
2766          dsp_new->strands[1] = dsp_new->strands[2] = dsp->strands[1];
2767          dsp_new->lens[0] = ovl;
2768          dsp_new->lens[1] = gap;
2769          if (strand == Seq_strand_minus)
2770             dsp_new->starts[1] = stop2 + 1;
2771          else
2772             dsp_new->starts[1] = start2 - ovl;
2773          sap->segs = (Pointer)dsp_new;
2774          DenseSegFree(dsp);
2775          SAIndex2Free2(sap->saip);
2776          sap->saip = NULL;
2777          AlnMgr2IndexSingleChildSeqAlign(sap);
2778       } else
2779          SQN_AddToAln(sap, ovl, SQN_LEFT, strand);
2780    } else if (which_side == SQN_RIGHT)
2781    {
2782       spp = SeqPortNew(bsp1, MIN(stop1, bsp1->length-1), MIN(stop1+SQN_MAXGAP, bsp1->length-1), Seq_strand_plus, Seq_code_ncbi4na);
2783       SeqPortRead(spp, buf1, 20);
2784       SeqPortFree(spp);
2785       if (strand == Seq_strand_minus)
2786       {
2787          spp = SeqPortNew(bsp2, start2-ovl, start2-1-ovl, strand, Seq_code_ncbi4na);
2788          SeqPortRead(spp, buf2, 20);
2789          SeqPortFree(spp);
2790       } else
2791       {
2792          spp = SeqPortNew(bsp2, MIN(stop2+1, bsp2->length-1), MIN(stop2+1+ovl, bsp2->length-1), strand, Seq_code_ncbi4na);
2793          SeqPortRead(spp, buf2, 20);
2794          SeqPortFree(spp);
2795       }
2796       gap = -1;
2797       for (i=0; i<SQN_MAXGAP; i++)
2798       {
2799          mismatch = FALSE;
2800          for (j=0; j<ovl; j++)
2801          {
2802             if (buf1[i+j] != buf1[j])
2803                mismatch = TRUE;
2804          }
2805          if (mismatch == FALSE && gap == -1)
2806             gap = i;
2807       }
2808       if (gap > 0)
2809       {
2810          dsp = (DenseSegPtr)(sap->segs);
2811          dsp_new = DenseSegNew();
2812          dsp_new->dim = 2;
2813          dsp_new->numseg = dsp->numseg+2;
2814          dsp_new->ids = dsp->ids;
2815          dsp->ids = NULL;
2816          dsp_new->starts = (Int4Ptr)MemNew((dsp_new->numseg)*2*sizeof(Int4));
2817          dsp_new->lens = (Int4Ptr)MemNew((dsp_new->numseg)*sizeof(Int4));
2818          dsp_new->strands = (Uint1Ptr)MemNew((dsp_new->numseg)*2*sizeof(Uint1));
2819          for (i=0; i<dsp->numseg; i++)
2820          {
2821             dsp_new->starts[i*2] = dsp->starts[i*2];
2822             dsp_new->starts[i*2+1] = dsp->starts[i*2+1];
2823             dsp_new->strands[i+2] = dsp->strands[i*2];
2824             dsp_new->strands[i*2+1] = dsp->strands[i*2+1];
2825             dsp_new->lens[i] = dsp->lens[i];
2826          }
2827          dsp_new->strands[2*dsp->numseg] = dsp_new->strands[2*dsp->numseg+2] = dsp->strands[0];
2828          dsp_new->strands[2*dsp->numseg+1] = dsp_new->strands[2*dsp->numseg+3] = dsp->strands[1];
2829          dsp_new->lens[dsp->numseg] = gap;
2830          dsp_new->lens[dsp->numseg+1] = ovl;
2831          dsp_new->starts[2*dsp->numseg] = dsp_new->starts[2*(dsp->numseg-1)]+dsp_new->lens[dsp->numseg-1];
2832          dsp_new->starts[2*dsp->numseg+2] = dsp_new->starts[2*dsp->numseg] + gap;
2833          if (strand == Seq_strand_minus)
2834             dsp_new->starts[2*dsp->numseg+3] = start2 - ovl;
2835          else
2836             dsp_new->starts[2*dsp->numseg+3] = stop2 + 1;
2837          dsp_new->starts[2*dsp->numseg+1] = -1;
2838          sap->segs = (Pointer)dsp_new;
2839          DenseSegFree(dsp);
2840          SAIndex2Free2(sap->saip);
2841          sap->saip = NULL;
2842          AlnMgr2IndexSingleChildSeqAlign(sap);
2843       } else
2844          SQN_AddToAln(sap, ovl, SQN_RIGHT, strand);
2845    }
2846    SeqIdFree(sip1);
2847    SeqIdFree(sip2);
2848    BioseqUnlock(bsp1);
2849    BioseqUnlock(bsp2);
2850 }
2851 
sqn_binary_search_on_uint4_list(Uint4Ptr list,Uint4 pos,Uint4 listlen)2852 static Uint4 sqn_binary_search_on_uint4_list(Uint4Ptr list, Uint4 pos, Uint4 listlen)
2853 {
2854    Uint4  L;
2855    Uint4  mid;
2856    Uint4  R;
2857 
2858    if (list == NULL || listlen == 0)
2859       return 0;
2860    L = 0;
2861    R = listlen - 1;
2862    while (L < R)
2863    {
2864       mid = (L+R)/2;
2865       if (list[mid + 1] <= pos)
2866       {
2867          L = mid + 1;
2868       } else
2869       {
2870          R = mid;
2871       }
2872    }
2873    return R;
2874 }
2875 
MapRowCoordsSpecial(SeqAlignPtr sap,Uint4 pos,Int4 row,Int4 which_end)2876 static Int4 MapRowCoordsSpecial(SeqAlignPtr sap, Uint4 pos, Int4 row, Int4 which_end)
2877 {
2878    DenseSegPtr  dsp;
2879    Int4         idx;
2880    Int4         offset;
2881    SAIndex2Ptr   saip;
2882    Int4         start;
2883 
2884    if (sap == NULL || row < 0)
2885       return -1;
2886    if (sap->saip == NULL)
2887       return -1;
2888    saip = (SAIndex2Ptr)sap->saip;
2889    dsp = (DenseSegPtr)sap->segs;
2890    start = sqn_binary_search_on_uint4_list(saip->aligncoords, pos, dsp->numseg);
2891    offset = pos - saip->aligncoords[start];
2892    idx = (dsp->dim*start) + row - 1;
2893    if (dsp->starts[idx] == -1)
2894    {
2895       if (which_end == SQN_RIGHT)
2896       {
2897          /* round down */
2898          while (start >= 0) {
2899             idx = (dsp->dim*start) + row - 1;
2900             if (dsp->starts[idx] != -1)
2901                return (dsp->starts[idx] + dsp->lens[start] - 1);
2902             start--;
2903          }
2904          return -2;
2905       } else if (which_end == SQN_LEFT)
2906       {
2907          /* round up */
2908          while (start < dsp->numseg) {
2909             idx = (dsp->dim*start) + row - 1;
2910             if (dsp->starts[idx] != -1)
2911                return (dsp->starts[idx]);
2912             start++;
2913          }
2914          return -2;
2915       }
2916    } else
2917    {
2918       idx = (dsp->dim*start) + row - 1;
2919       if (dsp->strands[idx] != Seq_strand_minus)
2920          return (dsp->starts[idx] + offset);
2921       else
2922          return (dsp->starts[idx] + dsp->lens[start] - 1 - offset);
2923    }
2924    return -1;
2925 }
2926 
MapBioseqToBioseqSpecial(SeqAlignPtr sap,Int4 begin,Int4 fin,Int4 pos,Int4 which_end)2927 static Int4 MapBioseqToBioseqSpecial(SeqAlignPtr sap, Int4 begin, Int4 fin, Int4 pos, Int4 which_end)
2928 {
2929    Int4  bspos;
2930    Int4  sapos;
2931    Int4  start1;
2932    Int4  start2;
2933    Int4  stop1;
2934    Int4  stop2;
2935 
2936    if (sap == NULL || sap->saip == NULL)
2937       return -2;
2938    AlnMgr2GetNthSeqRangeInSA(sap, begin, &start1, &stop1);
2939    AlnMgr2GetNthSeqRangeInSA(sap, fin, &start2, &stop2);
2940    /* check to see whether the position is outside the alignment */
2941    if (pos < start1)
2942       return (start2 - (start1 - pos));
2943    else if (pos > stop1)
2944       return (stop2 + (pos-stop1));
2945    sapos = AlnMgr2MapBioseqToSeqAlign(sap, pos, begin);
2946    bspos = MapRowCoordsSpecial(sap, sapos, fin, which_end);
2947    if (bspos >= 0)
2948       return bspos;
2949    else if (which_end == SQN_LEFT)
2950       return (start2-1);
2951    else if (which_end == SQN_RIGHT)
2952       return (stop2+1);
2953    else
2954       return 0;
2955 }
2956 
SPI_flip_sa_list(SeqAlignPtr sap)2957 static void SPI_flip_sa_list (SeqAlignPtr sap)
2958 {
2959    DenseSegPtr  dsp;
2960    Int4         i;
2961    SeqIdPtr     sip;
2962    SeqIdPtr     sip_next;
2963    Int4         tmp_start;
2964    Uint1        tmp_strand;
2965 
2966    if (sap == NULL || sap->segtype != SAS_DENSEG)
2967       return;
2968    while (sap != NULL)
2969    {
2970       dsp = (DenseSegPtr)(sap->segs);
2971       if (dsp->dim == 2) /* skip anything with more than 2 rows */
2972       {
2973          /* first switch the ids */
2974          sip = dsp->ids;
2975          sip_next = sip->next;
2976          sip_next->next = sip;
2977          sip->next = NULL;
2978          dsp->ids = sip_next;
2979          /* then switch the starts and strands */
2980          for (i = 0; i<dsp->numseg; i++)
2981          {
2982             tmp_start = dsp->starts[2*i];
2983             dsp->starts[2*i] = dsp->starts[2*i+1];
2984             dsp->starts[2*i+1] = tmp_start;
2985             tmp_strand = dsp->strands[2*i];
2986             dsp->strands[2*i] = dsp->strands[2*i+1];
2987             dsp->strands[2*i+1] = tmp_strand;
2988          }
2989       }
2990       if (sap->saip != NULL) /* free indexes, reindex */
2991       {
2992          SAIndex2Free2(sap->saip);
2993          sap->saip = NULL;
2994          AlnMgr2IndexSingleChildSeqAlign(sap);
2995       }
2996       sap = sap->next;
2997    }
2998 }
2999 
ReverseBioseqFeatureStrands(BioseqPtr bsp)3000 NLM_EXTERN void ReverseBioseqFeatureStrands (BioseqPtr bsp)
3001 {
3002    Uint2                entityID;
3003    SeqMgrFeatContext    context;
3004    SeqFeatPtr           sfp;
3005    SeqIdPtr             sip;
3006    SeqLocPtr            slp;
3007    CdRegionPtr          crp;
3008    CodeBreakPtr         cbp;
3009    RnaRefPtr            rrp;
3010    tRNAPtr              trp;
3011    Uint1                strand = Seq_strand_minus;
3012 
3013    if (bsp == NULL) return;
3014 
3015    entityID = bsp->idx.entityID;
3016    if (entityID == 0)
3017    {
3018      entityID = ObjMgrGetEntityIDForPointer (bsp);
3019    }
3020    if (! SeqMgrFeaturesAreIndexed (entityID)) {
3021      SeqMgrIndexFeatures (entityID, NULL);
3022    }
3023    sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &context);
3024    while (sfp != NULL) {
3025       sip = SeqLocId (sfp->location);
3026       slp = SeqLocCopyRegion (sip, sfp->location, bsp, 0, bsp->length - 1, strand, FALSE);
3027       sfp->location = SeqLocFree (sfp->location);
3028       sfp->location = slp;
3029       switch (sfp->data.choice) {
3030          case SEQFEAT_CDREGION :
3031            crp = (CdRegionPtr) sfp->data.value.ptrvalue;
3032            if (crp != NULL) {
3033              for (cbp = crp->code_break; cbp != NULL; cbp = cbp->next) {
3034                sip = SeqLocId (cbp->loc);
3035                slp = SeqLocCopyRegion (sip, cbp->loc, bsp, 0, bsp->length - 1, strand, FALSE);
3036                cbp->loc = SeqLocFree (cbp->loc);
3037                cbp->loc = slp;
3038              }
3039            }
3040            break;
3041          case SEQFEAT_RNA :
3042            rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
3043            if (rrp != NULL && rrp->ext.choice == 2) {
3044              trp = (tRNAPtr) rrp->ext.value.ptrvalue;
3045              if (trp != NULL && trp->anticodon != NULL) {
3046                sip = SeqLocId (trp->anticodon);
3047                slp = SeqLocCopyRegion (sip, trp->anticodon, bsp, 0, bsp->length - 1, strand, FALSE);
3048                trp->anticodon = SeqLocFree (trp->anticodon);
3049                trp->anticodon = slp;
3050              }
3051            }
3052            break;
3053          default :
3054            break;
3055       }
3056       sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &context);
3057    }
3058    SeqMgrClearFeatureIndexes (entityID, NULL);
3059    SeqMgrIndexFeatures (entityID, NULL);
3060 }
3061 
GetOldBlastAlignment(BioseqPtr bsp1,BioseqPtr bsp2)3062 static SeqAlignPtr LIBCALLBACK GetOldBlastAlignment (BioseqPtr bsp1, BioseqPtr bsp2)
3063 {
3064    BLAST_OptionsBlkPtr  options;
3065    CharPtr              program = "blastn";
3066    SeqAlignPtr          sap;
3067 
3068    if (ISA_aa (bsp1->mol)) {
3069      program = "blastp";
3070    }
3071    options = BLASTOptionNew(program, TRUE);
3072    options->gapped_calculation = TRUE;
3073    options->expect_value = 0.001;
3074    if (bsp1->length > 10000 || bsp2->length > 10000)
3075    {
3076       options->expect_value = act_get_eval(60);
3077       options->wordsize = 20;
3078       options->filter_string = StringSave ("m L");
3079    }
3080    sap = BlastTwoSequences(bsp1, bsp2, program, options);
3081    BLASTOptionDelete(options);
3082    return sap;
3083 }
3084 
GetOldBlastAlignmentPiece(SeqLocPtr slp1,SeqLocPtr slp2)3085 static SeqAlignPtr LIBCALLBACK GetOldBlastAlignmentPiece (SeqLocPtr slp1, SeqLocPtr slp2)
3086 {
3087    BioseqPtr            bsp1, bsp2;
3088    BLAST_OptionsBlkPtr  options;
3089    CharPtr              program;
3090    SeqAlignPtr          sap = NULL;
3091 
3092    if (slp1 == NULL || slp2 == NULL)
3093    {
3094       return NULL;
3095    }
3096 
3097    bsp1 = BioseqFindFromSeqLoc (slp1);
3098    bsp2 = BioseqFindFromSeqLoc (slp2);
3099 
3100    if (bsp1 == NULL || bsp2 == NULL)
3101    {
3102       return NULL;
3103    }
3104 
3105    if (ISA_aa(bsp1->mol))
3106    {
3107       if (ISA_aa(bsp2->mol))
3108          program = StringSave("blastp");
3109       else
3110          return NULL;
3111    } else if (ISA_na(bsp1->mol))
3112    {
3113       if (ISA_na(bsp2->mol))
3114          program = StringSave("blastn");
3115       else
3116          return NULL;
3117    }
3118    options = BLASTOptionNew(program, TRUE);
3119    options->gapped_calculation = TRUE;
3120    options->expect_value = 10;
3121    options->gap_x_dropoff_final = 100;
3122    options->gap_open = 4;
3123    options->gap_extend = 1;
3124    options->penalty = -1;
3125    options->wordsize = 7;
3126    sap = BlastTwoSequencesByLoc(slp1, slp2, program, options);
3127    BLASTOptionDelete(options);
3128    MemFree(program);
3129    return sap;
3130 }
3131 
DumpAlignments(SeqAlignPtr salp)3132 static void DumpAlignments (SeqAlignPtr salp)
3133 {
3134   AsnIoPtr aip;
3135 
3136   aip = AsnIoOpen("tmp.txt", "w");
3137   while (salp != NULL) {
3138     SeqAlignAsnWrite (salp, aip, NULL);
3139     salp = salp->next;
3140   }
3141   AsnIoClose (aip);
3142 }
3143 
3144 
3145 NLM_EXTERN SeqAlignPtr
Sqn_GlobalAlign2SeqEx(BioseqPtr bsp1,BioseqPtr bsp2,BoolPtr revcomp,GetAlignmentFunc aln_func,GetAlignmentPieceFunc aln_piece_func,Boolean extend_ends)3146 Sqn_GlobalAlign2SeqEx
3147 (BioseqPtr bsp1,
3148  BioseqPtr bsp2,
3149  BoolPtr revcomp,
3150  GetAlignmentFunc aln_func,
3151  GetAlignmentPieceFunc aln_piece_func,
3152  Boolean extend_ends)
3153 {
3154    AMAlignIndex2Ptr     amaip;
3155    Int4                 i;
3156    SeqAlignPtr          sap;
3157    SeqAlignPtr          sap_final;
3158    SeqAlignPtr          sap_head;
3159    SeqAlignPtr          sap_new;
3160    SeqAlignPtr          sap_prev;
3161    Int4                 start1;
3162    Int4                 start2;
3163    Int4                 stop1;
3164    Int4                 stop2;
3165    Uint1                strand;
3166    Int4                 extnd = 20;
3167    Int4                 spin_fuzz = 20;
3168 
3169    if (bsp1 == NULL || bsp2 == NULL)
3170       return NULL;
3171 
3172    sap = aln_func (bsp1, bsp2);
3173    if (sap == NULL)
3174    {
3175       ErrPostEx (SEV_ERROR, 0, 0, "BLAST finds no sequence similarity");
3176       return NULL;
3177    }
3178 /*   DumpAlignments(sap);  */
3179 
3180    AlnMgr2IndexLite(sap);
3181    ACT_RemoveInconsistentAlnsFromSet(sap, spin_fuzz, 1);
3182    amaip = (AMAlignIndex2Ptr)(sap->saip);
3183    AlnMgr2SortAlnSetByNthRowPos(sap, 1);
3184    ACT_GetNthSeqRangeInSASet(sap, 1, &start1, &stop1);
3185    ACT_GetNthSeqRangeInSASet(sap, 2, &start2, &stop2);
3186    strand = AlnMgr2GetNthStrand(amaip->saps[0], 2);
3187 
3188    /* if opposite strand submitted, reverse complement, realign */
3189 
3190    if (strand == Seq_strand_minus) {
3191      if (revcomp != NULL) {
3192        *revcomp = TRUE;
3193      }
3194      AMAlignIndexFreeEitherIndex (sap);
3195      sap = SeqAlignFree (sap);
3196      BioseqRevComp (bsp2);
3197      ReverseBioseqFeatureStrands (bsp2);
3198      sap = aln_func (bsp1, bsp2);
3199      if (sap == NULL)
3200      {
3201         ErrPostEx (SEV_ERROR, 0, 0, "BLAST finds no sequence similarity in reverse complement");
3202         return NULL;
3203      }
3204      AlnMgr2IndexLite(sap);
3205      ACT_RemoveInconsistentAlnsFromSet(sap, spin_fuzz, 1);
3206      amaip = (AMAlignIndex2Ptr)(sap->saip);
3207      AlnMgr2SortAlnSetByNthRowPos(sap, 1);
3208      ACT_GetNthSeqRangeInSASet(sap, 1, &start1, &stop1);
3209      ACT_GetNthSeqRangeInSASet(sap, 2, &start2, &stop2);
3210      strand = AlnMgr2GetNthStrand(amaip->saps[0], 2);
3211    }
3212 
3213    if (extend_ends) {
3214       /* done with any reverse complementing and reblasting, now extend frayed ends */
3215 
3216       sap_head = NULL;
3217 
3218       if (start1 > 6 && start1 < extnd)
3219           sap_head = sap_prev = ACT_FindPiece(bsp1, bsp2, MAX(start1-SQN_WINDOW, 0), start1, MAX(start1-SQN_WINDOW, 0), start2, strand, SQN_LEFT, aln_piece_func);
3220       else if (start1 > 0 && start1 < extnd)
3221           SQN_ExtendAlnAlg(amaip->saps[0], start1, SQN_LEFT, Seq_strand_plus);
3222       ACT_GetNthSeqRangeInSASet(sap, 1, &start1, &stop1);
3223       ACT_GetNthSeqRangeInSASet(sap, 2, &start2, &stop2);
3224       if (start2 > 6 && start2 < extnd)
3225       {
3226           sap_new = ACT_FindPiece(bsp2, bsp1, MAX(start2-SQN_WINDOW, 0), start2, MAX(start1-SQN_WINDOW, 0), start1, strand, SQN_LEFT, aln_piece_func);
3227           if (sap_new != NULL)
3228             SPI_flip_sa_list(sap_new);
3229           if (sap_head != NULL)
3230           {
3231             sap_prev->next = sap_new;
3232             sap_prev = sap_new;
3233           } else
3234             sap_head = sap_prev = sap_new;
3235       } else if (start2 > 0 && start2 < extnd)
3236           SQN_ExtendAlnAlg(amaip->saps[0], start2, SQN_LEFT, Seq_strand_plus);
3237       for (i=0; i<amaip->numsaps-1; i++)
3238       {
3239           AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], 1, NULL, &start1);
3240           AlnMgr2GetNthSeqRangeInSA(amaip->saps[i+1], 1, &stop1, NULL);
3241           if (strand != Seq_strand_minus)
3242           {
3243             AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], 2, NULL, &start2);
3244             AlnMgr2GetNthSeqRangeInSA(amaip->saps[i+1], 2, &stop2, NULL);
3245           } else
3246           {
3247             AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], 2, &stop2, NULL);
3248             AlnMgr2GetNthSeqRangeInSA(amaip->saps[i+1], 2, NULL, &start2);
3249           }
3250           sap_new = ACT_FindPiece(bsp1, bsp2, start1, stop1, start2, stop2, strand, SQN_MIDDLE, aln_piece_func);
3251           if (sap_head)
3252           {
3253             sap_prev->next = sap_new;
3254             if (sap_new != NULL)
3255                 sap_prev = sap_new;
3256           } else
3257             sap_head = sap_prev = sap_new;
3258       }
3259       ACT_GetNthSeqRangeInSASet(sap, 1, &start1, &stop1);
3260       ACT_GetNthSeqRangeInSASet(sap, 2, &start2, &stop2);
3261       if (bsp1->length-stop1 > 6 && bsp1->length-stop1 < extnd)
3262       {
3263           sap_new = ACT_FindPiece(bsp1, bsp2, stop1, MIN(bsp1->length-1, stop1 + SQN_WINDOW), stop2, MIN(bsp2->length-1, stop2+SQN_WINDOW), strand, SQN_RIGHT, aln_piece_func);
3264           if (sap_new != NULL)
3265           {
3266             if (sap_head != NULL)
3267             {
3268                 sap_prev->next = sap_new;
3269                 sap_prev = sap_new;
3270             } else
3271               sap_head = sap_prev = sap_new;
3272           }
3273       } else if (bsp1->length-stop1 > 0 && bsp1->length-stop1 < extnd)
3274           SQN_ExtendAlnAlg(amaip->saps[amaip->numsaps-1], bsp1->length-stop1, SQN_RIGHT, Seq_strand_plus);
3275       ACT_GetNthSeqRangeInSASet(sap, 1, &start1, &stop1);
3276       ACT_GetNthSeqRangeInSASet(sap, 2, &start2, &stop2);
3277       if (bsp2->length-stop2 > 6 && bsp2->length-stop2 < extnd)
3278       {
3279           sap_new = ACT_FindPiece(bsp2, bsp1, stop2, MIN(bsp2->length-1, stop2 + SQN_WINDOW), stop1, MIN(bsp1->length-1, stop1+SQN_WINDOW), strand, SQN_RIGHT, aln_piece_func);
3280           if (sap_new != NULL)
3281           {
3282             SPI_flip_sa_list(sap_new);
3283             if (sap_head != NULL)
3284             {
3285                 sap_prev->next = sap_new;
3286                 sap_prev = sap_new;
3287             } else
3288               sap_head = sap_prev = sap_new;
3289           }
3290       } else if (bsp2->length-stop2 > 0 && bsp2->length-stop2 < extnd)
3291           SQN_ExtendAlnAlg(amaip->saps[amaip->numsaps-1], bsp2->length-stop2, SQN_RIGHT, Seq_strand_plus);
3292       sap_new = (SeqAlignPtr)(sap->segs);
3293       while (sap_new->next != NULL)
3294       {
3295           sap_new = sap_new->next;
3296       }
3297       sap_new->next = sap_head;  /* put the new alignments in the original set */
3298    }
3299 
3300    AMAlignIndexFreeEitherIndex (sap);
3301    AlnMgr2IndexLite(sap);
3302    ACT_RemoveInconsistentAlnsFromSet(sap, spin_fuzz, 1);
3303    amaip = (AMAlignIndex2Ptr)(sap->saip);
3304 
3305    AMAlignIndex2Free2(amaip);
3306    sap->saip = NULL;
3307    AlnMgr2IndexLite(sap);  /* reindex the alignments */
3308    sap_final = ACT_CleanUpAlignments(sap, bsp1->length, bsp2->length);
3309    return sap_final;
3310 
3311 }
3312 
Sqn_GlobalAlign2Seq(BioseqPtr bsp1,BioseqPtr bsp2,BoolPtr revcomp)3313 NLM_EXTERN SeqAlignPtr Sqn_GlobalAlign2Seq (BioseqPtr bsp1, BioseqPtr bsp2, BoolPtr revcomp)
3314 {
3315   return Sqn_GlobalAlign2SeqEx (bsp1, bsp2, revcomp, GetOldBlastAlignment, GetOldBlastAlignmentPiece, TRUE);
3316 }
3317 
3318 
3319 
3320 /***************************************************************************
3321 *
3322 *   AlnMgrTruncateSAP truncates a given seqalign to contain only the
3323 *   bioseq coordinates from start to stop on the indicated row.  Anything
3324 *   before those coordinates is discarded; anything remaining afterwards
3325 *   is made into another seqalign and put in sap->next (the original next,
3326 *   if any, is now at sap->next->next).  Doesn't work on parent seqaligns.
3327 *   The function returns TRUE if the orignal alignment extended past stop.
3328 *
3329 ***************************************************************************/
AlnMgr2TruncateSAP(SeqAlignPtr sap,Int4 start,Int4 stop,Int4 row)3330 NLM_EXTERN Boolean AlnMgr2TruncateSAP(SeqAlignPtr sap, Int4 start, Int4 stop, Int4 row)
3331 {
3332    DenseDiagPtr  ddp;
3333    DenseDiagPtr  ddp2;
3334    DenseSegPtr   dsp;
3335    Int4          from;
3336    Int4          i;
3337    Int4          mstart;
3338    Int4          mstop;
3339    SeqAlignPtr   sap1;
3340    SeqAlignPtr   sap2;
3341    Int4          tmp;
3342    Int4          to;
3343 
3344    if (sap == NULL || stop<start || row < 1)
3345       return FALSE;
3346    if (sap->segtype == SAS_DENSEG)
3347    {
3348       if (sap->saip == NULL)
3349          AlnMgr2IndexSingleChildSeqAlign(sap);
3350       AlnMgr2GetNthSeqRangeInSA(sap, row, &mstart, &mstop);
3351       if (mstart > start || mstop < stop)
3352          return FALSE;
3353       if (mstart == start)
3354       {
3355          if (mstop == stop)
3356             return FALSE;
3357          else if (mstop > stop)
3358          {
3359             from = AlnMgr2MapBioseqToSeqAlign(sap, start, row);
3360             to = AlnMgr2MapBioseqToSeqAlign(sap, stop, row);
3361             if (to < from)
3362             {
3363                tmp = to;
3364                to = from;
3365                from = tmp;
3366             }
3367             sap1 = AlnMgr2GetSubAlign(sap, from, to, 0, FALSE);
3368             AlnMgr2IndexSingleChildSeqAlign(sap1);
3369             from = AlnMgr2MapBioseqToSeqAlign(sap, stop+1, row);
3370             if (from < 0)
3371                return FALSE;
3372             to = AlnMgr2MapBioseqToSeqAlign(sap, mstop, row);
3373             if (to < from)
3374             {
3375                tmp = to;
3376                to = from;
3377                from = tmp;
3378             }
3379             sap2 = AlnMgr2GetSubAlign(sap, from, to, 0, FALSE);
3380             sap2->next = sap->next;
3381             sap->next = sap2;
3382             dsp = sap->segs;
3383             sap->segs = (Pointer)(sap1->segs);
3384             sap1->segs = NULL;
3385             DenseSegFree(dsp);
3386             SeqAlignFree(sap1);
3387             AlnMgr2IndexSingleChildSeqAlign(sap);
3388             AlnMgr2IndexSingleChildSeqAlign(sap2);
3389             return TRUE;
3390          }
3391       } else if (mstart < start) /* throw away the first part */
3392       {
3393          from = AlnMgr2MapBioseqToSeqAlign(sap, start, row);
3394          to = AlnMgr2MapBioseqToSeqAlign(sap, stop, row);
3395          if (to < from)
3396          {
3397             tmp = to;
3398             to = from;
3399             from = tmp;
3400          }
3401          sap1 = AlnMgr2GetSubAlign(sap, from, to, 0, FALSE);
3402          if (mstop == stop) /* done */
3403          {
3404             dsp = sap->segs;
3405             sap->segs = (Pointer)(sap1->segs);
3406             sap1->segs = NULL;
3407             DenseSegFree(dsp);
3408             SeqAlignFree(sap1);
3409             AlnMgr2IndexSingleChildSeqAlign(sap);
3410             return TRUE;
3411          } else if (mstop > stop)
3412          {
3413             from = AlnMgr2MapBioseqToSeqAlign(sap, stop+1, row);
3414             if (from < 0)
3415                return FALSE;
3416             to = AlnMgr2MapBioseqToSeqAlign(sap, mstop, row);
3417             if (to < from)
3418             {
3419                tmp = to;
3420                to = from;
3421                from = tmp;
3422             }
3423             sap2 = AlnMgr2GetSubAlign(sap, from, to, 0, FALSE);
3424             sap2->next = sap->next;
3425             sap->next = sap2;
3426             AlnMgr2IndexSingleChildSeqAlign(sap2);
3427             dsp = sap->segs;
3428             sap->segs = (Pointer)(sap1->segs);
3429             sap1->segs = NULL;
3430             DenseSegFree(dsp);
3431             SeqAlignFree(sap1);
3432             AlnMgr2IndexSingleChildSeqAlign(sap);
3433             return TRUE;
3434          }
3435       }
3436    } else if (sap->segtype == SAS_DENDIAG)
3437    {
3438       ddp = (DenseDiagPtr)(sap->segs);
3439       if (ddp->dim < row)
3440          return FALSE;
3441       mstart = ddp->starts[row-1];
3442       mstop = mstart + ddp->len - 1;
3443       if (mstart > start || mstop < stop)
3444          return FALSE;
3445       if (mstart == start)
3446       {
3447          if (mstop == stop)
3448             return FALSE;
3449          else if (mstop > stop)
3450          {
3451             ddp2 = DenseDiagNew();
3452             ddp2->dim = ddp->dim;
3453             ddp2->starts = (Int4Ptr)MemNew((ddp->dim)*sizeof(Int4));
3454             ddp2->id = SeqIdDupList(ddp->id);
3455             ddp2->strands = (Uint1Ptr)MemNew((ddp->dim)*sizeof(Uint1));
3456             ddp2->scores = ScoreDup(ddp->scores);
3457             for (i=0; i<ddp->dim; i++)
3458             {
3459                ddp2->starts[i] = ddp->starts[i] + ddp->len - (mstop - stop);
3460                ddp2->strands[i] = ddp->strands[i];
3461             }
3462             ddp2->len = mstop - stop;
3463             ddp->len = ddp->len - (mstop - stop);
3464             sap2 = SeqAlignNew();
3465             sap2->type = SAT_PARTIAL;
3466             sap2->segtype = SAS_DENSEG;
3467             sap2->segs = (Pointer)ddp2;
3468             sap2->next = sap->next;
3469             sap->next = sap2;
3470             AlnMgr2IndexSingleChildSeqAlign(sap2);
3471             return TRUE;
3472          }
3473       } else if (mstart < start)
3474       {
3475          for (i=0; i<ddp->dim; i++)
3476          {
3477             ddp->starts[i] = ddp->starts[i] + start - mstart;
3478          }
3479          ddp->len = ddp->len - (start - mstart);
3480          AlnMgr2IndexSingleChildSeqAlign(sap);
3481          if (mstop == stop)
3482             return FALSE;
3483          else if (mstop > stop)
3484          {
3485             ddp2 = DenseDiagNew();
3486             ddp2->dim = ddp->dim;
3487             ddp2->starts = (Int4Ptr)MemNew((ddp->dim)*sizeof(Int4));
3488             ddp2->id = SeqIdDupList(ddp->id);
3489             ddp2->strands = (Uint1Ptr)MemNew((ddp->dim)*sizeof(Uint1));
3490             ddp2->scores = ScoreDup(ddp->scores);
3491             for (i=0; i<ddp->dim; i++)
3492             {
3493                ddp2->starts[i] = ddp->starts[i] + ddp->len - (mstop - stop);
3494                ddp2->strands[i] = ddp->strands[i];
3495             }
3496             ddp2->len = mstop - stop;
3497             ddp->len = ddp->len - (mstop - stop);
3498             sap2 = SeqAlignNew();
3499             sap2->type = SAT_PARTIAL;
3500             sap2->segtype = SAS_DENSEG;
3501             sap2->segs = (Pointer)ddp2;
3502             sap2->next = sap->next;
3503             sap->next = sap2;
3504             AlnMgr2IndexSingleChildSeqAlign(sap2);
3505             return TRUE;
3506          }
3507       }
3508    } else
3509       return FALSE;
3510    return FALSE;
3511 }
3512 
3513