1 static char const rcsid[] = "$Id: seg.c,v 6.19 2003/05/30 17:25:38 coulouri Exp $";
2 
3 /*
4 * ===========================================================================
5 *
6 *                            PUBLIC DOMAIN NOTICE
7 *               National Center for Biotechnology Information
8 *
9 *  This software/database is a "United States Government Work" under the
10 *  terms of the United States Copyright Act.  It was written as part of
11 *  the author's offical duties as a United States Government employee and
12 *  thus cannot be copyrighted.  This software/database is freely available
13 *  to the public for use. The National Library of Medicine and the U.S.
14 *  Government have not placed any restriction on its use or reproduction.
15 *
16 *  Although all reasonable efforts have been taken to ensure the accuracy
17 *  and reliability of the software and data, the NLM and the U.S.
18 *  Government do not and cannot warrant the performance or results that
19 *  may be obtained by using this software or data. The NLM and the U.S.
20 *  Government disclaim all warranties, express or implied, including
21 *  warranties of performance, merchantability or fitness for any particular
22 *  purpose.
23 *
24 *  Please cite the author in any work or product based on this material.
25 *
26 * ===========================================================================*/
27 
28 
29 /*--------------------------------------------------------------------------*/
30 
31 #include "seg.h"
32 #include "lnfac.h"
33 
34 static char _this_module[] = "seg";
35 #undef THIS_MODULE
36 #define THIS_MODULE _this_module
37 
38 /*------------------------------------------------------(local functions)---*/
39 
40 SequencePtr seq_phase(SequencePtr seq, Int4 phase, Int4 period);
41 SegPtr per_mergesegs(SequencePtr seq, SegPtr *persegs);
42 FloatHiPtr seqent(SequencePtr seq, Int4 window, Int4 maxbogus);
43 
44 SequencePtr openwin(SequencePtr seq, Int4 start, Int4 length);
45 Int4 shiftwin1(SequencePtr win);
46 void closewin(SequencePtr win);
47 void compon(SequencePtr win);
48 void stateon(SequencePtr win);
49 void enton(SequencePtr win);
50 FloatHi entropy(Int4Ptr sv);
51 static int LIBCALLBACK state_cmp(VoidPtr s1, VoidPtr s2);
52 Int4 findlo(Int4 i, Int4 limit, FloatHi hicut, FloatHiPtr H);
53 Int4 findhi(Int4 i, Int4 limit, FloatHi hicut, FloatHiPtr H);
54 void trim(SequencePtr seq, Int4Ptr leftend, Int4Ptr rightend,
55           SegParamsPtr sparamsp);
56 FloatHi getprob(Int4Ptr sv, Int4 total, AlphaPtr palpha);
57 FloatHi lnperm(Int4Ptr sv, Int4 tot);
58 FloatHi lnass(Int4Ptr sv, Int4 alphasize);
59 void mergesegs(SequencePtr seq, SegPtr segs, Boolean overlap);
60 void decrementsv(Int4Ptr sv, Int4 class);
61 void incrementsv(Int4Ptr sv, Int4 class);
62 static void appendseg(SegPtr segs, SegPtr seg);
63 static Boolean hasdash(SequencePtr win);
64 AlphaPtr AA20alpha (void);
65 AlphaPtr NA4alpha (void);
66 void AlphaFree(AlphaPtr palpha);
67 AlphaPtr AlphaCopy(AlphaPtr palpha);
68 
69 SeqLocPtr SegsToSeqLoc(BioseqPtr bsp, SegPtr segs);
70 SeqLocPtr SeqlocSegsToSeqLoc(SeqLocPtr slp, SegPtr segs);
71 
72 /*------------------------------------------------------------(BioseqSeg)---*/
73 
BioseqSeg(BioseqPtr bsp,SegParamsPtr sparamsp)74 SeqLocPtr BioseqSeg (BioseqPtr bsp, SegParamsPtr sparamsp)
75 
76   {
77    SeqLocPtr	slp = NULL;
78 
79 /* error msg stuff */
80 
81    ErrSetOptFlags (EO_MSG_CODES);
82 
83 /* bail on null bioseq */
84 
85    if (!bsp)
86      {
87        ErrPostEx (SEV_ERROR, 1, 1, "no bioseq");
88        ErrShow ();
89        return (slp);
90      }
91 
92    if (ISA_na (bsp->mol))
93      {
94       slp = BioseqSegNa (bsp, sparamsp);
95      }
96 
97    if (ISA_aa (bsp->mol))
98      {
99       slp = BioseqSegAa (bsp, sparamsp);
100      }
101 
102 /* clean up & return */
103    return (slp);
104   }
105 
106 /*----------------------------------------------------------(BioseqSegNa)---*/
107 
BioseqSegNa(BioseqPtr bsp,SegParamsPtr sparamsp)108 SeqLocPtr BioseqSegNa (BioseqPtr bsp, SegParamsPtr sparamsp)
109 
110   {
111    SeqPortPtr   spp=NULL;
112    SeqLocPtr	slp = NULL;
113    SequencePtr seqwin;
114    SegPtr segs;
115    Int4 len, index;
116    CharPtr seq;
117    Uint1       residue;
118 
119 /* error msg stuff */
120 
121    ErrSetOptFlags (EO_MSG_CODES);
122    SegParamsCheck (sparamsp);
123 
124 /* bail on null bioseq */
125 
126    if (!bsp)
127      {
128        ErrPostEx (SEV_ERROR, 2, 1, "no bioseq");
129        ErrShow ();
130        return (slp);
131      }
132 
133    if (!ISA_na (bsp->mol))
134      {
135       ErrPostEx (SEV_WARNING, 2, 2, "not nucleic acid sequence");
136       ErrShow ();
137       return (slp);
138      }
139 
140    if (!sparamsp)
141      {
142       sparamsp = SegParamsNewNa();
143       if (!sparamsp)
144         {
145          ErrPostEx (SEV_WARNING, 0, 0, "null parameters object");
146          ErrShow();
147          return(slp);
148         }
149      }
150    SegParamsCheck (sparamsp);
151 
152 /* make an old-style genwin sequence window object */
153 
154    len = bsp->length;
155    seq = (CharPtr) MemNew((len+1)*sizeof(Char));
156    spp = SeqPortNew(bsp, 0, -1, bsp->strand, Seq_code_ncbi8na);
157    if (spp == NULL) {
158       ErrPostEx (SEV_ERROR, 0, 0, "SeqPortNew failure");
159       ErrShow();
160    }
161 
162    if (!seq)
163      {
164       ErrPostEx (SEV_ERROR, 0, 0, "memory allocation failure");
165       ErrShow();
166       return(slp);
167      }
168 
169    index = 0;
170    while ((residue=SeqPortGetResidue(spp)) != SEQPORT_EOF)
171      {
172       if (IS_residue(residue))
173         {
174          seq[index] = residue;
175          index++;
176         }
177       else if (residue == SEQPORT_EOS)
178         {
179          continue; /*[Segment boundary]*/
180         }
181       else if (residue == SEQPORT_VIRT)
182         {
183          continue; /*[Virtual Sequence]*/
184         }
185      }
186 
187    seq[index] = NULLB;
188 
189    seqwin = SeqNew();
190    seqwin->seq = (CharPtr) seq;
191    seqwin->length = bsp->length;
192    seqwin->palpha = AlphaCopy(sparamsp->palpha);
193 
194 /* seg the sequence */
195 
196    segs = (SegPtr) NULL;
197    SegSeq (seqwin, sparamsp, &segs, 0);
198 
199 /* merge the segment if desired. */
200 
201    if (sparamsp->overlaps)
202 	mergesegs(seqwin, segs, sparamsp->overlaps);
203 
204 /* convert segs to seqlocs */
205 
206    slp = SegsToSeqLoc(bsp, segs);
207 
208 /* clean up & return */
209 
210    SeqFree (seqwin);
211    SegFree (segs);
212    SeqPortFree (spp);
213    return (slp);
214   }
215 
216 /*----------------------------------------------------------(BioseqSegAa)---*/
217 
BioseqSegAa(BioseqPtr bsp,SegParamsPtr sparamsp)218 SeqLocPtr BioseqSegAa (BioseqPtr bsp, SegParamsPtr sparamsp)
219 
220   {
221    SeqPortPtr	spp=NULL;
222    SeqLocPtr	slp = NULL;
223    SequencePtr seqwin;
224    SegPtr segs;
225    Int4 index, len;
226    CharPtr seq;
227    Uint1       residue;
228    Boolean params_allocated = FALSE;
229 
230 /* error msg stuff */
231 
232    ErrSetOptFlags (EO_MSG_CODES);
233    SegParamsCheck (sparamsp);
234 
235 /* bail on null bioseq */
236 
237    if (!bsp)
238      {
239        ErrPostEx (SEV_ERROR, 0, 0, "no bioseq");
240        ErrShow ();
241        return (slp);
242      }
243 
244    if (!ISA_aa (bsp->mol))
245      {
246       ErrPostEx (SEV_WARNING, 0, 0, "not protein sequence");
247       ErrShow ();
248       return (slp);
249      }
250 
251    if (!sparamsp)
252      {
253       sparamsp = SegParamsNewAa();
254       SegParamsCheck (sparamsp);
255       params_allocated = TRUE;
256       if (!sparamsp)
257         {
258          ErrPostEx (SEV_WARNING, 0, 0, "null parameters object");
259          ErrShow();
260          return(slp);
261         }
262      }
263 
264 /* make an old-style genwin sequence window object */
265 
266    len = bsp->length;
267    seq = (CharPtr) MemNew((len+1)*sizeof(Char));
268    spp = SeqPortNew(bsp, 0, -1, bsp->strand, Seq_code_ncbieaa);
269    if (spp == NULL) {
270       ErrPostEx (SEV_ERROR, 0, 0, "SeqPortNew failure");
271       ErrShow();
272    }
273 
274    if (!seq)
275    {
276       ErrPostEx (SEV_ERROR, 0, 0, "memory allocation failure");
277       ErrShow();
278 	  SeqPortFree(spp);
279       return(slp);
280    }
281 
282    index = 0;
283    while ((residue=SeqPortGetResidue(spp)) != SEQPORT_EOF)
284      {
285       if (IS_residue(residue))
286         {
287          seq[index] = residue;
288          index++;
289         }
290       else if (residue == SEQPORT_EOS)
291         {
292          continue; /*[Segment boundary]*/
293         }
294       else if (residue == SEQPORT_VIRT)
295         {
296          continue; /*[Virtual Sequence]*/
297         }
298      }
299 
300    seq[index] = NULLB;
301 
302 
303    seqwin = SeqNew();
304    seqwin->seq = (CharPtr) seq;
305    seqwin->length = bsp->length;
306    seqwin->palpha = AlphaCopy(sparamsp->palpha);
307 
308 /* seg the sequence */
309 
310    segs = (SegPtr) NULL;
311    SegSeq (seqwin, sparamsp, &segs, 0);
312 
313 /* merge the segment if desired. */
314 
315    if (sparamsp->overlaps)
316 	mergesegs(seqwin, segs, sparamsp->overlaps);
317 
318 /* convert segs to seqlocs */
319 
320    slp = SegsToSeqLoc(bsp, segs);
321 
322 /* clean up & return */
323 
324    SeqFree (seqwin);
325    SegFree (segs);
326    SeqPortFree(spp);
327    if(params_allocated)
328        SegParamsFree(sparamsp);
329 
330    return (slp);
331   }
332 
333 /*----------------------------------------------------------(SeqlocSegAa)---*/
334 
SeqlocSegAa(SeqLocPtr slpin,SegParamsPtr sparamsp)335 SeqLocPtr SeqlocSegAa (SeqLocPtr slpin, SegParamsPtr sparamsp)
336 
337 {
338     SeqPortPtr	spp=NULL;
339     SeqLocPtr	slp = NULL;
340     SequencePtr seqwin;
341     SegPtr segs;
342     Int4 index, len;
343     Int4 start, stop, temp;
344     CharPtr seq;
345     Uint1       residue;
346     Boolean params_allocated = FALSE;
347 
348     /* error msg stuff */
349 
350     ErrSetOptFlags (EO_MSG_CODES);
351     SegParamsCheck (sparamsp);
352 
353     /* bail on null bioseq */
354 
355     if (!slpin) {
356         ErrPostEx (SEV_ERROR, 0, 0, "no seqloc");
357         ErrShow ();
358         return (slp);
359     }
360 
361     /* only simple intervals */
362 
363     if (slpin->choice != SEQLOC_INT &&
364         slpin->choice != SEQLOC_WHOLE) return(slp);
365 
366     /* get coordinate range */
367 
368     start = SeqLocStart(slpin);
369     stop = SeqLocStop(slpin);
370     if (stop<start) {
371         temp = start;
372         start = stop;
373         stop = temp;
374     }
375 
376     len = stop - start + 1;
377 
378     /* check seg parameters */
379 
380     if (!sparamsp) {
381         params_allocated = TRUE;
382         sparamsp = SegParamsNewAa();
383         SegParamsCheck (sparamsp);
384         if (!sparamsp) {
385             ErrPostEx (SEV_WARNING, 0, 0, "null parameters object");
386             ErrShow();
387             return(slp);
388         }
389     }
390 
391     /* make an old-style genwin sequence window object */
392 
393     seq = (CharPtr) MemNew((len+1)*sizeof(Char));
394     spp = SeqPortNewByLoc(slpin, Seq_code_ncbieaa);
395     if (spp == NULL) {
396         ErrPostEx (SEV_ERROR, 0, 0, "SeqPortNew failure");
397         ErrShow();
398     }
399 
400     if (!seq) {
401         ErrPostEx (SEV_ERROR, 0, 0, "memory allocation failure");
402         ErrShow();
403         SeqPortFree(spp);
404         return(slp);
405     }
406 
407    index = 0;
408    while ((residue=SeqPortGetResidue(spp)) != SEQPORT_EOF) {
409        if (IS_residue(residue)) {
410            seq[index] = residue;
411            index++;
412        } else if (residue == SEQPORT_EOS) {
413            continue; /*[Segment boundary]*/
414        } else if (residue == SEQPORT_VIRT) {
415            continue; /*[Virtual Sequence]*/
416        }
417    }
418 
419    seq[index] = NULLB;
420 
421    seqwin = SeqNew();
422    seqwin->seq = (CharPtr) seq;
423    seqwin->length = len;
424    seqwin->palpha = AlphaCopy(sparamsp->palpha);
425 
426    /* seg the sequence */
427 
428    segs = (SegPtr) NULL;
429    SegSeq (seqwin, sparamsp, &segs, 0);
430 
431 /* merge the segment if desired. */
432 
433    if (sparamsp->overlaps)
434 	mergesegs(seqwin, segs, sparamsp->overlaps);
435 
436    /* convert segs to seqlocs */
437 
438    slp = SeqlocSegsToSeqLoc(slpin, segs);
439 
440    /* clean up & return */
441 
442    SeqFree (seqwin);
443    SegFree (segs);
444    SeqPortFree(spp);
445 
446    if(params_allocated)
447        SegParamsFree(sparamsp);
448 
449    return (slp);
450 }
451 
452 /*---------------------------------------------------------(SegsToSeqLoc)---*/
453 
SegsToSeqLoc(BioseqPtr bsp,SegPtr segs)454 SeqLocPtr SegsToSeqLoc(BioseqPtr bsp, SegPtr segs)
455 
456   {
457    SeqLocPtr slp, slp_last, slp_next, slp_packed;
458    SeqIntPtr sip;
459 
460    if (bsp==NULL || segs==NULL) return ((SeqLocPtr) NULL);
461 
462    slp = (SeqLocPtr) ValNodeNew(NULL);
463    slp->choice = SEQLOC_INT;
464 
465    sip = SeqIntNew();
466    sip->from = segs->begin;
467    sip->to = segs->end;
468    sip->strand = bsp->strand;
469    sip->id = SeqIdDup(bsp->id);
470 
471    slp->data.ptrvalue = sip;
472 
473 /*---                             SEQLOC_INT for a single segment  ---*/
474 
475    if (segs->next==NULL)
476      {
477       slp->next = NULL;
478       return(slp);
479      }
480 
481 /*---                     SEQLOC_PACKED_INT for multiple segments  ---*/
482 
483    slp_last = slp;
484    for (segs=segs->next; segs!=NULL; segs=segs->next)
485      {
486       slp_next = (SeqLocPtr) ValNodeNew(NULL);
487       slp_next->choice = SEQLOC_INT;
488 
489       sip = SeqIntNew();
490       sip->from = segs->begin;
491       sip->to = segs->end;
492       sip->strand = bsp->strand;
493       sip->id = SeqIdDup(bsp->id);
494 
495       slp_next->data.ptrvalue = sip;
496       slp_last->next = slp_next;
497       slp_last = slp_next;
498      }
499 
500    slp_last->next = NULL;
501 
502    slp_packed = (SeqLocPtr) ValNodeNew(NULL);
503    slp_packed->choice = SEQLOC_PACKED_INT;
504    slp_packed->data.ptrvalue = slp;
505    slp_packed->next = NULL;
506 
507    return(slp_packed);
508   }
509 
510 /*---------------------------------------------------(SeqlocSegsToSeqLoc)---*/
511 
SeqlocSegsToSeqLoc(SeqLocPtr slpin,SegPtr segs)512 SeqLocPtr SeqlocSegsToSeqLoc(SeqLocPtr slpin, SegPtr segs)
513 
514   {
515    SeqLocPtr slp, slp_last, slp_next, slp_packed;
516    SeqIntPtr sip;
517    Int4 start, stop;
518    Uint1 strand;
519    SeqIdPtr id;
520 
521    if (slpin==NULL || segs==NULL) return ((SeqLocPtr) NULL);
522    if (slpin->choice != SEQLOC_INT &&
523        slpin->choice != SEQLOC_WHOLE) return ((SeqLocPtr) NULL);
524 
525    start = SeqLocStart(slpin);
526    stop = SeqLocStop(slpin);
527    if (stop < start) return ((SeqLocPtr) NULL);
528    strand = SeqLocStrand(slpin);
529    id = SeqLocId(slpin);
530 
531    slp = (SeqLocPtr) ValNodeNew(NULL);
532    slp->choice = SEQLOC_INT;
533 
534    sip = SeqIntNew();
535    sip->from = segs->begin + start;
536    sip->to = segs->end + start;
537    sip->strand = strand;
538    sip->id = SeqIdDup(id);
539 
540    slp->data.ptrvalue = sip;
541 
542 /*---                             SEQLOC_INT for a single segment  ---*/
543 
544    if (segs->next==NULL)
545      {
546       slp->next = NULL;
547       return(slp);
548      }
549 
550 /*---                     SEQLOC_PACKED_INT for multiple segments  ---*/
551 
552    slp_last = slp;
553    for (segs=segs->next; segs!=NULL; segs=segs->next)
554      {
555       slp_next = (SeqLocPtr) ValNodeNew(NULL);
556       slp_next->choice = SEQLOC_INT;
557 
558       sip = SeqIntNew();
559       sip->from = segs->begin + start;
560       sip->to = segs->end + start;
561       sip->strand = strand;
562       sip->id = SeqIdDup(id);
563 
564       slp_next->data.ptrvalue = sip;
565       slp_last->next = slp_next;
566       slp_last = slp_next;
567      }
568 
569    slp_last->next = NULL;
570 
571    slp_packed = (SeqLocPtr) ValNodeNew(NULL);
572    slp_packed->choice = SEQLOC_PACKED_INT;
573    slp_packed->data.ptrvalue = slp;
574    slp_packed->next = NULL;
575 
576    return(slp_packed);
577   }
578 
579 /*---------------------------------------------------------------(SeqNew)---*/
580 
SeqNew(void)581 SequencePtr SeqNew(void)
582 
583   {
584    SequencePtr seq;
585 
586    seq = (SequencePtr) MemNew(sizeof(Sequence));
587    if (seq==NULL)
588      {
589       /* raise error flag and etc. */
590       return(seq);
591      }
592 
593    seq->parent = (SequencePtr) NULL;
594    seq->seq = (CharPtr) NULL;
595    seq->palpha = (AlphaPtr) NULL;
596    seq->start = seq->length = 0;
597    seq->bogus = seq->punctuation = FALSE;
598    seq->composition = seq->state = (Int4Ptr) NULL;
599    seq->entropy = (FloatHi) 0.0;
600 
601    return(seq);
602   }
603 
604 /*--------------------------------------------------------------(SeqFree)---*/
605 
SeqFree(SequencePtr seq)606 void SeqFree(SequencePtr seq)
607 
608   {
609    if (seq==NULL) return;
610 
611    MemFree(seq->seq);
612    AlphaFree(seq->palpha);
613    MemFree(seq->composition);
614    MemFree(seq->state);
615    MemFree(seq);
616    return;
617   }
618 
619 /*--------------------------------------------------------------(SegFree)---*/
620 
SegFree(SegPtr seg)621 void SegFree(SegPtr seg)
622 
623   {
624    SegPtr nextseg;
625 
626    while (seg)
627      {
628       nextseg = seg->next;
629       MemFree(seg);
630       seg = nextseg;
631      }
632 
633    return;
634   }
635 
636 /*---------------------------------------------------------------(SegSeq)---*/
637 
SegSeq(SequencePtr seq,SegParamsPtr sparamsp,SegPtr * segs,Int4 offset)638 void SegSeq(SequencePtr seq, SegParamsPtr sparamsp, SegPtr *segs,
639             Int4 offset)
640 
641   {
642    SegPtr seg, leftsegs;
643    SequencePtr leftseq;
644    Int4 window;
645    FloatHi locut, hicut;
646    Int4 maxbogus;
647    Int4 downset, upset;
648    Int4 first, last, lowlim;
649    Int4 loi, hii, i;
650    Int4 leftend, rightend, lend, rend;
651    FloatHiPtr H;
652 
653    if (sparamsp->window<=0) return;
654    if (sparamsp->locut<=0.) sparamsp->locut = 0.;
655    if (sparamsp->hicut<=0.) sparamsp->hicut = 0.;
656 
657    window = sparamsp->window;
658    locut = sparamsp->locut;
659    hicut = sparamsp->hicut;
660    downset = (window+1)/2 - 1;
661    upset = window - downset;
662    maxbogus = sparamsp->maxbogus;
663 
664    H = seqent(seq, window, maxbogus);
665    if (H==NULL) return;
666 
667    first = downset;
668    last = seq->length - upset;
669    lowlim = first;
670 
671    for (i=first; i<=last; i++)
672      {
673       if (H[i]<=locut && H[i]!=-1)
674         {
675          loi = findlo(i, lowlim, hicut, H);
676          hii = findhi(i, last, hicut, H);
677 
678          leftend = loi - downset;
679          rightend = hii + upset - 1;
680 
681          trim(openwin(seq, leftend, rightend-leftend+1), &leftend, &rightend,
682               sparamsp);
683 
684          if (i+upset-1<leftend)   /* check for trigger window in left trim */
685            {
686             lend = loi - downset;
687             rend = leftend - 1;
688 
689             leftseq = openwin(seq, lend, rend-lend+1);
690             leftsegs = (SegPtr) NULL;
691             SegSeq(leftseq, sparamsp, &leftsegs, offset+lend);
692             if (leftsegs!=NULL)
693               {
694                if (*segs==NULL) *segs = leftsegs;
695                else appendseg(*segs, leftsegs);
696               }
697             closewin(leftseq);
698 
699 /*          trim(openwin(seq, lend, rend-lend+1), &lend, &rend);
700             seg = (SegPtr) MemNew(sizeof(Seg));
701             seg->begin = lend;
702             seg->end = rend;
703             seg->next = (SegPtr) NULL;
704             if (segs==NULL) segs = seg;
705             else appendseg(segs, seg);  */
706            }
707 
708          seg = (SegPtr) MemNew(sizeof(Seg));
709          seg->begin = leftend + offset;
710          seg->end = rightend + offset;
711          seg->next = (SegPtr) NULL;
712 
713          if (*segs==NULL) *segs = seg;
714          else appendseg(*segs, seg);
715 
716          i = MIN(hii, rightend+downset);
717          lowlim = i + 1;
718 /*       i = hii;     this ignores the trimmed residues... */
719         }
720      }
721 
722    MemFree(H);
723    return;
724   }
725 
726 /*------------------------------------------------------------(seq_phase)---*/
727 
seq_phase(SequencePtr seq,Int4 phase,Int4 period)728 SequencePtr seq_phase (SequencePtr seq, Int4 phase, Int4 period)
729 
730   {
731    SequencePtr perseq;
732    Int4 len, i, j;
733 
734    perseq = (SequencePtr) MemNew(sizeof(Sequence));
735 
736    len = ((seq->length)/period) + 1;
737    perseq->seq = (CharPtr) MemNew((len+1)*sizeof(Char));
738 
739    perseq->length = 0;
740    for (i=0, j=phase; j<seq->length; i++, j+=period)
741      {
742       perseq->seq[i] = seq->seq[j];
743       perseq->length++;
744      }
745    perseq->seq[i] = '\0';
746 
747    return(perseq);
748   }
749 
750 /*---------------------------------------------------------------(seqent)---*/
751 
seqent(SequencePtr seq,Int4 window,Int4 maxbogus)752 FloatHiPtr seqent(SequencePtr seq, Int4 window, Int4 maxbogus)
753 
754   {
755    SequencePtr win;
756    FloatHiPtr H;
757    Int4 i, first, last, downset, upset;
758 
759    downset = (window+1)/2 - 1;
760    upset = window - downset;
761 
762    if (window>seq->length)
763      {
764       return((FloatHiPtr) NULL);
765      }
766 
767    H = (FloatHiPtr) MemNew(seq->length*sizeof(FloatHi));
768 
769    for (i=0; i<seq->length; i++)
770      {
771       H[i] = -1.;
772      }
773 
774    win = openwin(seq, 0, window);
775    enton(win);
776 
777    first = downset;
778    last = seq->length - upset;
779 
780    for (i=first; i<=last; i++)
781      {
782       if (seq->punctuation && hasdash(win))
783         {
784          H[i] = -1.;
785          shiftwin1(win);
786          continue;
787         }
788       if (win->bogus > maxbogus)
789         {
790          H[i] = -1.;
791          shiftwin1(win);
792          continue;
793         }
794       H[i] = win->entropy;
795       shiftwin1(win);
796      }
797 
798    closewin(win);
799    return(H);
800   }
801 
802 /*--------------------------------------------------------------(hasdash)---*/
803 
hasdash(SequencePtr win)804 static Boolean hasdash(SequencePtr win)
805 
806 
807 {
808 	register char	*seq, *seqmax;
809 
810 	seq = win->seq;
811 	seqmax = seq + win->length;
812 
813 	while (seq < seqmax) {
814 		if (*seq++ == '-')
815 			return TRUE;
816 	}
817 	return FALSE;
818 }
819 
820 /*---------------------------------------------------------------(findlo)---*/
821 
findlo(Int4 i,Int4 limit,FloatHi hicut,FloatHiPtr H)822 Int4 findlo(Int4 i, Int4 limit, FloatHi hicut, FloatHiPtr H)
823 
824   {
825    Int4 j;
826 
827    for (j=i; j>=limit; j--)
828      {
829       if (H[j]==-1) break;
830       if (H[j]>hicut) break;
831      }
832 
833    return(j+1);
834   }
835 
836 /*---------------------------------------------------------------(findhi)---*/
837 
findhi(Int4 i,Int4 limit,FloatHi hicut,FloatHiPtr H)838 Int4 findhi(Int4 i, Int4 limit, FloatHi hicut, FloatHiPtr H)
839 
840   {
841    Int4 j;
842 
843    for (j=i; j<=limit; j++)
844      {
845       if (H[j]==-1) break;
846       if (H[j]>hicut) break;
847      }
848 
849    return(j-1);
850   }
851 
852 /*-----------------------------------------------------------------(trim)---*/
853 
trim(SequencePtr seq,Int4Ptr leftend,Int4Ptr rightend,SegParamsPtr sparamsp)854 void trim(SequencePtr seq, Int4Ptr leftend, Int4Ptr rightend,
855           SegParamsPtr sparamsp)
856 
857   {
858    SequencePtr win;
859    FloatHi prob, minprob;
860    Int4 shift, len, i;
861    Int4 lend, rend;
862    Int4 minlen;
863    Int4 maxtrim;
864 
865 /* fprintf(stderr, "%d %d\n", *leftend, *rightend);  */
866 
867    lend = 0;
868    rend = seq->length - 1;
869    minlen = 1;
870    maxtrim = sparamsp->maxtrim;
871    if ((seq->length-maxtrim)>minlen) minlen = seq->length-maxtrim;
872 
873    minprob = 1.;
874 /*   fprintf(stderr, "\n");                                           -*/
875    for (len=seq->length; len>minlen; len--)
876      {
877 /*      fprintf(stderr, "%5d ", len);                                 -*/
878       win = openwin(seq, 0, len);
879       i = 0;
880 
881       shift = TRUE;
882       while (shift)
883         {
884          prob = getprob(win->state, len, sparamsp->palpha);
885 /*         realprob = exp(prob);                 (for tracing the trim)-*/
886 /*         fprintf(stderr, "%2e ", realprob);                          -*/
887          if (prob<minprob)
888            {
889             minprob = prob;
890             lend = i;
891             rend = len + i - 1;
892            }
893          shift = shiftwin1(win);
894          i++;
895         }
896       closewin(win);
897 /*      fprintf(stderr, "\n");                                       -*/
898      }
899 
900 /*   fprintf(stderr, "%d-%d ", *leftend, *rightend);                 -*/
901 
902    *leftend = *leftend + lend;
903    *rightend = *rightend - (seq->length - rend - 1);
904 
905 /*   fprintf(stderr, "%d-%d\n", *leftend, *rightend);                -*/
906 
907    closewin(seq);
908    return;
909   }
910 
911 /*--------------------------------------------------------------(getprob)---*/
912 
getprob(Int4Ptr sv,Int4 total,AlphaPtr palpha)913 FloatHi getprob(Int4Ptr sv, Int4 total, AlphaPtr palpha)
914 
915   {
916    FloatHi ans, ans1, ans2, totseq;
917 
918    /* #define LN20	2.9957322735539909 */
919    /* #define LN4	1.3862943611198906 */
920 
921    totseq = ((double) total) * palpha->lnalphasize;
922 
923 /*
924    ans = lnass(sv, palpha->alphasize) + lnperm(sv, total) - totseq;
925 */
926    ans1 = lnass(sv, palpha->alphasize);
927    if (ans1 > -100000.0 && sv[0] != INT4_MIN)
928    {
929 	ans2 = lnperm(sv, total);
930    }
931    else
932    {
933         ErrPostEx (SEV_ERROR, 0, 0, "Illegal value returned by lnass");
934    }
935    ans = ans1 + ans2 - totseq;
936 /*
937 fprintf(stderr, "%lf %lf %lf\n", ans, ans1, ans2);
938 */
939 
940    return ans;
941   }
942 
943 /*---------------------------------------------------------------(lnperm)---*/
944 
lnperm(Int4Ptr sv,Int4 tot)945 FloatHi lnperm(Int4Ptr sv, Int4 tot)
946 
947   {
948    FloatHi ans;
949    Int4 i;
950 
951    ans = lnfac[tot];
952 
953    for (i=0; sv[i]!=0; i++)
954      {
955       ans -= lnfac[sv[i]];
956      }
957 
958    return(ans);
959   }
960 
961 /*----------------------------------------------------------------(lnass)---*/
962 
lnass(Int4Ptr sv,Int4 alphasize)963 FloatHi lnass(Int4Ptr sv, Int4 alphasize)
964 
965 {
966 	double	ans;
967 	int	svi, svim1;
968 	int	class, total;
969 	int    i;
970 
971 	ans = lnfac[alphasize];
972 	if (sv[0] == 0)
973 		return ans;
974 
975 	total = alphasize;
976 	class = 1;
977 	svi = *sv;
978 	svim1 = sv[0];
979 	for (i=0;; svim1 = svi) {
980 	        if (++i==alphasize) {
981 		        ans -= lnfac[class];
982 			break;
983 		      }
984 		else if ((svi = *++sv) == svim1) {
985 			class++;
986 			continue;
987 		}
988 		else {
989 			total -= class;
990 			ans -= lnfac[class];
991 			if (svi == 0) {
992 				ans -= lnfac[total];
993 				break;
994 			}
995 			else {
996 				class = 1;
997 				continue;
998 			}
999 		}
1000 	}
1001 
1002 	return ans;
1003 }
1004 
1005 /*------------------------------------------------------------(mergesegs)---*/
1006 /* merge together overlapping segments,
1007 	hilenmin also does something, but we need to ask Scott Federhen what?
1008 */
1009 
mergesegs(SequencePtr seq,SegPtr segs,Boolean overlaps)1010 void mergesegs(SequencePtr seq, SegPtr segs, Boolean overlaps)
1011 
1012   {SegPtr seg, nextseg;
1013    Int4 hilenmin;		/* hilenmin yet unset */
1014    Int4 len;
1015 
1016    hilenmin = 0;               /* hilenmin - temporary default */
1017 
1018    if (overlaps == FALSE)
1019 	return;
1020 
1021    if (segs==NULL) return;
1022 
1023    if (segs->begin<hilenmin) segs->begin = 0;
1024 
1025    seg = segs;
1026    nextseg = seg->next;
1027 
1028    while (nextseg!=NULL)
1029      {
1030       if (seg->end>=nextseg->begin && seg->end>=nextseg->end)
1031         {
1032          seg->next = nextseg->next;
1033          MemFree(nextseg);
1034          nextseg = seg->next;
1035          continue;
1036         }
1037       if (seg->end>=nextseg->begin)               /* overlapping segments */
1038         {
1039          seg->end = nextseg->end;
1040          seg->next = nextseg->next;
1041          MemFree(nextseg);
1042          nextseg = seg->next;
1043          continue;
1044         }
1045       len = nextseg->begin - seg->end - 1;
1046       if (len<hilenmin)                            /* short hient segment */
1047         {
1048          seg->end = nextseg->end;
1049          seg->next = nextseg->next;
1050          MemFree(nextseg);
1051          nextseg = seg->next;
1052          continue;
1053         }
1054       seg = nextseg;
1055       nextseg = seg->next;
1056      }
1057 
1058    len = seq->length - seg->end - 1;
1059    if (len<hilenmin) seg->end = seq->length - 1;
1060 
1061    return;
1062   }
1063 
1064 /*--------------------------------------------------------(per_mergesegs)---*/
1065 
per_mergesegs(SequencePtr seq,SegPtr * persegs)1066 SegPtr per_mergesegs(SequencePtr seq, SegPtr *persegs)
1067 
1068   {SegPtr *localsegs;
1069    SegPtr firstseg, segs, seg;
1070    Int4 period;                           /* period yet unset */
1071    Int4 first;
1072    Int4 phase, savephase;
1073 
1074    period = 1;                            /* period set temporarily */
1075 
1076    segs = (SegPtr) NULL;
1077    if (persegs==NULL) return(segs);
1078 
1079    localsegs = (SegPtr *) MemNew(period*sizeof(FloatHi));
1080    for (phase=0; phase<period; phase++) localsegs[phase] = persegs[phase];
1081 
1082    while (TRUE)
1083      {
1084       firstseg = (SegPtr) NULL;
1085       first = -1;
1086       savephase = -1;
1087 
1088       for (phase=0; phase<period; phase++)
1089         {
1090          if (localsegs[phase]==NULL) continue;
1091          if (first==-1 || localsegs[phase]->begin<first)
1092            {
1093             savephase = phase;
1094             firstseg = localsegs[phase];
1095             first = firstseg->begin;
1096            }
1097         }
1098 
1099       if (firstseg==NULL) break;
1100 
1101       seg = (SegPtr) MemNew(sizeof(Seg));
1102       seg->begin = ((firstseg->begin)*period)+savephase;
1103       seg->end = ((firstseg->end)*period)+savephase;
1104       seg->next = (SegPtr) NULL;
1105       if (segs==NULL) segs = seg; else appendseg(segs, seg);
1106 
1107       localsegs[savephase] = localsegs[savephase]->next;
1108      }
1109 
1110    MemFree(localsegs);
1111    mergesegs(seq, segs, FALSE);
1112    return(segs);
1113   }
1114 
1115 
1116 
1117 /*------------------------------------------------------------(appendseg)---*/
1118 
1119 static void
appendseg(SegPtr segs,SegPtr seg)1120 appendseg(SegPtr segs, SegPtr seg)
1121 
1122   {SegPtr temp;
1123 
1124    temp = segs;
1125    while (TRUE)
1126      {
1127       if (temp->next==NULL)
1128         {
1129          temp->next = seg;
1130          break;
1131         }
1132       else
1133         {
1134          temp = temp->next;
1135         }
1136      }
1137 
1138    return;
1139   }
1140 
1141 /*-------------------------------------------------------(SegParamsNewAa)---*/
1142 
SegParamsNewAa(void)1143 SegParamsPtr SegParamsNewAa (void)
1144 
1145   {
1146    SegParamsPtr sparamsp;
1147 
1148    sparamsp = (SegParamsPtr) MemNew (sizeof(SegParams));
1149 
1150    sparamsp->window = 12;
1151    sparamsp->locut = 2.2;
1152    sparamsp->hicut = 2.5;
1153    sparamsp->period = 1;
1154    sparamsp->hilenmin = 0;
1155    sparamsp->overlaps = FALSE;
1156    sparamsp->maxtrim = 50;
1157    sparamsp->maxbogus = 2;
1158    sparamsp->palpha = AA20alpha();
1159 
1160    return (sparamsp);
1161   }
1162 
1163 /*-------------------------------------------------------(SegParamsNewNa)---*/
1164 
SegParamsNewNa(void)1165 SegParamsPtr SegParamsNewNa (void)
1166 
1167   {
1168    SegParamsPtr sparamsp;
1169 
1170    sparamsp = (SegParamsPtr) MemNew (sizeof(SegParams));
1171 
1172    sparamsp->window = 21;
1173    sparamsp->locut = 1.4;
1174    sparamsp->hicut = 1.6;
1175    sparamsp->period = 1;
1176    sparamsp->hilenmin = 0;
1177    sparamsp->overlaps = FALSE;
1178    sparamsp->maxtrim = 100;
1179    sparamsp->maxbogus = 3;
1180    sparamsp->palpha = NA4alpha();
1181 
1182    return (sparamsp);
1183   }
1184 
1185 /*-------------------------------------------------------(SegParamsCheck)---*/
1186 
SegParamsCheck(SegParamsPtr sparamsp)1187 void SegParamsCheck (SegParamsPtr sparamsp)
1188 
1189   {
1190    if (!sparamsp) return;
1191 
1192    if (sparamsp->window <= 0) sparamsp->window = 12;
1193 
1194    if (sparamsp->locut < 0.0) sparamsp->locut = 0.0;
1195 /* if (sparamsp->locut > sparamsp->palpha->lnalphasize)
1196        sparamsp->locut = sparamsp->palpha->lnalphasize; */
1197 
1198    if (sparamsp->hicut < 0.0) sparamsp->hicut = 0.0;
1199 /* if (sparamsp->hicut > sparamsp->palpha->lnalphasize)
1200        sparamsp->hicut = sparamsp->palpha->lnalphasize; */
1201 
1202    if (sparamsp->locut > sparamsp->hicut)
1203        sparamsp->hicut = sparamsp->locut;
1204 
1205    if (sparamsp->maxbogus < 0)
1206        sparamsp->maxbogus = 0;
1207    if (sparamsp->maxbogus > sparamsp->window)
1208        sparamsp->maxbogus = sparamsp->window;
1209 
1210    if (sparamsp->period <= 0) sparamsp->period = 1;
1211    if (sparamsp->maxtrim < 0) sparamsp->maxtrim = 0;
1212 
1213    return;
1214   }
1215 
1216 /*------------------------------------------------------------(AA20alpha)---*/
1217 
AA20alpha(void)1218 AlphaPtr AA20alpha (void)
1219 
1220   {
1221    AlphaPtr palpha;
1222    Int4Ptr alphaindex;
1223    BoolPtr alphaflag;
1224    CharPtr alphachar;
1225    Int4 i;
1226    Char c;
1227 
1228    palpha = (AlphaPtr) MemNew (sizeof(Alpha));
1229 
1230    palpha->alphabet = AA20;
1231    palpha->alphasize = 20;
1232    palpha->lnalphasize = LN20;
1233 
1234    alphaindex = (Int4Ptr) MemNew (CHAR_SET * sizeof(Int4));
1235    alphaflag = (BoolPtr) MemNew (CHAR_SET * sizeof(Boolean));
1236    alphachar = (CharPtr) MemNew (palpha->alphasize * sizeof(Char));
1237 
1238    for (i=0; i<128; i++)
1239      {
1240       c = (Char) i;
1241       if (c=='a' || c=='A')
1242         {alphaflag[i] = FALSE; alphaindex[i] = 0; alphachar[0] = c;}
1243       else if (c=='c' || c=='C')
1244         {alphaflag[i] = FALSE; alphaindex[i] = 1; alphachar[1] = c;}
1245       else if (c=='d' || c=='D')
1246         {alphaflag[i] = FALSE; alphaindex[i] = 2; alphachar[2] = c;}
1247       else if (c=='e' || c=='E')
1248         {alphaflag[i] = FALSE; alphaindex[i] = 3; alphachar[3] = c;}
1249       else if (c=='f' || c=='F')
1250         {alphaflag[i] = FALSE; alphaindex[i] = 4; alphachar[4] = c;}
1251       else if (c=='g' || c=='G')
1252         {alphaflag[i] = FALSE; alphaindex[i] = 5; alphachar[5] = c;}
1253       else if (c=='h' || c=='H')
1254         {alphaflag[i] = FALSE; alphaindex[i] = 6; alphachar[6] = c;}
1255       else if (c=='i' || c=='I')
1256         {alphaflag[i] = FALSE; alphaindex[i] = 7; alphachar[7] = c;}
1257       else if (c=='k' || c=='K')
1258         {alphaflag[i] = FALSE; alphaindex[i] = 8; alphachar[8] = c;}
1259       else if (c=='l' || c=='L')
1260         {alphaflag[i] = FALSE; alphaindex[i] = 9; alphachar[9] = c;}
1261       else if (c=='m' || c=='M')
1262         {alphaflag[i] = FALSE; alphaindex[i] = 10; alphachar[10] = c;}
1263       else if (c=='n' || c=='N')
1264         {alphaflag[i] = FALSE; alphaindex[i] = 11; alphachar[11] = c;}
1265       else if (c=='p' || c=='P')
1266         {alphaflag[i] = FALSE; alphaindex[i] = 12; alphachar[12] = c;}
1267       else if (c=='q' || c=='Q')
1268         {alphaflag[i] = FALSE; alphaindex[i] = 13; alphachar[13] = c;}
1269       else if (c=='r' || c=='R')
1270         {alphaflag[i] = FALSE; alphaindex[i] = 14; alphachar[14] = c;}
1271       else if (c=='s' || c=='S')
1272         {alphaflag[i] = FALSE; alphaindex[i] = 15; alphachar[15] = c;}
1273       else if (c=='t' || c=='T')
1274         {alphaflag[i] = FALSE; alphaindex[i] = 16; alphachar[16] = c;}
1275       else if (c=='v' || c=='V')
1276         {alphaflag[i] = FALSE; alphaindex[i] = 17; alphachar[17] = c;}
1277       else if (c=='w' || c=='W')
1278         {alphaflag[i] = FALSE; alphaindex[i] = 18; alphachar[18] = c;}
1279       else if (c=='y' || c=='Y')
1280         {alphaflag[i] = FALSE; alphaindex[i] = 19; alphachar[19] = c;}
1281       else
1282         {alphaflag[i] = TRUE; alphaindex[i] = 20;}
1283      }
1284 
1285    palpha->alphaindex = alphaindex;
1286    palpha->alphaflag = alphaflag;
1287    palpha->alphachar = alphachar;
1288 
1289    return (palpha);
1290   }
1291 
1292 /*-------------------------------------------------------------(NA4alpha)---*/
1293 
NA4alpha(void)1294 AlphaPtr NA4alpha (void)
1295 
1296   {
1297    AlphaPtr palpha;
1298    Int4Ptr alphaindex;
1299    BoolPtr alphaflag;
1300    CharPtr alphachar;
1301    Int4 i;
1302    Char c;
1303 
1304    palpha = (AlphaPtr) MemNew (sizeof(Alpha));
1305 
1306    palpha->alphabet = NA4;
1307    palpha->alphasize = 4;
1308    palpha->lnalphasize = LN4;
1309 
1310    alphaindex = (Int4Ptr) MemNew (CHAR_SET * sizeof(Int4));
1311    alphaflag = (BoolPtr) MemNew (CHAR_SET * sizeof(Boolean));
1312    alphachar = (CharPtr) MemNew (palpha->alphasize * sizeof(Char));
1313 
1314    for (i=0; i<128; i++)
1315      {
1316       c = (Char) i;
1317       if (c=='a' || c=='A')
1318         {alphaflag[i] = FALSE; alphaindex[i] = 0; alphachar[0] = c;}
1319       else if (c=='c' || c=='C')
1320         {alphaflag[i] = FALSE; alphaindex[i] = 1; alphachar[1] = c;}
1321       else if (c=='g' || c=='G')
1322         {alphaflag[i] = FALSE; alphaindex[i] = 2; alphachar[2] = c;}
1323       else if (c=='t' || c=='T')
1324         {alphaflag[i] = FALSE; alphaindex[i] = 3; alphachar[3] = c;}
1325       else if (c=='u' || c=='U')
1326         {alphaflag[i] = FALSE; alphaindex[i] = 3;}
1327       else
1328         {alphaflag[i] = TRUE; alphaindex[i] = 4;}
1329      }
1330 
1331    palpha->alphaindex = alphaindex;
1332    palpha->alphaflag = alphaflag;
1333    palpha->alphachar = alphachar;
1334 
1335    return (palpha);
1336   }
1337 
1338 /*------------------------------------------------------------(AlphaFree)---*/
1339 
AlphaFree(AlphaPtr palpha)1340 void AlphaFree (AlphaPtr palpha)
1341 
1342   {
1343    if (!palpha) return;
1344 
1345    MemFree (palpha->alphaindex);
1346    MemFree (palpha->alphaflag);
1347    MemFree (palpha->alphachar);
1348    MemFree (palpha);
1349 
1350    return;
1351  }
1352 
1353 /*------------------------------------------------------------(AlphaCopy)---*/
1354 
AlphaCopy(AlphaPtr palpha)1355 AlphaPtr AlphaCopy (AlphaPtr palpha)
1356 
1357   {
1358    AlphaPtr pbeta;
1359    Int2 i;
1360 
1361    if (!palpha) return((AlphaPtr) NULL);
1362 
1363    pbeta = (AlphaPtr) MemNew(sizeof(Alpha));
1364    pbeta->alphabet = palpha->alphabet;
1365    pbeta->alphasize = palpha->alphasize;
1366    pbeta->lnalphasize = palpha->lnalphasize;
1367 
1368    pbeta->alphaindex = (Int4Ptr) MemNew (CHAR_SET * sizeof(Int4));
1369    pbeta->alphaflag = (BoolPtr) MemNew (CHAR_SET * sizeof(Boolean));
1370    pbeta->alphachar = (CharPtr) MemNew (palpha->alphasize * sizeof(Char));
1371 
1372    for (i=0; i<CHAR_SET; i++)
1373      {
1374       pbeta->alphaindex[i] = palpha->alphaindex[i];
1375       pbeta->alphaflag[i] = palpha->alphaflag[i];
1376      }
1377 
1378    for (i=0; i<palpha->alphasize; i++)
1379      {
1380       pbeta->alphachar[i] = palpha->alphachar[i];
1381      }
1382 
1383    return(pbeta);
1384   }
1385 
1386 /*--------------------------------------------------------(SegParamsFree)---*/
1387 
SegParamsFree(SegParamsPtr sparamsp)1388 void SegParamsFree(SegParamsPtr sparamsp)
1389 
1390   {
1391    if (!sparamsp) return;
1392    AlphaFree(sparamsp->palpha);
1393    MemFree(sparamsp);
1394    return;
1395   }
1396 
1397 /*--------------------------------------------------------------(openwin)---*/
1398 
openwin(SequencePtr parent,Int4 start,Int4 length)1399 SequencePtr openwin(SequencePtr parent, Int4 start, Int4 length)
1400 
1401   {
1402    SequencePtr win;
1403 
1404    if (start<0 || length<0 || start+length>parent->length)
1405      {
1406       return((SequencePtr) NULL);
1407      }
1408 
1409    win = (SequencePtr) MemNew(sizeof(Sequence));
1410 
1411 /*---                                          ---[set links, up and down]---*/
1412 
1413    win->parent = parent;
1414    win->palpha = parent->palpha;
1415 
1416 /*---                          ---[install the local copy of the sequence]---*/
1417 
1418    win->start = start;
1419    win->length = length;
1420 #if 0                                                    /* Hi Warren! */
1421    win->seq = (CharPtr) MemNew(sizeof(Char)*length + 1);
1422    memcpy(win->seq, (parent->seq)+start, length);
1423    win->seq[length] = '\0';
1424 #else
1425 	win->seq = parent->seq + start;
1426 #endif
1427 
1428         win->bogus = 0;
1429 	win->punctuation = FALSE;
1430 
1431 	win->entropy = -2.;
1432 	win->state = (Int4Ptr) NULL;
1433 	win->composition = (Int4Ptr) NULL;
1434 
1435 	stateon(win);
1436 
1437 	return win;
1438 }
1439 
1440 /*------------------------------------------------------------(shiftwin1)---*/
1441 
shiftwin1(SequencePtr win)1442 Int4 shiftwin1(SequencePtr win)
1443 
1444 {
1445 	Int4 j, length;
1446 	Int4Ptr comp;
1447         Int4Ptr alphaindex;
1448         BoolPtr alphaflag;
1449 
1450 	length = win->length;
1451 	comp = win->composition;
1452         alphaindex = win->palpha->alphaindex;
1453         alphaflag = win->palpha->alphaflag;
1454 
1455 	if ((++win->start + length) > win->parent->length) {
1456 		--win->start;
1457 		return FALSE;
1458 	}
1459 
1460 	if (!alphaflag[j = win->seq[0]])
1461 		decrementsv(win->state, comp[alphaindex[j]]--);
1462         else win->bogus--;
1463 
1464 	j = win->seq[length];
1465 	++win->seq;
1466 
1467 	if (!alphaflag[j])
1468 		incrementsv(win->state, comp[alphaindex[j]]++);
1469         else win->bogus++;
1470 
1471 	if (win->entropy > -2.)
1472 		win->entropy = entropy(win->state);
1473 
1474 	return TRUE;
1475 }
1476 
1477 /*-------------------------------------------------------------(closewin)---*/
1478 
closewin(SequencePtr win)1479 void closewin(SequencePtr win)
1480 
1481   {
1482    if (win==NULL) return;
1483 
1484    if (win->state!=NULL)       MemFree(win->state);
1485    if (win->composition!=NULL) MemFree(win->composition);
1486 
1487    MemFree(win);
1488    return;
1489   }
1490 
1491 /*---------------------------------------------------------------(compon)---*/
1492 
compon(SequencePtr win)1493 void compon(SequencePtr win)
1494 
1495 {
1496 	Int4Ptr comp;
1497 	Int4 letter;
1498 	CharPtr seq, seqmax;
1499         Int4Ptr alphaindex;
1500         BoolPtr alphaflag;
1501         Int4 alphasize;
1502 
1503         alphasize = win->palpha->alphasize;
1504         alphaindex = win->palpha->alphaindex;
1505         alphaflag = win->palpha->alphaflag;
1506 
1507 	win->composition = comp =
1508                 (Int4Ptr) MemNew(alphasize*sizeof(Int4));
1509 	seq = win->seq;
1510 	seqmax = seq + win->length;
1511 
1512 	while (seq < seqmax) {
1513 		letter = *seq++;
1514 		if (!alphaflag[letter])
1515 			comp[alphaindex[letter]]++;
1516                 else win->bogus++;
1517 	}
1518 
1519 	return;
1520 }
1521 
1522 /*------------------------------------------------------------(state_cmp)---*/
1523 
state_cmp(VoidPtr s1,VoidPtr s2)1524 static int LIBCALLBACK state_cmp(VoidPtr s1, VoidPtr s2)
1525 
1526 {
1527 	int *np1, *np2;
1528 
1529 	np1 = (int *) s1;
1530 	np2 = (int *) s2;
1531 
1532 	return (*np2 - *np1);
1533 }
1534 
1535 /*--------------------------------------------------------------(stateon)---*/
1536 
stateon(SequencePtr win)1537 void stateon(SequencePtr win)
1538 
1539 {
1540 	Int4 letter, nel, c;
1541         Int4 alphasize;
1542 
1543         alphasize = win->palpha->alphasize;
1544 
1545 	if (win->composition == NULL)
1546 		compon(win);
1547 
1548 	win->state = (Int4Ptr) MemNew((alphasize+1)*sizeof(win->state[0]));
1549 
1550 	for (letter = nel = 0; letter < alphasize; ++letter) {
1551 		if ((c = win->composition[letter]) == 0)
1552 			continue;
1553 		win->state[nel++] = c;
1554 	}
1555 	for (letter = nel; letter < alphasize+1; ++letter)
1556 		win->state[letter] = 0;
1557 
1558 	HeapSort(win->state, nel, sizeof(win->state[0]), state_cmp);
1559 
1560 	return;
1561 }
1562 
1563 /*----------------------------------------------------------------(enton)---*/
1564 
enton(SequencePtr win)1565 void enton(SequencePtr win)
1566 
1567   {
1568    if (win->state==NULL) {stateon(win);}
1569 
1570    win->entropy = entropy(win->state);
1571 
1572    return;
1573   }
1574 
1575 /*--------------------------------------------------------------(entropy)---*/
1576 
entropy(Int4Ptr sv)1577 FloatHi entropy(Int4Ptr sv)
1578 
1579   {FloatHi ent;
1580    Int4 i, total;
1581 
1582    total = 0;
1583    for (i=0; sv[i]!=0; i++)
1584      {
1585       total += sv[i];
1586      }
1587    if (total==0) return(0.);
1588 
1589    ent = 0.0;
1590    for (i=0; sv[i]!=0; i++)
1591      {
1592       ent += ((FloatHi)sv[i])*log(((FloatHi)sv[i])/(double)total)/LN2;
1593      }
1594 
1595    ent = fabs(ent/(FloatHi)total);
1596 
1597    return(ent);
1598   }
1599 
1600 
1601 /*----------------------------------------------------------(decrementsv)---*/
1602 
decrementsv(Int4Ptr sv,Int4 class)1603 void decrementsv(Int4Ptr sv, Int4 class)
1604 
1605 {
1606 	Int4	svi;
1607 
1608 	while ((svi = *sv++) != 0) {
1609 		if (svi == class && *sv < class) {
1610 			sv[-1] = svi - 1;
1611 			break;
1612 		}
1613 	}
1614 }
1615 
1616 /*----------------------------------------------------------(incrementsv)---*/
1617 
incrementsv(Int4Ptr sv,Int4 class)1618 void incrementsv(Int4Ptr sv, Int4 class)
1619 
1620 {
1621 	for (;;) {
1622 		if (*sv++ == class) {
1623 			sv[-1]++;
1624 			break;
1625 		}
1626 	}
1627 }
1628 
1629 /*----------------------------------------------------------------(upper)---*/
1630 
upper(CharPtr string,size_t len)1631 extern void upper(CharPtr string, size_t len)
1632 
1633 {
1634 	CharPtr stringmax;
1635         Char c;
1636 
1637 	for (stringmax = string + len; string < stringmax; ++string)
1638 		if (islower(c = *string))
1639 			*string = toupper(c);
1640 }
1641 
1642 /*----------------------------------------------------------------(lower)---*/
1643 
lower(CharPtr string,size_t len)1644 extern void lower(CharPtr string, size_t len)
1645 
1646 {
1647 	CharPtr stringmax;
1648         Char c;
1649 
1650 	for (stringmax = string + len; string < stringmax; ++string)
1651 		if (isupper(c = *string))
1652 			*string = tolower(c);
1653 }
1654 
1655 /*--------------------------------------------------------------------------*/
1656