1 static char const rcsid[] = "$Id: seg.c,v 6.19 2003/05/30 17:25:38 coulouri Exp $";
2
3 /*
4 * ===========================================================================
5 *
6 * PUBLIC DOMAIN NOTICE
7 * National Center for Biotechnology Information
8 *
9 * This software/database is a "United States Government Work" under the
10 * terms of the United States Copyright Act. It was written as part of
11 * the author's offical duties as a United States Government employee and
12 * thus cannot be copyrighted. This software/database is freely available
13 * to the public for use. The National Library of Medicine and the U.S.
14 * Government have not placed any restriction on its use or reproduction.
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * Please cite the author in any work or product based on this material.
25 *
26 * ===========================================================================*/
27
28
29 /*--------------------------------------------------------------------------*/
30
31 #include "seg.h"
32 #include "lnfac.h"
33
34 static char _this_module[] = "seg";
35 #undef THIS_MODULE
36 #define THIS_MODULE _this_module
37
38 /*------------------------------------------------------(local functions)---*/
39
40 SequencePtr seq_phase(SequencePtr seq, Int4 phase, Int4 period);
41 SegPtr per_mergesegs(SequencePtr seq, SegPtr *persegs);
42 FloatHiPtr seqent(SequencePtr seq, Int4 window, Int4 maxbogus);
43
44 SequencePtr openwin(SequencePtr seq, Int4 start, Int4 length);
45 Int4 shiftwin1(SequencePtr win);
46 void closewin(SequencePtr win);
47 void compon(SequencePtr win);
48 void stateon(SequencePtr win);
49 void enton(SequencePtr win);
50 FloatHi entropy(Int4Ptr sv);
51 static int LIBCALLBACK state_cmp(VoidPtr s1, VoidPtr s2);
52 Int4 findlo(Int4 i, Int4 limit, FloatHi hicut, FloatHiPtr H);
53 Int4 findhi(Int4 i, Int4 limit, FloatHi hicut, FloatHiPtr H);
54 void trim(SequencePtr seq, Int4Ptr leftend, Int4Ptr rightend,
55 SegParamsPtr sparamsp);
56 FloatHi getprob(Int4Ptr sv, Int4 total, AlphaPtr palpha);
57 FloatHi lnperm(Int4Ptr sv, Int4 tot);
58 FloatHi lnass(Int4Ptr sv, Int4 alphasize);
59 void mergesegs(SequencePtr seq, SegPtr segs, Boolean overlap);
60 void decrementsv(Int4Ptr sv, Int4 class);
61 void incrementsv(Int4Ptr sv, Int4 class);
62 static void appendseg(SegPtr segs, SegPtr seg);
63 static Boolean hasdash(SequencePtr win);
64 AlphaPtr AA20alpha (void);
65 AlphaPtr NA4alpha (void);
66 void AlphaFree(AlphaPtr palpha);
67 AlphaPtr AlphaCopy(AlphaPtr palpha);
68
69 SeqLocPtr SegsToSeqLoc(BioseqPtr bsp, SegPtr segs);
70 SeqLocPtr SeqlocSegsToSeqLoc(SeqLocPtr slp, SegPtr segs);
71
72 /*------------------------------------------------------------(BioseqSeg)---*/
73
BioseqSeg(BioseqPtr bsp,SegParamsPtr sparamsp)74 SeqLocPtr BioseqSeg (BioseqPtr bsp, SegParamsPtr sparamsp)
75
76 {
77 SeqLocPtr slp = NULL;
78
79 /* error msg stuff */
80
81 ErrSetOptFlags (EO_MSG_CODES);
82
83 /* bail on null bioseq */
84
85 if (!bsp)
86 {
87 ErrPostEx (SEV_ERROR, 1, 1, "no bioseq");
88 ErrShow ();
89 return (slp);
90 }
91
92 if (ISA_na (bsp->mol))
93 {
94 slp = BioseqSegNa (bsp, sparamsp);
95 }
96
97 if (ISA_aa (bsp->mol))
98 {
99 slp = BioseqSegAa (bsp, sparamsp);
100 }
101
102 /* clean up & return */
103 return (slp);
104 }
105
106 /*----------------------------------------------------------(BioseqSegNa)---*/
107
BioseqSegNa(BioseqPtr bsp,SegParamsPtr sparamsp)108 SeqLocPtr BioseqSegNa (BioseqPtr bsp, SegParamsPtr sparamsp)
109
110 {
111 SeqPortPtr spp=NULL;
112 SeqLocPtr slp = NULL;
113 SequencePtr seqwin;
114 SegPtr segs;
115 Int4 len, index;
116 CharPtr seq;
117 Uint1 residue;
118
119 /* error msg stuff */
120
121 ErrSetOptFlags (EO_MSG_CODES);
122 SegParamsCheck (sparamsp);
123
124 /* bail on null bioseq */
125
126 if (!bsp)
127 {
128 ErrPostEx (SEV_ERROR, 2, 1, "no bioseq");
129 ErrShow ();
130 return (slp);
131 }
132
133 if (!ISA_na (bsp->mol))
134 {
135 ErrPostEx (SEV_WARNING, 2, 2, "not nucleic acid sequence");
136 ErrShow ();
137 return (slp);
138 }
139
140 if (!sparamsp)
141 {
142 sparamsp = SegParamsNewNa();
143 if (!sparamsp)
144 {
145 ErrPostEx (SEV_WARNING, 0, 0, "null parameters object");
146 ErrShow();
147 return(slp);
148 }
149 }
150 SegParamsCheck (sparamsp);
151
152 /* make an old-style genwin sequence window object */
153
154 len = bsp->length;
155 seq = (CharPtr) MemNew((len+1)*sizeof(Char));
156 spp = SeqPortNew(bsp, 0, -1, bsp->strand, Seq_code_ncbi8na);
157 if (spp == NULL) {
158 ErrPostEx (SEV_ERROR, 0, 0, "SeqPortNew failure");
159 ErrShow();
160 }
161
162 if (!seq)
163 {
164 ErrPostEx (SEV_ERROR, 0, 0, "memory allocation failure");
165 ErrShow();
166 return(slp);
167 }
168
169 index = 0;
170 while ((residue=SeqPortGetResidue(spp)) != SEQPORT_EOF)
171 {
172 if (IS_residue(residue))
173 {
174 seq[index] = residue;
175 index++;
176 }
177 else if (residue == SEQPORT_EOS)
178 {
179 continue; /*[Segment boundary]*/
180 }
181 else if (residue == SEQPORT_VIRT)
182 {
183 continue; /*[Virtual Sequence]*/
184 }
185 }
186
187 seq[index] = NULLB;
188
189 seqwin = SeqNew();
190 seqwin->seq = (CharPtr) seq;
191 seqwin->length = bsp->length;
192 seqwin->palpha = AlphaCopy(sparamsp->palpha);
193
194 /* seg the sequence */
195
196 segs = (SegPtr) NULL;
197 SegSeq (seqwin, sparamsp, &segs, 0);
198
199 /* merge the segment if desired. */
200
201 if (sparamsp->overlaps)
202 mergesegs(seqwin, segs, sparamsp->overlaps);
203
204 /* convert segs to seqlocs */
205
206 slp = SegsToSeqLoc(bsp, segs);
207
208 /* clean up & return */
209
210 SeqFree (seqwin);
211 SegFree (segs);
212 SeqPortFree (spp);
213 return (slp);
214 }
215
216 /*----------------------------------------------------------(BioseqSegAa)---*/
217
BioseqSegAa(BioseqPtr bsp,SegParamsPtr sparamsp)218 SeqLocPtr BioseqSegAa (BioseqPtr bsp, SegParamsPtr sparamsp)
219
220 {
221 SeqPortPtr spp=NULL;
222 SeqLocPtr slp = NULL;
223 SequencePtr seqwin;
224 SegPtr segs;
225 Int4 index, len;
226 CharPtr seq;
227 Uint1 residue;
228 Boolean params_allocated = FALSE;
229
230 /* error msg stuff */
231
232 ErrSetOptFlags (EO_MSG_CODES);
233 SegParamsCheck (sparamsp);
234
235 /* bail on null bioseq */
236
237 if (!bsp)
238 {
239 ErrPostEx (SEV_ERROR, 0, 0, "no bioseq");
240 ErrShow ();
241 return (slp);
242 }
243
244 if (!ISA_aa (bsp->mol))
245 {
246 ErrPostEx (SEV_WARNING, 0, 0, "not protein sequence");
247 ErrShow ();
248 return (slp);
249 }
250
251 if (!sparamsp)
252 {
253 sparamsp = SegParamsNewAa();
254 SegParamsCheck (sparamsp);
255 params_allocated = TRUE;
256 if (!sparamsp)
257 {
258 ErrPostEx (SEV_WARNING, 0, 0, "null parameters object");
259 ErrShow();
260 return(slp);
261 }
262 }
263
264 /* make an old-style genwin sequence window object */
265
266 len = bsp->length;
267 seq = (CharPtr) MemNew((len+1)*sizeof(Char));
268 spp = SeqPortNew(bsp, 0, -1, bsp->strand, Seq_code_ncbieaa);
269 if (spp == NULL) {
270 ErrPostEx (SEV_ERROR, 0, 0, "SeqPortNew failure");
271 ErrShow();
272 }
273
274 if (!seq)
275 {
276 ErrPostEx (SEV_ERROR, 0, 0, "memory allocation failure");
277 ErrShow();
278 SeqPortFree(spp);
279 return(slp);
280 }
281
282 index = 0;
283 while ((residue=SeqPortGetResidue(spp)) != SEQPORT_EOF)
284 {
285 if (IS_residue(residue))
286 {
287 seq[index] = residue;
288 index++;
289 }
290 else if (residue == SEQPORT_EOS)
291 {
292 continue; /*[Segment boundary]*/
293 }
294 else if (residue == SEQPORT_VIRT)
295 {
296 continue; /*[Virtual Sequence]*/
297 }
298 }
299
300 seq[index] = NULLB;
301
302
303 seqwin = SeqNew();
304 seqwin->seq = (CharPtr) seq;
305 seqwin->length = bsp->length;
306 seqwin->palpha = AlphaCopy(sparamsp->palpha);
307
308 /* seg the sequence */
309
310 segs = (SegPtr) NULL;
311 SegSeq (seqwin, sparamsp, &segs, 0);
312
313 /* merge the segment if desired. */
314
315 if (sparamsp->overlaps)
316 mergesegs(seqwin, segs, sparamsp->overlaps);
317
318 /* convert segs to seqlocs */
319
320 slp = SegsToSeqLoc(bsp, segs);
321
322 /* clean up & return */
323
324 SeqFree (seqwin);
325 SegFree (segs);
326 SeqPortFree(spp);
327 if(params_allocated)
328 SegParamsFree(sparamsp);
329
330 return (slp);
331 }
332
333 /*----------------------------------------------------------(SeqlocSegAa)---*/
334
SeqlocSegAa(SeqLocPtr slpin,SegParamsPtr sparamsp)335 SeqLocPtr SeqlocSegAa (SeqLocPtr slpin, SegParamsPtr sparamsp)
336
337 {
338 SeqPortPtr spp=NULL;
339 SeqLocPtr slp = NULL;
340 SequencePtr seqwin;
341 SegPtr segs;
342 Int4 index, len;
343 Int4 start, stop, temp;
344 CharPtr seq;
345 Uint1 residue;
346 Boolean params_allocated = FALSE;
347
348 /* error msg stuff */
349
350 ErrSetOptFlags (EO_MSG_CODES);
351 SegParamsCheck (sparamsp);
352
353 /* bail on null bioseq */
354
355 if (!slpin) {
356 ErrPostEx (SEV_ERROR, 0, 0, "no seqloc");
357 ErrShow ();
358 return (slp);
359 }
360
361 /* only simple intervals */
362
363 if (slpin->choice != SEQLOC_INT &&
364 slpin->choice != SEQLOC_WHOLE) return(slp);
365
366 /* get coordinate range */
367
368 start = SeqLocStart(slpin);
369 stop = SeqLocStop(slpin);
370 if (stop<start) {
371 temp = start;
372 start = stop;
373 stop = temp;
374 }
375
376 len = stop - start + 1;
377
378 /* check seg parameters */
379
380 if (!sparamsp) {
381 params_allocated = TRUE;
382 sparamsp = SegParamsNewAa();
383 SegParamsCheck (sparamsp);
384 if (!sparamsp) {
385 ErrPostEx (SEV_WARNING, 0, 0, "null parameters object");
386 ErrShow();
387 return(slp);
388 }
389 }
390
391 /* make an old-style genwin sequence window object */
392
393 seq = (CharPtr) MemNew((len+1)*sizeof(Char));
394 spp = SeqPortNewByLoc(slpin, Seq_code_ncbieaa);
395 if (spp == NULL) {
396 ErrPostEx (SEV_ERROR, 0, 0, "SeqPortNew failure");
397 ErrShow();
398 }
399
400 if (!seq) {
401 ErrPostEx (SEV_ERROR, 0, 0, "memory allocation failure");
402 ErrShow();
403 SeqPortFree(spp);
404 return(slp);
405 }
406
407 index = 0;
408 while ((residue=SeqPortGetResidue(spp)) != SEQPORT_EOF) {
409 if (IS_residue(residue)) {
410 seq[index] = residue;
411 index++;
412 } else if (residue == SEQPORT_EOS) {
413 continue; /*[Segment boundary]*/
414 } else if (residue == SEQPORT_VIRT) {
415 continue; /*[Virtual Sequence]*/
416 }
417 }
418
419 seq[index] = NULLB;
420
421 seqwin = SeqNew();
422 seqwin->seq = (CharPtr) seq;
423 seqwin->length = len;
424 seqwin->palpha = AlphaCopy(sparamsp->palpha);
425
426 /* seg the sequence */
427
428 segs = (SegPtr) NULL;
429 SegSeq (seqwin, sparamsp, &segs, 0);
430
431 /* merge the segment if desired. */
432
433 if (sparamsp->overlaps)
434 mergesegs(seqwin, segs, sparamsp->overlaps);
435
436 /* convert segs to seqlocs */
437
438 slp = SeqlocSegsToSeqLoc(slpin, segs);
439
440 /* clean up & return */
441
442 SeqFree (seqwin);
443 SegFree (segs);
444 SeqPortFree(spp);
445
446 if(params_allocated)
447 SegParamsFree(sparamsp);
448
449 return (slp);
450 }
451
452 /*---------------------------------------------------------(SegsToSeqLoc)---*/
453
SegsToSeqLoc(BioseqPtr bsp,SegPtr segs)454 SeqLocPtr SegsToSeqLoc(BioseqPtr bsp, SegPtr segs)
455
456 {
457 SeqLocPtr slp, slp_last, slp_next, slp_packed;
458 SeqIntPtr sip;
459
460 if (bsp==NULL || segs==NULL) return ((SeqLocPtr) NULL);
461
462 slp = (SeqLocPtr) ValNodeNew(NULL);
463 slp->choice = SEQLOC_INT;
464
465 sip = SeqIntNew();
466 sip->from = segs->begin;
467 sip->to = segs->end;
468 sip->strand = bsp->strand;
469 sip->id = SeqIdDup(bsp->id);
470
471 slp->data.ptrvalue = sip;
472
473 /*--- SEQLOC_INT for a single segment ---*/
474
475 if (segs->next==NULL)
476 {
477 slp->next = NULL;
478 return(slp);
479 }
480
481 /*--- SEQLOC_PACKED_INT for multiple segments ---*/
482
483 slp_last = slp;
484 for (segs=segs->next; segs!=NULL; segs=segs->next)
485 {
486 slp_next = (SeqLocPtr) ValNodeNew(NULL);
487 slp_next->choice = SEQLOC_INT;
488
489 sip = SeqIntNew();
490 sip->from = segs->begin;
491 sip->to = segs->end;
492 sip->strand = bsp->strand;
493 sip->id = SeqIdDup(bsp->id);
494
495 slp_next->data.ptrvalue = sip;
496 slp_last->next = slp_next;
497 slp_last = slp_next;
498 }
499
500 slp_last->next = NULL;
501
502 slp_packed = (SeqLocPtr) ValNodeNew(NULL);
503 slp_packed->choice = SEQLOC_PACKED_INT;
504 slp_packed->data.ptrvalue = slp;
505 slp_packed->next = NULL;
506
507 return(slp_packed);
508 }
509
510 /*---------------------------------------------------(SeqlocSegsToSeqLoc)---*/
511
SeqlocSegsToSeqLoc(SeqLocPtr slpin,SegPtr segs)512 SeqLocPtr SeqlocSegsToSeqLoc(SeqLocPtr slpin, SegPtr segs)
513
514 {
515 SeqLocPtr slp, slp_last, slp_next, slp_packed;
516 SeqIntPtr sip;
517 Int4 start, stop;
518 Uint1 strand;
519 SeqIdPtr id;
520
521 if (slpin==NULL || segs==NULL) return ((SeqLocPtr) NULL);
522 if (slpin->choice != SEQLOC_INT &&
523 slpin->choice != SEQLOC_WHOLE) return ((SeqLocPtr) NULL);
524
525 start = SeqLocStart(slpin);
526 stop = SeqLocStop(slpin);
527 if (stop < start) return ((SeqLocPtr) NULL);
528 strand = SeqLocStrand(slpin);
529 id = SeqLocId(slpin);
530
531 slp = (SeqLocPtr) ValNodeNew(NULL);
532 slp->choice = SEQLOC_INT;
533
534 sip = SeqIntNew();
535 sip->from = segs->begin + start;
536 sip->to = segs->end + start;
537 sip->strand = strand;
538 sip->id = SeqIdDup(id);
539
540 slp->data.ptrvalue = sip;
541
542 /*--- SEQLOC_INT for a single segment ---*/
543
544 if (segs->next==NULL)
545 {
546 slp->next = NULL;
547 return(slp);
548 }
549
550 /*--- SEQLOC_PACKED_INT for multiple segments ---*/
551
552 slp_last = slp;
553 for (segs=segs->next; segs!=NULL; segs=segs->next)
554 {
555 slp_next = (SeqLocPtr) ValNodeNew(NULL);
556 slp_next->choice = SEQLOC_INT;
557
558 sip = SeqIntNew();
559 sip->from = segs->begin + start;
560 sip->to = segs->end + start;
561 sip->strand = strand;
562 sip->id = SeqIdDup(id);
563
564 slp_next->data.ptrvalue = sip;
565 slp_last->next = slp_next;
566 slp_last = slp_next;
567 }
568
569 slp_last->next = NULL;
570
571 slp_packed = (SeqLocPtr) ValNodeNew(NULL);
572 slp_packed->choice = SEQLOC_PACKED_INT;
573 slp_packed->data.ptrvalue = slp;
574 slp_packed->next = NULL;
575
576 return(slp_packed);
577 }
578
579 /*---------------------------------------------------------------(SeqNew)---*/
580
SeqNew(void)581 SequencePtr SeqNew(void)
582
583 {
584 SequencePtr seq;
585
586 seq = (SequencePtr) MemNew(sizeof(Sequence));
587 if (seq==NULL)
588 {
589 /* raise error flag and etc. */
590 return(seq);
591 }
592
593 seq->parent = (SequencePtr) NULL;
594 seq->seq = (CharPtr) NULL;
595 seq->palpha = (AlphaPtr) NULL;
596 seq->start = seq->length = 0;
597 seq->bogus = seq->punctuation = FALSE;
598 seq->composition = seq->state = (Int4Ptr) NULL;
599 seq->entropy = (FloatHi) 0.0;
600
601 return(seq);
602 }
603
604 /*--------------------------------------------------------------(SeqFree)---*/
605
SeqFree(SequencePtr seq)606 void SeqFree(SequencePtr seq)
607
608 {
609 if (seq==NULL) return;
610
611 MemFree(seq->seq);
612 AlphaFree(seq->palpha);
613 MemFree(seq->composition);
614 MemFree(seq->state);
615 MemFree(seq);
616 return;
617 }
618
619 /*--------------------------------------------------------------(SegFree)---*/
620
SegFree(SegPtr seg)621 void SegFree(SegPtr seg)
622
623 {
624 SegPtr nextseg;
625
626 while (seg)
627 {
628 nextseg = seg->next;
629 MemFree(seg);
630 seg = nextseg;
631 }
632
633 return;
634 }
635
636 /*---------------------------------------------------------------(SegSeq)---*/
637
SegSeq(SequencePtr seq,SegParamsPtr sparamsp,SegPtr * segs,Int4 offset)638 void SegSeq(SequencePtr seq, SegParamsPtr sparamsp, SegPtr *segs,
639 Int4 offset)
640
641 {
642 SegPtr seg, leftsegs;
643 SequencePtr leftseq;
644 Int4 window;
645 FloatHi locut, hicut;
646 Int4 maxbogus;
647 Int4 downset, upset;
648 Int4 first, last, lowlim;
649 Int4 loi, hii, i;
650 Int4 leftend, rightend, lend, rend;
651 FloatHiPtr H;
652
653 if (sparamsp->window<=0) return;
654 if (sparamsp->locut<=0.) sparamsp->locut = 0.;
655 if (sparamsp->hicut<=0.) sparamsp->hicut = 0.;
656
657 window = sparamsp->window;
658 locut = sparamsp->locut;
659 hicut = sparamsp->hicut;
660 downset = (window+1)/2 - 1;
661 upset = window - downset;
662 maxbogus = sparamsp->maxbogus;
663
664 H = seqent(seq, window, maxbogus);
665 if (H==NULL) return;
666
667 first = downset;
668 last = seq->length - upset;
669 lowlim = first;
670
671 for (i=first; i<=last; i++)
672 {
673 if (H[i]<=locut && H[i]!=-1)
674 {
675 loi = findlo(i, lowlim, hicut, H);
676 hii = findhi(i, last, hicut, H);
677
678 leftend = loi - downset;
679 rightend = hii + upset - 1;
680
681 trim(openwin(seq, leftend, rightend-leftend+1), &leftend, &rightend,
682 sparamsp);
683
684 if (i+upset-1<leftend) /* check for trigger window in left trim */
685 {
686 lend = loi - downset;
687 rend = leftend - 1;
688
689 leftseq = openwin(seq, lend, rend-lend+1);
690 leftsegs = (SegPtr) NULL;
691 SegSeq(leftseq, sparamsp, &leftsegs, offset+lend);
692 if (leftsegs!=NULL)
693 {
694 if (*segs==NULL) *segs = leftsegs;
695 else appendseg(*segs, leftsegs);
696 }
697 closewin(leftseq);
698
699 /* trim(openwin(seq, lend, rend-lend+1), &lend, &rend);
700 seg = (SegPtr) MemNew(sizeof(Seg));
701 seg->begin = lend;
702 seg->end = rend;
703 seg->next = (SegPtr) NULL;
704 if (segs==NULL) segs = seg;
705 else appendseg(segs, seg); */
706 }
707
708 seg = (SegPtr) MemNew(sizeof(Seg));
709 seg->begin = leftend + offset;
710 seg->end = rightend + offset;
711 seg->next = (SegPtr) NULL;
712
713 if (*segs==NULL) *segs = seg;
714 else appendseg(*segs, seg);
715
716 i = MIN(hii, rightend+downset);
717 lowlim = i + 1;
718 /* i = hii; this ignores the trimmed residues... */
719 }
720 }
721
722 MemFree(H);
723 return;
724 }
725
726 /*------------------------------------------------------------(seq_phase)---*/
727
seq_phase(SequencePtr seq,Int4 phase,Int4 period)728 SequencePtr seq_phase (SequencePtr seq, Int4 phase, Int4 period)
729
730 {
731 SequencePtr perseq;
732 Int4 len, i, j;
733
734 perseq = (SequencePtr) MemNew(sizeof(Sequence));
735
736 len = ((seq->length)/period) + 1;
737 perseq->seq = (CharPtr) MemNew((len+1)*sizeof(Char));
738
739 perseq->length = 0;
740 for (i=0, j=phase; j<seq->length; i++, j+=period)
741 {
742 perseq->seq[i] = seq->seq[j];
743 perseq->length++;
744 }
745 perseq->seq[i] = '\0';
746
747 return(perseq);
748 }
749
750 /*---------------------------------------------------------------(seqent)---*/
751
seqent(SequencePtr seq,Int4 window,Int4 maxbogus)752 FloatHiPtr seqent(SequencePtr seq, Int4 window, Int4 maxbogus)
753
754 {
755 SequencePtr win;
756 FloatHiPtr H;
757 Int4 i, first, last, downset, upset;
758
759 downset = (window+1)/2 - 1;
760 upset = window - downset;
761
762 if (window>seq->length)
763 {
764 return((FloatHiPtr) NULL);
765 }
766
767 H = (FloatHiPtr) MemNew(seq->length*sizeof(FloatHi));
768
769 for (i=0; i<seq->length; i++)
770 {
771 H[i] = -1.;
772 }
773
774 win = openwin(seq, 0, window);
775 enton(win);
776
777 first = downset;
778 last = seq->length - upset;
779
780 for (i=first; i<=last; i++)
781 {
782 if (seq->punctuation && hasdash(win))
783 {
784 H[i] = -1.;
785 shiftwin1(win);
786 continue;
787 }
788 if (win->bogus > maxbogus)
789 {
790 H[i] = -1.;
791 shiftwin1(win);
792 continue;
793 }
794 H[i] = win->entropy;
795 shiftwin1(win);
796 }
797
798 closewin(win);
799 return(H);
800 }
801
802 /*--------------------------------------------------------------(hasdash)---*/
803
hasdash(SequencePtr win)804 static Boolean hasdash(SequencePtr win)
805
806
807 {
808 register char *seq, *seqmax;
809
810 seq = win->seq;
811 seqmax = seq + win->length;
812
813 while (seq < seqmax) {
814 if (*seq++ == '-')
815 return TRUE;
816 }
817 return FALSE;
818 }
819
820 /*---------------------------------------------------------------(findlo)---*/
821
findlo(Int4 i,Int4 limit,FloatHi hicut,FloatHiPtr H)822 Int4 findlo(Int4 i, Int4 limit, FloatHi hicut, FloatHiPtr H)
823
824 {
825 Int4 j;
826
827 for (j=i; j>=limit; j--)
828 {
829 if (H[j]==-1) break;
830 if (H[j]>hicut) break;
831 }
832
833 return(j+1);
834 }
835
836 /*---------------------------------------------------------------(findhi)---*/
837
findhi(Int4 i,Int4 limit,FloatHi hicut,FloatHiPtr H)838 Int4 findhi(Int4 i, Int4 limit, FloatHi hicut, FloatHiPtr H)
839
840 {
841 Int4 j;
842
843 for (j=i; j<=limit; j++)
844 {
845 if (H[j]==-1) break;
846 if (H[j]>hicut) break;
847 }
848
849 return(j-1);
850 }
851
852 /*-----------------------------------------------------------------(trim)---*/
853
trim(SequencePtr seq,Int4Ptr leftend,Int4Ptr rightend,SegParamsPtr sparamsp)854 void trim(SequencePtr seq, Int4Ptr leftend, Int4Ptr rightend,
855 SegParamsPtr sparamsp)
856
857 {
858 SequencePtr win;
859 FloatHi prob, minprob;
860 Int4 shift, len, i;
861 Int4 lend, rend;
862 Int4 minlen;
863 Int4 maxtrim;
864
865 /* fprintf(stderr, "%d %d\n", *leftend, *rightend); */
866
867 lend = 0;
868 rend = seq->length - 1;
869 minlen = 1;
870 maxtrim = sparamsp->maxtrim;
871 if ((seq->length-maxtrim)>minlen) minlen = seq->length-maxtrim;
872
873 minprob = 1.;
874 /* fprintf(stderr, "\n"); -*/
875 for (len=seq->length; len>minlen; len--)
876 {
877 /* fprintf(stderr, "%5d ", len); -*/
878 win = openwin(seq, 0, len);
879 i = 0;
880
881 shift = TRUE;
882 while (shift)
883 {
884 prob = getprob(win->state, len, sparamsp->palpha);
885 /* realprob = exp(prob); (for tracing the trim)-*/
886 /* fprintf(stderr, "%2e ", realprob); -*/
887 if (prob<minprob)
888 {
889 minprob = prob;
890 lend = i;
891 rend = len + i - 1;
892 }
893 shift = shiftwin1(win);
894 i++;
895 }
896 closewin(win);
897 /* fprintf(stderr, "\n"); -*/
898 }
899
900 /* fprintf(stderr, "%d-%d ", *leftend, *rightend); -*/
901
902 *leftend = *leftend + lend;
903 *rightend = *rightend - (seq->length - rend - 1);
904
905 /* fprintf(stderr, "%d-%d\n", *leftend, *rightend); -*/
906
907 closewin(seq);
908 return;
909 }
910
911 /*--------------------------------------------------------------(getprob)---*/
912
getprob(Int4Ptr sv,Int4 total,AlphaPtr palpha)913 FloatHi getprob(Int4Ptr sv, Int4 total, AlphaPtr palpha)
914
915 {
916 FloatHi ans, ans1, ans2, totseq;
917
918 /* #define LN20 2.9957322735539909 */
919 /* #define LN4 1.3862943611198906 */
920
921 totseq = ((double) total) * palpha->lnalphasize;
922
923 /*
924 ans = lnass(sv, palpha->alphasize) + lnperm(sv, total) - totseq;
925 */
926 ans1 = lnass(sv, palpha->alphasize);
927 if (ans1 > -100000.0 && sv[0] != INT4_MIN)
928 {
929 ans2 = lnperm(sv, total);
930 }
931 else
932 {
933 ErrPostEx (SEV_ERROR, 0, 0, "Illegal value returned by lnass");
934 }
935 ans = ans1 + ans2 - totseq;
936 /*
937 fprintf(stderr, "%lf %lf %lf\n", ans, ans1, ans2);
938 */
939
940 return ans;
941 }
942
943 /*---------------------------------------------------------------(lnperm)---*/
944
lnperm(Int4Ptr sv,Int4 tot)945 FloatHi lnperm(Int4Ptr sv, Int4 tot)
946
947 {
948 FloatHi ans;
949 Int4 i;
950
951 ans = lnfac[tot];
952
953 for (i=0; sv[i]!=0; i++)
954 {
955 ans -= lnfac[sv[i]];
956 }
957
958 return(ans);
959 }
960
961 /*----------------------------------------------------------------(lnass)---*/
962
lnass(Int4Ptr sv,Int4 alphasize)963 FloatHi lnass(Int4Ptr sv, Int4 alphasize)
964
965 {
966 double ans;
967 int svi, svim1;
968 int class, total;
969 int i;
970
971 ans = lnfac[alphasize];
972 if (sv[0] == 0)
973 return ans;
974
975 total = alphasize;
976 class = 1;
977 svi = *sv;
978 svim1 = sv[0];
979 for (i=0;; svim1 = svi) {
980 if (++i==alphasize) {
981 ans -= lnfac[class];
982 break;
983 }
984 else if ((svi = *++sv) == svim1) {
985 class++;
986 continue;
987 }
988 else {
989 total -= class;
990 ans -= lnfac[class];
991 if (svi == 0) {
992 ans -= lnfac[total];
993 break;
994 }
995 else {
996 class = 1;
997 continue;
998 }
999 }
1000 }
1001
1002 return ans;
1003 }
1004
1005 /*------------------------------------------------------------(mergesegs)---*/
1006 /* merge together overlapping segments,
1007 hilenmin also does something, but we need to ask Scott Federhen what?
1008 */
1009
mergesegs(SequencePtr seq,SegPtr segs,Boolean overlaps)1010 void mergesegs(SequencePtr seq, SegPtr segs, Boolean overlaps)
1011
1012 {SegPtr seg, nextseg;
1013 Int4 hilenmin; /* hilenmin yet unset */
1014 Int4 len;
1015
1016 hilenmin = 0; /* hilenmin - temporary default */
1017
1018 if (overlaps == FALSE)
1019 return;
1020
1021 if (segs==NULL) return;
1022
1023 if (segs->begin<hilenmin) segs->begin = 0;
1024
1025 seg = segs;
1026 nextseg = seg->next;
1027
1028 while (nextseg!=NULL)
1029 {
1030 if (seg->end>=nextseg->begin && seg->end>=nextseg->end)
1031 {
1032 seg->next = nextseg->next;
1033 MemFree(nextseg);
1034 nextseg = seg->next;
1035 continue;
1036 }
1037 if (seg->end>=nextseg->begin) /* overlapping segments */
1038 {
1039 seg->end = nextseg->end;
1040 seg->next = nextseg->next;
1041 MemFree(nextseg);
1042 nextseg = seg->next;
1043 continue;
1044 }
1045 len = nextseg->begin - seg->end - 1;
1046 if (len<hilenmin) /* short hient segment */
1047 {
1048 seg->end = nextseg->end;
1049 seg->next = nextseg->next;
1050 MemFree(nextseg);
1051 nextseg = seg->next;
1052 continue;
1053 }
1054 seg = nextseg;
1055 nextseg = seg->next;
1056 }
1057
1058 len = seq->length - seg->end - 1;
1059 if (len<hilenmin) seg->end = seq->length - 1;
1060
1061 return;
1062 }
1063
1064 /*--------------------------------------------------------(per_mergesegs)---*/
1065
per_mergesegs(SequencePtr seq,SegPtr * persegs)1066 SegPtr per_mergesegs(SequencePtr seq, SegPtr *persegs)
1067
1068 {SegPtr *localsegs;
1069 SegPtr firstseg, segs, seg;
1070 Int4 period; /* period yet unset */
1071 Int4 first;
1072 Int4 phase, savephase;
1073
1074 period = 1; /* period set temporarily */
1075
1076 segs = (SegPtr) NULL;
1077 if (persegs==NULL) return(segs);
1078
1079 localsegs = (SegPtr *) MemNew(period*sizeof(FloatHi));
1080 for (phase=0; phase<period; phase++) localsegs[phase] = persegs[phase];
1081
1082 while (TRUE)
1083 {
1084 firstseg = (SegPtr) NULL;
1085 first = -1;
1086 savephase = -1;
1087
1088 for (phase=0; phase<period; phase++)
1089 {
1090 if (localsegs[phase]==NULL) continue;
1091 if (first==-1 || localsegs[phase]->begin<first)
1092 {
1093 savephase = phase;
1094 firstseg = localsegs[phase];
1095 first = firstseg->begin;
1096 }
1097 }
1098
1099 if (firstseg==NULL) break;
1100
1101 seg = (SegPtr) MemNew(sizeof(Seg));
1102 seg->begin = ((firstseg->begin)*period)+savephase;
1103 seg->end = ((firstseg->end)*period)+savephase;
1104 seg->next = (SegPtr) NULL;
1105 if (segs==NULL) segs = seg; else appendseg(segs, seg);
1106
1107 localsegs[savephase] = localsegs[savephase]->next;
1108 }
1109
1110 MemFree(localsegs);
1111 mergesegs(seq, segs, FALSE);
1112 return(segs);
1113 }
1114
1115
1116
1117 /*------------------------------------------------------------(appendseg)---*/
1118
1119 static void
appendseg(SegPtr segs,SegPtr seg)1120 appendseg(SegPtr segs, SegPtr seg)
1121
1122 {SegPtr temp;
1123
1124 temp = segs;
1125 while (TRUE)
1126 {
1127 if (temp->next==NULL)
1128 {
1129 temp->next = seg;
1130 break;
1131 }
1132 else
1133 {
1134 temp = temp->next;
1135 }
1136 }
1137
1138 return;
1139 }
1140
1141 /*-------------------------------------------------------(SegParamsNewAa)---*/
1142
SegParamsNewAa(void)1143 SegParamsPtr SegParamsNewAa (void)
1144
1145 {
1146 SegParamsPtr sparamsp;
1147
1148 sparamsp = (SegParamsPtr) MemNew (sizeof(SegParams));
1149
1150 sparamsp->window = 12;
1151 sparamsp->locut = 2.2;
1152 sparamsp->hicut = 2.5;
1153 sparamsp->period = 1;
1154 sparamsp->hilenmin = 0;
1155 sparamsp->overlaps = FALSE;
1156 sparamsp->maxtrim = 50;
1157 sparamsp->maxbogus = 2;
1158 sparamsp->palpha = AA20alpha();
1159
1160 return (sparamsp);
1161 }
1162
1163 /*-------------------------------------------------------(SegParamsNewNa)---*/
1164
SegParamsNewNa(void)1165 SegParamsPtr SegParamsNewNa (void)
1166
1167 {
1168 SegParamsPtr sparamsp;
1169
1170 sparamsp = (SegParamsPtr) MemNew (sizeof(SegParams));
1171
1172 sparamsp->window = 21;
1173 sparamsp->locut = 1.4;
1174 sparamsp->hicut = 1.6;
1175 sparamsp->period = 1;
1176 sparamsp->hilenmin = 0;
1177 sparamsp->overlaps = FALSE;
1178 sparamsp->maxtrim = 100;
1179 sparamsp->maxbogus = 3;
1180 sparamsp->palpha = NA4alpha();
1181
1182 return (sparamsp);
1183 }
1184
1185 /*-------------------------------------------------------(SegParamsCheck)---*/
1186
SegParamsCheck(SegParamsPtr sparamsp)1187 void SegParamsCheck (SegParamsPtr sparamsp)
1188
1189 {
1190 if (!sparamsp) return;
1191
1192 if (sparamsp->window <= 0) sparamsp->window = 12;
1193
1194 if (sparamsp->locut < 0.0) sparamsp->locut = 0.0;
1195 /* if (sparamsp->locut > sparamsp->palpha->lnalphasize)
1196 sparamsp->locut = sparamsp->palpha->lnalphasize; */
1197
1198 if (sparamsp->hicut < 0.0) sparamsp->hicut = 0.0;
1199 /* if (sparamsp->hicut > sparamsp->palpha->lnalphasize)
1200 sparamsp->hicut = sparamsp->palpha->lnalphasize; */
1201
1202 if (sparamsp->locut > sparamsp->hicut)
1203 sparamsp->hicut = sparamsp->locut;
1204
1205 if (sparamsp->maxbogus < 0)
1206 sparamsp->maxbogus = 0;
1207 if (sparamsp->maxbogus > sparamsp->window)
1208 sparamsp->maxbogus = sparamsp->window;
1209
1210 if (sparamsp->period <= 0) sparamsp->period = 1;
1211 if (sparamsp->maxtrim < 0) sparamsp->maxtrim = 0;
1212
1213 return;
1214 }
1215
1216 /*------------------------------------------------------------(AA20alpha)---*/
1217
AA20alpha(void)1218 AlphaPtr AA20alpha (void)
1219
1220 {
1221 AlphaPtr palpha;
1222 Int4Ptr alphaindex;
1223 BoolPtr alphaflag;
1224 CharPtr alphachar;
1225 Int4 i;
1226 Char c;
1227
1228 palpha = (AlphaPtr) MemNew (sizeof(Alpha));
1229
1230 palpha->alphabet = AA20;
1231 palpha->alphasize = 20;
1232 palpha->lnalphasize = LN20;
1233
1234 alphaindex = (Int4Ptr) MemNew (CHAR_SET * sizeof(Int4));
1235 alphaflag = (BoolPtr) MemNew (CHAR_SET * sizeof(Boolean));
1236 alphachar = (CharPtr) MemNew (palpha->alphasize * sizeof(Char));
1237
1238 for (i=0; i<128; i++)
1239 {
1240 c = (Char) i;
1241 if (c=='a' || c=='A')
1242 {alphaflag[i] = FALSE; alphaindex[i] = 0; alphachar[0] = c;}
1243 else if (c=='c' || c=='C')
1244 {alphaflag[i] = FALSE; alphaindex[i] = 1; alphachar[1] = c;}
1245 else if (c=='d' || c=='D')
1246 {alphaflag[i] = FALSE; alphaindex[i] = 2; alphachar[2] = c;}
1247 else if (c=='e' || c=='E')
1248 {alphaflag[i] = FALSE; alphaindex[i] = 3; alphachar[3] = c;}
1249 else if (c=='f' || c=='F')
1250 {alphaflag[i] = FALSE; alphaindex[i] = 4; alphachar[4] = c;}
1251 else if (c=='g' || c=='G')
1252 {alphaflag[i] = FALSE; alphaindex[i] = 5; alphachar[5] = c;}
1253 else if (c=='h' || c=='H')
1254 {alphaflag[i] = FALSE; alphaindex[i] = 6; alphachar[6] = c;}
1255 else if (c=='i' || c=='I')
1256 {alphaflag[i] = FALSE; alphaindex[i] = 7; alphachar[7] = c;}
1257 else if (c=='k' || c=='K')
1258 {alphaflag[i] = FALSE; alphaindex[i] = 8; alphachar[8] = c;}
1259 else if (c=='l' || c=='L')
1260 {alphaflag[i] = FALSE; alphaindex[i] = 9; alphachar[9] = c;}
1261 else if (c=='m' || c=='M')
1262 {alphaflag[i] = FALSE; alphaindex[i] = 10; alphachar[10] = c;}
1263 else if (c=='n' || c=='N')
1264 {alphaflag[i] = FALSE; alphaindex[i] = 11; alphachar[11] = c;}
1265 else if (c=='p' || c=='P')
1266 {alphaflag[i] = FALSE; alphaindex[i] = 12; alphachar[12] = c;}
1267 else if (c=='q' || c=='Q')
1268 {alphaflag[i] = FALSE; alphaindex[i] = 13; alphachar[13] = c;}
1269 else if (c=='r' || c=='R')
1270 {alphaflag[i] = FALSE; alphaindex[i] = 14; alphachar[14] = c;}
1271 else if (c=='s' || c=='S')
1272 {alphaflag[i] = FALSE; alphaindex[i] = 15; alphachar[15] = c;}
1273 else if (c=='t' || c=='T')
1274 {alphaflag[i] = FALSE; alphaindex[i] = 16; alphachar[16] = c;}
1275 else if (c=='v' || c=='V')
1276 {alphaflag[i] = FALSE; alphaindex[i] = 17; alphachar[17] = c;}
1277 else if (c=='w' || c=='W')
1278 {alphaflag[i] = FALSE; alphaindex[i] = 18; alphachar[18] = c;}
1279 else if (c=='y' || c=='Y')
1280 {alphaflag[i] = FALSE; alphaindex[i] = 19; alphachar[19] = c;}
1281 else
1282 {alphaflag[i] = TRUE; alphaindex[i] = 20;}
1283 }
1284
1285 palpha->alphaindex = alphaindex;
1286 palpha->alphaflag = alphaflag;
1287 palpha->alphachar = alphachar;
1288
1289 return (palpha);
1290 }
1291
1292 /*-------------------------------------------------------------(NA4alpha)---*/
1293
NA4alpha(void)1294 AlphaPtr NA4alpha (void)
1295
1296 {
1297 AlphaPtr palpha;
1298 Int4Ptr alphaindex;
1299 BoolPtr alphaflag;
1300 CharPtr alphachar;
1301 Int4 i;
1302 Char c;
1303
1304 palpha = (AlphaPtr) MemNew (sizeof(Alpha));
1305
1306 palpha->alphabet = NA4;
1307 palpha->alphasize = 4;
1308 palpha->lnalphasize = LN4;
1309
1310 alphaindex = (Int4Ptr) MemNew (CHAR_SET * sizeof(Int4));
1311 alphaflag = (BoolPtr) MemNew (CHAR_SET * sizeof(Boolean));
1312 alphachar = (CharPtr) MemNew (palpha->alphasize * sizeof(Char));
1313
1314 for (i=0; i<128; i++)
1315 {
1316 c = (Char) i;
1317 if (c=='a' || c=='A')
1318 {alphaflag[i] = FALSE; alphaindex[i] = 0; alphachar[0] = c;}
1319 else if (c=='c' || c=='C')
1320 {alphaflag[i] = FALSE; alphaindex[i] = 1; alphachar[1] = c;}
1321 else if (c=='g' || c=='G')
1322 {alphaflag[i] = FALSE; alphaindex[i] = 2; alphachar[2] = c;}
1323 else if (c=='t' || c=='T')
1324 {alphaflag[i] = FALSE; alphaindex[i] = 3; alphachar[3] = c;}
1325 else if (c=='u' || c=='U')
1326 {alphaflag[i] = FALSE; alphaindex[i] = 3;}
1327 else
1328 {alphaflag[i] = TRUE; alphaindex[i] = 4;}
1329 }
1330
1331 palpha->alphaindex = alphaindex;
1332 palpha->alphaflag = alphaflag;
1333 palpha->alphachar = alphachar;
1334
1335 return (palpha);
1336 }
1337
1338 /*------------------------------------------------------------(AlphaFree)---*/
1339
AlphaFree(AlphaPtr palpha)1340 void AlphaFree (AlphaPtr palpha)
1341
1342 {
1343 if (!palpha) return;
1344
1345 MemFree (palpha->alphaindex);
1346 MemFree (palpha->alphaflag);
1347 MemFree (palpha->alphachar);
1348 MemFree (palpha);
1349
1350 return;
1351 }
1352
1353 /*------------------------------------------------------------(AlphaCopy)---*/
1354
AlphaCopy(AlphaPtr palpha)1355 AlphaPtr AlphaCopy (AlphaPtr palpha)
1356
1357 {
1358 AlphaPtr pbeta;
1359 Int2 i;
1360
1361 if (!palpha) return((AlphaPtr) NULL);
1362
1363 pbeta = (AlphaPtr) MemNew(sizeof(Alpha));
1364 pbeta->alphabet = palpha->alphabet;
1365 pbeta->alphasize = palpha->alphasize;
1366 pbeta->lnalphasize = palpha->lnalphasize;
1367
1368 pbeta->alphaindex = (Int4Ptr) MemNew (CHAR_SET * sizeof(Int4));
1369 pbeta->alphaflag = (BoolPtr) MemNew (CHAR_SET * sizeof(Boolean));
1370 pbeta->alphachar = (CharPtr) MemNew (palpha->alphasize * sizeof(Char));
1371
1372 for (i=0; i<CHAR_SET; i++)
1373 {
1374 pbeta->alphaindex[i] = palpha->alphaindex[i];
1375 pbeta->alphaflag[i] = palpha->alphaflag[i];
1376 }
1377
1378 for (i=0; i<palpha->alphasize; i++)
1379 {
1380 pbeta->alphachar[i] = palpha->alphachar[i];
1381 }
1382
1383 return(pbeta);
1384 }
1385
1386 /*--------------------------------------------------------(SegParamsFree)---*/
1387
SegParamsFree(SegParamsPtr sparamsp)1388 void SegParamsFree(SegParamsPtr sparamsp)
1389
1390 {
1391 if (!sparamsp) return;
1392 AlphaFree(sparamsp->palpha);
1393 MemFree(sparamsp);
1394 return;
1395 }
1396
1397 /*--------------------------------------------------------------(openwin)---*/
1398
openwin(SequencePtr parent,Int4 start,Int4 length)1399 SequencePtr openwin(SequencePtr parent, Int4 start, Int4 length)
1400
1401 {
1402 SequencePtr win;
1403
1404 if (start<0 || length<0 || start+length>parent->length)
1405 {
1406 return((SequencePtr) NULL);
1407 }
1408
1409 win = (SequencePtr) MemNew(sizeof(Sequence));
1410
1411 /*--- ---[set links, up and down]---*/
1412
1413 win->parent = parent;
1414 win->palpha = parent->palpha;
1415
1416 /*--- ---[install the local copy of the sequence]---*/
1417
1418 win->start = start;
1419 win->length = length;
1420 #if 0 /* Hi Warren! */
1421 win->seq = (CharPtr) MemNew(sizeof(Char)*length + 1);
1422 memcpy(win->seq, (parent->seq)+start, length);
1423 win->seq[length] = '\0';
1424 #else
1425 win->seq = parent->seq + start;
1426 #endif
1427
1428 win->bogus = 0;
1429 win->punctuation = FALSE;
1430
1431 win->entropy = -2.;
1432 win->state = (Int4Ptr) NULL;
1433 win->composition = (Int4Ptr) NULL;
1434
1435 stateon(win);
1436
1437 return win;
1438 }
1439
1440 /*------------------------------------------------------------(shiftwin1)---*/
1441
shiftwin1(SequencePtr win)1442 Int4 shiftwin1(SequencePtr win)
1443
1444 {
1445 Int4 j, length;
1446 Int4Ptr comp;
1447 Int4Ptr alphaindex;
1448 BoolPtr alphaflag;
1449
1450 length = win->length;
1451 comp = win->composition;
1452 alphaindex = win->palpha->alphaindex;
1453 alphaflag = win->palpha->alphaflag;
1454
1455 if ((++win->start + length) > win->parent->length) {
1456 --win->start;
1457 return FALSE;
1458 }
1459
1460 if (!alphaflag[j = win->seq[0]])
1461 decrementsv(win->state, comp[alphaindex[j]]--);
1462 else win->bogus--;
1463
1464 j = win->seq[length];
1465 ++win->seq;
1466
1467 if (!alphaflag[j])
1468 incrementsv(win->state, comp[alphaindex[j]]++);
1469 else win->bogus++;
1470
1471 if (win->entropy > -2.)
1472 win->entropy = entropy(win->state);
1473
1474 return TRUE;
1475 }
1476
1477 /*-------------------------------------------------------------(closewin)---*/
1478
closewin(SequencePtr win)1479 void closewin(SequencePtr win)
1480
1481 {
1482 if (win==NULL) return;
1483
1484 if (win->state!=NULL) MemFree(win->state);
1485 if (win->composition!=NULL) MemFree(win->composition);
1486
1487 MemFree(win);
1488 return;
1489 }
1490
1491 /*---------------------------------------------------------------(compon)---*/
1492
compon(SequencePtr win)1493 void compon(SequencePtr win)
1494
1495 {
1496 Int4Ptr comp;
1497 Int4 letter;
1498 CharPtr seq, seqmax;
1499 Int4Ptr alphaindex;
1500 BoolPtr alphaflag;
1501 Int4 alphasize;
1502
1503 alphasize = win->palpha->alphasize;
1504 alphaindex = win->palpha->alphaindex;
1505 alphaflag = win->palpha->alphaflag;
1506
1507 win->composition = comp =
1508 (Int4Ptr) MemNew(alphasize*sizeof(Int4));
1509 seq = win->seq;
1510 seqmax = seq + win->length;
1511
1512 while (seq < seqmax) {
1513 letter = *seq++;
1514 if (!alphaflag[letter])
1515 comp[alphaindex[letter]]++;
1516 else win->bogus++;
1517 }
1518
1519 return;
1520 }
1521
1522 /*------------------------------------------------------------(state_cmp)---*/
1523
state_cmp(VoidPtr s1,VoidPtr s2)1524 static int LIBCALLBACK state_cmp(VoidPtr s1, VoidPtr s2)
1525
1526 {
1527 int *np1, *np2;
1528
1529 np1 = (int *) s1;
1530 np2 = (int *) s2;
1531
1532 return (*np2 - *np1);
1533 }
1534
1535 /*--------------------------------------------------------------(stateon)---*/
1536
stateon(SequencePtr win)1537 void stateon(SequencePtr win)
1538
1539 {
1540 Int4 letter, nel, c;
1541 Int4 alphasize;
1542
1543 alphasize = win->palpha->alphasize;
1544
1545 if (win->composition == NULL)
1546 compon(win);
1547
1548 win->state = (Int4Ptr) MemNew((alphasize+1)*sizeof(win->state[0]));
1549
1550 for (letter = nel = 0; letter < alphasize; ++letter) {
1551 if ((c = win->composition[letter]) == 0)
1552 continue;
1553 win->state[nel++] = c;
1554 }
1555 for (letter = nel; letter < alphasize+1; ++letter)
1556 win->state[letter] = 0;
1557
1558 HeapSort(win->state, nel, sizeof(win->state[0]), state_cmp);
1559
1560 return;
1561 }
1562
1563 /*----------------------------------------------------------------(enton)---*/
1564
enton(SequencePtr win)1565 void enton(SequencePtr win)
1566
1567 {
1568 if (win->state==NULL) {stateon(win);}
1569
1570 win->entropy = entropy(win->state);
1571
1572 return;
1573 }
1574
1575 /*--------------------------------------------------------------(entropy)---*/
1576
entropy(Int4Ptr sv)1577 FloatHi entropy(Int4Ptr sv)
1578
1579 {FloatHi ent;
1580 Int4 i, total;
1581
1582 total = 0;
1583 for (i=0; sv[i]!=0; i++)
1584 {
1585 total += sv[i];
1586 }
1587 if (total==0) return(0.);
1588
1589 ent = 0.0;
1590 for (i=0; sv[i]!=0; i++)
1591 {
1592 ent += ((FloatHi)sv[i])*log(((FloatHi)sv[i])/(double)total)/LN2;
1593 }
1594
1595 ent = fabs(ent/(FloatHi)total);
1596
1597 return(ent);
1598 }
1599
1600
1601 /*----------------------------------------------------------(decrementsv)---*/
1602
decrementsv(Int4Ptr sv,Int4 class)1603 void decrementsv(Int4Ptr sv, Int4 class)
1604
1605 {
1606 Int4 svi;
1607
1608 while ((svi = *sv++) != 0) {
1609 if (svi == class && *sv < class) {
1610 sv[-1] = svi - 1;
1611 break;
1612 }
1613 }
1614 }
1615
1616 /*----------------------------------------------------------(incrementsv)---*/
1617
incrementsv(Int4Ptr sv,Int4 class)1618 void incrementsv(Int4Ptr sv, Int4 class)
1619
1620 {
1621 for (;;) {
1622 if (*sv++ == class) {
1623 sv[-1]++;
1624 break;
1625 }
1626 }
1627 }
1628
1629 /*----------------------------------------------------------------(upper)---*/
1630
upper(CharPtr string,size_t len)1631 extern void upper(CharPtr string, size_t len)
1632
1633 {
1634 CharPtr stringmax;
1635 Char c;
1636
1637 for (stringmax = string + len; string < stringmax; ++string)
1638 if (islower(c = *string))
1639 *string = toupper(c);
1640 }
1641
1642 /*----------------------------------------------------------------(lower)---*/
1643
lower(CharPtr string,size_t len)1644 extern void lower(CharPtr string, size_t len)
1645
1646 {
1647 CharPtr stringmax;
1648 Char c;
1649
1650 for (stringmax = string + len; string < stringmax; ++string)
1651 if (isupper(c = *string))
1652 *string = tolower(c);
1653 }
1654
1655 /*--------------------------------------------------------------------------*/
1656