1 static char const rcsid[] = "$Id: sim2.c,v 6.1 2003/05/30 17:25:38 coulouri Exp $";
2
3 #include <stdio.h>
4 #include <ncbi.h>
5 #include <objseq.h>
6 #include <objloc.h>
7 #include <seqport.h>
8 #include <sequtil.h>
9 #include "simutil.h"
10
11
12
13 /* SIM2 - calling routine to falign() */
14
SIM2(SeqLocPtr cslp1,SeqLocPtr cslp2,Int4 k,SimPamPtr a_spp,ValNodePtr PNTR new_list,Uint1 output_type,FilterProc filter)15 SeqAlignPtr SIM2(SeqLocPtr cslp1, SeqLocPtr cslp2, Int4 k, SimPamPtr a_spp, ValNodePtr PNTR new_list, Uint1 output_type, FilterProc filter)
16 {
17 SeqAlignPtr sap = NULL, sapC=NULL, out_sap, out_sapC;
18 Boolean both_strand, is_dna;
19 CharPtr A, B, compB = NULL;
20 Int4 w, r, o, e, m, g, h;
21 FloatHi cut_off;
22 Int4 Width = -1;
23 SimPam sp, PNTR spp;
24 Uint1 m_strand, s_strand;
25
26
27 if(cslp1 == NULL || cslp2 == NULL)
28 {
29 Message(MSG_ERROR, "In sufficient input");
30 return NULL;
31 }
32
33 if(cslp1->choice != SEQLOC_INT || cslp2->choice !=SEQLOC_INT)
34 {
35 Message(MSG_ERROR, "Incorrect type of Seq-loc. Only take Seq-int");
36 return NULL;
37 }
38 if(output_type < OUTPUT_ALIGN || output_type > OUTPUT_BOTH)
39 {
40 Message(MSG_OK, "Illegal output type");
41 output_type = OUTPUT_ALIGN;
42 }
43
44 if(new_list !=NULL)
45 *new_list = NULL;
46 else
47 {
48 if(output_type == OUTPUT_BOTH || output_type == OUTPUT_ENDS)
49 {
50 Message(MSG_ERROR, "No space to store the ends");
51 return NULL;
52 }
53 }
54
55
56 both_strand = check_strand_mol(cslp2, &is_dna);
57 if(!is_dna)
58 {
59 Message(MSG_ERROR, "Sorry, Sim2 can only compare two DNA sequences");
60 return NULL;
61 }
62 if(!both_strand)
63 both_strand = check_strand_mol(cslp1, &is_dna);
64 if(a_spp == NULL)
65 {
66 DefaultSimPam(&sp);
67 spp = &sp;
68 }
69 else
70 spp = a_spp;
71 w = spp->word;
72 r = spp->mismatch;
73 o = spp->gap_open;
74 e = spp->gap_ext;
75 m = spp->mismatch_2;
76 g = spp->gap_open_2;
77 h = spp->gap_ext_2;
78 cut_off = spp->cutoff;
79
80
81 if(both_strand)
82 {
83 m_strand = SeqLocStrand(cslp1);
84 Change_Loc_Strand(cslp1, Seq_strand_plus);
85 s_strand = SeqLocStrand(cslp2);
86 Change_Loc_Strand(cslp2, Seq_strand_plus);
87 }
88
89 A= make_sim_seq(cslp1, TRUE, NULL);
90 if(A == NULL)
91 {
92 if(both_strand)
93 {
94 Change_Loc_Strand(cslp2, s_strand);
95 Change_Loc_Strand(cslp1, m_strand);
96 }
97 return NULL;
98 }
99 B = make_sim_seq(cslp2, TRUE, NULL);
100 if(B == NULL)
101 {
102 if(both_strand)
103 {
104 Change_Loc_Strand(cslp2, s_strand);
105 Change_Loc_Strand(cslp1, m_strand);
106 }
107
108 if(A != NULL)
109 MemFree(A);
110 return NULL;
111 }
112 sap = NULL;
113 sap = falign(cslp1, cslp2, A+1, B+1, w, k, r, o, e);
114 out_sap = Region(sap, cslp1, cslp2, A+1, B+1, m, g, h, Width, new_list, output_type, filter);
115 sap = free_align_list(sap);/* free sap */
116
117 if(both_strand)
118 {
119 Change_Loc_Strand(cslp2, (Uint1)2);
120 B = make_sim_seq(cslp2, TRUE, B);
121 sap = falign(cslp1, cslp2, A+1, B+1, w, k, r, o, e);
122 out_sapC = Region(sap, cslp1, cslp2, A+1, B+1, m, g, h, Width, new_list, output_type, filter);
123 sap = free_align_list(sap); /*free sap */
124
125 /* concatenate out_sap and out_sapC */
126 out_sap = link_align(out_sapC, out_sap);
127 }
128
129 MemFree(A);
130 MemFree(B);
131 if(output_type != OUTPUT_ALIGN)
132 *new_list = get_top_K(*new_list, k);
133 if(both_strand)
134 {
135 Change_Loc_Strand(cslp2, s_strand);
136 Change_Loc_Strand(cslp1, m_strand);
137 }
138 return get_top_K_alignment(out_sap, k, cut_off);
139 }
140
141
142 /**********************************************************************
143 *
144 * SIM2ALN(cslp1, cslp2, K)
145 * Computes top K alignment beteeen cslp1 and cslp2, return the Seq-align
146 * object only
147 *
148 ***********************************************************************/
SIM2ALN(SeqLocPtr cslp1,SeqLocPtr cslp2,Int4 K)149 SeqAlignPtr SIM2ALN (SeqLocPtr cslp1, SeqLocPtr cslp2, Int4 K)
150 {
151 return SIM2(cslp1, cslp2, K, NULL, NULL, OUTPUT_ALIGN, NULL);
152 }
153
154
155
156
157
158 /**********************************************************************
159 *
160 * SIM2ENDS(cslp1, csp2, K, filter)
161 * Computes top K alignment between cslp1, cslp2 and return the
162 * a linked list (ValNodePtr) of RecordEnds
163 * filter: filter based on percent identity. set to NULL when
164 * it is not required
165 *
166 **********************************************************************/
SIM2ENDS(SeqLocPtr cslp1,SeqLocPtr cslp2,Int4 K,FilterProc filter)167 ValNodePtr SIM2ENDS (SeqLocPtr cslp1, SeqLocPtr cslp2, Int4 K, FilterProc
168 filter)
169 {
170
171 ValNodePtr new_list; /*the list for storin the ends*/
172
173 SIM2(cslp1, cslp2, K, NULL, &new_list, OUTPUT_ENDS, filter);
174
175 return new_list;
176 }
177
178