1 /* @source listor application
2 **
3 ** Writes a list file of the logical OR of two sets of sequences
4 **
5 ** @author Copyright (C) Gary Williams
6 ** @@
7 **
8 ** This program is free software; you can redistribute it and/or
9 ** modify it under the terms of the GNU General Public License
10 ** as published by the Free Software Foundation; either version 2
11 ** of the License, or (at your option) any later version.
12 **
13 ** This program is distributed in the hope that it will be useful,
14 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ** GNU General Public License for more details.
17 **
18 ** You should have received a copy of the GNU General Public License
19 ** along with this program; if not, write to the Free Software
20 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 ******************************************************************************/
22 #include "emboss.h"
23
24
25
26
27 static void listor_Output(AjPFile list, ajint Operator,
28 const AjPSeqset seq1, const AjPSeqset seq2,
29 const ajint *hits1, const ajint *hits2,
30 ajint n1, ajint n2);
31
32 static void listor_Write(AjPFile list, const AjPSeqset seqset, ajint i);
33
34
35
36
37 enum {L_OR, L_AND, L_XOR, L_NOT};
38
39
40
41
42 /* @prog listor ***************************************************************
43 **
44 ** Writes a list file of the logical OR of two sets of sequences
45 **
46 ******************************************************************************/
47
main(int argc,char ** argv)48 int main(int argc, char **argv)
49 {
50 AjPSeqset seq1;
51 AjPSeqset seq2;
52 AjPFile list;
53 ajint n1;
54 ajint n2;
55 ajint *lengths1;
56 ajint *lengths2;
57 ajuint *order1;
58 ajuint *order2;
59 ajint *hits1;
60 ajint *hits2;
61 ajint curr1;
62 ajint curr2;
63 ajint tmp1;
64 ajint tmp2 = 0;
65 ajint i;
66 AjPStr operator;
67 ajint OperatorCode=0;
68
69
70 embInit("listor", argc, argv);
71
72 seq1 = ajAcdGetSeqset("firstsequences");
73 seq2 = ajAcdGetSeqset("secondsequences");
74 list = ajAcdGetOutfile("outfile");
75 operator = ajAcdGetListSingle("operator");
76
77 /* get the operator value */
78 switch(ajStrGetCharFirst(operator))
79 {
80 case 'O':
81 OperatorCode = L_OR;
82 break;
83 case 'A':
84 OperatorCode = L_AND;
85 break;
86 case 'X':
87 OperatorCode = L_XOR;
88 break;
89 case 'N':
90 OperatorCode = L_NOT;
91 break;
92 default:
93 ajFatal("Invalid operator type: %S", operator);
94 embExitBad();
95 }
96
97
98 /* get the order of seqset 1 by length */
99 n1 = ajSeqsetGetSize(seq1);
100
101 /* lengths of seq1 entries */
102 lengths1 = AJCALLOC0(n1, sizeof(ajint));
103
104 /* seq1 entries which match seq2 */
105 hits1 = AJCALLOC0(n1, sizeof(ajint));
106
107 /* seq1 entries in length order */
108 order1 = AJCALLOC0(n1, sizeof(ajint));
109 for(i=0; i<n1; i++)
110 {
111 lengths1[i] = ajSeqGetLen(ajSeqsetGetseqSeq(seq1, i));
112 order1[i] = i;
113 hits1[i] = -1;
114 }
115 ajSortIntIncI(lengths1, order1, n1);
116
117 /* get the order of seqset 2 by length */
118 n2 = ajSeqsetGetSize(seq2);
119 lengths2 = AJCALLOC0(n2, sizeof(ajint));
120 hits2 = AJCALLOC0(n2, sizeof(ajint));
121 order2 = AJCALLOC0(n2, sizeof(ajint));
122
123 for(i=0; i<n2; i++)
124 {
125 lengths2[i] = ajSeqGetLen(ajSeqsetGetseqSeq(seq2, i));
126 order2[i] = i;
127 hits2[i] = -1;
128 }
129 ajSortIntIncI(lengths2, order2, n2);
130
131 /*
132 ** go down the two sequence sets, by size order, looking for identical
133 **lengths
134 */
135 curr1 = 0;
136 curr2 = 0;
137 while(curr1 < n1 && curr2 < n2)
138 {
139 if(lengths1[order1[curr1]] < lengths2[order2[curr2]])
140 /* seq1 is shorter - increment curr1 index */
141 curr1++;
142 else if(lengths1[order1[curr1]] > lengths2[order2[curr2]])
143 /* seq2 is shorter - increment curr2 index */
144 curr2++;
145 else
146 {
147 /* identical lengths - check all seq1/seq2 entries of this len */
148 for(tmp1=curr1; tmp1<n1
149 && lengths1[order1[tmp1]] == lengths2[order2[curr2]]; tmp1++)
150 for(tmp2=curr2; tmp2<n2 && lengths2[order2[tmp2]] ==
151 lengths2[order2[curr2]]; tmp2++)
152 /* check to see if the sequences are identical */
153 if(!ajStrCmpCaseS(ajSeqGetSeqS(ajSeqsetGetseqSeq(seq1,
154 order1[tmp1])),
155 ajSeqGetSeqS(ajSeqsetGetseqSeq(seq2,
156 order2[tmp2]))))
157 {
158 hits1[order1[tmp1]] = order2[tmp2];
159 hits2[order2[tmp2]] = order1[tmp1];
160 }
161
162 curr1 = tmp1;
163 curr2 = tmp2;
164 }
165 }
166
167 /* output the required entries to the list file */
168 listor_Output(list, OperatorCode, seq1, seq2, hits1, hits2, n1, n2);
169
170
171 AJFREE(lengths1);
172 AJFREE(lengths2);
173 AJFREE(order1);
174 AJFREE(order2);
175 AJFREE(hits1);
176 AJFREE(hits2);
177 ajFileClose(&list);
178 ajStrDel(&operator);
179
180 ajSeqsetDel(&seq1);
181 ajSeqsetDel(&seq2);
182
183 embExit();
184
185 return 0;
186 }
187
188
189
190
191 /* @funcstatic listor_Output **************************************************
192 **
193 ** Writes out USA of a sequence to a file
194 **
195 ** @param [u] list [AjPFile] Output file
196 ** @param [r] Operator [ajint] logical operation to perform
197 ** @param [r] seq1 [const AjPSeqset] first seqset
198 ** @param [r] seq2 [const AjPSeqset] second seqset
199 ** @param [r] hits1 [const ajint *] array of hits to seq1
200 ** @param [r] hits2 [const ajint *] array of hits to seq2
201 ** @param [r] n1 [ajint] number of sequences in seq1
202 ** @param [r] n2 [ajint] number of sequences in seq2
203 ** @return [void]
204 ** @@
205 ******************************************************************************/
206
listor_Output(AjPFile list,ajint Operator,const AjPSeqset seq1,const AjPSeqset seq2,const ajint * hits1,const ajint * hits2,ajint n1,ajint n2)207 static void listor_Output(AjPFile list, ajint Operator,
208 const AjPSeqset seq1, const AjPSeqset seq2,
209 const ajint *hits1, const ajint *hits2,
210 ajint n1, ajint n2)
211 {
212
213 /*
214 ** OR - output all of seq1 and then those of seq2 with no hits
215 **
216 ** AND - output only those of seq1 with hits
217 **
218 ** XOR - output only those of seq1 with no hits and seq2 with no hits
219 **
220 ** NOT - output only those of seq1 with no hits
221 */
222
223 ajint i;
224
225 /* do seq1 */
226 for(i=0; i<n1; i++)
227 {
228 if(Operator == L_OR)
229 listor_Write(list, seq1, i);
230 else if(Operator == L_AND)
231 {
232 if(hits1[i] != -1)
233 listor_Write(list, seq1, i);
234 }
235 else if(Operator == L_XOR || Operator == L_NOT)
236 {
237 if(hits1[i] == -1)
238 listor_Write(list, seq1, i);
239 }
240 }
241
242 /* do seq2 - write if no hits and OR or no hits and XOR */
243 if(Operator == L_OR || Operator == L_XOR)
244 for(i=0; i<n2; i++)
245 if(hits2[i] == -1)
246 listor_Write(list, seq2, i);
247
248 return;
249 }
250
251
252
253
254 /* @funcstatic listor_Write ***************************************************
255 **
256 ** Writes out USA of a sequence to a file
257 **
258 ** @param [u] list [AjPFile] Output file
259 ** @param [r] seqset [const AjPSeqset] seqset
260 ** @param [r] i [ajint] index into seqset for the sequence to write
261 ** @return [void]
262 ** @@
263 ******************************************************************************/
264
listor_Write(AjPFile list,const AjPSeqset seqset,ajint i)265 static void listor_Write(AjPFile list, const AjPSeqset seqset, ajint i)
266 {
267 ajFmtPrintF(list, "%S\n", ajSeqGetUsaS(ajSeqsetGetseqSeq(seqset, i)));
268
269 return;
270 }
271