1 /* @source listor application
2 **
3 ** Writes a list file of the logical OR of two sets of sequences
4 **
5 ** @author Copyright (C) Gary Williams
6 ** @@
7 **
8 ** This program is free software; you can redistribute it and/or
9 ** modify it under the terms of the GNU General Public License
10 ** as published by the Free Software Foundation; either version 2
11 ** of the License, or (at your option) any later version.
12 **
13 ** This program is distributed in the hope that it will be useful,
14 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 ** GNU General Public License for more details.
17 **
18 ** You should have received a copy of the GNU General Public License
19 ** along with this program; if not, write to the Free Software
20 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
21 ******************************************************************************/
22 #include "emboss.h"
23 
24 
25 
26 
27 static void listor_Output(AjPFile list, ajint Operator,
28 			  const AjPSeqset seq1, const AjPSeqset seq2,
29 			  const ajint *hits1, const ajint *hits2,
30 			  ajint n1, ajint n2);
31 
32 static void listor_Write(AjPFile list, const AjPSeqset seqset, ajint i);
33 
34 
35 
36 
37 enum {L_OR, L_AND, L_XOR, L_NOT};
38 
39 
40 
41 
42 /* @prog listor ***************************************************************
43 **
44 ** Writes a list file of the logical OR of two sets of sequences
45 **
46 ******************************************************************************/
47 
main(int argc,char ** argv)48 int main(int argc, char **argv)
49 {
50     AjPSeqset seq1;
51     AjPSeqset seq2;
52     AjPFile list;
53     ajint n1;
54     ajint n2;
55     ajint *lengths1;
56     ajint *lengths2;
57     ajuint *order1;
58     ajuint *order2;
59     ajint *hits1;
60     ajint *hits2;
61     ajint curr1;
62     ajint curr2;
63     ajint tmp1;
64     ajint tmp2 = 0;
65     ajint i;
66     AjPStr operator;
67     ajint OperatorCode=0;
68 
69 
70     embInit("listor", argc, argv);
71 
72     seq1     = ajAcdGetSeqset("firstsequences");
73     seq2     = ajAcdGetSeqset("secondsequences");
74     list     = ajAcdGetOutfile("outfile");
75     operator = ajAcdGetListSingle("operator");
76 
77     /* get the operator value */
78     switch(ajStrGetCharFirst(operator))
79     {
80     case 'O':
81 	OperatorCode = L_OR;
82 	break;
83     case 'A':
84 	OperatorCode = L_AND;
85 	break;
86     case 'X':
87 	OperatorCode = L_XOR;
88 	break;
89     case 'N':
90 	OperatorCode = L_NOT;
91 	break;
92     default:
93 	ajFatal("Invalid operator type: %S", operator);
94 	embExitBad();
95     }
96 
97 
98     /* get the order of seqset 1 by length */
99     n1 = ajSeqsetGetSize(seq1);
100 
101     /* lengths of seq1 entries */
102     lengths1 = AJCALLOC0(n1, sizeof(ajint));
103 
104     /* seq1 entries which match seq2 */
105     hits1    = AJCALLOC0(n1, sizeof(ajint));
106 
107     /* seq1 entries in length order */
108     order1   = AJCALLOC0(n1, sizeof(ajint));
109     for(i=0; i<n1; i++)
110     {
111 	lengths1[i] = ajSeqGetLen(ajSeqsetGetseqSeq(seq1, i));
112 	order1[i]   = i;
113 	hits1[i]    = -1;
114     }
115     ajSortIntIncI(lengths1, order1, n1);
116 
117     /* get the order of seqset 2 by length */
118     n2 = ajSeqsetGetSize(seq2);
119     lengths2 = AJCALLOC0(n2, sizeof(ajint));
120     hits2    = AJCALLOC0(n2, sizeof(ajint));
121     order2   = AJCALLOC0(n2, sizeof(ajint));
122 
123     for(i=0; i<n2; i++)
124     {
125 	lengths2[i] = ajSeqGetLen(ajSeqsetGetseqSeq(seq2, i));
126 	order2[i]   = i;
127 	hits2[i]    = -1;
128     }
129     ajSortIntIncI(lengths2, order2, n2);
130 
131     /*
132     ** go down the two sequence sets, by size order, looking for identical
133     **lengths
134     */
135     curr1 = 0;
136     curr2 = 0;
137     while(curr1 < n1 &&  curr2 < n2)
138     {
139 	if(lengths1[order1[curr1]] < lengths2[order2[curr2]])
140 	    /* seq1 is shorter - increment curr1 index */
141 	    curr1++;
142 	else if(lengths1[order1[curr1]] > lengths2[order2[curr2]])
143 	    /* seq2 is shorter - increment curr2 index */
144 	    curr2++;
145 	else
146 	{
147 	    /* identical lengths - check all seq1/seq2 entries of this len */
148 	    for(tmp1=curr1; tmp1<n1
149 		 && lengths1[order1[tmp1]] == lengths2[order2[curr2]]; tmp1++)
150 		for(tmp2=curr2; tmp2<n2 && lengths2[order2[tmp2]] ==
151 		    lengths2[order2[curr2]]; tmp2++)
152 		    /* check to see if the sequences are identical */
153 		    if(!ajStrCmpCaseS(ajSeqGetSeqS(ajSeqsetGetseqSeq(seq1,
154 							     order1[tmp1])),
155 				      ajSeqGetSeqS(ajSeqsetGetseqSeq(seq2,
156 				      order2[tmp2]))))
157 		    {
158 			hits1[order1[tmp1]] = order2[tmp2];
159 			hits2[order2[tmp2]] = order1[tmp1];
160 		    }
161 
162 	    curr1 = tmp1;
163 	    curr2 = tmp2;
164 	}
165     }
166 
167     /* output the required entries to the list file */
168     listor_Output(list, OperatorCode, seq1, seq2, hits1, hits2, n1, n2);
169 
170 
171     AJFREE(lengths1);
172     AJFREE(lengths2);
173     AJFREE(order1);
174     AJFREE(order2);
175     AJFREE(hits1);
176     AJFREE(hits2);
177     ajFileClose(&list);
178     ajStrDel(&operator);
179 
180     ajSeqsetDel(&seq1);
181     ajSeqsetDel(&seq2);
182 
183     embExit();
184 
185     return 0;
186 }
187 
188 
189 
190 
191 /* @funcstatic listor_Output **************************************************
192 **
193 ** Writes out USA of a sequence to a file
194 **
195 ** @param [u] list [AjPFile] Output file
196 ** @param [r] Operator [ajint] logical operation to perform
197 ** @param [r] seq1 [const AjPSeqset] first seqset
198 ** @param [r] seq2 [const AjPSeqset] second seqset
199 ** @param [r] hits1 [const ajint *] array of hits to seq1
200 ** @param [r] hits2 [const ajint *] array of hits to seq2
201 ** @param [r] n1 [ajint] number of sequences in seq1
202 ** @param [r] n2 [ajint] number of sequences in seq2
203 ** @return [void]
204 ** @@
205 ******************************************************************************/
206 
listor_Output(AjPFile list,ajint Operator,const AjPSeqset seq1,const AjPSeqset seq2,const ajint * hits1,const ajint * hits2,ajint n1,ajint n2)207 static void listor_Output(AjPFile list, ajint Operator,
208 			  const AjPSeqset seq1, const AjPSeqset seq2,
209 			  const ajint *hits1, const ajint *hits2,
210 			  ajint n1, ajint n2)
211 {
212 
213      /*
214      ** OR - output all of seq1 and then those of seq2 with no hits
215      **
216      ** AND - output only those of seq1 with hits
217      **
218      ** XOR - output only those of seq1 with no hits and seq2 with no hits
219      **
220      ** NOT - output only those of seq1 with no hits
221      */
222 
223     ajint i;
224 
225     /* do seq1 */
226     for(i=0; i<n1; i++)
227     {
228 	if(Operator == L_OR)
229 	    listor_Write(list, seq1, i);
230 	else if(Operator == L_AND)
231 	{
232 	    if(hits1[i] != -1)
233 		listor_Write(list, seq1, i);
234 	}
235 	else if(Operator == L_XOR || Operator == L_NOT)
236 	{
237 	    if(hits1[i] == -1)
238 		listor_Write(list, seq1, i);
239 	}
240     }
241 
242     /* do seq2 - write if no hits and OR or no hits and XOR */
243     if(Operator == L_OR || Operator == L_XOR)
244 	for(i=0; i<n2; i++)
245 	    if(hits2[i] == -1)
246 		listor_Write(list, seq2, i);
247 
248     return;
249 }
250 
251 
252 
253 
254 /* @funcstatic listor_Write ***************************************************
255 **
256 ** Writes out USA of a sequence to a file
257 **
258 ** @param [u] list [AjPFile] Output file
259 ** @param [r] seqset [const AjPSeqset] seqset
260 ** @param [r] i [ajint] index into seqset for the sequence to write
261 ** @return [void]
262 ** @@
263 ******************************************************************************/
264 
listor_Write(AjPFile list,const AjPSeqset seqset,ajint i)265 static void listor_Write(AjPFile list, const AjPSeqset seqset, ajint i)
266 {
267     ajFmtPrintF(list, "%S\n", ajSeqGetUsaS(ajSeqsetGetseqSeq(seqset, i)));
268 
269     return;
270 }
271