1 /* @source noredundant application
2 **
3 ** Remove redundant sequences from an input set
4 ** @author Copyright (C) Jon Ison (jison@ebi.ac.uk)
5 ** @@
6 **
7 ** This program is free software; you can redistribute it and/or
8 ** modify it under the terms of the GNU General Public License
9 ** as published by the Free Software Foundation; either version 2
10 ** of the License, or (at your option) any later version.
11 **
12 ** This program is distributed in the hope that it will be useful,
13 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ** GNU General Public License for more details.
16 **
17 ** You should have received a copy of the GNU General Public License
18 ** along with this program; if not, write to the Free Software
19 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 ******************************************************************************/
21
22 #include "emboss.h"
23
24
25 static AjBool skipredundant_ClearList (AjPList list);
26 static AjBool skipredundant_SeqsetToList (AjPList list, AjPSeqset seqset);
27
28
29
30
31 /* @prog skipredundant *********************************************************
32 **
33 ** Remove redundant sequences from an input set
34 **
35 ******************************************************************************/
36
main(int argc,char ** argv)37 int main(int argc, char **argv)
38 {
39 /* Variable Declarations */
40 AjPSeqset seqset = NULL;
41 AjPMatrixf fmat = NULL;
42 float thresh;
43 float threshlow;
44 float threshup;
45 float gapopen;
46 float gapextend;
47 AjPSeqout seqout = NULL;
48 AjPSeqout seqoutred = NULL;
49 AjPStr mode = NULL;
50 ajint moden;
51 ajuint i;
52
53
54 /* toggle "feature" from ACD not retrieved ... no need */
55
56 const AjPSeq seq = NULL;
57 AjPList list = NULL; /* List for redundancy removal. */
58 AjPUint keep = NULL; /* 1: Sequence in list was non-redundant,
59 0: redundant. */
60 ajuint nseq = 0; /* No. seqs. in list. */
61 ajint nseqnr = 0; /* No. non-redundant seqs. in list. */
62
63 /* ACD File Processing */
64 embInit("skipredundant", argc, argv);
65 seqset = ajAcdGetSeqset("sequences");
66 mode = ajAcdGetListSingle("mode");
67 fmat = ajAcdGetMatrixf("datafile");
68 thresh = ajAcdGetFloat("threshold");
69 threshlow = ajAcdGetFloat("minthreshold");
70 threshup = ajAcdGetFloat("maxthreshold");
71 gapopen = ajAcdGetFloat("gapopen");
72 gapextend = ajAcdGetFloat("gapextend");
73 seqout = ajAcdGetSeqoutall("outseq");
74 seqoutred = ajAcdGetSeqoutall("redundantoutseq");
75
76
77
78 /* Application logic */
79 list = ajListNew();
80 skipredundant_SeqsetToList(list, seqset);
81 keep = ajUintNew();
82 ajStrToInt(mode, &moden);
83
84
85 if(moden == 1)
86 /* Remove redundancy at a single threshold % sequence similarity */
87 {
88 if((!embDmxSeqNR(list, &keep, &nseqnr, fmat, gapopen,
89 gapextend, thresh, ajFalse)))
90 ajFatal("embDmxSeqNR unexpected failure!");
91 }
92 else if (moden == 2)
93 /* 2: Remove redundancy outside a range of acceptable threshold % similarity */
94 {
95 if((!embDmxSeqNRRange(list, &keep, &nseqnr, fmat, gapopen,
96 gapextend, threshlow, threshup, ajFalse)))
97 ajFatal("embDmxSeqNRRange unexpected failure!");
98 }
99 else
100 ajFatal("Invalid mode (not 1 or 2) which should never occur (check ACD file!)");
101
102 nseq = ajSeqsetGetSize(seqset);
103 for(i=0; i<nseq; i++)
104 {
105 seq = ajSeqsetGetseqSeq(seqset, i);
106
107 if(ajUintGet(keep, i))
108 ajSeqoutWriteSeq(seqout, seq);
109 else if(seqoutred)
110 ajSeqoutWriteSeq(seqoutred, seq);
111 }
112
113 /* Memory management and exit */
114 ajSeqsetDel(&seqset);
115 ajMatrixfDel(&fmat);
116 ajStrDel(&mode);
117 ajSeqoutClose(seqout);
118 ajSeqoutDel(&seqout);
119 if(seqoutred)
120 {
121 ajSeqoutClose(seqoutred);
122 ajSeqoutDel(&seqoutred);
123 }
124 skipredundant_ClearList(list);
125
126 ajListFree(&list);
127 ajUintDel(&keep);
128
129 embExit();
130
131 return 0;
132 }
133
134
135
136
137 /* @funcstatic skipredundant_SeqsetToList **************************************
138 **
139 ** Builds a list of sequences from a sequence set.
140 ** The sequences are NOT copied (only a reference is pushed onto the list)
141 **
142 ** @param [u] list [AjPList] List
143 ** @param [w] seqset [AjPSeqset] Sequence set
144 ** @return [AjBool] True on success
145 ******************************************************************************/
skipredundant_SeqsetToList(AjPList list,AjPSeqset seqset)146 static AjBool skipredundant_SeqsetToList (AjPList list, AjPSeqset seqset)
147 {
148 ajint n = 0;
149 ajint x = 0;
150 EmbPDmxNrseq seq_tmp = NULL; /* Temp. pointer for making seq_list. */
151
152 if(!list || !seqset)
153 return ajFalse;
154
155 n = ajSeqsetGetSize(seqset);
156 for(x=0; x<n; x++)
157 {
158 seq_tmp = embDmxNrseqNew(ajSeqsetGetseqSeq(seqset, x));
159 ajListPushAppend(list, seq_tmp);
160 seq_tmp = NULL;
161 }
162
163 return ajTrue;
164 }
165
166
167
168
169
170 /* @funcstatic skipredundant_ClearList **************************************
171 **
172 ** Clears a list of sequences from a sequence set.
173 ** The sequences are copied
174 **
175 ** @param [u] list [AjPList] List
176 ** @return [AjBool] True on success
177 ******************************************************************************/
skipredundant_ClearList(AjPList list)178 static AjBool skipredundant_ClearList (AjPList list)
179 {
180 EmbPDmxNrseq seq_tmp = NULL;
181 AjIList iter;
182
183 if(!list)
184 return ajFalse;
185
186 iter = ajListIterNew(list);
187 while(!ajListIterDone(iter))
188 {
189 seq_tmp = (EmbPDmxNrseq)ajListIterGet(iter);
190 embDmxNrseqDel(&seq_tmp);
191 }
192 ajListIterDel(&iter);
193
194 return ajTrue;
195 }
196