1 /* @source splitter application
2 **
3 ** Split a sequence into (overlapping) smaller sequences
4 **
5 ** @author Copyright (C) Gary Williams (gwilliam@hgmp.mrc.ac.uk)
6 ** @Modified: Rewritten for more intuitive overlaps (ableasby@hgmp.mrc.ac.uk)
7 ** @@
8 **
9 ** This program is free software; you can redistribute it and/or
10 ** modify it under the terms of the GNU General Public License
11 ** as published by the Free Software Foundation; either version 2
12 ** of the License, or (at your option) any later version.
13 **
14 ** This program is distributed in the hope that it will be useful,
15 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 ** GNU General Public License for more details.
18 **
19 ** You should have received a copy of the GNU General Public License
20 ** along with this program; if not, write to the Free Software
21 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
22 ******************************************************************************/
23 
24 #include "emboss.h"
25 
26 
27 
28 static void splitter_write(AjPSeqout seqout, AjPSeq subseq, const AjPSeq seq);
29 static void splitter_AddSubSeqFeat(AjPFeattable ftable, ajuint start,
30                                    ajuint end, const AjPSeq oldseq);
31 static void splitter_ProcessChunk (AjPSeqout seqout, const AjPSeq seq,
32                                    ajuint start, ajuint end, const AjPStr name,
33                                    AjBool feature);
34 static void splitter_MakeSubSeqName (AjPStr * name_ptr, const AjPSeq seq,
35                                      ajuint start, ajuint end);
36 
37 
38 
39 
40 /* @prog splitter *************************************************************
41 **
42 ** Split a sequence into (overlapping) smaller sequences
43 **
44 ******************************************************************************/
45 
main(int argc,char ** argv)46 int main(int argc, char **argv)
47 {
48 
49     AjPSeqall seqall;
50     AjPSeqout seqout;
51     AjPSeq seq;
52     ajint size;
53     ajint overlap;
54     ajint len;
55     ajint pos;
56     AjBool addover;
57     AjBool feature;
58     AjPStr outseq_name = ajStrNew();
59 
60     ajint start;
61     ajint end;
62 
63     embInit("splitter", argc, argv);
64 
65     seqout  = ajAcdGetSeqoutall("outseq");
66     seqall  = ajAcdGetSeqall("sequence");
67     size    = ajAcdGetInt("size");
68     overlap = ajAcdGetInt("overlap");
69     addover = ajAcdGetBoolean("addoverlap");
70     feature = ajAcdGetBoolean("feature");
71 
72     while(ajSeqallNext(seqall, &seq))
73     {
74 	ajSeqTrim(seq);
75 
76 	len = ajSeqGetLen(seq);
77 	pos = 0;
78 
79         ajStrAssignC(&outseq_name, "");
80 
81         if (!addover)
82         {
83             while(pos+size <= len-1)
84             {
85                 start = pos;
86                 end = pos+size-1;
87                 splitter_MakeSubSeqName (&outseq_name, seq, start, end);
88                 splitter_ProcessChunk (seqout, seq, start, end,
89                                        outseq_name, feature);
90                 pos += size-overlap;
91             }
92         }
93         else
94         {
95             while(pos+size+overlap < len-1)
96             {
97                 start = pos;
98                 end = pos+size+overlap-1;
99                 splitter_MakeSubSeqName (&outseq_name, seq, start, end);
100                 splitter_ProcessChunk (seqout, seq, start, end,
101                                        outseq_name, feature);
102                 pos += size;
103             }
104         }
105 
106         splitter_MakeSubSeqName(&outseq_name, seq, pos, len-1);
107         splitter_ProcessChunk (seqout, seq, pos, len-1,
108                                outseq_name, feature);
109     }
110 
111     ajSeqoutClose(seqout);
112     ajSeqallDel(&seqall);
113     ajSeqoutDel(&seqout);
114     ajSeqDel(&seq);
115     ajStrDel(&outseq_name);
116 
117     embExit();
118 
119     return 0;
120 }
121 
122 
123 
124 
125 /* @funcstatic splitter_write  ************************************************
126 **
127 ** Write out split sequence
128 **
129 ** @param [u] default_seqout [AjPSeqout] Output object
130 ** @param [u] subseq [AjPSeq] sequence to write
131 ** @param [r] seq [const AjPSeq] original trimmed sequence
132 ** @return [void]
133 ** @@
134 ******************************************************************************/
135 
splitter_write(AjPSeqout default_seqout,AjPSeq subseq,const AjPSeq seq)136 static void splitter_write(AjPSeqout default_seqout,
137                            AjPSeq subseq, const AjPSeq seq)
138 {
139     /* set the description of the subsequence */
140     ajSeqAssignDescS(subseq, ajSeqGetDescS(seq));
141 
142     /* set the type of the subsequence */
143     ajSeqType(subseq);
144 
145     ajSeqoutWriteSeq(default_seqout, subseq);
146 
147     return;
148 }
149 
150 
151 
152 
153 /* @funcstatic splitter_MakeSubSeqName ****************************************
154 **
155 ** Undocumented
156 **
157 ** @param [w] name_ptr [AjPStr*] Undocumented
158 ** @param [r] seq [const AjPSeq] Undocumented
159 ** @param [r] start [ajuint] Undocumented
160 ** @param [r] end [ajuint] Undocumented
161 **
162 ******************************************************************************/
163 
splitter_MakeSubSeqName(AjPStr * name_ptr,const AjPSeq seq,ajuint start,ajuint end)164 static void splitter_MakeSubSeqName (AjPStr * name_ptr,
165                                      const AjPSeq seq, ajuint start,
166                                      ajuint end)
167 {
168     AjPStr value = ajStrNew();
169 
170     /* create a nice name for the subsequence */
171     ajStrAssignS(name_ptr, ajSeqGetNameS(seq));
172     ajStrAppendC(name_ptr, "_");
173     ajStrFromUint(&value, ajSeqGetBegin(seq)+start);
174     ajStrAppendS(name_ptr, value);
175     ajStrAppendC(name_ptr, "-");
176     ajStrFromUint(&value, ajSeqGetBegin(seq)+end);
177     ajStrAppendS(name_ptr, value);
178 
179     ajStrDel(&value);
180 }
181 
182 
183 
184 
185 /* @funcstatic splitter_ProcessChunk ******************************************
186 **
187 ** Undocumented
188 **
189 ** @param [u] seqout [AjPSeqout] Undocumented
190 ** @param [r] seq [const AjPSeq] Undocumented
191 ** @param [r] start [ajuint] Undocumented
192 ** @param [r] end [ajuint] Undocumented
193 ** @param [r] name [const AjPStr] Undocumented
194 ** @param [r] feature [AjBool] Undocumented
195 **
196 ******************************************************************************/
197 
splitter_ProcessChunk(AjPSeqout seqout,const AjPSeq seq,ajuint start,ajuint end,const AjPStr name,AjBool feature)198 static void splitter_ProcessChunk (AjPSeqout seqout, const AjPSeq seq,
199                                    ajuint start, ajuint end, const AjPStr name,
200                                    AjBool feature)
201 {
202     AjPStr str;
203 
204     AjPFeattable new_feattable = NULL;
205     AjPSeq subseq;
206 
207     ajDebug("splitter_ProcessChunk %d..%d '%S' %B\n",
208             start, end, name, feature);
209 
210     str    = ajStrNew();
211     subseq = ajSeqNew();
212 
213 
214     new_feattable = ajFeattableNew(name);
215     subseq->Fttable = new_feattable;
216     ajFeattableSetNuc(new_feattable);
217 
218     ajStrAssignSubC(&str,ajSeqGetSeqC(seq),start,end);
219     ajSeqAssignSeqS(subseq,str);
220 
221     if(feature)
222         splitter_AddSubSeqFeat(subseq->Fttable,start,end,seq);
223 
224     ajSeqAssignNameS(subseq, name);
225     splitter_write(seqout,subseq,seq);
226 
227     ajStrDel(&str);
228     ajSeqDel(&subseq);
229 
230     return;
231 }
232 
233 
234 
235 
236 /* @funcstatic splitter_AddSubSeqFeat *****************************************
237 **
238 ** Undocumented
239 **
240 ** @param [u] ftable [AjPFeattable] Undocumented
241 ** @param [r] start [ajuint] Undocumented
242 ** @param [r] end [ajuint] Undocumented
243 ** @param [r] oldseq [const AjPSeq] Undocumented
244 **
245 ******************************************************************************/
246 
splitter_AddSubSeqFeat(AjPFeattable ftable,ajuint start,ajuint end,const AjPSeq oldseq)247 static void splitter_AddSubSeqFeat(AjPFeattable ftable, ajuint start,
248                                    ajuint end, const AjPSeq oldseq)
249 {
250     AjPFeattable old_feattable = NULL;
251     AjIList iter = NULL;
252 
253     old_feattable = ajSeqGetFeatCopy(oldseq);
254 
255     if(!old_feattable)
256         return;
257 
258     iter = ajListIterNewread(old_feattable->Features);
259 
260     while(!ajListIterDone(iter))
261     {
262         AjPFeature gf = ajListIterGet(iter);
263 
264         AjPFeature copy = NULL;
265 
266 
267         if (((ajFeatGetEnd(gf) < start + 1) &&
268              (gf->End2 == 0 || gf->End2 < start + 1)) ||
269             ((ajFeatGetStart(gf) > end + 1) &&
270              (gf->Start2 == 0 || gf->Start2 > end + 1)))
271         {
272             continue;
273         }
274 
275         copy = ajFeatNewFeat(gf);
276         copy->Start = copy->Start - start;
277         copy->End = copy->End - start;
278 
279         if (copy->Start2 > 0)
280             copy->Start2 = copy->Start2 - start;
281 
282         if (copy->End2 > 0)
283             copy->End2 = copy->End2 - start;
284 
285         ajFeatTrimOffRange (copy, 0, 1, end - start + 1, AJTRUE, AJTRUE);
286 
287         ajFeattableAdd(ftable, copy);
288     }
289 
290     ajFeattableDel(&old_feattable);
291     ajListIterDel(&iter);
292 
293     return;
294 }
295