1 /* @source ajseqwrite *********************************************************
2 **
3 ** AJAX seqwrite functions
4 **
5 ** @author Copyright (C) 2001 Peter Rice
6 ** @version $Revision: 1.167 $
7 ** @modified 2001-2011 Peter Rice
8 ** @modified $Date: 2013/06/29 22:27:17 $ by $Author: rice $
9 ** @@
10 **
11 ** This library is free software; you can redistribute it and/or
12 ** modify it under the terms of the GNU Lesser General Public
13 ** License as published by the Free Software Foundation; either
14 ** version 2.1 of the License, or (at your option) any later version.
15 **
16 ** This library is distributed in the hope that it will be useful,
17 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 ** Lesser General Public License for more details.
20 **
21 ** You should have received a copy of the GNU Lesser General Public
22 ** License along with this library; if not, write to the Free Software
23 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
24 ** MA 02110-1301, USA.
25 **
26 ******************************************************************************/
27
28 #include "ajlib.h"
29
30 #include "ajseqwrite.h"
31 #include "ajseq.h"
32 #include "ajseqtype.h"
33 #include "ajseqbam.h"
34 #include "ajfeat.h"
35 #include "ajfeatwrite.h"
36
37 #include "ajmath.h"
38 #include "ajreg.h"
39 #include "ajfileio.h"
40 #include "ajnam.h"
41 #include "ajutil.h"
42
43 #include <math.h>
44 #include <float.h>
45
46 #ifdef WIN32
47 #define fileno _fileno
48 #endif /* WIN32 */
49
/*
** File-scope regular expression objects, initially NULL.
** NOTE(review): the patterns and the code that creates and uses them are
** outside this view; the names suggest matching of an output format and
** of an identifier - confirm.
*/
static AjPRegexp seqoutRegFmt = NULL;
static AjPRegexp seqoutRegId = NULL;

/*
** File-scope string object, initially NULL.
** NOTE(review): presumably working storage while testing a USA - confirm.
*/
static AjPStr seqoutUsaTest = NULL;
54
/*
** Lookup table of 64 entries (array index 0 to 63); the trailing comment
** on each row gives the array index range that the row covers.
** NOTE(review): presumably indexed by an integer Phred quality score and
** yielding an index into seqQualIndexToSolexa - the users of this table
** are outside this view, confirm.
*/
static ajuint seqQualPhredToIndex[] = { 0, /* 0 */
    1, 2, 5, 7, 9, 10, 11, 13, 14, 15, /* 1-10 */
    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, /* 11-20 */
    26, 27, 28, 29, 30, 31, 32, 33, 34, 35, /* 21-30 */
    36, 37, 38, 39, 40, 41, 42, 43, 44, 45, /* 31-40 */
    46, 47, 48, 49, 50, 51, 52, 53, 54, 55, /* 41-50 */
    56, 57, 58, 59, 60, 61, 62, 63, 64, 65, /* 51-60 */
    66, 67, 68 /* 61-63 */
};
64
/*
** Lookup table of 70 doubles (array index 0 to 69).
**
** Each row comment gives the array index range of the row, followed by
** a second range that runs five behind it (index 5 pairs with 0).
** NOTE(review): the second range is presumably the Solexa quality score
** and the values presumably thresholds on a Phred-like scale - confirm
** against the code that reads this table.
**
** The first entry is -FLT_MAX and the last two entries are FLT_MAX and
** 999.999.
*/
static double seqQualIndexToSolexa[] = { -FLT_MAX, /* 0 -5 */
    1.318795, 1.603736, 1.937759, 2.324741, 2.767492, /* 1-5 -4-0 */
    3.267492, 3.824741, 4.437759, 5.103736, 5.818795, /* 6-10 1-5 */
    6.578332, 7.377360, 8.210819, 9.073822, 9.961836, /* 11-15 6-10 */
    10.870778, 11.797062, 12.737602, 13.689784, 14.651423, /* 16-20 11-15 */
    15.620708, 16.596154, 17.576551, 18.560916, 19.548457, /* 21-25 16-20 */
    20.538535, 21.530637, 22.524354, 23.519356, 24.515382, /* 26-30 21-25 */
    25.512223, 26.509712, 27.507716, 28.506130, 29.504870, /* 31-35 26-30 */
    30.503869, 31.503073, 32.502442, 33.501939, 34.501541, /* 36-40 31-35 */
    35.501224, 36.500972, 37.500772, 38.500613, 39.500487, /* 41-45 36-40 */
    40.500387, 41.500307, 42.500244, 43.500194, 44.500154, /* 46-50 41-45 */
    45.500122, 46.500097, 47.500077, 48.500061, 49.500049, /* 51-55 46-50 */
    50.500039, 51.500031, 52.500024, 53.500019, 54.500015, /* 56-60 51-55 */
    55.500012, 56.500010, 57.500008, 58.500006, 59.500005, /* 61-65 56-60 */
    60.500004, 61.500003, FLT_MAX, 999.999 /* 66-69 61-62, then end markers */
};
81
82
83
84
85 /* @filesection ajseqwrite ****************************************************
86 **
87 ** @nam1rule aj Function belongs to the AJAX library.
88 **
89 ******************************************************************************/
90
91
92
93
94 /* @datastatic SeqPOutFormat **************************************************
95 **
96 ** Sequence output formats
97 **
98 ** @attr Name [const char*] Format name
99 ** @attr Obo [const char*] Ontology term id from EDAM
100 ** @attr Desc [const char*] Format description
101 ** @attr Alias [AjBool] Name is an alias for an identical definition
102 ** @attr Single [AjBool] Write each sequence to a new file if true (e.g. GCG)
103 ** @attr Save [AjBool] Save in memory and write at end (e.g. MSF alignments)
104 ** @attr Nucleotide [AjBool] True if nucleotide data is supported
105 ** @attr Protein [AjBool] True if protein data is supported
106 ** @attr Feature [AjBool] True if feature data can be written
107 ** @attr Gap [AjBool] True if gap characters are supported
108 ** @attr Multiset [AjBool] True if sets of sets (seqsetall) are supported
109 ** @attr Write [void function] Function to write the format
110 ** @@
111 ******************************************************************************/
112
/* One row of the seqOutFormat[] table: a format name plus its properties */
typedef struct SeqSOutFormat
{
    const char *Name;
    const char *Obo;
    const char *Desc;
    AjBool Alias;
    AjBool Single;
    AjBool Save;
    AjBool Nucleotide;
    AjBool Protein;
    AjBool Feature;
    AjBool Gap;
    AjBool Multiset;
    void (*Write) (AjPSeqout outseq);   /* writer called with the output object */
} SeqOOutFormat;

/*
** Pointer name implemented as a macro, not a typedef. Because it expands
** textually, a declaration listing several names makes only the first a
** pointer, and a leading "const" qualifies the structure pointed to
** rather than the pointer itself.
*/
#define SeqPOutFormat SeqOOutFormat*
130
131
132
133
134 /* @datastatic SeqPSeqFormat **************************************************
135 **
136 ** Data structure to hold definitions when writing sequence data.
137 **
138 ** Most output functions generate the sequence header, then build
139 ** this data structure for the actual output using function seqWriteSeq
140 **
141 ** @attr linepos [ajuint] Undocumented
142 ** @attr namewidth [ajuint] Name format width
143 ** @attr numline [ajuint] Undocumented
144 ** @attr numwidth [ajuint] Number format width
145 ** @attr spacer [ajint] Spacer - can be negative
146 ** @attr tab [ajuint] Undocumented
147 ** @attr width [ajuint] Number of bases per line
148 ** @attr baseonlynum [AjBool] Undocumented
149 ** @attr degap [AjBool] Remove gap characters
150 ** @attr domatch [AjBool] Show matching line
151 ** @attr isactive [AjBool] Undocumented
152 ** @attr nameright [AjBool] Sequence name in right margin
153 ** @attr nameleft [AjBool] Sequence name in left margin
154 ** @attr noleaves [AjBool] Undocumented
155 ** @attr numjust [AjBool] Justify numbers
156 ** @attr numleft [AjBool] Base number on left
157 ** @attr numright [AjBool] Base number on right
158 ** @attr pretty [AjBool] Undocumented
159 ** @attr skipafter [AjBool] Undocumented
160 ** @attr skipbefore [AjBool] Undocumented
161 ** @attr gapchar [char] gap character
162 ** @attr matchchar [char] matching character
163 ** @attr endstr [char[20]] Last line(s)
164 ** @attr leftstr [char[20]] string in left margin
165 ** @attr Padding [char[2]] Padding to alignment boundary
166 ** @@
167 ******************************************************************************/
168
/* Layout options handed to seqWriteSeq when the sequence itself is written */
typedef struct SeqSSeqFormat
{
    ajuint linepos;
    ajuint namewidth;
    ajuint numline;
    ajuint numwidth;
    ajint spacer;               /* the only signed counter in this structure */
    ajuint tab;
    ajuint width;
    AjBool baseonlynum;
    AjBool degap;
    AjBool domatch;
    AjBool isactive;
    AjBool nameright;
    AjBool nameleft;
    AjBool noleaves;
    AjBool numjust;
    AjBool numleft;
    AjBool numright;
    AjBool pretty;
    AjBool skipafter;
    AjBool skipbefore;
    char gapchar;
    char matchchar;
    char endstr[20];
    char leftstr[20];
    char Padding[2];
} SeqOSeqFormat;

/*
** Pointer name implemented as a macro, not a typedef. Because it expands
** textually, the "const SeqPSeqFormat sf" parameter of seqWriteSeq is a
** pointer to a const structure, and a declaration listing several names
** makes only the first a pointer.
*/
#define SeqPSeqFormat SeqOSeqFormat*
199
200
201
202
/*
** File-scope signed constant-like value.
** NOTE(review): its users are outside this view; presumably a special
** value for the signed "spacer" member of SeqOSeqFormat - confirm.
*/
static ajint seqSpaceAll = -9;
204
205
206
207
/*
** Forward declarations of file-local helper functions. The definitions
** are not part of this declaration block.
*/
static void seqClone(AjPSeqout outseq, const AjPSeq seq);
static void seqDbName(AjPStr* name, const AjPStr db);
static void seqDeclone(AjPSeqout outseq);
static AjBool seqFileReopen(AjPSeqout outseq);
static void seqFormatDel(SeqPSeqFormat* pformat);
static AjBool seqNcbiKnowndb(const AjPStr dbname);
static AjBool seqoutUfoLocal(const AjPSeqout thys);
static AjBool seqoutUsaProcess(AjPSeqout thys);
static void seqsetClone(AjPSeqout outseq, const AjPSeqset seq, ajint i);
static AjBool seqoutFindOutFormat(const AjPStr format, ajint* iformat);

/*
** Cleanup helpers, the layout constructor and the per-format writers.
** Every seqWrite function taking only an AjPSeqout matches the Write
** member of SeqOOutFormat; those named in seqOutFormat[] are referenced
** there by address.
** NOTE(review): the seqClean functions have the same signature as the
** Cleanup callback invoked by ajSeqoutDel - presumably they are installed
** there by the matching writers, confirm.
*/
static void seqCleanBam(AjPSeqout outseq);
static void seqCleanDasdna(AjPSeqout outseq);
static void seqCleanDasseq(AjPSeqout outseq);
static void seqSeqFormat(ajint seqlen, SeqPSeqFormat* psf);
static void seqWriteAcedb(AjPSeqout outseq);
static void seqWriteAsn1(AjPSeqout outseq);
static void seqWriteBam(AjPSeqout outseq);
static void seqWriteClustal(AjPSeqout outseq);
static void seqWriteCodata(AjPSeqout outseq);
static void seqWriteDasdna(AjPSeqout outseq);
static void seqWriteDasseq(AjPSeqout outseq);
static void seqWriteDebug(AjPSeqout outseq);
static void seqWriteEmbl(AjPSeqout outseq);
static void seqWriteEmblnew(AjPSeqout outseq);
static void seqWriteExperiment(AjPSeqout outseq);
static void seqWriteFasta(AjPSeqout outseq);
static void seqWriteFastqIllumina(AjPSeqout outseq);
/* disabled together with the commented-out "fastq-int" table entry */
/*static void seqWriteFastqInt(AjPSeqout outseq);*/
static void seqWriteFastqSanger(AjPSeqout outseq);
static void seqWriteFastqSolexa(AjPSeqout outseq);
static void seqWriteFitch(AjPSeqout outseq);
static void seqWriteGcg(AjPSeqout outseq);
static void seqWriteGde(AjPSeqout outseq);
static void seqWriteGenbank(AjPSeqout outseq);
static void seqWriteGenpept(AjPSeqout outseq);
static void seqWriteGifasta(AjPSeqout outseq);
static void seqWriteGff2(AjPSeqout outseq);
static void seqWriteGff3(AjPSeqout outseq);
static void seqWriteHennig86(AjPSeqout outseq);
static void seqWriteIg(AjPSeqout outseq);
static void seqWriteIguspto(AjPSeqout outseq);
static void seqWriteJackknifer(AjPSeqout outseq);
static void seqWriteJackknifernon(AjPSeqout outseq);
/* the two list helpers below are not format writers */
static void seqWriteListAppend(AjPSeqout outseq, const AjPSeq seq);
static void seqWriteListClear(AjPSeqout outseq);
static void seqWriteMase(AjPSeqout outseq);
static void seqWriteMega(AjPSeqout outseq);
static void seqWriteMeganon(AjPSeqout outseq);
static void seqWriteMsf(AjPSeqout outseq);
static void seqWriteNbrf(AjPSeqout outseq);
static void seqWriteNcbi(AjPSeqout outseq);
static void seqWriteNexus(AjPSeqout outseq);
static void seqWriteNexusnon(AjPSeqout outseq);
static void seqWriteNibble(AjPSeqout outseq);
static void seqWritePhylip(AjPSeqout outseq);
static void seqWritePhylipnon(AjPSeqout outseq);
static void seqWriteRefseq(AjPSeqout outseq);
static void seqWriteRefseqp(AjPSeqout outseq);
static void seqWriteSam(AjPSeqout outseq);
static void seqWriteScf(AjPSeqout outseq);
static void seqWriteSelex(AjPSeqout outseq);
/* takes the layout options as well, so it is not usable as a Write member */
static void seqWriteSeq(AjPSeqout outseq, const SeqPSeqFormat sf);
static void seqWriteStaden(AjPSeqout outseq);
static void seqWriteStrider(AjPSeqout outseq);
static void seqWriteSwiss(AjPSeqout outseq);
static void seqWriteSwissnew(AjPSeqout outseq);
static void seqWriteText(AjPSeqout outseq);
static void seqWriteTreecon(AjPSeqout outseq);
277
278
279
280
281 /* @funclist seqOutFormat *****************************************************
282 **
283 ** Functions to write each sequence format
284 **
285 ******************************************************************************/
286
static SeqOOutFormat seqOutFormat[] =
{
    /*
    ** Each entry is written on three lines, in member order:
    **   Name,    OBO,  Description
    **   Alias,   Single, Save,     Nucleotide, Protein
    **   Feature, Gap,    Multiset, WriteFunction
    **
    ** Entry 0 ("unknown") writes FASTA. The table ends with an all-NULL
    ** entry.
    **
    ** NOTE(review): entries flagged Alias are documented as naming "an
    ** identical definition", but the table is not fully consistent:
    **   - "gb" and "ddbj" set Protein to AJFALSE while "genbank", whose
    **     writer they share, sets it to AJTRUE;
    **   - "nib" and "nibble" are identical and neither is flagged Alias;
    **   - "emold"/"emblold" and "swold"/"swissold"/"swissprotold" are
    **     flagged Alias but use seqWriteEmbl and seqWriteSwiss, whereas
    **     "embl" and "swiss" use seqWriteEmblnew and seqWriteSwissnew.
    ** Confirm whether these differences are intended.
    */
    {"unknown", "0000", "Unknown format",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteFasta}, /* internal default
                                                  writes FASTA */
    /* set 'fasta' in ajSeqoutstrGetFormatDefault */
    {"gcg", "1935", "GCG sequence format",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteGcg},
    {"gcg8", "1935", "GCG old (version 8) sequence format",
     AJTRUE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteGcg}, /* alias for gcg */
    {"embl", "1927", "EMBL entry format",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJFALSE,
     AJTRUE, AJTRUE, AJFALSE, &seqWriteEmblnew},
    {"emold", "1927", "EMBL entry format (alias)",
     AJTRUE, AJFALSE, AJFALSE, AJTRUE, AJFALSE,
     AJTRUE, AJTRUE, AJFALSE, &seqWriteEmbl},
    {"emblold", "1927", "EMBL entry format (alias)",
     AJTRUE, AJFALSE, AJFALSE, AJTRUE, AJFALSE,
     AJTRUE, AJTRUE, AJFALSE, &seqWriteEmbl}, /* embl pre-87 format*/
    {"em", "1927", "EMBL entry format (alias)",
     AJTRUE, AJFALSE, AJFALSE, AJTRUE, AJFALSE,
     AJTRUE, AJTRUE, AJFALSE, &seqWriteEmblnew}, /* alias for embl */
    {"emblnew", "1927", "EMBL new entry format",
     AJTRUE, AJFALSE, AJFALSE, AJTRUE, AJFALSE,
     AJTRUE, AJTRUE, AJFALSE, &seqWriteEmblnew},
    {"swiss", "1963", "Swissprot entry format",
     AJFALSE, AJFALSE, AJFALSE, AJFALSE, AJTRUE,
     AJTRUE, AJTRUE, AJFALSE, &seqWriteSwissnew},
    {"swold", "1963", "Swissprot entry format",
     AJTRUE, AJFALSE, AJFALSE, AJFALSE, AJTRUE,
     AJTRUE, AJTRUE, AJFALSE, &seqWriteSwiss},
    {"swissold", "1963", "Swissprot entry format", /* before 2006 release */
     AJTRUE, AJFALSE, AJFALSE, AJFALSE, AJTRUE,
     AJTRUE, AJTRUE, AJFALSE, &seqWriteSwiss},
    {"swissprotold","1963", "Swissprot entry format",
     AJTRUE, AJFALSE, AJFALSE, AJFALSE, AJTRUE,
     AJTRUE, AJTRUE, AJFALSE, &seqWriteSwiss},
    {"sw", "1963", "Swissprot entry format(alias)",
     AJTRUE, AJFALSE, AJFALSE, AJFALSE, AJTRUE,
     AJTRUE, AJTRUE, AJFALSE, &seqWriteSwissnew}, /* alias for swiss */
    {"swissprot", "1963", "Swissprot entry format(alias)",
     AJTRUE, AJFALSE, AJFALSE, AJFALSE, AJTRUE,
     AJTRUE, AJTRUE, AJFALSE, &seqWriteSwissnew}, /* alias for swiss */
    {"swissnew", "1963", "Swissprot entry format",
     AJTRUE, AJFALSE, AJFALSE, AJFALSE, AJTRUE,
     AJTRUE, AJTRUE, AJFALSE, &seqWriteSwissnew},
    {"swnew", "1963", "Swissprot entry format(alias)",
     AJTRUE, AJFALSE, AJFALSE, AJFALSE, AJTRUE,
     AJTRUE, AJTRUE, AJFALSE, &seqWriteSwissnew}, /* alias for swiss */
    {"swissprotnew","1963", "Swissprot entry format(alias)",
     AJTRUE, AJFALSE, AJFALSE, AJFALSE, AJTRUE,
     AJTRUE, AJTRUE, AJFALSE, &seqWriteSwissnew}, /* alias for swiss */
    {"uniprot", "2188", "Swissprot entry format(alias)",
     AJTRUE, AJFALSE, AJFALSE, AJFALSE, AJTRUE,
     AJTRUE, AJTRUE, AJFALSE, &seqWriteSwissnew}, /* alias for swiss */
    {"fasta", "1929", "FASTA format",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteFasta},
    {"pearson", "1954", "FASTA format (alias)",
     AJTRUE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteFasta}, /* alias for fasta */
    {"ncbi", "1929", "NCBI fasta format with NCBI-style IDs",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteNcbi},
    {"gifasta", "1940", "NCBI fasta format with NCBI-style IDs using GI number",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteGifasta},
    {"gde", "0000", "GDE program format",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteGde},
    {"nib", "0000", "Nibble format",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJFALSE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteNibble},
    {"nibble", "0000", "Nibble format",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJFALSE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteNibble},
    {"nbrf", "1948", "NBRF/PIR entry format",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJTRUE, AJTRUE, AJFALSE, &seqWriteNbrf},
    {"pir", "1948", "NBRF/PIR entry format (alias)",
     AJTRUE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJTRUE, AJTRUE, AJFALSE, &seqWriteNbrf}, /* alias for nbrf */
    {"genbank", "1936", "Genbank entry format",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJTRUE, AJTRUE, AJFALSE, &seqWriteGenbank},
    {"gb", "1936", "Genbank entry format (alias)",
     AJTRUE, AJFALSE, AJFALSE, AJTRUE, AJFALSE,
     AJTRUE, AJTRUE, AJFALSE, &seqWriteGenbank}, /* alias for genbank */
    {"ddbj", "1936", "Genbank/DDBJ entry format (alias)",
     AJTRUE, AJFALSE, AJFALSE, AJTRUE, AJFALSE,
     AJTRUE, AJTRUE, AJFALSE, &seqWriteGenbank}, /* alias for genbank */
    {"genpept", "1937", "Genpept entry format",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJTRUE, AJTRUE, AJFALSE, &seqWriteGenpept},
    {"refseq", "1936", "Refseq entry format",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJFALSE,
     AJTRUE, AJTRUE, AJFALSE, &seqWriteRefseq},
    {"refseqp", "1937", "Refseqp entry format",
     AJFALSE, AJFALSE, AJFALSE, AJFALSE, AJTRUE,
     AJTRUE, AJTRUE, AJFALSE, &seqWriteRefseqp},
    {"gff2", "1938", "GFF2 feature file with sequence in the header",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJTRUE, AJTRUE, AJFALSE, &seqWriteGff2},
    {"gff3", "1939", "GFF3 feature file with sequence in FASTA format after",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJTRUE, AJTRUE, AJFALSE, &seqWriteGff3},
    {"gff", "1939", "GFF3 feature file with sequence in FASTA format after",
     AJTRUE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJTRUE, AJTRUE, AJFALSE, &seqWriteGff3},
    {"ig", "1942", "Intelligenetics sequence format",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteIg},
    {"iguspto", "1942", "US patent office multi-line Intelligenetics sequence format",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteIguspto},
    {"codata", "1925", "Codata entry format",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteCodata},
    {"strider", "1962", "DNA strider output format",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteStrider},
    {"acedb", "1923", "ACEDB sequence format",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteAcedb},
    {"experiment", "1928", "Staden experiment file",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteExperiment},
    {"staden", "1960", "Old staden package sequence format",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteStaden},
    {"text", "1957", "Plain text",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteText},
    {"plain", "1957", "Plain text (alias)",
     AJTRUE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteText}, /* alias for text */
    {"raw", "1957", "Plain text (alias)",
     AJTRUE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteText}, /* alias for text output */
    {"fitch", "1934", "Fitch program format",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteFitch},
    {"msf", "1947", "GCG MSF (multiple sequence file) file format",
     AJFALSE, AJFALSE, AJTRUE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteMsf},
    {"clustal", "1924", "Clustalw multiple alignment format",
     AJFALSE, AJFALSE, AJTRUE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteClustal},
    {"aln", "1924", "Clustalw output format (alias)",
     AJTRUE, AJFALSE, AJTRUE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteClustal}, /* alias for clustal */
    {"selex", "1959", "Selex format",
     AJFALSE, AJFALSE, AJTRUE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteSelex},
    {"phylip", "1955", "Phylip interleaved format",
     AJFALSE, AJFALSE, AJTRUE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJTRUE, &seqWritePhylip},
    {"phylipnon", "1956", "Phylip non-interleaved format",
     AJFALSE, AJFALSE, AJTRUE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWritePhylipnon},
    {"phylip3", "1956", "Phylip non-interleaved format (alias)",
     AJTRUE, AJFALSE, AJTRUE, AJTRUE, AJTRUE, /* alias for phylipnon*/
     AJFALSE, AJTRUE, AJFALSE, &seqWritePhylipnon},
    {"asn1", "1966", "NCBI ASN.1 format",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteAsn1},
    {"hennig86", "1941", "Hennig86 output format",
     AJFALSE, AJFALSE, AJTRUE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteHennig86},
    {"mega", "1991", "Mega interleaved output format",
     AJFALSE, AJFALSE, AJTRUE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteMega},
    {"meganon", "1992", "Mega non-interleaved output format",
     AJFALSE, AJFALSE, AJTRUE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteMeganon},
    {"nexus", "1912", "Nexus/paup interleaved format",
     AJFALSE, AJFALSE, AJTRUE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteNexus},
    {"paup", "1912", "Nexus/paup interleaved format (alias)",
     AJTRUE, AJFALSE, AJTRUE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteNexus}, /* alias for nexus */
    {"nexusnon", "1973", "Nexus/paup non-interleaved format",
     AJFALSE, AJFALSE, AJTRUE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteNexusnon},
    {"paupnon", "1973", "Nexus/paup non-interleaved format (alias)",
     AJTRUE, AJFALSE, AJTRUE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteNexusnon}, /* alias for nexusnon*/
    {"jackknifer", "1944", "Jackknifer output interleaved format",
     AJFALSE, AJFALSE, AJTRUE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteJackknifer},
    {"jackknifernon", "1970", "Jackknifer output non-interleaved format",
     AJFALSE, AJFALSE, AJTRUE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteJackknifernon},
    {"treecon", "1911", "Treecon output format",
     AJFALSE, AJFALSE, AJTRUE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteTreecon},
    {"mase", "1945", "Mase program format",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteMase},
    {"dasdna", "1968", "DASDNA DAS nucleotide-only sequence",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJFALSE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteDasdna},
    {"das", "1967", "DASSEQUENCE DAS any sequence",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteDasseq},
    {"fastq-sanger", "1932", "FASTQ short read format with phred quality",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJFALSE,
     AJFALSE, AJFALSE, AJFALSE, &seqWriteFastqSanger},
    {"fastq", "1930", "FASTQ short read format with phred quality",
     AJTRUE, AJFALSE, AJFALSE, AJTRUE, AJFALSE,
     AJFALSE, AJFALSE, AJFALSE, &seqWriteFastqSanger},
    {"fastq-illumina", "1931", "FASTQ Illumina 1.3 short read format",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJFALSE,
     AJFALSE, AJFALSE, AJFALSE, &seqWriteFastqIllumina},
    {"fastq-solexa", "1933", "FASTQ Solexa/Illumina 1.0 short read format",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJFALSE,
     AJFALSE, AJFALSE, AJFALSE, &seqWriteFastqSolexa},
/*
** {"fastq-int", "2182", "FASTQ short read format with integer phred quality",
** AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJFALSE,
** AJFALSE, AJFALSE, AJFALSE, seqWriteFastqInt},
*/
    {"sam", "2573", "Sequence alignment/map (SAM) format",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJFALSE, AJFALSE, AJFALSE, &seqWriteSam},
    {"bam", "2572", "Binary sequence alignment/map (BAM) format",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJFALSE, AJFALSE, AJFALSE, &seqWriteBam},
    {"scf", "1632", "SCF format",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJFALSE, AJFALSE, AJFALSE, &seqWriteScf},
    {"debug", "1983", "Debugging trace of full internal data content",
     AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJTRUE,
     AJFALSE, AJTRUE, AJFALSE, &seqWriteDebug}, /* trace report */
    /* end-of-table marker: Name is NULL */
    {NULL, NULL, NULL, 0, 0, 0, 0, 0, 0, 0, 0, NULL}
};
529
530
531
532
533 /* @datasection [AjPSeqout] Sequence output *********************************
534 **
535 ** Function is for manipulating sequence output objects
536 **
537 ** @nam2rule Seqout
538 **
539 ******************************************************************************/
540
541
542
543
544 /* @section Sequence Output Constructors **************************************
545 **
546 ** All constructors return a new sequence output object by pointer. It
547 ** is the responsibility of the user to first destroy any previous
548 ** sequence output object. The target pointer does not need to be
549 ** initialised to NULL, but it is good programming practice to do so
550 ** anyway.
551 **
552 ** @fdata [AjPSeqout]
553 ** @fcategory new
554 **
555 ** @nam3rule New Constructor
556 ** @nam4rule NewFile Constructor using an open output file
557 ** @nam4rule NewFormat Constructor using a named format
558 ** @suffix C [char*] C character string
559 ** @suffix S [AjPStr] string object
560 **
561 ** @argrule NewFile file [AjPFile] Open output file
562 ** @argrule C txt [const char*] Format name
563 ** @argrule S str [const AjPStr] Format name
564 **
565 ** @valrule * [AjPSeqout]
566 ******************************************************************************/
567
568
569
570
571 /* @func ajSeqoutNew **********************************************************
572 **
573 ** Creates a new sequence output object.
574 **
575 ** @return [AjPSeqout] New sequence output object.
576 **
577 ** @release 1.0.0
578 ** @@
579 ******************************************************************************/
580
ajSeqoutNew(void)581 AjPSeqout ajSeqoutNew(void)
582 {
583 AjPSeqout pthis;
584
585 AJNEW0(pthis);
586
587 pthis->Name = ajStrNew();
588 /* pthis->Acc = ajStrNew(); */
589 pthis->Sv = ajStrNew();
590 pthis->Gi = ajStrNew();
591
592 /*
593 // pthis->Tax = ajStrNew();
594 // pthis->Taxcommon = ajStrNew();
595 // pthis->Taxid = ajStrNew();
596 // pthis->Organelle = ajStrNew();
597 */
598
599 pthis->Desc = ajStrNew();
600 pthis->Type = ajStrNew();
601 pthis->EType = 0;
602
603 pthis->Outputtype = ajStrNew();
604
605 /*
606 // pthis->Db = ajStrNew();
607 // pthis->Setdb = ajStrNew();
608 // pthis->Setoutdb = ajStrNew();
609 // pthis->Full = ajStrNew();
610 // pthis->Doc = ajStrNew();
611 // pthis->Rev = ajFalse;
612 // pthis->Ufo = ajStrNew();
613 */
614
615 pthis->Usa = ajStrNew();
616 pthis->Informatstr = ajStrNew();
617 pthis->Formatstr = ajStrNew();
618
619 pthis->Format = 0;
620 pthis->Filename = ajStrNew();
621 pthis->Directory = ajStrNew();
622 pthis->Entryname = ajStrNew();
623 pthis->Seq = ajStrNew();
624 pthis->File = NULL;
625 pthis->Count = 0;
626 pthis->Single = ajFalse;
627 pthis->Features = ajFalse;
628 pthis->Extension = ajStrNew();
629 pthis->Savelist = NULL;
630
631 pthis->Ftquery = ajFeattabOutNew();
632 pthis->Fttable = NULL;
633
634 pthis->Acclist = ajListstrNew();
635
636 /*
637 // pthis->Keylist = ajListstrNew();
638 // pthis->Taxlist = ajListstrNew();
639 // pthis->Genelist = ajListstrNew();
640 // pthis->Cmtlist = ajListstrNew();
641 // pthis->Xreflist = ajListNew();
642 // pthis->Reflist = ajListNew();
643 // pthis->Fulldesc = ajSeqdescNew();
644 */
645
646 return pthis;
647 }
648
649
650
651
652 /* @func ajSeqoutNewFile ******************************************************
653 **
654 ** Creates a new sequence output object using a pre-opened file.
655 **
656 ** @param [u] file [AjPFile] Open file object
657 ** @return [AjPSeqout] New sequence output object.
658 **
659 ** @release 4.1.0
660 ** @@
661 ******************************************************************************/
662
ajSeqoutNewFile(AjPFile file)663 AjPSeqout ajSeqoutNewFile(AjPFile file)
664 {
665 AjPSeqout pthis;
666
667 pthis = ajSeqoutNew();
668 pthis->Knownfile = file;
669 pthis->File = file;
670
671 return pthis;
672 }
673
674
675
676
677 /* @func ajSeqoutNewFormatC ***************************************************
678 **
679 ** Creates a new sequence output object with a specified format.
680 **
681 ** @param [r] txt [const char*] Output sequence format
682 ** @return [AjPSeqout] New sequence output object.
683 **
684 ** @release 5.0.0
685 ** @@
686 ******************************************************************************/
687
ajSeqoutNewFormatC(const char * txt)688 AjPSeqout ajSeqoutNewFormatC(const char* txt)
689 {
690 AjPSeqout thys;
691
692 thys = ajSeqoutNew();
693
694 ajSeqoutSetFormatC(thys, txt);
695
696 return thys;
697 }
698
699
700
701
702 /* @func ajSeqoutNewFormatS ***************************************************
703 **
704 ** Creates a new sequence output object with a specified format.
705 **
706 ** @param [r] str [const AjPStr] Output sequence format
707 ** @return [AjPSeqout] New sequence output object.
708 **
709 ** @release 5.0.0
710 ** @@
711 ******************************************************************************/
712
ajSeqoutNewFormatS(const AjPStr str)713 AjPSeqout ajSeqoutNewFormatS(const AjPStr str)
714 {
715 AjPSeqout thys;
716
717 thys = ajSeqoutNew();
718
719 ajSeqoutSetFormatS(thys, str);
720
721 return thys;
722 }
723
724
725
726
727 /* @section destructors *******************************************************
728 **
729 ** Destruction destroys all internal data structures and frees the
730 ** memory allocated for the sequence output object.
731 **
732 **
733 ** @fdata [AjPSeqout]
734 ** @fcategory delete
735 **
736 ** @nam3rule Del Destroy (free) a sequence output object
737 **
738 ** @argrule * Pseqout [AjPSeqout*] Sequence output object address
739 **
740 ** @valrule * [void]
741 **
742 ******************************************************************************/
743
744
745
746
747 /* @func ajSeqoutDel **********************************************************
748 **
749 ** Destructor for AjPSeqout objects
750 **
751 ** @param [d] Pseqout [AjPSeqout*] Sequence output object
752 ** @return [void]
753 **
754 ** @release 2.9.0
755 ** @@
756 ******************************************************************************/
757
ajSeqoutDel(AjPSeqout * Pseqout)758 void ajSeqoutDel(AjPSeqout* Pseqout)
759 {
760 AjPSeqout seqout;
761 AjPSeq seq = NULL;
762 AjPStr tmpstr = NULL;
763 AjPSeqRef tmpref = NULL;
764 AjPSeqXref tmpxref = NULL;
765 AjPSeqGene tmpgene = NULL;
766
767 seqout = *Pseqout;
768
769 if(!seqout)
770 return;
771
772 ajStrDel(&seqout->Name);
773 ajStrDel(&seqout->Acc);
774 MAJSTRDEL(&seqout->Sv);
775 MAJSTRDEL(&seqout->Gi);
776 MAJSTRDEL(&seqout->Tax);
777 MAJSTRDEL(&seqout->Taxcommon);
778 MAJSTRDEL(&seqout->Taxid);
779 MAJSTRDEL(&seqout->Organelle);
780 ajStrDel(&seqout->Desc);
781 ajStrDel(&seqout->Type);
782 ajStrDel(&seqout->Outputtype);
783 MAJSTRDEL(&seqout->Molecule);
784 MAJSTRDEL(&seqout->Class);
785 MAJSTRDEL(&seqout->Division);
786 MAJSTRDEL(&seqout->Evidence);
787 MAJSTRDEL(&seqout->Db);
788 MAJSTRDEL(&seqout->Setdb);
789 MAJSTRDEL(&seqout->Setoutdb);
790 MAJSTRDEL(&seqout->Full);
791 MAJSTRDEL(&seqout->Doc);
792 MAJSTRDEL(&seqout->Usa);
793 MAJSTRDEL(&seqout->Ufo);
794 MAJSTRDEL(&seqout->FtFormat);
795 MAJSTRDEL(&seqout->FtFilename);
796 ajStrDel(&seqout->Informatstr);
797 ajStrDel(&seqout->Formatstr);
798 ajStrDel(&seqout->Filename);
799 MAJSTRDEL(&seqout->Directory);
800 ajStrDel(&seqout->Entryname);
801 ajStrDel(&seqout->Seq);
802 ajStrDel(&seqout->Extension);
803
804 if(seqout->Acclist)
805 {
806 while(ajListPop(seqout->Acclist,(void **)&tmpstr))
807 ajStrDel(&tmpstr);
808
809 ajListFree(&seqout->Acclist);
810 }
811
812 if(seqout->Keylist)
813 {
814 while(ajListPop(seqout->Keylist,(void **)&tmpstr))
815 ajStrDel(&tmpstr);
816
817 ajListFree(&seqout->Keylist);
818 }
819
820 if(seqout->Taxlist)
821 {
822 while(ajListPop(seqout->Taxlist,(void **)&tmpstr))
823 ajStrDel(&tmpstr);
824
825 ajListFree(&seqout->Taxlist);
826 }
827
828 if(seqout->Genelist)
829 {
830 while(ajListPop(seqout->Genelist,(void **)&tmpgene))
831 ajSeqgeneDel(&tmpgene);
832
833 ajListFree(&seqout->Genelist);
834 }
835
836 if(seqout->Reflist)
837 {
838 while(ajListPop(seqout->Reflist,(void **)&tmpref))
839 ajSeqrefDel(&tmpref);
840
841 ajListFree(&seqout->Reflist);
842 }
843
844 if(seqout->Cmtlist)
845 {
846 while(ajListPop(seqout->Cmtlist,(void **)&tmpstr))
847 ajStrDel(&tmpstr);
848
849 ajListFree(&seqout->Cmtlist);
850 }
851
852 if(seqout->Xreflist)
853 {
854 while(ajListPop(seqout->Xreflist,(void **)&tmpxref))
855 ajSeqxrefDel(&tmpxref);
856
857 ajListFree(&seqout->Xreflist);
858 }
859
860 if(seqout->Savelist)
861 {
862 while(ajListPop(seqout->Savelist,(void **)&seq))
863 ajSeqDel(&seq);
864
865 ajListFree(&seqout->Savelist);
866 }
867
868 if(seqout->Ftquery)
869 ajFeattabOutDel(&seqout->Ftquery);
870
871 if(seqout->Cleanup)
872 (*seqout->Cleanup)(seqout);
873
874 seqout->Cleanup = NULL;
875
876 if(seqout->Knownfile)
877 seqout->File = NULL;
878 else
879 ajFileClose(&seqout->File);
880
881 if(seqout->Date)
882 ajSeqdateDel(&seqout->Date);
883 if(seqout->Fulldesc)
884 ajSeqdescDel(&seqout->Fulldesc);
885
886 AJFREE(seqout->Accuracy);
887 AJFREE(*Pseqout);
888
889 return;
890 }
891
892
893
894
895 /* @datasection [AjPSeqout] Sequence stream output ****************************
896 **
897 ** Function is for writing sequence streams using sequence output objects
898 **
899 ** @nam2rule Seqall
900 **
901 ******************************************************************************/
902
903
904
905
906 /* @section Sequence Stream output *****************************************
907 **
908 ** These functions output the contents of a sequence stream object.
909 ** As the input is a stream they expect to be called again.
910 **
911 ** @fdata [AjPSeqout]
912 ** @fcategory output
913 **
914 ** @nam2rule Seqout
915 **
916 ** @nam3rule Write Write sequence data
917 ** @nam4rule WriteSet Write sequence set data
918 ** @nam4rule WriteSeq Write single sequence data
919 **
920 ** @argrule * outseq [AjPSeqout] Sequence output object
921 ** @argrule WriteSet seq [const AjPSeqset] sequence set current object
922 ** @argrule WriteSeq seq [const AjPSeq] sequence object
923 **
924 ** @valrule * [AjBool] True on success
925 **
926 ******************************************************************************/
927
928
929
930
931 /* @func ajSeqoutWriteSeq *****************************************************
932 **
933 ** Write next sequence out - continue until done.
934 **
935 ** @param [u] outseq [AjPSeqout] Sequence output.
936 ** @param [r] seq [const AjPSeq] Sequence.
937 ** @return [AjBool] True if sequence was successfully written.
938 ** Note if the save flag is true this means it was saved
939 ** to be written later when the output is closed.
940 **
941 ** @release 4.1.0
942 ** @@
943 ******************************************************************************/
944
ajSeqoutWriteSeq(AjPSeqout outseq,const AjPSeq seq)945 AjBool ajSeqoutWriteSeq(AjPSeqout outseq, const AjPSeq seq)
946 {
947 AjBool isnuc = ajFalse;
948 AjBool isprot = ajFalse;
949 AjBool ok;
950
951 ajDebug("ajSeqoutWriteSeq '%S' len: %d\n",
952 ajSeqGetNameS(seq), ajSeqGetLen(seq));
953
954 if(!outseq->Format)
955 {
956 if(!seqoutFindOutFormat(outseq->Formatstr, &outseq->Format))
957 {
958 if(!outseq->Count++)
959 ajErr("unknown output sequence format '%S'",
960 outseq->Formatstr);
961 return ajFalse;
962 }
963 }
964
965 ajDebug("ajSeqoutWriteSeq %d '%s' single: %B feat: %B Save: %B\n",
966 outseq->Format,
967 seqOutFormat[outseq->Format].Name,
968 seqOutFormat[outseq->Format].Single,
969 outseq->Features,
970 seqOutFormat[outseq->Format].Save);
971
972 if(ajSeqIsNuc(seq))
973 isnuc = ajTrue;
974
975 if(ajSeqIsProt(seq))
976 isprot = ajTrue;
977
978 ok = ajFalse;
979
980 if(isnuc && seqOutFormat[outseq->Format].Nucleotide)
981 ok = ajTrue;
982 else if(isprot && seqOutFormat[outseq->Format].Protein)
983 ok = ajTrue;
984
985 if(!ok)
986 {
987 if(isnuc)
988 ajErr("Sequence format '%S' not supported for nucleotide sequences",
989 outseq->Formatstr);
990 else if(isprot)
991 ajErr("Sequence format '%S' not supported for protein sequences",
992 outseq->Formatstr);
993 else
994 ajErr("Sequence format '%S' failed: unknown sequence type",
995 outseq->Formatstr);
996
997 return ajFalse;
998 }
999
1000 seqClone(outseq, seq);
1001
1002 if(seqOutFormat[outseq->Format].Save)
1003 {
1004 seqWriteListAppend(outseq, seq);
1005 outseq->Count++;
1006
1007 return ajTrue;
1008 }
1009
1010 ajSeqoutSetNameDefaultS(outseq, !outseq->Single, outseq->Entryname);
1011
1012 if(outseq->Fttable)
1013 ajFeattableSetDefname(outseq->Fttable, outseq->Name);
1014
1015 if(outseq->Single)
1016 seqFileReopen(outseq);
1017
1018 if (outseq->Knownfile && !outseq->File)
1019 outseq->File = outseq->Knownfile;
1020
1021 /* Calling funclist seqOutFormat() */
1022 (*seqOutFormat[outseq->Format].Write)(outseq);
1023 outseq->Count++;
1024
1025 ajDebug("ajSeqoutWriteSeq tests features %B taboutisopen %B "
1026 "UfoLocal %B ftlocal %B\n",
1027 outseq->Features, ajFeattabOutIsOpen(outseq->Ftquery),
1028 seqoutUfoLocal(outseq), ajFeattabOutIsLocal(outseq->Ftquery));
1029
1030 if(outseq->Features &&
1031 !ajFeattabOutIsLocal(outseq->Ftquery)) /* not already done */
1032 {
1033 if(!ajFeattabOutIsOpen(outseq->Ftquery))
1034 {
1035 ajDebug("ajSeqoutWriteSeq features output needed\n");
1036 ajFeattabOutSetBasename(outseq->Ftquery, outseq->Filename);
1037
1038 if(!ajFeattabOutOpen(outseq->Ftquery, outseq->Ufo))
1039 {
1040 ajWarn("ajSeqoutWriteSeq features output file open failed "
1041 "'%S%S'",
1042 outseq->Ftquery->Directory, outseq->Ftquery->Filename);
1043
1044 return ajFalse;
1045 }
1046
1047 ajStrAssignEmptyS(&outseq->Ftquery->Seqname, seq->Name);
1048 ajStrAssignEmptyS(&outseq->Ftquery->Type, seq->Type);
1049 }
1050
1051 /* ajFeattableTrace(outseq->Fttable); */
1052 if(!ajFeattableWriteUfo(outseq->Ftquery, outseq->Fttable,
1053 outseq->Ufo))
1054 {
1055 ajWarn("ajSeqWriteSeq features output failed UFO: '%S'",
1056 outseq->Ufo);
1057
1058 return ajFalse;
1059 }
1060 }
1061
1062 seqDeclone(outseq);
1063
1064 return ajTrue;
1065 }
1066
1067
1068
1069
1070 /* @func ajSeqoutWriteSet *****************************************************
1071 **
1072 ** Write a set of sequences out.
1073 **
1074 ** @param [u] outseq [AjPSeqout] Sequence output.
1075 ** @param [r] seq [const AjPSeqset] Sequence set.
1076 ** @return [AjBool] True on success
1077 **
1078 ** @release 4.1.0
1079 ** @@
1080 ******************************************************************************/
1081
ajSeqoutWriteSet(AjPSeqout outseq,const AjPSeqset seq)1082 AjBool ajSeqoutWriteSet(AjPSeqout outseq, const AjPSeqset seq)
1083 {
1084 ajuint i = 0;
1085
1086 ajDebug("ajSeqoutWriteSet\n");
1087
1088 if(!outseq->Format)
1089 if(!seqoutFindOutFormat(outseq->Formatstr, &outseq->Format))
1090 ajErr("unknown output sequence set format '%S'", outseq->Formatstr);
1091
1092 ajDebug("ajSeqoutWriteSet %d '%s' single: %B feat: %B Save: %B\n",
1093 outseq->Format,
1094 seqOutFormat[outseq->Format].Name,
1095 seqOutFormat[outseq->Format].Single,
1096 outseq->Features,
1097 seqOutFormat[outseq->Format].Save);
1098
1099 for(i=0; i < seq->Size; i++)
1100 {
1101 seqsetClone(outseq, seq, i);
1102
1103 if(seqOutFormat[outseq->Format].Save)
1104 {
1105 seqWriteListAppend(outseq, seq->Seq[i]);
1106 outseq->Count++;
1107 continue;
1108 }
1109
1110 ajSeqoutSetNameDefaultS(outseq, !outseq->Single, outseq->Entryname);
1111
1112 if(outseq->Fttable)
1113 ajFeattableSetDefname(outseq->Fttable, outseq->Name);
1114
1115 if(outseq->Single)
1116 seqFileReopen(outseq);
1117
1118 /* Calling funclist seqOutFormat() */
1119 (*seqOutFormat[outseq->Format].Write)(outseq);
1120 outseq->Count++;
1121
1122 ajDebug("ajSeqoutWriteSet tests features %B taboutisopen %B "
1123 "UfoLocal %B ftlocal %B\n",
1124 outseq->Features, ajFeattabOutIsOpen(outseq->Ftquery),
1125 seqoutUfoLocal(outseq), ajFeattabOutIsLocal(outseq->Ftquery));
1126
1127 if(outseq->Features &&
1128 !ajFeattabOutIsLocal(outseq->Ftquery))
1129 {
1130 /* not already done */
1131 if(!ajFeattabOutIsOpen(outseq->Ftquery))
1132 {
1133 ajDebug("ajSeqoutWriteSet features output needed\n");
1134 ajFeattabOutSetBasename(outseq->Ftquery, outseq->Filename);
1135
1136 if(!ajFeattabOutOpen(outseq->Ftquery, outseq->Ufo))
1137 {
1138 ajWarn("ajSeqoutWriteSet features output "
1139 "failed to open UFO '%S'",
1140 outseq->Ufo);
1141
1142 return ajFalse;
1143 }
1144
1145 ajStrAssignEmptyS(&outseq->Ftquery->Seqname, seq->Name);
1146 ajStrAssignEmptyS(&outseq->Ftquery->Type, seq->Type);
1147 }
1148
1149 /* ajFeattableTrace(outseq->Fttable); */
1150
1151 if(!ajFeattableWriteUfo(outseq->Ftquery, outseq->Fttable,
1152 outseq->Ufo))
1153 {
1154 ajWarn("ajSeqoutWriteSet features output failed UFO: '%S'",
1155 outseq->Ufo);
1156
1157 return ajFalse;
1158 }
1159 }
1160
1161 seqDeclone(outseq);
1162 }
1163
1164 ajSeqoutFlush(outseq);
1165
1166 return ajTrue;
1167 }
1168
1169
1170
1171
1172 /* @section non-standard output ************************************************
1173 **
1174 ** Writes a sequence string in SWISSPROT format with a user-defined line code
1175 **
1176 ** @fdata [AjPSeqout]
1177 ** @fcategory output
1178 **
1179 ** @nam3rule Dump Print sequence to an output file
1180 ** @nam4rule Swisslike Dump in swissprot-like format with a user-defined
1181 ** sequence line code
1182 **
1183 ** @argrule Dump outseq [AjPSeqout] Sequence output object
1184 ** @argrule Dump seq [const AjPStr] Sequence string
1185 ** @argrule Swisslike prefix [const char*] Line prefix code
1186 **
1187 ** @valrule * [void]
1188 ******************************************************************************/
1189
1190
1191
1192
1193 /* @func ajSeqoutDumpSwisslike ************************************************
1194 **
1195 ** Writes a sequence in SWISSPROT format with a user-defined line code
1196 **
1197 ** @param [w] outseq [AjPSeqout] Sequence output object
1198 ** @param [r] seq [const AjPStr] sequence
1199 ** @param [r] prefix [const char *] line prefix code - should be 2 characters
1200 ** @return [void]
1201 **
1202 ** @release 5.0.0
1203 ** @@
1204 ******************************************************************************/
1205
ajSeqoutDumpSwisslike(AjPSeqout outseq,const AjPStr seq,const char * prefix)1206 void ajSeqoutDumpSwisslike(AjPSeqout outseq,
1207 const AjPStr seq, const char *prefix)
1208 {
1209 static SeqPSeqFormat sf = NULL;
1210 AjPFile outf = outseq->File;
1211 ajint mw;
1212 unsigned long long crc;
1213
1214 crc = ajMathCrc64(seq);
1215 mw = (ajint) (0.5+ajSeqstrCalcMolwt(seq));
1216 ajFmtPrintF(outf,
1217 "%-5sSEQUENCE %5d AA; %6d MW; %08X",
1218 prefix, ajStrGetLen(seq), mw,
1219 (crc>>32)&0xffffffff);
1220
1221 ajFmtPrintF(outf,
1222 "%08X CRC64;\n",
1223 crc&0xffffffff);
1224
1225 ajStrAssignS(&outseq->Seq,seq);
1226
1227 seqSeqFormat(ajStrGetLen(outseq->Seq), &sf);
1228 strcpy(sf->endstr, "");
1229 sf->tab = 4;
1230 sf->spacer = 11;
1231 sf->width = 60;
1232
1233 seqWriteSeq(outseq, sf);
1234 seqFormatDel(&sf);
1235
1236 return;
1237 }
1238
1239
1240
1241
1242 /* @funcstatic seqWriteListAppend *********************************************
1243 **
1244 ** Add the latest sequence to the output list. If we are in single
1245 ** sequence mode, also write it out now though it does not seem
1246 ** a great idea in most cases to ask for this.
1247 **
1248 ** @param [u] outseq [AjPSeqout] Sequence output
1249 ** @param [r] seq [const AjPSeq] Sequence to be appended
1250 ** @return [void]
1251 **
1252 ** @release 1.0.0
1253 ** @@
1254 ******************************************************************************/
1255
seqWriteListAppend(AjPSeqout outseq,const AjPSeq seq)1256 static void seqWriteListAppend(AjPSeqout outseq, const AjPSeq seq)
1257 {
1258 AjPSeq listseq;
1259
1260 ajDebug("seqWriteListAppend '%F' %S\n", outseq->File, ajSeqGetNameS(seq));
1261
1262 if(!outseq->Savelist)
1263 outseq->Savelist = ajListNew();
1264
1265 listseq = ajSeqNewSeq(seq);
1266 ajSeqTrim(listseq);
1267
1268 /* if(listseq->Rev)
1269 ajSeqReverseDo(listseq); */ /* already done */
1270
1271 if(outseq->Single)
1272 ajSeqSetName(listseq, outseq->Entryname);
1273 else
1274 ajSeqSetNameMulti(listseq, outseq->Entryname);
1275
1276 if(listseq->Fttable)
1277 ajFeattableSetDefname(listseq->Fttable, listseq->Name);
1278
1279 ajListPushAppend(outseq->Savelist, listseq);
1280
1281 if(outseq->Single)
1282 {
1283 ajDebug("single sequence mode: write immediately\n");
1284 ajSeqoutSetNameDefaultS(outseq, !outseq->Single, outseq->Entryname);
1285
1286 if(outseq->Fttable)
1287 ajFeattableSetDefname(outseq->Fttable, outseq->Name);
1288
1289 /* Calling funclist seqOutFormat() */
1290 (*seqOutFormat[outseq->Format].Write)(outseq);
1291 }
1292
1293 ajDebug("seqWriteListAppend Features: %B IsLocal: %B Count: %d\n",
1294 outseq->Features, ajFeattabOutIsLocal(outseq->Ftquery),
1295 ajFeattableGetSize(outseq->Fttable));
1296
1297 if(outseq->Features &&
1298 !ajFeattabOutIsLocal(outseq->Ftquery))
1299 {
1300 /* seqClone(outseq, seq); */ /* already cloned feature table */
1301 ajDebug("seqWriteListAppend after seqClone Count: %d\n",
1302 ajFeattableGetSize(outseq->Fttable));
1303
1304 if(!ajFeattabOutIsOpen(outseq->Ftquery))
1305 {
1306 ajDebug("seqWriteListAppend features output needed table\n");
1307
1308 ajFeattabOutSetBasename(outseq->Ftquery, outseq->Filename);
1309
1310 if(!ajFeattabOutOpen(outseq->Ftquery, outseq->Ufo))
1311 {
1312 ajWarn("seqWriteListAppend features output "
1313 "failed to open UFO '%S'",
1314 outseq->Ufo);
1315 return;
1316 }
1317
1318 ajStrAssignEmptyS(&outseq->Ftquery->Seqname, seq->Name);
1319 ajStrAssignEmptyS(&outseq->Ftquery->Type, seq->Type);
1320 }
1321
1322 ajDebug("seqWriteListAppend after ajFeattabOutOpen Count: %d\n",
1323 ajFeattableGetSize(outseq->Fttable));
1324 ajFeattableTrace(outseq->Fttable);
1325
1326 if(!ajFeattableWriteUfo(outseq->Ftquery, outseq->Fttable, outseq->Ufo))
1327 {
1328 ajWarn("seqWriteListAppend features output failed UFO: '%S'",
1329 outseq->Ufo);
1330
1331 return;
1332 }
1333
1334 seqDeclone(outseq);
1335 }
1336
1337 return;
1338 }
1339
1340
1341
1342
1343 /* @funcstatic seqWriteListClear **********************************************
1344 **
1345 ** Removes the sequences from the output list. Called after sequences
1346 ** have been written.
1347 **
1348 ** @param [u] outseq [AjPSeqout] Sequence output
1349 ** @return [void]
1350 **
1351 ** @release 6.1.0
1352 ** @@
1353 ******************************************************************************/
1354
seqWriteListClear(AjPSeqout outseq)1355 static void seqWriteListClear(AjPSeqout outseq)
1356 {
1357 AjPSeq seq;
1358
1359 while(ajListPop(outseq->Savelist,(void **)&seq))
1360 ajSeqDel(&seq);
1361
1362 return;
1363 }
1364
1365
1366
1367
1368 /* @funcstatic seqWriteFasta **************************************************
1369 **
1370 ** Writes a sequence in FASTA format.
1371 **
1372 ** @param [u] outseq [AjPSeqout] Sequence output object.
1373 ** @return [void]
1374 **
1375 ** @release 1.0.0
1376 ** @@
1377 ******************************************************************************/
1378
seqWriteFasta(AjPSeqout outseq)1379 static void seqWriteFasta(AjPSeqout outseq)
1380 {
1381 ajuint i;
1382 ajuint ilen;
1383 AjPStr seq = NULL;
1384 ajuint linelen = 60;
1385 ajuint iend;
1386
1387 ajDebug("seqWriteFasta Name '%S'\n",
1388 outseq->Name);
1389
1390 seqDbName(&outseq->Name, outseq->Setoutdb);
1391
1392 ajWritebinByte(outseq->File, '>');
1393 ajWriteline(outseq->File, outseq->Name);
1394
1395 if(ajStrGetLen(outseq->Sv))
1396 ajWritelineSpace(outseq->File, outseq->Sv);
1397 else if(ajStrGetLen(outseq->Acc))
1398 ajWritelineSpace(outseq->File, outseq->Acc);
1399
1400 /* no need to bother with outseq->Gi because we have Sv anyway */
1401
1402 if(ajStrGetLen(outseq->Desc))
1403 ajWritelineSpace(outseq->File, outseq->Desc);
1404
1405 ajWritebinNewline(outseq->File);
1406 ilen = ajStrGetLen(outseq->Seq);
1407
1408 for(i=0; i < ilen; i += linelen)
1409 {
1410 iend = AJMIN(ilen-1, i+linelen-1);
1411 ajStrAssignSubS(&seq, outseq->Seq, i, iend);
1412 ajWritelineNewline(outseq->File, seq);
1413 }
1414
1415 ajStrDel(&seq);
1416
1417 return;
1418 }
1419
1420
1421
1422
1423 /* @funcstatic seqWriteFastqSanger ********************************************
1424 **
1425 ** Writes a sequence in FASTA format with phred scores
1426 **
1427 ** @param [u] outseq [AjPSeqout] Sequence output object.
1428 ** @return [void]
1429 **
1430 ** @release 6.1.0
1431 ** @@
1432 ******************************************************************************/
1433
seqWriteFastqSanger(AjPSeqout outseq)1434 static void seqWriteFastqSanger(AjPSeqout outseq)
1435 {
1436 ajuint i;
1437 ajuint ilen;
1438
1439 /* ajuint j; */
1440 /* ajuint jlen; */
1441
1442 AjPStr seq = NULL;
1443 /*ajuint linelen = 60;*/
1444 /*ajuint iend;*/
1445 /*char qchar;*/
1446
1447 ajDebug("seqWriteFastqSanger Name '%S'\n",
1448 outseq->Name);
1449
1450 seqDbName(&outseq->Name, outseq->Setoutdb);
1451
1452 ajWritebinByte(outseq->File, '@');
1453 ajWriteline(outseq->File, outseq->Name);
1454
1455 if(ajStrGetLen(outseq->Sv))
1456 ajWritelineSpace(outseq->File, outseq->Sv);
1457 else if(ajStrGetLen(outseq->Acc))
1458 ajWritelineSpace(outseq->File, outseq->Acc);
1459
1460 /* no need to bother with outseq->Gi because we have Sv anyway */
1461
1462 if(ajStrGetLen(outseq->Desc))
1463 ajWritelineSpace(outseq->File, outseq->Desc);
1464
1465 ajWritebinNewline(outseq->File);
1466
1467 ajWritelineNewline(outseq->File, outseq->Seq);
1468 ajWritebinByte(outseq->File, '+');
1469 ajWritebinNewline(outseq->File);
1470
1471 ilen = ajStrGetLen(outseq->Seq);
1472 seq = ajStrNewRes(ilen+1);
1473
1474 if(outseq->Accuracy)
1475 {
1476 for(i=0;i<ilen;i++)
1477 {
1478 ajStrAppendK(&seq, 33 + (int) outseq->Accuracy[i]);
1479 }
1480 ajWritelineNewline(outseq->File, seq);
1481 }
1482
1483 else
1484 {
1485 ajStrAppendCountK(&seq,'\"', ilen);
1486 ajWritelineNewline(outseq->File, seq);
1487 }
1488
1489 ajStrDel(&seq);
1490
1491 return;
1492 }
1493
1494
1495
1496
1497 /* @funcstatic seqWriteFastqIllumina ******************************************
1498 **
1499 ** Writes a sequence in FASTA format with Illumina scores
1500 **
1501 ** @param [u] outseq [AjPSeqout] Sequence output object.
1502 ** @return [void]
1503 **
1504 ** @release 6.1.0
1505 ** @@
1506 ******************************************************************************/
1507
seqWriteFastqIllumina(AjPSeqout outseq)1508 static void seqWriteFastqIllumina(AjPSeqout outseq)
1509 {
1510 ajuint j;
1511 ajuint ilen;
1512 AjPStr seq = NULL;
1513 ajint qchar;
1514
1515 ajDebug("seqWriteFastqIllumina Name '%S'\n",
1516 outseq->Name);
1517
1518 seqDbName(&outseq->Name, outseq->Setoutdb);
1519
1520 ajWritebinByte(outseq->File, '@');
1521 ajWriteline(outseq->File, outseq->Name);
1522
1523 if(ajStrGetLen(outseq->Sv))
1524 ajWritelineSpace(outseq->File, outseq->Sv);
1525 else if(ajStrGetLen(outseq->Acc))
1526 ajWritelineSpace(outseq->File, outseq->Acc);
1527
1528 /* no need to bother with outseq->Gi because we have Sv anyway */
1529
1530 if(ajStrGetLen(outseq->Desc))
1531 ajWritelineSpace(outseq->File, outseq->Desc);
1532
1533 ajWritebinNewline(outseq->File);
1534 ilen = ajStrGetLen(outseq->Seq);
1535
1536 ajWritelineNewline(outseq->File, outseq->Seq);
1537 ajWritebinByte(outseq->File, '+');
1538 ajWritebinNewline(outseq->File);
1539
1540 ilen = ajStrGetLen(outseq->Seq);
1541
1542 if(outseq->Accuracy)
1543 {
1544 ajStrAssignClear(&seq);
1545
1546 for(j=0;j<ilen;j++)
1547 {
1548 qchar = 64 + (int) (0.5 + outseq->Accuracy[j]);
1549 if(qchar > 126)
1550 qchar = 126;
1551 else if(qchar < 33)
1552 qchar = 33;
1553 ajStrAppendK(&seq, (char) qchar);
1554 }
1555
1556 ajWritelineNewline(outseq->File, seq);
1557 }
1558
1559 else
1560 {
1561 /*
1562 ** default to a score of 1 (0.75 error : 1 base in 4 is right)
1563 */
1564
1565 ajStrAssignClear(&seq);
1566
1567 ajStrAppendCountK(&seq,'A', ilen);
1568
1569 ajWritelineNewline(outseq->File, seq);
1570 }
1571
1572 ajStrDel(&seq);
1573
1574 return;
1575 }
1576
1577
1578
1579
1580 /* @funcstatic seqWriteFastqSolexa ********************************************
1581 **
1582 ** Writes a sequence in FASTA format with Solexa/Illumina scores
1583 **
1584 ** @param [u] outseq [AjPSeqout] Sequence output object.
1585 ** @return [void]
1586 **
1587 ** @release 6.1.0
1588 ** @@
1589 ******************************************************************************/
1590
seqWriteFastqSolexa(AjPSeqout outseq)1591 static void seqWriteFastqSolexa(AjPSeqout outseq)
1592 {
1593 ajuint i;
1594 ajuint j;
1595 ajuint ilen;
1596 AjPStr seq = NULL;
1597 double sval;
1598
1599 ajDebug("seqWriteFastqSolexa Name '%S'\n",
1600 outseq->Name);
1601
1602 seqDbName(&outseq->Name, outseq->Setoutdb);
1603
1604 ajWritebinByte(outseq->File, '@');
1605 ajWriteline(outseq->File, outseq->Name);
1606
1607 if(ajStrGetLen(outseq->Sv))
1608 ajWritelineSpace(outseq->File, outseq->Sv);
1609 else if(ajStrGetLen(outseq->Acc))
1610 ajWritelineSpace(outseq->File, outseq->Acc);
1611
1612 /* no need to bother with outseq->Gi because we have Sv anyway */
1613
1614 if(ajStrGetLen(outseq->Desc))
1615 ajWritelineSpace(outseq->File, outseq->Desc);
1616
1617 ajWritebinNewline(outseq->File);
1618
1619 ilen = ajStrGetLen(outseq->Seq);
1620
1621 ajWritelineNewline(outseq->File, outseq->Seq);
1622 ajWritebinByte(outseq->File, '+');
1623 ajWritebinNewline(outseq->File);
1624
1625 if(outseq->Accuracy)
1626 {
1627 ajStrAssignClear(&seq);
1628
1629 for(j=0;j<ilen;j++)
1630 {
1631 sval = outseq->Accuracy[j];
1632
1633 i = (int) sval;
1634 if(i > 62)
1635 i = 62;
1636
1637 i = seqQualPhredToIndex[i];
1638
1639 while(seqQualIndexToSolexa[i] < sval)
1640 i++;
1641
1642 ajStrAppendK(&seq, (char) (58+i));
1643 }
1644
1645 ajWritelineNewline(outseq->File, seq);
1646 }
1647
1648 else
1649 {
1650 /*
1651 ** default to a score of -5 (0.75 error : 1 base in 4 is right)
1652 */
1653
1654 ajStrAssignClear(&seq);
1655
1656 ajStrAppendCountK(&seq,';', ilen);
1657
1658 ajWritelineNewline(outseq->File, seq);
1659 }
1660
1661 ajStrDel(&seq);
1662
1663 return;
1664 }
1665
1666
1667
1668
1669 /* #funcstatic seqWriteFastqInt ***********************************************
1670 **
1671 ** Writes a sequence in FASTA format with Solexa integer scores
1672 **
1673 ** #param [u] outseq [AjPSeqout] Sequence output object.
1674 ** #return [void]
1675 ** ##
1676 ******************************************************************************/
1677
1678 /*
1679 //static void seqWriteFastqInt(AjPSeqout outseq)
1680 //{
1681 // ajuint i;
1682 // ajuint j;
1683 // ajuint ilen;
1684 // ajuint jlen;
1685 // AjPStr seq = NULL;
1686 // ajuint linelen = 60;
1687 // ajuint numcount = 20;
1688 // ajuint iend;
1689 // AjPStr db = NULL;
1690 // double sval;
1691 // double pval;
1692 // double qval;
1693 //
1694 // ajStrAssignS(&db, outseq->Setoutdb);
1695 //
1696 // ajDebug("seqWriteFastqInt outseq Db '%S' Setdb '%S' Setoutdb '%S' "
1697 // "Name '%S'\n",
1698 // outseq->Db, outseq->Setdb, outseq->Setoutdb, outseq->Name);
1699 //
1700 // seqDbName(&outseq->Name, db);
1701 //
1702 // ajFmtPrintF(outseq->File, "@%S", outseq->Name);
1703 //
1704 // if(ajStrGetLen(outseq->Sv))
1705 // ajFmtPrintF(outseq->File, " %S", outseq->Sv);
1706 // else if(ajStrGetLen(outseq->Acc))
1707 // ajFmtPrintF(outseq->File, " %S", outseq->Acc);
1708 //
1709 // if(ajStrGetLen(outseq->Desc))
1710 // ajFmtPrintF(outseq->File, " %S", outseq->Desc);
1711 //
1712 // ajFmtPrintF(outseq->File, "\n");
1713 // ilen = ajStrGetLen(outseq->Seq);
1714 //
1715 // for(i=0; i < ilen; i += linelen)
1716 // {
1717 // iend = AJMIN(ilen-1, i+linelen-1);
1718 // ajStrAssignSubS(&seq, outseq->Seq, i, iend);
1719 // ajFmtPrintF(outseq->File, "%S\n", seq);
1720 // }
1721 //
1722 // ajFmtPrintF(outseq->File, "+%S", outseq->Name);
1723 //
1724 // if(ajStrGetLen(outseq->Sv))
1725 // ajFmtPrintF(outseq->File, " %S", outseq->Sv);
1726 // else if(ajStrGetLen(outseq->Acc))
1727 // ajFmtPrintF(outseq->File, " %S", outseq->Acc);
1728 //
1729 // ajFmtPrintF(outseq->File, "\n");
1730 //
1731 // ilen = ajStrGetLen(outseq->Seq);
1732 //
1733 // if(outseq->Accuracy)
1734 // {
1735 // for(i=0; i < ilen; i += numcount)
1736 // {
1737 // iend = AJMIN(ilen-1, i+numcount-1);
1738 // ajStrAssignClear(&seq);
1739 // for(j=i;j<=iend;j++)
1740 // {
1741 // sval = outseq->Accuracy[j];
1742 // pval = 1.0 / pow(10.0, (sval/10.0));
1743 // qval = -10.0 * log10(pval/(1.0 - pval));
1744 // if(j==i)
1745 // ajFmtPrintAppS(&seq, "%2d", (ajint) qval);
1746 // else
1747 // ajFmtPrintAppS(&seq, " %2d", (ajint) qval);
1748 // }
1749 // ajFmtPrintF(outseq->File, "%S\n", seq);
1750 // }
1751 // }
1752 //
1753 // else
1754 // {
1755 // for(i=0; i < ilen; i += linelen)
1756 // {
1757 // iend = AJMIN(ilen-1, i+linelen-1);
1758 // jlen = (iend - i + 1);
1759 // ajStrAssignClear(&seq);
1760 // ajStrAppendC(&seq," 0");
1761 // ajFmtPrintF(outseq->File, "%S\n", seq);
1762 // }
1763 //
1764 // }
1765 //
1766 // ajStrDel(&seq);
1767 // ajStrDel(&db);
1768 //
1769 // return;
1770 //}
1771 */
1772
1773
1774
1775
1776 /* @funcstatic seqNcbiKnowndb *************************************************
1777 **
1778 ** Tests whether a database name is valid for use in NCBI ids.
1779 **
1780 ** @param [r] dbname [const AjPStr] Database name
1781 ** @return [AjBool] True if found
1782 **
1783 ** @release 4.1.0
1784 ** @@
1785 ******************************************************************************/
1786
seqNcbiKnowndb(const AjPStr dbname)1787 static AjBool seqNcbiKnowndb(const AjPStr dbname)
1788 {
1789 static const char* ncbidbs[] = {
1790 "gb", "emb", "dbj", /* big three, listed by NCBI for nr */
1791 "sp", "ref", "pir", "prf", /* others listed by NCBI for nr */
1792 "tpd", "tpe", "tpg", /* third party annotation */
1793 NULL
1794 };
1795 ajint i;
1796
1797 for (i=0;ncbidbs[i];i++)
1798 {
1799 if(ajStrMatchC(dbname, ncbidbs[i]))
1800 return ajTrue;
1801 }
1802
1803 return ajFalse;
1804 }
1805
1806
1807
1808
1809 /* @funcstatic seqWriteGde ****************************************************
1810 **
1811 ** Writes a sequence in GDE format.
1812 **
1813 ** @param [u] outseq [AjPSeqout] Sequence output object.
1814 ** @return [void]
1815 **
1816 ** @release 6.6.0
1817 ** @@
1818 ******************************************************************************/
1819
seqWriteGde(AjPSeqout outseq)1820 static void seqWriteGde(AjPSeqout outseq)
1821 {
1822 ajuint i;
1823 ajuint ilen;
1824 AjPStr seq = NULL;
1825 ajuint linelen = 60;
1826 ajuint iend;
1827
1828 ajDebug("seqWriteGde Name '%S'\n",
1829 outseq->Name);
1830
1831 seqDbName(&outseq->Name, outseq->Setoutdb);
1832
1833 ajWritebinByte(outseq->File, '#');
1834 ajWriteline(outseq->File, outseq->Name);
1835 ajWritebinNewline(outseq->File);
1836
1837 ilen = ajStrGetLen(outseq->Seq);
1838
1839 for(i=0; i < ilen; i += linelen)
1840 {
1841 iend = AJMIN(ilen-1, i+linelen-1);
1842 ajStrAssignSubS(&seq, outseq->Seq, i, iend);
1843 ajWritelineNewline(outseq->File, seq);
1844 }
1845
1846 ajStrDel(&seq);
1847
1848 return;
1849 }
1850
1851
1852
1853
1854 /* @funcstatic seqWriteNcbi ***************************************************
1855 **
1856 ** Writes a sequence in NCBI format.
1857 **
1858 ** @param [u] outseq [AjPSeqout] Sequence output object.
1859 ** @return [void]
1860 **
1861 ** @release 1.0.0
1862 ** @@
1863 ******************************************************************************/
1864
seqWriteNcbi(AjPSeqout outseq)1865 static void seqWriteNcbi(AjPSeqout outseq)
1866 {
1867
1868 ajuint i;
1869 ajuint ilen;
1870 AjPStr seq = NULL;
1871 ajuint linelen = 60;
1872 ajuint iend;
1873 AjPStr version = NULL;
1874 AjPStr dbname = NULL;
1875 static ajuint blordnum=0;
1876
1877 ajDebug("seqWriteNcbi SetDb '%S' Db '%S'\n", outseq->Setdb, outseq->Db);
1878
1879 if(ajStrGetLen(outseq->Setdb))
1880 ajStrAssignS(&dbname, outseq->Setdb);
1881 else if(ajStrGetLen(outseq->Db))
1882 ajStrAssignS(&dbname, outseq->Db);
1883 else
1884 ajStrAssignC(&dbname, "unk");
1885
1886
1887 if(ajStrGetLen(outseq->Sv))
1888 ajStrAssignS(&version, outseq->Sv);
1889 else if(ajStrGetLen(outseq->Acc))
1890 ajStrAssignS(&version, outseq->Acc);
1891 else
1892 ajStrAssignClear(&version);
1893
1894 ajDebug("seqWriteNcbi version '%S' dbname: '%S' KnownDb: %B\n",
1895 version, dbname, seqNcbiKnowndb(dbname));
1896
1897 if(ajStrGetLen(outseq->Gi) &&
1898 !ajStrGetLen(outseq->Db) &&
1899 ajStrMatchCaseS(outseq->Gi, outseq->Name))
1900 {
1901 ajWritebinChar(outseq->File, ">gi|", 4);
1902 ajWriteline(outseq->File, outseq->Gi);
1903 }
1904 else
1905 {
1906 ajWritebinByte(outseq->File, '>');
1907
1908 if(ajStrGetLen(outseq->Gi))
1909 {
1910 ajWritebinChar(outseq->File, "gi|", 3);
1911 ajWriteline(outseq->File, outseq->Gi);
1912 ajWritebinByte(outseq->File, '|');
1913 }
1914
1915 if(seqNcbiKnowndb(dbname))
1916 {
1917 ajWriteline(outseq->File, dbname);
1918 ajWritebinByte(outseq->File, '|');
1919 ajWriteline(outseq->File, version);
1920 ajWritebinByte(outseq->File, '|');
1921 }
1922
1923 else if(ajStrMatchCaseC(dbname, "lcl"))
1924 ajWritebinChar(outseq->File, "lcl|", 4);
1925
1926 else if(ajStrMatchCaseC(dbname, "bbs"))
1927 ajWritebinChar(outseq->File, "bbs|", 4);
1928
1929 else if(ajStrMatchCaseC(dbname, "BL_ORD_ID"))
1930 ajFmtPrintF(outseq->File, "gnl|%S|%d ", dbname, blordnum++);
1931
1932 else
1933 {
1934 ajWritebinChar(outseq->File, "gnl|", 4);
1935 ajWriteline(outseq->File, dbname);
1936 ajWritebinByte(outseq->File, '|');
1937 }
1938
1939 if (!ajStrMatchCaseS(version, outseq->Name))
1940 ajWriteline(outseq->File, outseq->Name);
1941 }
1942
1943 if(ajStrGetLen(version) && !seqNcbiKnowndb(dbname))
1944 {
1945 ajWritebinChar(outseq->File, " (", 2);
1946 ajWriteline(outseq->File, version);
1947 ajWritebinByte(outseq->File, ')');
1948 }
1949
1950 if(ajStrGetLen(outseq->Desc))
1951 ajWritelineSpace(outseq->File, outseq->Desc);
1952
1953 ajWritebinNewline(outseq->File);
1954
1955 ilen = ajStrGetLen(outseq->Seq);
1956
1957 for(i=0; i < ilen; i += linelen)
1958 {
1959 iend = AJMIN(ilen-1, i+linelen-1);
1960 ajStrAssignSubS(&seq, outseq->Seq, i, iend);
1961 ajWritelineNewline(outseq->File, seq);
1962 }
1963
1964 ajStrDel(&seq);
1965 ajStrDel(&dbname);
1966 ajStrDel(&version);
1967
1968 return;
1969 }
1970
1971
1972
1973
1974 /* @funcstatic seqWriteGifasta ************************************************
1975 **
1976 ** Writes a sequence in NCBI format using only the GI number
1977 **
1978 ** @param [u] outseq [AjPSeqout] Sequence output object.
1979 ** @return [void]
1980 **
1981 ** @release 4.1.0
1982 ** @@
1983 ******************************************************************************/
1984
static void seqWriteGifasta(AjPSeqout outseq)
{
    /* serial number printed for BL_ORD_ID entries; kept between calls */
    static ajuint blordcount = 0;

    const ajuint width = 60;            /* residues per output line */
    AjPStr dbtag = NULL;
    AjPStr sv    = NULL;
    AjPStr chunk = NULL;
    ajuint seqlen;
    ajuint start;
    ajuint stop;

    /* database tag: set-level name, else entry-level name, else "unk" */
    if(ajStrGetLen(outseq->Setdb))
        ajStrAssignS(&dbtag, outseq->Setdb);
    else if(ajStrGetLen(outseq->Db))
        ajStrAssignS(&dbtag, outseq->Db);
    else
        ajStrAssignC(&dbtag, "unk");

    /* version: sequence version, else accession, else an empty string */
    if(ajStrGetLen(outseq->Sv))
        ajStrAssignS(&sv, outseq->Sv);
    else if(ajStrGetLen(outseq->Acc))
        ajStrAssignS(&sv, outseq->Acc);
    else
        ajStrAssignClear(&sv);

    ajDebug("seqWriteGifasta version '%S' dbname: '%S' KnownDb: %B\n",
            sv, dbtag, seqNcbiKnowndb(dbtag));

    if(!ajStrGetLen(outseq->Gi) ||
       ajStrGetLen(outseq->Db) ||
       !ajStrMatchCaseS(outseq->Gi, outseq->Name))
    {
        /* full identifier: >gi|<gi>|<database part><name> */
        ajWritebinByte(outseq->File, '>');

        if(ajStrGetLen(outseq->Gi))
            ajFmtPrintF(outseq->File, "gi|%S|", outseq->Gi);
        else
            ajFmtPrintF(outseq->File, "gi|000000|");

        if(seqNcbiKnowndb(dbtag))
            ajFmtPrintF(outseq->File, "%S|%S|", dbtag, sv);
        else if(ajStrMatchCaseC(dbtag, "lcl"))
            ajFmtPrintF(outseq->File, "%S|", dbtag);
        else if(ajStrMatchCaseC(dbtag, "BL_ORD_ID"))
            ajFmtPrintF(outseq->File, "gnl|%S|%d ", dbtag, blordcount++);
        else
            ajFmtPrintF(outseq->File, "gnl|%S|", dbtag);

        /* the name is left out when it only repeats the version */
        if(!ajStrMatchCaseS(sv, outseq->Name))
            ajWriteline(outseq->File, outseq->Name);
    }
    else
    {
        /* GI given, no database, and the name is just the GI number */
        ajFmtPrintF(outseq->File, ">gi|%S", outseq->Gi);
    }

    /* for databases not known to NCBI the version follows in brackets */
    if(ajStrGetLen(sv) && !seqNcbiKnowndb(dbtag))
        ajFmtPrintF(outseq->File, " (%S)", sv);

    if(ajStrGetLen(outseq->Desc))
        ajWritelineSpace(outseq->File, outseq->Desc);

    ajWritebinNewline(outseq->File);

    /* sequence body, 'width' characters per line */
    seqlen = ajStrGetLen(outseq->Seq);
    start  = 0;

    while(start < seqlen)
    {
        stop = start + width - 1;

        if(stop > seqlen - 1)
            stop = seqlen - 1;

        ajStrAssignSubS(&chunk, outseq->Seq, start, stop);
        ajWritelineNewline(outseq->File, chunk);

        start += width;
    }

    ajStrDel(&chunk);
    ajStrDel(&dbtag);
    ajStrDel(&sv);

    return;
}
2067
2068
2069
2070
2071 /* @funcstatic seqWriteGcg ****************************************************
2072 **
2073 ** Writes a sequence in GCG format.
2074 **
2075 ** @param [u] outseq [AjPSeqout] Sequence output object.
2076 ** @return [void]
2077 **
2078 ** @release 1.0.0
2079 ** @@
2080 ******************************************************************************/
2081
seqWriteGcg(AjPSeqout outseq)2082 static void seqWriteGcg(AjPSeqout outseq)
2083 {
2084
2085 ajuint ilen;
2086 char ctype = 'N';
2087 ajuint check;
2088 SeqPSeqFormat sf = NULL;
2089
2090 ilen = ajStrGetLen(outseq->Seq);
2091
2092 if(!outseq->Type)
2093 ajFmtPrintF(outseq->File, "!!NA_SEQUENCE 1.0\n\n");
2094 else if(ajStrGetCharFirst(outseq->Type) == 'P')
2095 {
2096 ajFmtPrintF(outseq->File, "!!AA_SEQUENCE 1.0\n\n");
2097 ctype = 'P';
2098 }
2099 else
2100 ajFmtPrintF(outseq->File, "!!NA_SEQUENCE 1.0\n\n");
2101
2102 ajSeqGapS(&outseq->Seq, '.');
2103 check = ajSeqoutGetCheckgcg(outseq);
2104
2105 if(ajStrGetLen(outseq->Desc))
2106 ajFmtPrintF(outseq->File, "%S\n\n", outseq->Desc);
2107
2108 ajFmtPrintF(outseq->File,
2109 "%S Length: %d Type: %c Check: %4d ..\n",
2110 outseq->Name, ilen, ctype, check);
2111
2112 if(sf)
2113 seqSeqFormat(ajStrGetLen(outseq->Seq), &sf);
2114 else
2115 {
2116 seqSeqFormat(ajStrGetLen(outseq->Seq), &sf);
2117 sf->spacer = 11;
2118 sf->numleft = ajTrue;
2119 sf->skipbefore = ajTrue;
2120 strcpy(sf->endstr, "\n"); /* to help with misreads at EOF */
2121 }
2122
2123 seqWriteSeq(outseq, sf);
2124 seqFormatDel(&sf);
2125
2126 return;
2127 }
2128
2129
2130
2131
2132 /* @funcstatic seqWriteNibble ************************************************
2133 **
2134 ** Writes a sequence in Nibble (half-byte compressed) format.
2135 **
2136 ** @param [u] outseq [AjPSeqout] Sequence output object.
2137 ** @return [void]
2138 **
2139 ** @release 6.6.0
2140 ** @@
2141 ******************************************************************************/
2142
seqWriteNibble(AjPSeqout outseq)2143 static void seqWriteNibble(AjPSeqout outseq)
2144 {
2145 ajuint imagic = 0x6BE9ED3A;
2146 const char *cp;
2147 AjBool fullbyte = AJTRUE;
2148 union lbytes
2149 {
2150 char chars[4];
2151 ajuint i;
2152 } seqbyte;
2153
2154 ajFileSeek(outseq->File, 0L, SEEK_SET);
2155
2156 ajWritebinUint4(outseq->File, imagic);
2157
2158 ajWritebinUint4(outseq->File, (ajuint) ajStrGetLen(outseq->Seq));
2159
2160 seqbyte.i = 0;
2161
2162 cp = ajStrGetPtr(outseq->Seq);
2163 while(*cp)
2164 {
2165 switch(*cp++)
2166 {
2167 case 'A':
2168 case 'a':
2169 seqbyte.chars[0] |= '\002';
2170 break;
2171 case 'C':
2172 case 'c':
2173 seqbyte.chars[0] |= '\001';
2174 break;
2175 case 'G':
2176 case 'g':
2177 seqbyte.chars[0] |= '\003';
2178 break;
2179 case 'T':
2180 case 't':
2181 case 'U':
2182 case 'u':
2183 seqbyte.chars[0] |= '\000';
2184 break;
2185 default:
2186 seqbyte.chars[0] |= '\004';
2187 break;
2188 }
2189 fullbyte = !fullbyte;
2190
2191 if(fullbyte)
2192 {
2193 ajWritebinByte(outseq->File, seqbyte.chars[0]);
2194 seqbyte.i = 0;
2195 }
2196 else
2197 {
2198 seqbyte.i = seqbyte.i << 4;
2199 }
2200 }
2201
2202 if(!fullbyte)
2203 {
2204 ajWritebinByte(outseq->File, seqbyte.chars[0]);
2205 }
2206
2207 return;
2208 }
2209
2210
2211
2212
2213 /* @funcstatic seqWriteStaden *************************************************
2214 **
2215 ** Writes a sequence in Staden format.
2216 **
2217 ** @param [u] outseq [AjPSeqout] Sequence output object.
2218 ** @return [void]
2219 **
2220 ** @release 1.0.0
2221 ** @@
2222 ******************************************************************************/
2223
seqWriteStaden(AjPSeqout outseq)2224 static void seqWriteStaden(AjPSeqout outseq)
2225 {
2226 static SeqPSeqFormat sf = NULL;
2227
2228 ajFmtPrintF(outseq->File, "<%S---->\n", outseq->Name);
2229 seqSeqFormat(ajStrGetLen(outseq->Seq), &sf);
2230
2231 sf->width = 60;
2232 seqWriteSeq(outseq, sf);
2233 seqFormatDel(&sf);
2234
2235 return;
2236 }
2237
2238
2239
2240
2241 /* @funcstatic seqWriteText ***************************************************
2242 **
2243 ** Writes a sequence in plain Text format.
2244 **
2245 ** @param [u] outseq [AjPSeqout] Sequence output object.
2246 ** @return [void]
2247 **
2248 ** @release 1.0.0
2249 ** @@
2250 ******************************************************************************/
2251
seqWriteText(AjPSeqout outseq)2252 static void seqWriteText(AjPSeqout outseq)
2253 {
2254 static SeqPSeqFormat sf = NULL;
2255
2256 seqSeqFormat(ajStrGetLen(outseq->Seq), &sf);
2257
2258 seqWriteSeq(outseq, sf);
2259 seqFormatDel(&sf);
2260
2261 return;
2262 }
2263
2264
2265
2266
2267 /* @funcstatic seqWriteHennig86 ***********************************************
2268 **
2269 ** Writes a sequence in Hennig86 format.
2270 **
2271 ** @param [u] outseq [AjPSeqout] Sequence output object.
2272 ** @return [void]
2273 **
2274 ** @release 1.0.0
2275 ** @@
2276 ******************************************************************************/
2277
seqWriteHennig86(AjPSeqout outseq)2278 static void seqWriteHennig86(AjPSeqout outseq)
2279 {
2280 ajulong isize;
2281 ajuint ilen = 0;
2282 ajulong i = 0UL;
2283 void** seqs = NULL;
2284 AjPSeq seq;
2285 AjPSeq* seqarr;
2286 ajulong itest;
2287 AjPStr sseq = NULL;
2288 char* cp;
2289
2290 ajDebug("seqWriteHennig86 list size %Lu\n",
2291 ajListGetLength(outseq->Savelist));
2292
2293 isize = ajListGetLength(outseq->Savelist);
2294
2295 if(!isize)
2296 return;
2297
2298 itest = ajListToarray(outseq->Savelist, (void***) &seqs);
2299 ajDebug("ajListToarray listed %Lu items\n", itest);
2300 seqarr = (AjPSeq*) seqs;
2301
2302 for(i=0UL; i < isize; i++)
2303 {
2304 seq = seqarr[i];
2305
2306 if(ilen < ajSeqGetLen(seq))
2307 ilen = ajSeqGetLen(seq);
2308 }
2309
2310 ajFmtPrintF(outseq->File, /* header text */
2311 "xread\n");
2312
2313 ajFmtPrintF(outseq->File, /* title text */
2314 "' Written by EMBOSS %D '\n", ajTimeRefToday());
2315
2316 ajFmtPrintF(outseq->File, /* length, count */
2317 "%u %Lu\n", ilen, isize);
2318
2319 for(i=0UL; i < isize; i++)
2320 {
2321 /* loop over sequences */
2322 seq = seqarr[i];
2323 ajStrAssignS(&sseq, seq->Seq);
2324
2325 cp = ajStrGetuniquePtr(&sseq);
2326
2327 while(*cp)
2328 {
2329 switch(*cp)
2330 {
2331 case 'A':
2332 case 'a':
2333 *cp = '0';
2334 break;
2335 case 'T':
2336 case 't':
2337 case 'U':
2338 case 'u':
2339 *cp = '1';
2340 break;
2341 case 'G':
2342 case 'g':
2343 *cp = '2';
2344 break;
2345 case 'C':
2346 case 'c':
2347 *cp = '3';
2348 break;
2349 default:
2350 *cp = '?';
2351 break;
2352 }
2353 cp++;
2354 }
2355
2356 ajWritelineNewline(outseq->File, seq->Name);
2357 ajWritelineNewline(outseq->File, sseq);
2358 }
2359
2360 ajWritebinByte(outseq->File, ';');
2361 ajWritebinNewline(outseq->File);
2362
2363 ajStrDel(&sseq);
2364 AJFREE(seqs);
2365
2366 return;
2367 }
2368
2369
2370
2371
2372 /* @funcstatic seqWriteMega ***************************************************
2373 **
2374 ** Writes a sequence in Mega format.
2375 **
2376 ** @param [u] outseq [AjPSeqout] Sequence output object.
2377 ** @return [void]
2378 **
2379 ** @release 1.0.0
2380 ** @@
2381 ******************************************************************************/
2382
seqWriteMega(AjPSeqout outseq)2383 static void seqWriteMega(AjPSeqout outseq)
2384 {
2385 ajulong isize;
2386 ajuint ilen = 0;
2387 ajulong i = 0;
2388 void** seqs = NULL;
2389 const AjPSeq seq;
2390 const AjPSeq seqfirst;
2391 AjPSeq* seqarr;
2392 ajulong itest;
2393 ajuint namewidth;
2394 AjPStr sseq = NULL;
2395 AjPStr sseqfirst = NULL;
2396 ajuint ipos;
2397 ajuint iend;
2398 ajuint wid = 50;
2399 AjBool onedesc = ajTrue;
2400 AjBool onegene = ajTrue;
2401 AjPSeqGene gene = NULL;
2402 AjPStr genestr = NULL;
2403
2404 isize = ajListGetLength(outseq->Savelist);
2405
2406 if(!isize)
2407 return;
2408
2409 /* test for group description only when writing 2+ sequences (a set) */
2410
2411 itest = ajListToarray(outseq->Savelist, (void***) &seqs);
2412 ajDebug("ajListToarray listed %Lu items\n", itest);
2413 seqarr = (AjPSeq*) seqs;
2414
2415 seqfirst = seqarr[0];
2416
2417 namewidth = ajStrGetLen(seqfirst->Name);
2418
2419 if(isize == 1)
2420 {
2421 onegene = ajFalse;
2422 onedesc = ajFalse;
2423 }
2424 else
2425 {
2426 onegene = ajTrue;
2427 onedesc = ajTrue;
2428
2429 ilen = ajSeqGetLen(seqfirst);
2430
2431 if(!ajStrGetLen(seqfirst->Desc))
2432 onedesc = ajFalse;
2433
2434 if(ajListPeek(seqfirst->Genelist, (void*) &gene))
2435 ajStrAssignS(&genestr, gene->Name);
2436 else
2437 onegene = ajFalse;
2438
2439 for(i=1UL; i < isize; i++)
2440 {
2441 seq = seqarr[i];
2442
2443 if(ajStrGetLen(seq->Name) > namewidth)
2444 namewidth = ajStrGetLen(seq->Name);
2445 if(ilen < ajSeqGetLen(seq))
2446 ilen = ajSeqGetLen(seq);
2447 if(onedesc && !ajStrMatchS(seq->Desc, seqfirst->Desc))
2448 onedesc = ajFalse;
2449 if(onegene && ajListPeek(seq->Genelist, (void*) &gene))
2450 {
2451 if(!ajStrMatchS(genestr, gene->Name))
2452 onegene = ajFalse;
2453 }
2454 }
2455 }
2456
2457 if(namewidth > 40)
2458 namewidth = 40;
2459
2460 if(outseq->Count == (ajint) isize)
2461 {
2462 ajFmtPrintF(outseq->File, /* header text */
2463 "#mega\n");
2464 if(onedesc)
2465 ajFmtPrintF(outseq->File, /* dummy title */
2466 "!Title: %S;\n", seqfirst->Desc);
2467 else
2468 ajFmtPrintF(outseq->File, /* dummy title */
2469 "!Title: Written by EMBOSS %D;\n", ajTimeRefToday());
2470
2471 ajFmtPrintF(outseq->File,"!Format\n");
2472 if(ajSeqIsProt(seqfirst))
2473 ajFmtPrintF(outseq->File,
2474 " DataType=Protein DataFormat=Interleaved\n");
2475 else
2476 ajFmtPrintF(outseq->File,
2477 " DataType=Nucleotide DataFormat=Interleaved\n");
2478
2479 /*
2480 ** this is sensible for one set of sequences,
2481 ** but multiple sequence sets cause problems when it appears on later sets
2482 */
2483
2484 /*ajFmtPrintF(outseq->File,
2485 " NSeqs=%Lu NSites=%u\n", isize, ilen);*/
2486
2487 ajFmtPrintF(outseq->File,
2488 " Identical=. Indel=- Missing=?\n");
2489 if(!ajSeqIsProt(seqfirst))
2490 ajFmtPrintF(outseq->File,
2491 " CodeTable=Standard\n");
2492 ajFmtPrintF(outseq->File,
2493 " ;\n\n");
2494 }
2495
2496 ajWritebinNewline(outseq->File);
2497 ajWritebinNewline(outseq->File);
2498
2499 if(onegene)
2500 ajFmtPrintF(outseq->File,
2501 "!Gene=%S;\n", genestr);
2502
2503 for(ipos=1; ipos <= ilen; ipos += wid)
2504 {
2505 /* interleaved */
2506 iend = ipos + wid -1;
2507
2508 if(iend > ilen)
2509 iend = ilen;
2510
2511 ajStrAssignSubS(&sseqfirst, seqfirst->Seq, ipos-1, iend-1);
2512 ajSeqGapS(&sseqfirst, '-');
2513
2514 ajWritebinNewline(outseq->File); /* blank space for comments */
2515
2516 for(i=0; i < isize; i++)
2517 {
2518 /* loop over sequences */
2519 seq = seqarr[i];
2520 ajStrAssignSubS(&sseq, seq->Seq, ipos-1, iend-1);
2521 ajSeqGapS(&sseq, '-');
2522 if(i)
2523 ajStrMaskIdent(&sseq, sseqfirst, '.');
2524 if(!onedesc && ipos == 1 && ajStrGetLen(seq->Desc))
2525 ajFmtPrintF(outseq->File, "[%S]\n", seq->Desc);
2526 ajFmtPrintF(outseq->File, "#%-*.*S %S\n",
2527 namewidth, namewidth, seq->Name, sseq);
2528 }
2529 }
2530
2531 ajWritebinNewline(outseq->File);
2532
2533 ajStrDel(&genestr);
2534 ajStrDel(&sseq);
2535 ajStrDel(&sseqfirst);
2536 AJFREE(seqs);
2537
2538 return;
2539 }
2540
2541
2542
2543
2544 /* @funcstatic seqWriteMeganon ************************************************
2545 **
2546 ** Writes a sequence in Mega non-interleaved format.
2547 **
2548 ** @param [u] outseq [AjPSeqout] Sequence output object.
2549 ** @return [void]
2550 **
2551 ** @release 1.0.0
2552 ** @@
2553 ******************************************************************************/
2554
seqWriteMeganon(AjPSeqout outseq)2555 static void seqWriteMeganon(AjPSeqout outseq)
2556 {
2557 ajulong isize;
2558 ajuint ilen = 0;
2559 ajulong i = 0;
2560 void** seqs = NULL;
2561 const AjPSeq seq;
2562 const AjPSeq seqfirst;
2563 AjPSeq* seqarr;
2564 ajulong itest;
2565 ajuint namewidth;
2566 AjPStr sseq = NULL;
2567 AjPStr sseqfirst = NULL;
2568 AjBool onedesc = ajTrue;
2569 AjBool onegene = ajTrue;
2570 AjPSeqGene gene = NULL;
2571 AjPStr genestr = NULL;
2572
2573 ajDebug("seqWriteMeganon list size %Lu\n",
2574 ajListGetLength(outseq->Savelist));
2575
2576 isize = ajListGetLength(outseq->Savelist);
2577
2578 if(!isize)
2579 return;
2580
2581 itest = ajListToarray(outseq->Savelist, (void***) &seqs);
2582 ajDebug("ajListToarray listed %Lu items\n", itest);
2583 seqarr = (AjPSeq*) seqs;
2584
2585 seqfirst = seqarr[0];
2586
2587 namewidth = ajStrGetLen(seqfirst->Name);
2588
2589 if(isize == 1)
2590 {
2591 onegene = ajFalse;
2592 onedesc = ajFalse;
2593 }
2594 else
2595 {
2596 onegene = ajTrue;
2597 onedesc = ajTrue;
2598
2599 ilen = ajSeqGetLen(seqfirst);
2600
2601 if(!ajStrGetLen(seqfirst->Desc))
2602 onedesc = ajFalse;
2603
2604 if(ajListPeek(seqfirst->Genelist, (void*) &gene))
2605 ajStrAssignS(&genestr, gene->Name);
2606 else
2607 onegene = ajFalse;
2608
2609 for(i=1UL; i < isize; i++)
2610 {
2611 seq = seqarr[i];
2612
2613 if(ajStrGetLen(seq->Name) > namewidth)
2614 namewidth = ajStrGetLen(seq->Name);
2615
2616 if(ilen < ajSeqGetLen(seq))
2617 ilen = ajSeqGetLen(seq);
2618 if(onedesc && !ajStrMatchS(seq->Desc, seqfirst->Desc))
2619 onedesc = ajFalse;
2620 if(onegene && ajListPeek(seq->Genelist, (void*) &gene))
2621 {
2622 if(!ajStrMatchS(genestr, gene->Name))
2623 onegene = ajFalse;
2624 }
2625 }
2626 }
2627
2628 if(namewidth > 40)
2629 namewidth = 40;
2630
2631 for(i=0UL; i < isize; i++)
2632 {
2633 seq = seqarr[i];
2634
2635 if(ilen < ajSeqGetLen(seq))
2636 ilen = ajSeqGetLen(seq);
2637 if(!ajStrMatchS(seq->Desc, seqfirst->Desc))
2638 onedesc = ajFalse;
2639 }
2640
2641 if(outseq->Count == (ajint) isize)
2642 {
2643 ajFmtPrintF(outseq->File, /* header text */
2644 "#mega\n");
2645 if(onedesc)
2646 ajFmtPrintF(outseq->File, /* dummy title */
2647 "!Title: %S;\n", seqfirst->Desc);
2648 else
2649 ajFmtPrintF(outseq->File, /* dummy title */
2650 "!Title: Written by EMBOSS %D;\n", ajTimeRefToday());
2651
2652 ajFmtPrintF(outseq->File,"!Format\n");
2653 if(ajSeqIsProt(seqfirst))
2654 ajFmtPrintF(outseq->File,
2655 " DataType=Protein\n");
2656 else
2657 ajFmtPrintF(outseq->File,
2658 " DataType=Nucleotide\n");
2659 /*ajFmtPrintF(outseq->File,
2660 " NSeqs=%Lu NSites=%u\n", isize, ilen);*/
2661 ajFmtPrintF(outseq->File,
2662 " Identical=. Indel=- Missing=?\n");
2663 if(!ajSeqIsProt(seqfirst))
2664 ajFmtPrintF(outseq->File,
2665 " CodeTable=Standard\n");
2666 ajFmtPrintF(outseq->File,
2667 " ;\n\n");
2668 }
2669
2670 ajStrAssignS(&sseqfirst, seqfirst->Seq);
2671 ajSeqGapS(&sseqfirst, '-');
2672
2673 if(onegene)
2674 ajFmtPrintF(outseq->File,
2675 "!Gene=%S;\n", genestr);
2676
2677 for(i=0; i < isize; i++)
2678 { /* loop over sequences */
2679 seq = seqarr[i];
2680 ajStrAssignS(&sseq, seq->Seq);
2681 ajSeqGapS(&sseq, '-');
2682 if(i)
2683 ajStrMaskIdent(&sseq, sseqfirst, '.');
2684 if(!onedesc && ajStrGetLen(seq->Desc))
2685 ajFmtPrintF(outseq->File, "[%S]\n", seq->Desc);
2686 ajFmtPrintF(outseq->File,
2687 "#%-*.*S\n%S\n",
2688 namewidth, namewidth, seq->Name, sseq);
2689 }
2690
2691 ajStrDel(&genestr);
2692 ajStrDel(&sseq);
2693 ajStrDel(&sseqfirst);
2694 AJFREE(seqs);
2695
2696 return;
2697 }
2698
2699
2700
2701
2702 /* @funcstatic seqWriteNexus **************************************************
2703 **
2704 ** Writes a sequence in Nexus interleaved format.
2705 **
2706 ** @param [u] outseq [AjPSeqout] Sequence output object.
2707 ** @return [void]
2708 **
2709 ** @release 1.0.0
2710 ** @@
2711 ******************************************************************************/
2712
seqWriteNexus(AjPSeqout outseq)2713 static void seqWriteNexus(AjPSeqout outseq)
2714 {
2715 ajulong isize;
2716 ajuint ilen = 0;
2717 ajulong i = 0;
2718 void** seqs = NULL;
2719 AjPSeq seq;
2720 AjPSeq* seqarr;
2721 ajulong itest;
2722 AjPStr sseq = NULL;
2723 ajuint ipos;
2724 ajuint iend;
2725 ajuint wid = 50;
2726
2727 ajDebug("seqWriteNexus list size %Lu\n",
2728 ajListGetLength(outseq->Savelist));
2729
2730 isize = ajListGetLength(outseq->Savelist);
2731
2732 if(!isize)
2733 return;
2734
2735 itest = ajListToarray(outseq->Savelist, (void***) &seqs);
2736 ajDebug("ajListToarray listed %Lu items\n", itest);
2737 seqarr = (AjPSeq*) seqs;
2738
2739 for(i=0UL; i < isize; i++)
2740 {
2741 seq = seqarr[i];
2742
2743 if(ilen < ajSeqGetLen(seq))
2744 ilen = ajSeqGetLen(seq);
2745 }
2746
2747 for(i=0UL; i < isize; i++)
2748 {
2749 seq = seqarr[i];
2750 ajSeqGapLen(seq, '-', '-', ilen); /* need to pad if any are shorter */
2751 }
2752
2753 ajFmtPrintF(outseq->File, /* header text */
2754 "#NEXUS\n");
2755 ajFmtPrintF(outseq->File, /* dummy title */
2756 "[TITLE: Written by EMBOSS %D]\n\n", ajTimeRefToday());
2757 ajFmtPrintF(outseq->File,
2758 "begin data;\n");
2759 ajFmtPrintF(outseq->File, /* count, length */
2760 "dimensions ntax=%Lu nchar=%u;\n", isize, ilen);
2761 ajDebug("seqWriteNexus outseq->Type '%S'\n", outseq->Type);
2762
2763 if(ajStrGetCharFirst(outseq->Type) == 'P')
2764 ajFmtPrintF(outseq->File,
2765 "format interleave datatype=protein missing=X gap=-;\n");
2766 else
2767 ajFmtPrintF(outseq->File,
2768 "format interleave datatype=DNA missing=N gap=-;\n");
2769
2770 ajWritebinNewline(outseq->File);
2771
2772 ajFmtPrintF(outseq->File,
2773 "matrix\n");
2774 for(ipos=1; ipos <= ilen; ipos += wid)
2775 { /* interleaved */
2776 iend = ipos +wid -1;
2777
2778 if(iend > ilen)
2779 iend = ilen;
2780
2781 if(ipos > 1)
2782 ajWritebinNewline(outseq->File);
2783
2784 for(i=0; i < isize; i++)
2785 { /* loop over sequences */
2786 seq = seqarr[i];
2787 ajStrAssignSubS(&sseq, seq->Seq, ipos-1, iend-1);
2788 ajSeqGapS(&sseq, '-');
2789 ajFmtPrintF(outseq->File,
2790 "%-20.20S %S\n",
2791 seq->Name, sseq);
2792 }
2793 }
2794
2795 ajFmtPrintF(outseq->File,
2796 ";\n\n");
2797 ajFmtPrintF(outseq->File,
2798 "end;\n");
2799 ajFmtPrintF(outseq->File,
2800 "begin assumptions;\n");
2801 ajFmtPrintF(outseq->File,
2802 "options deftype=unord;\n");
2803 ajFmtPrintF(outseq->File,
2804 "end;\n");
2805
2806 ajStrDel(&sseq);
2807 AJFREE(seqs);
2808
2809 return;
2810 }
2811
2812
2813
2814
2815 /* @funcstatic seqWriteNexusnon ***********************************************
2816 **
2817 ** Writes a sequence in Nexus non-interleaved format.
2818 **
2819 ** @param [u] outseq [AjPSeqout] Sequence output object.
2820 ** @return [void]
2821 **
2822 ** @release 1.0.0
2823 ** @@
2824 ******************************************************************************/
2825
seqWriteNexusnon(AjPSeqout outseq)2826 static void seqWriteNexusnon(AjPSeqout outseq)
2827 {
2828 ajulong isize;
2829 ajuint ilen = 0;
2830 ajulong i = 0;
2831 void** seqs = NULL;
2832 AjPSeq seq;
2833 AjPSeq* seqarr;
2834 ajulong itest;
2835 AjPStr sseq = NULL;
2836
2837 ajDebug("seqWriteNexusnon list size %Lu\n",
2838 ajListGetLength(outseq->Savelist));
2839
2840 isize = ajListGetLength(outseq->Savelist);
2841
2842 if(!isize)
2843 return;
2844
2845 itest = ajListToarray(outseq->Savelist, (void***) &seqs);
2846 ajDebug("ajListToarray listed %Lu items\n", itest);
2847 seqarr = (AjPSeq*) seqs;
2848
2849 for(i=0UL; i < isize; i++)
2850 {
2851 seq = seqarr[i];
2852
2853 if(ilen < ajSeqGetLen(seq))
2854 ilen = ajSeqGetLen(seq);
2855 }
2856
2857 ajFmtPrintF(outseq->File, /* header text */
2858 "#NEXUS\n");
2859 ajFmtPrintF(outseq->File, /* dummy title */
2860 "[TITLE: Written by EMBOSS %D]\n\n", ajTimeRefToday());
2861 ajFmtPrintF(outseq->File,
2862 "begin data;\n");
2863 ajFmtPrintF(outseq->File, /* count, length */
2864 "dimensions ntax=%Lu nchar=%u;\n", isize, ilen);
2865
2866 if(ajStrGetCharFirst(outseq->Type) == 'P')
2867 ajFmtPrintF(outseq->File,
2868 "format datatype=protein missing=X gap=-;\n");
2869 else
2870 ajFmtPrintF(outseq->File,
2871 "format datatype=DNA missing=N gap=-;\n");
2872
2873 ajWritebinNewline(outseq->File);
2874
2875 ajFmtPrintF(outseq->File,
2876 "matrix\n");
2877
2878 for(i=0UL; i < isize; i++)
2879 {
2880 /* loop over sequences */
2881 seq = seqarr[i];
2882 ajStrAssignS(&sseq, seq->Seq);
2883 ajSeqGapS(&sseq, '-');
2884 ajFmtPrintF(outseq->File,
2885 "%S\n%S\n",
2886 seq->Name, sseq);
2887 }
2888
2889 ajFmtPrintF(outseq->File,
2890 ";\n\n");
2891 ajFmtPrintF(outseq->File,
2892 "end;\n");
2893 ajFmtPrintF(outseq->File,
2894 "begin assumptions;\n");
2895 ajFmtPrintF(outseq->File,
2896 "options deftype=unord;\n");
2897 ajFmtPrintF(outseq->File,
2898 "end;\n");
2899
2900 ajStrDel(&sseq);
2901 AJFREE(seqs);
2902
2903 return;
2904 }
2905
2906
2907
2908
2909 /* @funcstatic seqWriteJackknifer *********************************************
2910 **
2911 ** Writes a sequence in Jackknifer format.
2912 **
2913 ** @param [u] outseq [AjPSeqout] Sequence output object.
2914 ** @return [void]
2915 **
2916 ** @release 1.0.0
2917 ** @@
2918 ******************************************************************************/
2919
seqWriteJackknifer(AjPSeqout outseq)2920 static void seqWriteJackknifer(AjPSeqout outseq)
2921 {
2922 ajulong isize;
2923 ajuint ilen = 0;
2924 ajulong i = 0;
2925 void** seqs = NULL;
2926 AjPSeq seq;
2927 AjPSeq* seqarr;
2928 ajulong itest;
2929 AjPStr sseq = NULL;
2930 ajuint ipos;
2931 ajuint iend;
2932 ajuint wid = 50;
2933 AjPStr tmpid = NULL;
2934
2935 ajDebug("seqWriteJackknifer list size %Lu\n",
2936 ajListGetLength(outseq->Savelist));
2937
2938 isize = ajListGetLength(outseq->Savelist);
2939
2940 if(!isize)
2941 return;
2942
2943 itest = ajListToarray(outseq->Savelist, (void***) &seqs);
2944 ajDebug("ajListToarray listed %Lu items\n", itest);
2945 seqarr = (AjPSeq*) seqs;
2946
2947 for(i=0UL; i < isize; i++)
2948 {
2949 seq = seqarr[i];
2950
2951 if(ilen < ajSeqGetLen(seq))
2952 ilen = ajSeqGetLen(seq);
2953 }
2954
2955 ajFmtPrintF(outseq->File, /* header text */
2956 "' Written by EMBOSS %D \n", ajTimeRefToday());
2957
2958 for(ipos=1; ipos <= ilen; ipos += wid)
2959 { /* interleaved */
2960 iend = ipos +wid -1;
2961
2962 if(iend > ilen)
2963 iend = ilen;
2964
2965 for(i=0; i < isize; i++)
2966 { /* loop over sequences */
2967 seq = seqarr[i];
2968 ajStrAssignSubS(&sseq, seq->Seq, ipos-1, iend-1);
2969 ajSeqGapS(&sseq, '-');
2970 ajFmtPrintS(&tmpid, "(%S)", seq->Name);
2971 ajFmtPrintF(outseq->File,
2972 "%-20.20S %S\n",
2973 tmpid, sseq);
2974 }
2975 }
2976
2977 ajFmtPrintF(outseq->File, ";\n");
2978
2979 ajStrDel(&sseq);
2980 ajStrDel(&tmpid);
2981 AJFREE(seqs);
2982
2983 return;
2984 }
2985
2986
2987
2988
2989 /* @funcstatic seqWriteJackknifernon ******************************************
2990 **
2991 ** Writes a sequence in Jackknifer on-interleaved format.
2992 **
2993 ** @param [u] outseq [AjPSeqout] Sequence output object.
2994 ** @return [void]
2995 **
2996 ** @release 1.0.0
2997 ** @@
2998 ******************************************************************************/
2999
seqWriteJackknifernon(AjPSeqout outseq)3000 static void seqWriteJackknifernon(AjPSeqout outseq)
3001 {
3002 ajulong isize;
3003 ajuint ilen = 0;
3004 ajulong i = 0;
3005 void** seqs = NULL;
3006 AjPSeq seq;
3007 AjPSeq* seqarr;
3008 ajulong itest;
3009 AjPStr sseq = NULL;
3010 ajuint ipos;
3011 ajuint iend;
3012 ajuint wid = 50;
3013 static AjPStr tmpid = NULL;
3014
3015 ajDebug("seqWriteJackknifernon list size %Lu\n",
3016 ajListGetLength(outseq->Savelist));
3017
3018 isize = ajListGetLength(outseq->Savelist);
3019
3020 if(!isize)
3021 return;
3022
3023 itest = ajListToarray(outseq->Savelist, (void***) &seqs);
3024 ajDebug("ajListToarray listed %Lu items\n", itest);
3025 seqarr = (AjPSeq*) seqs;
3026
3027 for(i=0UL; i < isize; i++)
3028 {
3029 seq = seqarr[i];
3030
3031 if(ilen < ajSeqGetLen(seq))
3032 ilen = ajSeqGetLen(seq);
3033 }
3034
3035 ajFmtPrintF(outseq->File, /* header text */
3036 "' Written by EMBOSS %D \n", ajTimeRefToday());
3037
3038 for(i=0; i < isize; i++)
3039 {
3040 /* loop over sequences */
3041 seq = seqarr[i];
3042
3043 for(ipos=1; ipos <= ilen; ipos += wid)
3044 { /* interleaved */
3045 iend = ipos +wid -1;
3046
3047 if(iend > ilen)
3048 iend = ilen;
3049
3050 ajStrAssignSubS(&sseq, seq->Seq, ipos-1, iend-1);
3051 ajSeqGapS(&sseq, '-');
3052
3053 if(ipos == 1)
3054 {
3055 ajFmtPrintS(&tmpid, "(%S)", seq->Name);
3056 ajFmtPrintF(outseq->File,
3057 "%-20.20S %S\n",
3058 tmpid, sseq);
3059 }
3060 else
3061 ajWritelineNewline(outseq->File, sseq);
3062 }
3063 }
3064
3065 ajFmtPrintF(outseq->File, ";\n");
3066
3067 ajStrDel(&sseq);
3068 AJFREE(seqs);
3069
3070 return;
3071 }
3072
3073
3074
3075
3076 /* @funcstatic seqWriteTreecon ************************************************
3077 **
3078 ** Writes a sequence in Treecon format.
3079 **
3080 ** @param [u] outseq [AjPSeqout] Sequence output object.
3081 ** @return [void]
3082 **
3083 ** @release 1.0.0
3084 ** @@
3085 ******************************************************************************/
3086
seqWriteTreecon(AjPSeqout outseq)3087 static void seqWriteTreecon(AjPSeqout outseq)
3088 {
3089 ajulong isize;
3090 ajuint ilen = 0;
3091 ajulong i = 0;
3092 void** seqs = NULL;
3093 AjPSeq seq;
3094 AjPSeq* seqarr;
3095 ajulong itest;
3096 AjPStr sseq = NULL;
3097
3098 ajDebug("seqWriteTreecon list size %Lu\n",
3099 ajListGetLength(outseq->Savelist));
3100
3101 isize = ajListGetLength(outseq->Savelist);
3102
3103 if(!isize)
3104 return;
3105
3106 itest = ajListToarray(outseq->Savelist, (void***) &seqs);
3107 ajDebug("ajListToarray listed %Lu items\n", itest);
3108 seqarr = (AjPSeq*) seqs;
3109
3110 for(i=0UL; i < isize; i++)
3111 {
3112 seq = seqarr[i];
3113
3114 if(ilen < ajSeqGetLen(seq))
3115 ilen = ajSeqGetLen(seq);
3116 }
3117
3118 ajFmtPrintF(outseq->File, /* count */
3119 "%d\n", ilen);
3120
3121 for(i=0; i < isize; i++)
3122 {
3123 /* loop over sequences */
3124 seq = seqarr[i];
3125 ajStrAssignS(&sseq, seq->Seq);
3126 ajSeqGapS(&sseq, '-');
3127 ajFmtPrintF(outseq->File,
3128 "%S\n%S\n",
3129 seq->Name, sseq);
3130 }
3131
3132 ajStrDel(&sseq);
3133 AJFREE(seqs);
3134
3135 return;
3136 }
3137
3138
3139
3140
3141 /* @funcstatic seqWriteClustal ************************************************
3142 **
3143 ** Writes a sequence in Clustal (ALN) format.
3144 **
3145 ** @param [u] outseq [AjPSeqout] Sequence output object.
3146 ** @return [void]
3147 **
3148 ** @release 1.0.0
3149 ** @@
3150 ******************************************************************************/
3151
seqWriteClustal(AjPSeqout outseq)3152 static void seqWriteClustal(AjPSeqout outseq)
3153 {
3154 ajulong isize;
3155 ajuint ilen = 0;
3156 ajulong i = 0;
3157 void** seqs = NULL;
3158 AjPSeq seq;
3159 AjPSeq* seqarr;
3160 ajulong itest;
3161 AjPStr sseq = NULL;
3162 ajuint ipos;
3163 ajuint iend;
3164 ajuint iwidth = 50;
3165
3166 ajDebug("seqWriteClustal list size %Lu\n",
3167 ajListGetLength(outseq->Savelist));
3168
3169
3170 isize = ajListGetLength(outseq->Savelist);
3171
3172 if(!isize)
3173 return;
3174
3175 itest = ajListToarray(outseq->Savelist, (void***) &seqs);
3176 ajDebug("ajListToarray listed %Lu items\n", itest);
3177 seqarr = (AjPSeq*) seqs;
3178
3179 for(i=0UL; i < isize; i++)
3180 {
3181 seq = seqarr[i];
3182
3183 if(ilen < ajSeqGetLen(seq))
3184 ilen = ajSeqGetLen(seq);
3185 }
3186
3187 for(i=0UL; i < isize; i++)
3188 {
3189 seq = seqarr[i];
3190
3191 if(ilen > ajSeqGetLen(seq))
3192 ajSeqGapFill(seq, ilen);
3193 }
3194
3195 ajFmtPrintF(outseq->File,
3196 "CLUSTAL W (1.83) multiple sequence alignment\n");
3197
3198 ajFmtPrintF(outseq->File, "\n\n");
3199
3200 iwidth = 60;
3201
3202 for(ipos=1; ipos <= ilen; ipos += 60)
3203 {
3204 iend = ipos + 60 -1;
3205
3206 if(iend > ilen)
3207 {
3208 iend = ilen;
3209 iwidth = ilen - ipos + 1;
3210 }
3211
3212 for(i=0; i < isize; i++)
3213 {
3214 seq = seqarr[i];
3215 ajStrAssignSubS(&sseq, seq->Seq, ipos-1, iend-1);
3216 ajSeqGapS(&sseq, '-');
3217 /* clustalw no longer uses blocks of 10 - after version 1.4 */
3218 /*ajStrFmtBlock(&sseq, 10);*/
3219 ajFmtPrintF(outseq->File,
3220 "%-15.15S %S\n",
3221 seq->Name, sseq);
3222 }
3223 ajFmtPrintF(outseq->File, /* *. conserved line */
3224 "%-15.15s %*.*s\n", "", iwidth, iwidth, "");
3225 if(iend < ilen)
3226 ajWritebinNewline(outseq->File);
3227 }
3228
3229 ajStrDel(&sseq);
3230 AJFREE(seqs);
3231
3232 return;
3233 }
3234
3235
3236
3237
3238 /* @funcstatic seqWriteSelex **************************************************
3239 **
3240 ** Writes a sequence in Selex format.
3241 **
3242 ** @param [u] outseq [AjPSeqout] Sequence output object.
3243 ** @return [void]
3244 **
3245 ** @release 2.0.1
3246 ** @@
3247 ******************************************************************************/
3248
seqWriteSelex(AjPSeqout outseq)3249 static void seqWriteSelex(AjPSeqout outseq)
3250 {
3251 ajulong n;
3252 ajuint len = 0;
3253 ajulong i = 0;
3254 ajuint j = 0;
3255
3256 AjPSeq seq = NULL;
3257 AjPSeq* seqs = NULL;
3258 ajulong test;
3259 /*
3260 ajuint k = 0;
3261 ajuint namelen = 0;
3262 ajuint v = 0;
3263 AjBool sep = ajFalse;
3264 */
3265 AjPStr rfstr = NULL;
3266 AjPStr csstr = NULL;
3267 AjPStr ssstr = NULL;
3268 const char *p = NULL;
3269 AjPStr *names;
3270 ajuint extra;
3271 ajuint nlen = 0;
3272 ajuint slen = 0;
3273 AjPStr *aseqs = NULL;
3274
3275 ajDebug("seqWriteSelex list size %Lu\n",
3276 ajListGetLength(outseq->Savelist));
3277
3278 rfstr = ajStrNewC("#=RF");
3279 csstr = ajStrNewC("#=CS");
3280 ssstr = ajStrNewC("#=SS");
3281
3282 n = ajListGetLength(outseq->Savelist);
3283
3284 if(!n)
3285 return;
3286
3287 test = ajListToarray(outseq->Savelist, (void***) &seqs);
3288 ajDebug("ajListToarray listed %Lu items\n", test);
3289
3290
3291
3292 for(i=0UL; i < n; ++i)
3293 {
3294 seq = seqs[i];
3295
3296 if(len < ajSeqGetLen(seq))
3297 len = ajSeqGetLen(seq);
3298 }
3299
3300 /*
3301 sdata = seqs[0]->Selexdata;
3302 if(sdata)
3303 {
3304
3305 if(ajStrGetLen(sdata->id))
3306 {
3307 sep=ajTrue;
3308 ajFmtPrintF(outseq->File,"#=ID %S\n",sdata->id);
3309 }
3310
3311 if(ajStrGetLen(sdata->ac))
3312 {
3313 sep=ajTrue;
3314 ajFmtPrintF(outseq->File,"#=AC %S\n",sdata->ac);
3315 }
3316
3317 if(ajStrGetLen(sdata->de))
3318 {
3319 sep=ajTrue;
3320 ajFmtPrintF(outseq->File,"#=DE %S\n",sdata->de);
3321 }
3322
3323 if(sdata->ga[0] || sdata->ga[1])
3324 {
3325 sep=ajTrue;
3326 ajFmtPrintF(outseq->File,"#=GA %.2f %.2f\n",sdata->ga[0],
3327 sdata->ga[1]);
3328 }
3329
3330 if(sdata->tc[0] || sdata->tc[1])
3331 {
3332 sep=ajTrue;
3333 ajFmtPrintF(outseq->File,"#=TC %.2f %.2f\n",sdata->tc[0],
3334 sdata->tc[1]);
3335 }
3336
3337 if(sdata->nc[0] || sdata->nc[1])
3338 {
3339 sep=ajTrue;
3340 ajFmtPrintF(outseq->File,"#=NC %.2f %.2f\n",sdata->nc[0],
3341 sdata->nc[1]);
3342 }
3343
3344 if(ajStrGetLen(sdata->au))
3345 {
3346 sep=ajTrue;
3347 ajFmtPrintF(outseq->File,"#=AU %S\n",sdata->au);
3348 }
3349
3350 if(sep)
3351 ajFmtPrintF(outseq->File,"\n");
3352
3353
3354 v=4;
3355 for(i=0;i<n;++i)
3356 {
3357 v = ajStrGetLen(seqs[i]->Selexdata->sq->name);
3358 namelen = (namelen > v) ? namelen : v;
3359 }
3360
3361 for(i=0UL;i<n;++i)
3362 {
3363 v = namelen - ajStrGetLen(seqs[i]->Selexdata->sq->name);
3364 for(j=0;j<v;++j)
3365 ajStrAppendK(&seqs[i]->Selexdata->sq->name,' ');
3366 }
3367
3368
3369 if(ajStrGetLen(sdata->sq->ac))
3370 for(i=0UL;i<n;++i)
3371 {
3372 qdata = seqs[i]->Selexdata->sq;
3373 ajFmtPrintF(outseq->File,"#=SQ %S %.2f %S %S %d..%d:%d %S\n",
3374 qdata->name,qdata->wt,qdata->source,qdata->ac,
3375 qdata->start,qdata->stop,qdata->len,qdata->de);
3376 }
3377 ajFmtPrintF(outseq->File,"\n");
3378
3379
3380
3381 if(ajStrGetLen(seqs[0]->Selexdata->rf))
3382 {
3383 v = namelen - 4;
3384 for(k=0;k<v;++k)
3385 ajStrAppendK(&rfstr,' ');
3386 }
3387
3388 if(ajStrGetLen(seqs[0]->Selexdata->cs))
3389 {
3390 v = namelen - 4;
3391 for(k=0;k<v;++k)
3392 ajStrAppendK(&csstr,' ');
3393 }
3394 if(ajStrGetLen(seqs[0]->Selexdata->ss))
3395 {
3396 v = namelen - 4;
3397 for(k=0;k<v;++k)
3398 ajStrAppendK(&ssstr,' ');
3399 }
3400
3401
3402
3403 for(i=0UL;i<len;i+=50)
3404 {
3405 if(ajStrGetLen(seqs[0]->Selexdata->rf))
3406 {
3407 p = ajStrGetPtr(seqs[0]->Selexdata->rf);
3408 if(i+50>=len)
3409 ajFmtPrintF(outseq->File,"%S %s\n",rfstr, &p[i]);
3410 else
3411 ajFmtPrintF(outseq->File,"%S %-50.50s\n",rfstr,
3412 &p[i]);
3413 }
3414
3415 if(ajStrGetLen(seqs[0]->Selexdata->cs))
3416 {
3417 p = ajStrGetPtr(seqs[0]->Selexdata->cs);
3418 if(i+50>=len)
3419 ajFmtPrintF(outseq->File,"%S %s\n",csstr,&p[i]);
3420 else
3421 ajFmtPrintF(outseq->File,"%S %-50.50s\n",csstr,
3422 &p[i]);
3423 }
3424
3425
3426 for(j=0;j<n;++j)
3427 {
3428 sdata = seqs[j]->Selexdata;
3429
3430 p = ajStrGetPtr(sdata->str);
3431 if(i+50>=len)
3432 ajFmtPrintF(outseq->File,"%S %s\n",sdata->sq->name,&p[i]);
3433 else
3434 ajFmtPrintF(outseq->File,"%S %-50.50s\n",sdata->sq->name,
3435 &p[i]);
3436
3437 if(ajStrGetLen(seqs[0]->Selexdata->ss))
3438 {
3439 p = ajStrGetPtr(seqs[0]->Selexdata->ss);
3440 if(i+50>=len)
3441 ajFmtPrintF(outseq->File,"%S %s\n",ssstr,&p[i]);
3442 else
3443 ajFmtPrintF(outseq->File,"%S %-50.50s\n",ssstr,
3444 &p[i]);
3445 }
3446
3447 }
3448
3449 if(i+50<len)
3450 ajFmtPrintF(outseq->File,"\n");
3451 }
3452 }
3453 else / * Wasn't originally Selex format * /
3454 {
3455 */
3456
3457 AJCNEW0(aseqs,n);
3458 AJCNEW0(names,n);
3459
3460 for(i=0UL; i < n; ++i)
3461 {
3462 seq = seqs[i];
3463 aseqs[i] = ajStrNew();
3464 names[i] = ajStrNew();
3465 ajStrAssignS(&names[i],seq->Name);
3466
3467 if((len=ajStrGetLen(names[i])) > nlen)
3468 nlen = len;
3469
3470 if((len=ajStrGetLen(seq->Seq)) > slen)
3471 slen = len;
3472
3473 ajStrAssignS(&aseqs[i],seq->Seq);
3474 }
3475
3476 for(i=0UL;i<n;++i)
3477 {
3478 seq = seqs[i];
3479 extra = nlen - ajStrGetLen(names[i]);
3480
3481 for(j=0;j<extra;++j)
3482 ajStrAppendK(&names[i],' ');
3483
3484 extra = slen - ajStrGetLen(seq->Seq);
3485
3486 for(j=0;j<extra;++j)
3487 ajStrAppendK(&aseqs[i],' ');
3488
3489 ajFmtPrintF(outseq->File,"#=SQ %S %.2f - - 0..0:0 ",
3490 names[i],seq->Weight);
3491 if(ajStrGetLen(seq->Desc))
3492 ajWritelineNewline(outseq->File, seq->Desc);
3493 else
3494 ajFmtPrintF(outseq->File,"-\n");
3495 }
3496
3497 ajWritebinNewline(outseq->File);
3498
3499
3500 for(i=0UL;i<slen;i+=50)
3501 {
3502 for(j=0;j<n;++j)
3503 {
3504 p = ajStrGetPtr(aseqs[j]);
3505
3506 if(i+50>=len)
3507 ajFmtPrintF(outseq->File,"%S %s\n",names[j],&p[i]);
3508 else
3509 ajFmtPrintF(outseq->File,"%S %-50.50s\n",names[j],
3510 &p[i]);
3511 }
3512 if(i+50<len)
3513 ajWritebinNewline(outseq->File);
3514
3515 }
3516
3517 for(i=0UL;i<n;++i)
3518 {
3519 ajStrDel(&names[i]);
3520 ajStrDel(&aseqs[i]);
3521 }
3522 AJFREE(names);
3523 AJFREE(aseqs);
3524
3525 /*
3526 }
3527 */
3528
3529 AJFREE(seqs);
3530
3531 ajStrDel(&rfstr);
3532 ajStrDel(&csstr);
3533 ajStrDel(&ssstr);
3534
3535 return;
3536 }
3537
3538
3539
3540
3541 /* @funcstatic seqWriteMsf ****************************************************
3542 **
3543 ** Writes a sequence in GCG Multiple Sequence File format.
3544 **
3545 ** @param [u] outseq [AjPSeqout] Sequence output object.
3546 ** @return [void]
3547 **
3548 ** @release 1.0.0
3549 ** @@
3550 ******************************************************************************/
3551
seqWriteMsf(AjPSeqout outseq)3552 static void seqWriteMsf(AjPSeqout outseq)
3553 {
3554 ajulong isize;
3555 ajuint ilen = 0;
3556 ajulong i = 0;
3557 void** seqs = NULL;
3558 AjPSeq seq;
3559 AjPSeq* seqarr;
3560 ajuint checktot = 0;
3561 ajuint check;
3562 ajulong itest;
3563 AjPStr sqbeg = NULL;
3564 AjPStr sqend = NULL;
3565 AjPStr sseq = NULL;
3566 ajuint ipos;
3567 ajuint iend;
3568 ajuint igap;
3569 ajuint maxnamelen = 10;
3570
3571 ajDebug("seqWriteMsf list size %Lu\n",
3572 ajListGetLength(outseq->Savelist));
3573
3574 isize = ajListGetLength(outseq->Savelist);
3575
3576 if(!isize)
3577 return;
3578
3579 itest = ajListToarray(outseq->Savelist, (void***) &seqs);
3580
3581 ajDebug("ajListToarray listed %Lu items\n", itest);
3582 seqarr = (AjPSeq*) seqs;
3583 maxnamelen = 10;
3584
3585 for(i=0UL; i < isize; i++)
3586 {
3587 seq = seqarr[i];
3588
3589 if(ilen < ajSeqGetLen(seq))
3590 ilen = ajSeqGetLen(seq);
3591
3592 if (ajStrGetLen(seq->Name) > maxnamelen)
3593 maxnamelen = ajStrGetLen(seq->Name);
3594 }
3595
3596 for(i=0UL; i < isize; i++)
3597 {
3598 seq = seqarr[i];
3599 ajSeqGapLen(seq, '.', '~', ilen); /* need to pad if any are shorter */
3600 check = ajSeqCalcCheckgcg(seq);
3601 ajDebug(" '%S' len: %d checksum: %d\n",
3602 ajSeqGetNameS(seq), ajSeqGetLen(seq), check);
3603 checktot += check;
3604 checktot = checktot % 10000;
3605 }
3606
3607 ajDebug("checksum %d\n", checktot);
3608 ajDebug("outseq->Type '%S'\n", outseq->Type);
3609
3610 if(!ajStrGetLen(outseq->Type))
3611 {
3612 ajSeqType(seqarr[0]);
3613 ajStrAssignEmptyS(&outseq->Type, seqarr[0]->Type);
3614 }
3615 ajDebug("outseq->Type '%S'\n", outseq->Type);
3616
3617 if(ajStrGetCharFirst(outseq->Type) == 'P')
3618 {
3619 ajFmtPrintF(outseq->File, "!!AA_MULTIPLE_ALIGNMENT 1.0\n\n");
3620 ajFmtPrintF(outseq->File,
3621 " %F MSF: %d Type: P %D CompCheck: %4d ..\n\n",
3622 outseq->File, ilen, ajTimeRefToday(), checktot);
3623 }
3624 else
3625 {
3626 ajFmtPrintF(outseq->File, "!!NA_MULTIPLE_ALIGNMENT 1.0\n\n");
3627 ajFmtPrintF(outseq->File,
3628 " %F MSF: %d Type: N %D CompCheck: %4d ..\n\n",
3629 outseq->File, ilen, ajTimeRefToday(), checktot);
3630 }
3631
3632 for(i=0UL; i < isize; i++)
3633 {
3634 seq = seqarr[i];
3635 check = ajSeqCalcCheckgcg(seq);
3636 ajFmtPrintF(outseq->File,
3637 " Name: %-*S Len: %d Check: %4d Weight: %.2f\n",
3638 maxnamelen, seq->Name, ajStrGetLen(seq->Seq),
3639 check, seq->Weight);
3640 }
3641
3642 ajFmtPrintF(outseq->File, "\n//\n\n");
3643
3644 for(ipos=1; ipos <= ilen; ipos += 50)
3645 {
3646 iend = ipos + 50 -1;
3647 if(iend > ilen)
3648 iend = ilen;
3649 ajFmtPrintS(&sqbeg, "%d", ipos);
3650 ajFmtPrintS(&sqend, "%d", iend);
3651
3652 if(iend == ilen)
3653 {
3654 igap = iend - ipos - ajStrGetLen(sqbeg);
3655 ajDebug("sqbeg: %S sqend: %S ipos: %d iend: %d igap: %d len: %d\n",
3656 sqbeg, sqend, ipos, iend, igap, ajStrGetLen(sqend));
3657
3658 if(igap >= ajStrGetLen(sqend))
3659 ajFmtPrintF(outseq->File,
3660 "%*s %S %*S\n", maxnamelen, " ", sqbeg, igap, sqend);
3661 else
3662 ajFmtPrintF(outseq->File, " %S\n", sqbeg);
3663 }
3664 else
3665 ajFmtPrintF(outseq->File, " %-25S%25S\n",
3666 sqbeg, sqend);
3667
3668 for(i=0UL; i < isize; i++)
3669 {
3670 seq = seqarr[i];
3671 check = ajSeqCalcCheckgcg(seq);
3672 ajStrAssignSubS(&sseq, seq->Seq, ipos-1, iend-1);
3673 ajFmtPrintF(outseq->File,
3674 "%-*S %S\n",
3675 maxnamelen, seq->Name, sseq);
3676 }
3677 ajWritebinNewline(outseq->File);
3678 }
3679
3680
3681 /* AJB: Shouldn't this be left to ajSeqoutDel? */
3682 while(ajListPop(outseq->Savelist,(void **)&seq))
3683 ajSeqDel(&seq);
3684 ajListFree(&outseq->Savelist);
3685
3686 ajStrDel(&sqbeg);
3687 ajStrDel(&sqend);
3688 ajStrDel(&sseq);
3689 AJFREE(seqs);
3690
3691 return;
3692 }
3693
3694
3695
3696
3697 /* @funcstatic seqWriteCodata *************************************************
3698 **
3699 ** Writes a sequence in Codata format.
3700 **
3701 ** @param [u] outseq [AjPSeqout] Sequence output object.
3702 ** @return [void]
3703 **
3704 ** @release 1.0.0
3705 ** @@
3706 ******************************************************************************/
3707
seqWriteCodata(AjPSeqout outseq)3708 static void seqWriteCodata(AjPSeqout outseq)
3709 {
3710
3711 static SeqPSeqFormat sf = NULL;
3712 ajuint j;
3713
3714 ajFmtPrintF(outseq->File, "ENTRY %S \n", outseq->Name);
3715
3716 if(ajStrGetLen(outseq->Desc))
3717 ajFmtPrintF(outseq->File, "TITLE %S, %d bases\n",
3718 outseq->Desc, ajStrGetLen(outseq->Seq));
3719
3720 if(ajStrGetLen(outseq->Acc))
3721 ajFmtPrintF(outseq->File, "ACCESSION %S\n",
3722 outseq->Acc);
3723 ajFmtPrintF(outseq->File, "SEQUENCE \n");
3724
3725 seqSeqFormat(ajStrGetLen(outseq->Seq), &sf);
3726 sf->numwidth = 7;
3727 sf->width = 30;
3728 sf->numleft = ajTrue;
3729 sf->spacer = seqSpaceAll;
3730 strcpy(sf->endstr, "\n///");
3731
3732 for(j = 0; j <= sf->numwidth; j++)
3733 ajFmtPrintF(outseq->File, " ");
3734
3735 for(j = 5; j <= sf->width; j+=5)
3736 ajFmtPrintF(outseq->File, "%10d", j);
3737
3738 ajWritebinNewline(outseq->File);
3739
3740 seqWriteSeq(outseq, sf);
3741 seqFormatDel(&sf);
3742
3743 return;
3744 }
3745
3746
3747
3748
3749 /* @funcstatic seqWriteNbrf ***************************************************
3750 **
3751 ** Writes a sequence in NBRF format.
3752 **
3753 ** @param [u] outseq [AjPSeqout] Sequence output object.
3754 ** @return [void]
3755 **
3756 ** @release 1.0.0
3757 ** @@
3758 ******************************************************************************/
3759
seqWriteNbrf(AjPSeqout outseq)3760 static void seqWriteNbrf(AjPSeqout outseq)
3761 {
3762 static SeqPSeqFormat sf = NULL;
3763
3764 if(!outseq->Type)
3765 ajFmtPrintF(outseq->File, ">D1;%S\n", outseq->Name);
3766 else if(ajStrGetCharFirst(outseq->Type) == 'P')
3767 ajFmtPrintF(outseq->File, ">P1;%S\n", outseq->Name);
3768 else
3769 ajFmtPrintF(outseq->File, ">D1;%S\n", outseq->Name);
3770
3771 ajFmtPrintF(outseq->File, "%S, %d bases\n",
3772 outseq->Desc, ajStrGetLen(outseq->Seq));
3773
3774 if(seqoutUfoLocal(outseq))
3775 {
3776 ajFeattabOutDel(&outseq->Ftquery);
3777 outseq->Ftquery = ajFeattabOutNewCSF("pir", outseq->Name,
3778 ajStrGetPtr(outseq->Type),
3779 outseq->File);
3780
3781 if(!ajFeattableWrite(outseq->Ftquery, outseq->Fttable))
3782 ajWarn("seqWriteNbrf features output failed UFO: '%S'",
3783 outseq->Ufo);
3784 }
3785
3786 seqSeqFormat(ajStrGetLen(outseq->Seq), &sf);
3787 sf->spacer = 11;
3788 strcpy(sf->endstr, "*\n");
3789 seqWriteSeq(outseq, sf);
3790 seqFormatDel(&sf);
3791
3792 return;
3793 }
3794
3795
3796
3797
3798 /* @funcstatic seqWriteExperiment *********************************************
3799 **
3800 ** Writes a sequence in Staden experiment format.
3801 **
3802 ** @param [u] outseq [AjPSeqout] Sequence output object.
3803 ** @return [void]
3804 **
3805 ** @release 3.0.0
3806 ** @@
3807 ******************************************************************************/
3808
seqWriteExperiment(AjPSeqout outseq)3809 static void seqWriteExperiment(AjPSeqout outseq)
3810 {
3811 static SeqPSeqFormat sf = NULL;
3812 ajuint b[5];
3813 AjIList it;
3814 AjPStr cur;
3815 ajuint ilen;
3816 ajuint i;
3817 ajuint j;
3818 ajuint jend;
3819
3820 if(ajStrGetCharFirst(outseq->Type) == 'P')
3821 {
3822 seqWriteSwiss(outseq);
3823
3824 return;
3825 }
3826
3827 ajFmtPrintF(outseq->File,
3828 "ID %-10S standard; DNA; UNC; %d BP.\n",
3829 outseq->Name, ajStrGetLen(outseq->Seq));
3830
3831 if(ajListGetLength(outseq->Acclist))
3832 {
3833 ilen=0;
3834 it = ajListIterNewread(outseq->Acclist);
3835
3836 while((cur = (AjPStr) ajListIterGet(it)))
3837 {
3838 if(ilen + ajStrGetLen(cur) > 79)
3839 {
3840 ajFmtPrintF(outseq->File, ";\n");
3841 ilen = 0;
3842 }
3843
3844 if(ilen == 0)
3845 {
3846 ajFmtPrintF(outseq->File, "AC ");
3847 ilen = 6;
3848 }
3849 else
3850 {
3851 ajFmtPrintF(outseq->File, "; ");
3852 ilen += 2;
3853 }
3854
3855 ajWriteline(outseq->File, cur);
3856 ilen += ajStrGetLen(cur);
3857
3858 }
3859
3860 ajListIterDel(&it) ;
3861 ajFmtPrintF(outseq->File, ";\n");
3862 }
3863
3864 if(ajStrGetLen(outseq->Sv))
3865 ajFmtPrintF(outseq->File, "SV %S\n", outseq->Sv);
3866
3867 /* no need to bother with outseq->Gi because Staden doesn't use it */
3868
3869 if(ajStrGetLen(outseq->Desc))
3870 ajFmtPrintF(outseq->File, "EX %S\n", outseq->Desc);
3871
3872 if(ajListGetLength(outseq->Keylist))
3873 {
3874 ilen=0;
3875 it = ajListIterNewread(outseq->Keylist);
3876
3877 while((cur = (AjPStr) ajListIterGet(it)))
3878 {
3879 if(ilen+ajStrGetLen(cur) >= 79)
3880 {
3881 ajFmtPrintF(outseq->File, ";\n");
3882 ilen = 0;
3883 }
3884
3885 if(ilen == 0)
3886 {
3887 ajFmtPrintF(outseq->File, "KW ");
3888 ilen = 6;
3889 }
3890 else
3891 {
3892 ajFmtPrintF(outseq->File, "; ");
3893 ilen += 2;
3894 }
3895
3896 ajWriteline(outseq->File, cur);
3897 ilen += ajStrGetLen(cur);
3898 }
3899
3900 ajListIterDel(&it) ;
3901 ajFmtPrintF(outseq->File, ".\n");
3902 }
3903
3904 if(ajStrGetLen(outseq->Tax))
3905 ajFmtPrintF(outseq->File, "OS %S\n", outseq->Tax);
3906
3907 if(ajListGetLength(outseq->Taxlist))
3908 {
3909 ilen=0;
3910 it = ajListIterNewread(outseq->Taxlist);
3911
3912 while((cur = (AjPStr) ajListIterGet(it)))
3913 {
3914 if(ilen+ajStrGetLen(cur) >= 79)
3915 {
3916 ajFmtPrintF(outseq->File, ";\n");
3917 ilen = 0;
3918 }
3919
3920 if(ilen == 0)
3921 {
3922 ajFmtPrintF(outseq->File, "OC ");
3923 ilen = 6;
3924 }
3925 else
3926 {
3927 ajFmtPrintF(outseq->File, "; ");
3928 ilen += 2;
3929 }
3930
3931 ajWriteline(outseq->File, cur);
3932 ilen += ajStrGetLen(cur);
3933 }
3934
3935 ajListIterDel(&it) ;
3936 ajFmtPrintF(outseq->File, ".\n");
3937 }
3938
3939 if(seqoutUfoLocal(outseq))
3940 {
3941 ajFeattabOutDel(&outseq->Ftquery);
3942 outseq->Ftquery = ajFeattabOutNewCSF("embl", outseq->Name,
3943 ajStrGetPtr(outseq->Type),
3944 outseq->File);
3945 if(!ajFeattableWrite(outseq->Ftquery, outseq->Fttable))
3946 ajWarn("seqWriteEmbl features output failed UFO: '%S'",
3947 outseq->Ufo);
3948 }
3949
3950
3951 if(outseq->Accuracy)
3952 {
3953 ilen = ajStrGetLen(outseq->Seq);
3954
3955 for(i=0; i<ilen;i+=20)
3956 {
3957 ajFmtPrintF(outseq->File, "AV ");
3958 jend = i+20;
3959
3960 if(jend > ilen)
3961 jend = ilen;
3962
3963 for(j=i;j<jend;j++)
3964 ajFmtPrintF(outseq->File, " %2d", (ajint) outseq->Accuracy[j]);
3965
3966 ajWritebinNewline(outseq->File);
3967 }
3968 }
3969
3970 ajSeqoutGetBasecount(outseq, b);
3971 ajFmtPrintF(outseq->File,
3972 "SQ Sequence %d BP; %d A; %d C; %d G; %d T; %d other;\n",
3973 ajStrGetLen(outseq->Seq), b[0], b[1], b[2], b[3], b[4]);
3974
3975 seqSeqFormat(ajStrGetLen(outseq->Seq), &sf);
3976 strcpy(sf->endstr, "\n//");
3977 sf->tab = 4;
3978 sf->spacer = 11;
3979 sf->width = 60;
3980 sf->numright = ajTrue;
3981 sf->numwidth = 9;
3982 sf->numjust = ajTrue;
3983
3984 seqWriteSeq(outseq, sf);
3985 seqFormatDel(&sf);
3986
3987 return;
3988 }
3989
3990
3991
3992
3993 /* @funcstatic seqWriteEmbl ***************************************************
3994 **
3995 ** Writes a sequence in EMBL format.
3996 **
3997 ** @param [u] outseq [AjPSeqout] Sequence output object.
3998 ** @return [void]
3999 **
4000 ** @release 1.0.0
4001 ** @@
4002 ******************************************************************************/
4003
seqWriteEmbl(AjPSeqout outseq)4004 static void seqWriteEmbl(AjPSeqout outseq)
4005 {
4006 static SeqPSeqFormat sf = NULL;
4007 ajuint b[5];
4008 AjIList it;
4009 AjPStr cur;
4010 ajuint ilen;
4011 AjPStr tmpstr = NULL;
4012 const AjPStr tmpline = NULL;
4013
4014 if(ajStrGetCharFirst(outseq->Type) == 'P')
4015 {
4016 seqWriteSwiss(outseq);
4017
4018 return;
4019 }
4020
4021 ajFmtPrintF(outseq->File,
4022 "ID %-10S standard; DNA; UNC; %d BP.\n",
4023 outseq->Name, ajStrGetLen(outseq->Seq));
4024
4025 if(ajListGetLength(outseq->Acclist))
4026 {
4027 ilen=0;
4028 it = ajListIterNewread(outseq->Acclist);
4029
4030 while((cur = (AjPStr) ajListIterGet(it)))
4031 {
4032 if(ilen + ajStrGetLen(cur) > 79)
4033 {
4034 ajFmtPrintF(outseq->File, ";\n");
4035 ilen = 0;
4036 }
4037
4038 if(ilen == 0)
4039 {
4040 ajFmtPrintF(outseq->File, "AC ");
4041 ilen = 6;
4042 }
4043 else
4044 {
4045 ajFmtPrintF(outseq->File, "; ");
4046 ilen += 2;
4047 }
4048
4049 ajWriteline(outseq->File, cur);
4050 ilen += ajStrGetLen(cur);
4051
4052 }
4053
4054 ajListIterDel(&it) ;
4055 ajFmtPrintF(outseq->File, ";\n");
4056 }
4057
4058 if(ajStrGetLen(outseq->Sv))
4059 ajFmtPrintF(outseq->File, "SV %S\n", outseq->Sv);
4060
4061 /* no need to bother with outseq->Gi because EMBL doesn't use it */
4062
4063 if(ajStrGetLen(outseq->Desc))
4064 {
4065 ajStrAssignS(&tmpstr, outseq->Desc);
4066 ajStrFmtWrap(&tmpstr, 75);
4067 tmpline = ajStrParseC(tmpstr, "\n");
4068
4069 while (tmpline)
4070 {
4071 ajFmtPrintF(outseq->File, "DE %S\n", tmpline);
4072 tmpline = ajStrParseC(NULL, "\n");
4073 }
4074 }
4075
4076 if(ajListGetLength(outseq->Keylist))
4077 {
4078 ilen=0;
4079 it = ajListIterNewread(outseq->Keylist);
4080
4081 while((cur = (AjPStr) ajListIterGet(it)))
4082 {
4083 if(ilen+ajStrGetLen(cur) >= 79)
4084 {
4085 ajFmtPrintF(outseq->File, ";\n");
4086 ilen = 0;
4087 }
4088
4089 if(ilen == 0)
4090 {
4091 ajFmtPrintF(outseq->File, "KW ");
4092 ilen = 6;
4093 }
4094 else
4095 {
4096 ajFmtPrintF(outseq->File, "; ");
4097 ilen += 2;
4098 }
4099
4100 ajWriteline(outseq->File, cur);
4101 ilen += ajStrGetLen(cur);
4102 }
4103
4104 ajListIterDel(&it) ;
4105 ajFmtPrintF(outseq->File, ".\n");
4106 }
4107
4108 if(ajStrGetLen(outseq->Tax))
4109 {
4110 if(ajStrGetLen(outseq->Taxcommon))
4111 ajFmtPrintF(outseq->File, "OS %S (%S)\n",
4112 outseq->Tax, outseq->Taxcommon);
4113 else
4114 ajFmtPrintF(outseq->File, "OS %S\n", outseq->Tax);
4115 }
4116
4117 if(ajListGetLength(outseq->Taxlist) > 1)
4118 {
4119 ilen=0;
4120 it = ajListIterNewread(outseq->Taxlist);
4121
4122 while((cur = (AjPStr) ajListIterGet(it)))
4123 {
4124 if(ilen+ajStrGetLen(cur) >= 79)
4125 {
4126 ajFmtPrintF(outseq->File, ";\n");
4127 ilen = 0;
4128 }
4129
4130 if(ilen == 0)
4131 {
4132 ajFmtPrintF(outseq->File, "OC ");
4133 ilen = 6;
4134 }
4135 else
4136 {
4137 ajFmtPrintF(outseq->File, "; ");
4138 ilen += 2;
4139 }
4140
4141 ajWriteline(outseq->File, cur);
4142 ilen += ajStrGetLen(cur);
4143 }
4144
4145 ajListIterDel(&it) ;
4146 ajFmtPrintF(outseq->File, ".\n");
4147 }
4148
4149 if(seqoutUfoLocal(outseq))
4150 {
4151 ajFeattabOutDel(&outseq->Ftquery);
4152 outseq->Ftquery = ajFeattabOutNewCSF("embl", outseq->Name,
4153 ajStrGetPtr(outseq->Type),
4154 outseq->File);
4155 if(!ajFeattableWrite(outseq->Ftquery, outseq->Fttable))
4156 ajWarn("seqWriteEmbl features output failed UFO: '%S'",
4157 outseq->Ufo);
4158 }
4159
4160 ajSeqoutGetBasecount(outseq, b);
4161 ajFmtPrintF(outseq->File,
4162 "SQ Sequence %d BP; %d A; %d C; %d G; %d T; %d other;\n",
4163 ajStrGetLen(outseq->Seq), b[0], b[1], b[2], b[3], b[4]);
4164
4165 seqSeqFormat(ajStrGetLen(outseq->Seq), &sf);
4166 strcpy(sf->endstr, "\n//");
4167 sf->tab = 4;
4168 sf->spacer = 11;
4169 sf->width = 60;
4170 sf->numright = ajTrue;
4171 sf->numwidth = 9;
4172 sf->numjust = ajTrue;
4173
4174 seqWriteSeq(outseq, sf);
4175 seqFormatDel(&sf);
4176
4177 ajStrDel(&tmpstr);
4178
4179 return;
4180 }
4181
4182
4183
4184
4185 /* @funcstatic seqWriteEmblnew ************************************************
4186 **
4187 ** Writes a sequence in new EMBL format, introduced in EMBL release 87.
4188 **
4189 ** @param [u] outseq [AjPSeqout] Sequence output object.
4190 ** @return [void]
4191 **
4192 ** @release 4.0.0
4193 ** @@
4194 ******************************************************************************/
4195
seqWriteEmblnew(AjPSeqout outseq)4196 static void seqWriteEmblnew(AjPSeqout outseq)
4197 {
4198 static SeqPSeqFormat sf = NULL;
4199 ajuint b[5];
4200 AjIList it;
4201 const AjPStr cur;
4202 ajuint ilen;
4203 ajlong ifind;
4204 AjPStr idstr = NULL;
4205 AjPStr svstr = NULL;
4206 const AjPStr cmtstr = NULL; /* from list - do not delete */
4207 AjPStr tmpstr = NULL;
4208 const AjPSeqRef seqref = NULL;
4209 const AjPSeqXref xref = NULL;
4210 const AjPStr tmpline = NULL;
4211
4212 if(ajStrGetCharFirst(outseq->Type) == 'P')
4213 {
4214 seqWriteSwiss(outseq);
4215
4216 return;
4217 }
4218
4219 if(ajStrGetLen(outseq->Sv))
4220 {
4221 ajStrAssignS(&svstr, outseq->Sv);
4222 ifind = ajStrFindC(svstr, ".");
4223
4224 if(ifind >= 0)
4225 ajStrCutStart(&svstr, (size_t) (ifind+1));
4226 }
4227 else
4228 ajStrAssignC(&svstr, "1");
4229
4230 if(ajStrGetLen(outseq->Acc))
4231 ajStrAssignS(&idstr, outseq->Acc);
4232 else
4233 ajStrAssignS(&idstr, outseq->Name);
4234
4235 ajFmtPrintF(outseq->File,
4236 "ID %S; SV %S; %s;",
4237 idstr, svstr, outseq->Circular? "circular" : "linear");
4238
4239 ajFmtPrintF(outseq->File, " %s;",ajSeqmolGetEmbl(outseq->Molecule));
4240
4241 if(ajStrGetLen(outseq->Class))
4242 ajFmtPrintF(outseq->File, " %S;",outseq->Class);
4243 else
4244 ajFmtPrintF(outseq->File, " STD;");
4245
4246 if(ajStrGetLen(outseq->Division))
4247 ajFmtPrintF(outseq->File, " %S;",outseq->Division);
4248 else
4249 ajFmtPrintF(outseq->File, " UNC;");
4250
4251 ajFmtPrintF(outseq->File,
4252 " %d BP.\nXX\n",
4253 ajStrGetLen(outseq->Seq));
4254 ajStrDel(&svstr);
4255
4256 if(ajListGetLength(outseq->Acclist))
4257 {
4258 ilen=0;
4259 it = ajListIterNewread(outseq->Acclist);
4260
4261 while((cur = (AjPStr) ajListIterGet(it)))
4262 {
4263 if(ilen + ajStrGetLen(cur) > 79)
4264 {
4265 ajFmtPrintF(outseq->File, ";\n");
4266 ilen = 0;
4267 }
4268
4269 if(ilen == 0)
4270 {
4271 ajFmtPrintF(outseq->File, "AC ");
4272 ilen = 6;
4273 }
4274 else
4275 {
4276 ajFmtPrintF(outseq->File, "; ");
4277 ilen += 2;
4278 }
4279
4280 ajWriteline(outseq->File, cur);
4281 ilen += ajStrGetLen(cur);
4282
4283 }
4284
4285 ajListIterDel(&it) ;
4286 ajFmtPrintF(outseq->File, ";\nXX\n");
4287 }
4288
4289 /* no SV line in the new format - see the ID line */
4290 /*
4291 if(ajStrGetLen(outseq->Sv))
4292 ajFmtPrintF(outseq->File, "SV %S\n", outseq->Sv);
4293 */
4294
4295 /* no need to bother with outseq->Gi because EMBL doesn't use it */
4296
4297
4298 if(ajSeqdateExists(outseq->Date))
4299 {
4300 if(outseq->Date->CreDate)
4301 ajFmtPrintF(outseq->File,
4302 "DT %D (Rel. %S, Created)\n",
4303 outseq->Date->CreDate, outseq->Date->CreRel);
4304 else if (outseq->Date->ModDate)
4305 ajFmtPrintF(outseq->File,
4306 "DT %D (Rel. %S, Created)\n",
4307 outseq->Date->ModDate, outseq->Date->ModRel);
4308
4309 if(outseq->Date->ModDate)
4310 ajFmtPrintF(outseq->File,
4311 "DT %D (Rel. %S, Last updated, Version %S)\n",
4312 outseq->Date->ModDate, outseq->Date->ModRel,
4313 outseq->Date->ModVer);
4314 ajFmtPrintF(outseq->File, "XX\n");
4315 }
4316
4317 if(ajStrGetLen(outseq->Desc))
4318 {
4319 ajStrAssignS(&tmpstr, outseq->Desc);
4320 ajStrFmtWrap(&tmpstr, 75);
4321 tmpline = ajStrParseC(tmpstr, "\n");
4322
4323 while (tmpline)
4324 {
4325 ajFmtPrintF(outseq->File, "DE %S\n", tmpline);
4326 tmpline = ajStrParseC(NULL, "\n");
4327 }
4328
4329 ajFmtPrintF(outseq->File, "XX\n");
4330 }
4331
4332 if(ajListGetLength(outseq->Keylist))
4333 {
4334 ilen=0;
4335 it = ajListIterNewread(outseq->Keylist);
4336
4337 while((cur = (AjPStr) ajListIterGet(it)))
4338 {
4339 if(ilen+ajStrGetLen(cur) >= 79)
4340 {
4341 ajFmtPrintF(outseq->File, ";\n");
4342 ilen = 0;
4343 }
4344
4345 if(ilen == 0)
4346 {
4347 ajFmtPrintF(outseq->File, "KW ");
4348 ilen = 6;
4349 }
4350 else
4351 {
4352 ajFmtPrintF(outseq->File, "; ");
4353 ilen += 2;
4354 }
4355
4356 ajWriteline(outseq->File, cur);
4357 ilen += ajStrGetLen(cur);
4358 }
4359
4360 ajListIterDel(&it) ;
4361 ajFmtPrintF(outseq->File, ".\nXX\n");
4362 }
4363
4364 if(ajStrGetLen(outseq->Tax))
4365 {
4366 if(ajStrGetLen(outseq->Taxcommon))
4367 ajFmtPrintF(outseq->File, "OS %S (%S)\n",
4368 outseq->Tax, outseq->Taxcommon);
4369 else
4370 ajFmtPrintF(outseq->File, "OS %S\n", outseq->Tax);
4371 }
4372
4373 if(ajListGetLength(outseq->Taxlist) > 1)
4374 {
4375 ilen=0;
4376 it = ajListIterNewread(outseq->Taxlist);
4377
4378 while((cur = (AjPStr) ajListIterGet(it)))
4379 {
4380 if(ilen+ajStrGetLen(cur) >= 79)
4381 {
4382 ajFmtPrintF(outseq->File, ";\n");
4383 ilen = 0;
4384 }
4385
4386 if(ilen == 0)
4387 {
4388 ajFmtPrintF(outseq->File, "OC ");
4389 ilen = 6;
4390 }
4391 else
4392 {
4393 ajFmtPrintF(outseq->File, "; ");
4394 ilen += 2;
4395 }
4396
4397 ajWriteline(outseq->File, cur);
4398 ilen += ajStrGetLen(cur);
4399 }
4400
4401 ajListIterDel(&it) ;
4402 ajFmtPrintF(outseq->File, ".\n");
4403 }
4404
4405 if(ajStrGetLen(outseq->Organelle))
4406 ajFmtPrintF(outseq->File, "OG %S\n", outseq->Organelle);
4407
4408 if(ajStrGetLen(outseq->Tax) ||
4409 ajStrGetLen(outseq->Organelle) ||
4410 ajListGetLength(outseq->Taxlist) > 1)
4411 ajFmtPrintF(outseq->File, "XX\n");
4412
4413 if(ajListGetLength(outseq->Reflist))
4414 {
4415 it = ajListIterNewread(outseq->Reflist);
4416
4417 while ((seqref = (const AjPSeqRef) ajListIterGet(it)))
4418 {
4419 ajFmtPrintF(outseq->File, "RN [%u]\n", seqref->Number);
4420
4421 if(ajStrGetLen(seqref->Comment))
4422 {
4423 ajStrAssignS(&tmpstr, seqref->Comment);
4424 ajStrFmtWrap(&tmpstr, 75);
4425 tmpline = ajStrParseC(tmpstr, "\n");
4426
4427 while (tmpline)
4428 {
4429 ajFmtPrintF(outseq->File, "RC %S\n", tmpline);
4430 tmpline = ajStrParseC(NULL, "\n");
4431 }
4432 }
4433
4434 if(ajStrGetLen(seqref->Position))
4435 {
4436 ajStrAssignS(&tmpstr, seqref->Position);
4437 ajStrFmtWrap(&tmpstr, 75);
4438 tmpline = ajStrParseC(tmpstr, "\n");
4439
4440 while (tmpline)
4441 {
4442 ajFmtPrintF(outseq->File, "RP %S\n", tmpline);
4443 tmpline = ajStrParseC(NULL, "\n");
4444 }
4445 }
4446
4447 if(ajStrGetLen(seqref->Xref))
4448 {
4449 ajStrAssignS(&tmpstr, seqref->Xref);
4450 ajStrAppendK(&tmpstr, '.');
4451 ajStrFmtWrap(&tmpstr, 75);
4452 tmpline = ajStrParseC(tmpstr, "\n");
4453
4454 while (tmpline)
4455 {
4456 ajFmtPrintF(outseq->File, "RX %S\n", tmpline);
4457 tmpline = ajStrParseC(NULL, "\n");
4458 }
4459 }
4460
4461 if(ajStrGetLen(seqref->Groupname))
4462 {
4463 ajStrAssignS(&tmpstr, seqref->Groupname);
4464 ajStrFmtWrap(&tmpstr, 75);
4465 tmpline = ajStrParseC(tmpstr, "\n");
4466
4467 while (tmpline)
4468 {
4469 ajFmtPrintF(outseq->File, "RG %S\n", tmpline);
4470 tmpline = ajStrParseC(NULL, "\n");
4471 }
4472 }
4473
4474 if(ajStrGetLen(seqref->Authors))
4475 {
4476 ajStrAssignS(&tmpstr, seqref->Authors);
4477 ajStrAppendK(&tmpstr, ';');
4478 ajStrFmtWrapAt(&tmpstr, 75, ',');
4479 tmpline = ajStrParseC(tmpstr, "\n");
4480
4481 while (tmpline)
4482 {
4483 ajFmtPrintF(outseq->File, "RA %S\n", tmpline);
4484 tmpline = ajStrParseC(NULL, "\n");
4485 }
4486 }
4487
4488 if(ajStrGetLen(seqref->Title))
4489 {
4490 ajStrAssignS(&tmpstr, seqref->Title);
4491 ajStrInsertC(&tmpstr, 0, "\"");
4492 ajStrAppendC(&tmpstr, "\";");
4493 ajStrFmtWrap(&tmpstr, 75);
4494 tmpline = ajStrParseC(tmpstr, "\n");
4495
4496 while (tmpline)
4497 {
4498 ajFmtPrintF(outseq->File, "RT %S\n", tmpline);
4499 tmpline = ajStrParseC(NULL, "\n");
4500 }
4501 }
4502 else
4503 ajFmtPrintF(outseq->File, "RT ;\n");
4504
4505 if(ajStrGetLen(seqref->Location))
4506 {
4507 ajStrAssignS(&tmpstr, seqref->Location);
4508 ajStrAppendK(&tmpstr, '.');
4509 ajStrFmtWrap(&tmpstr, 75);
4510 tmpline = ajStrParseC(tmpstr, "\n");
4511
4512 while (tmpline)
4513 {
4514 ajFmtPrintF(outseq->File, "RL %S\n", tmpline);
4515 tmpline = ajStrParseC(NULL, "\n");
4516 }
4517 }
4518
4519 ajFmtPrintF(outseq->File, "XX\n");
4520 }
4521
4522 ajListIterDel(&it);
4523 }
4524
4525 if(ajListGetLength(outseq->Xreflist))
4526 {
4527 it = ajListIterNewread(outseq->Xreflist);
4528
4529 while ((xref = (const AjPSeqXref) ajListIterGet(it)))
4530 {
4531 if(xref->Type == XREF_DR)
4532 {
4533 if(ajStrGetLen(xref->Quatid))
4534 {
4535 ajFmtPrintF(outseq->File, "DR %S; %S; %S; %S; %S.\n",
4536 xref->Db, xref->Id, xref->Secid,
4537 xref->Terid, xref->Quatid);
4538 }
4539 else if(ajStrGetLen(xref->Terid))
4540 {
4541 ajFmtPrintF(outseq->File, "DR %S; %S; %S; %S.\n",
4542 xref->Db, xref->Id, xref->Secid, xref->Terid);
4543 }
4544 else if(ajStrGetLen(xref->Secid))
4545 {
4546 ajFmtPrintF(outseq->File, "DR %S; %S; %S.\n",
4547 xref->Db, xref->Id, xref->Secid);
4548 }
4549 else
4550 {
4551 ajFmtPrintF(outseq->File, "DR %S; %S.\n",
4552 xref->Db, xref->Id);
4553 }
4554 }
4555 }
4556
4557 ajListIterDel(&it);
4558 ajFmtPrintF(outseq->File, "XX\n");
4559 }
4560
4561 if(ajListGetLength(outseq->Cmtlist))
4562 {
4563 it = ajListIterNewread(outseq->Cmtlist);
4564
4565 while ((cmtstr = (const AjPStr) ajListIterGet(it)))
4566 {
4567 ajStrAssignS(&tmpstr, cmtstr);
4568 ajStrFmtWrapAt(&tmpstr, 75, ',');
4569 tmpline = ajStrParseC(tmpstr, "\n");
4570
4571 while (tmpline)
4572 {
4573 if(ajStrMatchC(tmpline, " "))
4574 ajFmtPrintF(outseq->File, "CC \n");
4575 else
4576 ajFmtPrintF(outseq->File, "CC %S\n", tmpline);
4577
4578 tmpline = ajStrParseC(NULL, "\n");
4579 }
4580
4581 ajFmtPrintF(outseq->File, "XX\n");
4582 }
4583
4584 ajListIterDel(&it);
4585 }
4586
4587 if(seqoutUfoLocal(outseq))
4588 {
4589 ajFeattabOutDel(&outseq->Ftquery);
4590 outseq->Ftquery = ajFeattabOutNewCSF("embl", outseq->Name,
4591 ajStrGetPtr(outseq->Type),
4592 outseq->File);
4593 if(!ajFeattableWrite(outseq->Ftquery, outseq->Fttable))
4594 ajWarn("seqWriteEmbl features output failed UFO: '%S'",
4595 outseq->Ufo);
4596 ajFmtPrintF(outseq->File, "XX\n");
4597 }
4598
4599 ajSeqoutGetBasecount(outseq, b);
4600 ajFmtPrintF(outseq->File,
4601 "SQ Sequence %d BP; %d A; %d C; %d G; %d T; %d other;\n",
4602 ajStrGetLen(outseq->Seq), b[0], b[1], b[2], b[3], b[4]);
4603
4604 seqSeqFormat(ajStrGetLen(outseq->Seq), &sf);
4605 strcpy(sf->endstr, "\n//");
4606 sf->tab = 4;
4607 sf->spacer = 11;
4608 sf->width = 60;
4609 sf->numright = ajTrue;
4610 sf->numwidth = 9;
4611 sf->numjust = ajTrue;
4612
4613 seqWriteSeq(outseq, sf);
4614 seqFormatDel(&sf);
4615 ajStrDel(&tmpstr);
4616 ajStrDel(&idstr);
4617
4618 return;
4619 }
4620
4621
4622
4623
4624 /* @funcstatic seqWriteSwiss **************************************************
4625 **
4626 ** Writes a sequence in SWISSPROT format.
4627 **
4628 ** @param [u] outseq [AjPSeqout] Sequence output object.
4629 ** @return [void]
4630 **
4631 ** @release 1.0.0
4632 ** @@
4633 ******************************************************************************/
4634
seqWriteSwiss(AjPSeqout outseq)4635 static void seqWriteSwiss(AjPSeqout outseq)
4636 {
4637 static SeqPSeqFormat sf = NULL;
4638 ajuint mw;
4639 /* ajuint crc; old 32-bit crc */
4640 unsigned long long crc;
4641 AjIList it;
4642 AjPStr cur;
4643 ajuint ilen;
4644 AjPStr tmpstr = NULL;
4645 const AjPStr tmpline = NULL;
4646
4647 if(ajStrGetCharFirst(outseq->Type) == 'N')
4648 {
4649 seqWriteEmbl(outseq);
4650
4651 return;
4652 }
4653
4654 ajFmtPrintF(outseq->File,
4655 "ID %-10S STANDARD; PRT; %5d AA.\n",
4656 outseq->Name, ajStrGetLen(outseq->Seq));
4657
4658 if(ajListGetLength(outseq->Acclist))
4659 {
4660 ilen = 0;
4661 it = ajListIterNewread(outseq->Acclist);
4662
4663 while((cur = (AjPStr) ajListIterGet(it)))
4664 {
4665 if(ilen + ajStrGetLen(cur) > 79)
4666 {
4667 ajFmtPrintF(outseq->File, ";\n");
4668 ilen = 0;
4669 }
4670
4671 if(ilen == 0)
4672 {
4673 ajFmtPrintF(outseq->File, "AC ");
4674 ilen = 6;
4675 }
4676 else
4677 {
4678 ajFmtPrintF(outseq->File, "; ");
4679 ilen += 2;
4680 }
4681
4682 ajWriteline(outseq->File, cur);
4683 ilen += ajStrGetLen(cur);
4684 }
4685
4686 ajListIterDel(&it) ;
4687 ajFmtPrintF(outseq->File, ";\n");
4688 }
4689
4690 if(ajStrGetLen(outseq->Desc))
4691 ajFmtPrintF(outseq->File, "DE %S\n", outseq->Desc);
4692
4693 if(ajStrGetLen(outseq->Tax))
4694 {
4695 ajStrAssignS(&tmpstr, outseq->Tax);
4696
4697 if(ajStrGetLen(outseq->Taxcommon))
4698 {
4699 ajStrAppendC(&tmpstr, " (");
4700 ajStrAppendS(&tmpstr, outseq->Taxcommon);
4701 ajStrAppendK(&tmpstr, ')');
4702 }
4703
4704 ajStrAppendK(&tmpstr, '.');
4705 ajStrFmtWrap(&tmpstr, 75);
4706 tmpline = ajStrParseC(tmpstr, "\n");
4707
4708 while (tmpline)
4709 {
4710 ajFmtPrintF(outseq->File, "OS %S\n", tmpline);
4711 tmpline = ajStrParseC(NULL, "\n");
4712 }
4713 }
4714
4715 if(ajListGetLength(outseq->Taxlist) > 1)
4716 {
4717 ilen = 0;
4718 it = ajListIterNewread(outseq->Taxlist);
4719
4720 while((cur = (AjPStr) ajListIterGet(it)))
4721 {
4722 if(ilen+ajStrGetLen(cur) >= 79)
4723 {
4724 ajFmtPrintF(outseq->File, ";\n");
4725 ilen = 0;
4726 }
4727
4728 if(ilen == 0)
4729 {
4730 ajFmtPrintF(outseq->File, "OC ");
4731 ilen = 6;
4732 }
4733 else
4734 {
4735 ajFmtPrintF(outseq->File, "; ");
4736 ilen += 2;
4737 }
4738
4739 ajWriteline(outseq->File, cur);
4740 ilen += ajStrGetLen(cur);
4741 }
4742
4743 ajListIterDel(&it) ;
4744 ajFmtPrintF(outseq->File, ".\n");
4745 }
4746
4747 if(ajListGetLength(outseq->Keylist))
4748 {
4749 ilen = 0;
4750 it = ajListIterNewread(outseq->Keylist);
4751
4752 while((cur = (AjPStr) ajListIterGet(it)))
4753 {
4754 if(ilen+ajStrGetLen(cur) >= 79)
4755 {
4756 ajFmtPrintF(outseq->File, ";\n");
4757 ilen = 0;
4758 }
4759
4760 if(ilen == 0)
4761 {
4762 ajFmtPrintF(outseq->File, "KW ");
4763 ilen = 6;
4764 }
4765 else
4766 {
4767 ajFmtPrintF(outseq->File, "; ");
4768 ilen += 2;
4769 }
4770
4771 ajWriteline(outseq->File, cur);
4772 ilen += ajStrGetLen(cur);
4773 }
4774
4775 ajListIterDel(&it) ;
4776 ajFmtPrintF(outseq->File, ".\n");
4777 }
4778
4779 if(seqoutUfoLocal(outseq))
4780 {
4781 ajFeattabOutDel(&outseq->Ftquery);
4782 outseq->Ftquery = ajFeattabOutNewCSF("swiss", outseq->Name,
4783 ajStrGetPtr(outseq->Type),
4784 outseq->File);
4785 if(!ajFeattableWrite(outseq->Ftquery, outseq->Fttable))
4786 ajWarn("seqWriteSwiss features output failed UFO: '%S'",
4787 outseq->Ufo);
4788 }
4789
4790 crc = ajMathCrc64(outseq->Seq);
4791 mw = (ajint) (0.5+ajSeqstrCalcMolwt(outseq->Seq));
4792
4793 ajFmtPrintF(outseq->File,
4794 "SQ SEQUENCE %5d AA; %6d MW; %08X",
4795 ajStrGetLen(outseq->Seq), mw, (crc>>32)&0xffffffff);
4796 ajFmtPrintF(outseq->File,
4797 "%08X CRC64;\n",crc&0xffffffff);
4798
4799 seqSeqFormat(ajStrGetLen(outseq->Seq), &sf);
4800 strcpy(sf->endstr, "\n//");
4801 sf->tab = 4;
4802 sf->spacer = 11;
4803 sf->width = 60;
4804
4805 seqWriteSeq(outseq, sf);
4806 seqFormatDel(&sf);
4807
4808 return;
4809 }
4810
4811
4812
4813
4814 /* @funcstatic seqWriteSwissnew ***********************************************
4815 **
4816 ** Writes a sequence in SWISSPROT/UNIPROT format, revised in September 2006
4817 **
4818 ** @param [u] outseq [AjPSeqout] Sequence output object.
4819 ** @return [void]
4820 **
4821 ** @release 4.0.0
4822 ** @@
4823 ******************************************************************************/
4824
seqWriteSwissnew(AjPSeqout outseq)4825 static void seqWriteSwissnew(AjPSeqout outseq)
4826 {
4827 static SeqPSeqFormat sf = NULL;
4828 ajuint mw;
4829 /* ajuint crc; old 32-bit crc */
4830 unsigned long long crc;
4831 AjIList it;
4832 AjIList itb;
4833 AjIList itc;
4834 AjPStr cur;
4835 ajuint ilen;
4836 const AjPStr cmtstr = NULL; /* from list - do not delete */
4837 AjPStr tmpstr = NULL;
4838 const AjPSeqRef seqref = NULL;
4839 const AjPSeqXref xref = NULL;
4840 const AjPSeqGene seqgene = NULL;
4841 const AjPSeqDesc desc = NULL;
4842 const AjPStr tmpline = NULL;
4843 const AjPSeqSubdesc subdesc = NULL;
4844 const char* altnames="AltName:";
4845 const char* altspace=" ";
4846 const char* alttext;
4847
4848 if(ajStrGetCharFirst(outseq->Type) == 'N')
4849 {
4850 seqWriteEmbl(outseq);
4851
4852 return;
4853 }
4854
4855 if(ajStrFindAnyK(outseq->Name, '_') > 0)
4856 ajFmtPrintF(outseq->File,
4857 "ID %-19S Reviewed; %8d AA.\n",
4858 outseq->Name, ajStrGetLen(outseq->Seq));
4859 else
4860 ajFmtPrintF(outseq->File,
4861 "ID %-19S Unreviewed; %8d AA.\n",
4862 outseq->Name, ajStrGetLen(outseq->Seq));
4863
4864 if(ajListGetLength(outseq->Acclist))
4865 {
4866 ilen = 0;
4867 it = ajListIterNewread(outseq->Acclist);
4868
4869 while((cur = (AjPStr) ajListIterGet(it)))
4870 {
4871 if(ilen + ajStrGetLen(cur) > 73)
4872 {
4873 ajFmtPrintF(outseq->File, ";\n");
4874 ilen = 0;
4875 }
4876
4877 if(ilen == 0)
4878 {
4879 ajFmtPrintF(outseq->File, "AC ");
4880 ilen = 6;
4881 }
4882 else
4883 {
4884 ajFmtPrintF(outseq->File, "; ");
4885 ilen += 2;
4886 }
4887
4888 ajWriteline(outseq->File, cur);
4889 ilen += ajStrGetLen(cur);
4890
4891 }
4892
4893 ajListIterDel(&it) ;
4894 ajFmtPrintF(outseq->File, ";\n");
4895 }
4896
4897 if(outseq->Date)
4898 {
4899 if(outseq->Date->CreDate)
4900 ajFmtPrintF(outseq->File,
4901 "DT %D, integrated into %S.\n",
4902 outseq->Date->CreDate, outseq->Date->CreVer);
4903 if (outseq->Date->SeqDate)
4904 ajFmtPrintF(outseq->File,
4905 "DT %D, sequence version %S.\n",
4906 outseq->Date->SeqDate, outseq->Date->SeqVer);
4907
4908 if(outseq->Date->ModDate)
4909 ajFmtPrintF(outseq->File,
4910 "DT %D, entry version %S.\n",
4911 outseq->Date->ModDate, outseq->Date->ModVer);
4912 }
4913
4914 if(outseq->Fulldesc && ajStrGetLen(outseq->Fulldesc->Name))
4915 {
4916 ajFmtPrintF(outseq->File,
4917 "DE RecName: Full=%S;\n", outseq->Fulldesc->Name);
4918
4919 it = ajListIterNewread(outseq->Fulldesc->Short);
4920
4921 while((cur = (AjPStr) ajListIterGet(it)))
4922 ajFmtPrintF(outseq->File,
4923 "DE Short=%S;\n", cur);
4924 ajListIterDel(&it);
4925
4926 it = ajListIterNewread(outseq->Fulldesc->EC);
4927
4928 while((cur = (AjPStr) ajListIterGet(it)))
4929 ajFmtPrintF(outseq->File,
4930 "DE EC=%S;\n", cur);
4931 ajListIterDel(&it);
4932
4933 it = ajListIterNewread(outseq->Fulldesc->AltNames);
4934
4935 while((subdesc = (AjPSeqSubdesc) ajListIterGet(it)))
4936 {
4937 alttext = altnames;
4938
4939 if(ajStrGetLen(subdesc->Name))
4940 {
4941 ajFmtPrintF(outseq->File,
4942 "DE %s Full=%S;\n", alttext, subdesc->Name);
4943 alttext = altspace;
4944 }
4945
4946 itb = ajListIterNewread(subdesc->Inn);
4947 while((cur = (AjPStr) ajListIterGet(itb)))
4948 {
4949 ajFmtPrintF(outseq->File,
4950 "DE %s INN=%S;\n", alttext, cur);
4951 alttext = altspace;
4952 }
4953
4954 ajListIterDel(&itb);
4955
4956 itb = ajListIterNewread(subdesc->Short);
4957
4958 while((cur = (AjPStr) ajListIterGet(itb)))
4959 ajFmtPrintF(outseq->File,
4960 "DE Short=%S;\n", cur);
4961
4962 ajListIterDel(&itb);
4963
4964 itb = ajListIterNewread(subdesc->EC);
4965
4966 while((cur = (AjPStr) ajListIterGet(itb)))
4967 ajFmtPrintF(outseq->File,
4968 "DE EC=%S;\n", cur);
4969 ajListIterDel(&itb);
4970
4971 itb = ajListIterNewread(subdesc->Allergen);
4972
4973 while((cur = (AjPStr) ajListIterGet(itb)))
4974 ajFmtPrintF(outseq->File,
4975 "DE Allergen=%S;\n", cur);
4976 ajListIterDel(&itb);
4977
4978 itb = ajListIterNewread(subdesc->Biotech);
4979
4980 while((cur = (AjPStr) ajListIterGet(itb)))
4981 ajFmtPrintF(outseq->File,
4982 "DE Biotech=%S;\n", cur);
4983 ajListIterDel(&itb);
4984
4985 itb = ajListIterNewread(subdesc->Cdantigen);
4986
4987 while((cur = (AjPStr) ajListIterGet(itb)))
4988 ajFmtPrintF(outseq->File,
4989 "DE CD_antigen=%S;\n", cur);
4990 ajListIterDel(&itb);
4991
4992 }
4993
4994 ajListIterDel(&it);
4995
4996 it = ajListIterNewread(outseq->Fulldesc->SubNames);
4997
4998 while((subdesc = (AjPSeqSubdesc) ajListIterGet(it)))
4999 {
5000 ajFmtPrintF(outseq->File,
5001 "DE SubName: Full=%S;\n", subdesc->Name);
5002 itb = ajListIterNewread(subdesc->Short);
5003
5004 while((cur = (AjPStr) ajListIterGet(itb)))
5005 ajFmtPrintF(outseq->File,
5006 "DE Short=%S;\n", cur);
5007 ajListIterDel(&itb);
5008 itb = ajListIterNewread(subdesc->EC);
5009
5010 while((cur = (AjPStr) ajListIterGet(itb)))
5011 ajFmtPrintF(outseq->File,
5012 "DE EC=%S;\n", cur);
5013 ajListIterDel(&itb);
5014 }
5015
5016 ajListIterDel(&it);
5017
5018 itc = ajListIterNewread(outseq->Fulldesc->Includes);
5019
5020 while((desc = (AjPSeqDesc) ajListIterGet(itc)))
5021 {
5022 ajFmtPrintF(outseq->File,
5023 "DE Includes:\n");
5024 ajFmtPrintF(outseq->File,
5025 "DE RecName: Full=%S;\n", desc->Name);
5026 it = ajListIterNewread(desc->Short);
5027
5028 while((cur = (AjPStr) ajListIterGet(it)))
5029 ajFmtPrintF(outseq->File,
5030 "DE Short=%S;\n", cur);
5031 ajListIterDel(&it);
5032
5033 it = ajListIterNewread(desc->EC);
5034
5035 while((cur = (AjPStr) ajListIterGet(it)))
5036 ajFmtPrintF(outseq->File,
5037 "DE EC=%S;\n", cur);
5038 ajListIterDel(&it);
5039
5040 it = ajListIterNewread(desc->AltNames);
5041
5042 while((subdesc = (AjPSeqSubdesc) ajListIterGet(it)))
5043 {
5044 alttext = altnames;
5045
5046 if(ajStrGetLen(subdesc->Name))
5047 {
5048 ajFmtPrintF(outseq->File,
5049 "DE %s Full=%S;\n", alttext, subdesc->Name);
5050 alttext = altspace;
5051 }
5052
5053 itb = ajListIterNewread(subdesc->Inn);
5054
5055 while((cur = (AjPStr) ajListIterGet(itb)))
5056 {
5057 ajFmtPrintF(outseq->File,
5058 "DE %s INN=%S;\n", alttext, cur);
5059 alttext = altspace;
5060 }
5061
5062 ajListIterDel(&itb);
5063 itb = ajListIterNewread(subdesc->Short);
5064
5065 while((cur = (AjPStr) ajListIterGet(itb)))
5066 ajFmtPrintF(outseq->File,
5067 "DE Short=%S;\n", cur);
5068 ajListIterDel(&itb);
5069
5070 itb = ajListIterNewread(subdesc->EC);
5071
5072 while((cur = (AjPStr) ajListIterGet(itb)))
5073 ajFmtPrintF(outseq->File,
5074 "DE EC=%S;\n", cur);
5075 ajListIterDel(&itb);
5076
5077 itb = ajListIterNewread(subdesc->Allergen);
5078
5079 while((cur = (AjPStr) ajListIterGet(itb)))
5080 ajFmtPrintF(outseq->File,
5081 "DE Allergen=%S;\n", cur);
5082 ajListIterDel(&itb);
5083
5084 itb = ajListIterNewread(subdesc->Biotech);
5085
5086 while((cur = (AjPStr) ajListIterGet(itb)))
5087 ajFmtPrintF(outseq->File,
5088 "DE Biotech=%S;\n", cur);
5089 ajListIterDel(&itb);
5090
5091 itb = ajListIterNewread(subdesc->Cdantigen);
5092
5093 while((cur = (AjPStr) ajListIterGet(itb)))
5094 ajFmtPrintF(outseq->File,
5095 "DE CD_antigen=%S;\n", cur);
5096 ajListIterDel(&itb);
5097 }
5098
5099 ajListIterDel(&it);
5100
5101 it = ajListIterNewread(desc->SubNames);
5102
5103 while((subdesc = (AjPSeqSubdesc) ajListIterGet(it)))
5104 {
5105 ajFmtPrintF(outseq->File,
5106 "DE SubName: Full=%S;\n", subdesc->Name);
5107 itb = ajListIterNewread(subdesc->Short);
5108
5109 while((cur = (AjPStr) ajListIterGet(itb)))
5110 ajFmtPrintF(outseq->File,
5111 "DE Short=%S;\n", cur);
5112 ajListIterDel(&itb);
5113 itb = ajListIterNewread(subdesc->EC);
5114
5115 while((cur = (AjPStr) ajListIterGet(itb)))
5116 ajFmtPrintF(outseq->File,
5117 "DE EC=%S;\n", cur);
5118 ajListIterDel(&itb);
5119 }
5120
5121 ajListIterDel(&it);
5122
5123 }
5124
5125 ajListIterDel(&itc);
5126
5127 itc = ajListIterNewread(outseq->Fulldesc->Contains);
5128
5129 while((desc = (AjPSeqDesc) ajListIterGet(itc)))
5130 {
5131 ajFmtPrintF(outseq->File,
5132 "DE Contains:\n");
5133 ajFmtPrintF(outseq->File,
5134 "DE RecName: Full=%S;\n", desc->Name);
5135 it = ajListIterNewread(desc->Short);
5136
5137 while((cur = (AjPStr) ajListIterGet(it)))
5138 ajFmtPrintF(outseq->File,
5139 "DE Short=%S;\n", cur);
5140 ajListIterDel(&it);
5141
5142 it = ajListIterNewread(desc->EC);
5143
5144 while((cur = (AjPStr) ajListIterGet(it)))
5145 ajFmtPrintF(outseq->File,
5146 "DE EC=%S;\n", cur);
5147 ajListIterDel(&it);
5148
5149 it = ajListIterNewread(desc->AltNames);
5150
5151 while((subdesc = (AjPSeqSubdesc) ajListIterGet(it)))
5152 {
5153 alttext = altnames;
5154
5155 if(ajStrGetLen(subdesc->Name))
5156 {
5157 ajFmtPrintF(outseq->File,
5158 "DE %s Full=%S;\n", alttext, subdesc->Name);
5159 alttext = altspace;
5160 }
5161
5162 itb = ajListIterNewread(subdesc->Inn);
5163
5164 while((cur = (AjPStr) ajListIterGet(itb)))
5165 {
5166 ajFmtPrintF(outseq->File,
5167 "DE %s INN=%S;\n", alttext, cur);
5168 alttext = altspace;
5169 }
5170
5171 ajListIterDel(&itb);
5172
5173 itb = ajListIterNewread(subdesc->Short);
5174
5175 while((cur = (AjPStr) ajListIterGet(itb)))
5176 ajFmtPrintF(outseq->File,
5177 "DE Short=%S;\n", cur);
5178 ajListIterDel(&itb);
5179
5180 itb = ajListIterNewread(subdesc->EC);
5181
5182 while((cur = (AjPStr) ajListIterGet(itb)))
5183 ajFmtPrintF(outseq->File,
5184 "DE EC=%S;\n", cur);
5185 ajListIterDel(&itb);
5186
5187 itb = ajListIterNewread(subdesc->Allergen);
5188
5189 while((cur = (AjPStr) ajListIterGet(itb)))
5190 ajFmtPrintF(outseq->File,
5191 "DE Allergen=%S;\n", cur);
5192 ajListIterDel(&itb);
5193
5194 itb = ajListIterNewread(subdesc->Biotech);
5195
5196 while((cur = (AjPStr) ajListIterGet(itb)))
5197 ajFmtPrintF(outseq->File,
5198 "DE Biotech=%S;\n", cur);
5199 ajListIterDel(&itb);
5200
5201 itb = ajListIterNewread(subdesc->Cdantigen);
5202
5203 while((cur = (AjPStr) ajListIterGet(itb)))
5204 ajFmtPrintF(outseq->File,
5205 "DE CD_antigen=%S;\n", cur);
5206 ajListIterDel(&itb);
5207 }
5208
5209 ajListIterDel(&it);
5210
5211 it = ajListIterNewread(desc->SubNames);
5212
5213 while((subdesc = (AjPSeqSubdesc) ajListIterGet(it)))
5214 {
5215 ajFmtPrintF(outseq->File,
5216 "DE SubName: Full=%S;\n", subdesc->Name);
5217 itb = ajListIterNewread(subdesc->Short);
5218
5219 while((cur = (AjPStr) ajListIterGet(itb)))
5220 ajFmtPrintF(outseq->File,
5221 "DE Short=%S;\n", cur);
5222
5223 ajListIterDel(&itb);
5224 itb = ajListIterNewread(subdesc->EC);
5225
5226 while((cur = (AjPStr) ajListIterGet(itb)))
5227 ajFmtPrintF(outseq->File,
5228 "DE EC=%S;\n", cur);
5229 ajListIterDel(&itb);
5230 }
5231
5232 ajListIterDel(&it);
5233 }
5234
5235 ajListIterDel(&itc);
5236
5237 if(outseq->Fulldesc->Fragments || outseq->Fulldesc->Precursor)
5238 {
5239 ajFmtPrintF(outseq->File,
5240 "DE Flags:");
5241
5242 if(outseq->Fulldesc->Fragments == 1)
5243 ajFmtPrintF(outseq->File,
5244 " Fragment;");
5245
5246 if(outseq->Fulldesc->Fragments == 2)
5247 ajFmtPrintF(outseq->File,
5248 " Fragments;");
5249
5250 if(outseq->Fulldesc->Precursor)
5251 ajFmtPrintF(outseq->File,
5252 " Precursor;");
5253
5254 ajWritebinNewline(outseq->File);
5255 }
5256
5257
5258 }
5259 else if(ajStrGetLen(outseq->Desc))
5260 ajFmtPrintF(outseq->File, "DE %S\n", outseq->Desc);
5261
5262 if(ajListGetLength(outseq->Genelist))
5263 {
5264 ajStrAssignClear(&tmpstr);
5265 it = ajListIterNewread(outseq->Genelist);
5266
5267 while ((seqgene = (const AjPSeqGene) ajListIterGet(it)))
5268 {
5269 if(ajStrGetLen(tmpstr))
5270 ajFmtPrintF(outseq->File,
5271 "GN and\n");
5272
5273 ajStrAssignClear(&tmpstr);
5274
5275 if(ajStrGetLen(seqgene->Name))
5276 ajFmtPrintAppS(&tmpstr, " Name=%S;", seqgene->Name);
5277
5278 if(ajStrGetLen(seqgene->Synonyms))
5279 ajFmtPrintAppS(&tmpstr, " Synonyms=%S;", seqgene->Synonyms);
5280
5281 if(ajStrGetLen(seqgene->Oln))
5282 ajFmtPrintAppS(&tmpstr, " OrderedLocusNames=%S;", seqgene->Oln);
5283
5284 if(ajStrGetLen(seqgene->Orf))
5285 ajFmtPrintAppS(&tmpstr, " ORFNames=%S;", seqgene->Orf);
5286
5287 if(ajStrGetLen(tmpstr))
5288 ajFmtPrintF(outseq->File,
5289 "GN %S\n", tmpstr);
5290 }
5291
5292 ajListIterDel(&it);
5293 }
5294
5295 if(ajStrGetLen(outseq->Tax))
5296 {
5297 ajStrAssignS(&tmpstr, outseq->Tax);
5298
5299 if(ajStrGetLen(outseq->Taxcommon))
5300 {
5301 ajStrAppendC(&tmpstr, " (");
5302 ajStrAppendS(&tmpstr, outseq->Taxcommon);
5303 ajStrAppendK(&tmpstr, ')');
5304 }
5305
5306 ajStrAppendK(&tmpstr, '.');
5307 ajStrFmtWrap(&tmpstr, 75);
5308 tmpline = ajStrParseC(tmpstr, "\n");
5309
5310 while (tmpline)
5311 {
5312 ajFmtPrintF(outseq->File, "OS %S\n", tmpline);
5313 tmpline = ajStrParseC(NULL, "\n");
5314 }
5315 }
5316
5317 if(ajStrGetLen(outseq->Organelle))
5318 ajFmtPrintF(outseq->File, "OG %S.\n", outseq->Organelle);
5319
5320 if(ajListGetLength(outseq->Taxlist) > 1)
5321 {
5322 ilen = 0;
5323 it = ajListIterNewread(outseq->Taxlist);
5324
5325 while((cur = (AjPStr) ajListIterGet(it)))
5326 {
5327 if(ilen+ajStrGetLen(cur) >= 74)
5328 {
5329 ajFmtPrintF(outseq->File, ";\n");
5330 ilen = 0;
5331 }
5332
5333 if(ilen == 0)
5334 {
5335 ajFmtPrintF(outseq->File, "OC ");
5336 ilen = 6;
5337 }
5338 else
5339 {
5340 ajFmtPrintF(outseq->File, "; ");
5341 ilen += 2;
5342 }
5343 ajWriteline(outseq->File, cur);
5344 ilen += ajStrGetLen(cur);
5345 }
5346
5347 ajListIterDel(&it) ;
5348 ajFmtPrintF(outseq->File, ".\n");
5349 }
5350
5351 if(ajStrGetLen(outseq->Taxid))
5352 ajFmtPrintF(outseq->File, "OX NCBI_TaxID=%S;\n", outseq->Taxid);
5353
5354 if(ajListGetLength(outseq->Reflist))
5355 {
5356 it = ajListIterNewread(outseq->Reflist);
5357
5358 while ((seqref = (const AjPSeqRef) ajListIterGet(it)))
5359 {
5360 ajFmtPrintF(outseq->File, "RN [%u]\n", seqref->Number);
5361
5362 if(ajStrGetLen(seqref->Position))
5363 {
5364 ajStrAssignS(&tmpstr, seqref->Position);
5365 ajStrFmtWrap(&tmpstr, 70);
5366 tmpline = ajStrParseC(tmpstr, "\n");
5367
5368 while (tmpline)
5369 {
5370 ajFmtPrintF(outseq->File, "RP %S\n", tmpline);
5371 tmpline = ajStrParseC(NULL, "\n");
5372 }
5373 }
5374
5375 if(ajStrGetLen(seqref->Comment))
5376 {
5377 ajStrAssignS(&tmpstr, seqref->Comment);
5378 ajStrFmtWrap(&tmpstr, 70);
5379 tmpline = ajStrParseC(tmpstr, "\n");
5380
5381 while (tmpline)
5382 {
5383 ajFmtPrintF(outseq->File, "RC %S\n", tmpline);
5384 tmpline = ajStrParseC(NULL, "\n");
5385 }
5386 }
5387
5388 if(ajStrGetLen(seqref->Xref))
5389 {
5390 ajStrAssignS(&tmpstr, seqref->Xref);
5391 ajStrFmtWrap(&tmpstr, 70);
5392 tmpline = ajStrParseC(tmpstr, "\n");
5393
5394 while (tmpline)
5395 {
5396 ajFmtPrintF(outseq->File, "RX %S\n", tmpline);
5397 tmpline = ajStrParseC(NULL, "\n");
5398 }
5399 }
5400
5401 if(ajStrGetLen(seqref->Groupname))
5402 {
5403 ajStrAssignS(&tmpstr, seqref->Groupname);
5404 ajStrFmtWrap(&tmpstr, 70);
5405 tmpline = ajStrParseC(tmpstr, "\n");
5406
5407 while (tmpline)
5408 {
5409 ajFmtPrintF(outseq->File, "RG %S\n", tmpline);
5410 tmpline = ajStrParseC(NULL, "\n");
5411 }
5412 }
5413
5414 if(ajStrGetLen(seqref->Authors))
5415 {
5416 ajStrAssignS(&tmpstr, seqref->Authors);
5417 ajStrAppendK(&tmpstr, ';');
5418 ajStrFmtWrapAt(&tmpstr, 70, ',');
5419 tmpline = ajStrParseC(tmpstr, "\n");
5420
5421 while (tmpline)
5422 {
5423 ajFmtPrintF(outseq->File, "RA %S\n", tmpline);
5424 tmpline = ajStrParseC(NULL, "\n");
5425 }
5426 }
5427
5428 if(ajStrGetLen(seqref->Title))
5429 {
5430 ajStrAssignS(&tmpstr, seqref->Title);
5431 ajStrInsertC(&tmpstr, 0, "\"");
5432 ajStrAppendC(&tmpstr, "\";");
5433 ajStrFmtWrap(&tmpstr, 70);
5434 tmpline = ajStrParseC(tmpstr, "\n");
5435
5436 while (tmpline)
5437 {
5438 ajFmtPrintF(outseq->File, "RT %S\n", tmpline);
5439 tmpline = ajStrParseC(NULL, "\n");
5440 }
5441 }
5442
5443 if(ajStrGetLen(seqref->Location))
5444 {
5445 ajStrAssignS(&tmpstr, seqref->Location);
5446 ajStrAppendK(&tmpstr, '.');
5447 ajStrFmtWrap(&tmpstr, 70);
5448 tmpline = ajStrParseC(tmpstr, "\n");
5449
5450 while (tmpline)
5451 {
5452 ajFmtPrintF(outseq->File, "RL %S\n", tmpline);
5453 tmpline = ajStrParseC(NULL, "\n");
5454 }
5455 }
5456 }
5457 ajListIterDel(&it);
5458 }
5459
5460 if(ajListGetLength(outseq->Cmtlist))
5461 {
5462 it = ajListIterNewread(outseq->Cmtlist);
5463
5464 while ((cmtstr = (const AjPStr) ajListIterGet(it)))
5465 {
5466 ajStrAssignS(&tmpstr, cmtstr);
5467 tmpline = ajStrParseC(tmpstr, "\n");
5468
5469 while (tmpline)
5470 {
5471 if(ajStrMatchC(tmpline, " "))
5472 ajFmtPrintF(outseq->File, "CC \n");
5473 else
5474 ajFmtPrintF(outseq->File, "CC %S\n", tmpline);
5475
5476 tmpline = ajStrParseC(NULL, "\n");
5477 }
5478 }
5479
5480 ajListIterDel(&it);
5481 }
5482
5483
5484 if(ajListGetLength(outseq->Xreflist))
5485 {
5486 it = ajListIterNewread(outseq->Xreflist);
5487
5488 while ((xref = (const AjPSeqXref) ajListIterGet(it)))
5489 {
5490 if(xref->Type == XREF_DR)
5491 {
5492 if(ajStrGetLen(xref->Quatid))
5493 {
5494 ajFmtPrintF(outseq->File, "DR %S; %S; %S; %S; %S.\n",
5495 xref->Db, xref->Id, xref->Secid,
5496 xref->Terid, xref->Quatid);
5497 }
5498 else if(ajStrGetLen(xref->Terid))
5499 {
5500 ajFmtPrintF(outseq->File, "DR %S; %S; %S; %S.\n",
5501 xref->Db, xref->Id, xref->Secid, xref->Terid);
5502 }
5503 else if(ajStrGetLen(xref->Secid))
5504 {
5505 ajFmtPrintF(outseq->File, "DR %S; %S; %S.\n",
5506 xref->Db, xref->Id, xref->Secid);
5507 }
5508 else
5509 {
5510 ajFmtPrintF(outseq->File, "DR %S; %S.\n",
5511 xref->Db, xref->Id);
5512 }
5513 }
5514 }
5515
5516 ajListIterDel(&it);
5517 }
5518
5519 if(ajStrGetLen(outseq->Evidence))
5520 ajFmtPrintF(outseq->File, "PE %S\n", outseq->Evidence);
5521
5522 if(ajListGetLength(outseq->Keylist))
5523 {
5524 ilen = 0;
5525 it = ajListIterNewread(outseq->Keylist);
5526
5527 while((cur = (AjPStr) ajListIterGet(it)))
5528 {
5529 if(ilen+ajStrGetLen(cur) >= 74)
5530 {
5531 ajFmtPrintF(outseq->File, ";\n");
5532 ilen = 0;
5533 }
5534
5535 if(ilen == 0)
5536 {
5537 ajFmtPrintF(outseq->File, "KW ");
5538 ilen = 6;
5539 }
5540 else
5541 {
5542 ajFmtPrintF(outseq->File, "; ");
5543 ilen += 2;
5544 }
5545
5546 ajWriteline(outseq->File, cur);
5547 ilen += ajStrGetLen(cur);
5548 }
5549
5550 ajListIterDel(&it) ;
5551 ajFmtPrintF(outseq->File, ".\n");
5552 }
5553
5554 if(seqoutUfoLocal(outseq))
5555 {
5556 ajFeattabOutDel(&outseq->Ftquery);
5557 outseq->Ftquery = ajFeattabOutNewCSF("swiss", outseq->Name,
5558 ajStrGetPtr(outseq->Type),
5559 outseq->File);
5560 if(!ajFeattableWrite(outseq->Ftquery, outseq->Fttable))
5561 ajWarn("seqWriteSwiss features output failed UFO: '%S'",
5562 outseq->Ufo);
5563 }
5564
5565 crc = ajMathCrc64(outseq->Seq);
5566 mw = (ajint) (0.5+ajSeqstrCalcMolwt(outseq->Seq));
5567
5568 /* old 32-bit crc
5569 ajFmtPrintF(outseq->File,
5570 "SQ SEQUENCE %5d AA; %6d MW; %08X CRC32;\n",
5571 ajStrGetLen(outseq->Seq), mw, crc);
5572 */
5573
5574 ajFmtPrintF(outseq->File,
5575 "SQ SEQUENCE %d AA; %d MW; %08X",
5576 ajStrGetLen(outseq->Seq), mw, (crc>>32)&0xffffffff);
5577 ajFmtPrintF(outseq->File,
5578 "%08X CRC64;\n",crc&0xffffffff);
5579
5580 seqSeqFormat(ajStrGetLen(outseq->Seq), &sf);
5581 strcpy(sf->endstr, "\n//");
5582 sf->tab = 4;
5583 sf->spacer = 11;
5584 sf->width = 60;
5585
5586 seqWriteSeq(outseq, sf);
5587 seqFormatDel(&sf);
5588
5589 ajStrDel(&tmpstr);
5590
5591 return;
5592 }
5593
5594
5595
5596
5597 /* @funcstatic seqWriteGenbank ************************************************
5598 **
5599 ** Writes a sequence in GENBANK format.
5600 **
5601 ** @param [u] outseq [AjPSeqout] Sequence output object.
5602 ** @return [void]
5603 **
5604 ** @release 1.0.0
5605 ** @@
5606 ******************************************************************************/
5607
seqWriteGenbank(AjPSeqout outseq)5608 static void seqWriteGenbank(AjPSeqout outseq)
5609 {
5610
5611 static SeqPSeqFormat sf = NULL;
5612 /*ajuint b[5];*/ /* was used for BASE COUNT line */
5613 AjPStr ftfmt = NULL;
5614 const AjPStr cmtstr = NULL;
5615 AjPStr tmpstr = NULL;
5616 AjPStr tmpstr2 = NULL;
5617 const AjPStr tmpline = NULL;
5618 const AjPSeqRef seqref = NULL;
5619 AjIList it;
5620 AjPStr cur;
5621 ajuint ilen;
5622 AjBool firstcmt = ajTrue;
5623
5624 if(!ftfmt)
5625 ajStrAssignC(&ftfmt, "genbank");
5626
5627 ajSeqoutTrace(outseq);
5628
5629 ajFmtPrintF(outseq->File, "LOCUS %-17S %10u bp ",
5630 outseq->Name, ajStrGetLen(outseq->Seq));
5631
5632 ajFmtPrintF(outseq->File, " %-7s",
5633 ajSeqmolGetGb(outseq->Molecule));
5634
5635 if(outseq->Circular)
5636 ajFmtPrintF(outseq->File, " %-8s", "circular");
5637 else
5638 ajFmtPrintF(outseq->File, " %-8s", "linear");
5639
5640 if(ajStrGetLen(outseq->Division))
5641 ajFmtPrintF(outseq->File, " %-3s",ajSeqdivGetGb(outseq->Division));
5642 else if(ajStrGetLen(outseq->Class))
5643 ajFmtPrintF(outseq->File, " %-3s",ajSeqclsGetGb(outseq->Class));
5644 else
5645 ajFmtPrintF(outseq->File, " UNC");
5646
5647 if(outseq->Date)
5648 {
5649 if(outseq->Date->ModDate)
5650 ajFmtPrintF(outseq->File, " %D", outseq->Date->ModDate);
5651 else if(outseq->Date->CreDate)
5652 ajFmtPrintF(outseq->File, " %D", outseq->Date->CreDate);
5653 }
5654 else
5655 ajFmtPrintF(outseq->File, " %D", ajTimeRefTodayFmt("dtline"));
5656
5657 ajWritebinNewline(outseq->File);
5658
5659 if(ajStrGetLen(outseq->Desc))
5660 {
5661 ajStrAssignS(&tmpstr, outseq->Desc);
5662
5663 if(ajStrGetCharLast(tmpstr) != '.')
5664 ajStrAppendK(&tmpstr, '.');
5665
5666 ajStrFmtWrap(&tmpstr, 67);
5667 tmpline = ajStrParseC(tmpstr, "\n");
5668 ajFmtPrintF(outseq->File, "DEFINITION %S\n", tmpline);
5669 tmpline = ajStrParseC(NULL, "\n");
5670
5671 while (tmpline)
5672 {
5673 ajFmtPrintF(outseq->File, " %S\n", tmpline);
5674 tmpline = ajStrParseC(NULL, "\n");
5675 }
5676 }
5677
5678 if(ajListGetLength(outseq->Acclist))
5679 {
5680 ilen = 0;
5681 it = ajListIterNewread(outseq->Acclist);
5682 while((cur = (AjPStr) ajListIterGet(it)))
5683 {
5684 if(ilen == 0)
5685 {
5686 ajFmtPrintF(outseq->File, "ACCESSION ");
5687 ilen = 11;
5688 }
5689
5690 if(ilen + ajStrGetLen(cur) > 79)
5691 {
5692 ajFmtPrintF(outseq->File, "\n ");
5693 ilen = 11;
5694 }
5695
5696 if(ilen > 11)
5697 ajFmtPrintF(outseq->File, " ");
5698 ilen += 1;
5699
5700 ajWriteline(outseq->File, cur);
5701 ilen += ajStrGetLen(cur);
5702
5703 }
5704
5705 ajListIterDel(&it);
5706
5707 if(ilen > 0)
5708 ajWritebinNewline(outseq->File);
5709 }
5710
5711 if(ajStrGetLen(outseq->Sv))
5712 {
5713 if(ajStrGetLen(outseq->Gi))
5714 ajFmtPrintF(outseq->File, "VERSION %S GI:%S\n",
5715 outseq->Sv, outseq->Gi);
5716 else
5717 ajFmtPrintF(outseq->File, "VERSION %S\n", outseq->Sv);
5718 }
5719
5720 if(ajListGetLength(outseq->Keylist))
5721 {
5722 ilen = 0;
5723 it = ajListIterNewread(outseq->Keylist);
5724
5725 while((cur = (AjPStr) ajListIterGet(it)))
5726 {
5727 if(ilen == 0)
5728 {
5729 ajFmtPrintF(outseq->File, "KEYWORDS ");
5730 ilen = 11;
5731 }
5732
5733 if(ilen+ajStrGetLen(cur) >= 79)
5734 {
5735 ajFmtPrintF(outseq->File, ";\n ");
5736 ilen = 11;
5737 }
5738
5739 if(ilen > 11)
5740 ajFmtPrintF(outseq->File, "; ");
5741
5742 ilen += 2;
5743
5744 ajWriteline(outseq->File, cur);
5745 ilen += ajStrGetLen(cur);
5746 }
5747
5748 ajListIterDel(&it) ;
5749 ajFmtPrintF(outseq->File, ".\n");
5750 }
5751
5752 if(ajStrGetLen(outseq->Tax))
5753 {
5754 if(ajStrGetLen(outseq->Taxcommon))
5755 ajFmtPrintF(outseq->File, "SOURCE %S (%S)\n",
5756 outseq->Tax, outseq->Taxcommon);
5757 else
5758 ajFmtPrintF(outseq->File, "SOURCE %S\n", outseq->Tax);
5759
5760 ajFmtPrintF(outseq->File, " ORGANISM %S\n", outseq->Tax);
5761
5762 if(ajListGetLength(outseq->Taxlist))
5763 {
5764 ilen = 0;
5765 it = ajListIterNewread(outseq->Taxlist);
5766
5767 while((cur = (AjPStr) ajListIterGet(it)))
5768 {
5769 if(ilen+ajStrGetLen(cur) >= 79)
5770 {
5771 ajFmtPrintF(outseq->File, ";\n");
5772 ilen = 0;
5773 }
5774
5775 if(ilen == 0)
5776 {
5777 ajFmtPrintF(outseq->File, " ");
5778 ilen = 12;
5779 }
5780 else
5781 {
5782 ajFmtPrintF(outseq->File, "; ");
5783 ilen += 2;
5784 }
5785 ajWriteline(outseq->File, cur);
5786 ilen += ajStrGetLen(cur);
5787 }
5788
5789 ajListIterDel(&it) ;
5790 ajFmtPrintF(outseq->File, ".\n");
5791 }
5792 }
5793
5794 if(ajListGetLength(outseq->Reflist))
5795 {
5796 it = ajListIterNewread(outseq->Reflist);
5797
5798 while ((seqref = (const AjPSeqRef) ajListIterGet(it)))
5799 {
5800 ajFmtPrintF(outseq->File, "REFERENCE %u", seqref->Number);
5801
5802 if(ajStrGetLen(seqref->Position))
5803 {
5804 ajStrAssignS(&tmpstr, seqref->Position);
5805 ajStrExchangeCC(&tmpstr, "-", " to ");
5806 ajFmtPrintF(outseq->File, " (bases %S)", tmpstr);
5807 }
5808
5809 ajWritebinNewline(outseq->File);
5810
5811 if(ajStrGetLen(seqref->Authors))
5812 {
5813 ajSeqrefFmtAuthorsGb(seqref, &tmpstr);
5814 ajStrFmtWrapAt(&tmpstr, 68, ',');
5815 tmpline = ajStrParseC(tmpstr, "\n");
5816 ajFmtPrintF(outseq->File, " AUTHORS %S\n", tmpline);
5817 tmpline = ajStrParseC(NULL, "\n");
5818
5819 while (tmpline)
5820 {
5821 ajFmtPrintF(outseq->File, " %S\n", tmpline);
5822 tmpline = ajStrParseC(NULL, "\n");
5823 }
5824 }
5825
5826 ajSeqrefFmtTitleGb(seqref, &tmpstr); /* may set an empty title */
5827
5828 if(ajStrGetLen(tmpstr))
5829 {
5830 ajStrFmtWrap(&tmpstr, 68);
5831 tmpline = ajStrParseC(tmpstr, "\n");
5832 ajFmtPrintF(outseq->File, " TITLE %S\n", tmpline);
5833 tmpline = ajStrParseC(NULL, "\n");
5834
5835 while (tmpline)
5836 {
5837 ajFmtPrintF(outseq->File, " %S\n", tmpline);
5838 tmpline = ajStrParseC(NULL, "\n");
5839 }
5840 }
5841
5842 if(ajStrGetLen(seqref->Location))
5843 {
5844 ajSeqrefFmtLocationGb(seqref, &tmpstr);
5845 ajStrFmtWrap(&tmpstr, 68);
5846 tmpline = ajStrParseC(tmpstr, "\n");
5847 ajFmtPrintF(outseq->File, " JOURNAL %S\n", tmpline);
5848 tmpline = ajStrParseC(NULL, "\n");
5849
5850 while (tmpline)
5851 {
5852 ajFmtPrintF(outseq->File, " %S\n", tmpline);
5853 tmpline = ajStrParseC(NULL, "\n");
5854 }
5855 }
5856
5857 if(ajStrGetLen(seqref->Xref))
5858 {
5859 ajStrAssignS(&tmpstr, seqref->Xref);
5860 ajStrFmtWrap(&tmpstr, 75);
5861 tmpline = ajStrParseC(tmpstr, "\n");
5862
5863 while (tmpline)
5864 {
5865 if(ajStrPrefixC(tmpline, "PUBMED; "))
5866 {
5867 ajStrAssignSubS(&tmpstr2, tmpline, 8, -1);
5868 ajFmtPrintF(outseq->File, " PUBMED %S\n", tmpstr2);
5869 }
5870
5871 tmpline = ajStrParseC(NULL, "\n");
5872 }
5873 }
5874
5875 }
5876
5877 ajListIterDel(&it);
5878 }
5879
5880 if(ajListGetLength(outseq->Cmtlist))
5881 {
5882 it = ajListIterNewread(outseq->Cmtlist);
5883
5884 while ((cmtstr = (const AjPStr) ajListIterGet(it)))
5885 {
5886 ajStrAssignS(&tmpstr, cmtstr);
5887 ajStrFmtWrapAt(&tmpstr, 68, ',');
5888 tmpline = ajStrParseC(tmpstr, "\n");
5889
5890 if(firstcmt)
5891 {
5892 firstcmt = ajFalse;
5893 ajFmtPrintF(outseq->File, "COMMENT %S\n", tmpline);
5894 }
5895 else
5896 {
5897 ajFmtPrintF(outseq->File, " \n");
5898 ajFmtPrintF(outseq->File, " %S\n", tmpline);
5899 }
5900
5901 tmpline = ajStrParseC(NULL, "\n");
5902
5903 while (tmpline)
5904 {
5905 ajFmtPrintF(outseq->File, " %S\n", tmpline);
5906 tmpline = ajStrParseC(NULL, "\n");
5907 }
5908 }
5909
5910 ajListIterDel(&it);
5911 }
5912
5913 if(seqoutUfoLocal(outseq))
5914 {
5915 ajFeattabOutDel(&outseq->Ftquery);
5916 outseq->Ftquery = ajFeattabOutNewSSF(ftfmt, outseq->Name,
5917 ajStrGetPtr(outseq->Type),
5918 outseq->File);
5919 if(!ajFeattableWrite(outseq->Ftquery, outseq->Fttable))
5920 ajWarn("seqWriteGenbank features output failed UFO: '%S'",
5921 outseq->Ufo);
5922 }
5923
5924 /* no longer used by GenBank */
5925 /*
5926 ajSeqoutGetBasecount(outseq, b);
5927 if(b[4])
5928 ajFmtPrintF(outseq->File,
5929 "BASE COUNT %6d a %6d c %6d g %6d t %6d others\n",
5930 b[0], b[1], b[2], b[3], b[4]);
5931 else
5932 ajFmtPrintF(outseq->File,
5933 "BASE COUNT %6d a %6d c %6d g %6d t\n",
5934 b[0], b[1], b[2], b[3]);
5935 */
5936
5937 ajFmtPrintF(outseq->File, "ORIGIN\n");
5938
5939 seqSeqFormat(ajStrGetLen(outseq->Seq), &sf);
5940 strcpy(sf->endstr, "\n//");
5941 sf->tab = 0;
5942 sf->spacer = 11;
5943 sf->width = 60;
5944 sf->numleft = ajTrue;
5945 sf->numwidth = 9;
5946
5947 seqWriteSeq(outseq, sf);
5948 seqFormatDel(&sf);
5949 ajStrDel(&ftfmt);
5950 ajStrDel(&tmpstr);
5951 ajStrDel(&tmpstr2);
5952
5953 return;
5954 }
5955
5956
5957
5958
5959 /* @funcstatic seqWriteGenpept ************************************************
5960 **
5961 ** Writes a sequence in GENPEPT format.
5962 **
5963 ** @param [u] outseq [AjPSeqout] Sequence output object.
5964 ** @return [void]
5965 **
5966 ** @release 6.2.0
5967 ** @@
5968 ******************************************************************************/
5969
static void seqWriteGenpept(AjPSeqout outseq)
{
    static SeqPSeqFormat seqfmt = NULL;
    AjPFile outf = outseq->File;
    AjPStr featfmt = NULL;
    AjPStr wrapped = NULL;
    AjPStr pubmedid = NULL;
    const AjPStr comment = NULL;
    const AjPStr line = NULL;
    const AjPSeqRef ref = NULL;
    AjIList iter;
    AjPStr item;
    ajuint col;
    AjBool firstcmt = ajTrue;

    ajStrAssignC(&featfmt, "genpept");

    ajSeqoutTrace(outseq);

    /* LOCUS line: name, length, molecule, topology, division, date */
    ajFmtPrintF(outf, "LOCUS %-17S %10u bp ",
                outseq->Name, ajStrGetLen(outseq->Seq));
    ajFmtPrintF(outf, " %-7s", ajSeqmolGetGb(outseq->Molecule));
    ajFmtPrintF(outf, " %-8s", outseq->Circular ? "circular" : "linear");

    if(!ajStrGetLen(outseq->Division))
        ajFmtPrintF(outf, " UNC");
    else
        ajFmtPrintF(outf, " %-3s", ajSeqdivGetGb(outseq->Division));

    /* today's date is used only when there is no date object at all */
    if(!outseq->Date)
        ajFmtPrintF(outf, " %D", ajTimeRefTodayFmt("dtline"));
    else if(outseq->Date->ModDate)
        ajFmtPrintF(outf, " %D", outseq->Date->ModDate);
    else if(outseq->Date->CreDate)
        ajFmtPrintF(outf, " %D", outseq->Date->CreDate);

    ajWritebinNewline(outf);

    /* DEFINITION: description, forced to end with '.', one wrapped
    ** line per output line */
    if(ajStrGetLen(outseq->Desc))
    {
        ajStrAssignS(&wrapped, outseq->Desc);

        if(ajStrGetCharLast(wrapped) != '.')
            ajStrAppendK(&wrapped, '.');

        ajStrFmtWrap(&wrapped, 67);

        line = ajStrParseC(wrapped, "\n");
        ajFmtPrintF(outf, "DEFINITION %S\n", line);

        for(line = ajStrParseC(NULL, "\n"); line;
            line = ajStrParseC(NULL, "\n"))
            ajFmtPrintF(outf, " %S\n", line);
    }

    /* ACCESSION: space separated; col is the running column counter,
    ** reset to 11 after the label or a continuation prefix */
    if(ajListGetLength(outseq->Acclist))
    {
        col = 0;
        iter = ajListIterNewread(outseq->Acclist);

        for(item = (AjPStr) ajListIterGet(iter); item;
            item = (AjPStr) ajListIterGet(iter))
        {
            if(!col)
            {
                ajFmtPrintF(outf, "ACCESSION ");
                col = 11;
            }

            if(col + ajStrGetLen(item) > 79)
            {
                ajFmtPrintF(outf, "\n ");
                col = 11;
            }

            if(col > 11)
                ajFmtPrintF(outf, " ");

            ajWriteline(outf, item);

            /* the separator column is counted for every item */
            col += 1 + ajStrGetLen(item);
        }

        ajListIterDel(&iter);

        if(col)
            ajWritebinNewline(outf);
    }

    /* VERSION: sequence version, with the GI number when known */
    if(ajStrGetLen(outseq->Sv))
    {
        if(!ajStrGetLen(outseq->Gi))
            ajFmtPrintF(outf, "VERSION %S\n", outseq->Sv);
        else
            ajFmtPrintF(outf, "VERSION %S GI:%S\n",
                        outseq->Sv, outseq->Gi);
    }

    /* KEYWORDS: "; " separated list closed by '.' */
    if(ajListGetLength(outseq->Keylist))
    {
        col = 0;
        iter = ajListIterNewread(outseq->Keylist);

        for(item = (AjPStr) ajListIterGet(iter); item;
            item = (AjPStr) ajListIterGet(iter))
        {
            if(!col)
            {
                ajFmtPrintF(outf, "KEYWORDS ");
                col = 11;
            }

            if(col + ajStrGetLen(item) >= 79)
            {
                ajFmtPrintF(outf, ";\n ");
                col = 11;
            }

            if(col > 11)
                ajFmtPrintF(outf, "; ");

            ajWriteline(outf, item);

            /* two separator columns are counted for every item */
            col += 2 + ajStrGetLen(item);
        }

        ajListIterDel(&iter);
        ajFmtPrintF(outf, ".\n");
    }

    /* SOURCE / ORGANISM, followed by the taxonomy list when present */
    if(ajStrGetLen(outseq->Tax))
    {
        ajFmtPrintF(outf, "SOURCE %S\n", outseq->Tax);
        ajFmtPrintF(outf, " ORGANISM %S\n", outseq->Tax);

        if(ajListGetLength(outseq->Taxlist))
        {
            col = 0;
            iter = ajListIterNewread(outseq->Taxlist);

            for(item = (AjPStr) ajListIterGet(iter); item;
                item = (AjPStr) ajListIterGet(iter))
            {
                if(col + ajStrGetLen(item) >= 79)
                {
                    ajFmtPrintF(outf, ";\n");
                    col = 0;
                }

                if(col)
                {
                    ajFmtPrintF(outf, "; ");
                    col += 2;
                }
                else
                {
                    ajFmtPrintF(outf, " ");
                    col = 12;
                }

                ajWriteline(outf, item);
                col += ajStrGetLen(item);
            }

            ajListIterDel(&iter);
            ajFmtPrintF(outf, ".\n");
        }
    }

    /* REFERENCE blocks: position, authors, title, journal, PubMed id */
    if(ajListGetLength(outseq->Reflist))
    {
        iter = ajListIterNewread(outseq->Reflist);

        for(ref = (const AjPSeqRef) ajListIterGet(iter); ref;
            ref = (const AjPSeqRef) ajListIterGet(iter))
        {
            ajFmtPrintF(outf, "REFERENCE %u", ref->Number);

            if(ajStrGetLen(ref->Position))
            {
                ajStrAssignS(&wrapped, ref->Position);
                ajStrExchangeCC(&wrapped, "-", " to ");
                ajFmtPrintF(outf, " (bases %S)", wrapped);
            }

            ajWritebinNewline(outf);

            if(ajStrGetLen(ref->Authors))
            {
                ajSeqrefFmtAuthorsGb(ref, &wrapped);
                ajStrFmtWrapAt(&wrapped, 68, ',');

                line = ajStrParseC(wrapped, "\n");
                ajFmtPrintF(outf, " AUTHORS %S\n", line);

                for(line = ajStrParseC(NULL, "\n"); line;
                    line = ajStrParseC(NULL, "\n"))
                    ajFmtPrintF(outf, " %S\n", line);
            }

            /* the formatter may leave an empty title: test the result */
            ajSeqrefFmtTitleGb(ref, &wrapped);

            if(ajStrGetLen(wrapped))
            {
                ajStrFmtWrap(&wrapped, 68);

                line = ajStrParseC(wrapped, "\n");
                ajFmtPrintF(outf, " TITLE %S\n", line);

                for(line = ajStrParseC(NULL, "\n"); line;
                    line = ajStrParseC(NULL, "\n"))
                    ajFmtPrintF(outf, " %S\n", line);
            }

            if(ajStrGetLen(ref->Location))
            {
                ajSeqrefFmtLocationGb(ref, &wrapped);
                ajStrFmtWrap(&wrapped, 68);

                line = ajStrParseC(wrapped, "\n");
                ajFmtPrintF(outf, " JOURNAL %S\n", line);

                for(line = ajStrParseC(NULL, "\n"); line;
                    line = ajStrParseC(NULL, "\n"))
                    ajFmtPrintF(outf, " %S\n", line);
            }

            /* only cross-reference lines starting "PUBMED; " are written */
            if(ajStrGetLen(ref->Xref))
            {
                ajStrAssignS(&wrapped, ref->Xref);
                ajStrFmtWrap(&wrapped, 75);

                for(line = ajStrParseC(wrapped, "\n"); line;
                    line = ajStrParseC(NULL, "\n"))
                {
                    if(!ajStrPrefixC(line, "PUBMED; "))
                        continue;

                    ajStrAssignSubS(&pubmedid, line, 8, -1);
                    ajFmtPrintF(outf, " PUBMED %S\n", pubmedid);
                }
            }
        }

        ajListIterDel(&iter);
    }

    /* COMMENT: the label is printed once; later comments are preceded
    ** by a separator line */
    if(ajListGetLength(outseq->Cmtlist))
    {
        iter = ajListIterNewread(outseq->Cmtlist);

        for(comment = (const AjPStr) ajListIterGet(iter); comment;
            comment = (const AjPStr) ajListIterGet(iter))
        {
            ajStrAssignS(&wrapped, comment);
            ajStrFmtWrapAt(&wrapped, 68, ',');
            line = ajStrParseC(wrapped, "\n");

            if(!firstcmt)
            {
                ajFmtPrintF(outf, " \n");
                ajFmtPrintF(outf, " %S\n", line);
            }
            else
            {
                firstcmt = ajFalse;
                ajFmtPrintF(outf, "COMMENT %S\n", line);
            }

            for(line = ajStrParseC(NULL, "\n"); line;
                line = ajStrParseC(NULL, "\n"))
                ajFmtPrintF(outf, " %S\n", line);
        }

        ajListIterDel(&iter);
    }

    /* feature table goes into the same file when the UFO is local */
    if(seqoutUfoLocal(outseq))
    {
        ajFeattabOutDel(&outseq->Ftquery);
        outseq->Ftquery = ajFeattabOutNewSSF(featfmt, outseq->Name,
                                             ajStrGetPtr(outseq->Type),
                                             outf);

        if(!ajFeattableWrite(outseq->Ftquery, outseq->Fttable))
            ajWarn("seqWriteGenpept features output failed UFO: '%S'",
                   outseq->Ufo);
    }

    ajFmtPrintF(outf, "ORIGIN\n");

    /* sequence block layout, terminated by the "//" record separator */
    seqSeqFormat(ajStrGetLen(outseq->Seq), &seqfmt);
    strcpy(seqfmt->endstr, "\n//");
    seqfmt->tab = 1;
    seqfmt->spacer = 11;
    seqfmt->width = 60;
    seqfmt->numleft = ajTrue;
    seqfmt->numwidth = 8;

    seqWriteSeq(outseq, seqfmt);

    seqFormatDel(&seqfmt);
    ajStrDel(&featfmt);
    ajStrDel(&wrapped);
    ajStrDel(&pubmedid);

    return;
}
6298
6299
6300
6301
6302 /* @funcstatic seqWriteRefseq *************************************************
6303 **
6304 ** Writes a sequence in REFSEQ format.
6305 **
6306 ** @param [u] outseq [AjPSeqout] Sequence output object.
6307 ** @return [void]
6308 **
6309 ** @release 6.2.0
6310 ** @@
6311 ******************************************************************************/
6312
seqWriteRefseq(AjPSeqout outseq)6313 static void seqWriteRefseq(AjPSeqout outseq)
6314 {
6315
6316 static SeqPSeqFormat sf = NULL;
6317 /*ajuint b[5];*/ /* was used for BASE COUNT line */
6318 AjPStr ftfmt = NULL;
6319 AjPStr tmpstr = NULL;
6320 AjPStr tmpstr2 = NULL;
6321 const AjPStr cmtstr = NULL;
6322 const AjPStr tmpline = NULL;
6323 const AjPSeqRef seqref = NULL;
6324 AjIList it;
6325 AjPStr cur;
6326 ajuint ilen;
6327 AjBool firstcmt = ajTrue;
6328
6329 if(!ftfmt)
6330 ajStrAssignC(&ftfmt, "refseq");
6331
6332 ajSeqoutTrace(outseq);
6333
6334 ajFmtPrintF(outseq->File, "LOCUS %-17S %10u bp ",
6335 outseq->Name, ajStrGetLen(outseq->Seq));
6336
6337 ajFmtPrintF(outseq->File, " %-7s",
6338 ajSeqmolGetGb(outseq->Molecule));
6339
6340 if(outseq->Circular)
6341 ajFmtPrintF(outseq->File, " %-8s", "circular");
6342 else
6343 ajFmtPrintF(outseq->File, " %-8s", "linear");
6344
6345 if(ajStrGetLen(outseq->Division))
6346 ajFmtPrintF(outseq->File, " %-3s",ajSeqdivGetGb(outseq->Division));
6347 else
6348 ajFmtPrintF(outseq->File, " UNC");
6349
6350 if(outseq->Date)
6351 {
6352 if(outseq->Date->ModDate)
6353 ajFmtPrintF(outseq->File, " %D", outseq->Date->ModDate);
6354 else if(outseq->Date->CreDate)
6355 ajFmtPrintF(outseq->File, " %D", outseq->Date->CreDate);
6356 }
6357 else
6358 ajFmtPrintF(outseq->File, " %D", ajTimeRefTodayFmt("dtline"));
6359
6360 ajWritebinNewline(outseq->File);
6361
6362 if(ajStrGetLen(outseq->Desc))
6363 {
6364 ajStrAssignS(&tmpstr, outseq->Desc);
6365
6366 if(ajStrGetCharLast(tmpstr) != '.')
6367 ajStrAppendK(&tmpstr, '.');
6368
6369 ajStrFmtWrap(&tmpstr, 67);
6370 tmpline = ajStrParseC(tmpstr, "\n");
6371 ajFmtPrintF(outseq->File, "DEFINITION %S\n", tmpline);
6372 tmpline = ajStrParseC(NULL, "\n");
6373
6374 while (tmpline)
6375 {
6376 ajFmtPrintF(outseq->File, " %S\n", tmpline);
6377 tmpline = ajStrParseC(NULL, "\n");
6378 }
6379 }
6380
6381 if(ajListGetLength(outseq->Acclist))
6382 {
6383 ilen = 0;
6384 it = ajListIterNewread(outseq->Acclist);
6385 while((cur = (AjPStr) ajListIterGet(it)))
6386 {
6387 if(ilen == 0)
6388 {
6389 ajFmtPrintF(outseq->File, "ACCESSION ");
6390 ilen = 11;
6391 }
6392
6393 if(ilen + ajStrGetLen(cur) > 79)
6394 {
6395 ajFmtPrintF(outseq->File, "\n ");
6396 ilen = 11;
6397 }
6398
6399 if(ilen > 11)
6400 ajFmtPrintF(outseq->File, " ");
6401 ilen += 1;
6402
6403 ajWriteline(outseq->File, cur);
6404 ilen += ajStrGetLen(cur);
6405
6406 }
6407
6408 ajListIterDel(&it);
6409
6410 if(ilen > 0)
6411 ajWritebinNewline(outseq->File);
6412 }
6413
6414 if(ajStrGetLen(outseq->Sv))
6415 {
6416 if(ajStrGetLen(outseq->Gi))
6417 ajFmtPrintF(outseq->File, "VERSION %S GI:%S\n",
6418 outseq->Sv, outseq->Gi);
6419 else
6420 ajFmtPrintF(outseq->File, "VERSION %S\n", outseq->Sv);
6421 }
6422
6423 if(ajListGetLength(outseq->Keylist))
6424 {
6425 ilen = 0;
6426 it = ajListIterNewread(outseq->Keylist);
6427
6428 while((cur = (AjPStr) ajListIterGet(it)))
6429 {
6430 if(ilen == 0)
6431 {
6432 ajFmtPrintF(outseq->File, "KEYWORDS ");
6433 ilen = 11;
6434 }
6435
6436 if(ilen+ajStrGetLen(cur) >= 79)
6437 {
6438 ajFmtPrintF(outseq->File, ";\n ");
6439 ilen = 11;
6440 }
6441
6442 if(ilen > 11)
6443 ajFmtPrintF(outseq->File, "; ");
6444
6445 ilen += 2;
6446
6447 ajWriteline(outseq->File, cur);
6448 ilen += ajStrGetLen(cur);
6449 }
6450
6451 ajListIterDel(&it) ;
6452 ajFmtPrintF(outseq->File, ".\n");
6453 }
6454
6455 if(ajStrGetLen(outseq->Tax))
6456 {
6457 ajFmtPrintF(outseq->File, "SOURCE %S\n", outseq->Tax);
6458
6459 ajFmtPrintF(outseq->File, " ORGANISM %S\n", outseq->Tax);
6460
6461 if(ajListGetLength(outseq->Taxlist))
6462 {
6463 ilen = 0;
6464 it = ajListIterNewread(outseq->Taxlist);
6465
6466 while((cur = (AjPStr) ajListIterGet(it)))
6467 {
6468 if(ilen+ajStrGetLen(cur) >= 79)
6469 {
6470 ajFmtPrintF(outseq->File, ";\n");
6471 ilen = 0;
6472 }
6473
6474 if(ilen == 0)
6475 {
6476 ajFmtPrintF(outseq->File, " ");
6477 ilen = 12;
6478 }
6479 else
6480 {
6481 ajFmtPrintF(outseq->File, "; ");
6482 ilen += 2;
6483 }
6484 ajWriteline(outseq->File, cur);
6485 ilen += ajStrGetLen(cur);
6486 }
6487
6488 ajListIterDel(&it) ;
6489 ajFmtPrintF(outseq->File, ".\n");
6490 }
6491 }
6492
6493 if(ajListGetLength(outseq->Reflist))
6494 {
6495 it = ajListIterNewread(outseq->Reflist);
6496
6497 while ((seqref = (const AjPSeqRef) ajListIterGet(it)))
6498 {
6499 ajFmtPrintF(outseq->File, "REFERENCE %u", seqref->Number);
6500
6501 if(ajStrGetLen(seqref->Position))
6502 {
6503 ajStrAssignS(&tmpstr, seqref->Position);
6504 ajStrExchangeCC(&tmpstr, "-", " to ");
6505 ajFmtPrintF(outseq->File, " (bases %S)", tmpstr);
6506 }
6507
6508 ajWritebinNewline(outseq->File);
6509
6510 if(ajStrGetLen(seqref->Authors))
6511 {
6512 ajSeqrefFmtAuthorsGb(seqref, &tmpstr);
6513 ajStrFmtWrapAt(&tmpstr, 68, ',');
6514 tmpline = ajStrParseC(tmpstr, "\n");
6515 ajFmtPrintF(outseq->File, " AUTHORS %S\n", tmpline);
6516 tmpline = ajStrParseC(NULL, "\n");
6517
6518 while (tmpline)
6519 {
6520 ajFmtPrintF(outseq->File, " %S\n", tmpline);
6521 tmpline = ajStrParseC(NULL, "\n");
6522 }
6523 }
6524
6525 ajSeqrefFmtTitleGb(seqref, &tmpstr); /* may set an empty title */
6526
6527 if(ajStrGetLen(tmpstr))
6528 {
6529 ajStrFmtWrap(&tmpstr, 68);
6530 tmpline = ajStrParseC(tmpstr, "\n");
6531 ajFmtPrintF(outseq->File, " TITLE %S\n", tmpline);
6532 tmpline = ajStrParseC(NULL, "\n");
6533
6534 while (tmpline)
6535 {
6536 ajFmtPrintF(outseq->File, " %S\n", tmpline);
6537 tmpline = ajStrParseC(NULL, "\n");
6538 }
6539 }
6540
6541 if(ajStrGetLen(seqref->Location))
6542 {
6543 ajSeqrefFmtLocationGb(seqref, &tmpstr);
6544 ajStrFmtWrap(&tmpstr, 68);
6545 tmpline = ajStrParseC(tmpstr, "\n");
6546 ajFmtPrintF(outseq->File, " JOURNAL %S\n", tmpline);
6547 tmpline = ajStrParseC(NULL, "\n");
6548
6549 while (tmpline)
6550 {
6551 ajFmtPrintF(outseq->File, " %S\n", tmpline);
6552 tmpline = ajStrParseC(NULL, "\n");
6553 }
6554 }
6555
6556 if(ajStrGetLen(seqref->Xref))
6557 {
6558 ajStrAssignS(&tmpstr, seqref->Xref);
6559 ajStrFmtWrap(&tmpstr, 75);
6560 tmpline = ajStrParseC(tmpstr, "\n");
6561
6562 while (tmpline)
6563 {
6564 if(ajStrPrefixC(tmpline, "PUBMED; "))
6565 {
6566 ajStrAssignSubS(&tmpstr2, tmpline, 8, -1);
6567 ajFmtPrintF(outseq->File, " PUBMED %S\n", tmpstr2);
6568 }
6569
6570 tmpline = ajStrParseC(NULL, "\n");
6571 }
6572 }
6573
6574 }
6575
6576 ajListIterDel(&it);
6577 }
6578
6579 if(ajListGetLength(outseq->Cmtlist))
6580 {
6581 it = ajListIterNewread(outseq->Cmtlist);
6582
6583 while ((cmtstr = (const AjPStr) ajListIterGet(it)))
6584 {
6585 ajStrAssignS(&tmpstr, cmtstr);
6586 ajStrFmtWrapAt(&tmpstr, 68, ',');
6587 tmpline = ajStrParseC(tmpstr, "\n");
6588
6589 if(firstcmt)
6590 {
6591 firstcmt = ajFalse;
6592 ajFmtPrintF(outseq->File, "COMMENT %S\n", tmpline);
6593 }
6594 else
6595 {
6596 ajFmtPrintF(outseq->File, " \n");
6597 ajFmtPrintF(outseq->File, " %S\n", tmpline);
6598 }
6599
6600 tmpline = ajStrParseC(NULL, "\n");
6601
6602 while (tmpline)
6603 {
6604 ajFmtPrintF(outseq->File, " %S\n", tmpline);
6605 tmpline = ajStrParseC(NULL, "\n");
6606 }
6607 }
6608
6609 ajListIterDel(&it);
6610 }
6611
6612 if(seqoutUfoLocal(outseq))
6613 {
6614 ajFeattabOutDel(&outseq->Ftquery);
6615 outseq->Ftquery = ajFeattabOutNewSSF(ftfmt, outseq->Name,
6616 ajStrGetPtr(outseq->Type),
6617 outseq->File);
6618 if(!ajFeattableWrite(outseq->Ftquery, outseq->Fttable))
6619 ajWarn("seqWriteRefseq features output failed UFO: '%S'",
6620 outseq->Ufo);
6621 }
6622
6623 ajFmtPrintF(outseq->File, "ORIGIN\n");
6624
6625 seqSeqFormat(ajStrGetLen(outseq->Seq), &sf);
6626 strcpy(sf->endstr, "\n//");
6627 sf->tab = 1;
6628 sf->spacer = 11;
6629 sf->width = 60;
6630 sf->numleft = ajTrue;
6631 sf->numwidth = 8;
6632
6633 seqWriteSeq(outseq, sf);
6634 seqFormatDel(&sf);
6635 ajStrDel(&ftfmt);
6636 ajStrDel(&tmpstr);
6637 ajStrDel(&tmpstr2);
6638
6639 return;
6640 }
6641
6642
6643
6644
6645 /* @funcstatic seqWriteRefseqp ************************************************
6646 **
6647 ** Writes a sequence in REFSEQP format.
6648 **
6649 ** @param [u] outseq [AjPSeqout] Sequence output object.
6650 ** @return [void]
6651 **
6652 ** @release 6.2.0
6653 ** @@
6654 ******************************************************************************/
6655
seqWriteRefseqp(AjPSeqout outseq)6656 static void seqWriteRefseqp(AjPSeqout outseq)
6657 {
6658
6659 static SeqPSeqFormat sf = NULL;
6660 /*ajuint b[5];*/ /* was used for BASE COUNT line */
6661 AjPStr tmpstr = NULL;
6662 AjPStr tmpstr2 = NULL;
6663 const AjPStr cmtstr = NULL;
6664 const AjPStr tmpline = NULL;
6665 const AjPSeqRef seqref = NULL;
6666 AjIList it;
6667 AjPStr cur;
6668 ajuint ilen;
6669 AjBool firstcmt = ajTrue;
6670
6671 ajSeqoutTrace(outseq);
6672
6673 ajFmtPrintF(outseq->File, "LOCUS %-9S %7u aa ",
6674 outseq->Name, ajStrGetLen(outseq->Seq));
6675
6676 if(ajStrGetLen(outseq->Division))
6677 ajFmtPrintF(outseq->File, " %-3s",ajSeqdivGetGb(outseq->Division));
6678 else
6679 ajFmtPrintF(outseq->File, " UNC");
6680
6681 if(outseq->Date)
6682 {
6683 if(outseq->Date->ModDate)
6684 ajFmtPrintF(outseq->File, " %D", outseq->Date->ModDate);
6685 else if(outseq->Date->CreDate)
6686 ajFmtPrintF(outseq->File, " %D", outseq->Date->CreDate);
6687 }
6688 else
6689 ajFmtPrintF(outseq->File, " %D", ajTimeRefTodayFmt("dtline"));
6690
6691 ajWritebinNewline(outseq->File);
6692
6693 if(ajStrGetLen(outseq->Desc))
6694 {
6695 ajStrAssignS(&tmpstr, outseq->Desc);
6696
6697 if(ajStrGetCharLast(tmpstr) != '.')
6698 ajStrAppendK(&tmpstr, '.');
6699
6700 ajStrFmtWrap(&tmpstr, 67);
6701 tmpline = ajStrParseC(tmpstr, "\n");
6702 ajFmtPrintF(outseq->File, "DEFINITION %S\n", tmpline);
6703 tmpline = ajStrParseC(NULL, "\n");
6704
6705 while (tmpline)
6706 {
6707 ajFmtPrintF(outseq->File, " %S\n", tmpline);
6708 tmpline = ajStrParseC(NULL, "\n");
6709 }
6710 }
6711
6712 if(ajListGetLength(outseq->Acclist))
6713 {
6714 ilen = 0;
6715 it = ajListIterNewread(outseq->Acclist);
6716 while((cur = (AjPStr) ajListIterGet(it)))
6717 {
6718 if(ilen == 0)
6719 {
6720 ajFmtPrintF(outseq->File, "ACCESSION ");
6721 ilen = 11;
6722 }
6723
6724 if(ilen + ajStrGetLen(cur) > 79)
6725 {
6726 ajFmtPrintF(outseq->File, "\n ");
6727 ilen = 11;
6728 }
6729
6730 if(ilen > 11)
6731 ajFmtPrintF(outseq->File, " ");
6732 ilen += 1;
6733
6734 ajWriteline(outseq->File, cur);
6735 ilen += ajStrGetLen(cur);
6736
6737 }
6738
6739 ajListIterDel(&it);
6740
6741 if(ilen > 0)
6742 ajWritebinNewline(outseq->File);
6743 }
6744
6745 if(ajStrGetLen(outseq->Sv))
6746 {
6747 if(ajStrGetLen(outseq->Gi))
6748 ajFmtPrintF(outseq->File, "VERSION %S GI:%S\n",
6749 outseq->Sv, outseq->Gi);
6750 else
6751 ajFmtPrintF(outseq->File, "VERSION %S\n", outseq->Sv);
6752 }
6753
6754 if(ajListGetLength(outseq->Keylist))
6755 {
6756 ilen = 0;
6757 it = ajListIterNewread(outseq->Keylist);
6758
6759 while((cur = (AjPStr) ajListIterGet(it)))
6760 {
6761 if(ilen == 0)
6762 {
6763 ajFmtPrintF(outseq->File, "KEYWORDS ");
6764 ilen = 11;
6765 }
6766
6767 if(ilen+ajStrGetLen(cur) >= 79)
6768 {
6769 ajFmtPrintF(outseq->File, ";\n ");
6770 ilen = 11;
6771 }
6772
6773 if(ilen > 11)
6774 ajFmtPrintF(outseq->File, "; ");
6775
6776 ilen += 2;
6777
6778 ajWriteline(outseq->File, cur);
6779 ilen += ajStrGetLen(cur);
6780 }
6781
6782 ajListIterDel(&it) ;
6783 ajFmtPrintF(outseq->File, ".\n");
6784 }
6785
6786 if(ajStrGetLen(outseq->Tax))
6787 {
6788 ajFmtPrintF(outseq->File, "SOURCE %S\n", outseq->Tax);
6789
6790 ajFmtPrintF(outseq->File, " ORGANISM %S\n", outseq->Tax);
6791
6792 if(ajListGetLength(outseq->Taxlist))
6793 {
6794 ilen = 0;
6795 it = ajListIterNewread(outseq->Taxlist);
6796
6797 while((cur = (AjPStr) ajListIterGet(it)))
6798 {
6799 if(ilen+ajStrGetLen(cur) >= 79)
6800 {
6801 ajFmtPrintF(outseq->File, ";\n");
6802 ilen = 0;
6803 }
6804
6805 if(ilen == 0)
6806 {
6807 ajFmtPrintF(outseq->File, " ");
6808 ilen = 12;
6809 }
6810 else
6811 {
6812 ajFmtPrintF(outseq->File, "; ");
6813 ilen += 2;
6814 }
6815 ajWriteline(outseq->File, cur);
6816 ilen += ajStrGetLen(cur);
6817 }
6818
6819 ajListIterDel(&it) ;
6820 ajFmtPrintF(outseq->File, ".\n");
6821 }
6822 }
6823
6824 if(ajListGetLength(outseq->Reflist))
6825 {
6826 it = ajListIterNewread(outseq->Reflist);
6827
6828 while ((seqref = (const AjPSeqRef) ajListIterGet(it)))
6829 {
6830 ajFmtPrintF(outseq->File, "REFERENCE %u", seqref->Number);
6831
6832 if(ajStrGetLen(seqref->Position))
6833 {
6834 ajStrAssignS(&tmpstr, seqref->Position);
6835 ajStrExchangeCC(&tmpstr, "-", " to ");
6836 ajFmtPrintF(outseq->File, " (residues %S)", tmpstr);
6837 }
6838
6839 ajWritebinNewline(outseq->File);
6840
6841 if(ajStrGetLen(seqref->Authors))
6842 {
6843 ajSeqrefFmtAuthorsGb(seqref, &tmpstr);
6844 ajStrFmtWrapAt(&tmpstr, 68, ',');
6845 tmpline = ajStrParseC(tmpstr, "\n");
6846 ajFmtPrintF(outseq->File, " AUTHORS %S\n", tmpline);
6847 tmpline = ajStrParseC(NULL, "\n");
6848
6849 while (tmpline)
6850 {
6851 ajFmtPrintF(outseq->File, " %S\n", tmpline);
6852 tmpline = ajStrParseC(NULL, "\n");
6853 }
6854 }
6855
6856 ajSeqrefFmtTitleGb(seqref, &tmpstr); /* may set an empty title */
6857
6858 if(ajStrGetLen(tmpstr))
6859 {
6860 ajStrFmtWrap(&tmpstr, 68);
6861 tmpline = ajStrParseC(tmpstr, "\n");
6862 ajFmtPrintF(outseq->File, " TITLE %S\n", tmpline);
6863 tmpline = ajStrParseC(NULL, "\n");
6864
6865 while (tmpline)
6866 {
6867 ajFmtPrintF(outseq->File, " %S\n", tmpline);
6868 tmpline = ajStrParseC(NULL, "\n");
6869 }
6870 }
6871
6872 if(ajStrGetLen(seqref->Location))
6873 {
6874 ajSeqrefFmtLocationGb(seqref, &tmpstr);
6875 ajStrFmtWrap(&tmpstr, 68);
6876 tmpline = ajStrParseC(tmpstr, "\n");
6877 ajFmtPrintF(outseq->File, " JOURNAL %S\n", tmpline);
6878 tmpline = ajStrParseC(NULL, "\n");
6879
6880 while (tmpline)
6881 {
6882 ajFmtPrintF(outseq->File, " %S\n", tmpline);
6883 tmpline = ajStrParseC(NULL, "\n");
6884 }
6885 }
6886
6887 if(ajStrGetLen(seqref->Xref))
6888 {
6889 ajStrAssignS(&tmpstr, seqref->Xref);
6890 ajStrFmtWrap(&tmpstr, 75);
6891 tmpline = ajStrParseC(tmpstr, "\n");
6892
6893 while (tmpline)
6894 {
6895 if(ajStrPrefixC(tmpline, "PUBMED; "))
6896 {
6897 ajStrAssignSubS(&tmpstr2, tmpline, 8, -1);
6898 ajFmtPrintF(outseq->File, " PUBMED %S\n", tmpstr2);
6899 }
6900
6901 tmpline = ajStrParseC(NULL, "\n");
6902 }
6903 }
6904
6905 }
6906
6907 ajListIterDel(&it);
6908 }
6909
6910 if(ajListGetLength(outseq->Cmtlist))
6911 {
6912 it = ajListIterNewread(outseq->Cmtlist);
6913
6914 while ((cmtstr = (const AjPStr) ajListIterGet(it)))
6915 {
6916 ajStrAssignS(&tmpstr, cmtstr);
6917 ajStrFmtWrapAt(&tmpstr, 68, ',');
6918 tmpline = ajStrParseC(tmpstr, "\n");
6919
6920 if(firstcmt)
6921 {
6922 firstcmt = ajFalse;
6923 ajFmtPrintF(outseq->File, "COMMENT %S\n", tmpline);
6924 }
6925 else
6926 {
6927 ajFmtPrintF(outseq->File, " \n");
6928 ajFmtPrintF(outseq->File, " %S\n", tmpline);
6929 }
6930
6931 tmpline = ajStrParseC(NULL, "\n");
6932
6933 while (tmpline)
6934 {
6935 ajFmtPrintF(outseq->File, " %S\n", tmpline);
6936 tmpline = ajStrParseC(NULL, "\n");
6937 }
6938 }
6939
6940 ajListIterDel(&it);
6941 }
6942
6943 if(seqoutUfoLocal(outseq))
6944 {
6945 ajFeattabOutDel(&outseq->Ftquery);
6946 outseq->Ftquery = ajFeattabOutNewCSF("refseqp", outseq->Name,
6947 ajStrGetPtr(outseq->Type),
6948 outseq->File);
6949 if(!ajFeattableWrite(outseq->Ftquery, outseq->Fttable))
6950 ajWarn("seqWriteRefseqp features output failed UFO: '%S'",
6951 outseq->Ufo);
6952 }
6953
6954 ajFmtPrintF(outseq->File, "ORIGIN \n");
6955
6956 seqSeqFormat(ajStrGetLen(outseq->Seq), &sf);
6957 strcpy(sf->endstr, "\n//");
6958 sf->tab = 0;
6959 sf->spacer = 11;
6960 sf->width = 60;
6961 sf->numleft = ajTrue;
6962 sf->numwidth = 9;
6963
6964 seqWriteSeq(outseq, sf);
6965 seqFormatDel(&sf);
6966 ajStrDel(&tmpstr);
6967 ajStrDel(&tmpstr2);
6968
6969 return;
6970 }
6971
6972
6973
6974
6975 /* @funcstatic seqWriteGff2 ***************************************************
6976 **
6977 ** Writes a sequence in GFF 2.0 format.
6978 **
6979 ** @param [u] outseq [AjPSeqout] Sequence output object.
6980 ** @return [void]
6981 **
6982 ** @release 6.0.0
6983 ** @@
6984 ******************************************************************************/
6985
seqWriteGff2(AjPSeqout outseq)6986 static void seqWriteGff2(AjPSeqout outseq)
6987 {
6988 SeqPSeqFormat sf = NULL;
6989
6990 ajFmtPrintF(outseq->File,
6991 "##gff-version 2\n");
6992 ajFmtPrintF(outseq->File,
6993 "##source-version EMBOSS %S\n", ajNamValueVersion());
6994 ajFmtPrintF(outseq->File,
6995 "##date %D\n", ajTimeRefTodayFmt("GFF"));
6996
6997 if(ajStrGetCharFirst(outseq->Type) == 'P')
6998 ajFmtPrintF(outseq->File,
6999 "##Protein %S\n", outseq->Name);
7000 else
7001 ajFmtPrintF(outseq->File,
7002 "##DNA %S\n", outseq->Name);
7003
7004 seqSeqFormat(ajStrGetLen(outseq->Seq), &sf);
7005
7006 strcpy(sf->leftstr, "##");
7007 sf->width = 60;
7008 /*
7009 sf->tab = 4;
7010 sf->spacer = 11;
7011 sf->numright = ajTrue;
7012 sf->numwidth = 9;
7013 sf->numjust = ajTrue;
7014 */
7015
7016 seqWriteSeq(outseq, sf);
7017 seqFormatDel(&sf);
7018
7019 if(ajStrGetCharFirst(outseq->Type) == 'P')
7020 ajFmtPrintF(outseq->File, "##end-Protein\n");
7021 else
7022 ajFmtPrintF(outseq->File, "##end-DNA\n");
7023
7024 if(seqoutUfoLocal(outseq))
7025 {
7026 ajFeattabOutDel(&outseq->Ftquery);
7027 outseq->Ftquery = ajFeattabOutNewCSF("gff2", outseq->Name,
7028 ajStrGetPtr(outseq->Type),
7029 outseq->File);
7030 if(ajStrGetCharFirst(outseq->Type) == 'P')
7031 ajFeattableSetProt(outseq->Fttable);
7032 else
7033 ajFeattableSetNuc(outseq->Fttable);
7034
7035 if(!ajFeattableWrite(outseq->Ftquery, outseq->Fttable))
7036 ajWarn("seqWriteGff2 features output failed UFO: '%S'",
7037 outseq->Ufo);
7038
7039 }
7040
7041 return;
7042 }
7043
7044
7045
7046
7047 /* @funcstatic seqWriteGff3 ***************************************************
7048 **
7049 ** Writes a sequence in GFF 3 format.
7050 **
7051 ** @param [u] outseq [AjPSeqout] Sequence output object.
7052 ** @return [void]
7053 **
7054 ** @release 6.0.0
7055 ** @@
7056 ******************************************************************************/
7057
seqWriteGff3(AjPSeqout outseq)7058 static void seqWriteGff3(AjPSeqout outseq)
7059 {
7060 if(seqoutUfoLocal(outseq))
7061 {
7062 ajFeattabOutDel(&outseq->Ftquery);
7063 outseq->Ftquery = ajFeattabOutNewCSF("gff3", outseq->Name,
7064 ajStrGetPtr(outseq->Type),
7065 outseq->File);
7066 if(ajStrGetCharFirst(outseq->Type) == 'P')
7067 ajFeattableSetProt(outseq->Fttable);
7068 else
7069 ajFeattableSetNuc(outseq->Fttable);
7070
7071 if(!ajFeattableWrite(outseq->Ftquery, outseq->Fttable))
7072 ajWarn("seqWriteGff3 features output failed UFO: '%S'",
7073 outseq->Ufo);
7074
7075 }
7076 else
7077 {
7078 /* GFF3 header */
7079 ajFmtPrintF(outseq->File, "##gff-version 3\n") ;
7080 ajFmtPrintF(outseq->File, "##sequence-region %S %u %u\n",
7081 outseq->Name, 1, ajStrGetLen(outseq->Seq));
7082
7083 /* extra EMBOSS header lines */
7084 ajFmtPrintF(outseq->File,
7085 "#!Date %D\n", ajTimeRefTodayFmt("GFF"));
7086
7087 if(ajStrGetCharFirst(outseq->Type) == 'P')
7088 ajFmtPrintF(outseq->File,"#!Type Protein\n");
7089 else
7090 ajFmtPrintF(outseq->File, "#!Type DNA\n");
7091
7092 ajFmtPrintF(outseq->File, "#!Source-version EMBOSS %S\n",
7093 ajNamValueVersion());
7094 }
7095
7096 ajFmtPrintF(outseq->File, "##FASTA\n");
7097
7098 seqWriteFasta(outseq);
7099
7100 return;
7101 }
7102
7103
7104
7105
7106 /* @funcstatic seqWriteBam ****************************************************
7107 **
7108 ** Writes a sequence in binary sequence alignment/map (BAM) format.
7109 **
7110 ** The sort order is "unsorted". Samtools can re-sort the file.
7111 **
7112 ** @param [u] outseq [AjPSeqout] Sequence output object.
7113 ** @return [void]
7114 **
7115 ** @release 6.2.0
7116 ** @@
7117 ******************************************************************************/
7118
seqWriteBam(AjPSeqout outseq)7119 static void seqWriteBam(AjPSeqout outseq)
7120 {
7121 struct bamdata
7122 {
7123 ajuint Count;
7124 ajuint Nref;
7125 AjPSeqBamBgzf gzfile;
7126 AjPSeqBam bam;
7127 } *bamdata = NULL;
7128
7129 AjPSeqBamHeader header;
7130 AjPSeqBam bam;
7131 AjPSeqBamCore core;
7132 unsigned char *dpos;
7133 AjPStr qualstr = NULL;
7134 const char *s;
7135 ajuint ilen;
7136 ajuint slen;
7137 ajuint i;
7138
7139 unsigned char bam_nt16_table[256] =
7140 {
7141 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
7142 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
7143 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
7144 1, 2, 4, 8, 15,15,15,15, 15,15,15,15, 15, 0 /*=*/,15,15,
7145 15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15,
7146 15,15, 5, 6, 8,15, 7, 9, 15,10,15,15, 15,15,15,15,
7147 15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15,
7148 15,15, 5, 6, 8,15, 7, 9, 15,10,15,15, 15,15,15,15,
7149 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
7150 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
7151 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
7152 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
7153 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
7154 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
7155 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
7156 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15
7157 };
7158
7159
7160 if(!outseq->Count)
7161 {
7162 outseq->Cleanup = seqCleanBam;
7163 AJNEW0(bamdata);
7164 AJNEW0(bam);
7165
7166 bamdata->bam = bam;
7167
7168 bamdata->gzfile =
7169 ajSeqBamBgzfNew(ajFileGetFileptr(outseq->File), "w");
7170
7171 /* header text is simply copied from SAM */
7172
7173 header = ajSeqBamHeaderNewTextC("@HD\tVN:1.3\tSO:unsorted\n");
7174
7175 ajSeqBamHeaderWrite(bamdata->gzfile, header);
7176 outseq->Data = bamdata;
7177 ajSeqBamHeaderDel(&header);
7178 }
7179
7180 /* bam_write1 for each sequence */
7181
7182 /* get data for name, flag 0x0004, seq, quality */
7183 bamdata = outseq->Data;
7184 bam = bamdata->bam;
7185 core = &bam->core;
7186
7187 ilen = ajStrGetLen(outseq->Seq);
7188
7189 core->tid = -1;
7190 core->pos = -1; /* BAM format is zero based;
7191 -1 is translated to 0, meaning unmapped */
7192 core->bin = 0;
7193 core->qual = '\0';
7194 core->l_qname = 1 + ajStrGetLen(outseq->Name);
7195 core->flag = 0x0004;
7196 core->n_cigar = 0;
7197 core->l_qseq = ilen;
7198 core->mtid = -1;
7199 core->mpos = 0;
7200 core->isize = 0;
7201
7202 qualstr = ajStrNewRes(ilen+1);
7203
7204 if(outseq->Accuracy)
7205 {
7206 for(i=0;i<ilen;i++)
7207 {
7208 ajStrAppendK(&qualstr, (int) outseq->Accuracy[i]);
7209 }
7210 }
7211
7212 else
7213 {
7214 ajStrAppendCountK(&qualstr,'\"' - 33, ilen);
7215 }
7216
7217
7218 bam->data_len = core->n_cigar*4 + core->l_qname +
7219 (ilen + 1)/2 + ilen;
7220 if(bam->data_len > bam->m_data)
7221 {
7222 AJCRESIZE0(bam->data,bam->m_data, bam->data_len);
7223 bam->m_data = bam->data_len;
7224 }
7225
7226 dpos = bam->data;
7227 memcpy(dpos, ajStrGetPtr(outseq->Name), core->l_qname);
7228
7229 dpos += core->l_qname;
7230 dpos += core->n_cigar*4;
7231
7232 s = ajStrGetPtr(outseq->Seq);
7233 slen = (ilen+1)/2;
7234 for (i = 0; i < slen; ++i)
7235 dpos[i] = 0;
7236 for (i = 0; i < ilen; ++i)
7237 dpos[i/2] |= bam_nt16_table[(ajuint)s[i]] << 4*(1-i%2);
7238
7239 dpos += slen;
7240 memcpy(dpos, ajStrGetPtr(qualstr), ilen);
7241
7242 ajSeqBamWrite(bamdata->gzfile, bam);
7243
7244 ajStrDel(&qualstr);
7245
7246 return;
7247 }
7248
7249
7250
7251
7252 /* @funcstatic seqCleanBam ****************************************************
7253 **
7254 ** Writes the remaining lines to complete and close a BAM file
7255 **
7256 ** @param [u] outseq [AjPSeqout] Sequence output object
7257 ** @return [void]
7258 **
7259 ** @release 6.3.0
7260 ** @@
7261 ******************************************************************************/
7262
seqCleanBam(AjPSeqout outseq)7263 static void seqCleanBam(AjPSeqout outseq)
7264 {
7265 struct bamdata
7266 {
7267 ajuint Count;
7268 ajuint Nref;
7269 AjPSeqBamBgzf gzfile;
7270 AjPSeqBam bam;
7271 } *bamdata = NULL;
7272
7273 bamdata = outseq->Data;
7274
7275 ajSeqBamBgzfClose(bamdata->gzfile);
7276 AJFREE(bamdata->bam->data);
7277 AJFREE(bamdata->bam);
7278 AJFREE(bamdata);
7279
7280 return;
7281 }
7282
7283
7284
7285
7286 /* @funcstatic seqWriteSam ****************************************************
7287 **
7288 ** Writes a sequence in sequence alignment/map (SAM) format.
7289 **
7290 ** The sort order is "unsorted". Samtools can re-sort the file.
7291 **
7292 ** @param [u] outseq [AjPSeqout] Sequence output object.
7293 ** @return [void]
7294 **
7295 ** @release 6.2.0
7296 ** @@
7297 ******************************************************************************/
7298
seqWriteSam(AjPSeqout outseq)7299 static void seqWriteSam(AjPSeqout outseq)
7300 {
7301 AjPStr argstr = NULL;
7302 AjPStr qualstr = NULL;
7303 ajint flag = 0;
7304 ajuint ilen;
7305 ajuint i;
7306
7307 if(!outseq->Count)
7308 {
7309 outseq->Cleanup = NULL;
7310 ajFmtPrintF(outseq->File, "@HD\tVN:1.3\tSO:unsorted\n");
7311
7312 /* SQ is a reference sequence. Can we omit this if not aligned? */
7313
7314 /*ajFmtPrintF(outseq->File, "@SQ\tSN:%S\tLN:%d",
7315 outseq->Name, ajStrGetLen(outseq->Seq));*/
7316
7317 /* AS assembly identifier */
7318 /* M5 checksum */
7319 /* UR URI */
7320 /* SP species */
7321 /* ajFmtPrintF(outseq->File, "\n"); */ /* end of @SQ record */
7322
7323 /* Read group */
7324
7325 /* Program record */
7326 argstr = ajStrNewS(ajUtilGetCmdline());
7327 ajStrExchangeKK(&argstr, '\n', ' ');
7328 ajFmtPrintF(outseq->File, "@PG\tID:%S\tVN:%S\tCL:%S\n",
7329 ajUtilGetProgram(), ajNamValueVersion(), argstr);
7330 ajStrDel(&argstr);
7331
7332 /* Comment */
7333 /*ajFmtPrintF(outseq->File, "@CO\t%S\n", cmtstr);*/
7334 }
7335
7336 flag = 0x0004; /* query not mapped */
7337
7338 ilen = ajStrGetLen(outseq->Seq);
7339
7340 qualstr = ajStrNewRes(ilen+1);
7341
7342 if(outseq->Accuracy)
7343 {
7344 for(i=0;i<ilen;i++)
7345 {
7346 ajStrAppendK(&qualstr, 33 + (int) outseq->Accuracy[i]);
7347 }
7348 }
7349
7350 else
7351 {
7352 ajStrAppendCountK(&qualstr,'\"', ilen);
7353 }
7354
7355
7356 ajFmtPrintF(outseq->File, "%S\t%d\t*\t0\t0\t*\t*\t0\t0\t%S\t%S\n",
7357 outseq->Name, flag, outseq->Seq, qualstr);
7358
7359 /* could add tag:vtype:value fields at end of record */
7360
7361 ajStrDel(&qualstr);
7362
7363 return;
7364 }
7365
7366
7367
7368
7369
7370 /* @funcstatic seqWriteScf ****************************************************
7371 **
7372 ** Writes a sequence in SCF version 3 format.
7373 **
7374 ** @param [u] outseq [AjPSeqout] Sequence output object.
7375 ** @return [void]
7376 **
7377 ** @release 1.0.0
7378 ** @@
7379 ******************************************************************************/
7380
seqWriteScf(AjPSeqout outseq)7381 static void seqWriteScf(AjPSeqout outseq)
7382 {
7383 AjPFile outf = outseq->File;
7384 ajuint filepos = 128;
7385 AjPStr tmpstr = NULL;
7386 ajuint i;
7387 unsigned char iqual;
7388 ajuint spare[18] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
7389 ajuint seqlen = ajStrGetLen(outseq->Seq);
7390 const char* cp;
7391
7392 ajStrAssignS(&tmpstr, outseq->Desc);
7393 ajStrExchangeCC(&tmpstr, "; ", "\n");
7394 ajStrExchangeCC(&tmpstr, ";", "\n");
7395
7396 ajWritebinChar(outf, ".scf", 4);
7397 ajWritebinUint4(outf, 0); /* no samples */
7398 ajWritebinUint4(outf, filepos); /* after header */
7399 ajWritebinUint4(outf, seqlen);
7400 ajWritebinUint4(outf, 0); /* unused left clip */
7401 ajWritebinUint4(outf, 0); /* unused right clip */
7402 filepos += 4; /* one dummy sample */
7403 ajWritebinUint4(outf, filepos); /* no samples */
7404 filepos += 12 * ajStrGetLen(outseq->Seq);
7405 ajWritebinUint4(outf, ajStrGetLen(tmpstr)); /* comments */
7406 ajWritebinUint4(outf, filepos); /* no samples */
7407 filepos += ajStrGetLen(tmpstr);
7408 ajWritebinChar(outf, "3.00", 4);
7409 ajWritebinUint4(outf, 1); /* 8bit unused samples */
7410 ajWritebinUint4(outf, 0); /* standard ACGT and - */
7411 ajWritebinUint4(outf, 0); /* no private data */
7412 ajWritebinUint4(outf, filepos); /* end of file */
7413 ajWritebinBinary(outf, 18, 4, spare);
7414
7415 ajWritebinUint4(outf, 0); /* dummy 4 byte sample */
7416
7417 cp = ajStrGetPtr(outseq->Seq);
7418
7419 if(outseq->Accuracy)
7420 {
7421 for(i=0; i < seqlen; i++)
7422 ajWritebinUint4(outf, 0);
7423 for(i=0; i < seqlen; i++)
7424 {
7425 iqual = 0;
7426 if(cp[i] == 'A' || cp[i] == 'a')
7427 iqual = (int) outseq->Accuracy[i];
7428 ajWritebinByte(outf, iqual);
7429 }
7430 for(i=0; i < seqlen; i++)
7431 {
7432 iqual = 0;
7433 if(cp[i] == 'C' || cp[i] == 'c')
7434 iqual = (int) outseq->Accuracy[i];
7435 ajWritebinByte(outf, iqual);
7436 }
7437 for(i=0; i < seqlen; i++)
7438 {
7439 iqual = 0;
7440 if(cp[i] == 'G' || cp[i] == 'g')
7441 iqual = (int) outseq->Accuracy[i];
7442 ajWritebinByte(outf, iqual);
7443 }
7444 for(i=0; i < seqlen; i++)
7445 {
7446 iqual = 0;
7447 if(cp[i] == 'T' || cp[i] == 't')
7448 iqual = (int) outseq->Accuracy[i];
7449 ajWritebinByte(outf, iqual);
7450 }
7451 }
7452 else
7453 {
7454 for(i=0; i < seqlen; i++)
7455 ajWritebinUint4(outf, 0);
7456 for(i=0; i < seqlen; i++)
7457 ajWritebinByte(outf, '\0');
7458 for(i=0; i < seqlen; i++)
7459 ajWritebinByte(outf, '\0');
7460 for(i=0; i < seqlen; i++)
7461 ajWritebinByte(outf, '\0');
7462 for(i=0; i < seqlen; i++)
7463 ajWritebinByte(outf, '\0');
7464 }
7465
7466 ajWritebinStr(outf, outseq->Seq, seqlen);
7467
7468 /* spare bytes */
7469 for(i=0; i < seqlen; i++)
7470 ajWritebinChar(outf, "\0\0\0", 3);
7471
7472 ajWritebinStr(outf, tmpstr, ajStrGetLen(tmpstr));
7473
7474 return;
7475 }
7476
7477
7478
7479
7480 /* @funcstatic seqWriteStrider ************************************************
7481 **
7482 ** Writes a sequence in DNA STRIDER format.
7483 **
7484 ** @param [u] outseq [AjPSeqout] Sequence output object.
7485 ** @return [void]
7486 **
7487 ** @release 1.0.0
7488 ** @@
7489 ******************************************************************************/
7490
seqWriteStrider(AjPSeqout outseq)7491 static void seqWriteStrider(AjPSeqout outseq)
7492 {
7493 static SeqPSeqFormat sf = NULL;
7494
7495 ajFmtPrintF(outseq->File, "; ### from DNA Strider ;-)\n");
7496 ajFmtPrintF(outseq->File, "; DNA sequence %S, %d bases\n;\n",
7497 outseq->Name, ajStrGetLen(outseq->Seq));
7498
7499 seqSeqFormat(ajStrGetLen(outseq->Seq), &sf);
7500 strcpy(sf->endstr, "\n//");
7501
7502 seqWriteSeq(outseq, sf);
7503 seqFormatDel(&sf);
7504
7505 return;
7506 }
7507
7508
7509
7510
7511 /* @funcstatic seqWriteFitch **************************************************
7512 **
7513 ** Writes a sequence in FITCH format.
7514 **
7515 ** @param [u] outseq [AjPSeqout] Sequence output object.
7516 ** @return [void]
7517 **
7518 ** @release 1.0.0
7519 ** @@
7520 ******************************************************************************/
7521
seqWriteFitch(AjPSeqout outseq)7522 static void seqWriteFitch(AjPSeqout outseq)
7523 {
7524 static SeqPSeqFormat sf = NULL;
7525
7526 ajFmtPrintF(outseq->File, "%S, %d bases\n",
7527 outseq->Name, ajStrGetLen(outseq->Seq));
7528
7529 seqSeqFormat(ajStrGetLen(outseq->Seq), &sf);
7530 sf->spacer = 4;
7531 sf->width = 60;
7532
7533 seqWriteSeq(outseq, sf);
7534 seqFormatDel(&sf);
7535
7536 return;
7537 }
7538
7539
7540
7541
7542 /* @funcstatic seqWriteMase ***************************************************
7543 **
7544 ** Writes a sequence in MASE format.
7545 **
7546 ** @param [u] outseq [AjPSeqout] Sequence output object.
7547 ** @return [void]
7548 **
7549 ** @release 2.8.0
7550 ** @@
7551 ******************************************************************************/
7552
seqWriteMase(AjPSeqout outseq)7553 static void seqWriteMase(AjPSeqout outseq)
7554 {
7555 ajuint i;
7556 ajuint ilen;
7557 AjPStr seq = NULL;
7558 ajuint linelen = 60;
7559 ajuint iend;
7560
7561 if (!ajFileResetPos(outseq->File))
7562 ajFmtPrintF(outseq->File, ";;Written by EMBOSS on %D\n",
7563 ajTimeRefTodayFmt("report"));
7564
7565 ajFmtPrintF(outseq->File, ";%S\n",
7566 outseq->Desc);
7567
7568 ajWritelineNewline(outseq->File, outseq->Name);
7569
7570 ilen = ajStrGetLen(outseq->Seq);
7571
7572 for(i=0; i < ilen; i += linelen)
7573 {
7574 iend = AJMIN(ilen-1, i+linelen-1);
7575 ajStrAssignSubS(&seq, outseq->Seq, i, iend);
7576 ajWritelineNewline(outseq->File, seq);
7577 }
7578
7579 ajStrDel(&seq);
7580
7581 return;
7582 }
7583
7584
7585
7586
7587 /* @funcstatic seqWritePhylip *************************************************
7588 **
7589 ** Writes a sequence in PHYLIP interleaved format.
7590 **
7591 ** @param [u] outseq [AjPSeqout] Sequence output object.
7592 ** @return [void]
7593 **
7594 ** @release 1.0.0
7595 ** @@
7596 ******************************************************************************/
7597
seqWritePhylip(AjPSeqout outseq)7598 static void seqWritePhylip(AjPSeqout outseq)
7599 {
7600 ajulong isize;
7601 ajuint ilen = 0;
7602 ajulong i = 0;
7603 ajuint j = 0;
7604 char *p = NULL;
7605 void** seqs = NULL;
7606 AjPSeq seq;
7607 AjPSeq* seqarr;
7608 ajulong itest;
7609 AjPStr sseq = NULL;
7610 ajuint ipos;
7611 ajuint iend;
7612 AjPStr tstr = NULL;
7613
7614 ajDebug("seqWritePhylip list size %Lu\n",
7615 ajListGetLength(outseq->Savelist));
7616
7617 isize = ajListGetLength(outseq->Savelist);
7618
7619 if(!isize)
7620 return;
7621
7622 itest = ajListToarray(outseq->Savelist, (void***) &seqs);
7623 ajDebug("ajListToarray listed %Lu items\n", itest);
7624 seqarr = (AjPSeq*) seqs;
7625
7626 for(i=0UL; i < isize; i++)
7627 {
7628 seq = seqarr[i];
7629
7630 if(ilen < ajSeqGetLen(seq))
7631 ilen = ajSeqGetLen(seq);
7632 }
7633
7634 tstr = ajStrNewRes(ilen+1);
7635 ajFmtPrintF(outseq->File, " %Lu %u\n", isize, ilen);
7636
7637 for(ipos=1; ipos <= ilen; ipos += 50)
7638 {
7639 iend = ipos + 50 -1;
7640
7641 if(iend > ilen)
7642 iend = ilen;
7643
7644 for(i=0UL; i < isize; i++)
7645 {
7646 seq = seqarr[i];
7647
7648 ajStrAssignC(&tstr,ajStrGetPtr(seq->Seq));
7649 p = ajStrGetuniquePtr(&tstr);
7650
7651 for(j=ajStrGetLen(tstr);j<ilen;++j)
7652 *(p+j)='-';
7653
7654 *(p+j)='\0';
7655 tstr->Len=ilen;
7656 ajStrAssignSubS(&sseq, tstr, ipos-1, iend-1);
7657 ajSeqGapS(&sseq, '-');
7658 ajStrFmtBlock(&sseq, 10);
7659
7660 if(ipos == 1)
7661 ajFmtPrintF(outseq->File,
7662 "%-10.10S%S\n",
7663 seq->Name, sseq);
7664 else
7665 ajFmtPrintF(outseq->File,
7666 "%10s%S\n",
7667 " ", sseq);
7668 }
7669
7670 if(iend < ilen)
7671 ajWritebinNewline(outseq->File);
7672 }
7673
7674 ajStrDel(&tstr);
7675 ajStrDel(&sseq);
7676 AJFREE(seqs);
7677
7678 return;
7679 }
7680
7681
7682
7683
7684 /* @funcstatic seqWritePhylipnon **********************************************
7685 **
7686 ** Writes a sequence in PHYLIP non-interleaved format.
7687 **
7688 ** @param [u] outseq [AjPSeqout] Sequence output object.
7689 ** @return [void]
7690 **
7691 ** @release 3.0.0
7692 ** @@
7693 ******************************************************************************/
7694
seqWritePhylipnon(AjPSeqout outseq)7695 static void seqWritePhylipnon(AjPSeqout outseq)
7696 {
7697 ajulong isize;
7698 ajuint ilen = 0;
7699 ajuint i = 0;
7700 ajulong j = 0;
7701 ajuint n = 0;
7702 char *p = NULL;
7703 void** seqs = NULL;
7704 AjPSeq seq;
7705 AjPSeq* seqarr;
7706 ajulong itest;
7707 AjPStr sseq = NULL;
7708 ajuint ipos;
7709 ajuint iend = 0;
7710 AjPStr tstr = NULL;
7711
7712 ajDebug("seqWritePhylipnon list size %Lu\n",
7713 ajListGetLength(outseq->Savelist));
7714
7715 isize = ajListGetLength(outseq->Savelist);
7716
7717 if(!isize)
7718 return;
7719
7720 itest = ajListToarray(outseq->Savelist, (void***) &seqs);
7721 ajDebug("ajListToarray listed %Lu items\n", itest);
7722 seqarr = (AjPSeq*) seqs;
7723
7724 for(i=0UL; i < isize; i++)
7725 {
7726 seq = seqarr[i];
7727
7728 if(ilen < ajSeqGetLen(seq))
7729 ilen = ajSeqGetLen(seq);
7730 }
7731
7732 tstr = ajStrNewRes(ilen+1);
7733 ajFmtPrintF(outseq->File, "%Lu %u\n", isize, ilen);
7734
7735 for(n=0;n<isize;++n)
7736 {
7737 seq = seqarr[n];
7738 ajStrAssignC(&tstr,ajStrGetPtr(seq->Seq));
7739 p = ajStrGetuniquePtr(&tstr);
7740
7741 for(j=ajStrGetLen(tstr);j<ilen;++j)
7742 *(p+j)='-';
7743
7744 *(p+j)='\0';
7745 tstr->Len=ilen;
7746
7747
7748 for(ipos=1; ipos <= ilen; ipos += 50)
7749 {
7750 iend = ipos + 50 -1;
7751
7752 if(iend > ilen)
7753 iend = ilen;
7754
7755 ajStrAssignSubS(&sseq, tstr, ipos-1, iend-1);
7756 ajSeqGapS(&sseq, '-');
7757 ajStrFmtBlock(&sseq, 10);
7758
7759 if(ipos == 1)
7760 ajFmtPrintF(outseq->File,
7761 "%-10.10S%S\n",
7762 seq->Name, sseq);
7763 else
7764 ajFmtPrintF(outseq->File,
7765 "%10s%S\n",
7766 " ", sseq);
7767 }
7768
7769 if(iend < ilen)
7770 ajWritebinNewline(outseq->File);
7771
7772 }
7773
7774 ajStrDel(&tstr);
7775 ajStrDel(&sseq);
7776 AJFREE(seqs);
7777
7778 return;
7779 }
7780
7781
7782
7783
7784 /* @funcstatic seqWriteAsn1 ***************************************************
7785 **
7786 ** Writes a sequence in ASN.1 format.
7787 **
7788 ** @param [u] outseq [AjPSeqout] Sequence output object.
7789 ** @return [void]
7790 **
7791 ** @release 1.0.0
7792 ** @@
7793 ******************************************************************************/
7794
seqWriteAsn1(AjPSeqout outseq)7795 static void seqWriteAsn1(AjPSeqout outseq)
7796 {
7797 static SeqPSeqFormat sf = NULL;
7798
7799 ajFmtPrintF(outseq->File, " seq {\n");
7800 ajFmtPrintF(outseq->File, " id { local id 1 },\n");
7801 ajFmtPrintF(outseq->File, " descr { title \"%S\" },\n",
7802 outseq->Desc);
7803 ajFmtPrintF(outseq->File, " inst {\n");
7804
7805 if(!outseq->Type)
7806 ajFmtPrintF(outseq->File,
7807 " repr raw, mol dna, length %d, "
7808 "topology linear,\n {\n",
7809 ajStrGetLen(outseq->Seq));
7810 else if(ajStrGetCharFirst(outseq->Type) == 'P')
7811 ajFmtPrintF(outseq->File,
7812 " repr raw, mol aa, length %d, "
7813 "topology linear,\n {\n",
7814 ajStrGetLen(outseq->Seq));
7815 else
7816 ajFmtPrintF(outseq->File,
7817 " repr raw, mol dna, length %d, "
7818 "topology linear,\n",
7819 ajStrGetLen(outseq->Seq));
7820
7821 ajFmtPrintF(outseq->File, " seq-data\n");
7822
7823 if(ajStrGetCharFirst(outseq->Type) == 'P')
7824 ajFmtPrintF(outseq->File, " iupacaa \"");
7825 else
7826 ajFmtPrintF(outseq->File, " iupacna \"");
7827
7828 seqSeqFormat(ajStrGetLen(outseq->Seq), &sf);
7829 sf->linepos = 17;
7830 sf->spacer = 0;
7831 sf->width = 78;
7832 sf->tab = 0;
7833 strcpy(sf->endstr, "\"\n } } ,");
7834
7835 seqWriteSeq(outseq, sf);
7836 seqFormatDel(&sf);
7837
7838 return;
7839 }
7840
7841
7842
7843
7844 /* @funcstatic seqWriteIg *****************************************************
7845 **
7846 ** Writes a sequence in INTELLIGENETICS format.
7847 **
7848 ** @param [u] outseq [AjPSeqout] Sequence output object.
7849 ** @return [void]
7850 **
7851 ** @release 1.0.0
7852 ** @@
7853 ******************************************************************************/
7854
seqWriteIg(AjPSeqout outseq)7855 static void seqWriteIg(AjPSeqout outseq)
7856 {
7857 static SeqPSeqFormat sf = NULL;
7858
7859 if(ajStrGetCharFirst(outseq->Type) == 'P')
7860 ajFmtPrintF(outseq->File, ";%S, %d aa\n",
7861 outseq->Desc, ajStrGetLen(outseq->Seq));
7862 else
7863 ajFmtPrintF(outseq->File, ";%S, %d bases\n",
7864 outseq->Desc, ajStrGetLen(outseq->Seq));
7865
7866 ajWritelineNewline(outseq->File, outseq->Name);
7867
7868 seqSeqFormat(ajStrGetLen(outseq->Seq), &sf);
7869 strcpy(sf->endstr, "1"); /* linear (DNA) */
7870
7871 seqWriteSeq(outseq, sf);
7872 seqFormatDel(&sf);
7873
7874 return;
7875 }
7876
7877
7878
7879
7880 /* @funcstatic seqWriteIguspto ************************************************
7881 **
7882 ** Writes a sequence in US patent office multi-line INTELLIGENETICS format.
7883 **
7884 ** @param [u] outseq [AjPSeqout] Sequence output object.
7885 ** @return [void]
7886 **
7887 ** @release 6.6.0
7888 ** @@
7889 ******************************************************************************/
7890
seqWriteIguspto(AjPSeqout outseq)7891 static void seqWriteIguspto(AjPSeqout outseq)
7892 {
7893 static SeqPSeqFormat sf = NULL;
7894 const AjPStr tmpstr;
7895 AjIList iter;
7896
7897 if(outseq->Count && !outseq->Single)
7898 ajFmtPrintF(outseq->File, "\n%c\n", '\014');
7899
7900 if(ajListGetLength(outseq->Fulldesc->Multi))
7901 {
7902 iter = ajListIterNewread(outseq->Fulldesc->Multi);
7903 while(!ajListIterDone(iter))
7904 {
7905 tmpstr = ajListIterGet(iter);
7906 ajFmtPrintF(outseq->File, "; %S\n", tmpstr);
7907 }
7908 ajListIterDel(&iter);
7909 }
7910 else
7911 {
7912 if(ajStrGetCharFirst(outseq->Type) == 'P')
7913 ajFmtPrintF(outseq->File, ";%S, %d aa\n",
7914 outseq->Desc, ajStrGetLen(outseq->Seq));
7915 else
7916 ajFmtPrintF(outseq->File, ";%S, %d bases\n",
7917 outseq->Desc, ajStrGetLen(outseq->Seq));
7918 }
7919
7920 ajWritelineNewline(outseq->File, outseq->Name);
7921
7922 seqSeqFormat(ajStrGetLen(outseq->Seq), &sf);
7923 strcpy(sf->endstr, "1"); /* linear (DNA) */
7924 sf->width = 60;
7925
7926 seqWriteSeq(outseq, sf);
7927 seqFormatDel(&sf);
7928
7929 return;
7930 }
7931
7932
7933
7934
7935 /* @funcstatic seqWriteAcedb **************************************************
7936 **
7937 ** Writes a sequence in ACEDB format.
7938 **
7939 ** @param [u] outseq [AjPSeqout] Sequence output object.
7940 ** @return [void]
7941 **
7942 ** @release 1.0.0
7943 ** @@
7944 ******************************************************************************/
7945
seqWriteAcedb(AjPSeqout outseq)7946 static void seqWriteAcedb(AjPSeqout outseq)
7947 {
7948 static SeqPSeqFormat sf = NULL;
7949
7950 if(ajStrGetCharFirst(outseq->Type) == 'P')
7951 ajFmtPrintF(outseq->File, "Peptide : \"%S\"\n", outseq->Name);
7952 else
7953 ajFmtPrintF(outseq->File, "DNA : \"%S\"\n", outseq->Name);
7954
7955 seqSeqFormat(ajStrGetLen(outseq->Seq), &sf);
7956 strcpy(sf->endstr, "\n");
7957
7958 seqWriteSeq(outseq, sf);
7959 seqFormatDel(&sf);
7960
7961 return;
7962 }
7963
7964
7965
7966
7967 /* @funcstatic seqWriteDasdna *************************************************
7968 **
7969 ** Writes a sequence in DASDNA XML format.
7970 **
7971 ** @param [u] outseq [AjPSeqout] Sequence output object.
7972 ** @return [void]
7973 **
7974 ** @release 6.1.0
7975 ** @@
7976 ******************************************************************************/
7977
seqWriteDasdna(AjPSeqout outseq)7978 static void seqWriteDasdna(AjPSeqout outseq)
7979 {
7980 ajuint i;
7981 ajuint ilen;
7982 ajuint iend;
7983 ajuint linelen = 50;
7984 AjPStr seq = NULL;
7985
7986 ilen = ajStrGetLen(outseq->Seq);
7987
7988 if(!outseq->Count)
7989 {
7990 outseq->Cleanup = seqCleanDasdna;
7991 ajFmtPrintF(outseq->File,
7992 "<?xml version=\"1.0\" standalone=\"no\"?>\n");
7993 ajFmtPrintF(outseq->File,
7994 "<!DOCTYPE DASDNA SYSTEM "
7995 "\"http://www.biodas.org/dtd/dasdna.dtd\">\n");
7996 ajFmtPrintF(outseq->File,
7997 "<DASDNA>\n");
7998 }
7999
8000
8001 ajFmtPrintF(outseq->File,
8002 " <SEQUENCE id=\"%S\" start=\"%d\" stop=\"%d\" "
8003 "version=\"%S\">\n",
8004 outseq->Name, 1+outseq->Offset,
8005 ilen+outseq->Offset,
8006 outseq->Sv);
8007
8008 ajFmtPrintF(outseq->File,
8009 " <DNA length=\"%d\">\n", ilen);
8010
8011 for(i=0; i < ilen; i += linelen)
8012 {
8013 iend = AJMIN(ilen-1, i+linelen-1);
8014 ajStrAssignSubS(&seq, outseq->Seq, i, iend);
8015 ajFmtPrintF(outseq->File, " %S\n", seq);
8016 }
8017
8018 ajFmtPrintF(outseq->File,
8019 " </DNA>\n");
8020 ajFmtPrintF(outseq->File,
8021 " </SEQUENCE>\n");
8022
8023 ajStrDel(&seq);
8024
8025 return;
8026 }
8027
8028
8029
8030
8031 /* @funcstatic seqCleanDasdna *************************************************
8032 **
8033 ** Writes the remaining lines to complete and close a DASDNA XML file
8034 **
8035 ** @param [u] outseq [AjPSeqout] Sequence output object
8036 ** @return [void]
8037 **
8038 ** @release 6.1.0
8039 ** @@
8040 ******************************************************************************/
8041
8042
seqCleanDasdna(AjPSeqout outseq)8043 static void seqCleanDasdna(AjPSeqout outseq)
8044 {
8045 AjPFile file = outseq->File;
8046 ajFmtPrintF(file,
8047 "</DASDNA>\n");
8048
8049 return;
8050 }
8051
8052
8053
8054
8055 /* @funcstatic seqWriteDasseq *************************************************
8056 **
8057 ** Writes a sequence in DAS SEQUENCE XML format.
8058 **
8059 ** @param [u] outseq [AjPSeqout] Sequence output object.
8060 ** @return [void]
8061 **
8062 ** @release 6.1.0
8063 ** @@
8064 ******************************************************************************/
8065
seqWriteDasseq(AjPSeqout outseq)8066 static void seqWriteDasseq(AjPSeqout outseq)
8067 {
8068 ajuint i;
8069 ajuint ilen;
8070 ajuint iend;
8071 ajuint linelen = 50;
8072 AjPStr seq = NULL;
8073
8074 ilen = ajStrGetLen(outseq->Seq);
8075
8076 if(!outseq->Count)
8077 {
8078 outseq->Cleanup = seqCleanDasseq;
8079 ajFmtPrintF(outseq->File,
8080 "<?xml version=\"1.0\" standalone=\"no\"?>\n");
8081 ajFmtPrintF(outseq->File,
8082 "<!DOCTYPE DASSEQUENCE SYSTEM "
8083 "\"http://www.biodas.org/dtd/dassequence.dtd\">\n");
8084 }
8085
8086
8087 ajFmtPrintF(outseq->File,
8088 "<DASSEQUENCE>\n");
8089
8090 ajFmtPrintF(outseq->File,
8091 " <SEQUENCE id=\"%S\" start=\"%d\" stop=\"%d\"\n",
8092 outseq->Name,
8093 1+outseq->Offset,
8094 ilen+outseq->Offset);
8095 if(ajStrGetCharFirst(outseq->Type) == 'P')
8096 ajFmtPrintF(outseq->File,
8097 " moltype=\"Protein\"",
8098 outseq->Sv);
8099 else
8100 ajFmtPrintF(outseq->File,
8101 " moltype=\"DNA\"");
8102 if(ajStrGetLen(outseq->Sv))
8103 ajFmtPrintF(outseq->File,
8104 " version=\"%S\">\n",
8105 outseq->Sv);
8106 else
8107 ajFmtPrintF(outseq->File,
8108 " version=\"0.0\">\n");
8109
8110 for(i=0; i < ilen; i += linelen)
8111 {
8112 iend = AJMIN(ilen-1, i+linelen-1);
8113 ajStrAssignSubS(&seq, outseq->Seq, i, iend);
8114 ajFmtPrintF(outseq->File, " %S\n", seq);
8115 }
8116
8117 ajFmtPrintF(outseq->File,
8118 " </SEQUENCE>\n");
8119
8120 ajStrDel(&seq);
8121
8122 return;
8123 }
8124
8125
8126
8127
8128 /* @funcstatic seqCleanDasseq *************************************************
8129 **
8130 ** Writes the remaining lines to complete and close a DASDNA XML file
8131 **
8132 ** @param [u] outseq [AjPSeqout] Sequence output object
8133 ** @return [void]
8134 **
8135 ** @release 6.1.0
8136 ** @@
8137 ******************************************************************************/
8138
8139
seqCleanDasseq(AjPSeqout outseq)8140 static void seqCleanDasseq(AjPSeqout outseq)
8141 {
8142 AjPFile file = outseq->File;
8143 ajFmtPrintF(file,
8144 "</DASSEQUENCE>\n");
8145
8146 return;
8147 }
8148
8149
8150
8151
8152 /* @funcstatic seqWriteDebug **************************************************
8153 **
8154 ** Writes a sequence in debug report format.
8155 **
8156 ** @param [u] outseq [AjPSeqout] Sequence output object.
8157 ** @return [void]
8158 **
8159 ** @release 1.0.0
8160 ** @@
8161 ******************************************************************************/
8162
seqWriteDebug(AjPSeqout outseq)8163 static void seqWriteDebug(AjPSeqout outseq)
8164 {
8165 static SeqPSeqFormat sf = NULL;
8166 AjIList it;
8167 AjPStr cur;
8168 AjPSeqRef curref;
8169 AjPSeqXref curxref;
8170 ajuint ilen;
8171 ajuint i;
8172 ajuint j;
8173 ajuint jend;
8174
8175 const char* xreftypes[] = {
8176 "unknown", "DRline", "db_xref",
8177 "ECnumber", "DescTag", "TaxId", "RXline", NULL
8178 };
8179
8180 ajFmtPrintF(outseq->File, "Sequence output trace\n");
8181 ajFmtPrintF(outseq->File, "=====================\n\n");
8182 ajFmtPrintF(outseq->File, " Name: '%S'\n", ajStrConstS(outseq->Name));
8183 ajFmtPrintF(outseq->File, " Accession: '%S'\n", ajStrConstS(outseq->Acc));
8184
8185 if(ajListGetLength(outseq->Acclist))
8186 {
8187 ajFmtPrintF(outseq->File, " Acclist: (%Lu)",
8188 ajListGetLength(outseq->Acclist));
8189 it = ajListIterNewread(outseq->Acclist);
8190
8191 while((cur = (AjPStr) ajListIterGet(it)))
8192 ajFmtPrintF(outseq->File, " %S\n", cur);
8193
8194 ajListIterDel(&it);
8195 ajWritebinNewline(outseq->File);
8196 }
8197
8198 ajFmtPrintF(outseq->File, " SeqVersion: '%S'\n", ajStrConstS(outseq->Sv));
8199 ajFmtPrintF(outseq->File, " GenInfo Id: '%S'\n", ajStrConstS(outseq->Gi));
8200 ajFmtPrintF(outseq->File, " Description: '%S'\n",
8201 ajStrConstS(outseq->Desc));
8202
8203 if(ajListGetLength(outseq->Keylist))
8204 {
8205 ajFmtPrintF(outseq->File, " Keywordlist: (%Lu)\n",
8206 ajListGetLength(outseq->Keylist));
8207 it = ajListIterNewread(outseq->Keylist);
8208
8209 while((cur = (AjPStr) ajListIterGet(it)))
8210 ajFmtPrintF(outseq->File, " '%S'\n", cur);
8211
8212 ajListIterDel(&it);
8213 }
8214
8215 ajFmtPrintF(outseq->File, " Taxonomy: '%S'\n", ajStrConstS(outseq->Tax));
8216 ajFmtPrintF(outseq->File, " Taxcommon: '%S'\n",
8217 ajStrConstS(outseq->Taxcommon));
8218 ajFmtPrintF(outseq->File, " TaxId: '%S'\n", ajStrConstS(outseq->Taxid));
8219 ajFmtPrintF(outseq->File, " Organelle: '%S'\n",
8220 ajStrConstS(outseq->Organelle));
8221
8222 if(ajListGetLength(outseq->Taxlist))
8223 {
8224 ajFmtPrintF(outseq->File, " Taxlist: (%Lu)\n",
8225 ajListGetLength(outseq->Taxlist));
8226 it = ajListIterNewread(outseq->Taxlist);
8227
8228 while((cur = (AjPStr) ajListIterGet(it)))
8229 ajFmtPrintF(outseq->File, " '%S'\n", cur);
8230
8231 ajListIterDel(&it);
8232 }
8233
8234 ajFmtPrintF(outseq->File, " Type: '%S'\n", ajStrConstS(outseq->Type));
8235 ajFmtPrintF(outseq->File, " Output type: '%S'\n",
8236 ajStrConstS(outseq->Outputtype));
8237 ajFmtPrintF(outseq->File, " Molecule: '%S'\n",
8238 ajStrConstS(outseq->Molecule));
8239 ajFmtPrintF(outseq->File, " Class: '%S'\n", ajStrConstS(outseq->Class));
8240 ajFmtPrintF(outseq->File, " Division: '%S'\n",
8241 ajStrConstS(outseq->Division));
8242 ajFmtPrintF(outseq->File, " Database: '%S'\n", ajStrConstS(outseq->Db));
8243 ajFmtPrintF(outseq->File, " Set database: '%S'\n",
8244 ajStrConstS(outseq->Setdb));
8245 ajFmtPrintF(outseq->File, " Output database: '%S'\n",
8246 ajStrConstS(outseq->Setoutdb));
8247 ajFmtPrintF(outseq->File, " Full name: '%S'\n", ajStrConstS(outseq->Full));
8248
8249 if(outseq->Date)
8250 {
8251 if(outseq->Date->CreDate)
8252 ajFmtPrintF(outseq->File,
8253 " Created: '%D' Rel. '%S' Ver. '%S'\n",
8254 outseq->Date->CreDate, outseq->Date->CreRel,
8255 outseq->Date->CreVer);
8256 if(outseq->Date->ModDate)
8257 ajFmtPrintF(outseq->File,
8258 " Modified: '%D' Rel. '%S' Ver. '%S'\n",
8259 outseq->Date->ModDate, outseq->Date->ModRel,
8260 outseq->Date->ModVer);
8261 if(outseq->Date->SeqDate)
8262 ajFmtPrintF(outseq->File,
8263 " Seq modified: '%D' Rel. '%S' Ver. '%S'\n",
8264 outseq->Date->SeqDate, outseq->Date->SeqRel,
8265 outseq->Date->SeqVer);
8266 }
8267
8268 if(ajListGetLength(outseq->Cmtlist))
8269 {
8270 ajFmtPrintF(outseq->File, " Commentlist: (%Lu)\n",
8271 ajListGetLength(outseq->Cmtlist));
8272 it = ajListIterNewread(outseq->Cmtlist);
8273
8274 while((cur = (AjPStr) ajListIterGet(it)))
8275 ajFmtPrintF(outseq->File, " '%S'\n", cur);
8276
8277 ajListIterDel(&it);
8278 }
8279
8280 if(ajListGetLength(outseq->Xreflist))
8281 {
8282 ajFmtPrintF(outseq->File, " Xreflist: (%Lu)\n",
8283 ajListGetLength(outseq->Xreflist));
8284 it = ajListIterNewread(outseq->Xreflist);
8285
8286 i = 0;
8287
8288 while((curxref = (AjPSeqXref) ajListIterGet(it)))
8289 {
8290 if(curxref->Type < XREF_MAX)
8291 ajFmtPrintF(outseq->File, "%3d Type: '%u' %s\n",
8292 i++, curxref->Type, xreftypes[curxref->Type]);
8293 else
8294 ajFmtPrintF(outseq->File, "%3d Type: '%u' ..BAD..\n",
8295 i++, curxref->Type);
8296 ajFmtPrintF(outseq->File, " Db: '%S'\n", curxref->Db);
8297 ajFmtPrintF(outseq->File, " Id: '%S'\n", curxref->Id);
8298 if(ajStrGetLen(curxref->Secid))
8299 ajFmtPrintF(outseq->File, " Secid: '%S'\n", curxref->Secid);
8300 if(ajStrGetLen(curxref->Terid))
8301 ajFmtPrintF(outseq->File, " Terid: '%S'\n", curxref->Terid);
8302 if(ajStrGetLen(curxref->Quatid))
8303 ajFmtPrintF(outseq->File, " Quatid: '%S'\n", curxref->Quatid);
8304 if(curxref->Start || curxref->End)
8305 ajFmtPrintF(outseq->File, " Range: %d..%d\n",
8306 curxref->Start, curxref->End);
8307 }
8308
8309 ajListIterDel(&it);
8310 }
8311
8312 if(ajListGetLength(outseq->Reflist))
8313 {
8314 ajFmtPrintF(outseq->File, " Citationlist: (%Lu)\n",
8315 ajListGetLength(outseq->Reflist));
8316 it = ajListIterNewread(outseq->Reflist);
8317
8318 while((curref = (AjPSeqRef) ajListIterGet(it)))
8319 {
8320 ajFmtPrintF(outseq->File, " Number: %u\n",
8321 curref->Number);
8322
8323 if(ajStrGetLen(curref->Position))
8324 ajFmtPrintF(outseq->File, " Position: '%S'\n",
8325 curref->Position);
8326
8327 if(ajStrGetLen(curref->Groupname))
8328 ajFmtPrintF(outseq->File, " Groupname: '%S'\n",
8329 curref->Groupname);
8330
8331 if(ajStrGetLen(curref->Authors))
8332 ajFmtPrintF(outseq->File, " Authors: '%S'\n",
8333 curref->Authors);
8334
8335 if(ajStrGetLen(curref->Title))
8336 ajFmtPrintF(outseq->File, " Title: '%S'\n",
8337 curref->Title);
8338
8339 if(ajStrGetLen(curref->Comment))
8340 ajFmtPrintF(outseq->File, " Comment: '%S'\n",
8341 curref->Comment);
8342
8343 if(ajStrGetLen(curref->Xref))
8344 ajFmtPrintF(outseq->File, " Xref: '%S'\n",
8345 curref->Xref);
8346
8347 if(ajStrGetLen(curref->Location))
8348 ajFmtPrintF(outseq->File, " Location: '%S'\n",
8349 curref->Location);
8350
8351 if(ajStrGetLen(curref->Loctype))
8352 ajFmtPrintF(outseq->File, " Loctype: '%S'\n",
8353 curref->Loctype);
8354 }
8355
8356 ajListIterDel(&it);
8357 }
8358
8359 ajFmtPrintF(outseq->File, " Usa: '%S'\n", ajStrConstS(outseq->Usa));
8360 ajFmtPrintF(outseq->File, " Ufo: '%S'\n", ajStrConstS(outseq->Ufo));
8361 ajFmtPrintF(outseq->File, " Input format: '%S'\n",
8362 ajStrConstS(outseq->Informatstr));
8363 ajFmtPrintF(outseq->File, " Output format: '%S'\n",
8364 ajStrConstS(outseq->Formatstr));
8365 ajFmtPrintF(outseq->File, " Filename: '%S'\n",
8366 ajStrConstS(outseq->Filename));
8367 ajFmtPrintF(outseq->File, " Output directory: '%S'\n",
8368 ajStrConstS(outseq->Directory));
8369 ajFmtPrintF(outseq->File, " Entryname: '%S'\n",
8370 ajStrConstS(outseq->Entryname));
8371 ajFmtPrintF(outseq->File, " File name: '%F'\n",
8372 outseq->File);
8373 ajFmtPrintF(outseq->File, " Known file name: '%D'\n",
8374 outseq->Knownfile);
8375 ajFmtPrintF(outseq->File, " Extension: '%S'\n",
8376 ajStrConstS(outseq->Extension));
8377 ajFmtPrintF(outseq->File, " Single: '%B'\n", outseq->Single);
8378 ajFmtPrintF(outseq->File, " Features: '%B'\n", outseq->Features);
8379 ajFmtPrintF(outseq->File, " Rev: '%B'\n", outseq->Rev);
8380 ajFmtPrintF(outseq->File, " Circular: '%B'\n", outseq->Circular);
8381 ajFmtPrintF(outseq->File, " Offset: '%d'\n", outseq->Offset);
8382 ajFmtPrintF(outseq->File, " Count: '%d'\n", outseq->Count);
8383 ajFmtPrintF(outseq->File, " Documentation:...\n%S\n",
8384 ajStrConstS(outseq->Doc));
8385 ajFmtPrintF(outseq->File, " Features filename: '%S'\n",
8386 outseq->FtFilename);
8387 ajFmtPrintF(outseq->File, " Features format: '%S'\n",
8388 outseq->FtFormat);
8389 ajFmtPrintF(outseq->File, " Feature table size: '%u'\n",
8390 ajFeattableGetSize(outseq->Fttable));
8391
8392 if(outseq->Accuracy)
8393 {
8394 ilen = ajStrGetLen(outseq->Seq);
8395 ajFmtPrintF(outseq->File, " Accuracy: \n");
8396
8397 for(i=0; i<ilen;i+=20)
8398 {
8399 ajFmtPrintF(outseq->File, " ");
8400 jend = i+20;
8401
8402 if(jend > ilen)
8403 jend = ilen;
8404
8405 for(j=i;j<jend;j++)
8406 ajFmtPrintF(outseq->File, " %2d", (ajint) outseq->Accuracy[j]);
8407
8408 ajWritebinNewline(outseq->File);
8409 }
8410 }
8411
8412 seqSeqFormat(ajStrGetLen(outseq->Seq), &sf);
8413 sf->numright = ajTrue;
8414 sf->numleft = ajTrue;
8415 sf->numjust = ajTrue;
8416 sf->tab = 1;
8417 sf->spacer = 11;
8418 sf->width = 50;
8419
8420 seqWriteSeq(outseq, sf);
8421 seqFormatDel(&sf);
8422
8423 return;
8424 }
8425
8426
8427
8428
8429 /* @funcstatic seqWriteSeq ****************************************************
8430 **
8431 ** Writes an output sequence. The format and all other information is
8432 ** already stored in the output sequence object and the formatting structure.
8433 **
8434 ** @param [u] outseq [AjPSeqout] Output sequence.
8435 ** @param [r] sf [const SeqPSeqFormat] Output formatting structure.
8436 ** @return [void]
8437 **
8438 ** @release 1.0.0
8439 ** @@
8440 ******************************************************************************/
8441
static void seqWriteSeq(AjPSeqout outseq, const SeqPSeqFormat sf)
{
    /* code adapted from what readseq did */

    static ajuint maxSeqWidth = 250;
    static const char* defNocountSymbols = "_.-?";

    ajuint i = 0;               /* position in seq[] */
    ajuint j = 0;
    ajuint l = 0;               /* position in the output line s[] */
    ajuint ibase = 0;           /* running base count for numbering */
    ajuint seqlen;
    const char *seq;
    const char *idword;
    char *cp;
    char s[1024];               /* the output line */

    /*
    ** printf-style formats built at run time.
    ** The name format holds two unsigned fields ("%<w>.<w>s ") and can
    ** need 25 bytes in the worst case, so it is larger than numform.
    */
    char nameform[32];
    char numform[20];
    char nocountsymbols[20];

    ajuint width;
    ajuint l1;                  /* residues on the current line (unsigned) */

    AjPFile file;
    FILE* outf;

    ajDebug("seqWriteSeq\n");

    seqlen = ajStrGetLen(outseq->Seq);
    seq = ajStrGetPtr(outseq->Seq);
    width = sf->width;
    l1 = sf->linepos;
    file = outseq->File;
    outf = ajFileGetFileptr(file);

    /* nameform is only built (and only used) when a name is printed */
    if(sf->nameleft || sf->nameright)
        sprintf(nameform, "%%%u.%us ",sf->namewidth,sf->namewidth);

    if(sf->numline)
        sprintf(numform, "%%%us ",sf->numwidth);
    else
        sprintf(numform, "%%%ud",sf->numwidth);

    strcpy( nocountsymbols, defNocountSymbols);

    if(sf->baseonlynum)
    {
        /* add gap character to skips */
        if(strchr(nocountsymbols,sf->gapchar)==NULL)
        {
            strcat(nocountsymbols," ");
            nocountsymbols[strlen(nocountsymbols)-1]= sf->gapchar;
        }

        /* remove match character from skips (blank it out) */
        if(sf->domatch &&
           (cp=strchr(nocountsymbols,sf->matchchar))!=NULL)
            *cp= ' ';
    }

    if(sf->numline)
        idword= "";
    else
        idword = ajStrGetPtr(outseq->Name);

    width = AJMIN(width,maxSeqWidth);

    i=0;                        /* seqpos position in seq[]*/
    l=0;                        /* linepos position in output line s[] */

    ibase = 1;                  /* base count */

    while(i < seqlen)
    {

        if(l1 == 0)
        {
            /* start of a new line */
            if(sf->skipbefore)
                fprintf(outf, "\n"); /* blank line before writing */

            if(*(sf->leftstr))
                fprintf(outf, "%s", sf->leftstr); /* string at line start */

            if(sf->nameleft)
                fprintf(outf, nameform, idword);

            if(sf->numleft)
            {
                if(sf->numline)
                    fprintf(outf, numform, "");
                else
                    fprintf(outf, numform, ibase);
            }

            for(j=0; j < sf->tab; j++)
                fputc(' ',outf);
        }

        l1++;                   /* don't count spaces for width */

        if(sf->numline)
        {
            /* number line: written straight to the file, s[] mirrors it */
            if(sf->spacer==seqSpaceAll ||
               (sf->spacer != 0 && (l+1) % sf->spacer == 1))
            {
                fputc(' ',outf);
                s[l++] = ' ';
            }

            if(l1 % 10 == 1 || l1 == width)
            {
                fprintf(outf,"%-9u ",i+1);
                s[l++]= '|';    /* == put a number here */
            }
            else
                s[l++]= ' ';

            i++;
        }
        else
        {
            if(sf->spacer==seqSpaceAll ||
               (sf->spacer != 0 && (l+1) % sf->spacer == 1))
                s[l++] = ' ';

            if(!sf->baseonlynum)
                ibase++;
            else if(0==strchr(nocountsymbols,seq[i]))
                ibase++;

            s[l++] = seq[i++];
        }

        /*
        ** End the line at the requested width or at the end of the
        ** sequence. Each pass adds at most two characters to s[], so
        ** also end it when fewer than two characters plus the terminator
        ** would fit: with a zero width, or a starting linepos at or past
        ** the width, l1 never equals width and s[] would otherwise be
        ** overrun by a long sequence.
        */
        if(l1 == width || i == seqlen || l + 2 >= sizeof(s))
        {
            if(sf->pretty || sf->numjust)
                for( ; l1<width; l1++)
                {
                    if(sf->spacer==seqSpaceAll ||
                       (sf->spacer != 0 && (l+1) % sf->spacer == 1))
                        s[l++] = ' ';

                    s[l++]=' '; /* pad with blanks */
                }

            s[l] = '\0';
            l = 0; l1 = 0;

            if(!sf->numline)
            {
                fprintf(outf,"%s",s);

                if(sf->numright || sf->nameright)
                    fputc(' ',outf);

                if(sf->numright)
                    fprintf(outf,numform, ibase-1);

                if(sf->nameright)
                    fprintf(outf, nameform,idword);

                if(i == seqlen)
                    fprintf(outf,"%s",sf->endstr);
            }

            fprintf(outf, "\n");

            if(sf->skipafter)
                fprintf(outf, "\n");
        }
    }

    return;
}
8652
8653
8654
8655
8656 /* @funcstatic seqoutUfoLocal *************************************************
8657 **
8658 ** Tests whether a sequence output object will write features to the
8659 ** sequence output file. The alternative is to use a separate UFO.
8660 **
8661 ** @param [r] thys [const AjPSeqout] Sequence output object.
8662 ** @return [AjBool] ajTrue if the features will be written to the sequence
8663 **
8664 ** @release 2.0.0
8665 ** @@
8666 ******************************************************************************/
8667
seqoutUfoLocal(const AjPSeqout thys)8668 static AjBool seqoutUfoLocal(const AjPSeqout thys)
8669 {
8670 ajDebug("seqoutUfoLocal Features %B Ufo %d '%S'\n",
8671 thys->Features, ajStrGetLen(thys->Ufo), thys->Ufo);
8672
8673 if(thys->Features && !ajStrGetLen(thys->Ufo))
8674 return ajTrue;
8675
8676 return ajFalse;
8677 }
8678
8679
8680
8681
8682 /* @funcstatic seqoutUsaProcess ***********************************************
8683 **
8684 ** Converts a USA Universal Sequence Address into an open output file.
8685 **
8686 ** First tests for format:: and sets this if it is found
8687 **
8688 ** Then looks for file:id and opens the file.
8689 ** In this case the file position is not known and sequence reading
8690 ** will have to scan for the entry/entries we need.
8691 **
8692 ** @param [u] thys [AjPSeqout] Sequence output definition.
8693 ** @return [AjBool] ajTrue on success.
8694 **
8695 ** @release 1.0.0
8696 ** @@
8697 ******************************************************************************/
8698
seqoutUsaProcess(AjPSeqout thys)8699 static AjBool seqoutUsaProcess(AjPSeqout thys)
8700 {
8701 AjBool fmtstat;
8702 AjBool regstat;
8703
8704 #ifdef __CYGWIN__
8705 AjPStr usatmp = NULL;
8706 #endif /* __CYGWIN__ */
8707
8708 ajDebug("seqoutUsaProcess\n");
8709 if(!seqoutRegFmt)
8710 #ifndef WIN32
8711 seqoutRegFmt = ajRegCompC("^([A-Za-z0-9-]*)::?(.*)$");
8712 /* \1 format */
8713 /* \2 remainder */
8714 #else /* WIN32 */
8715 /* Windows file names can start with e.g.: 'C:\' */
8716 /* -> Require that format names have at least 2 letters */
8717 seqoutRegFmt = ajRegCompC("^([A-Za-z0-9][A-Za-z0-9-][A-Za-z0-9-]*)"
8718 "::?(.*)$");
8719 /* \1 format */
8720 /* \2 remainder */
8721 #endif /* !WIN32 */
8722
8723
8724 if(!seqoutRegId) /* \1 is filename \3 is the qryid */
8725 seqoutRegId = ajRegCompC("^(.*)$");
8726
8727 ajStrAssignS(&seqoutUsaTest, thys->Usa);
8728
8729 #ifdef __CYGWIN__
8730 if(*(ajStrGetPtr(seqoutUsaTest)+1)==':')
8731 {
8732 usatmp = ajStrNew();
8733 ajFmtPrintS(&usatmp,"/cygdrive/%c/%s",*ajStrGetPtr(seqoutUsaTest),
8734 ajStrGetPtr(seqoutUsaTest)+2);
8735 ajStrAssignRef(&seqoutUsaTest,usatmp);
8736 ajStrDel(&usatmp);
8737 }
8738 #endif /* __CYGWIN__ */
8739
8740 ajDebug("output USA to test: '%S'\n\n", seqoutUsaTest);
8741
8742 fmtstat = ajRegExec(seqoutRegFmt, seqoutUsaTest);
8743 ajDebug("format regexp: %B\n", fmtstat);
8744
8745 if(fmtstat)
8746 {
8747 ajRegSubI(seqoutRegFmt, 1, &thys->Formatstr);
8748 ajStrAssignEmptyC(&thys->Formatstr, seqOutFormat[0].Name);
8749 /* default unknown */
8750
8751 ajRegSubI(seqoutRegFmt, 2, &seqoutUsaTest);
8752 ajDebug("found format %S\n", thys->Formatstr);
8753
8754 if(!seqoutFindOutFormat(thys->Formatstr, &thys->Format))
8755 {
8756 ajDebug("unknown format '%S'\n", thys->Formatstr);
8757
8758 return ajFalse;
8759 }
8760 }
8761 else
8762 ajDebug("no format specified in USA\n");
8763
8764 ajDebug("\n");
8765
8766 regstat = ajRegExec(seqoutRegId, seqoutUsaTest);
8767 ajDebug("file:id regexp: %B\n", regstat);
8768
8769 if(regstat)
8770 {
8771 ajRegSubI(seqoutRegId, 1, &thys->Filename);
8772 ajDebug("found filename %S single: %B dir: '%S'\n",
8773 thys->Filename, thys->Single, thys->Directory);
8774
8775 if(thys->Single)
8776 ajDebug("single output file per sequence, open later\n");
8777 else
8778 {
8779 if(thys->Knownfile)
8780 thys->File = thys->Knownfile;
8781 else
8782 thys->File = ajFileNewOutNamePathS(thys->Filename,
8783 thys->Directory);
8784
8785 if(!thys->File)
8786 {
8787 if(ajStrGetLen(thys->Directory))
8788 ajErr("failed to open filename '%S' in directory '%S'",
8789 thys->Filename, thys->Directory);
8790 else
8791 ajErr("failed to open filename '%S'", thys->Filename);
8792
8793 return ajFalse;
8794 }
8795 }
8796 }
8797 else
8798 ajDebug("no filename specified\n");
8799
8800 ajDebug("\n");
8801
8802 return ajTrue;
8803 }
8804
8805
8806
8807
8808 /* @section sequence output opening *****************************************
8809 **
8810 ** These functions use the contents of a sequence output object and
8811 ** update them.
8812 **
8813 ** @fdata [AjPSeqout]
8814 ** @fcategory modify
8815 **
8816 ** @nam3rule Clear Reset output object
8817 ** @nam4rule ClearUsa Reset output object and set new Usa
8818 ** @nam3rule Close Close output file
8819 ** @nam4rule CloseEmpty Close output file and check it is empty
8820 ** @nam3rule Flush Flush saved sequences to output file and clear list
8821 ** @nam3rule Open Open output file
8822 ** @nam4rule OpenFilename Open named output file
8823 ** @nam3rule Reset Clear ready to accept further sequences
8824 **
8825 ** @argrule * seqout [AjPSeqout] Sequence output object
8826 ** @argrule OpenFilename name [const AjPStr] Output filename
8827 ** @argrule ClearUsa usa [const AjPStr] Uniform sequence address
8828 **
8829 ** @valrule * [void]
8830 ** @valrule *Open [AjBool] True on success
8831 **
8832 ******************************************************************************/
8833
8834
8835
8836
8837 /* @func ajSeqoutClear ********************************************************
8838 **
8839 ** Clears a Sequence output object back to "as new" condition
8840 **
8841 ** @param [u] seqout [AjPSeqout] Sequence output object
8842 ** @category modify [AjPSeqout] Resets ready for reuse.
8843 ** @return [void]
8844 **
8845 ** @release 1.0.0
8846 ** @@
8847 ******************************************************************************/
8848
ajSeqoutClear(AjPSeqout seqout)8849 void ajSeqoutClear(AjPSeqout seqout)
8850 {
8851
8852 AjPStr ptr = NULL;
8853 AjPSeqRef tmpref = NULL;
8854 AjPSeqXref tmpxref = NULL;
8855 AjPSeqGene tmpgene = NULL;
8856
8857 ajDebug("ajSeqoutClear called\n");
8858
8859 ajStrSetClear(&seqout->Name);
8860 ajStrSetClear(&seqout->Acc);
8861 ajStrSetClear(&seqout->Sv);
8862 ajStrSetClear(&seqout->Gi);
8863 ajStrSetClear(&seqout->Tax);
8864 ajStrSetClear(&seqout->Taxcommon);
8865 ajStrSetClear(&seqout->Taxid);
8866 ajStrSetClear(&seqout->Organelle);
8867 ajStrSetClear(&seqout->Desc);
8868 ajStrSetClear(&seqout->Type);
8869 ajStrSetClear(&seqout->Outputtype);
8870 ajStrSetClear(&seqout->Full);
8871 ajStrSetClear(&seqout->Doc);
8872 ajStrSetClear(&seqout->Usa);
8873 ajStrSetClear(&seqout->Ufo);
8874 ajStrSetClear(&seqout->Informatstr);
8875 ajStrSetClear(&seqout->Formatstr);
8876 ajStrSetClear(&seqout->Filename);
8877 ajStrSetClear(&seqout->Directory);
8878 ajStrSetClear(&seqout->Entryname);
8879 ajStrSetClear(&seqout->Extension);
8880 ajStrSetClear(&seqout->Seq);
8881 seqout->EType = 0;
8882 seqout->Rev = ajFalse;
8883 seqout->Format = 0;
8884
8885
8886 if(seqout->File)
8887 {
8888 if(seqout->Cleanup)
8889 (*seqout->Cleanup)(seqout);
8890
8891 if(seqout->Knownfile)
8892 seqout->File = NULL;
8893 else
8894 ajFileClose(&seqout->File);
8895 }
8896
8897 seqout->Cleanup = NULL;
8898
8899 seqout->Count = 0;
8900 seqout->Single = ajFalse;
8901 seqout->Features = ajFalse;
8902
8903 while(ajListstrPop(seqout->Acclist,&ptr))
8904 ajStrDel(&ptr);
8905
8906 while(ajListstrPop(seqout->Keylist,&ptr))
8907 ajStrDel(&ptr);
8908
8909 while(ajListstrPop(seqout->Taxlist,&ptr))
8910 ajStrDel(&ptr);
8911
8912 while(ajListPop(seqout->Genelist,(void **)&tmpgene))
8913 ajSeqgeneDel(&tmpgene);
8914
8915 while(ajListPop(seqout->Reflist,(void **)&tmpref))
8916 ajSeqrefDel(&tmpref);
8917
8918 while(ajListstrPop(seqout->Cmtlist,&ptr))
8919 ajStrDel(&ptr);
8920
8921 while(ajListPop(seqout->Xreflist,(void **)&tmpxref))
8922 ajSeqxrefDel(&tmpxref);
8923
8924 ajSeqdescClear(seqout->Fulldesc);
8925 ajFeattabOutClear(&seqout->Ftquery);
8926 AJCSET0(seqout->Accuracy, seqout->Qualsize);
8927
8928 return;
8929 }
8930
8931
8932
8933
8934 /* @func ajSeqoutClearUsa *****************************************************
8935 **
8936 ** Creates or resets a sequence output object using a new Universal
8937 ** Sequence Address
8938 **
8939 ** @param [u] seqout [AjPSeqout] Sequence output object.
8940 ** @param [r] usa [const AjPStr] USA
8941 ** @return [void]
8942 ** @category modify [AjPSeqout] Resets using a new USA
8943 **
8944 ** @release 5.0.0
8945 ** @@
8946 ******************************************************************************/
8947
ajSeqoutClearUsa(AjPSeqout seqout,const AjPStr usa)8948 void ajSeqoutClearUsa(AjPSeqout seqout, const AjPStr usa)
8949 {
8950 ajSeqoutClear(seqout);
8951
8952 ajStrAssignS(&seqout->Usa, usa);
8953
8954 return;
8955 }
8956
8957
8958
8959
8960 /* @func ajSeqoutClose ********************************************************
8961 **
8962 ** Close a sequence output file. For formats that save everything up
8963 ** and write at the end, call the Write function first.
8964 **
8965 ** @param [u] seqout [AjPSeqout] Sequence output
8966 ** @return [void]
8967 **
8968 ** @release 4.1.0
8969 ** @@
8970 ******************************************************************************/
8971
ajSeqoutClose(AjPSeqout seqout)8972 void ajSeqoutClose(AjPSeqout seqout)
8973 {
8974
8975 ajDebug("ajSeqoutClose '%F'\n", seqout->File);
8976
8977 if(seqOutFormat[seqout->Format].Save)
8978 {
8979 /* Calling funclist seqOutFormat() */
8980 (*seqOutFormat[seqout->Format].Write)(seqout);
8981 }
8982
8983 if(seqout->Cleanup)
8984 (*seqout->Cleanup)(seqout);
8985 seqout->Cleanup = NULL;
8986
8987 if(seqout->File && !seqout->Count)
8988 ajWarn("No sequences written to output file '%F'", seqout->File);
8989
8990 if(seqout->Knownfile)
8991 seqout->File = NULL;
8992 else
8993 ajFileClose(&seqout->File);
8994
8995 return;
8996 }
8997
8998
8999
9000
9001 /* @func ajSeqoutCloseEmpty ***************************************************
9002 **
9003 ** Close a sequence output file with no output.
9004 **
9005 ** Warns if the file has been written to.
9006 **
9007 ** Closes the file so that no warning message is written bu the destructor
9008 **
9009 ** @param [u] seqout [AjPSeqout] Sequence output
9010 ** @return [void]
9011 **
9012 ** @release 6.5.0
9013 ** @@
9014 ******************************************************************************/
9015
ajSeqoutCloseEmpty(AjPSeqout seqout)9016 void ajSeqoutCloseEmpty(AjPSeqout seqout)
9017 {
9018
9019 ajDebug("ajSeqoutCloseEmpty '%F'\n", seqout->File);
9020
9021 if(seqOutFormat[seqout->Format].Save)
9022 {
9023 /* Calling funclist seqOutFormat() */
9024 (*seqOutFormat[seqout->Format].Write)(seqout);
9025 }
9026
9027 if(seqout->Cleanup)
9028 (*seqout->Cleanup)(seqout);
9029 seqout->Cleanup = NULL;
9030
9031 if(seqout->File && seqout->Count)
9032 ajWarn("Not empty file: %d sequence(s) written to output file '%F'",
9033 seqout->Count, seqout->File);
9034
9035 if(seqout->Knownfile)
9036 seqout->File = NULL;
9037 else
9038 ajFileClose(&seqout->File);
9039
9040 return;
9041 }
9042
9043
9044
9045
9046 /* @func ajSeqoutFlush ********************************************************
9047 **
9048 ** Flush output to a sequence output file. For formats that save everything up
9049 ** and write at the end, call the Write function first.
9050 **
9051 ** Do not close the file. It can be reused for more output
9052 **
9053 ** @param [u] seqout [AjPSeqout] Sequence output
9054 ** @return [void]
9055 **
9056 ** @release 6.1.0
9057 ** @@
9058 ******************************************************************************/
9059
ajSeqoutFlush(AjPSeqout seqout)9060 void ajSeqoutFlush(AjPSeqout seqout)
9061 {
9062
9063 ajDebug("ajSeqoutFlush '%F'\n", seqout->File);
9064
9065 if(seqOutFormat[seqout->Format].Save)
9066 {
9067 /* Calling funclist seqOutFormat() */
9068 (*seqOutFormat[seqout->Format].Write)(seqout);
9069 }
9070
9071 seqWriteListClear(seqout);
9072
9073 if(seqout->Cleanup)
9074 (*seqout->Cleanup)(seqout);
9075
9076 return;
9077 }
9078
9079
9080
9081
9082 /* @func ajSeqoutOpen *********************************************************
9083 **
9084 ** If the file is not yet open, calls seqoutUsaProcess to convert the USA into
9085 ** an open output file stream.
9086 **
9087 ** Returns the results in the AjPSeqout object.
9088 **
9089 ** @param [w] seqout [AjPSeqout] Sequence output object.
9090 ** @return [AjBool] ajTrue on success.
9091 ** @category modify [AjPSeqout] If the file is not yet open, calls
9092 ** seqoutUsaProcess
9093 **
9094 ** @release 1.0.0
9095 ** @@
9096 ******************************************************************************/
9097
ajSeqoutOpen(AjPSeqout seqout)9098 AjBool ajSeqoutOpen(AjPSeqout seqout)
9099 {
9100 AjBool ret = ajFalse;
9101
9102 if(seqout->Ftquery)
9103 ajDebug("ajSeqoutOpen dir '%S' qrydir '%S'\n",
9104 seqout->Directory, seqout->Ftquery->Directory);
9105 else
9106 ajDebug("ajSeqoutOpen dir '%S' (no ftquery)\n",
9107 seqout->Directory);
9108
9109 ret = seqoutUsaProcess(seqout);
9110
9111 if(!ret)
9112 return ajFalse;
9113
9114 if(!seqout->Features)
9115 return ret;
9116
9117 ajStrAssignEmptyS(&seqout->Ftquery->Seqname, seqout->Name);
9118 ajFeattabOutSetBasename(seqout->Ftquery, seqout->Filename);
9119 ret = ajFeattabOutSet(seqout->Ftquery, seqout->Ufo);
9120
9121 return ret;
9122 }
9123
9124
9125
9126
9127 /* @func ajSeqoutOpenFilename *************************************************
9128 **
9129 ** Opens an output file for sequence writing. 'stdout' and 'stderr' are
9130 ** special cases using standard output and standard error respectively.
9131 **
9132 ** @param [u] seqout [AjPSeqout] Sequence output object.
9133 ** @param [r] name [const AjPStr] Output filename.
9134 ** @return [AjBool] ajTrue on success.
9135 ** @category modify [AjPSeqout] Opens an output file for sequence
9136 ** writing.
9137 **
9138 ** @release 4.1.0
9139 ** @@
9140 ******************************************************************************/
9141
ajSeqoutOpenFilename(AjPSeqout seqout,const AjPStr name)9142 AjBool ajSeqoutOpenFilename(AjPSeqout seqout, const AjPStr name)
9143 {
9144 AjBool single;
9145 AjBool features;
9146
9147 single = seqout->Single;
9148 features = seqout->Features;
9149
9150 if(ajStrMatchCaseC(name, "stdout"))
9151 single = ajFalse;
9152
9153 if(ajStrMatchCaseC(name, "stderr"))
9154 single = ajFalse;
9155
9156 if(single)
9157 { /* OK, but nothing to open yet */
9158 ajStrAssignEmptyS(&seqout->Extension, seqout->Formatstr);
9159
9160 return ajTrue;
9161 }
9162 else
9163 {
9164 seqout->File = ajFileNewOutNameS(name);
9165
9166 if(seqout->File)
9167 return ajTrue;
9168 }
9169
9170 if(features)
9171 ajWarn("ajSeqFileNewOut features not yet implemented");
9172
9173 return ajFalse;
9174 }
9175
9176
9177
9178
9179 /* @func ajSeqoutReset ********************************************************
9180 **
9181 ** Clears a Sequence output object ready to accept further sequences
9182 **
9183 ** @param [u] seqout [AjPSeqout] Sequence output object
9184 ** @category modify [AjPSeqout] Resets ready for reuse.
9185 ** @return [void]
9186 **
9187 ** @release 6.1.0
9188 ** @@
9189 ******************************************************************************/
9190
ajSeqoutReset(AjPSeqout seqout)9191 void ajSeqoutReset(AjPSeqout seqout)
9192 {
9193
9194 AjPStr ptr = NULL;
9195 AjPSeqRef tmpref = NULL;
9196 AjPSeqXref tmpxref = NULL;
9197 AjPSeqGene tmpgene = NULL;
9198
9199 ajDebug("ajSeqoutClear called\n");
9200
9201 ajStrSetClear(&seqout->Name);
9202 ajStrSetClear(&seqout->Acc);
9203 ajStrSetClear(&seqout->Sv);
9204 ajStrSetClear(&seqout->Gi);
9205 ajStrSetClear(&seqout->Tax);
9206 ajStrSetClear(&seqout->Taxcommon);
9207 ajStrSetClear(&seqout->Taxid);
9208 ajStrSetClear(&seqout->Organelle);
9209 ajStrSetClear(&seqout->Desc);
9210 ajStrSetClear(&seqout->Type);
9211 ajStrSetClear(&seqout->Outputtype);
9212 ajStrSetClear(&seqout->Full);
9213 ajStrSetClear(&seqout->Doc);
9214 ajStrSetClear(&seqout->Usa);
9215 ajStrSetClear(&seqout->Ufo);
9216 ajStrSetClear(&seqout->Informatstr);
9217 ajStrSetClear(&seqout->Formatstr);
9218 ajStrSetClear(&seqout->Filename);
9219 ajStrSetClear(&seqout->Directory);
9220 ajStrSetClear(&seqout->Entryname);
9221 ajStrSetClear(&seqout->Extension);
9222 ajStrSetClear(&seqout->Seq);
9223 seqout->EType = 0;
9224 seqout->Rev = ajFalse;
9225 seqout->Format = 0;
9226
9227
9228 if(seqout->File)
9229 {
9230 if(seqout->Cleanup)
9231 (*seqout->Cleanup)(seqout);
9232
9233 if(seqout->Knownfile)
9234 seqout->File = NULL;
9235 else
9236 ajFileClose(&seqout->File);
9237 }
9238
9239 seqout->Cleanup = NULL;
9240
9241 seqout->Count = 0;
9242 seqout->Single = ajFalse;
9243 seqout->Features = ajFalse;
9244
9245 while(ajListstrPop(seqout->Acclist,&ptr))
9246 ajStrDel(&ptr);
9247
9248 while(ajListstrPop(seqout->Keylist,&ptr))
9249 ajStrDel(&ptr);
9250
9251 while(ajListstrPop(seqout->Taxlist,&ptr))
9252 ajStrDel(&ptr);
9253
9254 while(ajListPop(seqout->Genelist,(void **)&tmpgene))
9255 ajSeqgeneDel(&tmpgene);
9256
9257 while(ajListPop(seqout->Reflist,(void **)&tmpref))
9258 ajSeqrefDel(&tmpref);
9259
9260 while(ajListstrPop(seqout->Cmtlist,&ptr))
9261 ajStrDel(&ptr);
9262
9263 while(ajListPop(seqout->Xreflist,(void **)&tmpxref))
9264 ajSeqxrefDel(&tmpxref);
9265
9266 ajSeqdescClear(seqout->Fulldesc);
9267 ajFeattabOutClear(&seqout->Ftquery);
9268 AJCSET0(seqout->Accuracy, seqout->Qualsize);
9269
9270 return;
9271 }
9272
9273
9274
9275
9276 /* @section set ***************************************************************
9277 **
9278 ** @fdata [AjPSeqout]
9279 ** @fcategory modify
9280 **
9281 ** @nam3rule Set Set values
9282 ** @nam4rule Format Set output format
9283 ** @nam4rule Name Set output name
9284 ** @nam5rule NameDefault Set output name to default if not already set
9285 **
9286 ** @suffix C [char*] C character string
9287 ** @suffix S [AjPStr] string object
9288 **
9289 ** @argrule Set seqout [AjPSeqout] Sequence output object
9290 ** @argrule NameDefault multi [AjBool] True if number is to be appended
9291 ** @argrule C txt [const char*] Format name
9292 ** @argrule S str [const AjPStr] Format name
9293 **
9294 ** @valrule * [AjBool] True on success
9295 **
9296 ******************************************************************************/
9297
9298
9299
9300
9301 /* @func ajSeqoutSetFormatC ***************************************************
9302 **
9303 ** Sets the output format. Currently hard coded but will be replaced
9304 ** in future by a variable.
9305 **
9306 ** @param [u] seqout [AjPSeqout] Sequence output object.
9307 ** @param [r] txt [const char *] Output format.
9308 ** @return [AjBool] ajTrue on success.
9309 **
9310 ** @release 5.0.0
9311 ** @@
9312 ******************************************************************************/
9313
ajSeqoutSetFormatC(AjPSeqout seqout,const char * txt)9314 AjBool ajSeqoutSetFormatC(AjPSeqout seqout, const char* txt)
9315 {
9316 AjPStr fmt = NULL;
9317 AjBool ret;
9318
9319 fmt = ajStrNewC(txt);
9320 ret = ajSeqoutSetFormatS(seqout,fmt);
9321 ajStrDel(&fmt);
9322
9323 return ret;
9324 }
9325
9326
9327
9328
9329 /* @func ajSeqoutSetFormatS ***************************************************
9330 **
9331 ** Sets the output format. Currently hard coded but will be replaced
9332 ** in future by a variable.
9333 **
9334 ** @param [u] seqout [AjPSeqout] Sequence output object.
9335 ** @param [r] str [const AjPStr] Output format.
9336 ** @return [AjBool] ajTrue on success.
9337 **
9338 ** @release 5.0.0
9339 ** @@
9340 ******************************************************************************/
9341
ajSeqoutSetFormatS(AjPSeqout seqout,const AjPStr str)9342 AjBool ajSeqoutSetFormatS(AjPSeqout seqout, const AjPStr str)
9343 {
9344 AjPStr fmt = NULL;
9345
9346 ajDebug("ajSeqoutSetFormatS '%S'\n", str);
9347 ajStrAssignS(&fmt, str);
9348 ajSeqoutstrGetFormatDefault(&fmt);
9349
9350 ajStrAssignEmptyS(&seqout->Formatstr, fmt);
9351 ajDebug("... output format set to '%S'\n", fmt);
9352
9353 ajStrDel(&fmt);
9354
9355 return ajTrue;
9356 }
9357
9358
9359
9360
9361 /* @func ajSeqoutSetNameDefaultC **********************************************
9362 **
9363 ** Provides a unique (for this program run) name for a sequence.
9364 **
9365 ** @param [w] seqout [AjPSeqout] Sequence output object
9366 ** @param [r] multi [AjBool] If true, appends a number to the name.
9367 ** @param [r] txt [const char*] Name set by caller
9368 ** @return [AjBool] True on success
9369 **
9370 ** @release 5.0.0
9371 ** @@
9372 ******************************************************************************/
9373
ajSeqoutSetNameDefaultC(AjPSeqout seqout,AjBool multi,const char * txt)9374 AjBool ajSeqoutSetNameDefaultC(AjPSeqout seqout,
9375 AjBool multi, const char* txt)
9376 {
9377 static ajint count = 0;
9378
9379 if(ajStrGetLen(seqout->Name))
9380 {
9381 ajDebug("ajSeqoutSetNameDefaultC already has a name '%S'\n",
9382 seqout->Name);
9383
9384 return ajFalse;
9385 }
9386
9387 if (*txt)
9388 {
9389 if(multi && count)
9390 ajFmtPrintS(&seqout->Name, "%s_%3.3d", txt, ++count);
9391 else
9392 {
9393 ajStrAssignC(&seqout->Name, txt);
9394 ++count;
9395 }
9396 }
9397 else
9398 {
9399 if(multi)
9400 ajFmtPrintS(&seqout->Name, "EMBOSS_%3.3d", ++count);
9401 else
9402 {
9403 ajStrAssignC(&seqout->Name, "EMBOSS");
9404 ++count;
9405 }
9406 }
9407
9408 ajDebug("ajSeqoutSetNameDefaultC set to '%S'\n", seqout->Name);
9409
9410 return ajTrue;
9411 }
9412
9413
9414
9415
9416 /* @func ajSeqoutSetNameDefaultS **********************************************
9417 **
9418 ** Provides a unique (for this program run) name for a sequence.
9419 **
9420 ** @param [w] seqout [AjPSeqout] Sequence output object
9421 ** @param [r] multi [AjBool] If true, appends a number to the name.
9422 ** @param [r] str [const AjPStr] Name set by caller
9423 ** @return [AjBool] True on success
9424 **
9425 ** @release 5.0.0
9426 ** @@
9427 ******************************************************************************/
9428
ajSeqoutSetNameDefaultS(AjPSeqout seqout,AjBool multi,const AjPStr str)9429 AjBool ajSeqoutSetNameDefaultS(AjPSeqout seqout,
9430 AjBool multi, const AjPStr str)
9431 {
9432 static ajint count = 0;
9433
9434 if(ajStrGetLen(seqout->Name))
9435 {
9436 ajDebug("ajSeqoutSetNameDefaultS already has a name '%S'\n",
9437 seqout->Name);
9438
9439 return ajFalse;
9440 }
9441
9442 if (ajStrGetLen(str))
9443 {
9444 if(multi && count)
9445 ajFmtPrintS(&seqout->Name, "%S_%3.3d", str, ++count);
9446 else
9447 {
9448 ajStrAssignS(&seqout->Name, str);
9449 ++count;
9450 }
9451 }
9452 else
9453 {
9454 if(multi)
9455 ajFmtPrintS(&seqout->Name, "EMBOSS_%3.3d", ++count);
9456 else
9457 {
9458 ajStrAssignC(&seqout->Name, "EMBOSS");
9459 ++count;
9460 }
9461 }
9462
9463 ajDebug("ajSeqoutSetNameDefaultS set to '%S'\n", seqout->Name);
9464
9465 return ajTrue;
9466 }
9467
9468
9469
9470
9471 /* @funcstatic seqDbName ******************************************************
9472 **
9473 ** Adds the database name (if any) to the name provided.
9474 **
9475 ** @param [w] name [AjPStr*] Derived name.
9476 ** @param [r] db [const AjPStr] Database name (if any)
9477 ** @return [void]
9478 **
9479 ** @release 1.0.0
9480 ** @@
9481 ******************************************************************************/
9482
seqDbName(AjPStr * name,const AjPStr db)9483 static void seqDbName(AjPStr* name, const AjPStr db)
9484 {
9485 AjPStr tmpname = NULL;
9486
9487 if(!ajStrGetLen(db))
9488 return;
9489
9490 ajStrAssignS(&tmpname, *name);
9491 ajFmtPrintS(name, "%S:%S", db, tmpname);
9492
9493 ajStrDel(&tmpname);
9494
9495 return;
9496 }
9497
9498
9499
9500
9501 /* @section print *************************************************************
9502 **
9503 ** @fdata [AjPSeqout]
9504 **
9505 ** Print internal details for use by entrails
9506 **
9507 ** @nam3rule Print Print internal details
9508 ** @nam3rule Printbook Print internal details as docbook
9509 ** @nam3rule Printhtml Print internal details as html
9510 ** @nam3rule Printwiki Print internal details as wikitext
9511 ** @nam4rule Format Print details of sequence output formats
9512 **
9513 ** @argrule Print outf [AjPFile] Output file
9514 ** @argrule Printbook outf [AjPFile] Output file
9515 ** @argrule Printhtml outf [AjPFile] Output file
9516 ** @argrule Printwiki outf [AjPFile] Output file
9517 ** @argrule Print full [AjBool] Print full details
9518 **
9519 ** @valrule * [void]
9520 ** @fcategory misc
9521 **
9522 ******************************************************************************/
9523
9524
9525
9526
9527 /* @func ajSeqoutPrintFormat **************************************************
9528 **
9529 ** Reports the internal data structures
9530 **
9531 ** @param [u] outf [AjPFile] Output file
9532 ** @param [r] full [AjBool] Full report (usually ajFalse)
9533 ** @return [void]
9534 **
9535 ** @release 5.0.0
9536 ** @@
9537 ******************************************************************************/
9538
ajSeqoutPrintFormat(AjPFile outf,AjBool full)9539 void ajSeqoutPrintFormat(AjPFile outf, AjBool full)
9540 {
9541
9542 ajint i = 0;
9543
9544 (void) full; /* make it used - no extra detail reported */
9545
9546 ajFmtPrintF(outf, "\n");
9547 ajFmtPrintF(outf, "# Sequence output formats\n");
9548 ajFmtPrintF(outf, "# Alias Alias name\n");
9549 ajFmtPrintF(outf, "# Single: If true, write each sequence to new file\n");
9550 ajFmtPrintF(outf, "# Save: If true, save sequences, write when closed\n");
9551 ajFmtPrintF(outf, "# Nuc Can read nucleotide input\n");
9552 ajFmtPrintF(outf, "# Pro Can read protein input\n");
9553 ajFmtPrintF(outf, "# Feat Can read feature annotation\n");
9554 ajFmtPrintF(outf, "# Gap Can read gap characters\n");
9555 ajFmtPrintF(outf, "# Mset Can read seqsetall (multiple seqsets)\n");
9556 ajFmtPrintF(outf, "# Name Alias Single Save Pro Nuc Feat "
9557 "Gap MSet Description\n");
9558 ajFmtPrintF(outf, "\n");
9559 ajFmtPrintF(outf, "OutFormat {\n");
9560
9561 for(i=0; seqOutFormat[i].Name; i++)
9562 {
9563 ajFmtPrintF(outf,
9564 " %-15s %3B %3B %3B %3B %3B %3B %3B %3B \"%s\"\n",
9565 seqOutFormat[i].Name,
9566 seqOutFormat[i].Alias,
9567 seqOutFormat[i].Single,
9568 seqOutFormat[i].Save,
9569 seqOutFormat[i].Nucleotide,
9570 seqOutFormat[i].Protein,
9571 seqOutFormat[i].Feature,
9572 seqOutFormat[i].Gap,
9573 seqOutFormat[i].Multiset,
9574 seqOutFormat[i].Desc);
9575 }
9576
9577 ajFmtPrintF(outf, "}\n\n");
9578
9579 return;
9580 }
9581
9582
9583
9584
9585 /* @func ajSeqoutPrintbookFormat **********************************************
9586 **
9587 ** Reports the internal data structures as EMBOSS book format
9588 **
9589 ** @param [u] outf [AjPFile] Output file
9590 ** @return [void]
9591 **
9592 ** @release 6.2.0
9593 ** @@
9594 ******************************************************************************/
9595
ajSeqoutPrintbookFormat(AjPFile outf)9596 void ajSeqoutPrintbookFormat(AjPFile outf)
9597 {
9598
9599 ajint i = 0;
9600 ajint j = 0;
9601 AjPStr namestr = NULL;
9602 AjPList fmtlist;
9603 AjPStr* names;
9604
9605 fmtlist = ajListstrNew();
9606
9607 ajFmtPrintF(outf, "<para>The supported sequence formats are summarised "
9608 "in the table below. "
9609 "The columns are as follows: "
9610 "<emphasis>Input format</emphasis> (format name), "
9611 "<emphasis>Output format</emphasis> (format name), "
9612 "<emphasis>Sngl</emphasis> "
9613 "(indicates whether each sequence is written to a new file. "
9614 "This behaviour is the default and can be set by the "
9615 "<option>-ossingle</option> command line qualifier. "
9616 "<emphasis>Save</emphasis> (indicates that sequence data is "
9617 "stored internally and written when the output is closed. "
9618 "This is needed for 'interleaved' formats such as Phylip "
9619 "and MSF), <emphasis>Try</emphasis> (indicates whether the "
9620 "format can be detected automatically on input), "
9621 "<emphasis>Nuc</emphasis> (\"true\" indicates nucleotide "
9622 "sequence data may be represented), <emphasis>Pro</emphasis> "
9623 "(\"true\" indicates protein sequence data may be represented, "
9624 "<emphasis>Feat</emphasis> (whether the format includes "
9625 "feature annotation data. "
9626 "EMBOSS can also read feature data from a separate "
9627 "feature file). "
9628 "<emphasis>Gap</emphasis> (whether the format supports "
9629 "sequence data with gap characters, for example the results "
9630 "of an alignment), "
9631 "<emphasis>Mset</emphasis> (\"true\" indicates that more "
9632 "than one set of sequences can be stored in a single file. "
9633 "This is used by, for example, phylogenetic analysis "
9634 "applications to store many versions of a multiple alignment "
9635 "for statistical analysis) and "
9636 "<emphasis>Description</emphasis> (short description of "
9637 "the format).</para> \n");
9638
9639
9640 ajFmtPrintF(outf, "<table frame=\"box\" rules=\"cols\">\n");
9641 ajFmtPrintF(outf, " <caption>Output sequence formats</caption>\n");
9642 ajFmtPrintF(outf, " <thead>\n");
9643 ajFmtPrintF(outf, " <tr align=\"center\">\n");
9644 ajFmtPrintF(outf, " <th>Input Format</th>\n");
9645 ajFmtPrintF(outf, " <th>Sngl</th>\n");
9646 ajFmtPrintF(outf, " <th>Save</th>\n");
9647 ajFmtPrintF(outf, " <th>Nuc</th>\n");
9648 ajFmtPrintF(outf, " <th>Pro</th>\n");
9649 ajFmtPrintF(outf, " <th>Feat</th>\n");
9650 ajFmtPrintF(outf, " <th>Gap</th>\n");
9651 ajFmtPrintF(outf, " <th>Multi</th>\n");
9652 ajFmtPrintF(outf, " <th>Description</th>\n");
9653 ajFmtPrintF(outf, " </tr>\n");
9654 ajFmtPrintF(outf, " </thead>\n");
9655 ajFmtPrintF(outf, " <tbody>\n");
9656
9657 for(i=1; seqOutFormat[i].Name; i++)
9658 {
9659 if(!seqOutFormat[i].Alias)
9660 {
9661 namestr = ajStrNewC(seqOutFormat[i].Name);
9662 ajListPush(fmtlist, namestr);
9663 namestr = NULL;
9664 }
9665 }
9666
9667 ajListSort(fmtlist, &ajStrVcmp);
9668 ajListstrToarray(fmtlist, &names);
9669
9670 for(i=0; names[i]; i++)
9671 {
9672 for(j=0; seqOutFormat[j].Name; j++)
9673 {
9674 if(ajStrMatchC(names[i],seqOutFormat[j].Name))
9675 {
9676 ajFmtPrintF(outf, " <tr>\n");
9677 ajFmtPrintF(outf, " <td>%s</td>\n",
9678 seqOutFormat[j].Name);
9679 ajFmtPrintF(outf, " <td>%B</td>\n",
9680 seqOutFormat[j].Single);
9681 ajFmtPrintF(outf, " <td>%B</td>\n",
9682 seqOutFormat[j].Save);
9683 ajFmtPrintF(outf, " <td>%B</td>\n",
9684 seqOutFormat[j].Nucleotide);
9685 ajFmtPrintF(outf, " <td>%B</td>\n",
9686 seqOutFormat[j].Protein);
9687 ajFmtPrintF(outf, " <td>%B</td>\n",
9688 seqOutFormat[j].Feature);
9689 ajFmtPrintF(outf, " <td>%B</td>\n",
9690 seqOutFormat[j].Gap);
9691 ajFmtPrintF(outf, " <td>%B</td>\n",
9692 seqOutFormat[j].Multiset);
9693 ajFmtPrintF(outf, " <td>%s</td>\n",
9694 seqOutFormat[j].Desc);
9695 ajFmtPrintF(outf, " </tr>\n");
9696 }
9697 }
9698 }
9699
9700 ajFmtPrintF(outf, " </tbody>\n");
9701 ajFmtPrintF(outf, "</table>\n");
9702 ajStrDel(&namestr);
9703
9704 return;
9705 }
9706
9707
9708
9709
9710 /* @func ajSeqoutPrinthtmlFormat **********************************************
9711 **
** Reports the internal data structures as an HTML table
9713 **
9714 ** @param [u] outf [AjPFile] Output file
9715 ** @return [void]
9716 **
9717 ** @release 6.2.0
9718 ** @@
9719 ******************************************************************************/
9720
void ajSeqoutPrinthtmlFormat(AjPFile outf)
{
    ajint ifmt;
    ajint jfmt;
    AjPStr names = NULL;

    /* header row: one column per reported format property */
    ajFmtPrintF(outf, "<table border=3>");
    ajFmtPrintF(outf, "<tr><th>Output Format</th>\n");
    ajFmtPrintF(outf, "<th>Single</th><th>Save</th>\n");
    ajFmtPrintF(outf, "<th>Nuc</th><th>Pro</th><th>Feat</th><th>Gap</th>\n");
    ajFmtPrintF(outf, "<th>Multi</th><th>Description</th></tr>\n");

    /* the scan starts at entry 1; entries flagged as aliases get no row */
    for(ifmt = 1; seqOutFormat[ifmt].Name; ifmt++)
    {
        if(seqOutFormat[ifmt].Alias)
            continue;

        ajStrAssignC(&names, seqOutFormat[ifmt].Name);

        /*
        ** later entries that share this entry's Write function are
        ** listed in the same name cell, separated by <br>
        */
        jfmt = ifmt + 1;

        while(seqOutFormat[jfmt].Name)
        {
            if(seqOutFormat[jfmt].Write == seqOutFormat[ifmt].Write)
            {
                ajFmtPrintAppS(&names, "<br>%s", seqOutFormat[jfmt].Name);

                /* same writer but not marked as an alias: warn */
                if(!seqOutFormat[jfmt].Alias)
                    ajWarn("Output format '%s' same as '%s' but not alias",
                           seqOutFormat[jfmt].Name,
                           seqOutFormat[ifmt].Name);
            }

            jfmt++;
        }

        ajFmtPrintF(outf, "<tr><td>\n%S\n</td><td>%B</td><td>%B</td>\n",
                    names,
                    seqOutFormat[ifmt].Single,
                    seqOutFormat[ifmt].Save);
        ajFmtPrintF(outf, "<td>%B</td><td>%B</td><td>%B</td><td>%B</td>\n",
                    seqOutFormat[ifmt].Nucleotide,
                    seqOutFormat[ifmt].Protein,
                    seqOutFormat[ifmt].Feature,
                    seqOutFormat[ifmt].Gap);
        ajFmtPrintF(outf, "<td>%B</td><td>\n%s\n</td></tr>\n",
                    seqOutFormat[ifmt].Multiset,
                    seqOutFormat[ifmt].Desc);
    }

    ajFmtPrintF(outf, "</table>\n");
    ajStrDel(&names);
}
9774
9775
9776
9777
9778 /* @func ajSeqoutPrintwikiFormat **********************************************
9779 **
9780 ** Reports the internal data structures as wikitext
9781 **
9782 ** @param [u] outf [AjPFile] Output file
9783 ** @return [void]
9784 **
9785 ** @release 6.2.0
9786 ** @@
9787 ******************************************************************************/
9788
void ajSeqoutPrintwikiFormat(AjPFile outf)
{
    ajint ifmt;
    ajint jfmt;
    AjPStr names = NULL;

    /* wikitable opening and header row */
    ajFmtPrintF(outf, "{| class=\"wikitable sortable\" border=\"2\"\n");
    ajFmtPrintF(outf, "|-\n");
    ajFmtPrintF(outf, "!Format!!Sngl!!Save!!Nuc!!Pro!!Feat!!Gap!!MSet!!"
                "class=\"unsortable\"|Description\n");

    /* the scan starts at entry 1; entries flagged as aliases get no row */
    for(ifmt = 1; seqOutFormat[ifmt].Name; ifmt++)
    {
        if(seqOutFormat[ifmt].Alias)
            continue;

        ajStrAssignC(&names, seqOutFormat[ifmt].Name);

        /*
        ** later entries that share this entry's Write function are
        ** appended to the same name cell, separated by <br>
        */
        jfmt = ifmt + 1;

        while(seqOutFormat[jfmt].Name)
        {
            if(seqOutFormat[jfmt].Write == seqOutFormat[ifmt].Write)
            {
                ajFmtPrintAppS(&names, "<br>%s", seqOutFormat[jfmt].Name);

                /* same writer but not marked as an alias: warn */
                if(!seqOutFormat[jfmt].Alias)
                    ajWarn("Sequence output format '%s' same as '%s' "
                           "but not alias",
                           seqOutFormat[jfmt].Name,
                           seqOutFormat[ifmt].Name);
            }

            jfmt++;
        }

        /* one table row per non-alias format */
        ajFmtPrintF(outf, "|-\n");
        ajFmtPrintF(outf,
                    "|%S||%B||%B||%B||%B||%B||%B||%B||%s\n",
                    names,
                    seqOutFormat[ifmt].Single,
                    seqOutFormat[ifmt].Save,
                    seqOutFormat[ifmt].Nucleotide,
                    seqOutFormat[ifmt].Protein,
                    seqOutFormat[ifmt].Feature,
                    seqOutFormat[ifmt].Gap,
                    seqOutFormat[ifmt].Multiset,
                    seqOutFormat[ifmt].Desc);
    }

    ajFmtPrintF(outf, "|}\n\n");
    ajStrDel(&names);
}
9842
9843
9844
9845
9846 /* @funcstatic seqSeqFormat ***************************************************
9847 **
9848 ** Initialises sequence output formatting parameters.
9849 **
9850 ** @param [r] seqlen [ajint] Sequence length
9851 ** @param [u] Psf [SeqPSeqFormat*] Sequence format object
9852 ** @return [void]
9853 **
9854 ** @release 1.0.0
9855 ** @@
9856 ******************************************************************************/
9857
static void seqSeqFormat(ajint seqlen, SeqPSeqFormat* Psf)
{
    char numform[20];
    SeqPSeqFormat sf;
    ajint i;
    ajint j;

    /* number width: one more than the count of decimal digits in seqlen */
    j = 1;

    for(i = seqlen; i; i /= 10)
        j++;

    /*
    ** The string-length based width is computed only so the debug trace
    ** can compare it with the digit-count value. strlen() yields a
    ** size_t, so it is cast to match the %d conversion.
    */
    sprintf(numform, "%d", seqlen);
    ajDebug("seqSeqFormat numwidth old: %d new: %d\n",
            (ajint) (strlen(numform)+1), j);

    if(!*Psf)
    {
        /* no format object yet: allocate one and fill in the defaults */
        sf = AJNEW0(*Psf);
        sf->namewidth = 8;
        sf->spacer = 0;
        sf->width = 50;
        sf->tab = 0;
        sf->numright = sf->numleft = sf->numjust = ajFalse;
        sf->nameright = sf->nameleft = ajFalse;
        sf->numline = 0;
        sf->linepos = 0;

        sf->skipbefore = ajFalse;
        sf->skipafter = ajFalse;
        sf->isactive = ajFalse;
        sf->baseonlynum = ajFalse;
        sf->gapchar = '-';
        sf->matchchar = '.';
        sf->noleaves = sf->domatch = sf->degap = ajFalse;
        sf->pretty = ajFalse;
        strcpy(sf->endstr, "");
    }
    else
        sf = *Psf;

    /* the number width is refreshed on every call, new object or not */
    sf->numwidth = j; /* or 8 as a reasonable minimum */

    return;
}
9904
9905
9906
9907
9908 /* ==================================================================== */
9909 /* ============================ Casts ================================= */
9910 /* ==================================================================== */
9911
9912
9913
9914
9915 /* @section Sequence Output Casts *********************************************
9916 **
9917 ** @fdata [AjPSeqout]
9918 **
9919 ** These functions examine the contents of a sequence output object
9920 ** and return some derived information. Some of them provide access to
9921 ** the internal components of a sequence output object. They are
9922 ** provided for programming convenience but should be used with
9923 ** caution.
9924 **
9925 ** @nam3rule Get Return an element or property
9926 ** @nam4rule Basecount Counts of nucleotide bases
9927 ** @nam4rule Checkgcg GCG checksum
9928 ** @nam4rule Filename Output filename
9929 **
9930 ** @argrule Get seqout [const AjPSeqout] Sequence output object
9931 ** @argrule Basecount bases [ajuint*] Base counts
9932 **
9933 ** @valrule Checkgcg [ajint] GCG checksum
9934 ** @valrule Basecount [void]
9935 ** @valrule Filename [const AjPStr] Output filename
9936 **
9937 ** @fcategory cast
9938 **
9939 ******************************************************************************/
9940
9941
9942
9943
9944 /* @func ajSeqoutGetBasecount *************************************************
9945 **
** Counts the numbers of A, C, G and T/U (upper or lower case) in a
** nucleotide sequence. The fifth array element receives the count of
** all other characters.
9947 **
9948 ** @param [r] seqout [const AjPSeqout] Sequence output object
9949 ** @param [w] bases [ajuint*] Integer array, minimum size 5,
9950 ** to hold the results.
9951 ** @return [void]
9952 **
9953 ** @release 5.0.0
9954 ** @@
9955 ******************************************************************************/
9956
void ajSeqoutGetBasecount(const AjPSeqout seqout, ajuint* bases)
{
    const char* residue;
    ajuint slot;

    ajDebug("ajSeqoutGetBasecount %d bases\n", ajStrGetLen(seqout->Seq));

    /* slots: 0=A 1=C 2=G 3=T/U 4=everything else */
    for(slot = 0; slot < 5; slot++)
        bases[slot] = 0;

    for(residue = ajStrGetPtr(seqout->Seq); *residue; residue++)
    {
        if(*residue == 'A' || *residue == 'a')
            bases[0]++;
        else if(*residue == 'C' || *residue == 'c')
            bases[1]++;
        else if(*residue == 'G' || *residue == 'g')
            bases[2]++;
        else if(*residue == 'T' || *residue == 't' ||
                *residue == 'U' || *residue == 'u')
            bases[3]++;
    }

    /* the remainder is derived from the stored length, not counted */
    bases[4] = ajStrGetLen(seqout->Seq) -
        bases[0] - bases[1] - bases[2] - bases[3];
}
10000
10001
10002
10003
10004 /* @func ajSeqoutGetCheckgcg **************************************************
10005 **
10006 ** Calculates a GCG checksum for an output sequence.
10007 **
10008 ** @param [r] seqout [const AjPSeqout] Output sequence.
10009 ** @return [ajint] GCG checksum.
10010 ** @category cast [AjPSeqout] Calculates the GCG checksum for a
10011 ** sequence set.
10012 **
10013 ** @release 5.0.0
10014 ** @@
10015 ******************************************************************************/
10016
ajSeqoutGetCheckgcg(const AjPSeqout seqout)10017 ajint ajSeqoutGetCheckgcg(const AjPSeqout seqout)
10018 {
10019 ajlong i;
10020 ajlong check = 0;
10021 ajlong count = 0;
10022 const char *cp;
10023 ajint ilen;
10024
10025 cp = ajStrGetPtr(seqout->Seq);
10026 ilen = ajStrGetLen(seqout->Seq);
10027
10028 for(i = 0; i < ilen; i++)
10029 {
10030 count++;
10031 check += count * toupper((ajint) cp[i]);
10032
10033 if(count == 57)
10034 count = 0;
10035 }
10036 check %= 10000;
10037
10038 return (ajint) check;
10039 }
10040
10041
10042
10043
10044 /* @func ajSeqoutGetFilename **************************************************
10045 **
10046 ** Returns the filename for a sequence output object
10047 **
10048 ** @param [r] seqout [const AjPSeqout] Sequence output object
10049 ** @return [const AjPStr] Filename
10050 **
10051 **
10052 ** @release 6.1.0
10053 ******************************************************************************/
10054
ajSeqoutGetFilename(const AjPSeqout seqout)10055 const AjPStr ajSeqoutGetFilename(const AjPSeqout seqout)
10056 {
10057 if(!seqout)
10058 return NULL;
10059
10060 return ajFileGetPrintnameS(seqout->File);
10061 }
10062
10063
10064
10065
10066 /* @funcstatic seqClone *******************************************************
10067 **
10068 ** Copies data from a sequence into a sequence output object.
10069 ** Used before writing the sequence. This version works with sequence streams.
10070 ** The difference is that the output object must be overwritten.
10071 **
10072 ** @param [u] outseq [AjPSeqout] Sequence output.
10073 ** @param [r] seq [const AjPSeq] Sequence.
10074 ** @return [void]
10075 **
10076 ** @release 1.0.0
10077 ** @@
10078 ******************************************************************************/
10079
static void seqClone(AjPSeqout outseq, const AjPSeq seq)
{

    ajint ibegin = 1;
    ajint iend;
    ajint ilen;
    ajint ncopy;
    AjPSeqRef tmpref = NULL;
    AjPSeqXref tmpxref = NULL;
    AjPSeqGene tmpgene = NULL;
    AjBool seqrange = ajFalse;

    ajDebug("seqClone out Setdb '%S' Db '%S' seq Setdb '%S' Db '%S'\n",
            outseq->Setdb, outseq->Db,
            seq->Setdb, seq->Db);

    /* default range is the whole sequence: 1 .. length */
    iend = MAJSTRGETLEN(seq->Seq);

    /* seqrange is set only when the range really trims the sequence */
    if(seq->Begin)
    {
        ibegin = ajSeqGetBegin(seq);
        if(ibegin > 1)
            seqrange = ajTrue;
        ajDebug("seqClone begin: %d range: %B\n", ibegin, seqrange);
    }

    if(seq->End)
    {
        iend = ajSeqGetEnd(seq);
        if((ajuint) iend < ajSeqGetLen(seq))
            seqrange = ajTrue;
        ajDebug("seqClone end: %d range: %B\n", iend, seqrange);
    }

    ajDebug("seqClone outseq->Type '%S' seq->Type '%S'\n",
            outseq->Type, seq->Type);

    /*
    ** String attributes are copied only when the input value is
    ** non-empty, so an empty input leaves the output value untouched.
    */
    if(MAJSTRGETLEN(seq->Setdb))
        ajStrAssignRef(&outseq->Setdb, seq->Setdb);

    /* replace this with anything from -osdbname which takes precedence */
    if(MAJSTRGETLEN(outseq->Setoutdb))
        ajStrAssignRef(&outseq->Setdb, outseq->Setoutdb);

    if(MAJSTRGETLEN(seq->Molecule))
        ajStrAssignRef(&outseq->Molecule, seq->Molecule);
    if(MAJSTRGETLEN(seq->Class))
        ajStrAssignRef(&outseq->Class, seq->Class);
    if(MAJSTRGETLEN(seq->Division))
        ajStrAssignRef(&outseq->Division, seq->Division);
    if(MAJSTRGETLEN(seq->Evidence))
        ajStrAssignRef(&outseq->Evidence, seq->Evidence);
    if(MAJSTRGETLEN(seq->Db))
        ajStrAssignRef(&outseq->Db, seq->Db);

    if(MAJSTRGETLEN(seq->Name))
        ajStrAssignRef(&outseq->Name, seq->Name);
    if(MAJSTRGETLEN(seq->Acc))
        ajStrAssignRef(&outseq->Acc, seq->Acc);

    /* string lists: discard the old list, then copy the input list */
    if(outseq->Acclist)
        ajListstrFreeData(&outseq->Acclist);
    if(seq->Acclist)
        outseq->Acclist = ajListstrNewList(seq->Acclist);

    if(MAJSTRGETLEN(seq->Sv))
        ajStrAssignRef(&outseq->Sv, seq->Sv);
    if(MAJSTRGETLEN(seq->Gi))
        ajStrAssignRef(&outseq->Gi, seq->Gi);

    if(MAJSTRGETLEN(seq->Tax))
        ajStrAssignRef(&outseq->Tax, seq->Tax);
    if(MAJSTRGETLEN(seq->Taxcommon))
        ajStrAssignRef(&outseq->Taxcommon, seq->Taxcommon);
    if(MAJSTRGETLEN(seq->Taxid))
        ajStrAssignRef(&outseq->Taxid, seq->Taxid);
    if(MAJSTRGETLEN(seq->Organelle))
        ajStrAssignRef(&outseq->Organelle, seq->Organelle);

    if(outseq->Taxlist)
        ajListstrFreeData(&outseq->Taxlist);
    if(seq->Taxlist)
        outseq->Taxlist = ajListstrNewList(seq->Taxlist);

    if(outseq->Keylist)
        ajListstrFreeData(&outseq->Keylist);
    if(seq->Keylist)
        outseq->Keylist = ajListstrNewList(seq->Keylist);

    if(outseq->Cmtlist)
        ajListstrFreeData(&outseq->Cmtlist);
    if(seq->Cmtlist)
        outseq->Cmtlist = ajListstrNewList(seq->Cmtlist);

    /* object lists: empty any existing list in place, then clone into it */
    if(outseq->Reflist)
    {
        while(ajListPop(outseq->Reflist,(void **)&tmpref))
            ajSeqrefDel(&tmpref);

        if(seq->Reflist)
            ajSeqreflistClone(seq->Reflist, outseq->Reflist);
    }
    else if(seq->Reflist)
    {
        outseq->Reflist = ajListNew();
        ajSeqreflistClone(seq->Reflist, outseq->Reflist);
    }

    if(outseq->Xreflist)
    {
        while(ajListPop(outseq->Xreflist,(void **)&tmpxref))
            ajSeqxrefDel(&tmpxref);

        if (seq->Xreflist)
            ajSeqxreflistClone(seq->Xreflist, outseq->Xreflist);
    }
    else if (seq->Xreflist)
    {
        outseq->Xreflist = ajListNew();
        ajSeqxreflistClone(seq->Xreflist, outseq->Xreflist);
    }

    if(outseq->Genelist)
    {
        /*
        ** Empty the existing gene list first, as is done for the
        ** reference and cross-reference lists, so the clone overwrites
        ** rather than appends to whatever the output object held.
        */
        while(ajListPop(outseq->Genelist,(void **)&tmpgene))
            ajSeqgeneDel(&tmpgene);

        if(seq->Genelist)
            ajSeqgenelistClone(seq->Genelist, outseq->Genelist);
    }
    else if(seq->Genelist)
    {
        outseq->Genelist = ajListstrNew();
        ajSeqgenelistClone(seq->Genelist, outseq->Genelist);
    }

    if(MAJSTRGETLEN(seq->Desc))
        ajStrAssignRef(&outseq->Desc, seq->Desc);
    if(MAJSTRGETLEN(seq->Type))
        ajStrAssignRef(&outseq->Type, seq->Type);
    if(MAJSTRGETLEN(seq->Formatstr))
        ajStrAssignRef(&outseq->Informatstr, seq->Formatstr);
    if(MAJSTRGETLEN(seq->Entryname))
        ajStrAssignRef(&outseq->Entryname, seq->Entryname);

    if(outseq->Date)
        ajSeqdateDel(&outseq->Date);

    if(seq->Date)
        outseq->Date = ajSeqdateNewDate(seq->Date);

    if(outseq->Fulldesc)
        ajSeqdescDel(&outseq->Fulldesc);
    if(seq->Fulldesc)
        outseq->Fulldesc = ajSeqdescNewDesc(seq->Fulldesc);

    if(seq->Accuracy)
    {
        ilen = MAJSTRGETLEN(seq->Seq);
        AJCRESIZE(outseq->Accuracy, ilen);
        outseq->Qualsize = ilen;

        /*
        ** Copy from the range start, but only the values that remain
        ** from that offset; copying ilen values from offset ibegin-1
        ** would read past the end of the input array when ibegin > 1.
        ** NOTE(review): assumes seq->Accuracy holds one value per
        ** residue of seq->Seq - confirm against the sequence reader.
        */
        ncopy = ilen - (ibegin - 1);

        if(ncopy > ilen)
            ncopy = ilen;
        if(ncopy < 0)
            ncopy = 0;

        if(ncopy > 0)
            memmove(outseq->Accuracy, seq->Accuracy+ibegin-1,
                    ncopy*sizeof(float));

        /* zero the unused tail so no unset values are left behind */
        if(ncopy < ilen)
            memset(outseq->Accuracy+ncopy, 0,
                   (ilen-ncopy)*sizeof(float));
    }
    else
    {
        if(outseq->Accuracy)
            AJCSET0(outseq->Accuracy,outseq->Qualsize);
    }

    outseq->Offset = ibegin - 1;

    if(iend >= ibegin)
    {
        /* only a trimmed range needs a substring copy */
        if(seqrange)
            ajStrAssignSubS(&outseq->Seq, seq->Seq, ibegin-1, iend-1);
        else
            ajStrAssignRef(&outseq->Seq, seq->Seq);
    }
    else                                /* empty sequence */
        ajStrAssignClear(&outseq->Seq);

    /* the feature table pointer is shared with the input, not copied */
    outseq->Fttable = seq->Fttable;

    outseq->Rev = seq->Rev;
    outseq->Circular = seq->Circular;

    if(outseq->Fttable)
    {
        /* these setters act on the table shared with the input sequence */
        if(seq->Rev)
            ajFeattableSetReverse(outseq->Fttable);
        if(seq->Circular)
            ajFeattableSetCircular(outseq->Fttable);
        if(seq->Begin || seq->End)
            ajFeattableSetRange(outseq->Fttable, ibegin, iend);
    }

    ajDebug("seqClone %d .. %d %d .. %d len: %d type: '%S'\n",
            seq->Begin, seq->End, ibegin, iend,
            MAJSTRGETLEN(outseq->Seq), outseq->Type);
    ajDebug(" Db: '%S' Name: '%S' Entryname: '%S'\n",
            outseq->Db, outseq->Name, outseq->Entryname);

    ajSeqTypeCheckS(&outseq->Seq, outseq->Outputtype);

    return;
}
10282
10283
10284
10285
10286 /* @funcstatic seqsetClone ****************************************************
10287 **
10288 ** Clones one sequence from a set ready for output.
10289 **
10290 ** @param [u] outseq [AjPSeqout] Sequence output.
10291 ** @param [r] seqset [const AjPSeqset] Sequence set.
10292 ** @param [r] i [ajint] Sequence number, zero for the first sequence.
10293 ** @return [void]
10294 **
10295 ** @release 1.0.0
10296 ** @@
10297 ******************************************************************************/
10298
static void seqsetClone(AjPSeqout outseq, const AjPSeqset seqset, ajint i)
{
    /* hand the i-th member of the set straight to the single-sequence clone */
    seqClone(outseq, seqset->Seq[i]);
}
10310
10311
10312
10313
10314 /* @funcstatic seqDeclone *****************************************************
10315 **
10316 ** Clears cloned data in a sequence output object.
10317 **
10318 ** @param [u] outseq [AjPSeqout] Sequence output.
10319 ** @return [void]
10320 **
10321 ** @release 1.0.0
10322 ** @@
10323 ******************************************************************************/
10324
static void seqDeclone(AjPSeqout outseq)
{
    AjPStr* textfields[14];
    AjPStr item = NULL;
    AjPSeqRef ref = NULL;
    AjPSeqXref xref = NULL;
    AjPSeqGene gene = NULL;
    int k;

    /* string attributes to clear, in the order they are processed */
    textfields[0]  = &outseq->Db;
    textfields[1]  = &outseq->Setdb;
    textfields[2]  = &outseq->Name;
    textfields[3]  = &outseq->Acc;
    textfields[4]  = &outseq->Sv;
    textfields[5]  = &outseq->Gi;
    textfields[6]  = &outseq->Tax;
    textfields[7]  = &outseq->Taxcommon;
    textfields[8]  = &outseq->Taxid;
    textfields[9]  = &outseq->Organelle;
    textfields[10] = &outseq->Desc;
    textfields[11] = &outseq->Type;
    textfields[12] = &outseq->Informatstr;
    textfields[13] = &outseq->Entryname;

    /* only strings that currently hold text are cleared */
    for(k = 0; k < 14; k++)
    {
        if(MAJSTRGETLEN((*textfields[k])))
            ajStrSetClear(textfields[k]);
    }

    if(outseq->Date)
        ajSeqdateDel(&outseq->Date);

    /* string lists: pop and delete every entry, keeping the list itself */
    if(outseq->Acclist)
    {
        while(ajListstrPop(outseq->Acclist, &item))
            ajStrDel(&item);
    }

    if(outseq->Keylist)
    {
        while(ajListstrPop(outseq->Keylist, &item))
            ajStrDel(&item);
    }

    if(outseq->Taxlist)
    {
        while(ajListstrPop(outseq->Taxlist, &item))
            ajStrDel(&item);
    }

    if(outseq->Cmtlist)
    {
        while(ajListstrPop(outseq->Cmtlist, &item))
            ajStrDel(&item);
    }

    /* object lists: same treatment with the matching destructor */
    if(outseq->Xreflist)
    {
        while(ajListPop(outseq->Xreflist, (void **)&xref))
            ajSeqxrefDel(&xref);
    }

    if(outseq->Genelist)
    {
        while(ajListPop(outseq->Genelist, (void **)&gene))
            ajSeqgeneDel(&gene);
    }

    if(outseq->Reflist)
    {
        while(ajListPop(outseq->Reflist, (void **)&ref))
            ajSeqrefDel(&ref);
    }

    if(MAJSTRGETLEN(outseq->Seq))
        ajStrSetClear(&outseq->Seq);

    if(outseq->Fulldesc)
        ajSeqdescClear(outseq->Fulldesc);

    /* the accuracy array is zeroed over its recorded size, not freed */
    AJCSET0(outseq->Accuracy, outseq->Qualsize);
}
10402
10403
10404
10405
10406 /* @funcstatic seqFileReopen **************************************************
10407 **
10408 ** Reopen a sequence output file. Used after the file name has been changed
10409 ** when writing a set of sequences one to each file.
10410 **
10411 ** @param [u] outseq [AjPSeqout] Sequence output object.
10412 ** @return [AjBool] ajTrue on success
10413 **
10414 ** @release 1.0.0
10415 ** @@
10416 ******************************************************************************/
10417
seqFileReopen(AjPSeqout outseq)10418 static AjBool seqFileReopen(AjPSeqout outseq)
10419 {
10420 AjPStr name = NULL;
10421
10422 if(outseq->File)
10423 ajFileClose(&outseq->File);
10424
10425 if(outseq->Knownfile)
10426 outseq->Knownfile = NULL;
10427
10428 ajFmtPrintS(&name, "%S.%S", outseq->Name, outseq->Extension);
10429 ajStrFmtLower(&name);
10430 outseq->File = ajFileNewOutNamePathS(name, outseq->Directory);
10431 ajDebug("seqFileReopen single: %B file '%S'\n", outseq->Single, name);
10432 ajStrDel(&name);
10433
10434 if(!outseq->File)
10435 return ajFalse;
10436
10437 return ajTrue;
10438 }
10439
10440
10441
10442
10443 /* @section debugging *********************************************************
10444 **
** Functions for reporting the contents of a sequence output object.
10446 **
10447 ** @fdata [AjPSeqout]
10448 **
10449 ** @nam3rule Trace Report string elements to debug file
10450 **
10451 ** @argrule Trace seqout [const AjPSeqout] String
10452 **
10453 ** @valrule * [void]
10454 **
10455 ** @fcategory misc
10456 */
10457
10458
10459
10460
10461 /* @func ajSeqoutTrace ********************************************************
10462 **
10463 ** Debug calls to trace the data in a sequence object.
10464 **
10465 ** @param [r] seqout [const AjPSeqout] Sequence output object.
10466 ** @return [void]
10467 **
10468 ** @release 1.0.0
10469 ** @@
10470 ******************************************************************************/
10471
ajSeqoutTrace(const AjPSeqout seqout)10472 void ajSeqoutTrace(const AjPSeqout seqout)
10473 {
10474 AjIList it;
10475 AjPStr cur;
10476
10477 ajDebug("\n\n\nSequence Out trace\n");
10478 ajDebug( "==============\n\n");
10479 ajDebug( " Name: '%S'\n", seqout->Name);
10480
10481 if(ajStrGetLen(seqout->Acc))
10482 ajDebug( " Accession: '%S'\n", seqout->Acc);
10483
10484 if(ajListGetLength(seqout->Acclist))
10485 {
10486 ajDebug(" Acclist: (%Lu)",
10487 ajListGetLength(seqout->Acclist));
10488 it = ajListIterNewread(seqout->Acclist);
10489
10490 while((cur = (AjPStr) ajListIterGet(it)))
10491 ajDebug(" %S\n", cur);
10492
10493 ajListIterDel(&it);
10494 ajDebug("\n");
10495 }
10496
10497 if(ajStrGetLen(seqout->Sv))
10498 ajDebug( " SeqVersion: '%S'\n", seqout->Sv);
10499
10500 if(ajStrGetLen(seqout->Gi))
10501 ajDebug( " GenInfo Id: '%S'\n", seqout->Gi);
10502
10503 if(ajStrGetLen(seqout->Desc))
10504 ajDebug( " Description: '%S'\n", seqout->Desc);
10505
10506 if(ajStrGetRes(seqout->Seq))
10507 ajDebug( " Reserved: %d\n", ajStrGetRes(seqout->Seq));
10508
10509 if(ajListGetLength(seqout->Keylist))
10510 {
10511 ajDebug(" Keywordlist: (%Lu)",
10512 ajListGetLength(seqout->Keylist));
10513 it = ajListIterNewread(seqout->Keylist);
10514
10515 while((cur = (AjPStr) ajListIterGet(it)))
10516 ajDebug(" '%S'\n", cur);
10517
10518 ajListIterDel(&it);
10519 ajDebug("\n");
10520 }
10521
10522 ajDebug(" Taxonomy: '%S'\n", seqout->Tax);
10523 ajDebug(" Taxcommon: '%S'\n", seqout->Taxcommon);
10524 ajDebug(" Taxid: '%S'\n", seqout->Taxid);
10525 ajDebug(" Organelle: '%S'\n", seqout->Organelle);
10526
10527 if(ajListGetLength(seqout->Taxlist))
10528 {
10529 ajDebug(" Taxlist: (%Lu)",
10530 ajListGetLength(seqout->Taxlist));
10531 it = ajListIterNewread(seqout->Taxlist);
10532
10533 while((cur = (AjPStr) ajListIterGet(it)))
10534 ajDebug(" '%S'\n", cur);
10535
10536 ajListIterDel(&it);
10537 }
10538
10539 if(ajStrGetLen(seqout->Type))
10540 ajDebug( " Type: '%S'\n", seqout->Type);
10541
10542 if(ajStrGetLen(seqout->Outputtype))
10543 ajDebug( " Output type: '%S'\n", seqout->Outputtype);
10544
10545 if(ajStrGetLen(seqout->Db))
10546 ajDebug( " Database: '%S'\n", seqout->Db);
10547
10548 if(ajStrGetLen(seqout->Full))
10549 ajDebug( " Full name: '%S'\n", seqout->Full);
10550
10551 if(ajStrGetLen(seqout->Usa))
10552 ajDebug( " Usa: '%S'\n", seqout->Usa);
10553
10554 if(ajStrGetLen(seqout->Ufo))
10555 ajDebug( " Ufo: '%S'\n", seqout->Ufo);
10556
10557 if(ajStrGetLen(seqout->Formatstr))
10558 ajDebug( " Output format: '%S'\n", seqout->Formatstr);
10559
10560 if(ajStrGetLen(seqout->Filename))
10561 ajDebug( " Filename: '%S'\n", seqout->Filename);
10562
10563 if(ajStrGetLen(seqout->Directory))
10564 ajDebug( " Directory: '%S'\n", seqout->Directory);
10565
10566 if(ajStrGetLen(seqout->Entryname))
10567 ajDebug( " Entryname: '%S'\n", seqout->Entryname);
10568
10569 if(ajStrGetLen(seqout->Doc))
10570 ajDebug( " Documentation:...\n%S\n", seqout->Doc);
10571
10572 if(seqout->Fttable)
10573 ajFeattableTrace(seqout->Fttable);
10574 else
10575 ajDebug( " No Feature table present\n");
10576
10577 if(seqout->Features)
10578 ajDebug( " Features ON\n");
10579 else
10580 ajDebug( " Features OFF\n");
10581
10582 return;
10583 }
10584
10585
10586
10587
10588 /* @funcstatic seqFormatDel ***************************************************
10589 **
10590 ** Delete a sequence format object
10591 **
10592 ** @param [d] pformat [SeqPSeqFormat*] Sequence format
10593 ** @return [void]
10594 **
10595 ** @release 4.0.0
10596 ******************************************************************************/
10597
static void seqFormatDel(SeqPSeqFormat* pformat)
{
    /* release the format object through the caller's pointer */
    AJFREE(*pformat);
}
10604
10605
10606
10607
10608 /* @section exit **************************************************************
10609 **
10610 ** Functions called on exit
10611 **
10612 ** @fdata [AjPSeqout]
10613 **
10614 ** @nam3rule Exit Cleanup of internals when program exits
10615 **
10616 ** @valrule * [void]
10617 ** @fcategory misc
10618 **
10619 ******************************************************************************/
10620
10621
10622
10623
10624 /* @func ajSeqoutExit *********************************************************
10625 **
10626 ** Cleans up sequence output processing internal memory
10627 **
10628 ** @return [void]
10629 **
10630 ** @release 5.0.0
10631 ** @@
10632 ******************************************************************************/
10633
ajSeqoutExit(void)10634 void ajSeqoutExit(void)
10635 {
10636 ajRegFree(&seqoutRegFmt);
10637 ajRegFree(&seqoutRegId);
10638
10639 ajStrDel(&seqoutUsaTest);
10640
10641 return;
10642 }
10643
10644
10645
10646
10647 /* @datasection [AjPStr] Sequence output strings ******************************
10648 **
** Functions for manipulating sequence output strings
10650 **
10651 ** @nam2rule Seqoutstr
10652 **
10653 ******************************************************************************/
10654
10655
10656
10657
10658 /* @section other *******************************************************
10659 **
10660 ** @fdata [AjPStr]
10661 ** @fcategory use
10662 **
10663 ** @nam3rule Get Return value
10664 ** @nam3rule Is Test value
10665 ** @nam4rule Format Sequence format names
10666 ** @nam5rule FormatDefault Return name of default output format
10667 ** @nam5rule Exists Test format exists
10668 ** @nam5rule Single Test format writes to single or multiple files
10669 **
10670 ** @argrule GetFormatDefault Pformat [AjPStr*] Default output format name
10671 ** @argrule IsFormat format [const AjPStr] Output format required
10672 **
10673 ** @valrule FormatDefault [AjBool] True on success
10674 ** @valrule FormatExists [AjBool] True format is defined
10675 ** @valrule FormatSingle [AjBool] True if single files are written
10676 **
10677 ******************************************************************************/
10678
10679
10680
10681
10682 /* @func ajSeqoutstrGetFormatDefault ******************************************
10683 **
10684 ** Sets the default output format.
** Checks the EMBOSS_OUTFORMAT variable,
10686 ** and uses FASTA if no other definition is found.
10687 **
10688 ** @param [w] Pformat [AjPStr*] Default output format.
10689 ** @return [AjBool] ajTrue on success.
10690 **
10691 ** @release 5.0.0
10692 ** @@
10693 ******************************************************************************/
10694
ajSeqoutstrGetFormatDefault(AjPStr * Pformat)10695 AjBool ajSeqoutstrGetFormatDefault(AjPStr* Pformat)
10696 {
10697
10698 if(ajStrGetLen(*Pformat))
10699 ajDebug("... output format '%S'\n", *Pformat);
10700 else
10701 {
10702 /* ajStrAssignEmptyC(pformat, seqOutFormat[0].Name);*/
10703 if (ajNamGetValueC("outformat", Pformat))
10704 ajDebug("ajSeqoutstrGetFormatDefault '%S' from EMBOSS_OUTFORMAT\n",
10705 *Pformat);
10706 else
10707 {
10708 ajStrAssignEmptyC(Pformat, "fasta"); /* use the real name */
10709 ajDebug("... output format not set, default to '%S'\n", *Pformat);
10710 }
10711 }
10712
10713 return ajTrue;
10714 }
10715
10716
10717
10718
10719 /* @funcstatic seqoutFindOutFormat ********************************************
10720 **
10721 ** Looks for the specified output format in the internal definitions and
10722 ** returns the index.
10723 **
10724 ** @param [r] format [const AjPStr] Format required.
10725 ** @param [w] iformat [ajint*] Index
10726 ** @return [AjBool] ajTrue on success.
10727 **
10728 ** @release 5.0.0
10729 ** @@
10730 ******************************************************************************/
10731
seqoutFindOutFormat(const AjPStr format,ajint * iformat)10732 static AjBool seqoutFindOutFormat(const AjPStr format, ajint* iformat)
10733 {
10734
10735 AjPStr tmpformat = NULL;
10736 ajint i = 0;
10737
10738 if(!ajStrGetLen(format))
10739 {
10740 if (ajNamGetValueC("outformat", &tmpformat))
10741 ajDebug("seqoutFindOutFormat '%S' from EMBOSS_OUTFORMAT\n",
10742 tmpformat);
10743 else
10744 return ajFalse;
10745
10746 }
10747 else
10748 ajStrAssignS(&tmpformat, format);
10749
10750 ajStrFmtLower(&tmpformat);
10751
10752 while(seqOutFormat[i].Name)
10753 {
10754 if(ajStrMatchCaseC(tmpformat, seqOutFormat[i].Name))
10755 {
10756 *iformat = i;
10757 ajStrDel(&tmpformat);
10758
10759 return ajTrue;
10760 }
10761
10762 i++;
10763 }
10764
10765 ajStrDel(&tmpformat);
10766
10767 return ajFalse;
10768 }
10769
10770
10771
10772
10773 /* @func ajSeqoutstrIsFormatExists ********************************************
10774 **
10775 ** Checks whether an output format name exists.
10776 **
10777 ** @param [r] format [const AjPStr] Output format required.
10778 ** @return [AjBool] ajTrue if format is known.
10779 **
10780 ** @release 5.0.0
10781 ** @@
10782 ******************************************************************************/
10783
ajSeqoutstrIsFormatExists(const AjPStr format)10784 AjBool ajSeqoutstrIsFormatExists(const AjPStr format)
10785 {
10786 ajint iformat;
10787
10788 if(!seqoutFindOutFormat(format, &iformat))
10789 return ajFalse;
10790
10791 return ajTrue;
10792 }
10793
10794
10795
10796
10797 /* @func ajSeqoutstrIsFormatSingle ********************************************
10798 **
10799 ** Checks whether an output format should go to single files, rather than
10800 ** all sequences being written to one file. Some formats do not work when
10801 ** more than one sequence is written to a file. Obvious examples are plain
10802 ** text and GCG formats.
10803 **
10804 ** @param [r] format [const AjPStr] Output format required.
10805 ** @return [AjBool] ajTrue if separate file is needed for each sequence.
10806 **
10807 ** @release 5.0.0
10808 ** @@
10809 ******************************************************************************/
10810
ajSeqoutstrIsFormatSingle(const AjPStr format)10811 AjBool ajSeqoutstrIsFormatSingle(const AjPStr format)
10812 {
10813 ajint iformat;
10814
10815 if(!seqoutFindOutFormat(format, &iformat))
10816 {
10817 ajDebug("ajSeqoutstrIsFormatSingle: unknown output format '%S'",
10818 format);
10819
10820 return ajFalse;
10821 }
10822
10823 return seqOutFormat[iformat].Single;
10824 }
10825
10826
10827
10828
10829 #ifdef AJ_COMPILE_DEPRECATED_BOOK
10830 #endif /* AJ_COMPILE_DEPRECATED_BOOK */
10831
10832
10833
10834
10835 #ifdef AJ_COMPILE_DEPRECATED
10836 /* @obsolete ajSeqoutNewF
10837 ** @rename ajSeqoutNewFile
10838 */
ajSeqoutNewF(AjPFile file)10839 __deprecated AjPSeqout ajSeqoutNewF(AjPFile file)
10840 {
10841 return ajSeqoutNewFile(file);
10842 }
10843
10844
10845
10846
10847 /* @obsolete ajSeqWrite
10848 ** @rename ajSeqoutWriteSeq
10849 */
ajSeqWrite(AjPSeqout outseq,const AjPSeq seq)10850 __deprecated void ajSeqWrite(AjPSeqout outseq, const AjPSeq seq)
10851 {
10852 ajSeqoutWriteSeq(outseq, seq);
10853 return;
10854 }
10855
10856
10857
10858
10859 /* @obsolete ajSeqWriteXyz
10860 ** @replace ajSeqoutDumpSwisslike (1,2,3/2,1,3)
10861 */
10862
ajSeqWriteXyz(AjPFile outf,const AjPStr seq,const char * prefix)10863 __deprecated void ajSeqWriteXyz(AjPFile outf, const AjPStr seq,
10864 const char *prefix)
10865 {
10866 AjPSeqout outseq;
10867 outseq = ajSeqoutNewFile(outf);
10868 ajSeqoutDumpSwisslike(outseq, seq, prefix);
10869 ajSeqoutDel(&outseq);
10870
10871 return;
10872 }
10873
10874
10875
10876
10877 /* @obsolete ajSeqoutUsa
10878 ** @rename ajSeqoutClearUsa
10879 */
10880
ajSeqoutUsa(AjPSeqout * pthis,const AjPStr Usa)10881 __deprecated void ajSeqoutUsa(AjPSeqout* pthis, const AjPStr Usa)
10882 {
10883 ajSeqoutClearUsa(*pthis, Usa);
10884
10885 return;
10886 }
10887
10888
10889
10890
10891 /* @obsolete ajSeqWriteClose
10892 ** @rename ajSeqoutClose
10893 */
ajSeqWriteClose(AjPSeqout outseq)10894 __deprecated void ajSeqWriteClose(AjPSeqout outseq)
10895 {
10896 ajSeqoutClose(outseq);
10897 }
10898
10899
10900
10901
10902 /* @obsolete ajSeqFileNewOut
10903 ** @rename ajSeqoutOpenFilename
10904 */
ajSeqFileNewOut(AjPSeqout seqout,const AjPStr name)10905 __deprecated AjBool ajSeqFileNewOut(AjPSeqout seqout, const AjPStr name)
10906 {
10907 return ajSeqoutOpenFilename(seqout, name);
10908 }
10909
10910
10911
10912
10913 /* @obsolete ajSeqsetWrite
10914 ** @rename ajSeqoutWriteSet
10915 */
ajSeqsetWrite(AjPSeqout outseq,const AjPSeqset seq)10916 __deprecated void ajSeqsetWrite(AjPSeqout outseq, const AjPSeqset seq)
10917 {
10918 ajSeqoutWriteSet(outseq,seq);
10919
10920 return;
10921 }
10922
10923
10924
10925
10926 /* @obsolete ajSeqOutSetFormatC
10927 ** @rename ajSeqoutSetFormatC
10928 */
10929
ajSeqOutSetFormatC(AjPSeqout thys,const char * txt)10930 __deprecated AjBool ajSeqOutSetFormatC(AjPSeqout thys, const char* txt)
10931 {
10932 return ajSeqoutSetFormatC(thys, txt);
10933 }
10934
10935
10936
10937
10938 /* @obsolete ajSeqOutSetFormat
10939 ** @rename ajSeqoutSetFormatS
10940 */
10941
ajSeqOutSetFormat(AjPSeqout seqout,const AjPStr format)10942 __deprecated AjBool ajSeqOutSetFormat(AjPSeqout seqout, const AjPStr format)
10943 {
10944 return ajSeqoutSetFormatS(seqout, format);
10945 }
10946
10947
10948
10949
10950 /* @obsolete ajSeqoutDefName
10951 ** @replace ajSeqoutSetNameDefaultS (1,2,3/1,3,2)
10952 */
10953
ajSeqoutDefName(AjPSeqout thys,const AjPStr setname,AjBool multi)10954 __deprecated void ajSeqoutDefName(AjPSeqout thys,
10955 const AjPStr setname, AjBool multi)
10956 {
10957 ajSeqoutSetNameDefaultS(thys, multi, setname);
10958
10959 return;
10960 }
10961
10962
10963
10964
10965 /* @obsolete ajSeqPrintOutFormat
10966 ** @rename ajSeqoutPrintFormat
10967 */
10968
ajSeqPrintOutFormat(AjPFile outf,AjBool full)10969 __deprecated void ajSeqPrintOutFormat(AjPFile outf, AjBool full)
10970 {
10971 ajSeqoutPrintFormat(outf, full);
10972
10973 return;
10974 }
10975
10976
10977
10978
10979 /* @obsolete ajSeqoutCount
10980 ** @rename ajSeqoutGetBasecount
10981 */
10982
ajSeqoutCount(const AjPSeqout seqout,ajuint * b)10983 __deprecated void ajSeqoutCount(const AjPSeqout seqout, ajuint* b)
10984 {
10985 ajSeqoutGetBasecount(seqout, b);
10986
10987 return;
10988 }
10989
10990
10991
10992
10993 /* @obsolete ajSeqoutCheckGcg
10994 ** @rename ajSeqoutGetCheckgcg
10995 */
10996
ajSeqoutCheckGcg(const AjPSeqout seqout)10997 __deprecated ajint ajSeqoutCheckGcg(const AjPSeqout seqout)
10998 {
10999 return ajSeqoutGetCheckgcg(seqout);
11000 }
11001
11002
11003
11004
11005 /* @obsolete ajSeqWriteExit
11006 ** @rename ajSeqoutExit
11007 */
11008
ajSeqWriteExit(void)11009 __deprecated void ajSeqWriteExit(void)
11010 {
11011 ajSeqoutExit();
11012
11013 return;
11014 }
11015
11016
11017
11018
11019 /* @obsolete ajSeqOutFormatDefault
11020 ** @rename ajSeqoutstrGetFormatDefault
11021 */
11022
ajSeqOutFormatDefault(AjPStr * pformat)11023 __deprecated AjBool ajSeqOutFormatDefault(AjPStr* pformat)
11024 {
11025 return ajSeqoutstrGetFormatDefault(pformat);
11026 }
11027
11028
11029
11030
11031 /* @obsolete ajSeqFindOutFormat
11032 ** @rename seqoutFindOutFormat
11033 */
11034
ajSeqFindOutFormat(const AjPStr format,ajint * iformat)11035 __deprecated AjBool ajSeqFindOutFormat(const AjPStr format, ajint* iformat)
11036 {
11037 return seqoutFindOutFormat(format, iformat);
11038 }
11039
11040
11041
11042
11043 /* @obsolete ajSeqAllWrite
11044 ** @rename ajSeqoutWriteSeq
11045 */
ajSeqAllWrite(AjPSeqout outseq,const AjPSeq seq)11046 __deprecated void ajSeqAllWrite(AjPSeqout outseq, const AjPSeq seq)
11047 {
11048 ajSeqoutWriteSeq(outseq, seq);
11049
11050 return;
11051 }
11052
11053
11054
11055
11056 /* @obsolete ajSssWriteXyz
11057 ** @remove No longer called ... see ajSeqWriteSwisslike
11058 */
11059
ajSssWriteXyz(AjPFile outf,const AjPStr seq,const char * prefix)11060 __deprecated void ajSssWriteXyz(AjPFile outf, const AjPStr seq,
11061 const char *prefix)
11062 {
11063 AjPSeqout outseq = NULL;
11064 static SeqPSeqFormat sf = NULL;
11065
11066 outseq = ajSeqoutNew();
11067
11068 outseq->File = outf;
11069 ajStrAssignS(&outseq->Seq,seq);
11070
11071 ajFmtPrintF(outseq->File,
11072 "%-5sSEQUENCE %5d AA;\n",
11073 prefix, ajStrGetLen(outseq->Seq));
11074
11075 seqSeqFormat(ajStrGetLen(outseq->Seq), &sf);
11076 strcpy(sf->endstr, "");
11077 sf->tab = 4;
11078 sf->spacer = 11;
11079 sf->width = 60;
11080
11081 seqWriteSeq(outseq, sf);
11082 seqFormatDel(&sf);
11083
11084 return;
11085 }
11086
11087
11088
11089
11090 /* @obsolete ajSeqOutFormatSingle
11091 ** @rename ajSeqoutstrIsFormatSingle
11092 */
11093
ajSeqOutFormatSingle(AjPStr format)11094 __deprecated AjBool ajSeqOutFormatSingle(AjPStr format)
11095 {
11096 return ajSeqoutstrIsFormatSingle(format);
11097 }
11098
11099 #endif /* AJ_COMPILE_DEPRECATED */
11100