1 /* @include ajseqdata *********************************************************
2 **
3 ** AJAX SEQ (sequence) data structures
4 **
5 ** @author Copyright (C) 1998 Peter Rice
6 ** @version $Revision: 1.82 $
7 ** @modified Jun 25 pmr First version
8 ** @modified $Date: 2012/12/07 10:09:13 $ by $Author: rice $
9 ** @@
10 **
11 ** This library is free software; you can redistribute it and/or
12 ** modify it under the terms of the GNU Lesser General Public
13 ** License as published by the Free Software Foundation; either
14 ** version 2.1 of the License, or (at your option) any later version.
15 **
16 ** This library is distributed in the hope that it will be useful,
17 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 ** Lesser General Public License for more details.
20 **
21 ** You should have received a copy of the GNU Lesser General Public
22 ** License along with this library; if not, write to the Free Software
23 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
24 ** MA  02110-1301,  USA.
25 **
26 ******************************************************************************/
27 
28 #ifndef AJSEQDATA_H
29 #define AJSEQDATA_H
30 
31 /* ========================================================================= */
32 /* ============================= include files ============================= */
33 /* ========================================================================= */
34 
35 #include "ajdefine.h"
36 #include "ajtextdata.h"
37 #include "ajfeatdata.h"
38 #include "ajtime.h"
39 
40 AJ_BEGIN_DECLS
41 
42 
43 
44 
45 /* ========================================================================= */
46 /* =============================== constants =============================== */
47 /* ========================================================================= */
48 
49 
50 
51 
52 /* ========================================================================= */
53 /* ============================== public data ============================== */
54 /* ========================================================================= */
55 
56 
57 
58 
59 /* @enum AjEXrefType **********************************************************
60 **
61 ** Enumerated cross-reference type (the enum tag itself is AjOXrefType)
62 **
63 ** @value XREF_UNKNOWN type not defined (first enumerator, value zero)
64 ** @value XREF_DR      DR line in EMBL, SwissProt
65 ** @value XREF_DBXREF  db_xref in EMBL/Genbank
66 ** @value XREF_EC      EC= in SwissProt
67 ** @value XREF_DESC    Allergen= and CD_Antigen= in SwissProt DE
68 ** @value XREF_TAX     NCBI_TaxID
69 ** @value XREF_RX      RX line in EMBL or SwissProt
70 ** @value XREF_MAX     Beyond last defined value (count of the types above)
71 ******************************************************************************/
72 
73 typedef enum AjOXrefType
74 {
75     XREF_UNKNOWN,
76     XREF_DR,
77     XREF_DBXREF,
78     XREF_EC,
79     XREF_DESC,
80     XREF_TAX,
81     XREF_RX,
82     XREF_MAX
83 } AjEXrefType;
84 
85 
86 
87 
88 /* @data AjPSeqDesc ***********************************************************
89 **
90 ** Ajax sequence description object.
91 **
92 ** Defines the description fields needed to support various standard
93 ** sequence database entry formats, especially UniProt
94 **
95 ** @alias AjSSeqDesc
96 ** @alias AjOSeqDesc
97 **
98 ** @attr Name [AjPStr] Recommended name (only one per sequence)
99 ** @attr Short [AjPList] String list of short names
100 ** @attr EC [AjPList] String list of EC numbers
101 ** @attr Multi [AjPList] String list of multiple description lines
102 ** @attr AltNames [AjPList] List of alternate description objects
103 ** @attr SubNames [AjPList] List of submitted name objects
104 ** @attr Includes [AjPList] List of names for included functional domains
105 ** @attr Contains [AjPList] List of names for post-processing components
106 ** @attr Precursor [AjBool] True if this is a precursor
107 ** @attr Fragments [ajuint] 1 to mark as a fragment, more to mark as fragments
108 **
109 ** @@
110 ******************************************************************************/
111 
112 typedef struct AjSSeqDesc
113 {
114     AjPStr Name;
115     AjPList Short;
116     AjPList EC;
117     AjPList Multi;
118     AjPList AltNames;
119     AjPList SubNames;
120     AjPList Includes;
121     AjPList Contains;
122     AjBool Precursor;
123     ajuint Fragments;
124 } AjOSeqDesc;
125 
126 #define AjPSeqDesc AjOSeqDesc*
127 
128 
129 
130 
131 /* @data AjPSeqSubdesc ********************************************************
132 **
133 ** Ajax sequence sub-description object.
134 **
135 ** Defines the sub-description fields needed to support various standard
136 ** sequence database entry formats, especially UniProt
137 **
138 ** @alias AjSSeqSubdesc
139 ** @alias AjOSeqSubdesc
140 **
141 ** @attr Name [AjPStr] Recommended name (only one per sequence)
142 ** @attr Short [AjPList] String list of short names
143 ** @attr EC [AjPList] String list of EC numbers
144 ** @attr Allergen [AjPList] String list of Ig-E mediated atopic allergens
145 ** @attr Biotech [AjPList] String list of biotechnology context names
146 ** @attr Cdantigen [AjPList] String list of Cell Differentiation antigens
147 ** @attr Inn [AjPList] String list of International Non-proprietary Names
148 **
149 ** @@
150 ******************************************************************************/
151 
152 typedef struct AjSSeqSubdesc
153 {
154     AjPStr Name;
155     AjPList Short;
156     AjPList EC;
157     AjPList Allergen;
158     AjPList Biotech;
159     AjPList Cdantigen;
160     AjPList Inn;
161 } AjOSeqSubdesc;
162 
163 #define AjPSeqSubdesc AjOSeqSubdesc*
164 
165 
166 
167 
168 /* @data AjPSeqGene ***********************************************************
169 **
170 ** Ajax genes object.
171 **
172 ** Defines the gene fields needed to support various standard
173 ** sequence database entry formats, especially UniProt
174 **
175 ** @alias AjSSeqGene
176 ** @alias AjOSeqGene
177 **
178 ** @attr Name [AjPStr] Gene standard name
179 ** @attr Synonyms [AjPStr] Accepted synonyms (held in one string, not a list)
180 ** @attr Orf [AjPStr] Recognised open reading frame (ORF) names
181 **                         usually for sequencing projects in progress
182 ** @attr Oln [AjPStr] Ordered locus name(s) representing order on chromosome
183 ** @@
184 ******************************************************************************/
185 
186 typedef struct AjSSeqGene
187 {
188     AjPStr Name;
189     AjPStr Synonyms;
190     AjPStr Orf;
191     AjPStr Oln;
192 } AjOSeqGene;
193 
194 #define AjPSeqGene AjOSeqGene*
195 
196 
197 
198 
199 /* @data AjPSeqDate ***********************************************************
200 **
201 ** Ajax sequence dates object.
202 **
203 ** Defines the date fields needed to support various standard
204 ** sequence database entry formats
205 **
206 ** @alias AjSSeqDate
207 ** @alias AjOSeqDate
208 **
209 ** @attr CreDate [AjPTime] Creation date
210 ** @attr ModDate [AjPTime] Entry modification date
211 ** @attr SeqDate [AjPTime] Sequence modification date
212 ** @attr CreRel [AjPStr] Database release when first released
213 ** @attr ModRel [AjPStr] Database release when entry last changed
214 ** @attr SeqRel [AjPStr] Database release when sequence last changed
215 ** @attr CreVer [AjPStr] Entry version when first released (cf. CreRel; confirm)
216 ** @attr ModVer [AjPStr] Entry version when last changed
217 ** @attr SeqVer [AjPStr] Entry version when sequence last changed
218 ** @@
219 ******************************************************************************/
220 
221 typedef struct AjSSeqDate
222 {
223     AjPTime CreDate;
224     AjPTime ModDate;
225     AjPTime SeqDate;
226     AjPStr CreRel;
227     AjPStr ModRel;
228     AjPStr SeqRel;
229     AjPStr CreVer;
230     AjPStr ModVer;
231     AjPStr SeqVer;
232 } AjOSeqDate;
233 
234 #define AjPSeqDate AjOSeqDate*
235 
236 
237 
238 
239 /* @data AjPSeqRange **********************************************************
240 **
241 ** Ajax sequence range object.
242 **
243 ** Defines the queries that map to positions in a sequence (or physical map)
244 **
245 ** @alias AjSSeqRange
246 ** @alias AjOSeqRange
247 **
248 ** @attr Query [AjPStr] Query
249 ** @attr Start [ajulong] Start position
250 ** @attr Length [ajulong] Length of the range
251 ** @@
252 ******************************************************************************/
253 
254 typedef struct AjSSeqRange
255 {
256     AjPStr Query;
257     ajulong Start;
258     ajulong Length;
259 } AjOSeqRange;
260 
261 #define AjPSeqRange AjOSeqRange*
262 
263 
264 
265 
266 /* @data AjPSeqRef ************************************************************
267 **
268 ** Ajax sequence citation (reference) object.
269 **
270 ** Defines the fields needed to support various standard
271 ** entry citation formats
272 **
273 ** @alias AjSSeqRef
274 ** @alias AjOSeqRef
275 **
276 ** @attr Position [AjPStr] Sequence positions
277 ** @attr Groupname [AjPStr] Working group or consortium
278 ** @attr Authors [AjPStr] Author list, comma-delimited
279 ** @attr Title [AjPStr] Title
280 ** @attr Comment [AjPStr] Comment
281 ** @attr Xref [AjPStr] Cross reference (held as a string)
282 ** @attr Location [AjPStr] Location (journal, book, submission)
283 ** @attr Loctype [AjPStr] Location type
284 ** @attr Number [ajuint] Reference number. This may be used in the feature
285 **                       table and references can disappear so the position
286 **                       in the list is not enough
287 ** @attr Padding [char[4]] Padding to alignment boundary
288 ** @@
289 ******************************************************************************/
290 
291 typedef struct AjSSeqRef
292 {
293     AjPStr Position;
294     AjPStr Groupname;
295     AjPStr Authors;
296     AjPStr Title;
297     AjPStr Comment;
298     AjPStr Xref;
299     AjPStr Location;
300     AjPStr Loctype;
301     ajuint Number;
302     char Padding[4];
303 } AjOSeqRef;
304 
305 #define AjPSeqRef AjOSeqRef*
306 
307 
308 
309 
310 /* @data AjPSeqXref ***********************************************************
311 **
312 ** Ajax sequence cross-reference object.
313 **
314 ** Defines the fields needed to support various standard
315 ** entry cross-reference formats
316 **
317 ** @alias AjSSeqXref
318 ** @alias AjOSeqXref
319 **
320 ** @attr Db [AjPStr] Database name
321 ** @attr Id [AjPStr] Primary identifier
322 ** @attr Secid [AjPStr] Secondary identifier
323 ** @attr Terid [AjPStr] Tertiary identifier
324 ** @attr Quatid [AjPStr] Quaternary identifier
325 ** @attr Type [ajuint] Type of cross-reference (presumably AjEXrefType; confirm)
326 ** @attr Start [ajuint] Start position
327 ** @attr End [ajuint] End position
328 ** @attr Padding [char[4]] Padding to alignment boundary
329 ** @@
330 ******************************************************************************/
331 
332 typedef struct AjSSeqXref
333 {
334     AjPStr Db;
335     AjPStr Id;
336     AjPStr Secid;
337     AjPStr Terid;
338     AjPStr Quatid;
339     ajuint Type;
340     ajuint Start;
341     ajuint End;
342     char Padding[4];
343 } AjOSeqXref;
344 
345 #define AjPSeqXref AjOSeqXref*
346 
347 
348 
349 
350 /* @data AjPSeq ***************************************************************
351 **
352 ** Ajax Sequence object.
353 **
354 ** Holds the sequence itself, plus associated information such as a
355 ** sequence name, accession number, format, type.
356 **
357 ** Also holds information on a selected sequence range and other
358 ** options.
359 **
360 ** Sequence features can also be stored, but for efficiency reasons
361 ** features are turned off by default.
362 **
363 ** @alias AjOSeq
364 ** @alias AjSSeq
365 **
366 ** @attr Name      [AjPStr] Name (ID)
367 ** @attr Acc       [AjPStr] Accession number (primary only)
368 ** @attr Sv        [AjPStr] SeqVersion number
369 ** @attr Gi        [AjPStr] GI NCBI version number
370 ** @attr Tax       [AjPStr] Main taxonomy (species)
371 ** @attr Taxcommon [AjPStr] Main taxonomy (species) common name
372 ** @attr Taxid     [AjPStr] Main taxonomy (species) id in NCBI taxonomy
373 ** @attr Organelle [AjPStr] Organelle taxonomy
374 ** @attr Type      [AjPStr] Type: N (nucleotide) or P (protein)
375 ** @attr Molecule  [AjPStr] Molecule type
376 ** @attr Class     [AjPStr] Class of entry
377 ** @attr Division  [AjPStr] Database division
378 ** @attr Evidence  [AjPStr] Experimental evidence (e.g. from UniProt)
379 ** @attr Db        [AjPStr] Database name from input
380 ** @attr Setdb     [AjPStr] Database name from command line
381 ** @attr Full      [AjPStr] Full name
382 ** @attr Date      [AjPSeqDate] Creation, modification and sequence mod dates
383 ** @attr Desc      [AjPStr] One-line description
384 ** @attr Fulldesc  [AjPSeqDesc] Detailed description
385 ** @attr Doc       [AjPStr] Obsolete - see TextPtr
386 ** @attr Usa       [AjPStr] USA for re-reading
387 ** @attr Ufo       [AjPStr] UFO for re-reading
388 ** @attr Formatstr [AjPStr] Input format name
389 ** @attr Filename  [AjPStr] Original filename
390 ** @attr Entryname [AjPStr] Entryname (ID)
391 ** @attr TextPtr   [AjPStr] Full text
392 ** @attr Acclist   [AjPList] Secondary accessions
393 ** @attr Keylist   [AjPList] Keyword list
394 ** @attr Taxlist   [AjPList] Taxonomy list (organelle, species, taxa)
395 ** @attr Genelist  [AjPList] Gene names list
396 ** @attr Reflist   [AjPList] Reference citation list
397 ** @attr Cmtlist   [AjPList] Comment block list
398 ** @attr Xreflist  [AjPList] Cross reference list
399 ** @attr Hostlist  [AjPList] Viral host species list
400 ** @attr Seq       [AjPStr] The sequence
401 ** @attr Fttable   [AjPFeattable] Feature table
402 ** @attr Accuracy  [float*] Accuracy values (one per base) from base calling
403 ** @attr Fpos      [ajlong] File position (fseek) for USA
404 ** @attr Rev       [AjBool] true: to be reverse-complemented
405 ** @attr Reversed  [AjBool] true: has been reverse-complemented
406 ** @attr Trimmed   [AjBool] true: has been trimmed
407 ** @attr Circular  [AjBool] true: circular nucleotide molecule
408 ** @attr Begin     [ajint] start position (processed on reading)
409 ** @attr End       [ajint] end position (processed on reading)
410 ** @attr Offset    [ajuint] offset from start
411 ** @attr Offend    [ajuint] offset from end
412 ** @attr Qualsize  [ajuint] Size of Accuracy array
413 ** @attr Weight    [float] Weight from multiple alignment
414 ** @attr Format    [AjEnum] Input format enum
415 ** @attr EType     [AjEnum] unused, obsolete
416 **
417 ** @@
418 ******************************************************************************/
419 
420 typedef struct AjSSeq
421 {
422     AjPStr Name;
423     AjPStr Acc;
424     AjPStr Sv;
425     AjPStr Gi;
426     AjPStr Tax;
427     AjPStr Taxcommon;
428     AjPStr Taxid;
429     AjPStr Organelle;
430     AjPStr Type;
431     AjPStr Molecule;
432     AjPStr Class;
433     AjPStr Division;
434     AjPStr Evidence;
435     AjPStr Db;
436     AjPStr Setdb;
437     AjPStr Full;
438     AjPSeqDate Date;
439     AjPStr Desc;
440     AjPSeqDesc Fulldesc;
441     AjPStr Doc;
442     AjPStr Usa;
443     AjPStr Ufo;
444     AjPStr Formatstr;
445     AjPStr Filename;
446     AjPStr Entryname;
447     AjPStr TextPtr;
448     AjPList Acclist;
449     AjPList Keylist;
450     AjPList Taxlist;
451     AjPList Genelist;
452     AjPList Reflist;
453     AjPList Cmtlist;
454     AjPList Xreflist;
455     AjPList Hostlist;
456     AjPStr Seq;
457     AjPFeattable Fttable;
458     float* Accuracy;
459     ajlong Fpos;
460     AjBool Rev;
461     AjBool Reversed;
462     AjBool Trimmed;
463     AjBool Circular;
464     ajint Begin;
465     ajint End;
466     ajuint Offset;
467     ajuint Offend;
468     ajuint Qualsize;
469     float Weight;
470     AjEnum Format;
471     AjEnum EType;
472 } AjOSeq;
473 
474 #define AjPSeq AjOSeq*
475 
476 
477 
478 
479 /* @data AjPSeqset ************************************************************
480 **
481 ** Ajax Sequence set object. A sequence set contains one or more
482 ** sequences together in memory, for example as a sequence alignment.
483 **
484 ** Holds the sequence set itself, plus associated information such as
485 ** sequence names, accession numbers, format, type.
486 **
487 ** Also holds information on a selected sequence range and other
488 ** options.
489 **
490 ** Sequence set features can also be stored, but for efficiency reasons
491 ** features are turned off by default.
492 **
493 ** @alias AjSSeqset
494 ** @alias AjOSeqset
495 ** @other AjPSeq Sequences
496 ** @other AjPSeqall Sequence streams
497 **
498 ** @attr Size [ajuint] Number of sequences
499 ** @attr Len [ajuint] Maximum sequence length
500 ** @attr Begin [ajint] start position
501 ** @attr End [ajint] end position
502 ** @attr Offset [ajuint] offset from start
503 ** @attr Offend [ajuint] offset from end
504 ** @attr Rev [AjBool] true: reverse-complemented
505 ** @attr Trimmed [AjBool] true: has been trimmed
506 ** @attr Type [AjPStr] Type: N (nucleotide) or P (protein)
507 ** @attr Totweight [float] total weight (usually 1.0 * Size)
508 ** @attr EType [AjEnum] enum type obsolete
509 ** @attr Formatstr [AjPStr] Input format name
510 ** @attr Filename [AjPStr] Original filename
511 ** @attr Full [AjPStr] Full name
512 ** @attr Name [AjPStr] Name
513 ** @attr Usa [AjPStr] USA for re-reading
514 ** @attr Ufo [AjPStr] UFO for re-reading
515 ** @attr Seq [AjPSeq*] Sequence array (see Size)
516 ** @attr Seqweight [float*] Sequence weights (see also AjPSeq)
517 ** @attr Format [AjEnum] Input format enum
518 ** @attr Padding [char[4]] Padding to alignment boundary
519 **
520 ** @new ajSeqsetNew Default constructor
521 ** @delete ajSeqsetDel Default destructor
522 ** @input ajSeqsetRead Master input routine for a sequence set
523 ** @modify ajSeqsetToLower Converts a sequence set to lower case
524 ** @modify ajSeqsetToUpper Converts a sequence set to upper case
525 ** @cast ajSeqsetLen Returns the maximum length of a sequence set
526 ** @cast ajSeqsetSize Returns the number of sequences in a sequence set
527 ** @cast ajSeqsetAcc Returns the accession number of a sequence in a set
528 ** @cast ajSeqsetName Returns the name of a sequence in a set
529 ** @cast ajSeqsetSeq Returns the char* pointer to a sequence in a set
530 ** @cast ajSeqsetIsNuc Tests whether the sequence set is nucleotide
531 ** @cast ajSeqsetIsProt Tests whether the sequence set is protein
532 ** @cast ajSeqsetGetFilename Returns the filename of a sequence set
533 ** @output ajSeqsetWrite Writes out all sequences in a set
534 ** @@
535 ******************************************************************************/
536 
537 typedef struct AjSSeqset
538 {
539     ajuint Size;
540     ajuint Len;
541     ajint Begin;
542     ajint End;
543     ajuint Offset;
544     ajuint Offend;
545     AjBool Rev;
546     AjBool Trimmed;
547     AjPStr Type;
548     float Totweight;
549     AjEnum EType;
550     AjPStr Formatstr;
551     AjPStr Filename;
552     AjPStr Full;
553     AjPStr Name;
554     AjPStr Usa;
555     AjPStr Ufo;
556     AjPSeq* Seq;
557     float* Seqweight;
558     AjEnum Format;
559     char Padding[4];
560 } AjOSeqset;
561 
562 #define AjPSeqset AjOSeqset*
563 
564 
565 
566 
567 /* @data AjPSeqin *************************************************************
568 **
569 ** Ajax Sequence Input object.
570 **
571 ** Holds the sequence specification and information needed to read
572 ** the sequence and possible further sequences.
573 **
574 ** Also holds information on a selected sequence range and other
575 ** options.
576 **
577 ** @alias AjSSeqin
578 ** @alias AjOSeqin
579 **
580 ** @other AjPSeq Sequences
581 ** @other AjPSeqset Sequence sets
582 ** @other AjPSeqall Sequence streams
583 **
584 ** @attr Input     [AjPTextin] Text file input object
585 ** @attr Name      [AjPStr]    Sequence name (replace on reading)
586 ** @attr Acc       [AjPStr]    Sequence accession number (replace on reading)
587 ** @attr Inputtype [AjPStr]    Sequence type from ACD
588 ** @attr Type      [AjPStr]    Sequence type: N (nucleotide) or P (protein)
589 ** @attr Full      [AjPStr]    Full name
590 ** @attr Date      [AjPStr]    Date
591 ** @attr Desc      [AjPStr]    One-line description
592 ** @attr Doc       [AjPStr]    Full text
593 ** @attr Inseq     [AjPStr]    Temporary input sequence holder
594 ** @attr DbSequence   [AjPStr] Field name of sequence string
595 ** @attr Usalist   [AjPList]   List of USA processing nodes
596 ** @attr Begin     [ajint]     Start position
597 ** @attr End       [ajint]     End position
598 ** @attr Ufo       [AjPStr]    UFO for features (if any)
599 ** @attr Fttable   [AjPFeattable] Input feature table (why in AjPSeqin?)
600 ** @attr Ftquery   [AjPFeattabin] Feature table input spec
601 ** @attr Entryname [AjPStr]    Entry name
602 ** @attr Minimal   [AjBool]    true: read minimal information only
603 ** @attr Features  [AjBool]    true: read features if any
604 ** @attr IsNuc     [AjBool]    true: known to be nucleic
605 ** @attr IsProt    [AjBool]    true: known to be protein
606 ** @attr Multiset  [AjBool]    true: seqsetall input
607 ** @attr Multidone [AjBool]    seqsetall input: true when set completed
608 ** @attr Lower     [AjBool]    true: convert to lower case (-slower)
609 ** @attr Upper     [AjBool]    true: convert to upper case (-supper)
610 ** @attr Rev       [AjBool]    Reverse/complement if true
611 ** @attr Circular  [AjBool]    Set sequences to be circular
612 ** @attr SeqData   [void*]     Format data for reuse,
613 **                               e.g. multiple sequence input
614 ** @@
615 ******************************************************************************/
616 
617 typedef struct AjSSeqin
618 {
619     AjPTextin Input;
620     AjPStr Name;
621     AjPStr Acc;
622     AjPStr Inputtype;
623     AjPStr Type;
624     AjPStr Full;
625     AjPStr Date;
626     AjPStr Desc;
627     AjPStr Doc;
628     AjPStr Inseq;
629     AjPStr DbSequence;
630     AjPList Usalist;
631     ajint Begin;
632     ajint End;
633     AjPStr Ufo;
634     AjPFeattable Fttable;
635     AjPFeattabin Ftquery;
636     AjPStr Entryname;
637     AjBool Minimal;
638     AjBool Features;
639     AjBool IsNuc;
640     AjBool IsProt;
641     AjBool Multiset;
642     AjBool Multidone;
643     AjBool Lower;
644     AjBool Upper;
645     AjBool Rev;
646     AjBool Circular;
647     void *SeqData;
648 } AjOSeqin;
649 
650 #define AjPSeqin AjOSeqin*
651 
652 
653 
654 
655 /* @data AjPSeqall ************************************************************
656 **
657 ** Ajax Sequence all (stream) object.
658 **
659 ** Inherits an AjPSeq but allows more sequences to be read from the
660 ** same input by also inheriting the AjPSeqin input object.
661 **
662 ** @alias AjSSeqall
663 ** @alias AjOSeqall
664 **
665 ** @new ajSeqallNew Default constructor
666 ** @delete ajSeqallDel Default destructor
667 ** @modify ajSeqallNext Master sequence stream input, reads next sequence
668 **                   from an open input stream.
669 ** @cast ajSeqallGetFilename Returns the filename of a seqall object.
670 ** @other AjPSeq Sequences
671 ** @other AjPSeqin Sequence input
672 ** @other AjPSeqset Sequence sets
673 **
674 ** @attr Seq [AjPSeq] Current sequence
675 ** @attr Seqin [AjPSeqin] Sequence input for reading next
676 ** @attr Totseqs [ajlong] Count of sequences so far
677 ** @attr Totlength [ajlong] Total of sequence lengths so far
678 ** @attr Count [ajint] Count of sequences so far
679 ** @attr Begin [ajint] start position
680 ** @attr End [ajint] end position
681 ** @attr Rev [AjBool] if true: reverse-complement
682 ** @attr Returned [AjBool] if true: Seq object has been returned to a new owner
683 **                         and is not to be deleted by the destructor
684 ** @attr Padding [char[4]] Padding to alignment boundary
685 ** @@
686 ******************************************************************************/
687 
688 typedef struct AjSSeqall
689 {
690     AjPSeq Seq;
691     AjPSeqin Seqin;
692     ajlong Totseqs;
693     ajlong Totlength;
694     ajint Count;
695     ajint Begin;
696     ajint End;
697     AjBool Rev;
698     AjBool Returned;
699     char Padding[4];
700 } AjOSeqall;
701 
702 #define AjPSeqall AjOSeqall*
703 
704 
705 
706 
707 /* @data AjPSeqAccess *********************************************************
708 **
709 ** Ajax sequence access database reading object.
710 **
711 ** Holds information needed to read a sequence from a database.
712 ** Access methods are defined for each known database type.
713 **
714 ** Sequences are read from the database using the defined
715 ** database access function, which is usually a static function
716 ** within ajtextdb.c or ajseqdb.c
717 **
718 ** This should be a static data object but is needed for the definition
719 ** of AjPSeqin.
720 **
721 ** @alias AjSSeqAccess
722 ** @alias AjOSeqAccess
723 **
724 ** @attr Name [const char*] Access method name used in emboss.default
725 ** @attr Access [AjBool function] Access function, takes an AjPSeqin
726 ** @attr AccessFree [AjBool function] Access cleanup function, takes a void*
727 ** @attr Qlink [const char*] Supported query link operators
728 ** @attr Desc [const char*] Description
729 ** @attr Alias [AjBool] Alias for another name
730 ** @attr Entry [AjBool] Supports retrieval of single entries
731 ** @attr Query [AjBool] Supports retrieval of selected entries
732 ** @attr All [AjBool] Supports retrieval of all entries
733 ** @attr Chunked [AjBool] Supports retrieval of entries in chunks
734 ** @attr Padding [AjBool] Padding to alignment boundary
735 ** @@
736 ******************************************************************************/
737 
738 typedef struct AjSSeqAccess
739 {
740     const char *Name;
741     AjBool (*Access)(AjPSeqin seqin);
742     AjBool (*AccessFree)(void* qry);
743     const char* Qlink;
744     const char* Desc;
745     AjBool Alias;
746     AjBool Entry;
747     AjBool Query;
748     AjBool All;
749     AjBool Chunked;
750     AjBool Padding;
751 } AjOSeqAccess;
752 
753 #define AjPSeqAccess AjOSeqAccess*
754 
755 
756 
757 
758 /* @data AjPSeqout ************************************************************
759 **
760 ** Ajax Sequence Output object.
761 **
762 ** Holds definition of sequence output.
763 **
764 ** @alias AjSSeqout
765 ** @alias AjOSeqout
766 **
767 ** @attr Name [AjPStr] Name (ID)
768 ** @attr Acc [AjPStr] Accession number (primary only)
769 ** @attr Sv [AjPStr] SeqVersion number
770 ** @attr Gi [AjPStr] GI NCBI version number
771 ** @attr Desc [AjPStr] One-line description
772 ** @attr Tax [AjPStr] Main taxonomy (species)
773 ** @attr Taxcommon [AjPStr] Main taxonomy (species) common name
774 ** @attr Taxid [AjPStr] Main taxonomy (species) id in NCBI taxonomy
775 ** @attr Organelle [AjPStr] Organelle taxonomy
776 ** @attr Type [AjPStr] Type: N (nucleotide) or P (protein)
777 ** @attr Outputtype [AjPStr] Output sequence known type
778 ** @attr Molecule [AjPStr] Molecule type
779 ** @attr Class [AjPStr] Class of entry
780 ** @attr Division [AjPStr] Database division
781 ** @attr Evidence [AjPStr] Experimental evidence (e.g. from UniProt)
782 ** @attr Db [AjPStr] Database name from input name
783 ** @attr Setdb [AjPStr] Database name from input command line
784 ** @attr Setoutdb [AjPStr] Database name from command line
785 ** @attr Full [AjPStr] Full name
786 ** @attr Date [AjPSeqDate] Dates
787 ** @attr Fulldesc [AjPSeqDesc] Detailed description
788 ** @attr Doc [AjPStr] Obsolete - see TextPtr
789 ** @attr Usa [AjPStr] USA for re-reading
790 ** @attr Ufo [AjPStr] UFO for re-reading
791 ** @attr Fttable [AjPFeattable] Feature table
792 ** @attr Ftquery [AjPFeattabOut] Feature table output
793 ** @attr FtFormat [AjPStr] Feature output format (if not in UFO)
794 ** @attr FtFilename [AjPStr] Feature output filename (if not in UFO)
795 ** @attr Informatstr [AjPStr] Input format
796 ** @attr Formatstr [AjPStr] Output format
797 ** @attr EType [AjEnum] unused, obsolete
798 ** @attr Format [AjEnum] Output format index
799 ** @attr Filename [AjPStr] Output filename (if not in USA)
800 ** @attr Directory [AjPStr] Output directory
801 ** @attr Entryname [AjPStr] Entry name
802 ** @attr Acclist [AjPList] Secondary accessions
803 ** @attr Keylist [AjPList] Keyword list
804 ** @attr Taxlist [AjPList] Taxonomy list
805 ** @attr Genelist [AjPList] Gene list
806 ** @attr Reflist [AjPList] References (citations)
807 ** @attr Cmtlist [AjPList] Comment block list
808 ** @attr Xreflist [AjPList] Database cross reference list
809 ** @attr Seq [AjPStr] The sequence
810 ** @attr File [AjPFile] Output file
811 ** @attr Knownfile [AjPFile] Already open output file (we don't close this one)
812 ** @attr Extension [AjPStr] File extension
813 ** @attr Savelist [AjPList] Previous sequences saved for later output
814 **                          (e.g. MSF format)
815 ** @attr Accuracy [float*] Accuracy values (one per base) from base calling
816 ** @attr Data [void*] Format data for reuse, e.g. multiple sequence output
817 ** @attr Cleanup [void function] Function to write remaining lines on closing
818 ** @attr Rev [AjBool] true: to be reverse-complemented
819 ** @attr Circular [AjBool] true: circular nucleotide molecule
820 ** @attr Single [AjBool] If true, single sequence in each file (-ossingle)
821 ** @attr Features [AjBool] If true, save features with sequence or in file
822 ** @attr Qualsize [ajuint] Size of Accuracy array
823 ** @attr Count [ajint] Number of sequences
824 ** @attr Offset [ajint] offset from start
825 **
826 ** @new ajSeqoutNew Default constructor
827 ** @delete ajSeqoutDel Default destructor
828 ** @modify ajSeqoutUsa Resets using a new USA
829 ** @modify ajSeqoutClear Resets ready for reuse.
830 ** @modify ajSeqoutOpen If the file is not yet open, calls seqoutUsaProcess
831 ** @cast ajSeqoutCheckGcg Calculates the GCG checksum for a sequence set.
832 ** @modify ajSeqWrite Master sequence output routine
833 ** @modify ajSeqsetWrite Master sequence set output routine
834 ** @modify ajSeqFileNewOut Opens an output file for sequence writing.
835 ** @other AjPSeq Sequences
836 ** @attr Padding [char[4]] Padding to alignment boundary
837 ** @@
838 ******************************************************************************/
839 
840 typedef struct AjSSeqout
841 {
842     AjPStr Name;
843     AjPStr Acc;
844     AjPStr Sv;
845     AjPStr Gi;
846     AjPStr Desc;
847     AjPStr Tax;
848     AjPStr Taxcommon;
849     AjPStr Taxid;
850     AjPStr Organelle;
851     AjPStr Type;
852     AjPStr Outputtype;
853     AjPStr Molecule;
854     AjPStr Class;
855     AjPStr Division;
856     AjPStr Evidence;
857     AjPStr Db;
858     AjPStr Setdb;
859     AjPStr Setoutdb;
860     AjPStr Full;
861     AjPSeqDate Date;
862     AjPSeqDesc Fulldesc;
863     AjPStr Doc;
864     AjPStr Usa;
865     AjPStr Ufo;
866     AjPFeattable Fttable;
867     AjPFeattabOut Ftquery;
868     AjPStr FtFormat;
869     AjPStr FtFilename;
870     AjPStr Informatstr;
871     AjPStr Formatstr;
872     AjEnum EType;
873     AjEnum Format;
874     AjPStr Filename;
875     AjPStr Directory;
876     AjPStr Entryname;
877     AjPList Acclist;
878     AjPList Keylist;
879     AjPList Taxlist;
880     AjPList Genelist;
881     AjPList Reflist;
882     AjPList Cmtlist;
883     AjPList Xreflist;
884     AjPStr Seq;
885     AjPFile File;
886     AjPFile Knownfile;
887     AjPStr Extension;
888     AjPList Savelist;
889     float* Accuracy;
890     void *Data;
891     void (*Cleanup)(struct AjSSeqout * outseq);
892     AjBool Rev;
893     AjBool Circular;
894     AjBool Single;
895     AjBool Features;
896     ajuint Qualsize;
897     ajint Count;
898     ajint Offset;
899     char Padding[4];
900 } AjOSeqout;
901 
902 #define AjPSeqout AjOSeqout*
903 
904 
905 
906 
907 /* ========================================================================= */
908 /* =========================== public functions ============================ */
909 /* ========================================================================= */
910 
911 
912 
913 
914 /*
915 ** Prototype definitions
916 */
917 
918 /*
919 ** End of prototype definitions
920 */
921 
922 
923 
924 
925 AJ_END_DECLS
926 
927 #endif /* !AJSEQDATA_H */
928