1--$Revision: 6.0 $
2--**********************************************************************
3--
4--  asn.all
5--  this file contains all NCBI ASN.1 specifications together
6--
7--  by James Ostell, 1990
8--
9--**********************************************************************
10
11--$Revision: 6.6 $
12--**********************************************************************
13--
14--  NCBI General Data elements
15--  by James Ostell, 1990
16--  Version 3.0 - June 1994
17--
18--**********************************************************************
19
20NCBI-General DEFINITIONS ::=
21BEGIN
22
23EXPORTS Date, Person-id, Object-id, Dbtag, Int-fuzz, User-object, User-field;
24
25-- StringStore is really a VisibleString.  It is used to define very
26--   long strings which may need to be stored by the receiving program
27--   in special structures, such as a ByteStore, but it's just a hint.
28--   AsnTool stores StringStores in ByteStore structures.
29-- OCTET STRINGs are also stored in ByteStores by AsnTool
30--
31-- typedef struct bsunit {             /* for building multiline strings */
32   -- Nlm_Handle str;            /* the string piece */
33   -- Nlm_Int2 len_avail,
34       -- len;
35   -- struct bsunit PNTR next; }       /* the next one */
36-- Nlm_BSUnit, PNTR Nlm_BSUnitPtr;
37--
38-- typedef struct bytestore {
39   -- Nlm_Int4 seekptr,       /* current position */
40      -- totlen,             /* total stored data length in bytes */
41      -- chain_offset;       /* offset in ByteStore of first byte in curchain */
42   -- Nlm_BSUnitPtr chain,       /* chain of elements */
43      -- curchain;           /* the BSUnit containing seekptr */
44-- } Nlm_ByteStore, PNTR Nlm_ByteStorePtr;
45--
46-- AsnTool incorporates this as a primitive type, so the definition
47--   is here just for completeness
48--
49--  StringStore ::= [APPLICATION 1] IMPLICIT OCTET STRING
50--
51
52-- BigInt is really an INTEGER. It is used to warn the receiving code to expect
53--   a value bigger than Int4 (actually Int8). It will be stored in DataVal.bigintvalue
54--
55--   Like StringStore, AsnTool incorporates it as a primitive. The definition would be:
56--   BigInt ::= [APPLICATION 2] IMPLICIT INTEGER
57--
58
59-- Date is used to replace the (overly complex) UTCTime, GeneralizedTime
60--  of ASN.1
61--  It stores only a date
62--
63
64Date ::= CHOICE {            -- a date, as free text or in structured form
65    str VisibleString ,        -- for those unparsed dates
66    std Date-std }             -- use this if you can
67
68Date-std ::= SEQUENCE {        -- NOTE: this is NOT a unix tm struct
69    year INTEGER ,             -- full year, century included (e.g. 1994, not 94)
70    month INTEGER OPTIONAL ,   -- month (1-12)
71    day INTEGER OPTIONAL ,     -- day of month (1-31)
72    season VisibleString OPTIONAL ,  -- for "spring", "may-june", etc
73    hour INTEGER OPTIONAL ,    -- hour of day (0-23)
74    minute INTEGER OPTIONAL ,  -- minute of hour (0-59)
75    second INTEGER OPTIONAL }  -- second of minute (0-59)
76
77-- Dbtag is generalized for tagging
78-- eg. { "Social Security", str "023-79-8841" }
79-- or  { "member", id 8882224 }
80
81Dbtag ::= SEQUENCE {          -- pairs a database/system name with a tag
82    db VisibleString ,          -- name of database or system
83    tag Object-id }         -- appropriate tag
84
85-- Object-id can tag or name anything
86--
87
88Object-id ::= CHOICE {        -- an identifier in exactly one of two forms
89    id INTEGER ,                -- integer identifier
90    str VisibleString }         -- string identifier
91
92-- Person-id is to define a std element for people
93--
94
95Person-id ::= CHOICE {        -- a person, identified in one of several forms
96    dbtag Dbtag ,               -- any defined database tag
97    name Name-std ,             -- structured name
98    ml VisibleString ,          -- MEDLINE name (semi-structured)
99                                --    eg. "Jones RM"
100    str VisibleString,          -- unstructured name
101    consortium VisibleString }  -- consortium name
102
103Name-std ::= SEQUENCE { -- Structured names
104    last VisibleString ,             -- last name (the only required part)
105    first VisibleString OPTIONAL ,   -- first name
106    middle VisibleString OPTIONAL ,  -- middle name
107    full VisibleString OPTIONAL ,    -- full name eg. "J. John Smith, Esq"
108    initials VisibleString OPTIONAL,  -- first + middle initials
109    suffix VisibleString OPTIONAL ,   -- Jr, Sr, III
110    title VisibleString OPTIONAL }    -- Dr., Sister, etc
111
112--**** Int-fuzz **********************************************
113--*
114--*   uncertainties in integer values
115
116Int-fuzz ::= CHOICE {             -- uncertainty attached to an integer value
117    p-m INTEGER ,                    -- plus or minus fixed amount
118    range SEQUENCE {                 -- max to min
119        max INTEGER ,                -- upper end of the range
120        min INTEGER } ,              -- lower end of the range
121    pct INTEGER ,                    -- % plus or minus, scaled x10 (range 0-1000)
122    lim ENUMERATED {                 -- some limit value
123        unk (0) ,                    -- unknown
124        gt (1) ,                     -- greater than
125        lt (2) ,                     -- less than
126        tr (3) ,                     -- space to right of position
127        tl (4) ,                     -- space to left of position
128        circle (5) ,                 -- artificial break at origin of circle
129        other (255) } ,              -- something else
130    alt SET OF INTEGER }             -- set of alternatives for the integer
131
132
133--**** User-object **********************************************
134--*
135--*   a general object for a user defined structured data item
136--*    used by Seq-feat and Seq-descr
137
138User-object ::= SEQUENCE {        -- user defined structured data item
139    class VisibleString OPTIONAL ,   -- endeavor which designed this object
140    type Object-id ,                 -- type of object within class
141    data SEQUENCE OF User-field }    -- the object itself, as an ordered list of fields
142
143User-field ::= SEQUENCE {         -- one labelled field of a User-object
144    label Object-id ,                -- field label
145    num INTEGER OPTIONAL ,           -- required for strs, ints, reals, oss
146    data CHOICE {                    -- field contents
147        str UTF8String ,             -- single string
148        int INTEGER ,                -- single integer
149        real REAL ,                  -- single real
150        bool BOOLEAN ,               -- single boolean
151        os OCTET STRING ,            -- single octet string
152        object User-object ,         -- for using other definitions
153        strs SEQUENCE OF UTF8String ,      -- list of strings
154        ints SEQUENCE OF INTEGER ,         -- list of integers
155        reals SEQUENCE OF REAL ,           -- list of reals
156        oss SEQUENCE OF OCTET STRING ,     -- list of octet strings
157        fields SEQUENCE OF User-field ,    -- nested fields (recursive)
158        objects SEQUENCE OF User-object } }  -- nested objects
159
160
161
162END
163
164--$Revision: 6.3 $
165--****************************************************************
166--
167--  NCBI Bibliographic data elements
168--  by James Ostell, 1990
169--
170--  Taken from the American National Standard for
171--      Bibliographic References
172--      ANSI Z39.29-1977
173--  Version 3.0 - June 1994
174--  PubMedId added in 1996
175--  ArticleIds and eprint elements added in 1999
176--
177--****************************************************************
178
179NCBI-Biblio DEFINITIONS ::=
180BEGIN
181
182EXPORTS Cit-art, Cit-jour, Cit-book, Cit-pat, Cit-let, Id-pat, Cit-gen,
183        Cit-proc, Cit-sub, Title, Author, PubMedId, DOI;
184
185IMPORTS Person-id, Date, Dbtag FROM NCBI-General;
186
187    -- Article Ids
188
189ArticleId ::= CHOICE {         -- can be many ids for an article
190	pubmed PubMedId ,      -- see types below
191	medline MedlineUID ,   -- Id from MEDLINE
192	doi DOI ,              -- Document Object Identifier
193	pii PII ,              -- Controlled Publisher Identifier
194	pmcid PmcID ,          -- PubMed Central Id
195	pmcpid PmcPid ,        -- Publisher Id supplied to PubMed Central
196        pmpid PmPid ,          -- Publisher Id supplied to PubMed
197        other Dbtag  }    -- generic catch all
198
199PubMedId ::= INTEGER           -- Id from the PubMed database at NCBI
200MedlineUID ::= INTEGER         -- Id from MEDLINE
201DOI ::= VisibleString          -- Document Object Identifier
202PII ::= VisibleString          -- Controlled Publisher Identifier
203PmcID ::= INTEGER              -- PubMed Central Id
204PmcPid ::= VisibleString       -- Publisher Id supplied to PubMed Central
205PmPid ::= VisibleString        -- Publisher Id supplied to PubMed
206
207ArticleIdSet ::= SET OF ArticleId   -- unordered collection of ids for an article
208
209    -- Status Dates
210
211PubStatus ::= INTEGER {            -- points of publication (named INTEGER values)
212    received  (1) ,            -- date manuscript received for review
213    accepted  (2) ,            -- accepted for publication
214    epublish  (3) ,            -- published electronically by publisher
215    ppublish  (4) ,            -- published in print by publisher
216    revised   (5) ,            -- article revised by publisher/author
217    pmc       (6) ,            -- article first appeared in PubMed Central
218    pmcr      (7) ,            -- article revision in PubMed Central
219    pubmed    (8) ,            -- article citation first appeared in PubMed
220    pubmedr   (9) ,            -- article citation revision in PubMed
221    aheadofprint (10),         -- epublish, but will be followed by print
222    premedline (11),           -- date into PreMedline status
223    medline    (12),           -- date made a MEDLINE record
224    other    (255) }           -- any status not listed above
225
226PubStatusDate ::= SEQUENCE {   -- done as a structure so fields can be added
227    pubstatus PubStatus ,      -- which point of publication the date refers to
228    date Date }                -- time may be added later
229
230PubStatusDateSet ::= SET OF PubStatusDate   -- unordered collection of status dates
231
232    -- Citation Types
233
234Cit-art ::= SEQUENCE {                  -- article in journal or book
235    title Title OPTIONAL ,              -- title of paper (ANSI requires)
236    authors Auth-list OPTIONAL ,        -- authors (ANSI requires)
237    from CHOICE {                       -- journal or book
238        journal Cit-jour ,              -- article appeared in a journal
239        book Cit-book ,                 -- article appeared in a book
240        proc Cit-proc } ,               -- article appeared in meeting proceedings
241    ids ArticleIdSet OPTIONAL }         -- lots of ids
242
243Cit-jour ::= SEQUENCE {             -- Journal citation
244    title Title ,                   -- title of journal
245    imp Imprint }                   -- imprint (date, volume, issue, pages, ...)
246
247Cit-book ::= SEQUENCE {              -- Book citation
248    title Title ,                    -- Title of book
249    coll Title OPTIONAL ,            -- part of a collection
250    authors Auth-list,               -- authors
251    imp Imprint }                    -- imprint (date, volume, issue, pages, ...)
252
253Cit-proc ::= SEQUENCE {             -- Meeting proceedings
254    book Cit-book ,                 -- citation to meeting
255    meet Meeting }                  -- time and location of meeting
256
257    -- Patent number and date-issue were made optional in 1997 to
258    --   support patent applications being issued from the USPTO
259    --   Semantically a Cit-pat must have either a patent number or
260    --   an application number (or both) to be valid
261
262Cit-pat ::= SEQUENCE {                  -- patent citation
263    title VisibleString ,               -- title of the patent
264    authors Auth-list,                  -- author/inventor
265    country VisibleString ,             -- Patent Document Country
266    doc-type VisibleString ,            -- Patent Document Type
267    number VisibleString OPTIONAL,      -- Patent Document Number
268    date-issue Date OPTIONAL,           -- Patent Issue/Pub Date
269    class SEQUENCE OF VisibleString OPTIONAL ,      -- Patent Doc Class Code
270    app-number VisibleString OPTIONAL , -- Patent Doc Appl Number
271    app-date Date OPTIONAL ,            -- Patent Appl File Date
272    applicants Auth-list OPTIONAL ,     -- Applicants
273    assignees Auth-list OPTIONAL ,      -- Assignees
274    priority SEQUENCE OF Patent-priority OPTIONAL , -- Priorities
275    abstract VisibleString OPTIONAL }   -- abstract of patent
276
277Patent-priority ::= SEQUENCE {          -- one entry of Cit-pat.priority
278    country VisibleString ,             -- Patent country code
279    number VisibleString ,              -- number assigned in that country
280    date Date }                         -- date of application
281
282Id-pat ::= SEQUENCE {                   -- just to identify a patent
283    country VisibleString ,             -- Patent Document Country
284    id CHOICE {                         -- exactly one of the two numbers
285        number VisibleString ,          -- Patent Document Number
286        app-number VisibleString } ,    -- Patent Doc Appl Number
287    doc-type VisibleString OPTIONAL }   -- Patent Doc Type
288
289Cit-let ::= SEQUENCE {                  -- letter, thesis, or manuscript
290    cit Cit-book ,                      -- same fields as a book
291    man-id VisibleString OPTIONAL ,     -- Manuscript identifier
292    type ENUMERATED {                   -- which kind of document this is
293        manuscript (1) ,
294        letter (2) ,
295        thesis (3) } OPTIONAL }
296                                -- NOTE: this is just to cite a
297                                -- direct data submission, see NCBI-Submit
298                                -- for the form of a sequence submission
299Cit-sub ::= SEQUENCE {               -- citation for a direct submission
300    authors Auth-list ,              -- not necessarily authors of the paper
301    imp Imprint OPTIONAL ,			 -- only used to get the date; slated for removal (see date)
302    medium ENUMERATED {              -- medium of submission
303        paper   (1) ,
304        tape    (2) ,
305        floppy  (3) ,
306        email   (4) ,
307        other   (255) } OPTIONAL ,
308    date Date OPTIONAL ,              -- replaces imp, will become required
309    descr VisibleString OPTIONAL }    -- description of changes for public view
310
311Cit-gen ::= SEQUENCE {      -- NOT from ANSI, this is a catchall
312    cit VisibleString OPTIONAL ,     -- anything, not parsable
313    authors Auth-list OPTIONAL ,     -- authors
314    muid INTEGER OPTIONAL ,      -- medline uid
315    journal Title OPTIONAL ,         -- journal title
316    volume VisibleString OPTIONAL ,  -- volume
317    issue VisibleString OPTIONAL ,   -- issue
318    pages VisibleString OPTIONAL ,   -- pages
319    date Date OPTIONAL ,             -- date
320    serial-number INTEGER OPTIONAL ,   -- for GenBank style references
321    title VisibleString OPTIONAL ,     -- eg. cit="unpublished",title="title"
322	pmid PubMedId OPTIONAL }           -- PubMed Id
323
324
325    -- Authorship Group
326Auth-list ::= SEQUENCE {          -- author names plus an optional affiliation
327        names CHOICE {            -- all names are given in one of three forms
328            std SEQUENCE OF Author ,        -- full citations
329            ml SEQUENCE OF VisibleString ,  -- MEDLINE, semi-structured
330            str SEQUENCE OF VisibleString } , -- free for all
331        affil Affil OPTIONAL }        -- author affiliation
332
333Author ::= SEQUENCE {                   -- one author with optional level, role and affiliation
334    name Person-id ,                        -- Author, Primary or Secondary
335    level ENUMERATED {                  -- primary or secondary author
336        primary (1),
337        secondary (2) } OPTIONAL ,
338    role ENUMERATED {                   -- Author Role Indicator
339        compiler (1),
340        editor (2),
341        patent-assignee (3),
342        translator (4) } OPTIONAL ,
343    affil Affil OPTIONAL ,              -- affiliation of this author
344    is-corr BOOLEAN OPTIONAL }          -- TRUE if corresponding author
345
346Affil ::= CHOICE {                      -- affiliation, as free text or structured
347    str VisibleString ,                 -- unparsed string
348    std SEQUENCE {                      -- std representation
349    affil VisibleString OPTIONAL ,      -- Author Affiliation, Name
350    div VisibleString OPTIONAL ,        -- Author Affiliation, Division
351    city VisibleString OPTIONAL ,       -- Author Affiliation, City
352    sub VisibleString OPTIONAL ,        -- Author Affiliation, County Sub
353    country VisibleString OPTIONAL ,    -- Author Affiliation, Country
354    street VisibleString OPTIONAL ,    -- street address, not ANSI
355    email VisibleString OPTIONAL ,      -- email address
356    fax VisibleString OPTIONAL ,        -- fax number
357    phone VisibleString OPTIONAL ,      -- phone number
358    postal-code VisibleString OPTIONAL }}  -- postal code
359
360    -- Title Group
361    -- Valid for = A = Analytic (Cit-art)
362    --             J = Journals (Cit-jour)
363    --             B = Book (Cit-book)
364                                                 -- Valid for:
365Title ::= SET OF CHOICE {   -- set of titles, each tagged by kind
366    name VisibleString ,    -- Title, Anal,Coll,Mono    AJB
367    tsub VisibleString ,    -- Title, Subordinate       A B
368    trans VisibleString ,   -- Title, Translated        AJB
369    jta VisibleString ,     -- Title, Abbreviated        J
370    iso-jta VisibleString , -- specifically ISO jta      J
371    ml-jta VisibleString ,  -- specifically MEDLINE jta  J
372    coden VisibleString ,   -- a coden                   J
373    issn VisibleString ,    -- ISSN                      J
374    abr VisibleString ,     -- Title, Abbreviated         B
375    isbn VisibleString }    -- ISBN                       B
376
377Imprint ::= SEQUENCE {                  -- Imprint group
378    date Date ,                         -- date of publication
379    volume VisibleString OPTIONAL ,     -- volume
380    issue VisibleString OPTIONAL ,      -- issue
381    pages VisibleString OPTIONAL ,      -- pages
382    section VisibleString OPTIONAL ,    -- section
383    pub Affil OPTIONAL,                     -- publisher, required for book
384    cprt Date OPTIONAL,                     -- copyright date, required for book
385    part-sup VisibleString OPTIONAL ,       -- part/sup of volume
386    language VisibleString DEFAULT "ENG" ,  -- put here for simplicity; "ENG" when absent
387    prepub ENUMERATED {                     -- for prepublication citations
388        submitted (1) ,                     -- submitted, not accepted
389        in-press (2) ,                      -- accepted, not published
390        other (255)  } OPTIONAL ,
391    part-supi VisibleString OPTIONAL ,      -- part/sup on issue
392    retract CitRetract OPTIONAL ,           -- retraction info
393    pubstatus PubStatus OPTIONAL ,          -- current status of this publication
394    history PubStatusDateSet OPTIONAL }     -- dates for this record
395
396CitRetract ::= SEQUENCE {             -- retraction/erratum info attached to a citation
397    type ENUMERATED {                    -- retraction of an entry
398        retracted (1) ,               -- this citation retracted
399        notice (2) ,                  -- this citation is a retraction notice
400        in-error (3) ,                -- an erratum was published about this
401        erratum (4) } ,               -- this is a published erratum
402    exp VisibleString OPTIONAL }      -- citation and/or explanation
403
404Meeting ::= SEQUENCE {        -- a meeting, as cited by Cit-proc
405    number VisibleString ,    -- meeting number
406    date Date ,               -- date of the meeting
407    place Affil OPTIONAL }    -- location of the meeting
408
409
410END
411
412
413--$Revision: 6.0 $
414--**********************************************************************
415--
416--  MEDLINE data definitions
417--  James Ostell, 1990
418--
419--  enhanced in 1996 to support PubMed records as well by simply adding
420--    the PubMedId and making MedlineId optional
421--
422--**********************************************************************
423
424NCBI-Medline DEFINITIONS ::=
425BEGIN
426
427EXPORTS Medline-entry, Medline-si;
428
429IMPORTS Cit-art, PubMedId FROM NCBI-Biblio
430        Date FROM NCBI-General;
431
432                                -- a MEDLINE or PubMed entry
433Medline-entry ::= SEQUENCE {
434    uid INTEGER OPTIONAL ,      -- MEDLINE UID, sometimes not yet available if from PubMed
435    em Date ,                   -- Entry Month
436    cit Cit-art ,               -- article citation
437    abstract VisibleString OPTIONAL ,      -- abstract text
438    mesh SET OF Medline-mesh OPTIONAL ,    -- MeSH terms
439    substance SET OF Medline-rn OPTIONAL , -- substance records
440    xref SET OF Medline-si OPTIONAL ,      -- cross reference records
441    idnum SET OF VisibleString OPTIONAL ,  -- ID Number (grants, contracts)
442    gene SET OF VisibleString OPTIONAL ,   -- gene entries, as strings
443    pmid PubMedId OPTIONAL ,               -- MEDLINE records may include the PubMedId
444    pub-type SET OF VisibleString OPTIONAL, -- may show publication types (review, etc)
445    mlfield SET OF Medline-field OPTIONAL ,  -- additional Medline field types
446    status INTEGER {            -- record status; medline when absent
447	publisher (1) ,      -- record as supplied by publisher
448        premedline (2) ,     -- premedline record
449        medline (3) } DEFAULT medline }  -- regular medline record
450
451Medline-mesh ::= SEQUENCE {          -- one MeSH term with its qualifiers
452    mp BOOLEAN DEFAULT FALSE ,       -- TRUE if main point (*)
453    term VisibleString ,                   -- the MeSH term
454    qual SET OF Medline-qual OPTIONAL }    -- qualifiers
455
456Medline-qual ::= SEQUENCE {          -- one qualifier (subheading) of a MeSH term
457    mp BOOLEAN DEFAULT FALSE ,       -- TRUE if main point
458    subh VisibleString }             -- the subheading
459
460Medline-rn ::= SEQUENCE {       -- medline substance records
461    type ENUMERATED {           -- type of record
462        nameonly (0) ,          -- name only (presumably no CAS/EC number; confirm)
463        cas (1) ,               -- CAS number
464        ec (2) } ,              -- EC number
465    cit VisibleString OPTIONAL ,  -- CAS or EC number if present
466    name VisibleString }          -- name (always present)
467
468Medline-si ::= SEQUENCE {       -- medline cross reference records
469    type ENUMERATED {           -- type of xref
470        ddbj (1) ,              -- DNA Data Bank of Japan
471        carbbank (2) ,          -- Carbohydrate Structure Database
472        embl (3) ,              -- EMBL Data Library
473        hdb (4) ,               -- Hybridoma Data Bank
474        genbank (5) ,           -- GenBank
475        hgml (6) ,              -- Human Gene Map Library
476        mim (7) ,               -- Mendelian Inheritance in Man
477        msd (8) ,               -- Microbial Strains Database
478        pdb (9) ,               -- Protein Data Bank (Brookhaven)
479        pir (10) ,              -- Protein Identification Resource
480        prfseqdb (11) ,         -- Protein Research Foundation (Japan)
481        psd (12) ,              -- Protein Sequence Database (Japan)
482        swissprot (13) ,        -- SwissProt
483        gdb (14) } ,            -- Genome Data Base
484    cit VisibleString OPTIONAL }    -- the citation/accession number
485
486Medline-field ::= SEQUENCE {    -- an additional Medline field, keyed by type
487    type INTEGER {              -- Keyed type
488	other (0) ,             -- look in line code
489	comment (1) ,           -- comment line
490        erratum (2) } ,         -- retracted, corrected, etc
491    str VisibleString ,         -- the text
492    ids SEQUENCE OF DocRef OPTIONAL }  -- pointers relevant to this text
493
494DocRef ::= SEQUENCE {           -- reference to a document
495    type INTEGER {              -- which id space uid belongs to
496        medline (1) ,
497        pubmed (2) ,
498        ncbigi (3) } ,          -- presumably an NCBI gi; confirm
499    uid INTEGER }               -- the id itself, interpreted according to type
500
501END
502
503--$Revision: 6.0 $
504--**********************************************************************
505--
506--  PUBMED data definitions
507--
508--**********************************************************************
509
510NCBI-PubMed DEFINITIONS ::=
511BEGIN
512
513EXPORTS Pubmed-entry, Pubmed-url;
514
515IMPORTS PubMedId FROM NCBI-Biblio
516        Medline-entry FROM NCBI-Medline;
517
518Pubmed-entry ::= SEQUENCE {        -- a PubMed entry
519    -- PUBMED records must include the PubMedId
520    pmid PubMedId,
521
522    -- Medline entry information
523    medent Medline-entry OPTIONAL,
524
525    -- Publisher name
526    publisher VisibleString OPTIONAL,
527
528    -- List of URLs to publisher site
529    urls SET OF Pubmed-url OPTIONAL,
530
531    -- Publisher's article identifier
532    pubid VisibleString OPTIONAL
533}
534
535Pubmed-url ::= SEQUENCE {         -- one URL, optionally tagged with a location code
536    location VisibleString OPTIONAL, -- Location code
537    url VisibleString                -- Selected URL for location
538}
539
540END
541--$Revision: 6.0 $
542--**********************************************************************
543--
544--  MEDLARS data definitions
545--  Grigoriy Starchenko, 1997
546--
547--**********************************************************************
548
549NCBI-Medlars DEFINITIONS ::=
550BEGIN
551
552EXPORTS Medlars-entry, Medlars-record;
553
554IMPORTS PubMedId FROM NCBI-Biblio;
555
556Medlars-entry ::= SEQUENCE {     -- a MEDLARS entry
557    pmid PubMedId,               -- All entries in PubMed must have it
558    muid INTEGER OPTIONAL,       -- Medline(OCCS) id
559    recs SET OF Medlars-record   -- List of Medlars records
560}
561
562Medlars-record ::= SEQUENCE {    -- one unit record of a Medlars-entry
563    code INTEGER,                -- Unit record field type integer form
564    abbr VisibleString OPTIONAL, -- Unit record field type abbreviation form
565    data VisibleString           -- Unit record data
566}
567
568END
569--$Revision: 6.0 $
570--********************************************************************
571--
572--  Publication common set
573--  James Ostell, 1990
574--
575--  These are the base class definitions for Publications of all sorts
576--
577--  support for PubMedId added in 1996
578--********************************************************************
579
580NCBI-Pub DEFINITIONS ::=
581BEGIN
582
583EXPORTS Pub, Pub-set, Pub-equiv;
584
585IMPORTS Medline-entry FROM NCBI-Medline
586        Cit-art, Cit-jour, Cit-book, Cit-proc, Cit-pat, Id-pat, Cit-gen,
587        Cit-let, Cit-sub, PubMedId FROM NCBI-Biblio;
588
589Pub ::= CHOICE {         -- a publication, cited in exactly one of these forms
590    gen Cit-gen ,        -- general or generic unparsed
591    sub Cit-sub ,        -- submission
592    medline Medline-entry ,   -- full MEDLINE or PubMed entry
593    muid INTEGER ,       -- medline uid
594    article Cit-art ,    -- article in journal or book
595    journal Cit-jour ,   -- journal citation
596    book Cit-book ,      -- book citation
597    proc Cit-proc ,      -- proceedings of a meeting
598    patent Cit-pat ,     -- patent citation
599    pat-id Id-pat ,      -- identify a patent
600    man Cit-let ,        -- manuscript, thesis, or letter
601    equiv Pub-equiv,     -- to cite a variety of ways
602	pmid PubMedId }      -- PubMedId
603
604Pub-equiv ::= SET OF Pub   -- equivalent identifiers for same citation
605
606Pub-set ::= CHOICE {            -- a set of publications, all of one kind
607    pub SET OF Pub ,            -- any mixture of Pub forms
608    medline SET OF Medline-entry ,
609    article SET OF Cit-art ,
610    journal SET OF Cit-jour ,
611    book SET OF Cit-book ,
612    proc SET OF Cit-proc ,      -- proceedings of a meeting
613    patent SET OF Cit-pat }
614
615END
616
617--$Revision: 6.7 $
618--**********************************************************************
619--
620--  NCBI Sequence location and identifier elements
621--  by James Ostell, 1990
622--
623--  Version 3.0 - 1994
624--
625--**********************************************************************
626
627NCBI-Seqloc DEFINITIONS ::=
628BEGIN
629
630EXPORTS Seq-id, Seq-loc, Seq-interval, Packed-seqint, Seq-point, Packed-seqpnt,
631        Na-strand, Giimport-id;
632
633IMPORTS Object-id, Int-fuzz, Dbtag, Date FROM NCBI-General
634        Id-pat FROM NCBI-Biblio
635        Feat-id FROM NCBI-Seqfeat;
636
637--*** Sequence identifiers ********************************
638--*
639
640Seq-id ::= CHOICE {              -- a sequence identifier, in exactly one of these forms
641    local Object-id ,            -- local use
642    gibbsq INTEGER ,             -- Geninfo backbone seqid
643    gibbmt INTEGER ,             -- Geninfo backbone moltype
644    giim Giimport-id ,           -- Geninfo import id
645    genbank Textseq-id ,         -- GenBank
646    embl Textseq-id ,            -- EMBL
647    pir Textseq-id ,             -- PIR
648    swissprot Textseq-id ,       -- SwissProt
649    patent Patent-seq-id ,       -- sequence within a patent
650    other Textseq-id ,           -- for historical reasons, 'other' = 'refseq'
651    general Dbtag ,              -- for other databases
652    gi INTEGER ,                 -- GenInfo Integrated Database
653    ddbj Textseq-id ,            -- DDBJ
654    prf Textseq-id ,             -- PRF SEQDB
655    pdb PDB-seq-id ,             -- PDB sequence
656    tpg Textseq-id ,             -- Third Party Annot/Seq Genbank
657    tpe Textseq-id ,             -- Third Party Annot/Seq EMBL
658    tpd Textseq-id ,             -- Third Party Annot/Seq DDBJ
659    gpipe Textseq-id ,           -- Internal NCBI genome pipeline processing ID
660    named-annot-track Textseq-id -- Internal named annotation tracking ID
661}
662
663Seq-id-set ::= SET OF Seq-id     -- unordered collection of sequence identifiers
664
665
666Patent-seq-id ::= SEQUENCE {     -- a sequence identified by its position in a patent
667    seqid INTEGER ,         -- number of sequence in patent
668    cit Id-pat }           -- patent citation
669
670Textseq-id ::= SEQUENCE {           -- text-based identifier; every part is optional
671    name VisibleString OPTIONAL ,       -- name
672    accession VisibleString OPTIONAL ,  -- accession
673    release VisibleString OPTIONAL ,    -- release
674    version INTEGER OPTIONAL }          -- version number
675
676Giimport-id ::= SEQUENCE {           -- Geninfo import id
677    id INTEGER ,                     -- the id to use here
678    db VisibleString OPTIONAL ,      -- dbase used in
679    release VisibleString OPTIONAL } -- the release
680
681PDB-seq-id ::= SEQUENCE {            -- must set either chain or chain-id, but not both
682      mol PDB-mol-id ,                    -- the molecule name
683      chain INTEGER DEFAULT 32 ,          -- Deprecated : For single ASCII character
684                                          -- chain identifiers of pre-2015 PDB structures
685      rel Date OPTIONAL ,                 -- release date, month and year
686      chain-id VisibleString OPTIONAL }   -- chain identifier
687
688PDB-mol-id ::= VisibleString  -- name of mol, 4 chars
689
690--*** Sequence locations **********************************
691--*
692
693Seq-loc ::= CHOICE {      -- a location on a sequence, in exactly one of these forms
694    null NULL ,           -- not placed
695    empty Seq-id ,        -- to NULL one Seq-id in a collection
696    whole Seq-id ,        -- whole sequence
697    int Seq-interval ,    -- from to
698    packed-int Packed-seqint ,   -- a series of intervals
699    pnt Seq-point ,              -- a single point
700    packed-pnt Packed-seqpnt ,   -- several points on one Seq-id
701    mix Seq-loc-mix ,            -- any sequence of locations
702    equiv Seq-loc-equiv ,  -- equivalent sets of locations
703    bond Seq-bond ,              -- bond between residues
704    feat Feat-id }         -- indirect, through a Seq-feat
705
706
707Seq-interval ::= SEQUENCE {       -- an interval on one sequence
708    from INTEGER ,                -- position where the interval starts
709    to INTEGER ,                  -- position where the interval ends
710    strand Na-strand OPTIONAL ,   -- strand, if specified
711    id Seq-id ,    -- WARNING: this used to be optional
712    fuzz-from Int-fuzz OPTIONAL , -- uncertainty of from
713    fuzz-to Int-fuzz OPTIONAL }   -- uncertainty of to
714
715Packed-seqint ::= SEQUENCE OF Seq-interval   -- an ordered list of intervals
716
717Seq-point ::= SEQUENCE {          -- a single position on one sequence
718    point INTEGER ,               -- the position
719    strand Na-strand OPTIONAL ,   -- strand, if specified
720    id Seq-id ,     -- WARNING: this used to be optional
721    fuzz Int-fuzz OPTIONAL }      -- uncertainty of point
722
723Packed-seqpnt ::= SEQUENCE {      -- several positions sharing one id, strand and fuzz
724    strand Na-strand OPTIONAL ,   -- strand, if specified
725    id Seq-id ,                   -- the sequence all points lie on
726    fuzz Int-fuzz OPTIONAL ,      -- uncertainty, given once for the whole list
727    points SEQUENCE OF INTEGER }  -- the positions
728
729Na-strand ::= ENUMERATED {          -- strand of nucleic acid
730    unknown (0) ,             -- strand not known
731    plus (1) ,                -- plus strand
732    minus (2) ,               -- minus strand
733    both (3) ,                -- in forward orientation
734    both-rev (4) ,            -- in reverse orientation
735    other (255) }             -- anything not listed above
736
737Seq-bond ::= SEQUENCE {         -- bond between residues
738    a Seq-point ,           -- connection to at least one residue
739    b Seq-point OPTIONAL }  -- other end may not be available
740
741Seq-loc-mix ::= SEQUENCE OF Seq-loc   -- this will hold anything
742
743Seq-loc-equiv ::= SET OF Seq-loc      -- for a set of equivalent locations
744
745END
746
747
748--$Revision: 6.26 $
749--**********************************************************************
750--
751--  NCBI Sequence elements
752--  by James Ostell, 1990
753--  Version 3.0 - June 1994
754--
755--**********************************************************************
756
757NCBI-Sequence DEFINITIONS ::=
758BEGIN
759
760EXPORTS Annotdesc, Annot-descr, Bioseq, GIBB-mol, Heterogen, MolInfo,
761        Numbering, Pubdesc, Seq-annot, Seq-data, Seqdesc, Seq-descr, Seq-ext,
762        Seq-hist, Seq-inst, Seq-literal, Delta-ext, Seq-gap;   -- each symbol listed once
763
764IMPORTS Date, Int-fuzz, Dbtag, Object-id, User-object FROM NCBI-General
765        Seq-align FROM NCBI-Seqalign
766        Seq-feat, ModelEvidenceSupport FROM NCBI-Seqfeat
767        Seq-graph FROM NCBI-Seqres
768        Pub-equiv FROM NCBI-Pub
769        Org-ref FROM NCBI-Organism
770        BioSource FROM NCBI-BioSource
771        Seq-id, Seq-loc FROM NCBI-Seqloc
772        GB-block FROM GenBank-General
773        PIR-block FROM PIR-General
774        EMBL-block FROM EMBL-General
775        SP-block FROM SP-General
776        PRF-block FROM PRF-General
777        PDB-block FROM PDB-General
778        Seq-table FROM NCBI-SeqTable;
779
780--*** Sequence ********************************
781--*
782
783Bioseq ::= SEQUENCE {          -- one sequence: ids, descriptors, data, annotations
784    id SET OF Seq-id ,            -- equivalent identifiers
785    descr Seq-descr OPTIONAL , -- descriptors
786    inst Seq-inst ,            -- the sequence data
787    annot SET OF Seq-annot OPTIONAL }   -- annotations
788
789--*** Descriptors *****************************
790--*
791
792Seq-descr ::= SET OF Seqdesc     -- unordered collection of descriptors
793
794Seqdesc ::= CHOICE {             -- one descriptor; mol-type, modif, method, org are deprecated (see NOTE below)
795    mol-type GIBB-mol ,          -- type of molecule
796    modif SET OF GIBB-mod ,             -- modifiers
797    method GIBB-method ,         -- sequencing method
798    name VisibleString ,         -- a name for this sequence
799    title VisibleString ,        -- a title for this sequence
800    org Org-ref ,                -- if all from one organism
801    comment VisibleString ,      -- a more extensive comment
802    num Numbering ,              -- a numbering system
803    maploc Dbtag ,               -- map location of this sequence
804    pir PIR-block ,              -- PIR specific info
805    genbank GB-block ,           -- GenBank specific info
806    pub Pubdesc ,                -- a reference to the publication
807    region VisibleString ,       -- overall region (globin locus)
808    user User-object ,           -- user defined object
809    sp SP-block ,                -- SWISSPROT specific info
810    dbxref Dbtag ,               -- xref to other databases
811    embl EMBL-block ,            -- EMBL specific information
812    create-date Date ,           -- date entry first created/released
813    update-date Date ,           -- date of last update
814    prf PRF-block ,              -- PRF specific information
815    pdb PDB-block ,              -- PDB specific information
816    het Heterogen ,              -- cofactor, etc associated but not bound
817    source BioSource ,           -- source of materials, includes Org-ref
818    molinfo MolInfo ,            -- info on the molecule and techniques
819    modelev ModelEvidenceSupport -- model evidence for XM records
820}
821
822--******* NOTE:
823--*       mol-type, modif, method, and org are consolidated and expanded
824--*       in Org-ref, BioSource, and MolInfo in this specification. They
825--*       will be removed in later specifications. Do not use them in the
826--*       future. Instead expect the new structures.
827--*
828--***************************
829
830--********************************************************************
831--
832-- MolInfo gives information on the
833-- classification of the type and quality of the sequence
834--
835-- WARNING: this will replace GIBB-mol, GIBB-mod, GIBB-method
836--
837--********************************************************************
838
-- Every member is either OPTIONAL or has a DEFAULT, so an empty MolInfo is
-- a valid value (biomol, tech and completeness then read as unknown).
839MolInfo ::= SEQUENCE {
840    biomol INTEGER {               -- molecule type; values 0-10 and 255 parallel GIBB-mol
841        unknown (0) ,
842        genomic (1) ,
843        pre-RNA (2) ,              -- precursor RNA of any sort really
844        mRNA (3) ,
845        rRNA (4) ,
846        tRNA (5) ,
847        snRNA (6) ,
848        scRNA (7) ,
849        peptide (8) ,
850        other-genetic (9) ,      -- other genetic material
851        genomic-mRNA (10) ,      -- reported a mix of genomic and cdna sequence
852        cRNA (11) ,              -- viral RNA genome copy intermediate
853        snoRNA (12) ,            -- small nucleolar RNA
854        transcribed-RNA (13) ,   -- transcribed RNA other than existing classes
855        ncRNA (14) ,
856        tmRNA (15) ,
857        other (255) } DEFAULT unknown ,
858    tech INTEGER {              -- technique by which the sequence was obtained
859        unknown (0) ,
860        standard (1) ,          -- standard sequencing
861        est (2) ,               -- Expressed Sequence Tag
862        sts (3) ,               -- Sequence Tagged Site
863        survey (4) ,            -- one-pass genomic sequence
864        genemap (5) ,           -- from genetic mapping techniques
865        physmap (6) ,           -- from physical mapping techniques
866        derived (7) ,           -- derived from other data, not a primary entity
867        concept-trans (8) ,     -- conceptual translation
868        seq-pept (9) ,          -- peptide was sequenced
869        both (10) ,             -- concept transl. w/ partial pept. seq.
870        seq-pept-overlap (11) , -- sequenced peptide, ordered by overlap
871        seq-pept-homol (12) ,   -- sequenced peptide, ordered by homology
872        concept-trans-a (13) ,  -- conceptual transl. supplied by author
873        htgs-1 (14) ,           -- unordered High Throughput sequence contig
874        htgs-2 (15) ,           -- ordered High Throughput sequence contig
875        htgs-3 (16) ,           -- finished High Throughput sequence
876        fli-cdna (17) ,         -- full length insert cDNA
877        htgs-0 (18) ,           -- single genomic reads for coordination
878        htc (19) ,              -- high throughput cDNA
879        wgs (20) ,              -- whole genome shotgun sequencing
880        barcode (21) ,          -- barcode of life project
881        composite-wgs-htgs (22) , -- composite of WGS and HTGS
882        tsa (23) ,              -- transcriptome shotgun assembly
883        targeted (24) ,         -- targeted locus sets/studies
884        other (255) }           -- use the techexp member of this MolInfo
885               DEFAULT unknown ,
886    techexp VisibleString OPTIONAL ,   -- explanation if tech not enough
887    --
888    -- Completeness is not indicated in most records.  For genomes, assume
889    -- the sequences are incomplete unless specifically marked as complete.
890    -- For mRNAs, assume the ends are not known exactly unless marked as
891    -- having the left or right end.
892    --
893    completeness INTEGER {
894      unknown (0) ,
895      complete (1) ,                   -- complete biological entity
896      partial (2) ,                    -- partial but no details given
897      no-left (3) ,                    -- missing 5' or NH3 end
898      no-right (4) ,                   -- missing 3' or COOH end
899      no-ends (5) ,                    -- missing both ends
900      has-left (6) ,                   -- 5' or NH3 end present
901      has-right (7) ,                  -- 3' or COOH end present
902      other (255) } DEFAULT unknown ,
903    gbmoltype VisibleString OPTIONAL } -- identifies particular ncRNA
904
905
-- Legacy type: the WARNING on MolInfo states that MolInfo will replace it.
-- The numeric values match MolInfo.biomol for 0-10 and 255; note that value 2
-- is named pre-mRNA here but pre-RNA in MolInfo.biomol.
906GIBB-mol ::= ENUMERATED {       -- type of molecule represented
907    unknown (0) ,
908    genomic (1) ,
909    pre-mRNA (2) ,              -- precursor RNA of any sort really
910    mRNA (3) ,
911    rRNA (4) ,
912    tRNA (5) ,
913    snRNA (6) ,
914    scRNA (7) ,
915    peptide (8) ,
916    other-genetic (9) ,      -- other genetic material
917    genomic-mRNA (10) ,      -- reported a mix of genomic and cdna sequence
918    other (255) }
919
-- Legacy type: the WARNING on MolInfo states that MolInfo will replace it.
-- Used as "modif SET OF GIBB-mod" in Seqdesc, so several modifiers can be
-- given together.
920GIBB-mod ::= ENUMERATED {        -- GenInfo Backbone modifiers
921    dna (0) ,
922    rna (1) ,
923    extrachrom (2) ,
924    plasmid (3) ,
925    mitochondrial (4) ,
926    chloroplast (5) ,
927    kinetoplast (6) ,
928    cyanelle (7) ,
929    synthetic (8) ,
930    recombinant (9) ,
931    partial (10) ,
932    complete (11) ,
933    mutagen (12) ,    -- subject of mutagenesis ?
934    natmut (13) ,     -- natural mutant ?
935    transposon (14) ,
936    insertion-seq (15) ,
937    no-left (16) ,    -- missing left end (5' for na, NH2 for aa)
938    no-right (17) ,   -- missing right end (3' or COOH)
939    macronuclear (18) ,
940    proviral (19) ,
941    est (20) ,        -- expressed sequence tag
942    sts (21) ,        -- sequence tagged site
943    survey (22) ,     -- one pass survey sequence
944    chromoplast (23) ,
945    genemap (24) ,    -- is a genetic map
946    restmap (25) ,    -- is an ordered restriction map
947    physmap (26) ,    -- is a physical map (not ordered restriction map)
948    other (255) }
949
-- Legacy type: the WARNING on MolInfo states that MolInfo will replace it.
-- The same six names appear in MolInfo.tech, but there they are numbered
-- 8-13 rather than 1-6, so the numeric values are NOT interchangeable.
-- There is no value 0 in this enumeration.
950GIBB-method ::= ENUMERATED {        -- sequencing methods
951    concept-trans (1) ,    -- conceptual translation
952    seq-pept (2) ,         -- peptide was sequenced
953    both (3) ,             -- concept transl. w/ partial pept. seq.
954    seq-pept-overlap (4) , -- sequenced peptide, ordered by overlap
955    seq-pept-homol (5) ,   -- sequenced peptide, ordered by homology
956    concept-trans-a (6) ,  -- conceptual transl. supplied by author
957    other (255) }
958
-- Exactly one of the four numbering schemes below is present in a value.
959Numbering ::= CHOICE {           -- any display numbering system
960    cont Num-cont ,              -- continuous numbering
961    enum Num-enum ,              -- enumerated names for residues
962    ref Num-ref ,                -- by reference to another sequence
963    real Num-real }              -- supports mapping to a float system
964
-- All three members have defaults, so an empty Num-cont means: first residue
-- is numbered 1, zero is not used, numbers ascend.
965Num-cont ::= SEQUENCE {          -- continuous display numbering system
966    refnum INTEGER DEFAULT 1,         -- number assigned to first residue
967    has-zero BOOLEAN DEFAULT FALSE ,  -- 0 used?
968    ascending BOOLEAN DEFAULT TRUE }  -- ascending numbers?
969
-- An explicit, ordered list of residue tags.  The schema itself does not
-- constrain num to equal the length of names; writers must keep them in sync.
970Num-enum ::= SEQUENCE {          -- any tags to residues
971    num INTEGER ,                        -- number of tags to follow
972    names SEQUENCE OF VisibleString }    -- the tags
973
-- Numbering taken from other sequences.  Note that "aligns" is both an
-- enumeration value of type and the name of the optional Seq-align member.
974Num-ref ::= SEQUENCE {           -- by reference to other sequences
975    type ENUMERATED {            -- type of reference
976        not-set (0) ,
977        sources (1) ,            -- by segmented or const seq sources
978        aligns (2) } ,           -- by alignments given below
979    aligns Seq-align OPTIONAL }  -- the alignment referred to by type = aligns
980
-- Linear mapping from integer positions to a real-valued coordinate:
-- position = (a * int_position) + b.
981Num-real ::= SEQUENCE {          -- mapping to floating point system
982    a REAL ,                     -- from an integer system used by Bioseq
983    b REAL ,                     -- position = (a * int_position) + b
984    units VisibleString OPTIONAL }  -- optional name of the units of the result
985
-- Only pub is mandatory; reftype falls back to seq when absent, and every
-- other member is OPTIONAL.
986Pubdesc ::= SEQUENCE {              -- how sequence presented in pub
987    pub Pub-equiv ,                 -- the citation(s)
988    name VisibleString OPTIONAL ,   -- name used in paper
989    fig VisibleString OPTIONAL ,    -- figure in paper
990    num Numbering OPTIONAL ,        -- numbering from paper
991    numexc BOOLEAN OPTIONAL ,       -- numbering problem with paper
992    poly-a BOOLEAN OPTIONAL ,       -- poly A tail indicated in figure?
993    maploc VisibleString OPTIONAL , -- map location reported in paper
994    seq-raw StringStore OPTIONAL ,  -- original sequence from paper
995    align-group INTEGER OPTIONAL ,  -- this seq aligned with others in paper
996    comment VisibleString OPTIONAL, -- any comment on this pub in context
997    reftype INTEGER {           -- type of reference in a GenBank record
998        seq (0) ,               -- refers to sequence
999        sites (1) ,             -- refers to unspecified features
1000        feats (2) ,             -- refers to specified features
1001        no-target (3) }         -- nothing specified (EMBL)
1002        DEFAULT seq }
1003
-- Free-text description; used by the het alternative of Seqdesc.
1004Heterogen ::= VisibleString       -- cofactor, prosthetic group, inhibitor, etc
1005
1006--*** Instances of sequences *******************************
1007--*
1008
-- repr and mol are mandatory; topology defaults to linear; every other
-- member is OPTIONAL.
1009Seq-inst ::= SEQUENCE {            -- the sequence data itself
1010    repr ENUMERATED {              -- representation class
1011        not-set (0) ,              -- empty
1012        virtual (1) ,              -- no seq data
1013        raw (2) ,                  -- continuous sequence
1014        seg (3) ,                  -- segmented sequence
1015        const (4) ,                -- constructed sequence
1016        ref (5) ,                  -- reference to another sequence
1017        consen (6) ,               -- consensus sequence or pattern
1018        map (7) ,                  -- ordered map of any kind
1019        delta (8) ,              -- sequence made by changes (delta) to others
1020        other (255) } ,
1021    mol ENUMERATED {               -- molecule class in living organism
1022        not-set (0) ,              --   > cdna = rna
1023        dna (1) ,
1024        rna (2) ,
1025        aa (3) ,
1026        na (4) ,                   -- just a nucleic acid
1027        other (255) } ,
1028    length INTEGER OPTIONAL ,      -- length of sequence in residues
1029    fuzz Int-fuzz OPTIONAL ,       -- length uncertainty
1030    topology ENUMERATED {          -- topology of molecule
1031        not-set (0) ,
1032        linear (1) ,
1033        circular (2) ,
1034        tandem (3) ,               -- some part of tandem repeat
1035        other (255) } DEFAULT linear ,
1036    strand ENUMERATED {            -- strandedness in living organism
1037        not-set (0) ,
1038        ss (1) ,                   -- single strand
1039        ds (2) ,                   -- double strand
1040        mixed (3) ,
1041        other (255) } OPTIONAL ,   -- default ds for DNA, ss for RNA, pept
1042    seq-data Seq-data OPTIONAL ,   -- the sequence
1043    ext Seq-ext OPTIONAL ,         -- extensions for special types
1044    hist Seq-hist OPTIONAL }       -- sequence history
1045
1046--*** Sequence Extensions **********************************
1047--*  for representing more complex types
1048--*  const type uses Seq-hist.assembly
1049
-- Extension payload for Seq-inst.ext; the alternative names seg, ref, map
-- and delta match the like-named values of Seq-inst.repr.
1050Seq-ext ::= CHOICE {
1051    seg Seg-ext ,        -- segmented sequences
1052    ref Ref-ext ,        -- hot link to another sequence (a view)
1053    map Map-ext ,        -- ordered map of markers
1054    delta Delta-ext }    -- ordered list of Delta-seq pieces
1055
-- The four payload types used by the Seq-ext alternatives.
1056Seg-ext ::= SEQUENCE OF Seq-loc      -- ordered list of locations
1057
1058Ref-ext ::= Seq-loc                  -- a single location
1059
1060Map-ext ::= SEQUENCE OF Seq-feat     -- ordered list of features
1061
1062Delta-ext ::= SEQUENCE OF Delta-seq  -- ordered list of locations and/or literals
1063
-- One element of a Delta-ext: either a location or a Seq-literal.
1064Delta-seq ::= CHOICE {
1065    loc Seq-loc ,       -- point to a sequence
1066    literal Seq-literal }   -- a piece of sequence
1067
-- A piece of sequence given by length, with the residues themselves optional.
-- Unlike Seq-inst.length, length is mandatory here.
1068Seq-literal ::= SEQUENCE {
1069    length INTEGER ,         -- must give a length in residues
1070    fuzz Int-fuzz OPTIONAL , -- could be unsure
1071    seq-data Seq-data OPTIONAL } -- may have the data
1072
1073--*** Sequence History Record ***********************************
1074--** assembly = records how seq was assembled from others
1075--** replaces = records sequences made obsolete by this one
1076--** replaced-by = this seq is made obsolete by another(s)
1077
-- All members are OPTIONAL.  deleted is either a plain flag (bool) or the
-- date of deletion (date).
1078Seq-hist ::= SEQUENCE {
1079    assembly SET OF Seq-align OPTIONAL ,-- how was this assembled?
1080    replaces Seq-hist-rec OPTIONAL ,    -- seq makes these seqs obsolete
1081    replaced-by Seq-hist-rec OPTIONAL , -- these seqs make this one obsolete
1082    deleted CHOICE {
1083        bool BOOLEAN ,
1084        date Date } OPTIONAL }
1085
-- Record type used by Seq-hist.replaces and Seq-hist.replaced-by.
1086Seq-hist-rec ::= SEQUENCE {
1087    date Date OPTIONAL ,     -- optional date of the event
1088    ids SET OF Seq-id }      -- identifiers of the sequences involved
1089
1090--*** Various internal sequence representations ************
1091--*      all are controlled, fixed length forms
1092
-- Exactly one encoding is present per value.  The gap alternative carries a
-- Seq-gap description instead of residues.
1093Seq-data ::= CHOICE {              -- sequence representations
1094    iupacna IUPACna ,              -- IUPAC 1 letter nuc acid code
1095    iupacaa IUPACaa ,              -- IUPAC 1 letter amino acid code
1096    ncbi2na NCBI2na ,              -- 2 bit nucleic acid code
1097    ncbi4na NCBI4na ,              -- 4 bit nucleic acid code
1098    ncbi8na NCBI8na ,              -- 8 bit extended nucleic acid code
1099    ncbipna NCBIpna ,              -- nucleic acid probabilities
1100    ncbi8aa NCBI8aa ,              -- 8 bit extended amino acid codes
1101    ncbieaa NCBIeaa ,              -- extended ASCII 1 letter aa codes
1102    ncbipaa NCBIpaa ,              -- amino acid probabilities
1103    ncbistdaa NCBIstdaa,           -- consecutive codes for std aas
1104    gap Seq-gap                    -- gap types
1105}
1106
-- Describes a gap: its type (mandatory), an optional linkage state, and an
-- optional set of Linkage-evidence records.
1107Seq-gap ::= SEQUENCE {
1108    type INTEGER {
1109        unknown(0),
1110        fragment(1),               -- Deprecated. Used only for AGP 1.1
1111        clone(2),                  -- Deprecated. Used only for AGP 1.1
1112        short-arm(3),
1113        heterochromatin(4),
1114        centromere(5),
1115        telomere(6),
1116        repeat(7),
1117        contig(8),
1118        scaffold(9),
1119        other(255)
1120    },
1121    linkage INTEGER {
1122        unlinked(0),
1123        linked(1),
1124        other(255)
1125    } OPTIONAL,
1126    linkage-evidence SET OF Linkage-evidence OPTIONAL
1127}
1128
-- One item of evidence for Seq-gap.linkage-evidence.  The SEQUENCE has a
-- single mandatory member, type.
1129Linkage-evidence ::= SEQUENCE {
1130    type INTEGER {
1131        paired-ends(0),
1132        align-genus(1),
1133        align-xgenus(2),
1134        align-trnscpt(3),
1135        within-clone(4),
1136        clone-contig(5),
1137        map(6),
1138        strobe(7),
1139        unspecified(8),
1140        pcr(9),
1141        other(255)
1142    }
1143}
1144
-- Concrete encodings referenced by the Seq-data alternatives.  The textual
-- codes (IUPACna, IUPACaa, NCBIeaa) are StringStore; the rest are OCTET STRING.
1145IUPACna ::= StringStore       -- IUPAC 1 letter codes, no spaces
1146IUPACaa ::= StringStore       -- IUPAC 1 letter codes, no spaces
1147NCBI2na ::= OCTET STRING      -- 00=A, 01=C, 10=G, 11=T
1148NCBI4na ::= OCTET STRING      -- 1 bit each for agct
1149                              -- 0001=A, 0010=C, 0100=G, 1000=T/U
1150                              -- 0101=Purine, 1010=Pyrimidine, etc
1151NCBI8na ::= OCTET STRING      -- for modified nucleic acids
1152NCBIpna ::= OCTET STRING      -- 5 octets/base, prob for a,c,g,t,n
1153                              -- probabilities are coded 0-255 = 0.0-1.0
1154NCBI8aa ::= OCTET STRING      -- for modified amino acids
1155NCBIeaa ::= StringStore       -- ASCII extended 1 letter aa codes
1156                              -- IUPAC codes + U=selenocysteine
1157NCBIpaa ::= OCTET STRING      -- 25 octets/aa, prob for IUPAC aas in order:
1158                              -- A-Y,B,Z,X,(ter),anything
1159                              -- probabilities are coded 0-255 = 0.0-1.0
1160NCBIstdaa ::= OCTET STRING    -- codes 0-25, 1 per byte
1161
1162--*** Sequence Annotation *************************************
1163--*
1164
1165-- This is a replica of Textseq-id
1166-- This is specific for annotations, and exists to maintain a semantic
1167-- difference between IDs assigned to annotations and IDs assigned to
1168-- sequences
-- All four members are OPTIONAL.
1169Textannot-id ::= SEQUENCE {
1170    name	  VisibleString OPTIONAL ,
1171    accession VisibleString OPTIONAL ,
1172    release   VisibleString OPTIONAL ,
1173    version   INTEGER       OPTIONAL
1174}
1175
-- Identifier for an annotation; exactly one of the four forms is present.
-- Used as "id SET OF Annot-id" in Seq-annot.
1176Annot-id ::= CHOICE {
1177    local Object-id ,        -- an Object-id
1178    ncbi INTEGER ,           -- an integer identifier
1179    general Dbtag,           -- a Dbtag
1180    other Textannot-id       -- a text identifier (see Textannot-id)
1181}
1182
-- An unordered collection (SET OF) of Annotdesc descriptors.
1183Annot-descr ::= SET OF Annotdesc
1184
-- One descriptor for an annotation collection; exactly one alternative is
-- present per value.
1185Annotdesc ::= CHOICE {
1186    name VisibleString ,         -- a short name for this collection
1187    title VisibleString ,        -- a title for this collection
1188    comment VisibleString ,      -- a more extensive comment
1189    pub Pubdesc ,                -- a reference to the publication
1190    user User-object ,           -- user defined object
1191    create-date Date ,           -- date entry first created/released
1192    update-date Date ,           -- date of last update
1193    src Seq-id ,                 -- source sequence from which annot came
1194    align Align-def,             -- definition of the SeqAligns
1195    region Seq-loc }             -- all contents cover this region
1196
-- Payload of the align alternative of Annotdesc.  align-type has no value 0
-- and no DEFAULT, so it must always be given explicitly.
1197Align-def ::= SEQUENCE {
1198    align-type INTEGER {         -- class of align Seq-annot
1199      ref (1) ,                  -- set of alignments to the same sequence
1200      alt (2) ,                  -- set of alternate alignments of the same seqs
1201      blocks (3) ,               -- set of aligned blocks in the same seqs
1202      other (255) } ,
1203    ids SET OF Seq-id OPTIONAL } -- used for the one ref seqid for now
1204
-- A collection of annotation data.  Only data is mandatory; it is a CHOICE,
-- so one Seq-annot holds a single kind of content (features, alignments,
-- graphs, ids, locations, or a Seq-table).
1205Seq-annot ::= SEQUENCE {
1206    id SET OF Annot-id OPTIONAL ,
1207    db INTEGER {                 -- source of annotation
1208        genbank (1) ,
1209        embl (2) ,
1210        ddbj (3) ,
1211        pir  (4) ,
1212        sp   (5) ,
1213        bbone (6) ,
1214        pdb   (7) ,
1215        other (255) } OPTIONAL ,
1216    name VisibleString OPTIONAL ,-- source if "other" above
1217    desc Annot-descr OPTIONAL ,  -- used only for stand alone Seq-annots
1218    data CHOICE {
1219        ftable SET OF Seq-feat ,
1220        align SET OF Seq-align ,
1221        graph SET OF Seq-graph ,
1222        ids SET OF Seq-id ,      -- used for communication between tools
1223        locs SET OF Seq-loc ,    -- used for communication between tools
1224        seq-table Seq-table } }  -- features in table form
1225
1226END
1227
1228
1229--$Revision: 6.6 $
1230--**********************************************************************
1231--
1232--  NCBI Sequence Collections
1233--  by James Ostell, 1990
1234--
1235--  Version 3.0 - 1994
1236--
1237--**********************************************************************
1238
1239NCBI-Seqset DEFINITIONS ::=
1240BEGIN
1241
1242EXPORTS Bioseq-set, Seq-entry;
1243
1244IMPORTS Bioseq, Seq-annot, Seq-descr FROM NCBI-Sequence
1245        Object-id, Dbtag, Date FROM NCBI-General;
1246
1247--*** Sequence Collections ********************************
1248--*
1249
-- Only seq-set is mandatory.  Because seq-set holds Seq-entry values and a
-- Seq-entry may itself be a Bioseq-set, sets can nest recursively.
1250Bioseq-set ::= SEQUENCE {      -- just a collection
1251    id Object-id OPTIONAL ,
1252    coll Dbtag OPTIONAL ,          -- to identify a collection
1253    level INTEGER OPTIONAL ,       -- nesting level
1254    class ENUMERATED {
1255        not-set (0) ,
1256        nuc-prot (1) ,              -- nuc acid and coded proteins
1257        segset (2) ,                -- segmented sequence + parts
1258        conset (3) ,                -- constructed sequence + parts
1259        parts (4) ,                 -- parts for segset (2) or conset (3)
1260        gibb (5) ,                  -- geninfo backbone
1261        gi (6) ,                    -- geninfo
1262        genbank (7) ,               -- converted genbank
1263        pir (8) ,                   -- converted pir
1264        pub-set (9) ,               -- all the seqs from a single publication
1265        equiv (10) ,                -- a set of equivalent maps or seqs
1266        swissprot (11) ,            -- converted SWISSPROT
1267        pdb-entry (12) ,            -- a complete PDB entry
1268        mut-set (13) ,              -- set of mutations
1269        pop-set (14) ,              -- population study
1270        phy-set (15) ,              -- phylogenetic study
1271        eco-set (16) ,              -- ecological sample study
1272        gen-prod-set (17) ,         -- genomic products, chrom+mRNA+protein
1273        wgs-set (18) ,              -- whole genome shotgun project
1274        named-annot (19) ,          -- named annotation set
1275        named-annot-prod (20) ,     -- with instantiated mRNA+protein
1276        read-set (21) ,             -- set from a single read
1277        paired-end-reads (22) ,     -- paired sequences within a read-set
1278        small-genome-set (23) ,     -- viral segments or mitochondrial minicircles
1279        other (255) } DEFAULT not-set ,
1280    release VisibleString OPTIONAL ,
1281    date Date OPTIONAL ,
1282    descr Seq-descr OPTIONAL ,
1283    seq-set SEQUENCE OF Seq-entry ,
1284    annot SET OF Seq-annot OPTIONAL }
1285
-- Either a single Bioseq or a (possibly nested) Bioseq-set.
1286Seq-entry ::= CHOICE {
1287        seq Bioseq ,
1288        set Bioseq-set }
1289
1290END
1291
1292--$Revision: 6.0 $
1293--  *********************************************************************
1294--
1295--  These are code and conversion tables for NCBI sequence codes
1296--  ASN.1 for the sequences themselves is defined in seq.asn
1297--
1298--  Seq-map-table and Seq-code-table REQUIRE that codes start with 0
1299--    and increase continuously.  So IUPAC codes, which are upper case
1300--    letters will always have 65 0 cells before the codes begin.  This
1301--    allows all codes to do indexed lookups for things
1302--
1303--  Valid names for code tables are:
1304--    IUPACna
1305--    IUPACaa
1306--    IUPACeaa
1307--    IUPACaa3     3 letter amino acid codes : parallels IUPACeaa
1308--                   display only, not a data exchange type
1309--    NCBI2na
1310--    NCBI4na
1311--    NCBI8na
1312--    NCBI8aa
1313--    NCBIstdaa
1314--     probability types map to IUPAC types for display as characters
1315
1316NCBI-SeqCode DEFINITIONS ::=
1317BEGIN
1318
1319EXPORTS Seq-code-table, Seq-map-table, Seq-code-set;
1320
-- Names a sequence code.  Not listed in this module's EXPORTS, although the
-- exported Seq-map-table and Seq-code-table refer to it.  Numbering starts
-- at 1; there is no value 0.
1321Seq-code-type ::= ENUMERATED {              -- sequence representations
1322    iupacna (1) ,              -- IUPAC 1 letter nuc acid code
1323    iupacaa (2) ,              -- IUPAC 1 letter amino acid code
1324    ncbi2na (3) ,              -- 2 bit nucleic acid code
1325    ncbi4na (4) ,              -- 4 bit nucleic acid code
1326    ncbi8na (5) ,              -- 8 bit extended nucleic acid code
1327    ncbipna (6) ,              -- nucleic acid probabilities
1328    ncbi8aa (7) ,              -- 8 bit extended amino acid codes
1329    ncbieaa (8) ,              -- extended ASCII 1 letter aa codes
1330    ncbipaa (9) ,              -- amino acid probabilities
1331    iupacaa3 (10) ,            -- 3 letter code only for display
1332    ncbistdaa (11) }           -- consecutive codes for std aas, 0-25
1333
-- Conversion table from one sequence code to another.  The schema does not
-- constrain num to equal the length of table; writers must keep them in sync.
1334Seq-map-table ::= SEQUENCE { -- for tables of sequence mappings
1335    from Seq-code-type ,      -- code to map from
1336    to Seq-code-type ,        -- code to map to
1337    num INTEGER ,             -- number of rows in table
1338    start-at INTEGER DEFAULT 0 ,   -- index offset of first element
1339    table SEQUENCE OF INTEGER }  -- table of values, in from-to order
1340
-- Symbols and names for the values of one sequence code.  Each row of table
-- is an anonymous SEQUENCE of (symbol, name).
1341Seq-code-table ::= SEQUENCE { -- for names of coded values
1342    code Seq-code-type ,      -- name of code
1343    num INTEGER ,             -- number of rows in table
1344    one-letter BOOLEAN ,   -- symbol is ALWAYS 1 letter?
1345    start-at INTEGER DEFAULT 0 ,   -- index offset of first element
1346    table SEQUENCE OF
1347        SEQUENCE {
1348            symbol VisibleString ,      -- the printed symbol or letter
1349            name VisibleString } ,      -- an explanatory name or string
1350    comps SEQUENCE OF INTEGER OPTIONAL } -- pointers to complement nuc acid
1351
-- Bundle of code tables and mapping tables; both members are OPTIONAL.
1352Seq-code-set ::= SEQUENCE {    -- for distribution
1353    codes SET OF Seq-code-table OPTIONAL ,
1354    maps SET OF Seq-map-table OPTIONAL }
1355
1356END
1357
1358--$Revision: 6.0 $
1359--*********************************************************************
1360--
1361-- 1990 - J.Ostell
1362-- Version 3.0 - June 1994
1363--
1364--*********************************************************************
1365--*********************************************************************
1366--
1367--  EMBL specific data
1368--  This block of specifications was developed by Reiner Fuchs of EMBL
1369--  Updated by J.Ostell, 1994
1370--
1371--*********************************************************************
1372
1373EMBL-General DEFINITIONS ::=
1374BEGIN
1375
1376EXPORTS EMBL-dbname, EMBL-xref, EMBL-block;
1377
1378IMPORTS Date, Object-id FROM NCBI-General;
1379
-- Names a database either by an enumerated code or by a free-text name.
1380EMBL-dbname ::= CHOICE {
1381    code ENUMERATED {
1382        embl(0),
1383        genbank(1),
1384        ddbj(2),
1385        geninfo(3),
1386        medline(4),
1387        swissprot(5),
1388        pir(6),
1389        pdb(7),
1390        epd(8),
1391        ecd(9),
1392        tfd(10),
1393        flybase(11),
1394        prosite(12),
1395        enzyme(13),
1396        mim(14),
1397        ecoseq(15),
1398        hiv(16) ,
1399        other (255) } ,
1400    name    VisibleString }
1401
-- A cross-reference: a database plus an ordered list of ids within it.
1402EMBL-xref ::= SEQUENCE {
1403    dbname EMBL-dbname,
1404    id SEQUENCE OF Object-id }
1405
-- EMBL specific descriptions.  creation-date and update-date are mandatory;
-- class defaults to standard (not to not-set) when absent.
1406EMBL-block ::= SEQUENCE {
1407    class ENUMERATED {
1408        not-set(0),
1409        standard(1),
1410        unannotated(2),
1411        other(255) } DEFAULT standard,
1412    div ENUMERATED {
1413        fun(0),
1414        inv(1),
1415        mam(2),
1416        org(3),
1417        phg(4),
1418        pln(5),
1419        pri(6),
1420        pro(7),
1421        rod(8),
1422        syn(9),
1423        una(10),
1424        vrl(11),
1425        vrt(12),
1426        pat(13),
1427        est(14),
1428        sts(15),
1429        other (255) } OPTIONAL,
1430    creation-date Date,
1431    update-date Date,
1432    extra-acc SEQUENCE OF VisibleString OPTIONAL,
1433    keywords SEQUENCE OF VisibleString OPTIONAL,
1434    xref SEQUENCE OF EMBL-xref OPTIONAL }
1435
1436END
1437
1438--*********************************************************************
1439--
1440--  SWISSPROT specific data
1441--  This block of specifications was developed by Mark Cavanaugh of
1442--      NCBI working with Amos Bairoch of SWISSPROT
1443--
1444--*********************************************************************
1445
1446SP-General DEFINITIONS ::=
1447BEGIN
1448
1449EXPORTS SP-block;
1450
1451IMPORTS Date, Dbtag FROM NCBI-General
1452        Seq-id FROM NCBI-Seqloc;
1453
-- Only class is mandatory (it has no DEFAULT); imeth falls back to FALSE.
1454SP-block ::= SEQUENCE {         -- SWISSPROT specific descriptions
1455    class ENUMERATED {
1456        not-set (0) ,
1457        standard (1) ,      -- conforms to all SWISSPROT checks
1458        prelim (2) ,        -- only seq and biblio checked
1459        other (255) } ,
1460    extra-acc SET OF VisibleString OPTIONAL ,  -- old SWISSPROT ids
1461    imeth BOOLEAN DEFAULT FALSE ,  -- seq known to start with Met
1462    plasnm SET OF VisibleString OPTIONAL,  -- plasmid names carrying gene
1463    seqref SET OF Seq-id OPTIONAL,         -- xref to other sequences
1464    dbref SET OF Dbtag OPTIONAL ,          -- xref to non-sequence dbases
1465    keywords SET OF VisibleString OPTIONAL , -- keywords
1466    created Date OPTIONAL ,         -- creation date
1467    sequpd Date OPTIONAL ,          -- sequence update
1468    annotupd Date OPTIONAL }        -- annotation update
1469
1470END
1471
1472--*********************************************************************
1473--
1474--  PIR specific data
1475--  This block of specifications was developed by Jim Ostell of
1476--      NCBI
1477--
1478--*********************************************************************
1479
1480PIR-General DEFINITIONS ::=
1481BEGIN
1482
1483EXPORTS PIR-block;
1484
1485IMPORTS Seq-id FROM NCBI-Seqloc;
1486
-- Every member is OPTIONAL.  Note that date is a VisibleString here, not the
-- structured Date type used by the other database blocks.
1487PIR-block ::= SEQUENCE {          -- PIR specific descriptions
1488    had-punct BOOLEAN OPTIONAL ,      -- had punctuation in sequence ?
1489    host VisibleString OPTIONAL ,
1490    source VisibleString OPTIONAL ,     -- source line
1491    summary VisibleString OPTIONAL ,
1492    genetic VisibleString OPTIONAL ,
1493    includes VisibleString OPTIONAL ,
1494    placement VisibleString OPTIONAL ,
1495    superfamily VisibleString OPTIONAL ,
1496    keywords SEQUENCE OF VisibleString OPTIONAL ,
1497    cross-reference VisibleString OPTIONAL ,
1498    date VisibleString OPTIONAL ,
1499    seq-raw VisibleString OPTIONAL ,  -- seq with punctuation
1500    seqref SET OF Seq-id OPTIONAL }         -- xref to other sequences
1501
1502END
1503
1504--*********************************************************************
1505--
1506--  GenBank specific data
1507--  This block of specifications was developed by Jim Ostell of
1508--      NCBI
1509--
1510--*********************************************************************
1511
1512GenBank-General DEFINITIONS ::=
1513BEGIN
1514
1515EXPORTS GB-block;
1516
1517IMPORTS Date FROM NCBI-General;
1518
-- Every member is OPTIONAL.  date (text) is obsolete; entry-date (structured
-- Date) is its replacement.
1519GB-block ::= SEQUENCE {          -- GenBank specific descriptions
1520    extra-accessions SEQUENCE OF VisibleString OPTIONAL ,
1521    source VisibleString OPTIONAL ,     -- source line
1522    keywords SEQUENCE OF VisibleString OPTIONAL ,
1523    origin VisibleString OPTIONAL,
1524    date VisibleString OPTIONAL ,       -- OBSOLETE old form Entry Date
1525    entry-date Date OPTIONAL ,          -- replaces date
1526    div VisibleString OPTIONAL ,        -- GenBank division
1527    taxonomy VisibleString OPTIONAL }   -- continuation line of organism
1528
1529END
1530
1531--**********************************************************************
1532-- PRF specific definition
1533--    PRF is a protein sequence database created and maintained by
1534--    Protein Research Foundation, Minoo-city, Osaka, Japan.
1535--
1536--    Written by A.Ogiwara, Inst.Chem.Res. (Dr.Kanehisa's Lab),
1537--            Kyoto Univ., Japan
1538--
1539--**********************************************************************
1540
1541PRF-General DEFINITIONS ::=
1542BEGIN
1543
1544EXPORTS PRF-block;
1545
-- PRF specific descriptions; both members are OPTIONAL.
1546PRF-block ::= SEQUENCE {
1547      extra-src       PRF-ExtraSrc OPTIONAL,
1548      keywords        SEQUENCE OF VisibleString OPTIONAL
1549}
1550
-- Payload of PRF-block.extra-src; all five members are OPTIONAL free text.
1551PRF-ExtraSrc ::= SEQUENCE {
1552      host    VisibleString OPTIONAL,
1553      part    VisibleString OPTIONAL,
1554      state   VisibleString OPTIONAL,
1555      strain  VisibleString OPTIONAL,
1556      taxon   VisibleString OPTIONAL
1557}
1558
1559END
1560
1561--*********************************************************************
1562--
1563--  PDB specific data
1564--  This block of specifications was developed by Jim Ostell and
1565--      Steve Bryant of NCBI
1566--
1567--*********************************************************************
1568
1569PDB-General DEFINITIONS ::=
1570BEGIN
1571
1572EXPORTS PDB-block;
1573
1574IMPORTS Date FROM NCBI-General;
1575
1576PDB-block ::= SEQUENCE {          -- PDB specific descriptions
1577    deposition Date ,         -- deposition date  month,year
1578    class VisibleString ,     -- mandatory class string
1579    compound SEQUENCE OF VisibleString ,  -- mandatory list of compound strings
1580    source SEQUENCE OF VisibleString ,    -- mandatory list of source strings
1581    exp-method VisibleString OPTIONAL ,  -- present if NOT X-ray diffraction
1582    replace PDB-replace OPTIONAL } -- replacement history, see PDB-replace
1583
1584PDB-replace ::= SEQUENCE {        -- one replacement record; used by PDB-block.replace
1585    date Date ,                   -- Date of this record (mandatory)
1586    ids SEQUENCE OF VisibleString }   -- entry ids replaced by this one
1587
1588END
1589
1590--$Revision: 6.53 $
1591--**********************************************************************
1592--
1593--  NCBI Sequence Feature elements
1594--  by James Ostell, 1990
1595--  Version 3.0 - June 1994
1596--
1597--**********************************************************************
1598
1599NCBI-Seqfeat DEFINITIONS ::=
1600BEGIN
1601
1602EXPORTS Seq-feat, Feat-id, Genetic-code, ModelEvidenceSupport;
1603
1604IMPORTS Gene-ref FROM NCBI-Gene
1605        Prot-ref FROM NCBI-Protein
1606        Org-ref FROM NCBI-Organism
1607        Variation-ref FROM NCBI-Variation
1608        BioSource FROM NCBI-BioSource
1609        RNA-ref FROM NCBI-RNA
1610        Seq-id, Seq-loc, Giimport-id FROM NCBI-Seqloc
1611        Pubdesc, Numbering, Heterogen FROM NCBI-Sequence
1612        Rsite-ref FROM NCBI-Rsite
1613        Txinit FROM NCBI-TxInit
1614        DOI, PubMedId FROM NCBI-Biblio
1615        Pub-set FROM NCBI-Pub
1616        Object-id, Dbtag, User-object FROM NCBI-General;
1617
1618--*** Feature identifiers ********************************
1619--*
1620
1621Feat-id ::= CHOICE {          -- feature identifier: exactly one of the forms below
1622    gibb INTEGER ,            -- geninfo backbone
1623    giim Giimport-id ,        -- geninfo import (type imported from NCBI-Seqloc)
1624    local Object-id ,         -- for local software use
1625    general Dbtag }           -- for use by various databases
1626
1627--*** Seq-feat *******************************************
1628--*  sequence feature generalization
1629
1630Seq-feat ::= SEQUENCE {       -- only data and location are mandatory
1631    id Feat-id OPTIONAL ,        -- single identifier; see ids below
1632    data SeqFeatData ,           -- the specific data
1633    partial BOOLEAN OPTIONAL ,    -- incomplete in some way?
1634    except BOOLEAN OPTIONAL ,     -- something funny about this? (see except-text)
1635    comment VisibleString OPTIONAL ,  -- free-text comment
1636    product Seq-loc OPTIONAL ,    -- product of process
1637    location Seq-loc ,            -- feature made from
1638    qual SEQUENCE OF Gb-qual OPTIONAL ,  -- qualifiers
1639    title VisibleString OPTIONAL ,   -- for user defined label
1640    ext User-object OPTIONAL ,    -- user defined structure extension; see exts below
1641    cit Pub-set OPTIONAL ,        -- citations for this feature
1642    exp-ev ENUMERATED {           -- evidence for existence of feature
1643        experimental (1) ,        -- any reasonable experimental check
1644        not-experimental (2) } OPTIONAL , -- similarity, pattern, etc
1645    xref SET OF SeqFeatXref OPTIONAL ,   -- cite other relevant features
1646    dbxref SET OF Dbtag OPTIONAL ,  -- support for xref to other databases
1647    pseudo BOOLEAN OPTIONAL ,     -- annotated on pseudogene?
1648    except-text VisibleString OPTIONAL , -- explain if except=TRUE
1649    ids SET OF Feat-id OPTIONAL ,       -- set of Ids; will replace 'id' field
1650    exts SET OF User-object OPTIONAL , -- set of extensions; will replace 'ext' field
1651    support SeqFeatSupport OPTIONAL  -- will replace /experiment, /inference, model-evidence
1652}
1653
1654SeqFeatData ::= CHOICE {       -- payload of a feature: exactly one alternative is present
1655    gene Gene-ref ,            -- type imported from NCBI-Gene
1656    org Org-ref ,              -- type imported from NCBI-Organism
1657    cdregion Cdregion ,        -- coding region, see Cdregion in this module
1658    prot Prot-ref ,            -- type imported from NCBI-Protein
1659    rna RNA-ref ,              -- type imported from NCBI-RNA
1660    pub Pubdesc ,              -- publication applies to this seq
1661    seq Seq-loc ,              -- to annotate origin from another seq
1662    imp Imp-feat ,             -- imported feature, see Imp-feat in this module
1663    region VisibleString,      -- named region (globin locus)
1664    comment NULL ,             -- just a comment
1665    bond ENUMERATED {          -- bond type; the alternative carries only this code
1666        disulfide (1) ,
1667        thiolester (2) ,
1668        xlink (3) ,
1669        thioether (4) ,
1670        other (255) } ,
1671    site ENUMERATED {          -- site type; the alternative carries only this code
1672        active (1) ,
1673        binding (2) ,
1674        cleavage (3) ,
1675        inhibit (4) ,
1676        modified (5),
1677        glycosylation (6) ,
1678        myristoylation (7) ,
1679        mutagenized (8) ,
1680        metal-binding (9) ,
1681        phosphorylation (10) ,
1682        acetylation (11) ,
1683        amidation (12) ,
1684        methylation (13) ,
1685        hydroxylation (14) ,
1686        sulfatation (15) ,
1687        oxidative-deamination (16) ,
1688        pyrrolidone-carboxylic-acid (17) ,
1689        gamma-carboxyglutamic-acid (18) ,
1690        blocked (19) ,
1691        lipid-binding (20) ,
1692        np-binding (21) ,
1693        dna-binding (22) ,
1694        signal-peptide (23) ,
1695        transit-peptide (24) ,
1696        transmembrane-region (25) ,
1697        nitrosylation (26) ,
1698        other (255) } ,
1699    rsite Rsite-ref ,       -- restriction site  (for maps really)
1700    user User-object ,      -- user defined structure
1701    txinit Txinit ,         -- transcription initiation
1702    num Numbering ,         -- a numbering system
1703    psec-str ENUMERATED {   -- protein secondary structure
1704        helix (1) ,         -- any helix
1705        sheet (2) ,         -- beta sheet
1706        turn  (3) } ,       -- beta or gamma turn
1707    non-std-residue VisibleString ,  -- non-standard residue here in seq
1708    het Heterogen ,         -- cofactor, prosthetic grp, etc, bound to seq
1709    biosrc BioSource,       -- type imported from NCBI-BioSource
1710    clone Clone-ref,        -- clone feature, see Clone-ref in this module
1711    variation Variation-ref -- type imported from NCBI-Variation
1712}
1713
1714SeqFeatXref ::= SEQUENCE {       -- both optional because can have one or both; element of Seq-feat.xref
1715    id Feat-id OPTIONAL ,        -- the feature copied
1716    data SeqFeatData OPTIONAL }  -- the specific data
1717
1718SeqFeatSupport ::= SEQUENCE {   -- type of Seq-feat.support; all three sets are OPTIONAL
1719  experiment SET OF ExperimentSupport OPTIONAL ,
1720  inference SET OF InferenceSupport OPTIONAL ,
1721  model-evidence SET OF ModelEvidenceSupport OPTIONAL
1722}
1723
1724EvidenceCategory ::= INTEGER {   -- category code used by ExperimentSupport and InferenceSupport
1725  not-set (0) ,
1726  coordinates (1) ,
1727  description (2) ,
1728  existence (3)
1729}
1730
1731ExperimentSupport ::= SEQUENCE {   -- element of SeqFeatSupport.experiment
1732  category EvidenceCategory OPTIONAL ,
1733  explanation VisibleString ,        -- the only mandatory field
1734  pmids SET OF PubMedId OPTIONAL ,   -- PubMedId is imported from NCBI-Biblio
1735  dois SET OF DOI OPTIONAL           -- DOI is imported from NCBI-Biblio
1736}
1737
1738Program-id ::= SEQUENCE {         -- program name plus optional version; element of EvidenceBasis.programs
1739  name VisibleString ,
1740  version VisibleString OPTIONAL
1741}
1742
1743EvidenceBasis ::= SEQUENCE {      -- type of InferenceSupport.basis; both sets are OPTIONAL
1744  programs SET OF Program-id OPTIONAL ,
1745  accessions SET OF Seq-id OPTIONAL   -- Seq-id is imported from NCBI-Seqloc
1746}
1747
1748InferenceSupport ::= SEQUENCE {   -- element of SeqFeatSupport.inference; basis is the only mandatory field
1749  category EvidenceCategory OPTIONAL ,
1750  type INTEGER {
1751    not-set (0) ,
1752    similar-to-sequence (1) ,
1753    similar-to-aa (2) ,
1754    similar-to-dna (3) ,
1755    similar-to-rna (4) ,
1756    similar-to-mrna (5) ,
1757    similiar-to-est (6) ,             -- spelling (sic) is part of the value notation; do not rename
1758    similar-to-other-rna (7) ,
1759    profile (8) ,
1760    nucleotide-motif (9) ,
1761    protein-motif (10) ,
1762    ab-initio-prediction (11) ,
1763    alignment (12) ,
1764    other (255)
1765  } DEFAULT not-set ,                 -- an absent type means not-set (0)
1766  other-type VisibleString OPTIONAL , -- NOTE(review): presumably describes the type when type is other; confirm
1767  same-species BOOLEAN DEFAULT FALSE ,  -- an absent value means FALSE
1768  basis EvidenceBasis ,
1769  pmids SET OF PubMedId OPTIONAL ,
1770  dois SET OF DOI OPTIONAL
1771}
1772
1773ModelEvidenceItem ::= SEQUENCE {  -- element of the mrna, est and protein sets of ModelEvidenceSupport
1774  id Seq-id ,                        -- the only mandatory field
1775  exon-count INTEGER OPTIONAL ,
1776  exon-length INTEGER OPTIONAL ,
1777  full-length BOOLEAN DEFAULT FALSE ,             -- an absent value means FALSE
1778  supports-all-exon-combo BOOLEAN DEFAULT FALSE   -- an absent value means FALSE
1779}
1780
1781ModelEvidenceSupport ::= SEQUENCE {   -- exported; element of SeqFeatSupport.model-evidence; no mandatory fields
1782  method VisibleString OPTIONAL ,
1783  mrna SET OF ModelEvidenceItem OPTIONAL ,
1784  est SET OF ModelEvidenceItem OPTIONAL ,
1785  protein SET OF ModelEvidenceItem OPTIONAL ,
1786  identification Seq-id OPTIONAL ,
1787  dbxref SET OF Dbtag OPTIONAL ,
1788  exon-count INTEGER OPTIONAL ,
1789  exon-length INTEGER OPTIONAL ,
1790  full-length BOOLEAN DEFAULT FALSE ,             -- an absent value means FALSE
1791  supports-all-exon-combo BOOLEAN DEFAULT FALSE   -- an absent value means FALSE
1792}
1793
1794--*** CdRegion ***********************************************
1795--*
1796--*  Instructions to translate from a nucleic acid to a peptide
1797--*    conflict means it's supposed to translate but doesn't
1798--*
1799
1800
1801Cdregion ::= SEQUENCE {            -- coding region data; no mandatory fields
1802    orf BOOLEAN OPTIONAL ,             -- just an ORF ?
1803    frame ENUMERATED {
1804        not-set (0) ,                  -- not set, code uses one
1805        one (1) ,
1806        two (2) ,
1807        three (3) } DEFAULT not-set ,      -- reading frame; an absent value means not-set
1808    conflict BOOLEAN OPTIONAL ,        -- conflict: supposed to translate but doesn't
1809    gaps INTEGER OPTIONAL ,            -- number of gaps on conflict/except
1810    mismatch INTEGER OPTIONAL ,        -- number of mismatches on above
1811    code Genetic-code OPTIONAL ,       -- genetic code used
1812    code-break SEQUENCE OF Code-break OPTIONAL ,   -- individual exceptions
1813    stops INTEGER OPTIONAL }           -- number of stop codons on above
1814
1815                    -- each code is 64 cells long, in the order where
1816                    -- T=0,C=1,A=2,G=3, TTT=0, TTC=1, TCT=4, etc
1817                    -- NOTE: this order does NOT correspond to a Seq-data
1818                    -- encoding.  It is "natural" to codon usage instead.
1819                    -- the value in each cell is the AA coded for
1820                    -- start= AA coded only if first in peptide
1821                    --   in start array, if codon is not a legitimate start
1822                    --   codon, that cell will have the "gap" symbol for
1823                    --   that alphabet.  Otherwise it will have the AA
1824                    --   encoded when that codon is used at the start.
1825
1826Genetic-code ::= SET OF CHOICE {    -- exported; a set whose elements are each one of the forms below
1827    name VisibleString ,               -- name of a code
1828    id INTEGER ,                       -- id in dbase
1829    ncbieaa VisibleString ,            -- indexed to IUPAC extended
1830    ncbi8aa OCTET STRING ,             -- indexed to NCBI8aa
1831    ncbistdaa OCTET STRING ,           -- indexed to NCBIstdaa
1832    sncbieaa VisibleString ,            -- start, indexed to IUPAC extended
1833    sncbi8aa OCTET STRING ,             -- start, indexed to NCBI8aa
1834    sncbistdaa OCTET STRING }           -- start, indexed to NCBIstdaa
1835
1836Code-break ::= SEQUENCE {              -- specific codon exceptions; element of Cdregion.code-break
1837    loc Seq-loc ,                      -- location of exception
1838    aa CHOICE {                        -- the amino acid, as a code in one of three alphabets
1839        ncbieaa INTEGER ,              -- ASCII value of NCBIeaa code
1840        ncbi8aa INTEGER ,              -- NCBI8aa code
1841        ncbistdaa INTEGER } }           -- NCBIstdaa code
1842
1843Genetic-code-table ::= SET OF Genetic-code     -- table of genetic codes; not in this module's EXPORTS list
1844
1845--*** Import ***********************************************
1846--*
1847--*  Features imported from other databases
1848--*
1849
1850Imp-feat ::= SEQUENCE {              -- imported feature; type of SeqFeatData.imp
1851    key VisibleString ,                  -- mandatory key string
1852    loc VisibleString OPTIONAL ,         -- original location string
1853    descr VisibleString OPTIONAL }       -- text description
1854
1855Gb-qual ::= SEQUENCE {       -- one qualifier; element of Seq-feat.qual
1856    qual VisibleString ,     -- mandatory
1857    val VisibleString }      -- mandatory
1858
1859
1860--*** Clone-ref ***********************************************
1861--*
1862--*  Specification of clone features
1863--*
1864
1865Clone-ref ::= SEQUENCE {   -- clone feature; type of SeqFeatData.clone; name is the only mandatory field
1866    name VisibleString,        -- Official clone symbol
1867    library VisibleString OPTIONAL,     -- Library name
1868
1869    concordant BOOLEAN DEFAULT FALSE, -- absent means FALSE; NOTE(review): author queried whether OPTIONAL was intended
1870    unique BOOLEAN DEFAULT FALSE, -- absent means FALSE; NOTE(review): author queried whether OPTIONAL was intended
1871    placement-method INTEGER {
1872        end-seq (0),           -- Clone placed by end sequence
1873        insert-alignment (1),  -- Clone placed by insert alignment
1874        sts (2),               -- Clone placed by STS
1875        fish (3),
1876        fingerprint (4),
1877        end-seq-insert-alignment (5), -- combined end-seq and insert align
1878        external (253),           -- Placement provided externally
1879        curated (254),            -- Human placed or approved
1880        other (255)
1881    } OPTIONAL,
1882    clone-seq Clone-seq-set OPTIONAL   -- set of Clone-seq entries
1883}
1884
1885Clone-seq-set ::= SET OF Clone-seq   -- type of Clone-ref.clone-seq
1886
1887
1888Clone-seq ::= SEQUENCE {   -- element of Clone-seq-set; type and location are mandatory
1889    type INTEGER {
1890        insert (0),
1891        end (1),
1892        other (255)
1893    },
1894    confidence INTEGER {
1895        multiple (0),        -- Multiple hits
1896        na (1),              -- Unspecified
1897        nohit-rep (2),       -- No hits, end flagged repetitive
1898        nohitnorep (3),      -- No hits, end not flagged repetitive
1899        other-chrm (4),      -- Hit on different chromosome
1900        unique (5),
1901        virtual (6),         -- Virtual (hasn't been sequenced)
1902        multiple-rep (7),    -- Multiple hits, end flagged repetitive
1903        multiplenorep (8),   -- Multiple hits, end not flagged repetitive
1904        no-hit (9),          -- No hits
1905        other (255)
1906    } OPTIONAL,
1907    location Seq-loc,        -- location on sequence
1908    seq Seq-loc OPTIONAL,    -- clone sequence location
1909    align-id Dbtag OPTIONAL, -- internal alignment identifier
1910    support INTEGER {
1911        prototype (0),       -- sequence used to place clone
1912        supporting (1),      -- sequence supports placement
1913        supports-other(2),   -- supports a different placement
1914        non-supporting (3)   -- does not support any placement
1915    } OPTIONAL
1916}
1917
1918END
1919
1920
1921--*** Variation-ref ***********************************************
1922--*
1923--*  Specification of variation features
1924--*
1925
1926NCBI-Variation DEFINITIONS ::=
1927BEGIN
1928
1929EXPORTS Variation-ref, Variation-inst, VariantProperties,
1930        Population-data, Phenotype;
1931
1932IMPORTS Int-fuzz, User-object, Object-id, Dbtag FROM NCBI-General
1933        Seq-literal FROM NCBI-Sequence
1934        SubSource FROM NCBI-BioSource
1935        Seq-loc FROM NCBI-Seqloc
1936        Pub FROM NCBI-Pub;
1937
1938
1939-- --------------------------------------------------------------------------
1940-- Historically, the dbSNP definitions document data structures used in the
1941-- processing and annotation of variations by the dbSNP group.  The intention
1942-- is to provide information to clients that reflect internal information
1943-- produced during the mapping of SNPs
1944-- --------------------------------------------------------------------------
1945
1946VariantProperties ::= SEQUENCE {   -- exported; version is the only mandatory field
1947    version INTEGER,
1948
1949    -- NOTE:
1950    -- The format for most of these values is as an integer
1951    -- Unless otherwise noted, these integers represent a bitwise OR (= simple
1952    -- sum) of the possible values, and as such, these values represent the
1953    -- specific bit flags that may be set for each of the possible attributes
1954    -- here.
1955
1956    resource-link INTEGER {
1957        preserved        (1), -- Clinical, Pubmed, Cited, (0x01)
1958        provisional      (2), -- Provisional Third Party Annotations (0x02)
1959        has3D            (4), -- Has 3D structure SNP3D table (0x04)
1960        submitterLinkout (8), -- SNP->SubSNP->Batch link_out (0x08)
1961        clinical        (16), -- Clinical if LSDB, OMIM, TPA, Diagnostic (0x10)
1962        genotypeKit     (32)  -- Marker exists on high density genotyping kit
1963                              -- (0x20)
1964    } OPTIONAL,
1965
1966    gene-location INTEGER {
1967        in-gene         (1), -- Sequence intervals covered by a gene ID but not
1968                             -- having an aligned transcript (0x01)
1969        near-gene-5     (2), -- Within 2kb of the 5' end of a gene feature (0x02)
1970        near-gene-3     (4), -- Within 0.5kb of the 3' end of a gene feature (0x04)
1971        intron          (8), -- In Intron (0x08)
1972        donor          (16), -- In donor splice-site (0x10)
1973        acceptor       (32), -- In acceptor splice-site (0x20)
1974        utr-5          (64), -- In 5' UTR (0x40)
1975        utr-3         (128), -- In 3' UTR (0x80)
1976        in-start-codon(256), -- the variant is observed in a start codon
1977                             -- (0x100)
1978        in-stop-codon (512), -- the variant is observed in a stop codon
1979                             -- (0x200)
1980        intergenic   (1024), -- variant located between genes (0x400)
1981        conserved-noncoding(2048) -- variant is located in a conserved
1982                                  -- non-coding region (0x800)
1983    } OPTIONAL,
1984
1985    effect INTEGER {
1986        no-change      (0), -- known to cause no functional changes
1987                            -- since 0 does not combine with any other bit
1988                            -- value, 'no-change' specifically implies that
1989                            -- there are no consequences
1990        synonymous     (1), -- one allele in the set does not change the encoded
1991                            -- amino acid (0x1)
1992        nonsense       (2), -- one allele in the set changes to STOP codon
1993                            -- (TER).  (0x2)
1994        missense       (4), -- one allele in the set changes protein peptide
1995                            -- (0x4)
1996        frameshift     (8), -- one allele in the set changes all downstream
1997                            -- amino acids (0x8)
1998
1999        up-regulator  (16), -- the variant causes increased transcription
2000                            -- (0x10)
2001        down-regulator(32), -- the variant causes decreased transcription
2002                            -- (0x20)
2003        methylation   (64), -- (0x40)
2004        stop-gain     (128), -- reference codon is not stop codon, but the snp
2005                             -- variant allele changes the codon to a
2006                             -- terminating codon. (0x80)
2007        stop-loss     (256)  -- reverse of STOP-GAIN: reference codon is a
2008                             -- stop codon, but a snp variant allele changes
2009                             -- the codon to a non-terminating codon. (0x100)
2010    } OPTIONAL,
2011
2012    mapping INTEGER {
2013        has-other-snp         (1), -- Another SNP has the same mapped positions
2014                                   -- on reference assembly (0x01)
2015        has-assembly-conflict (2), -- Weight 1 or 2 SNPs that map to different
2016                                   -- chromosomes on different assemblies (0x02)
2017        is-assembly-specific  (4)  -- Only maps to 1 assembly (0x04)
2018    } OPTIONAL,
2019
2020    -- map-weight captures specificity of placement
2021    -- NOTE: This is *NOT* a bitfield
2022    map-weight INTEGER {
2023        is-uniquely-placed(1),
2024        placed-twice-on-same-chrom(2),
2025        placed-twice-on-diff-chrom(3),
2026        many-placements(10)
2027    } OPTIONAL,
2028
2029    frequency-based-validation INTEGER {
2030        is-mutation       (1), -- low frequency variation that is cited in
2031                               -- journal or other reputable sources (0x01)
2032        above-5pct-all    (2), -- >5% minor allele freq in each and all
2033                               -- populations (0x02)
2034        above-5pct-1plus  (4), -- >5% minor allele freq in 1+ populations (0x04)
2035        validated         (8), -- Bit is set if the variant has a minor allele
2036                               -- observed in two or more separate chromosomes
2037                               -- (0x08)
2038        above-1pct-all   (16), -- >1% minor allele freq in each and all
2039                               -- populations (0x10)
2040        above-1pct-1plus (32)  -- >1% minor allele freq in 1+ populations (0x20)
2041    } OPTIONAL,
2042
2043    genotype INTEGER {
2044        in-haplotype-set (1), -- Exists in a haplotype tagging set (0x01)
2045        has-genotypes    (2)  -- SNP has individual genotype (0x02)
2046    } OPTIONAL,
2047
2048    -- project IDs are IDs from BioProjects
2049    -- in order to report information about project relationships, we
2050    -- require projects to be registered
2051    -- This field in many ways duplicates dbxrefs; however, the
2052    -- intention of this field is to more adequately reflect
2053    -- ownership and data source
2054    --
2055    -- 11/9/2010: DO NOT USE
2056    -- This field was changed in the spec in a breaking way; using it will
2057    -- break clients.  We are officially suppressing / abandoning this field.
2058    -- Clients who need to use this should instead place the data in
2059    -- Seq-feat.dbxref, using the db name 'BioProject'
2060    project-data SET OF INTEGER OPTIONAL,
2061
2062    quality-check INTEGER {
2063        contig-allele-missing   (1), -- Reference sequence allele at the mapped
2064                                     -- position is not present in the SNP
2065                                     -- allele list, adjusted for orientation
2066                                     -- (0x01)
2067        withdrawn-by-submitter  (2), -- One member SS is withdrawn by submitter
2068                                     -- (0x02)
2069        non-overlapping-alleles (4), -- RS set has 2+ alleles from different
2070                                     -- submissions and these sets share no
2071                                     -- alleles in common (0x04)
2072        strain-specific         (8), -- Strain specific fixed difference (0x08)
2073        genotype-conflict      (16)  -- Has Genotype Conflict (0x10)
2074    } OPTIONAL,
2075
2076    confidence INTEGER {
2077        unknown         (0),
2078        likely-artifact (1),
2079        other           (255)
2080    } OPTIONAL,
2081
2082    -- has this variant been validated?
2083    -- While a boolean flag offers no subtle distinctions of validation
2084    -- methods, occasionally it is only known as a single boolean value
2085    -- NOTE: this flag is redundant and should be omitted if more comprehensive
2086    -- validation information is present
2087    other-validation BOOLEAN OPTIONAL,
2088
2089    -- origin of this allele, if known
2090    -- note that these are powers-of-two, and represent bits; thus, we can
2091    -- represent more than one state simultaneously through a bitwise OR
2092    allele-origin INTEGER {
2093        unknown         (0),
2094        germline        (1),
2095        somatic         (2),
2096        inherited       (4),
2097        paternal        (8),
2098        maternal        (16),
2099        de-novo         (32),
2100        biparental      (64),
2101        uniparental     (128),
2102        not-tested      (256),
2103        tested-inconclusive (512),
2104        not-reported   (1024),
2105
2106        -- stopper - 2^30 (the value below is 1073741824 = 0x40000000)
2107        other           (1073741824)
2108    } OPTIONAL,
2109
2110    -- observed allele state, if known
2111    -- NOTE: THIS IS NOT A BITFIELD!
2112    allele-state INTEGER {
2113        unknown         (0),
2114        homozygous      (1),
2115        heterozygous    (2),
2116        hemizygous      (3),
2117        nullizygous     (4),
2118        other           (255)
2119    } OPTIONAL,
2120
2121    -- NOTE:
2122    -- 'allele-frequency' here refers to the minor allele frequency of the
2123    -- default population
2124    allele-frequency REAL OPTIONAL,
2125
2126    -- is this variant the ancestral allele?
2127    is-ancestral-allele BOOLEAN OPTIONAL
2128}
2128
2129Phenotype ::= SEQUENCE {   -- exported; element of Variation-ref.phenotype; no mandatory fields
2130    source VisibleString OPTIONAL,
2131    term VisibleString OPTIONAL,
2132    xref SET OF Dbtag OPTIONAL,   -- Dbtag is imported from NCBI-General
2133
2134    -- does this variant have known clinical significance?
2135    clinical-significance INTEGER {
2136        unknown                 (0),
2137        untested                (1),
2138        non-pathogenic          (2),
2139        probable-non-pathogenic (3),
2140        probable-pathogenic     (4),
2141        pathogenic              (5),
2142        drug-response           (6),
2143        histocompatibility      (7),
2144        other                   (255)
2145    } OPTIONAL
2146}
2147
2148Population-data ::= SEQUENCE {   -- exported; population is the only mandatory field
2149    -- assayed population (e.g. HAPMAP-CEU)
2150    population VisibleString,
2151    genotype-frequency REAL OPTIONAL,
2152    chromosomes-tested INTEGER OPTIONAL,
2153    sample-ids SET OF Object-id OPTIONAL,
2154    allele-frequency REAL OPTIONAL,
2155
2156    -- This field is an explicit bit-field
2157    -- Valid values should be a bitwise combination (= simple sum)
2158    -- of any of the values below
2159    flags INTEGER {
2160        is-default-population   (1),
2161        is-minor-allele         (2),
2162        is-rare-allele          (4)
2163    } OPTIONAL
2164}
2165
2166Ext-loc ::= SEQUENCE {   -- element of Variation-ref.ext-locs; both fields are mandatory
2167    id Object-id,        -- Object-id is imported from NCBI-General
2168    location Seq-loc     -- Seq-loc is imported from NCBI-Seqloc
2169}
2170
2171
2172Variation-ref ::= SEQUENCE {   -- exported; data is the only mandatory field
2173    -- ids (i.e., SNP rsid / ssid, dbVar nsv/nssv)
2174    -- expected values include 'dbSNP|rs12334', 'dbSNP|ss12345', 'dbVar|nsv1'
2175    --
2176    -- we relate three kinds of IDs here:
2177    --  - our current object's id
2178    --  - the id of this object's parent, if it exists
2179    --  - the sample ID that this item originates from
2180    id        Dbtag OPTIONAL,
2181    parent-id Dbtag OPTIONAL,
2182    sample-id Object-id OPTIONAL,
2183    other-ids SET OF Dbtag OPTIONAL,
2184
2185    -- names and synonyms
2186    -- some variants have well-known canonical names and possible accepted
2187    -- synonyms
2188    name VisibleString OPTIONAL,
2189    synonyms SET OF VisibleString OPTIONAL,
2190
2191    -- tag for comment and descriptions
2192    description VisibleString OPTIONAL,
2193
2194    -- phenotype
2195    phenotype SET OF Phenotype OPTIONAL,
2196
2197    -- sequencing / acquisition method
2198    method SET OF INTEGER {
2199        unknown             (0),
2200        bac-acgh            (1),
2201        computational       (2),
2202        curated             (3),
2203        digital-array       (4),
2204        expression-array    (5),
2205        fish                (6),
2206        flanking-sequence   (7),
2207        maph                (8),
2208        mcd-analysis        (9),
2209        mlpa                (10),
2210        oea-assembly        (11),
2211        oligo-acgh          (12),
2212        paired-end          (13),
2213        pcr                 (14),
2214        qpcr                (15),
2215        read-depth          (16),
2216        roma                (17),
2217        rt-pcr              (18),
2218        sage                (19),
2219        sequence-alignment  (20),
2220        sequencing          (21),
2221        snp-array           (22),
2222        snp-genoytyping     (23),  -- spelling (sic) is part of the value notation; do not rename
2223        southern            (24),
2224        western             (25),
2225        optical-mapping     (26),
2226
2227        other               (255)
2228    } OPTIONAL,
2229
2230    -- Note about SNP representation and pertinent fields: allele-frequency,
2231    -- population, quality-codes:
2232    -- The case of multiple alleles for a SNP would be described by
2233    -- parent-feature of type Variation-set.diff-alleles, where the child
2234    -- features of type Variation-inst, all at the same location, would
2235    -- describe individual alleles.
2236
2237    -- population data
2238    -- DEPRECATED - do not use
2239    population-data SET OF Population-data OPTIONAL,
2240
2241    -- variant properties bit fields
2242    variant-prop VariantProperties OPTIONAL,
2243
2244    -- has this variant been validated?
2245    -- DEPRECATED: new field = VariantProperties.other-validation
2246    validated BOOLEAN OPTIONAL,
2247
2248    -- link-outs to GeneTests database
2249    -- DEPRECATED - do not use
2250    clinical-test SET OF Dbtag OPTIONAL,
2251
2252    -- origin of this allele, if known
2253    -- note that these are powers-of-two, and represent bits; thus, we can
2254    -- represent more than one state simultaneously through a bitwise OR
2255    -- DEPRECATED: new field = VariantProperties.allele-origin (which also defines not-reported)
2256    allele-origin INTEGER {
2257        unknown         (0),
2258        germline        (1),
2259        somatic         (2),
2260        inherited       (4),
2261        paternal        (8),
2262        maternal        (16),
2263        de-novo         (32),
2264        biparental      (64),
2265        uniparental     (128),
2266        not-tested      (256),
2267        tested-inconclusive (512),
2268
2269        -- stopper - 2^30 (the value below is 1073741824 = 0x40000000)
2270        other           (1073741824)
2271    } OPTIONAL,
2272
2273    -- observed allele state, if known
2274    -- DEPRECATED: new field = VariantProperties.allele-state
2275    allele-state INTEGER {
2276        unknown         (0),
2277        homozygous      (1),
2278        heterozygous    (2),
2279        hemizygous      (3),
2280        nullizygous     (4),
2281        other           (255)
2282    } OPTIONAL,
2283
2284    -- NOTE:
2285    -- 'allele-frequency' here refers to the minor allele frequency of the
2286    -- default population
2287    -- DEPRECATED: new field = VariantProperties.allele-frequency
2288    allele-frequency REAL OPTIONAL,
2289
2290    -- is this variant the ancestral allele?
2291    -- DEPRECATED: new field = VariantProperties.is-ancestral-allele
2292    is-ancestral-allele BOOLEAN OPTIONAL,
2293
2294    -- publication support.
2295    -- Note: made this pub instead of pub-equiv, since
2296    -- Pub can be pub-equiv and pub-equiv is a set of pubs, but it looks like
2297    -- Pub is more often used as top-level container
2298    -- DEPRECATED - do not use; use Seq-feat.dbxref instead
2299    pub Pub OPTIONAL,
2300
2301    -- the variation itself: exactly one of the alternatives below (mandatory)
2302    data CHOICE {
2303        unknown NULL,
2304        note    VisibleString, --free-form
2305        uniparental-disomy NULL,
2306
2307        -- actual sequence-edit at feat.location
2308        instance        Variation-inst,
2309
2310        -- Set of related Variations.
2311        -- Location of the set equals to the union of member locations
2312        set SEQUENCE {
2313            type INTEGER {
2314                unknown     (0),
2315                compound    (1), -- complex change at the same location on the
2316                                 -- same molecule
2317                products    (2), -- different products arising from the same
2318                                 -- variation in a precursor, e.g. r.[13g>a,
2319                                 -- 13_88del]
2320                haplotype   (3), -- changes on the same allele, e.g
2321                                 -- r.[13g>a;15u>c]
2322                genotype    (4), -- changes on different alleles in the same
2323                                 -- genotype, e.g. g.[476C>T]+[476C>T]
2324                mosaic      (5), -- different genotypes in the same individual
2325                individual  (6), -- same organism; allele relationship unknown,
2326                                 -- e.g. g.[476C>T(+)183G>C]
2327                population  (7), -- population
2328                alleles     (8), -- set represents a set of observed alleles
2329                package     (9), -- set represents a package of observations at
2330                                 -- a given location, generally containing
2331                                 -- asserted + reference
2332                other       (255)
2333            },
2334            variations SET OF Variation-ref,   -- recursive: members are themselves Variation-ref
2335            name  VisibleString OPTIONAL
2336        },
2337
2338        -- variant is a complex and undescribed change at the location
2339        -- This type of variant is known to occur in dbVar submissions
2340        complex NULL
2341    },
2342
2343    consequence SET OF CHOICE {
2344        unknown     NULL,
2345        splicing    NULL, --some effect on splicing
2346        note        VisibleString,  --freeform
2347
2348        -- Describe resulting variation in the product, e.g. missense,
2349        -- nonsense, silent, neutral, etc in a protein, that arises from
2350        -- THIS variation.
2351        variation   Variation-ref,
2352
2353        -- see http://www.hgvs.org/mutnomen/recs-prot.html
2354        frameshift SEQUENCE {
2355            phase INTEGER OPTIONAL,
2356            x-length INTEGER OPTIONAL
2357        },
2358
2359        loss-of-heterozygosity SEQUENCE {
2360            -- In germline comparison, it will be reference genome assembly
2361            -- (default) or reference/normal population. In somatic mutation,
2362            -- it will be a name of the normal tissue.
2363            reference VisibleString OPTIONAL,
2364
2365            -- Name of the testing subject type or the testing tissue.
2366            test VisibleString OPTIONAL
2367        }
2368    } OPTIONAL,
2369
2370    -- Observed location, if different from the parent set or feature.location.
2371    -- DEPRECATED - do not use
2372    location        Seq-loc OPTIONAL,
2373
2374    -- reference other locs, e.g. mapped source
2375    -- DEPRECATED - do not use
2376    ext-locs SET OF Ext-loc OPTIONAL,
2377
2378    -- DEPRECATED - do not use; use Seq-feat.exts instead
2379    ext             User-object OPTIONAL,
2380
2381    somatic-origin SET OF SEQUENCE {
2382        -- description of the somatic origin itself
2383        source SubSource OPTIONAL,
2384        -- condition related to this origin's type
2385        condition SEQUENCE {
2386            description VisibleString OPTIONAL,
2387            -- reference to BioTerm / other descriptive database
2388            object-id SET OF Dbtag OPTIONAL
2389        } OPTIONAL
2390    } OPTIONAL
2391
2392}
2392
2393
-- Delta-item: one element of a Variation-inst.delta list.
-- It combines an optional sequence source (seq), an optional repeat count
-- (multiplier, with optional fuzz) and the edit operation to apply (action).
Delta-item ::= SEQUENCE {
    -- where the sequence for this item comes from: a literal, another
    -- location, or the location of the variation itself
    seq CHOICE {
        literal Seq-literal,
        loc Seq-loc,
        this NULL --same location as variation-ref itself
    } OPTIONAL,

    -- Multiplier allows representing a tandem, e.g.  ATATAT as AT*3
    -- This allows describing CNV/SSR where delta=self  with a
    -- multiplier which specifies the count of the repeat unit.

    multiplier          INTEGER OPTIONAL, --assumed 1 if not specified.
    multiplier-fuzz     Int-fuzz OPTIONAL,

    -- edit operation; morph is used when the field is absent
    action INTEGER {

        -- replace len(seq) positions starting with location.start with seq
        morph      (0),

        -- go downstream by distance specified by multiplier (upstream if < 0),
        -- in genomic context.
        offset     (1),

        -- excise sequence at location
        -- if multiplier is specified, delete len(location)*multiplier
        -- positions downstream
        del-at     (2),

        -- insert seq before the location.start
        ins-before (3)

    } DEFAULT morph
}


-- Variation instance: the kind of change (type), the list of Delta-items
-- that realizes it (delta), and an optional observation bit-field.
Variation-inst ::= SEQUENCE {
    type INTEGER {
        unknown         (0),    -- delta=[]
        identity        (1),    -- delta=[]
        inv             (2),    -- delta=[del, ins.seq=
                                -- RevComp(variation-location)]
        snv             (3),    -- delta=[morph of length 1]
                                -- NOTE: this is snV not snP; the latter
                                -- requires frequency-based validation to be
                                -- established in VariantProperties
                                -- the strict definition of SNP is an SNV with
                                -- an established population frequency of at
                                -- least 1% in at least 1 population
        mnp             (4),    -- delta=[morph of length >1]
        delins          (5),    -- delta=[del, ins]
        del             (6),    -- delta=[del]
        ins             (7),    -- delta=[ins]
        microsatellite  (8),    -- delta=[del, ins.seq= repeat-unit with fuzzy
                                -- multiplier]
                                -- variation-location is the microsat expansion
                                -- on the sequence
        transposon      (9),    -- delta=[del, ins.seq= known donor or 'this']
                                -- variation-location is equiv of transposon
                                -- locs.
        cnv             (10),   -- delta=[del, ins= 'this' with fuzzy
                                -- multiplier]
        direct-copy     (11),   -- delta=[ins.seq= upstream location on the
                                -- same strand]
        rev-direct-copy (12),   -- delta=[ins.seq= downstream location on the
                                -- same strand]
        inverted-copy   (13),   -- delta=[ins.seq= upstream location on the
                                -- opposite strand]
        everted-copy    (14),   -- delta=[ins.seq= downstream location on the
                                -- opposite strand]
        translocation   (15),   -- delta=like delins
        prot-missense   (16),   -- delta=[morph of length 1]
        prot-nonsense   (17),   -- delta=[del]; variation-location is the tail
                                -- of the protein being truncated
        prot-neutral    (18),   -- delta=[morph of length 1]
        prot-silent     (19),   -- delta=[morph of length 1, same AA as at
                                -- variation-location]
        prot-other      (20),   -- delta=any

        other           (255)   -- delta=any
    },

    -- Sequence that replaces the location, in biological order.
    delta SEQUENCE OF Delta-item,

    -- 'observation' is used to label items in a Variation-ref package
    -- This field is explicitly a bit-field, so the bitwise OR (= sum) of any
    -- of the values may be observed.
    observation INTEGER {
        asserted        (1),   -- inst represents the asserted base at a
                               -- position
        reference       (2),   -- inst represents the reference base at the
                               -- position
        variant         (4)    -- inst represents the observed variant at a
                               -- given position
    } OPTIONAL
}

END


--**********************************************************************
--
--  NCBI Restriction Sites
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-Rsite DEFINITIONS ::=
BEGIN

EXPORTS Rsite-ref;

IMPORTS Dbtag FROM NCBI-General;

-- Reference to a restriction site, given either as free text or as a
-- tag into a restriction site database.
Rsite-ref ::= CHOICE {
    str VisibleString ,     -- may be unparsable
    db  Dbtag }             -- pointer to a restriction site database

END

--**********************************************************************
--
--  NCBI RNAs
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-RNA DEFINITIONS ::=
BEGIN

EXPORTS RNA-ref, Trna-ext, RNA-gen, RNA-qual, RNA-qual-set;

IMPORTS Seq-loc FROM NCBI-Seqloc;

--*** rnas ***********************************************
--*
--*  various rnas
--*
                         -- minimal RNA sequence
RNA-ref ::= SEQUENCE {
    type ENUMERATED {            -- type of RNA feature
        unknown (0) ,
        premsg (1) ,
        mRNA (2) ,
        tRNA (3) ,
        rRNA (4) ,
        snRNA (5) ,              -- will become ncRNA, with RNA-gen.class = snRNA
        scRNA (6) ,              -- will become ncRNA, with RNA-gen.class = scRNA
        snoRNA (7) ,             -- will become ncRNA, with RNA-gen.class = snoRNA
        ncRNA (8) ,              -- non-coding RNA; subsumes snRNA, scRNA, snoRNA
        tmRNA (9) ,
        miscRNA (10) ,
        other (255) } ,
    pseudo BOOLEAN OPTIONAL ,
    ext CHOICE {
        name VisibleString ,        -- for naming "other" type
        tRNA Trna-ext ,             -- for tRNAs
        gen RNA-gen } OPTIONAL      -- generic fields for ncRNA, tmRNA, miscRNA
    }

Trna-ext ::= SEQUENCE {                 -- tRNA feature extensions
    aa CHOICE {                         -- aa this carries
        iupacaa INTEGER ,
        ncbieaa INTEGER ,
        ncbi8aa INTEGER ,
        ncbistdaa INTEGER } OPTIONAL ,
    codon SET OF INTEGER OPTIONAL ,     -- codon(s) as in Genetic-code
    anticodon Seq-loc OPTIONAL }        -- location of anticodon

-- Generic descriptive fields; RNA-ref.ext.gen uses this for ncRNA, tmRNA
-- and miscRNA.
RNA-gen ::= SEQUENCE {
    class VisibleString OPTIONAL ,      -- for ncRNAs, the class of non-coding RNA:
                                        -- examples: antisense_RNA, guide_RNA, snRNA
    product VisibleString OPTIONAL ,
    quals RNA-qual-set OPTIONAL         -- e.g., tag_peptide qualifier for tmRNAs
}

RNA-qual ::= SEQUENCE {                 -- Additional data values for RNA-gen,
    qual VisibleString ,                -- in a tag (qual), value (val) format
    val VisibleString }

-- Ordered list of RNA-qual entries, referenced by RNA-gen.quals.
RNA-qual-set ::= SEQUENCE OF RNA-qual

END

--**********************************************************************
--
--  NCBI Genes
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-Gene DEFINITIONS ::=
BEGIN

EXPORTS Gene-ref, Gene-nomenclature;

IMPORTS Dbtag FROM NCBI-General;

--*** Gene ***********************************************
--*
--*  reference to a gene
--*

Gene-ref ::= SEQUENCE {
    locus VisibleString OPTIONAL ,        -- Official gene symbol
    allele VisibleString OPTIONAL ,       -- Official allele designation
    desc VisibleString OPTIONAL ,         -- descriptive name
    maploc VisibleString OPTIONAL ,       -- descriptive map location
    pseudo BOOLEAN DEFAULT FALSE ,        -- pseudogene
    db SET OF Dbtag OPTIONAL ,            -- ids in other dbases
    syn SET OF VisibleString OPTIONAL ,   -- synonyms for locus
    locus-tag VisibleString OPTIONAL ,    -- systematic gene name (e.g., MI0001, ORF0069)
    formal-name Gene-nomenclature OPTIONAL
}

-- Nomenclature record referenced by Gene-ref.formal-name: a mandatory
-- status plus optional symbol, name and source database tag.
Gene-nomenclature ::= SEQUENCE {
    status ENUMERATED {
        unknown (0) ,
        official (1) ,
        interim (2)
    } ,
    symbol VisibleString OPTIONAL ,
    name VisibleString OPTIONAL ,
    source Dbtag OPTIONAL
}

END


--**********************************************************************
--
--  NCBI Organism
--  by James Ostell, 1994
--  version 3.0
--
--**********************************************************************

NCBI-Organism DEFINITIONS ::=
BEGIN

EXPORTS Org-ref;

IMPORTS Dbtag FROM NCBI-General;

--*** Org-ref ***********************************************
--*
--*  Reference to an organism
--*     defines only the organism.. lower levels of detail for biological
--*     molecules are provided by the Source object
--*

Org-ref ::= SEQUENCE {
    taxname VisibleString OPTIONAL ,   -- preferred formal name
    common VisibleString OPTIONAL ,    -- common name
    mod SET OF VisibleString OPTIONAL , -- unstructured modifiers
    db SET OF Dbtag OPTIONAL ,         -- ids in taxonomic or culture dbases
    syn SET OF VisibleString OPTIONAL ,  -- synonyms for taxname or common
    orgname OrgName OPTIONAL }


-- Structured organism name referenced by Org-ref.orgname: the name itself
-- in one of several forms, plus modifiers, lineage and genetic-code fields.
OrgName ::= SEQUENCE {
    name CHOICE {
        binomial BinomialOrgName ,         -- genus/species type name
        virus VisibleString ,              -- virus names are different
        hybrid MultiOrgName ,              -- hybrid between organisms
        namedhybrid BinomialOrgName ,      -- some hybrids have genus x species name
        partial PartialOrgName } OPTIONAL , -- when genus not known
    attrib VisibleString OPTIONAL ,        -- attribution of name
    mod SEQUENCE OF OrgMod OPTIONAL ,
    lineage VisibleString OPTIONAL ,       -- lineage with semicolon separators
    gcode INTEGER OPTIONAL ,               -- genetic code (see CdRegion)
    mgcode INTEGER OPTIONAL ,              -- mitochondrial genetic code
    div VisibleString OPTIONAL ,           -- GenBank division code
    pgcode INTEGER OPTIONAL }              -- plastid genetic code


-- Typed modifier used in OrgName.mod: a subtype code, its text value
-- (subname) and an optional attribution.
OrgMod ::= SEQUENCE {
    subtype INTEGER {
        strain (2) ,
        substrain (3) ,
        type (4) ,
        subtype (5) ,
        variety (6) ,
        serotype (7) ,
        serogroup (8) ,
        serovar (9) ,
        cultivar (10) ,
        pathovar (11) ,
        chemovar (12) ,
        biovar (13) ,
        biotype (14) ,
        group (15) ,
        subgroup (16) ,
        isolate (17) ,
        common (18) ,
        acronym (19) ,
        dosage (20) ,          -- chromosome dosage of hybrid
        nat-host (21) ,        -- natural host of this specimen
        sub-species (22) ,
        specimen-voucher (23) ,
        authority (24) ,
        forma (25) ,
        forma-specialis (26) ,
        ecotype (27) ,
        synonym (28) ,
        anamorph (29) ,
        teleomorph (30) ,
        breed (31) ,
        gb-acronym (32) ,       -- used by taxonomy database
        gb-anamorph (33) ,      -- used by taxonomy database
        gb-synonym (34) ,       -- used by taxonomy database
        culture-collection (35) ,
        bio-material (36) ,
        metagenome-source (37) ,
        type-material (38) ,
        old-lineage (253) ,
        old-name (254) ,
        other (255) } ,         -- ASN5 note: old-name (254), once planned for the next spec, is now defined above
    subname VisibleString ,
    attrib VisibleString OPTIONAL }  -- attribution/source of name

BinomialOrgName ::= SEQUENCE {
    genus VisibleString ,               -- required
    species VisibleString OPTIONAL ,    -- species required if subspecies used
    subspecies VisibleString OPTIONAL }

MultiOrgName ::= SEQUENCE OF OrgName   -- the first will be used to assign division

PartialOrgName ::= SEQUENCE OF TaxElement  -- when we don't know the genus

-- One named taxonomic level; PartialOrgName is a list of these.
TaxElement ::= SEQUENCE {
    fixed-level INTEGER {
       other (0) ,                     -- level must be set in string
       family (1) ,
       order (2) ,
       class (3) } ,
    level VisibleString OPTIONAL ,
    name VisibleString }

END


--**********************************************************************
--
--  NCBI BioSource
--  by James Ostell, 1994
--  version 3.0
--
--**********************************************************************

NCBI-BioSource DEFINITIONS ::=
BEGIN

EXPORTS BioSource, SubSource;

IMPORTS Org-ref FROM NCBI-Organism;

--********************************************************************
--
-- BioSource gives the source of the biological material
--   for sequences
--
--********************************************************************

BioSource ::= SEQUENCE {
    genome INTEGER {         -- biological context
        unknown (0) ,
        genomic (1) ,
        chloroplast (2) ,
        chromoplast (3) ,
        kinetoplast (4) ,
        mitochondrion (5) ,
        plastid (6) ,
        macronuclear (7) ,
        extrachrom (8) ,
        plasmid (9) ,
        transposon (10) ,
        insertion-seq (11) ,
        cyanelle (12) ,
        proviral (13) ,
        virion (14) ,
        nucleomorph (15) ,
        apicoplast (16) ,
        leucoplast (17) ,
        proplastid (18) ,
        endogenous-virus (19) ,
        hydrogenosome (20) ,
        chromosome (21) ,
        chromatophore (22) ,
        plasmid-in-mitochondrion (23) ,
        plasmid-in-plastid (24)
      } DEFAULT unknown ,
    origin INTEGER {
      unknown (0) ,
      natural (1) ,                    -- normal biological entity
      natmut (2) ,                     -- naturally occurring mutant
      mut (3) ,                        -- artificially mutagenized
      artificial (4) ,                 -- artificially engineered
      synthetic (5) ,                  -- purely synthetic
      other (255)
    } DEFAULT unknown ,
    org Org-ref ,
    subtype SEQUENCE OF SubSource OPTIONAL ,
    is-focus NULL OPTIONAL ,           -- to distinguish biological focus
    pcr-primers PCRReactionSet OPTIONAL }

-- PCR primer data referenced by BioSource.pcr-primers: a set of reactions,
-- each with optional forward and reverse primer sets; every primer has an
-- optional sequence and an optional name, both plain strings.
PCRReactionSet ::= SET OF PCRReaction

PCRReaction ::= SEQUENCE {
    forward PCRPrimerSet OPTIONAL ,
    reverse PCRPrimerSet OPTIONAL }

PCRPrimerSet ::= SET OF PCRPrimer

PCRPrimer ::= SEQUENCE {
    seq PCRPrimerSeq OPTIONAL ,
    name PCRPrimerName OPTIONAL }

PCRPrimerSeq ::= VisibleString

PCRPrimerName ::= VisibleString

-- Typed qualifier used in BioSource.subtype: a subtype code, its text
-- value (name) and an optional attribution.
SubSource ::= SEQUENCE {
    subtype INTEGER {
        chromosome (1) ,
        map (2) ,
        clone (3) ,
        subclone (4) ,
        haplotype (5) ,
        genotype (6) ,
        sex (7) ,
        cell-line (8) ,
        cell-type (9) ,
        tissue-type (10) ,
        clone-lib (11) ,
        dev-stage (12) ,
        frequency (13) ,
        germline (14) ,
        rearranged (15) ,
        lab-host (16) ,
        pop-variant (17) ,
        tissue-lib (18) ,
        plasmid-name (19) ,
        transposon-name (20) ,
        insertion-seq-name (21) ,
        plastid-name (22) ,
        country (23) ,
        segment (24) ,
        endogenous-virus-name (25) ,
        transgenic (26) ,
        environmental-sample (27) ,
        isolation-source (28) ,
        lat-lon (29) ,          -- +/- decimal degrees
        collection-date (30) ,  -- DD-MMM-YYYY format
        collected-by (31) ,     -- name of person who collected the sample
        identified-by (32) ,    -- name of person who identified the sample
        fwd-primer-seq (33) ,   -- sequence (possibly more than one; semicolon-separated)
        rev-primer-seq (34) ,   -- sequence (possibly more than one; semicolon-separated)
        fwd-primer-name (35) ,
        rev-primer-name (36) ,
        metagenomic (37) ,
        mating-type (38) ,
        linkage-group (39) ,
        haplogroup (40) ,
        whole-replicon (41) ,
        phenotype (42) ,
        altitude (43) ,
        other (255) } ,
    name VisibleString ,
    attrib VisibleString OPTIONAL }    -- attribution/source of this name

END

--**********************************************************************
--
--  NCBI Protein
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-Protein DEFINITIONS ::=
BEGIN

EXPORTS Prot-ref;

IMPORTS Dbtag FROM NCBI-General;

--*** Prot-ref ***********************************************
--*
--*  Reference to a protein name
--*

Prot-ref ::= SEQUENCE {
    name SET OF VisibleString OPTIONAL ,      -- protein name
    desc VisibleString OPTIONAL ,      -- description (instead of name)
    ec SET OF VisibleString OPTIONAL , -- E.C. number(s)
    activity SET OF VisibleString OPTIONAL ,  -- activities
    db SET OF Dbtag OPTIONAL ,         -- ids in other dbases
    processed ENUMERATED {             -- processing status; not-set when absent
       not-set (0) ,
       preprotein (1) ,
       mature (2) ,
       signal-peptide (3) ,
       transit-peptide (4) ,
       propeptide (5) } DEFAULT not-set }

END
--********************************************************************
--
--  Transcription Initiation Site Feature Data Block
--  James Ostell, 1991
--  Philip Bucher, David Ghosh
--  version 1.1
--
--
--
--********************************************************************

NCBI-TxInit DEFINITIONS ::=
BEGIN

EXPORTS Txinit;

IMPORTS Gene-ref FROM NCBI-Gene
        Prot-ref FROM NCBI-Protein
        Org-ref FROM NCBI-Organism;

Txinit ::= SEQUENCE {
    name VisibleString ,    -- descriptive name of initiation site
    syn SEQUENCE OF VisibleString OPTIONAL ,   -- synonyms
    gene SEQUENCE OF Gene-ref OPTIONAL ,  -- gene(s) transcribed
    protein SEQUENCE OF Prot-ref OPTIONAL ,   -- protein(s) produced
    rna SEQUENCE OF VisibleString OPTIONAL ,  -- rna(s) produced
    expression VisibleString OPTIONAL ,  -- tissue/time of expression
    txsystem ENUMERATED {       -- transcription apparatus used at this site
        unknown (0) ,
        pol1 (1) ,      -- eukaryotic Pol I
        pol2 (2) ,      -- eukaryotic Pol II
        pol3 (3) ,      -- eukaryotic Pol III
        bacterial (4) ,
        viral (5) ,
        rna (6) ,       -- RNA replicase
        organelle (7) ,
        other (255) } ,
    txdescr VisibleString OPTIONAL ,   -- modifiers on txsystem
    txorg Org-ref OPTIONAL ,  -- organism supplying transcription apparatus
    mapping-precise BOOLEAN DEFAULT FALSE ,  -- mapping precise or approx
    location-accurate BOOLEAN DEFAULT FALSE , -- does Seq-loc reflect mapping
    inittype ENUMERATED {
        unknown (0) ,
        single (1) ,
        multiple (2) ,
        region (3) } OPTIONAL ,
    evidence SET OF Tx-evidence OPTIONAL }

-- One piece of experimental evidence listed in Txinit.evidence: the
-- experiment type, the expression system (physiological when absent) and
-- two flags that both default to FALSE.
Tx-evidence ::= SEQUENCE {
    exp-code ENUMERATED {
        unknown (0) ,
        rna-seq (1) ,   -- direct RNA sequencing
        rna-size (2) ,  -- RNA length measurement
        np-map (3) ,    -- nuclease protection mapping with homologous sequence ladder
        np-size (4) ,   -- nuclease protected fragment length measurement
        pe-seq (5) ,    -- dideoxy RNA sequencing
        cDNA-seq (6) ,  -- full-length cDNA sequencing
        pe-map (7) ,    -- primer extension mapping with homologous sequence ladder
        pe-size (8) ,   -- primer extension product length measurement
        pseudo-seq (9) , -- full-length processed pseudogene sequencing
        rev-pe-map (10) ,   -- see NOTE (1) below
        other (255) } ,
    expression-system ENUMERATED {
        unknown (0) ,
        physiological (1) ,
        in-vitro (2) ,
        oocyte (3) ,
        transfection (4) ,
        transgenic (5) ,
        other (255) } DEFAULT physiological ,
    low-prec-data BOOLEAN DEFAULT FALSE ,
    from-homolog BOOLEAN DEFAULT FALSE }     -- experiment actually done on
                                             --  close homolog

    -- NOTE (1) length measurement of a reverse direction primer-extension
    --          product (blocked  by  RNA  5'end)  by  comparison with
    --          homologous sequence ladder (J. Mol. Biol. 199, 587)

END

--$Revision: 1.10 $
--  ----------------------------------------------------------------------------
--
--                            PUBLIC DOMAIN NOTICE
--                National Center for Biotechnology Information
--
--  This software/database is a "United States Government Work" under the terms
--  of the United States Copyright Act.  It was written as part of the author's
--  official duties as a United States Government employee and thus cannot be
--  copyrighted.  This software/database is freely available to the public for
--  use.  The National Library of Medicine and the U.S. Government have not
--  placed any restriction on its use or reproduction.
--
--  Although all reasonable efforts have been taken to ensure the accuracy and
--  reliability of the software and data, the NLM and the U.S. Government do not
--  and cannot warrant the performance or results that may be obtained by using
--  this software or data.  The NLM and the U.S. Government disclaim all
--  warranties, express or implied, including warranties of performance,
--  merchantability or fitness for any particular purpose.
--
--  Please cite the authors in any work or product based on this material.
--
--  ----------------------------------------------------------------------------
--
--  Authors: Mike DiCuccio, Eugene Vasilchenko
--
--  ASN.1 interface to table readers
--
--  ----------------------------------------------------------------------------

NCBI-SeqTable DEFINITIONS ::=

BEGIN

EXPORTS
    SeqTable-column-info, SeqTable-column, Seq-table;

IMPORTS
    Seq-id, Seq-loc, Seq-interval   FROM NCBI-Seqloc;


-- Describes what a table column holds: an optional display title and the
-- identity of the column data, given as a known field-id code and/or a
-- field-name locator string. All three fields are optional.
SeqTable-column-info ::= SEQUENCE {
    -- user friendly column name, can be skipped
    title VisibleString OPTIONAL,

    -- identification of the column data in the objects described by the table
    field-id INTEGER { -- known column data types
        -- position types
        location        (0), -- location as Seq-loc
        location-id     (1), -- location Seq-id
        location-gi     (2), -- gi
        location-from   (3), -- interval from
        location-to     (4), -- interval to
        location-strand (5), -- location strand
        location-fuzz-from-lim (6),
        location-fuzz-to-lim   (7),

        product         (10), -- product as Seq-loc
        product-id      (11), -- product Seq-id
        product-gi      (12), -- product gi
        product-from    (13), -- product interval from
        product-to      (14), -- product interval to
        product-strand  (15), -- product strand
        product-fuzz-from-lim (16),
        product-fuzz-to-lim   (17),

        -- main feature fields
        id-local        (20), -- id.local.id
        xref-id-local   (21), -- xref.id.local.id
        partial         (22),
        comment         (23),
        title           (24),
        ext             (25), -- field-name must be "E.xxx", see below
        qual            (26), -- field-name must be "Q.xxx", see below
        dbxref          (27), -- field-name must be "D.xxx", see below

        -- various data fields
        data-imp-key        (30),
        data-region         (31),
        data-cdregion-frame (32),

        -- extra fields, see also special values for str below
        ext-type        (40),
        qual-qual       (41),
        qual-val        (42),
        dbxref-db       (43),
        dbxref-tag      (44)
    } OPTIONAL,

    -- any column can be identified by ASN.1 text locator string
    -- with omitted object type.
    -- examples:
    --   "data.gene.locus" for Seq-feat.data.gene.locus
    --   "data.imp.key" for Seq-feat.data.imp.key
    --   "qual.qual"
    --    - Seq-feat.qual is SEQUENCE so several columns are allowed
    --      see also "Q.xxx" special value for shorter qual representation
    --   "ext.type.str"
    --   "ext.data.label.str"
    --   "ext.data.data.int"
    --      see also "E.xxx" special value for shorter ext representation
    -- special values start with capital letter:
    --   "E.xxx" - ext.data.label.str = xxx, ext.data.data = data
    --    - Seq-feat.ext.data is SEQUENCE so several columns are allowed
    --   "Q.xxx" - qual.qual = xxx, qual.val = data
    --    - Seq-feat.qual is SEQUENCE so several columns are allowed
    --   "D.xxx" - dbxref.id = xxx, dbxref.tag = data
    --    - Seq-feat.dbxref is SET so several columns are allowed
    field-name  VisibleString OPTIONAL
}


-- String column stored as a list of possible values plus, for each data
-- row, an index into that list.
CommonString-table ::= SEQUENCE {
    -- set of possible values
    strings     SEQUENCE OF UTF8String,

    -- indexes of values in array 'strings' for each data row
    indexes     SEQUENCE OF INTEGER
}


-- Byte-array column stored as a list of possible values plus, for each
-- data row, an index into that list.
CommonBytes-table ::= SEQUENCE {
    -- set of possible values
    bytes       SEQUENCE OF OCTET STRING,

    -- indexes of values in array 'bytes' for each data row
    indexes     SEQUENCE OF INTEGER
}


-- Column whose output values are obtained from the nested data by the
-- linear transform data[i]*mul+add, with integer mul and add.
Scaled-int-multi-data ::= SEQUENCE {
    -- output data[i] = data[i]*mul+add
    mul     INTEGER,
    add     INTEGER,
    data    SeqTable-multi-data,

    -- min/max scaled value
    -- should be set if scaled values may not fit in 32-bit signed integer
    min     INTEGER OPTIONAL,
    max     INTEGER OPTIONAL
}


-- Column whose output values are obtained from the nested data by the
-- linear transform data[i]*mul+add, with REAL mul and add.
Scaled-real-multi-data ::= SEQUENCE {
    -- output data[i] = data[i]*mul+add
    mul     REAL,
    add     REAL,
    data    SeqTable-multi-data
}


-- Class for serializing bm::bvector<>
-- see include/util/bitset/bm.h
-- Since bvector<> serialization doesn't keep size we have to add it explicitly
BVector-data ::= SEQUENCE {
    size    INTEGER,        -- explicit size, not kept by the serialized form
    data    OCTET STRING    -- serialized bvector<> bytes
}


-- Row data of one column, in one of several storage forms. Some
-- alternatives (int-delta, int-scaled, real-scaled) wrap another
-- SeqTable-multi-data value, so the type is recursive.
SeqTable-multi-data ::= CHOICE {
    -- a set of 4-byte integers, one per row
    int         SEQUENCE OF INTEGER,

    -- a set of reals, one per row
    real        SEQUENCE OF REAL,

    -- a set of strings, one per row
    string      SEQUENCE OF UTF8String,

    -- a set of byte arrays, one per row
    bytes       SEQUENCE OF OCTET STRING,

    -- a set of strings with a small set of possible values
    common-string   CommonString-table,

    -- a set of byte arrays with a small set of possible values
    common-bytes    CommonBytes-table,

    -- a set of bits, one per row
    -- Most-significant bit in each octet comes first.
    bit         OCTET STRING,

    -- a set of locations, one per row
    loc         SEQUENCE OF Seq-loc,
    id          SEQUENCE OF Seq-id,
    interval    SEQUENCE OF Seq-interval,

    -- delta-encoded data (int/bit -> int)
    int-delta   SeqTable-multi-data,

    -- scaled data (int/bit -> int)
    int-scaled  Scaled-int-multi-data,

    -- scaled data (int/bit -> real)
    real-scaled Scaled-real-multi-data,

    -- a set of bits, represented as a serialized bvector,
    -- see include/util/bitset/bm.h
    bit-bvector BVector-data,

    -- a set of signed 1-byte integers encoded as sequential octets
    int1        OCTET STRING,

    -- a set of signed 2-byte integers
    int2        SEQUENCE OF INTEGER,

    -- a set of signed 8-byte integers
    int8        SEQUENCE OF INTEGER
}


-- A single value; one alternative per kind of value.
SeqTable-single-data ::= CHOICE {
    int INTEGER,            -- integer
    real REAL,              -- real
    string UTF8String,      -- string
    bytes OCTET STRING,     -- byte array
    bit BOOLEAN,            -- bit

    -- location
    loc Seq-loc,
    id Seq-id,
    interval Seq-interval,

    int8 INTEGER
}
3221
3222
-- Identifies which rows have values; one alternative per encoding.
SeqTable-sparse-index ::= CHOICE {
    -- indexes of the rows that have values
    indexes SEQUENCE OF INTEGER,

    -- bitset of rows with values: a set bit means the row has a value;
    -- the most-significant bit in each octet comes first
    bit-set OCTET STRING,

    -- indexes of the rows that have values, delta-encoded
    indexes-delta SEQUENCE OF INTEGER,

    -- bitset of rows with values, as a serialized bvector<>
    -- (see include/util/bitset/bm.h)
    bit-set-bvector BVector-data
}
3238
3239
-- One column of a Seq-table: a header plus optional row data, sparse
-- index and fallback values.
SeqTable-column ::= SEQUENCE {
    -- information about the data: column description, or a reference
    -- to previously defined info
    header SeqTable-column-info,

    -- row data
    data SeqTable-multi-data OPTIONAL,

    -- sparse info, for the case where not all rows contain data
    sparse SeqTable-sparse-index OPTIONAL,

    -- default value for a sparse table, or if row data is too short
    default SeqTable-single-data OPTIONAL,

    -- single value for indexes not listed in the sparse table
    sparse-other SeqTable-single-data OPTIONAL
}
3256
3257
-- A table of features stored column by column.
Seq-table ::= SEQUENCE {
    -- type of the features in this table;
    -- equal to the Seq-feat.data variant index
    feat-type INTEGER,

    -- subtype of the features in this table;
    -- defined in header SeqFeatData.hpp
    feat-subtype INTEGER OPTIONAL,

    -- number of rows
    num-rows INTEGER,

    -- data, in columns
    columns SEQUENCE OF SeqTable-column
}
3271
3272
3273END
3274--$Revision: 6.4 $
3275--**********************************************************************
3276--
3277--  NCBI Sequence Alignment elements
3278--  by James Ostell, 1990
3279--
3280--**********************************************************************
3281
3282NCBI-Seqalign DEFINITIONS ::=
3283BEGIN
3284
3285EXPORTS Seq-align, Score, Score-set, Seq-align-set;
3286
3287IMPORTS Seq-id, Seq-loc , Na-strand FROM NCBI-Seqloc
3288        User-object, Object-id FROM NCBI-General;
3289
3290--*** Sequence Alignment ********************************
3291--*
3292
-- an unordered collection of alignments
Seq-align-set ::= SET OF Seq-align
3294
-- A sequence alignment: its kind, optional scores, and the alignment
-- data in one of several segment representations.
Seq-align ::= SEQUENCE {
    type ENUMERATED {
        not-set (0),
        global (1),
        -- unbroken, but not ordered, diagonals
        diags (2),
        -- mapping pieces together
        partial (3),
        -- discontinuous alignment
        disc (4),
        other (255)
    },

    -- dimensionality
    dim INTEGER OPTIONAL,

    -- scores for the whole alignment
    score SET OF Score OPTIONAL,

    -- alignment data
    segs CHOICE {
        dendiag SEQUENCE OF Dense-diag,
        denseg Dense-seg,
        std SEQUENCE OF Std-seg,
        packed Packed-seg,
        disc Seq-align-set,
        spliced Spliced-seg,
        sparse Sparse-seg
    },

    -- regions of sequence over which the alignment was computed
    bounds SET OF Seq-loc OPTIONAL,

    -- alignment id
    id SEQUENCE OF Object-id OPTIONAL,

    -- extra info
    ext SEQUENCE OF User-object OPTIONAL
}
3325
-- Dense-diag: for (multiway) diagonals
Dense-diag ::= SEQUENCE {
    -- dimensionality
    dim INTEGER DEFAULT 2,
    -- sequences in order
    ids SEQUENCE OF Seq-id,
    -- start OFFSETS in ids order
    starts SEQUENCE OF INTEGER,
    -- length of the aligned segments
    len INTEGER,
    strands SEQUENCE OF Na-strand OPTIONAL,
    scores SET OF Score OPTIONAL
}
3333
    -- Dense-seg: the densest packing for sequence alignments only.
3335    --            a start of -1 indicates a gap for that sequence of
3336    --            length lens.
3337    --
3338    -- id=100  AAGGCCTTTTAGAGATGATGATGATGATGA
3339    -- id=200  AAGGCCTTTTAG.......GATGATGATGA
3340    -- id=300  ....CCTTTTAGAGATGATGAT....ATGA
3341    --
3342    -- dim = 3, numseg = 6, ids = { 100, 200, 300 }
3343    -- starts = { 0,0,-1, 4,4,0, 12,-1,8, 19,12,15, 22,15,-1, 26,19,18 }
3344    -- lens = { 4, 8, 7, 3, 4, 4 }
3345    --
3346
-- Dense-seg: for (multiway) global or partial alignments
Dense-seg ::= SEQUENCE {
    -- dimensionality
    dim INTEGER DEFAULT 2,
    -- number of segments here
    numseg INTEGER,
    -- sequences in order
    ids SEQUENCE OF Seq-id,
    -- start OFFSETS in ids order within segs
    starts SEQUENCE OF INTEGER,
    -- lengths in ids order within segs
    lens SEQUENCE OF INTEGER,
    strands SEQUENCE OF Na-strand OPTIONAL,
    -- score for each seg
    scores SEQUENCE OF Score OPTIONAL
}
3355
-- Packed-seg: for (multiway) global or partial alignments
Packed-seg ::= SEQUENCE {
    -- dimensionality
    dim INTEGER DEFAULT 2,
    -- number of segments here
    numseg INTEGER,
    -- sequences in order
    ids SEQUENCE OF Seq-id,
    -- start OFFSETS in ids order for the whole alignment
    starts SEQUENCE OF INTEGER,
    -- Boolean: whether each sequence is present or absent in each segment
    present OCTET STRING,
    -- length of each segment
    lens SEQUENCE OF INTEGER,
    strands SEQUENCE OF Na-strand OPTIONAL,
    -- score for each segment
    scores SEQUENCE OF Score OPTIONAL
}
3366
-- Std-seg: a segment given as a list of Seq-loc
Std-seg ::= SEQUENCE {
    -- dimensionality
    dim INTEGER DEFAULT 2,
    ids SEQUENCE OF Seq-id OPTIONAL,
    loc SEQUENCE OF Seq-loc,
    scores SET OF Score OPTIONAL
}
3372
3373
-- Spliced-seg: alignment of a product (protein or transcript) to a
-- genomic sequence, expressed as a list of exons.
Spliced-seg ::= SEQUENCE {
    -- the product is either a protein or a transcript (cDNA)
    product-id Seq-id OPTIONAL,
    genomic-id Seq-id OPTIONAL,

    -- should be 'plus' or 'minus'
    product-strand Na-strand OPTIONAL,
    genomic-strand Na-strand OPTIONAL,

    product-type ENUMERATED {
        transcript (0),
        protein (1)
    },

    -- set of segments involved;
    -- each segment corresponds to one exon;
    -- exons are always in biological order
    exons SEQUENCE OF Spliced-exon,

    -- optional poly(A) tail
    poly-a INTEGER OPTIONAL,

    -- length of the product, in bases/residues.
    -- A 3' unaligned length can be extracted from this; it also captures
    -- the case in which a protein aligns leaving a partial codon alignment
    -- at the 3' end
    product-length INTEGER OPTIONAL,

    -- alignment descriptors / modifiers;
    -- this provides us a set for extension
    modifiers SET OF Spliced-seg-modifier OPTIONAL
}
3406
-- One alignment descriptor / modifier of a Spliced-seg.
Spliced-seg-modifier ::= CHOICE {
    -- the protein aligns from the start, and the first codon
    -- on both product and genomic is a start codon
    start-codon-found BOOLEAN,

    -- the protein aligns to its end, and there is a stop codon
    -- on the genomic right after the alignment
    stop-codon-found BOOLEAN
}
3416
3417
-- Spliced-exon: a complete or partial exon.
-- Two consecutive Spliced-exons may belong to one exon.
Spliced-exon ::= SEQUENCE {
    -- product-end >= product-start
    product-start Product-pos,
    product-end Product-pos,

    -- genomic-end >= genomic-start
    genomic-start INTEGER,
    genomic-end INTEGER,

    -- the product is either a protein or a transcript (cDNA)
    product-id Seq-id OPTIONAL,
    genomic-id Seq-id OPTIONAL,

    -- should be 'plus' or 'minus'
    product-strand Na-strand OPTIONAL,

    -- genomic-strand represents the strand of translation
    genomic-strand Na-strand OPTIONAL,

    -- basic segments are always in biologic order
    parts SEQUENCE OF Spliced-exon-chunk OPTIONAL,

    -- scores for this exon
    scores Score-set OPTIONAL,

    -- splice sites
    acceptor-before-exon Splice-site OPTIONAL,
    donor-after-exon Splice-site OPTIONAL,

    -- flag: is this exon complete or partial?
    partial BOOLEAN OPTIONAL,

    -- extra info
    ext SEQUENCE OF User-object OPTIONAL
}
3455
3456
-- Position on a product: a plain integer (nucpos) or a Prot-pos (protpos).
Product-pos ::= CHOICE {
    nucpos  INTEGER,
    protpos Prot-pos
}
3461
3462
-- Prot-pos: codon based position on a protein (1/3 of aminoacid)
Prot-pos ::= SEQUENCE {
    -- standard protein position
    amin INTEGER,

    -- 0, 1, 2, or 3 as for Cdregion:
    --   0       = not set
    --   1, 2, 3 = actual frame
    frame INTEGER DEFAULT 0
}
3473
3474
-- Spliced-exon-chunk: a piece of an exon.
-- Lengths are given in nucleotide bases (1/3 of aminoacid when the
-- product is a protein).
Spliced-exon-chunk ::= CHOICE {
    -- both sequences represented; product and genomic sequences match
    match INTEGER,

    -- both sequences represented; product and genomic sequences
    -- do not match
    mismatch INTEGER,

    -- both sequences are represented and there is sufficient similarity
    -- between product and genomic sequences. Can be used to replace
    -- stretches of matches and mismatches, mostly for protein to genomic,
    -- where the definition of match or mismatch depends on the
    -- translation table
    diag INTEGER,

    -- insertion in product sequence (i.e. gap in the genomic sequence)
    product-ins INTEGER,

    -- insertion in genomic sequence (i.e. gap in the product sequence)
    genomic-ins INTEGER
}
3497
3498
-- Splice-site: a site involved in a splice
Splice-site ::= SEQUENCE {
    -- typically two bases in the intronic region;
    -- always in IUPAC format
    bases VisibleString
}
3505
3506
3507-- ==========================================================================
3508--
3509-- Sparse-seg follows the semantics of dense-seg and is more optimal for
3510-- representing sparse multiple alignments
3511--
3512-- ==========================================================================
3513
3514
-- Sparse-seg: a multiple alignment made up of pairwise alignments.
Sparse-seg ::= SEQUENCE {
    master-id Seq-id OPTIONAL,

    -- the pairwise alignments constituting this multiple alignment
    rows SET OF Sparse-align,

    -- per-row scores
    row-scores SET OF Score OPTIONAL,

    -- index of extra items
    ext SET OF Sparse-seg-ext OPTIONAL
}
3527
-- Sparse-align: one pairwise alignment between a first and a second
-- sequence, given as numseg segments.
Sparse-align ::= SEQUENCE {
    first-id Seq-id,
    second-id Seq-id,

    -- number of segments
    numseg INTEGER,
    -- starts on the first sequence [numseg]
    first-starts SEQUENCE OF INTEGER,
    -- starts on the second sequence [numseg]
    second-starts SEQUENCE OF INTEGER,
    -- lengths of segments [numseg]
    lens SEQUENCE OF INTEGER,
    second-strands SEQUENCE OF Na-strand OPTIONAL,

    -- per-segment scores
    seg-scores SET OF Score OPTIONAL
}
3541
-- Extra item attached to a Sparse-seg; currently holds only an index.
Sparse-seg-ext ::= SEQUENCE {
    -- The lines below are a commented-out layout that is not part of
    -- the active definition:
    --seg-ext SET OF {
    --    index INTEGER,
    --    data User-field
    -- }
    index INTEGER
}
3549
3550
3551
-- Score: an optionally identified value, either real or integer.
-- Use of Score is discouraged for external ASN.1 specifications.
Score ::= SEQUENCE {
    id Object-id OPTIONAL,
    value CHOICE {
        real REAL,
        int  INTEGER
    }
}
3560
-- Score-set: an unordered collection of Score.
-- Use of Score-set is encouraged for external ASN.1 specifications.
Score-set ::= SET OF Score
3563
3564END
3565
3566--$Revision: 6.0 $
3567--**********************************************************************
3568--
3569--  NCBI Sequence Analysis Results (other than alignments)
3570--  by James Ostell, 1990
3571--
3572--**********************************************************************
3573
3574NCBI-Seqres DEFINITIONS ::=
3575BEGIN
3576
3577EXPORTS Seq-graph;
3578
3579IMPORTS Seq-loc FROM NCBI-Seqloc;
3580
3581--*** Sequence Graph ********************************
3582--*
3583--*   for values mapped by residue or range to sequence
3584--*
3585
-- Seq-graph: values mapped by residue or range to a sequence region.
Seq-graph ::= SEQUENCE {
    title VisibleString OPTIONAL,
    comment VisibleString OPTIONAL,
    -- region this applies to
    loc Seq-loc,
    -- title for x-axis
    title-x VisibleString OPTIONAL,
    title-y VisibleString OPTIONAL,
    -- compression (residues/value)
    comp INTEGER OPTIONAL,
    -- a and b are for scaling values: display = (a x value) + b
    a REAL OPTIONAL,
    b REAL OPTIONAL,
    -- number of values in graph
    numval INTEGER,
    graph CHOICE {
        real Real-graph,
        int  Int-graph,
        byte Byte-graph
    }
}
3600
-- Real-graph: graph whose values are reals
Real-graph ::= SEQUENCE {
    -- top of graph
    max REAL,
    -- bottom of graph
    min REAL,
    -- value to draw axis on
    axis REAL,
    values SEQUENCE OF REAL
}
3606
-- Int-graph: graph whose values are integers
Int-graph ::= SEQUENCE {
    max    INTEGER,
    min    INTEGER,
    axis   INTEGER,
    values SEQUENCE OF INTEGER
}
3612
-- Byte-graph: integer from 0-255; values are packed in an OCTET STRING
Byte-graph ::= SEQUENCE {
    max    INTEGER,
    min    INTEGER,
    axis   INTEGER,
    values OCTET STRING
}
3618
3619END
3620
3621--$Revision: 6.1 $
3622--********************************************************************
3623--
3624--  Direct Submission of Sequence Data
3625--  James Ostell, 1991
3626--
3627--  This is a trial specification for direct submission of sequence
3628--    data worked out between NCBI and EMBL
3629--  Later revised to reflect work with GenBank and Integrated database
3630--
3631--  Version 3.0, 1994
3632--    This is the official NCBI sequence submission format now.
3633--
3634--********************************************************************
3635
3636NCBI-Submit DEFINITIONS ::=
3637BEGIN
3638
3639EXPORTS Seq-submit, Contact-info;
3640
3641IMPORTS Cit-sub, Author FROM NCBI-Biblio
3642        Date, Object-id FROM NCBI-General
3643        Seq-annot FROM NCBI-Sequence
3644        Seq-id FROM NCBI-Seqloc
3645        Seq-entry FROM NCBI-Seqset;
3646
-- Seq-submit: a submission block plus the submitted data.
Seq-submit ::= SEQUENCE {
    sub Submit-block,
    data CHOICE {
        -- sequence(s)
        entrys SET OF Seq-entry,
        -- annotation(s)
        annots SET OF Seq-annot,
        -- deletions of entries
        delete SET OF Seq-id
    }
}
3653
-- Submit-block: administrative information about a submission.
Submit-block ::= SEQUENCE {
    -- who to contact
    contact Contact-info,
    -- citation for this submission
    cit Cit-sub,
    -- hold until publish
    hup BOOLEAN DEFAULT FALSE,
    -- release by date
    reldate Date OPTIONAL,
    -- type of submission
    subtype INTEGER {
        new (1),            -- new data
        update (2),         -- update by author
        revision (3),       -- 3rd party (non-author) update
        other (255)
    } OPTIONAL,
    -- tool used to make submission
    tool VisibleString OPTIONAL,
    -- user supplied id for this submission
    user-tag VisibleString OPTIONAL,
    -- user comments/advice to database
    comment VisibleString OPTIONAL
}
3667
-- Contact-info: who to contact to discuss the submission
Contact-info ::= SEQUENCE {
    -- OBSOLETE: will be removed
    name VisibleString OPTIONAL,
    address SEQUENCE OF VisibleString OPTIONAL,
    phone VisibleString OPTIONAL,
    fax VisibleString OPTIONAL,
    email VisibleString OPTIONAL,
    telex VisibleString OPTIONAL,
    -- for owner accounts
    owner-id Object-id OPTIONAL,
    password OCTET STRING OPTIONAL,
    -- structured to replace name above
    last-name VisibleString OPTIONAL,
    first-name VisibleString OPTIONAL,
    middle-initial VisibleString OPTIONAL,
    -- WARNING: this will replace the above
    contact Author OPTIONAL
}
3681
3682END
3683
3684--$Revision: 1.15 $
3685--**********************************************************************
3686--
3687--  Definitions for Cn3D-specific data (rendering settings,
3688--    user annotations, etc.)
3689--
3690--  by Paul Thiessen
3691--
3692--  National Center for Biotechnology Information
3693--  National Institutes of Health
3694--  Bethesda, MD 20894 USA
3695--
3696-- asntool -m cn3d.asn -w 100 -o cn3d.h
3697-- asntool -B objcn3d -m cn3d.asn -G -w 100 -K cn3d.h -I mapcn3d.h \
3698--   -M ../mmdb1.asn,../mmdb2.asn,../mmdb3.asn
3699--**********************************************************************
3700
3701NCBI-Cn3d DEFINITIONS ::=
3702-- Cn3D-specific information
3703
3704BEGIN
3705
3706EXPORTS  Cn3d-style-dictionary, Cn3d-user-annotations;
3707
3708IMPORTS  Biostruc-id FROM MMDB
3709         Molecule-id, Residue-id FROM MMDB-Chemical-graph;
3710
3711
3712-- values of enumerations must match those in cn3d/style_manager.hpp!
3713
-- Cn3d-backbone-type: for different types of backbones
Cn3d-backbone-type ::= ENUMERATED {
    off      (1),
    trace    (2),
    partial  (3),
    complete (4)
}
3720
-- Cn3d-drawing-style: atom/bond/object rendering styles
Cn3d-drawing-style ::= ENUMERATED {
    -- for atoms and bonds
    wire           (1),
    tubes          (2),
    ball-and-stick (3),
    space-fill     (4),
    wire-worm      (5),
    tube-worm      (6),
    -- for 3d-objects
    with-arrows    (7),
    without-arrows (8)
}
3733
-- Cn3d-color-scheme: available color schemes (not all necessarily
-- applicable to all objects). Values are listed in the original
-- declaration order, which is not numeric order.
Cn3d-color-scheme ::= ENUMERATED {
    element             (1),
    object              (2),
    molecule            (3),
    domain              (4),
    residue             (20),
    secondary-structure (5),
    user-select         (6),
    -- different alignment conservation coloring
    -- (currently only for proteins)
    aligned             (7),
    identity            (8),
    variety             (9),
    weighted-variety    (10),
    information-content (11),
    fit                 (12),
    block-fit           (17),
    block-z-fit         (18),
    block-row-fit       (19),
    -- other schemes
    temperature         (13),
    hydrophobicity      (14),
    charge              (15),
    rainbow             (16)
}
3759
-- Cn3d-color: an RGB triplet. After division by scale-factor the
-- components are interpreted as floating point values which should range
-- from [0..1]. The default scale-factor is 255, so integer byte values
-- [0..255] can be given directly and still map to [0..1].
--    An alpha value is allowed, but is currently ignored by Cn3D.
Cn3d-color ::= SEQUENCE {
    scale-factor INTEGER DEFAULT 255,
    red          INTEGER,
    green        INTEGER,
    blue         INTEGER,
    alpha        INTEGER DEFAULT 255
}
3772
-- Cn3d-backbone-style: style blob for backbones only
Cn3d-backbone-style ::= SEQUENCE {
    type         Cn3d-backbone-type,
    style        Cn3d-drawing-style,
    color-scheme Cn3d-color-scheme,
    user-color   Cn3d-color
}
3779
-- Cn3d-general-style: style blob for other objects
Cn3d-general-style ::= SEQUENCE {
    is-on        BOOLEAN,
    style        Cn3d-drawing-style,
    color-scheme Cn3d-color-scheme,
    user-color   Cn3d-color
}
3786
-- Cn3d-backbone-label-style: style blob for backbone labels
Cn3d-backbone-label-style ::= SEQUENCE {
    -- zero means none
    spacing INTEGER,
    type ENUMERATED {
        one-letter (1),
        three-letter (2)
    },
    number ENUMERATED {
        none (0),
        -- from 1, by residues present, to match sequence
        sequential (1),
        -- use number assigned by PDB
        pdb (2)
    },
    termini BOOLEAN,
    -- all white, or (if false) color of alpha carbon
    white BOOLEAN
}
3801
-- Cn3d-style-settings: rendering settings for Cn3D
-- (mirrors StyleSettings class)
Cn3d-style-settings ::= SEQUENCE {
    -- a name (for favorites)
    name VisibleString OPTIONAL,

    -- backbone styles
    protein-backbone    Cn3d-backbone-style,
    nucleotide-backbone Cn3d-backbone-style,

    -- styles for other stuff
    protein-sidechains    Cn3d-general-style,
    nucleotide-sidechains Cn3d-general-style,
    heterogens            Cn3d-general-style,
    solvents              Cn3d-general-style,
    connections           Cn3d-general-style,
    helix-objects         Cn3d-general-style,
    strand-objects        Cn3d-general-style,

    -- virtual disulfides
    virtual-disulfides-on   BOOLEAN,
    virtual-disulfide-color Cn3d-color,

    -- hydrogens
    hydrogens-on BOOLEAN,

    -- background
    background-color Cn3d-color,

    -- floating point parameters; scale-factor applies to all of the
    -- fields that follow it in this group
    scale-factor          INTEGER,
    space-fill-proportion INTEGER,
    ball-radius           INTEGER,
    stick-radius          INTEGER,
    tube-radius           INTEGER,
    tube-worm-radius      INTEGER,
    helix-radius          INTEGER,
    strand-width          INTEGER,
    strand-thickness      INTEGER,

    -- backbone labels (no labels if not present)
    protein-labels    Cn3d-backbone-label-style OPTIONAL,
    nucleotide-labels Cn3d-backbone-label-style OPTIONAL,

    -- ion labels
    ion-labels BOOLEAN OPTIONAL
}
3834
-- an unordered collection of style settings
Cn3d-style-settings-set ::= SET OF Cn3d-style-settings
3836
-- integer identifier of a style; used as the id of a
-- Cn3d-style-table-item and as the style-id of a Cn3d-user-annotation
Cn3d-style-table-id ::= INTEGER
3838
-- Cn3d-style-table-item: pairs a style id with its style settings
Cn3d-style-table-item ::= SEQUENCE {
    id    Cn3d-style-table-id,
    style Cn3d-style-settings
}
3843
-- Cn3d-style-dictionary: the global settings, plus a lookup table of
-- styles for user annotations.
Cn3d-style-dictionary ::= SEQUENCE {
    global-style Cn3d-style-settings,
    style-table  SEQUENCE OF Cn3d-style-table-item OPTIONAL
}
3849
-- Cn3d-residue-range: a range of residues in a chain, identified by
-- MMDB residue-id (e.g., numbered from 1)
Cn3d-residue-range ::= SEQUENCE {
    from Residue-id,
    to   Residue-id
}
3856
-- Cn3d-molecule-location: set of locations on a particular chain
Cn3d-molecule-location ::= SEQUENCE {
    -- MMDB molecule id
    molecule-id Molecule-id,
    -- which residues; the whole molecule is implied if absent
    residues SEQUENCE OF Cn3d-residue-range OPTIONAL
}
3863
-- Cn3d-object-location: set of locations on a particular structure
-- object (e.g., a PDB/MMDB structure), which may include multiple ranges
-- of residues each on multiple chains.
Cn3d-object-location ::= SEQUENCE {
    structure-id Biostruc-id,
    residues     SEQUENCE OF Cn3d-molecule-location
}
3871
-- Cn3d-user-annotation: information for an individual user annotation
Cn3d-user-annotation ::= SEQUENCE {
    -- a (short) name for this annotation
    name VisibleString,
    -- an optional longer description
    description VisibleString OPTIONAL,
    -- how to draw this annotation
    style-id Cn3d-style-table-id,
    -- which residues to cover
    residues SEQUENCE OF Cn3d-object-location,
    -- whether this annotation is to be turned on in Cn3D
    is-on BOOLEAN
}
3880
-- Cn3d-GL-matrix: a GL-ordered transformation matrix (16 reals, m0..m15)
Cn3d-GL-matrix ::= SEQUENCE {
    m0 REAL,
    m1 REAL,
    m2 REAL,
    m3 REAL,
    m4 REAL,
    m5 REAL,
    m6 REAL,
    m7 REAL,
    m8 REAL,
    m9 REAL,
    m10 REAL,
    m11 REAL,
    m12 REAL,
    m13 REAL,
    m14 REAL,
    m15 REAL
}
3888
-- Cn3d-vector: a floating point 3d vector
Cn3d-vector ::= SEQUENCE { x REAL, y REAL, z REAL }
3895
-- Cn3d-view-settings: parameters used to set up the camera in Cn3D
Cn3d-view-settings ::= SEQUENCE {
    -- camera on +Z axis this distance from origin
    camera-distance REAL,
    -- camera angle
    camera-angle-rad REAL,
    -- X,Y of the point in the Z=0 plane that the camera points at
    camera-look-at-X REAL,
    camera-look-at-Y REAL,
    -- distance of clipping planes from camera
    camera-clip-near REAL,
    camera-clip-far REAL,
    -- transformation of objects in the scene
    matrix Cn3d-GL-matrix,
    -- center of rotation of whole scene
    rotation-center Cn3d-vector
}
3907
-- Cn3d-user-annotations: the list of annotations for a given CDD/mime.
-- When residue regions overlap between annotations that are turned on,
-- the last annotation in this list that contains those residues is used
-- as their display style.
--   Also holds the current viewpoint, so that the user's camera angle
-- can be stored and reproduced, for illustrations, on-line figures, etc.
Cn3d-user-annotations ::= SEQUENCE {
    annotations SEQUENCE OF Cn3d-user-annotation OPTIONAL,
    view        Cn3d-view-settings OPTIONAL
}
3918
3919END
3920
3921--$Revision: 6.3 $
3922--****************************************************************
3923--
3924--  NCBI Project Definition Module
3925--  by Jim Ostell and Jonathan Kans, 1998
3926--
3927--****************************************************************
3928
3929NCBI-Project DEFINITIONS ::=
3930BEGIN
3931
3932EXPORTS Project, Project-item;
3933
3934IMPORTS Date FROM NCBI-General
3935        PubMedId FROM NCBI-Biblio
3936        Seq-id, Seq-loc FROM NCBI-Seqloc
3937        Seq-annot, Pubdesc FROM NCBI-Sequence
3938        Seq-entry FROM NCBI-Seqset
3939        Pubmed-entry FROM NCBI-PubMed;
3940
-- Project: an optional description plus the project data
Project ::= SEQUENCE {
    descr Project-descr OPTIONAL,
    data  Project-item
}
3944
-- Project-item: the content of a project; exactly one alternative.
-- NOTE(review): structid and structent are NULL (carry no payload),
-- unlike the alternatives around them; presumably placeholders, confirm.
Project-item ::= CHOICE {
    pmuid     SET OF INTEGER,
    protuid   SET OF INTEGER,
    nucuid    SET OF INTEGER,
    sequid    SET OF INTEGER,
    genomeuid SET OF INTEGER,
    structuid SET OF INTEGER,
    pmid      SET OF PubMedId,
    protid    SET OF Seq-id,
    nucid     SET OF Seq-id,
    seqid     SET OF Seq-id,
    genomeid  SET OF Seq-id,
    structid  NULL,
    pment     SET OF Pubmed-entry,
    protent   SET OF Seq-entry,
    nucent    SET OF Seq-entry,
    seqent    SET OF Seq-entry,
    genomeent SET OF Seq-entry,
    structent NULL,
    seqannot  SET OF Seq-annot,
    loc       SET OF Seq-loc,
    proj      SET OF Project
}
3968
-- Project-descr: ids, optional name and optional descriptors of a project
Project-descr ::= SEQUENCE {
    id    SET OF Project-id,
    name  VisibleString OPTIONAL,
    descr SET OF Projdesc OPTIONAL
}
3973
-- Projdesc: one project descriptor; exactly one alternative
Projdesc ::= CHOICE {
    pub     Pubdesc,
    date    Date,
    comment VisibleString,
    title   VisibleString
}
3980
-- a project identifier, stored as a string
Project-id ::= VisibleString
3982
3983END
3984
3985
3986--$Revision: 6.0 $
3987--*********************************************************************
3988--
3989--  access.asn
3990--
3991--     messages for data access
3992--
3993--*********************************************************************
3994
3995NCBI-Access DEFINITIONS ::=
3996BEGIN
3997
3998EXPORTS Link-set;
3999
4000    -- links between same class = neighbors
4001    -- links between other classes = links
4002
-- Link-set: a set of links together with optional weights
Link-set ::= SEQUENCE {
    -- number of links to this doc type
    num INTEGER,
    -- the links
    uids SEQUENCE OF INTEGER OPTIONAL,
    -- the weights
    weights SEQUENCE OF INTEGER OPTIONAL
}
4007
4008
4009END
4010--$Revision: 6.0 $
4011--**********************************************************************
4012--
4013--  NCBI Sequence Feature Definition Module
4014--  by James Ostell, 1994
4015--
4016--**********************************************************************
4017
4018NCBI-FeatDef DEFINITIONS ::=
4019BEGIN
4020
4021EXPORTS FeatDef, FeatDefSet, FeatDispGroup, FeatDispGroupSet;
4022
4023
-- FeatDef: definition of one feature type
FeatDef ::= SEQUENCE {
    -- short label for type eg "CDS"
    typelabel VisibleString,
    -- label for a menu eg "Coding Region"
    menulabel VisibleString,
    -- unique for this feature definition
    featdef-key INTEGER,
    -- SeqFeat.data.choice from objfeat.h
    seqfeat-key INTEGER,
    -- Group for data entry
    entrygroup INTEGER,
    -- Group for data display
    displaygroup INTEGER,
    -- type of molecule (see FeatMolType)
    molgroup FeatMolType
}
4033
-- FeatMolType: kind of molecule
FeatMolType ::= ENUMERATED {
    -- proteins
    aa (1),
    -- nucleic acids
    na (2),
    -- both
    both (3)
}
4038
FeatDefSet ::= SEQUENCE OF FeatDef   -- collection of definitions
4040
-- FeatDispGroup: a group, given by an integer key and a name
FeatDispGroup ::= SEQUENCE {
    groupkey  INTEGER,
    groupname VisibleString
}
4044
-- an ordered list of FeatDispGroup
FeatDispGroupSet ::= SEQUENCE OF FeatDispGroup
4046
-- FeatDefGroupSet: the groups together with the feature definitions
FeatDefGroupSet ::= SEQUENCE {
    groups FeatDispGroupSet,
    defs   FeatDefSet
}
4050
4051END
4052
4053
4054--$Revision: 6.12 $
4055--****************************************************************
4056--
4057--  NCBI MIME type (chemical/ncbi-asn1-ascii and chemical/ncbi-asn1-binary)
4058--  by Jonathan Epstein, February 1996
4059--
4060--****************************************************************
4061
4062NCBI-Mime DEFINITIONS ::=
4063BEGIN
4064
4065EXPORTS Ncbi-mime-asn1;
4066IMPORTS Biostruc, Biostruc-annot-set FROM MMDB
4067    Cdd FROM NCBI-Cdd
4068	Seq-entry FROM NCBI-Seqset
4069	Seq-annot FROM NCBI-Sequence
4070    Medline-entry FROM NCBI-Medline
4071    Cn3d-style-dictionary, Cn3d-user-annotations FROM NCBI-Cn3d;
4072
-- Ncbi-mime-asn1: top-level object of the NCBI MIME type; exactly one
-- alternative. Others may be added here in the future.
Ncbi-mime-asn1 ::= CHOICE {
    -- just a structure
    entrez Entrez-general,
    -- structures & sequences & alignments
    alignstruc Biostruc-align,
    -- sequence alignment
    alignseq Biostruc-align-seq,
    -- structure & sequences
    strucseq Biostruc-seq,
    -- structure & sequences & alignments
    strucseqs Biostruc-seqs,
    -- all-purpose "grab bag"
    general Biostruc-seqs-aligns-cdd
}
4082
-- Bundle-seqs-aligns: generic bundle of sequence and alignment info
Bundle-seqs-aligns ::= SEQUENCE {
    -- sequences
    sequences SET OF Seq-entry OPTIONAL,
    -- sequence alignments
    seqaligns SET OF Seq-annot OPTIONAL,
    -- structure alignments
    strucaligns Biostruc-annot-set OPTIONAL,
    -- imports (updates in Cn3D)
    imports SET OF Seq-annot OPTIONAL,
    -- Cn3D stuff
    style-dictionary Cn3d-style-dictionary OPTIONAL,
    user-annotations Cn3d-user-annotations OPTIONAL
}
4092
-- Biostruc-seqs-aligns-cdd: sequence/alignment data (as a bundle or as a
-- CDD) with optional structures.
Biostruc-seqs-aligns-cdd ::= SEQUENCE {
    seq-align-data CHOICE {
        -- either seqs + alignments
        bundle Bundle-seqs-aligns,
        -- or CDD (which contains these)
        cdd Cdd
    },

    -- structures
    structures SET OF Biostruc OPTIONAL,

    -- type of structures to load if not present; meanings and values
    -- are the same as MMDB's Model-type
    structure-type ENUMERATED {
        ncbi-backbone (2),
        ncbi-all-atom (3),
        pdb-model (4)
    } OPTIONAL
}
4105
-- Biostruc-align: structures & sequences & alignments
Biostruc-align ::= SEQUENCE {
    master Biostruc,
    slaves SET OF Biostruc,
    -- structure alignments
    alignments Biostruc-annot-set,
    -- sequences
    sequences SET OF Seq-entry,
    seqalign SET OF Seq-annot,
    style-dictionary Cn3d-style-dictionary OPTIONAL,
    user-annotations Cn3d-user-annotations OPTIONAL
}
4115
-- Biostruc-align-seq: display seq structure align only
Biostruc-align-seq ::= SEQUENCE {
    -- sequences
    sequences SET OF Seq-entry,
    seqalign SET OF Seq-annot,
    style-dictionary Cn3d-style-dictionary OPTIONAL,
    user-annotations Cn3d-user-annotations OPTIONAL
}
4122
-- Biostruc-seq: display structure seq (added by yanli)
Biostruc-seq ::= SEQUENCE {
    structure Biostruc,
    sequences SET OF Seq-entry,
    style-dictionary Cn3d-style-dictionary OPTIONAL,
    user-annotations Cn3d-user-annotations OPTIONAL
}
4129
-- Same elements as Biostruc-seq plus a mandatory seqalign element.
Biostruc-seqs ::= SEQUENCE { -- display blast alignment along with neighbor's structure added by yanli
	structure Biostruc,
	sequences SET OF Seq-entry,	-- sequences
	seqalign SET OF Seq-annot,	-- sequence alignments
	style-dictionary Cn3d-style-dictionary OPTIONAL,
	user-annotations Cn3d-user-annotations OPTIONAL
}
4137
-- Enumeration used by the mandatory 'style' element of Entrez-general.
-- Values are numbered consecutively from 1 to 13.
Entrez-style ::= ENUMERATED {
	docsum (1),
	genbank (2) ,
	genpept (3) ,
	fasta (4) ,
	asn1 (5) ,
	graphic (6) ,
	alignment (7) ,
	globalview (8) ,
	report (9) ,
	medlars (10) ,
	embl (11) ,
	pdb (12) ,
	kinemage (13) }
4152
-- One data item (exactly one alternative of 'data') plus the Entrez-style to
-- use for it. 'title' and 'location' are OPTIONAL.
Entrez-general ::= SEQUENCE {
	title VisibleString OPTIONAL,
	data CHOICE {
		ml	Medline-entry ,
		prot	Seq-entry ,	-- prot, nuc and genome all carry a
		nuc	Seq-entry ,	-- Seq-entry; only the selected
		genome	Seq-entry ,	-- alternative tells them apart
		structure Biostruc ,
		strucAnnot Biostruc-annot-set } ,
	style Entrez-style ,
	location VisibleString OPTIONAL }
4164END
4165--$Revision: 6.0 $
4166--********************************************************************
4167--
4168--  Print Templates
4169--  James Ostell, 1993
4170--
4171--
4172--********************************************************************
4173
NCBI-ObjPrt DEFINITIONS ::=
BEGIN

-- Only the template and template-set types are exported; the PrintForm*
-- helper types below are module-private.
EXPORTS PrintTemplate, PrintTemplateSet;

-- A named template: an optional label path plus the PrintFormat to apply.
PrintTemplate ::= SEQUENCE {
    name TemplateName ,  -- name for this template
    labelfrom VisibleString OPTIONAL,    -- ASN.1 path to get label from
    format PrintFormat }

-- Template identifier; also the payload of PrintForm.use-template.
TemplateName ::= VisibleString

-- Ordered collection of templates.
PrintTemplateSet ::= SEQUENCE OF PrintTemplate

-- Formatting description for one ASN.1 path. 'asn1' and 'form' are mandatory.
PrintFormat ::= SEQUENCE {
    asn1 VisibleString ,    -- ASN.1 partial path for this
    label VisibleString OPTIONAL ,   -- printable label
    prefix VisibleString OPTIONAL,   -- presumably text placed before the value; TODO confirm
    suffix VisibleString OPTIONAL,   -- presumably text placed after the value; TODO confirm
    form PrintForm }

PrintForm ::=   CHOICE {      -- Forms for various ASN.1 components
    block PrintFormBlock,
    boolean PrintFormBoolean,
    enum PrintFormEnum,
    text PrintFormText,
    use-template TemplateName,   -- refers to another template by name
    user UserFormat ,
    null NULL }               -- rarely used

-- Names of user-supplied functions, given as strings; 'defaultfunc' is OPTIONAL.
UserFormat ::= SEQUENCE {
    printfunc VisibleString ,
    defaultfunc VisibleString OPTIONAL }

-- 'components' nests PrintFormat, so block forms can be recursive.
PrintFormBlock ::= SEQUENCE {  -- for SEQUENCE, SET
    separator VisibleString OPTIONAL ,
    components SEQUENCE OF PrintFormat }

-- Two OPTIONAL strings, one per boolean value
-- (presumably the text to print for TRUE and for FALSE; TODO confirm).
PrintFormBoolean ::= SEQUENCE {
    true VisibleString OPTIONAL ,
    false VisibleString OPTIONAL }

-- OPTIONAL list of strings for an enumerated component.
PrintFormEnum ::= SEQUENCE {
    values SEQUENCE OF VisibleString OPTIONAL }

-- OPTIONAL function name, given as a string, for a text component.
PrintFormText ::= SEQUENCE {
    textfunc VisibleString OPTIONAL }

END
4223
4224--$Revision: 6.11 $
4225--*********************************************************
4226--
4227-- ASN.1 and XML for the components of a GenBank format sequence
4228-- J.Ostell 2002
4229-- Updated 25 May 2010
4230--
4231--*********************************************************
4232
4233NCBI-GBSeq DEFINITIONS ::=
4234BEGIN
4235
4236--********
4237--  GBSeq represents the elements in a GenBank style report
4238--    of a sequence with some small additions to structure and support
4239--    for protein (GenPept) versions of GenBank format as seen in
4240--    Entrez. While this represents the simplification, reduction of
4241--    detail, and flattening to a single sequence perspective of GenBank
4242--    format (compared with the full ASN.1 or XML from which GenBank and
4243--    this format is derived at NCBI), it is presented in ASN.1 or XML for
4244--    automated parsing and processing. It is hoped that this compromise
4245--    will be useful for those bulk processing at the GenBank format level
4246--    of detail today. Since it is a compromise, a number of pragmatic
4247--    decisions have been made.
4248--
4249--  In pursuit of simplicity and familiarity a number of
4250--    fields do not have full substructure defined here where there is
4251--    already a standard GenBank format string. For example:
4252--
4253--   Date  DD-Mon-YYYY
--   Authors   LastName, Initials (with periods)
--   Journal   JournalName Volume (issue), page-range (year)
4256--   FeatureLocations as per GenBank feature table, but FeatureIntervals
4257--    may also be provided as a convenience
4258--   FeatureQualifiers  as per GenBank feature table
4259--   Primary has a string that represents a table to construct
4260--    a third party (TPA) sequence.
4261--   other-seqids can have strings with the "vertical bar format" sequence
4262--    identifiers used in BLAST for example, when they are non-genbank types.
4263--    Currently in GenBank format you only see GI, but there are others, like
4264--    patents, submitter clone names, etc which will appear here, as they
4265--    always have in the ASN.1 format, and full XML format.
4266--   source-db is a formatted text block for peptides in GenPept format that
4267--    carries information from the source protein database.
4268--
4269--  There are also a number of elements that could have been
4270--   more exactly specified, but in the interest of simplicity
4271--   have been simply left as options. For example..
4272--
4273--  accession and accession.version will always appear in a GenBank record
4274--   they are optional because this format can also be used for non-GenBank
4275--   sequences, and in that case will have only "other-seqids".
4276--
4277--  sequences will normally all have "sequence" filled in. But contig records
4278--    will have a "join" statement in the "contig" slot, and no "sequence".
4279--    We also may consider a retrieval option with no sequence of any kind
4280--     and no feature table to quickly check minimal values.
4281--
4282--  a reference may have an author list, or be from a consortium, or both.
4283--
4284--  some fields, such as taxonomy, do appear as separate elements in GenBank
4285--    format but without a specific linetype (in GenBank format this comes
4286--    under ORGANISM). Another example is the separation of primary accession
4287--    from the list of secondary accessions. In GenBank format primary
4288--    accession is just the first one on the list that includes all secondaries
4289--    after it.
4290--
4291--  create-date deserves special comment. The date you see on the right hand
--    side of the LOCUS line in GenBank format is actually the last date
--    the record was modified (or the update-date). The date the record was
4294--    first submitted to GenBank appears in the first submission citation in
4295--    the reference section. Internally in the databases and ASN.1 NCBI keeps
4296--    the first date the record was released into the sequence database at
4297--    NCBI as create-date. For records from EMBL, which supports create-date,
4298--    it is the date provided by EMBL. For DDBJ records, which do not supply
4299--    a create-date (same as GenBank format) the create-date is the first date
4300--    NCBI saw the record from DDBJ. For older GenBank records, before NCBI
4301--    took responsibility for GenBank, it is just the first date NCBI saw the
4302--    record. Create-date can be very useful, so we expose it here, but users
4303--    must understand it is only an approximation and comes from many sources,
4304--    and with many exceptions and caveats. It does NOT tell you the first
4305--    date the public might have seen this record and thus is NOT an accurate
4306--    measure for legal issues of precedence.
4307--
4308--********
4309
-- Ordered list of GBSeq records.
GBSet ::= SEQUENCE OF GBSeq
4311
-- One sequence record in GenBank-report form. Only 'length' and 'moltype'
-- are mandatory; every other element is OPTIONAL (see the notes above for
-- the reasoning behind the optional elements).
GBSeq ::= SEQUENCE {
    locus VisibleString OPTIONAL ,
    length INTEGER ,
    strandedness VisibleString OPTIONAL ,
    moltype VisibleString ,
    topology VisibleString OPTIONAL ,
    division VisibleString OPTIONAL ,
    update-date VisibleString OPTIONAL ,  -- DD-Mon-YYYY
    create-date VisibleString OPTIONAL ,  -- DD-Mon-YYYY; an approximation, see notes above
    update-release VisibleString OPTIONAL ,
    create-release VisibleString OPTIONAL ,
    definition VisibleString OPTIONAL ,
    primary-accession VisibleString OPTIONAL ,  -- absent for non-GenBank sequences
    entry-version VisibleString OPTIONAL ,
    accession-version VisibleString OPTIONAL ,  -- absent for non-GenBank sequences
    other-seqids SEQUENCE OF GBSeqid OPTIONAL ,  -- "vertical bar format" identifiers
    secondary-accessions SEQUENCE OF GBSecondary-accn OPTIONAL,
    project VisibleString OPTIONAL ,
    keywords SEQUENCE OF GBKeyword OPTIONAL ,
    segment VisibleString OPTIONAL ,
    source VisibleString OPTIONAL ,
    organism VisibleString OPTIONAL ,
    taxonomy VisibleString OPTIONAL ,
    references SEQUENCE OF GBReference OPTIONAL ,
    comment VisibleString OPTIONAL ,
    comment-set SEQUENCE OF GBComment OPTIONAL ,
    struc-comments SEQUENCE OF GBStrucComment OPTIONAL ,
    primary VisibleString OPTIONAL ,  -- table to construct a third party (TPA) sequence
    source-db VisibleString OPTIONAL ,  -- GenPept: text block from the source protein database
    database-reference VisibleString OPTIONAL ,
    feature-table SEQUENCE OF GBFeature OPTIONAL ,
    feature-set SEQUENCE OF GBFeatureSet OPTIONAL ,
    sequence VisibleString OPTIONAL ,  -- Optional for contig, wgs, etc.
    contig VisibleString OPTIONAL ,  -- "join" statement used by contig records
    alt-seq SEQUENCE OF GBAltSeqData OPTIONAL ,
    xrefs SEQUENCE OF GBXref OPTIONAL
}
4349
-- Element type of GBSeq.other-seqids.
GBSeqid ::= VisibleString

-- Element type of GBSeq.secondary-accessions.
GBSecondary-accn ::= VisibleString

-- Element type of GBSeq.keywords.
GBKeyword ::= VisibleString
4355
-- One literature reference. 'reference' and 'journal' are mandatory.
-- A reference may have an author list, a consortium, or both.
GBReference ::= SEQUENCE {
    reference VisibleString ,
    position VisibleString OPTIONAL ,
    authors SEQUENCE OF GBAuthor OPTIONAL ,
    consortium VisibleString OPTIONAL ,
    title VisibleString OPTIONAL ,
    journal VisibleString ,  -- JournalName Volume (issue), page-range (year)
    xref SEQUENCE OF GBXref OPTIONAL ,
    pubmed INTEGER OPTIONAL ,
    remark VisibleString OPTIONAL
}
4367
-- One author name, formatted "LastName, Initials" (with periods).
GBAuthor ::= VisibleString
4369
-- Cross-reference expressed as a (dbname, id) pair of strings; both mandatory.
-- Used by GBSeq.xrefs, GBReference.xref and GBFeature.xrefs.
GBXref ::= SEQUENCE {
    dbname VisibleString ,
    id VisibleString
}
4374
-- One entry of GBSeq.comment-set: an OPTIONAL type string and a mandatory
-- list of paragraphs.
GBComment ::= SEQUENCE {
    type VisibleString OPTIONAL ,
    paragraphs SEQUENCE OF GBCommentParagraph
}
4379
-- Element type of GBComment.paragraphs.
GBCommentParagraph ::= VisibleString
4381
-- One entry of GBSeq.struc-comments: an OPTIONAL name and a mandatory list
-- of items.
GBStrucComment ::= SEQUENCE {
    name VisibleString OPTIONAL ,
    items SEQUENCE OF GBStrucCommentItem
}
4386
-- One item of a GBStrucComment. All three elements are OPTIONAL.
GBStrucCommentItem ::= SEQUENCE {
    tag VisibleString OPTIONAL ,
    value VisibleString OPTIONAL ,
    url VisibleString OPTIONAL
}
4392
-- One entry of GBSeq.feature-set: a list of features with an OPTIONAL
-- annot-source string.
GBFeatureSet ::= SEQUENCE {
    annot-source VisibleString OPTIONAL ,
    features SEQUENCE OF GBFeature
}
4397
-- One feature. 'key' and 'location' are mandatory.
GBFeature ::= SEQUENCE {
    key VisibleString ,
    location VisibleString ,  -- as per GenBank feature table
    intervals SEQUENCE OF GBInterval OPTIONAL ,  -- provided as a convenience
    operator VisibleString OPTIONAL ,
    partial5 BOOLEAN OPTIONAL ,
    partial3 BOOLEAN OPTIONAL ,
    quals SEQUENCE OF GBQualifier OPTIONAL ,  -- as per GenBank feature table
    xrefs SEQUENCE OF GBXref OPTIONAL
}
4408
-- One interval; used by GBFeature.intervals and GBAltSeqItem.interval.
-- 'accession' is the only mandatory element.
GBInterval ::= SEQUENCE {
    from INTEGER OPTIONAL ,
    to INTEGER OPTIONAL ,
    point INTEGER OPTIONAL ,
    iscomp BOOLEAN OPTIONAL ,
    interbp BOOLEAN OPTIONAL ,
    accession VisibleString
}
4417
-- One feature qualifier: a mandatory name with an OPTIONAL value.
GBQualifier ::= SEQUENCE {
    name VisibleString ,
    value VisibleString OPTIONAL
}
4422
-- One entry of GBSeq.alt-seq: a named, OPTIONAL list of GBAltSeqItem.
GBAltSeqData ::= SEQUENCE {
    name VisibleString ,  -- e.g., contig, wgs, scaffold, cage, genome
    items SEQUENCE OF GBAltSeqItem OPTIONAL
}
4427
-- One item of a GBAltSeqData. Every element is OPTIONAL; the schema does not
-- itself constrain which combinations (interval, gap-*, accession range,
-- value) may appear together.
GBAltSeqItem ::= SEQUENCE {
    interval GBInterval OPTIONAL ,
    isgap BOOLEAN OPTIONAL ,
    gap-length INTEGER OPTIONAL ,
    gap-type VisibleString OPTIONAL ,
    gap-linkage VisibleString OPTIONAL ,
    gap-comment VisibleString OPTIONAL ,
    first-accn VisibleString OPTIONAL ,
    last-accn VisibleString OPTIONAL ,
    value VisibleString OPTIONAL
}
4439
4440END
4441
4442--$Revision: 1.9 $
4443--************************************************************************
4444--
4445-- ASN.1 and XML for the components of a GenBank/EMBL/DDBJ sequence record
4446-- The International Nucleotide Sequence Database (INSD) collaboration
4447-- Version 1.6, 25 May 2010
4448--
4449--************************************************************************
4450
4451INSD-INSDSeq DEFINITIONS ::=
4452BEGIN
4453
4454--  INSDSeq provides the elements of a sequence as presented in the
4455--    GenBank/EMBL/DDBJ-style flatfile formats, with a small amount of
4456--    additional structure.
4457--    Although this single perspective of the three flatfile formats
4458--    provides a useful simplification, it hides to some extent the
4459--    details of the actual data underlying those formats. Nevertheless,
4460--    the XML version of INSD-Seq is being provided with
4461--    the hopes that it will prove useful to those who bulk-process
4462--    sequence data at the flatfile-format level of detail. Further
4463--    documentation regarding the content and conventions of those formats
4464--    can be found at:
4465--
4466--    URLs for the DDBJ, EMBL, and GenBank Feature Table Document:
4467--    http://www.ddbj.nig.ac.jp/FT/full_index.html
4468--    http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html
4469--    http://www.ncbi.nlm.nih.gov/projects/collab/FT/index.html
4470--
4471--    URLs for DDBJ, EMBL, and GenBank Release Notes :
4472--    ftp://ftp.ddbj.nig.ac.jp/database/ddbj/ddbjrel.txt
4473--    http://www.ebi.ac.uk/embl/Documentation/Release_notes/current/relnotes.html
4474--    ftp://ftp.ncbi.nih.gov/genbank/gbrel.txt
4475--
4476--    Because INSDSeq is a compromise, a number of pragmatic decisions have
4477--    been made:
4478--
4479--  In pursuit of simplicity and familiarity a number of fields do not
4480--    have full substructure defined here where there is already a
4481--    standard flatfile format string. For example:
4482--
4483--   Dates:      DD-MON-YYYY (eg 10-JUN-2003)
4484--
4485--   Author:     LastName, Initials  (eg Smith, J.N.)
4486--            or Lastname Initials   (eg Smith J.N.)
4487--
4488--   Journal:    JournalName Volume (issue), page-range (year)
4489--            or JournalName Volume(issue):page-range(year)
4490--            eg Appl. Environ. Microbiol. 61 (4), 1646-1648 (1995)
4491--               Appl. Environ. Microbiol. 61(4):1646-1648(1995).
4492--
--  FeatureLocations are represented as in the flatfile feature table,
4494--    but FeatureIntervals may also be provided as a convenience
4495--
4496--  FeatureQualifiers are represented as in the flatfile feature table.
4497--
4498--  Primary has a string that represents a table to construct
4499--    a third party (TPA) sequence.
4500--
4501--  other-seqids can have strings with the "vertical bar format" sequence
4502--    identifiers used in BLAST for example, when they are non-INSD types.
4503--
4504--  Currently in flatfile format you only see Accession numbers, but there
4505--    are others, like patents, submitter clone names, etc which will
4506--    appear here
4507--
4508--  There are also a number of elements that could have been more exactly
4509--    specified, but in the interest of simplicity have been simply left as
4510--    optional. For example:
4511--
4512--  All publicly accessible sequence records in INSDSeq format will
--    include accession and accession.version. However, these elements are
--    optional in INSDSeq so that this format can also be used
4515--    for non-public sequence data, prior to the assignment of accessions and
4516--    version numbers. In such cases, records will have only "other-seqids".
4517--
4518--  sequences will normally all have "sequence" filled in. But contig records
4519--    will have a "join" statement in the "contig" slot, and no "sequence".
4520--    We also may consider a retrieval option with no sequence of any kind
4521--    and no feature table to quickly check minimal values.
4522--
4523--  Four (optional) elements are specific to records represented via the EMBL
4524--    sequence database: INSDSeq_update-release, INSDSeq_create-release,
4525--    INSDSeq_entry-version, and INSDSeq_database-reference.
4526--
4527--  One (optional) element is specific to records originating at the GenBank
4528--    and DDBJ sequence databases: INSDSeq_segment.
4529--
4530--********
4531
-- Ordered list of INSDSeq records.
INSDSet ::= SEQUENCE OF INSDSeq
4533
-- One sequence record in flatfile-style form. Only 'length' and 'moltype'
-- are mandatory; every other element is OPTIONAL.
INSDSeq ::= SEQUENCE {
    locus VisibleString OPTIONAL ,
    length INTEGER ,
    strandedness VisibleString OPTIONAL ,
    moltype VisibleString ,
    topology VisibleString OPTIONAL ,
    division VisibleString OPTIONAL ,
    update-date VisibleString OPTIONAL ,  -- DD-MON-YYYY
    create-date VisibleString OPTIONAL ,  -- DD-MON-YYYY
    update-release VisibleString OPTIONAL ,  -- EMBL-specific
    create-release VisibleString OPTIONAL ,  -- EMBL-specific
    definition VisibleString OPTIONAL ,
    primary-accession VisibleString OPTIONAL ,  -- absent before an accession is assigned
    entry-version VisibleString OPTIONAL ,  -- EMBL-specific
    accession-version VisibleString OPTIONAL ,  -- absent before a version is assigned
    other-seqids SEQUENCE OF INSDSeqid OPTIONAL ,  -- "vertical bar format" identifiers
    secondary-accessions SEQUENCE OF INSDSecondary-accn OPTIONAL,

--  INSDSeq_project has been deprecated in favor of INSDSeq_xrefs.
--  This element may be removed from a future version of this DTD.

    project VisibleString OPTIONAL ,

    keywords SEQUENCE OF INSDKeyword OPTIONAL ,
    segment VisibleString OPTIONAL ,  -- GenBank/DDBJ-specific
    source VisibleString OPTIONAL ,
    organism VisibleString OPTIONAL ,
    taxonomy VisibleString OPTIONAL ,
    references SEQUENCE OF INSDReference OPTIONAL ,
    comment VisibleString OPTIONAL ,
    comment-set SEQUENCE OF INSDComment OPTIONAL ,
    struc-comments SEQUENCE OF INSDStrucComment OPTIONAL ,
    primary VisibleString OPTIONAL ,  -- table to construct a third party (TPA) sequence
    source-db VisibleString OPTIONAL ,
    database-reference VisibleString OPTIONAL ,  -- EMBL-specific
    feature-table SEQUENCE OF INSDFeature OPTIONAL ,
    feature-set SEQUENCE OF INSDFeatureSet OPTIONAL ,
    sequence VisibleString OPTIONAL ,  -- Optional for contig, wgs, etc.
    contig VisibleString OPTIONAL ,  -- "join" statement used by contig records
    alt-seq SEQUENCE OF INSDAltSeqData OPTIONAL ,

--  INSDSeq_xrefs provides cross-references from a sequence record
--  to other database resources. These cross-references are at the
--  level of the entire record, rather than at the level of a specific
--  feature. These cross-references can include: BioProject, BioSample,
--  Sequence Read Archive, etc.

    xrefs SEQUENCE OF INSDXref OPTIONAL
}
4583
-- Element type of INSDSeq.other-seqids.
INSDSeqid ::= VisibleString

-- Element type of INSDSeq.secondary-accessions.
INSDSecondary-accn ::= VisibleString

-- Element type of INSDSeq.keywords.
INSDKeyword ::= VisibleString
4589
4590-- INSDReference_position contains a string value indicating the
4591-- basepair span(s) to which a reference applies. The allowable
4592-- formats are:
4593--
4594--   X..Y  : Where X and Y are integers separated by two periods,
4595--           X >= 1 , Y <= sequence length, and X <= Y
4596--
4597--           Multiple basepair spans can exist, separated by a
4598--           semi-colon and a space. For example : 10..20; 100..500
4599--
4600--   sites : The string literal 'sites', indicating that a reference
4601--           provides sequence annotation information, but the specific
4602--           basepair spans are either not captured, or were too numerous
4603--           to record.
4604--
--           The 'sites' literal string is singly occurring, and
--           cannot be used in conjunction with any X..Y basepair spans.
4607--
4608--           'sites' is a convention utilized by GenBank, and might
4609--           not be presented in XML provided by EMBL and DDBJ.
4610--
4611--   References that lack an INSDReference_position element are not
4612--   attributed to any particular region of the sequence.
4613
-- One literature reference. 'reference' and 'journal' are mandatory.
INSDReference ::= SEQUENCE {
    reference VisibleString ,
    position VisibleString OPTIONAL ,  -- "X..Y" span(s) separated by "; ", or 'sites'
    authors SEQUENCE OF INSDAuthor OPTIONAL ,
    consortium VisibleString OPTIONAL ,
    title VisibleString OPTIONAL ,
    journal VisibleString ,  -- JournalName Volume (issue), page-range (year)
    xref SEQUENCE OF INSDXref OPTIONAL ,
    pubmed INTEGER OPTIONAL ,
    remark VisibleString OPTIONAL
}
4625
-- One author name: "LastName, Initials" or "Lastname Initials".
INSDAuthor ::= VisibleString
4627
-- INSDXref provides a method for referring to records in
-- other databases. INSDXref_dbname is a string value that
-- provides the name of the database, and INSDXref_id
-- is a string value that provides the record's identifier
-- in that database. Both elements are mandatory.

INSDXref ::= SEQUENCE {
    dbname VisibleString ,
    id VisibleString
}
4638
-- One entry of INSDSeq.comment-set: an OPTIONAL type string and a mandatory
-- list of paragraphs.
INSDComment ::= SEQUENCE {
    type VisibleString OPTIONAL ,
    paragraphs SEQUENCE OF INSDCommentParagraph
}
4643
-- Element type of INSDComment.paragraphs.
INSDCommentParagraph ::= VisibleString
4645
-- One entry of INSDSeq.struc-comments: an OPTIONAL name and a mandatory list
-- of items.
INSDStrucComment ::= SEQUENCE {
    name VisibleString OPTIONAL ,
    items SEQUENCE OF INSDStrucCommentItem
}
4650
-- One item of an INSDStrucComment. All three elements are OPTIONAL.
INSDStrucCommentItem ::= SEQUENCE {
    tag VisibleString OPTIONAL ,
    value VisibleString OPTIONAL ,
    url VisibleString OPTIONAL
}
4656
4657-- INSDFeature_operator contains a string value describing
4658-- the relationship among a set of INSDInterval within
4659-- INSDFeature_intervals. The allowable formats are:
4660--
4661--   join :  The string literal 'join' indicates that the
4662--           INSDInterval intervals are biologically joined
4663--           together into a contiguous molecule.
4664--
4665--   order : The string literal 'order' indicates that the
4666--           INSDInterval intervals are in the presented
4667--           order, but they are not necessarily contiguous.
4668--
4669--   Either 'join' or 'order' is required if INSDFeature_intervals
4670--   is comprised of more than one INSDInterval .
4671
-- INSDFeatureSet is one entry of INSDSeq.feature-set: a list of INSDFeature
-- with an OPTIONAL annot-source string.
INSDFeatureSet ::= SEQUENCE {
    annot-source VisibleString OPTIONAL ,
    features SEQUENCE OF INSDFeature
}
4676
-- One feature. 'key' and 'location' are mandatory.
INSDFeature ::= SEQUENCE {
    key VisibleString ,
    location VisibleString ,  -- as in the flatfile feature table
    intervals SEQUENCE OF INSDInterval OPTIONAL ,  -- provided as a convenience
    operator VisibleString OPTIONAL ,  -- 'join' or 'order'; needed when intervals has more than one entry
    partial5 BOOLEAN OPTIONAL ,
    partial3 BOOLEAN OPTIONAL ,
    quals SEQUENCE OF INSDQualifier OPTIONAL ,  -- as in the flatfile feature table
    xrefs SEQUENCE OF INSDXref OPTIONAL
}
4687
4688-- INSDInterval_iscomp is a boolean indicating whether
4689-- an INSDInterval_from / INSDInterval_to location
4690-- represents a location on the complement strand.
4691-- When INSDInterval_iscomp is TRUE, it essentially
4692-- confirms that a 'from' value which is greater than
4693-- a 'to' value is intentional, because the location
4694-- is on the opposite strand of the presented sequence.
4695
4696-- INSDInterval_interbp is a boolean indicating whether
4697-- a feature (such as a restriction site) is located
4698-- between two adjacent basepairs. When INSDInterval_interbp
4699-- is TRUE, the 'from' and 'to' values will differ by
4700-- exactly one base for linear molecules. For circular
4701-- molecules, if the inter-basepair position falls between
4702-- the last and the first base, then 'from' will be the
4703-- final base (equal to the length of the sequence), and
4704-- 'to' will have a value of 1.
4705
-- One interval; used by INSDFeature.intervals and INSDAltSeqItem.interval.
-- 'accession' is the only mandatory element.
INSDInterval ::= SEQUENCE {
    from INTEGER OPTIONAL ,
    to INTEGER OPTIONAL ,
    point INTEGER OPTIONAL ,
    iscomp BOOLEAN OPTIONAL ,   -- TRUE: from/to location is on the complement strand
    interbp BOOLEAN OPTIONAL ,  -- TRUE: feature lies between two adjacent basepairs
    accession VisibleString
}
4714
-- One feature qualifier: a mandatory name with an OPTIONAL value.
INSDQualifier ::= SEQUENCE {
    name VisibleString ,
    value VisibleString OPTIONAL
}
4719
-- INSDAltSeqData provides for sequence representations other than
-- literal basepair abbreviations (INSDSeq_sequence), such as the
-- CONTIG/CO linetype of the GenBank and EMBL flatfile formats.
-- It also accommodates the specification of accession-number ranges,
-- which are presented on a WGS master record (for the contigs
-- and scaffolds of a WGS project).

INSDAltSeqData ::= SEQUENCE {
    name VisibleString ,  -- e.g., contig, wgs, scaffold, cage, genome
    items SEQUENCE OF INSDAltSeqItem OPTIONAL
}
4731
-- One item of an INSDAltSeqData. Every element is OPTIONAL; the schema does
-- not itself constrain which combinations (interval, gap-*, accession range,
-- value) may appear together.
INSDAltSeqItem ::= SEQUENCE {
    interval INSDInterval OPTIONAL ,
    isgap BOOLEAN OPTIONAL ,
    gap-length INTEGER OPTIONAL ,
    gap-type VisibleString OPTIONAL ,
    gap-linkage VisibleString OPTIONAL ,
    gap-comment VisibleString OPTIONAL ,
    first-accn VisibleString OPTIONAL ,  -- first-accn/last-accn: presumably the ends of an
    last-accn VisibleString OPTIONAL ,   -- accession-number range; TODO confirm
    value VisibleString OPTIONAL
}
4743
4744END
4745
4746--$Revision: 6.1 $
4747--**********************************************************************
4748--
4749--  ASN.1 for a tiny Bioseq in XML
4750--    basically a structured FASTA file with a few extras
4751--    in this case we drop all modularity of components
4752--      All ids are Optional - simpler structure, less checking
4753--      Components of organism are hard coded - can't easily add or change
4754--      sequence is just string whether DNA or protein
4755--  by James Ostell, 2000
4756--
4757--**********************************************************************
4758
NCBI-TSeq DEFINITIONS ::=
BEGIN

-- A single flat sequence record. All identifier elements (gi, accver, sid,
-- local) are OPTIONAL; seqtype, defline, length and sequence are mandatory.
TSeq ::= SEQUENCE {
	seqtype ENUMERATED {
		nucleotide (1),
		protein (2) },
	gi INTEGER OPTIONAL,
	accver VisibleString OPTIONAL,
	sid VisibleString OPTIONAL,
	local VisibleString OPTIONAL,
	taxid INTEGER OPTIONAL,	-- organism components are hard coded
	orgname VisibleString OPTIONAL,	-- as plain elements
	defline VisibleString,
	length INTEGER,
	sequence VisibleString }	-- just a string whether DNA or protein

TSeqSet ::= SEQUENCE OF TSeq    -- a bunch of them

END
4779
4780--$Id: scoremat.asn,v 1.14 2011/12/21 15:29:33 kazimird Exp $
4781-- ===========================================================================
4782--
4783--                            PUBLIC DOMAIN NOTICE
4784--               National Center for Biotechnology Information
4785--
4786--  This software/database is a "United States Government Work" under the
4787--  terms of the United States Copyright Act.  It was written as part of
4788--  the author's official duties as a United States Government employee and
4789--  thus cannot be copyrighted.  This software/database is freely available
4790--  to the public for use. The National Library of Medicine and the U.S.
4791--  Government have not placed any restriction on its use or reproduction.
4792--
4793--  Although all reasonable efforts have been taken to ensure the accuracy
4794--  and reliability of the software and data, the NLM and the U.S.
4795--  Government do not and cannot warrant the performance or results that
4796--  may be obtained by using this software or data. The NLM and the U.S.
4797--  Government disclaim all warranties, express or implied, including
4798--  warranties of performance, merchantability or fitness for any particular
4799--  purpose.
4800--
4801--  Please cite the author in any work or product based on this material.
4802--
4803-- ===========================================================================
4804--
4805-- Author:  Christiam Camacho
4806--
4807-- File Description:
4808--      ASN.1 definitions for scoring matrix
4809--
4810-- ===========================================================================
4811
4812NCBI-ScoreMat DEFINITIONS ::= BEGIN
4813
4814EXPORTS    Pssm, PssmIntermediateData, PssmFinalData,
4815           PssmParameters, PssmWithParameters;
4816
4817IMPORTS    Object-id   FROM NCBI-General
4818           Seq-entry   FROM NCBI-Seqset;
4819
4820-- a rudimentary block/core-model, to be used with block-based alignment
4821-- routines and threading
4822
-- One typed property attached to a CoreBlock. 'type' is mandatory; the value
-- may be supplied as an integer, as text, as both or not at all, since both
-- value elements are OPTIONAL.
BlockProperty ::= SEQUENCE {
  type     INTEGER { unassigned  (0),
                     threshold   (1),       -- score threshold for heuristics
		     minscore    (2),       -- observed minimum score in CD
		     maxscore    (3),       -- observed maximum score in CD
		     meanscore   (4),       -- observed mean score in CD
		     variance    (5),       -- observed score variance
		     name       (10),       -- just name the block
		     is-optional(20),       -- block may not have to be used
                     other     (255) },
  intvalue  INTEGER OPTIONAL,
  textvalue VisibleString OPTIONAL
}
4836
-- One block of a CoreDef: mandatory start/stop on the query, with OPTIONAL
-- extension limits and an OPTIONAL list of BlockProperty.
CoreBlock ::= SEQUENCE {
  start          INTEGER,                   -- begin of block on query
  stop           INTEGER,                   -- end of block on query
  minstart       INTEGER OPTIONAL,          -- optional N-terminal extension
  maxstop        INTEGER OPTIONAL,          -- optional C-terminal extension
  property       SEQUENCE OF BlockProperty OPTIONAL
}
4844
-- Length bounds for one unaligned region. Both elements carry DEFAULT values
-- (0 and 100000), so either may be omitted from an encoded value.
LoopConstraint ::= SEQUENCE {
  minlength      INTEGER DEFAULT 0,         -- minimum length of unaligned region
  maxlength      INTEGER DEFAULT 100000     -- maximum length of unaligned region
}
4849
-- Block/core model: a block count, the blocks themselves and the loop
-- constraints. Per the element comments, 'blocks' is expected to hold nblocks
-- entries and 'loops' nblocks+1 entries; the schema itself does not enforce
-- these counts.
CoreDef ::= SEQUENCE {
  nblocks        INTEGER,                   -- number of core elements/blocks
  blocks         SEQUENCE OF CoreBlock,     -- nblocks locations
  loops          SEQUENCE OF LoopConstraint, -- (nblocks+1) constraints

  isDiscontinuous BOOLEAN OPTIONAL,         -- is it a discontinuous domain

  insertions SEQUENCE OF INTEGER OPTIONAL   -- positions of long insertions
}
4859
-- One annotated site on a PSSM. Only startPosition and stopPosition are
-- mandatory; every other element is OPTIONAL.
Site-annot ::= SEQUENCE {
  startPosition  INTEGER,                -- location of the annotation,
  stopPosition   INTEGER,                -- start and stop position in the
                                         -- PSSM

  description    VisibleString OPTIONAL, -- holds description or names, that
                                         -- can be used for labels in
                                         -- visualization

  type           INTEGER OPTIONAL,       -- type of the annotated feature,
                                         -- similarly to Align-annot in
                                         -- NCBI-Cdd

  aliases        SEQUENCE OF VisibleString OPTIONAL, -- additional names for
                                                     -- the annotation

  motif          VisibleString OPTIONAL, -- motif to validate mapping of sites

  motifuse       INTEGER OPTIONAL        -- 0 for validation
                                         -- 1 for motif in seqloc
                                         -- 2 for multiple motifs in seqloc
}
4882
-- Ordered list of Site-annot.
Site-annot-set ::= SEQUENCE OF Site-annot
4884
4885-- ===========================================================================
4886-- PSI-BLAST, formatrpsdb, RPS-BLAST workflow:
4887-- ===========================================
4888--
4889-- Two possible inputs to PSI-BLAST and formatrpsdb:
4890-- 1) PssmWithParams where pssm field contains intermediate PSSM data (matrix
4891--    of frequency ratios)
4892-- 2) PssmWithParams where pssm field contains final PSSM data (matrix of
4893--    scores and statistical parameters) - such as written by cddumper
4894--
4895-- In case 1, PSI-BLAST's PSSM engine is invoked to create the PSSM and perform
4896-- the PSI-BLAST search or build the PSSM to then build the RPS-BLAST database.
-- In case 2, PSI-BLAST's PSSM engine is not invoked and the matrix of scores
-- and statistical parameters are used to perform the search in PSI-BLAST and the
4899-- same data and the data in PssmWithParams::params::rpsdbparams is used to
4900-- build the PSSM and ultimately the RPS-BLAST database
4901--
4902--
4903--                 reads    ++++++++++++++ writes
4904-- PssmWithParams  ====>    + PSI-BLAST  + =====> PssmWithParams
4905--                          ++++++++++++++             |  ^
4906--         ^                                           |  |
4907--         |                                           |  |
4908--         +===========================================+  |
4909--                                                     |  |
4910--         +===========================================+  |
4911--         |                                              |
4912-- reads   |                                              |
4913--         v                                              |
4914--  +++++++++++++++ writes +++++++++++++++++++++++        |
4915--  | formatrpsdb | =====> | RPS-BLAST databases |        |
4916--  +++++++++++++++        +++++++++++++++++++++++        |
4917--                                   ^                    |
4918--                                   |                    |
4919--                                   | reads              |
4920--                             +++++++++++++              |
4921--                             | RPS-BLAST |              |
4922--                             +++++++++++++              |
4923--                                                        |
4924--       reads  ++++++++++++               writes         |
4925--  Cdd ======> | cddumper | =============================+
4926--              ++++++++++++
4927--
4928-- ===========================================================================
4929
-- Final form of a PSSM: its scores together with the associated statistical
-- parameters. The scores must be stored with the same dimensions and order
-- as specified in Pssm::numRows, Pssm::numColumns, and Pssm::byRow.
PssmFinalData ::= SEQUENCE {
    -- the PSSM's scores
    scores          SEQUENCE OF INTEGER,

    -- Karlin & Altschul parameters produced during the PSSM's calculation
    lambda          REAL,
    kappa           REAL,
    h               REAL,

    -- Scaling factor used to obtain more precision when building the PSSM
    -- (i.e.: scores are scaled by this value). PSI-BLAST's PSSM engine
    -- generates PSSMs which are not scaled-up by default; however, when
    -- PSI-BLAST is given a scaled-up PSSM (indicated by a scalingFactor
    -- greater than 1), it will scale the PSSM down to perform the initial
    -- stages of the search with it.
    -- N.B.: if formatrpsdb is provided scaled-up PSSMs when building
    -- RPS-BLAST databases, it will ensure that all PSSMs used to build the
    -- RPS-BLAST database are scaled by the same factor (otherwise, RPS-BLAST
    -- will silently produce incorrect results).
    scalingFactor   INTEGER DEFAULT 1,

    -- Karlin & Altschul parameters produced during the PSSM's calculation
    -- (these three fields are optional)
    lambdaUngapped  REAL OPTIONAL,
    kappaUngapped   REAL OPTIONAL,
    hUngapped       REAL OPTIONAL
}
4968
-- Intermediate data from which the PSSM's scores and statistical parameters
-- are created. The values must be stored with the same dimensions and order
-- as specified in Pssm::numRows, Pssm::numColumns, and Pssm::byRow.
PssmIntermediateData ::= SEQUENCE {
    -- Observed residue frequencies (or counts) per position of the PSSM,
    -- prior to application of pseudocounts.
    resFreqsPerPos          SEQUENCE OF INTEGER OPTIONAL,

    -- Weighted observed residue frequencies per position of the PSSM
    -- (N.B.: each position's weights should add up to 1.0).
    -- Corresponds to f_i (f sub i) in equation 2 of
    -- Nucleic Acids Res. 2001 Jul 15;29(14):2994-3005.
    -- NOTE: needed for diagnostics information only (i.e.: the psiblast
    -- option -out_ascii_pssm).
    weightedResFreqsPerPos  SEQUENCE OF REAL OPTIONAL,

    -- The PSSM's frequency ratios (the only mandatory field).
    freqRatios              SEQUENCE OF REAL,

    -- Information content per position of the PSSM.
    -- NOTE: needed for diagnostics information only (i.e.: the psiblast
    -- option -out_ascii_pssm).
    informationContent      SEQUENCE OF REAL OPTIONAL,

    -- Relative weight for columns of the PSSM without gaps to pseudocounts.
    -- NOTE: needed for diagnostics information only (i.e.: the psiblast
    -- option -out_ascii_pssm).
    gaplessColumnWeights    SEQUENCE OF REAL OPTIONAL,

    -- Used in sequence weights computation.
    -- NOTE: needed for diagnostics information only (i.e.: the psiblast
    -- option -out_ascii_pssm).
    sigma                   SEQUENCE OF REAL OPTIONAL,

    -- Length of the aligned regions per position of the query sequence.
    -- NOTE: needed for diagnostics information only (i.e.: the psiblast
    -- option -out_ascii_pssm).
    intervalSizes           SEQUENCE OF INTEGER OPTIONAL,

    -- Number of matching sequences per position of the PSSM (including
    -- the query).
    -- NOTE: needed for diagnostics information only (i.e.: the psiblast
    -- option -out_ascii_pssm).
    numMatchingSeqs         SEQUENCE OF INTEGER OPTIONAL,

    -- Number of independent observations per position of the PSSM.
    -- NOTE: needed for building the CDD database for DELTA-BLAST.
    numIndeptObsr           SEQUENCE OF REAL OPTIONAL
}
5020
-- Position-specific scoring matrix
--
-- Column indices of the PSSM refer to positions of the query/master
-- sequence, i.e. the number of columns (N) equals the length of the
-- query/master sequence.
-- Row indices refer to individual amino acid types, i.e. the number of
-- rows (M) equals the number of different residues in the alphabet in use.
-- Consequently, row labels are amino acid identifiers.
--
-- PSSMs are stored as linear arrays of integers. By default they are stored
-- column-by-column: M values for the first column, followed by M values for
-- the second column, and so on. To give external applications flexibility,
-- the boolean field "byRow" is provided to specify the storage order.
Pssm ::= SEQUENCE {
    -- Is this a protein or nucleotide scoring matrix?
    isProtein           BOOLEAN DEFAULT TRUE,

    -- PSSM identifier
    identifier          Object-id OPTIONAL,

    -- The dimensions of the matrix are returned so the client can verify
    -- that all data was received.
    numRows             INTEGER,    -- number of rows
    numColumns          INTEGER,    -- number of columns

    -- rowLabels is given to note the order of residue types so that it can
    -- be cross-checked between applications.
    -- If this field is not given, the matrix values are presented in
    -- alphabet order: ncbistdaa is used for protein, ncbi4na for nucleotide.
    -- For proteins the values returned correspond to
    -- (-,-), (-,A), (-,B), (-,C) ... (A,-), (A,A), (A,B), (A,C) ...
    rowLabels           SEQUENCE OF VisibleString OPTIONAL,

    -- Are matrices stored row by row?
    byRow               BOOLEAN DEFAULT FALSE,

    -- PSSM representative sequence (master)
    query               Seq-entry OPTIONAL,

    -- Both intermediateData and finalData can be provided, but at least one
    -- of them must be provided.
    -- N.B.: by default PSI-BLAST will return the PSSM in its
    -- PssmIntermediateData representation.

    -- Intermediate data for the PSSM
    intermediateData    PssmIntermediateData OPTIONAL,

    -- Final representation for the PSSM
    finalData           PssmFinalData OPTIONAL
}
5074
-- Parameters set at creation time of the PSSM; this structure is used to
-- create the RPS-BLAST database auxiliary file (*.aux).
-- formatrpsdb also uses the matrixName field to build a PSSM from a Pssm
-- structure which only contains PssmIntermediateData.
FormatRpsDbParameters ::= SEQUENCE {
    -- Name of the underlying score matrix whose frequency ratios were
    -- used in PSSM construction (e.g.: BLOSUM62).
    matrixName  VisibleString,

    -- Gap opening penalty corresponding to the matrix above.
    gapOpen     INTEGER OPTIONAL,

    -- Gap extension penalty corresponding to the matrix above.
    gapExtend   INTEGER OPTIONAL
}
5092
-- Populated by the PSSM engine of PSI-BLAST; the original source for these
-- values is the set of PSI-BLAST options specified using the BLAST options
-- API.
PssmParameters ::= SEQUENCE {
    -- Pseudocount constant used for the PSSM. Corresponds to beta in
    -- equation 2 of Nucleic Acids Res. 2001 Jul 15;29(14):2994-3005.
    pseudocount     INTEGER OPTIONAL,

    -- Data needed by formatrpsdb to create RPS-BLAST databases.
    -- matrixName is populated by PSI-BLAST.
    rpsdbparams     FormatRpsDbParameters OPTIONAL,

    -- Alignment constraints needed by the sequence-structure threader
    -- and other global or local block-alignment algorithms.
    constraints     CoreDef OPTIONAL,

    -- Bit score threshold for specific conserved domain hits.
    bitScoreThresh  REAL OPTIONAL,

    -- Conserved functional sites with annotations.
    annotatedSites  Site-annot-set OPTIONAL
}
5115
-- Envelope holding a PSSM and the parameters used to create it.
-- Provided for use in PSI-BLAST, formatrpsdb, and for the structure group.
PssmWithParameters ::= SEQUENCE {
    -- Applicable to PSI-BLAST and formatrpsdb.
    -- When both the intermediate and final PSSM data are provided in this
    -- field, the final data (matrix of scores and associated statistical
    -- parameters) takes precedence and is used for further processing.
    -- Rationale: the PSSM's scores and statistical parameters might have
    -- been calculated by other applications, and it might not be possible
    -- to recreate them by using PSI-BLAST's PSSM engine.
    pssm        Pssm,

    -- The rpsdbparams of this field specifies the values of options for
    -- processing by formatrpsdb. If these are not set, the command line
    -- defaults of formatrpsdb are applied.
    -- PSI-BLAST uses this field to verify that the underlying score matrix
    -- used to build the PSSM is the same as the one being specified through
    -- the BLAST Options API. If this field is omitted, no verification will
    -- be performed, so be careful to keep track of what matrix was used to
    -- build the PSSM or else the results produced by PSI-BLAST will be
    -- unreliable.
    params      PssmParameters OPTIONAL
}
5140
5141END
5142--$Revision: 1.167 $
5143--**********************************************************************
5144--
5145--  NCBI ASN.1 macro editing language specifications
5146--
5147--  by Colleen Bollin, 2007
5148--
5149--**********************************************************************
5150
5151NCBI-Macro DEFINITIONS ::=
5152BEGIN
5153
5154EXPORTS AECR-action, Parse-action, Macro-action-list, Suspect-rule-set;
5155
5156-- simple constraints --
5157
-- Values for String-constraint.match-location (which defaults to contains).
String-location ::= ENUMERATED {
    contains (1),
    equals (2),
    starts (3),
    ends (4),
    inlist (5)
}
5164
-- An optional word with an optional set of synonyms, plus two flags
-- (case-sensitive, whole-word) that both default to FALSE.
Word-substitution ::= SEQUENCE {
    word            VisibleString OPTIONAL,
    synonyms        SET OF VisibleString OPTIONAL,
    case-sensitive  BOOLEAN DEFAULT FALSE,
    whole-word      BOOLEAN DEFAULT FALSE
}
5170
-- Unordered collection of Word-substitution entries
-- (the type of String-constraint.ignore-words).
Word-substitution-set ::= SET OF Word-substitution
5172
-- Text constraint: optional match-text, a match-location (default
-- contains), an optional ignore-words set, and a series of BOOLEAN flags
-- that all default to FALSE.
String-constraint ::= SEQUENCE {
    match-text          VisibleString OPTIONAL,
    match-location      String-location DEFAULT contains,
    case-sensitive      BOOLEAN DEFAULT FALSE,
    ignore-space        BOOLEAN DEFAULT FALSE,
    ignore-punct        BOOLEAN DEFAULT FALSE,
    ignore-words        Word-substitution-set OPTIONAL,
    whole-word          BOOLEAN DEFAULT FALSE,
    not-present         BOOLEAN DEFAULT FALSE,
    is-all-caps         BOOLEAN DEFAULT FALSE,
    is-all-lower        BOOLEAN DEFAULT FALSE,
    is-all-punct        BOOLEAN DEFAULT FALSE,
    ignore-weasel       BOOLEAN DEFAULT FALSE,
    is-first-cap        BOOLEAN DEFAULT FALSE,
    is-first-each-cap   BOOLEAN DEFAULT FALSE
}
5188
-- Unordered collection of String-constraint entries
-- (used by Translation-constraint).
String-constraint-set ::= SET OF String-constraint
5190
-- Values for Location-constraint.strand (which defaults to any).
Strand-constraint ::= ENUMERATED {
    any (0),
    plus (1),
    minus (2)
}
5195
-- Values for Location-constraint.seq-type (which defaults to any).
Seqtype-constraint ::= ENUMERATED {
    any (0),
    nuc (1),
    prot (2)
}
5200
-- Values for Location-constraint.partial5 and partial3
-- (both default to either).
Partial-constraint ::= ENUMERATED {
    either (0),
    partial (1),
    complete (2)
}
5205
-- Values for Location-constraint.location-type (which defaults to any).
Location-type-constraint ::= ENUMERATED {
    any (0),
    single-interval (1),
    joined (2),
    ordered (3)
}
5211
-- Exactly one of three INTEGER alternatives; the type of
-- Location-constraint.end5 and Location-constraint.end3.
Location-pos-constraint ::= CHOICE {
    dist-from-end       INTEGER,
    max-dist-from-end   INTEGER,
    min-dist-from-end   INTEGER
}
5216
-- Constraint on a location. Every component either has a DEFAULT or is
-- OPTIONAL, so an empty value is valid.
Location-constraint ::= SEQUENCE {
    strand          Strand-constraint DEFAULT any,
    seq-type        Seqtype-constraint DEFAULT any,
    partial5        Partial-constraint DEFAULT either,
    partial3        Partial-constraint DEFAULT either,
    location-type   Location-type-constraint DEFAULT any,
    end5            Location-pos-constraint OPTIONAL,
    end3            Location-pos-constraint OPTIONAL
}
5225
-- Values for Source-constraint.type-constraint.
Object-type-constraint ::= ENUMERATED {
    any (0),
    feature (1),
    descriptor (2)
}
5230
5231-- feature values --
5232
-- Feature type selector used by Feature-field, Feature-field-legal,
-- Feature-field-pair and Sequence-constraint.feature.
-- Numbering runs from any (0) through regulatory (95).
Macro-feature-type ::= ENUMERATED {
    any (0),
    gene (1),
    org (2),
    cds (3),
    prot (4),
    preRNA (5),
    mRNA (6),
    tRNA (7),
    rRNA (8),
    snRNA (9),
    scRNA (10),
    otherRNA (11),
    pub (12),
    seq (13),
    imp (14),
    allele (15),
    attenuator (16),
    c-region (17),
    caat-signal (18),
    imp-CDS (19),
    conflict (20),
    d-loop (21),
    d-segment (22),
    enhancer (23),
    exon (24),
    gC-signal (25),
    iDNA (26),
    intron (27),
    j-segment (28),
    ltr (29),
    mat-peptide (30),
    misc-binding (31),
    misc-difference (32),
    misc-feature (33),
    misc-recomb (34),
    misc-RNA (35),
    misc-signal (36),
    misc-structure (37),
    modified-base (38),
    mutation (39),
    n-region (40),
    old-sequence (41),
    polyA-signal (42),
    polyA-site (43),
    precursor-RNA (44),
    prim-transcript (45),
    primer-bind (46),
    promoter (47),
    protein-bind (48),
    rbs (49),
    repeat-region (50),
    rep-origin (51),
    s-region (52),
    sig-peptide (53),
    source (54),
    stem-loop (55),
    sts (56),
    tata-signal (57),
    terminator (58),
    transit-peptide (59),
    unsure (60),
    v-region (61),
    v-segment (62),
    variation (63),
    virion (64),
    n3clip (65),
    n3UTR (66),
    n5clip (67),
    n5UTR (68),
    n10-signal (69),
    n35-signal (70),
    site-ref (71),
    region (72),
    comment (73),
    bond (74),
    site (75),
    rsite (76),
    user (77),
    txinit (78),
    num (79),
    psec-str (80),
    non-std-residue (81),
    het (82),
    biosrc (83),
    preprotein (84),
    mat-peptide-aa (85),
    sig-peptide-aa (86),
    transit-peptide-aa (87),
    snoRNA (88),
    gap (89),
    operon (90),
    oriT (91),
    ncRNA (92),
    tmRNA (93),
    mobile-element (94),
    regulatory (95)
}
5330
-- Named feature qualifiers, numbered from allele (1) through
-- regulatory-class (73). Used by Feat-qual-legal-val, Feat-qual-choice and
-- Feature-field-legal.
Feat-qual-legal ::= ENUMERATED {
    allele (1),
    activity (2),
    anticodon (3),
    bound-moiety (4),
    chromosome (5),
    citation (6),
    codon (7),
    codon-start (8),
    codons-recognized (9),
    compare (10),
    cons-splice (11),
    db-xref (12),
    description (13),
    direction (14),
    ec-number (15),
    environmental-sample (16),
    evidence (17),
    exception (18),
    experiment (19),
    focus (20),
    frequency (21),
    function (22),
    gene (23),
    gene-description (24),
    inference (25),
    label (26),
    locus-tag (27),
    map (28),
    mobile-element (29),
    mod-base (30),
    mol-type (31),
    ncRNA-class (32),
    note (33),
    number (34),
    old-locus-tag (35),
    operon (36),
    organism (37),
    organelle (38),
    partial (39),
    phenotype (40),
    plasmid (41),
    product (42),
    protein-id (43),
    pseudo (44),
    rearranged (45),
    replace (46),
    rpt-family (47),
    rpt-type (48),
    rpt-unit (49),
    rpt-unit-seq (50),
    rpt-unit-range (51),
    segment (52),
    sequenced-mol (53),
    standard-name (54),
    synonym (55),
    transcript-id (56),
    transgenic (57),
    translation (58),
    transl-except (59),
    transl-table (60),
    usedin (61),
    mobile-element-type (62),
    mobile-element-name (63),
    gene-comment (64),
    satellite (65),
    satellite-type (66),
    satellite-name (67),
    location (68),
    tag-peptide (69),
    mobile-element-type-type (70),
    name (71),
    pcr-conditions (72),
    regulatory-class (73)
}
5405
-- A Feat-qual-legal qualifier paired with a VisibleString value.
Feat-qual-legal-val ::= SEQUENCE {
    qual    Feat-qual-legal,
    val     VisibleString
}
5409
-- CHOICE with a single alternative, qual; the element type of
-- Feat-qual-legal-set.
Feat-qual-legal-val-choice ::= CHOICE {
    qual    Feat-qual-legal-val
}
5412
-- Unordered collection of Feat-qual-legal-val-choice entries.
Feat-qual-legal-set ::= SET OF Feat-qual-legal-val-choice
5414
-- Either a legal-qual taken from Feat-qual-legal, or an illegal-qual
-- expressed as a String-constraint.
Feat-qual-choice ::= CHOICE {
    legal-qual      Feat-qual-legal,
    illegal-qual    String-constraint
}
5418
-- A feature type together with a qualifier choice for that feature.
Feature-field ::= SEQUENCE {
    type    Macro-feature-type,
    field   Feat-qual-choice
}
5422
-- Like Feature-field, but the field is restricted to Feat-qual-legal.
Feature-field-legal ::= SEQUENCE {
    type    Macro-feature-type,
    field   Feat-qual-legal
}
5426
-- A feature type with a from/to pair of qualifier choices.
Feature-field-pair ::= SEQUENCE {
    type        Macro-feature-type,
    field-from  Feat-qual-choice,
    field-to    Feat-qual-choice
}
5431
-- RNA feature type selector. All alternatives are NULL except ncRNA,
-- which carries a VisibleString.
Rna-feat-type ::= CHOICE {
    any     NULL,
    preRNA  NULL,
    mRNA    NULL,
    tRNA    NULL,
    rRNA    NULL,
    ncRNA   VisibleString,
    tmRNA   NULL,
    miscRNA NULL
}
5441
-- Field selector used by Rna-qual and Rna-qual-pair.
Rna-field ::= ENUMERATED {
    product (1),
    comment (2),
    codons-recognized (3),
    ncrna-class (4),
    anticodon (5),
    transcript-id (6),
    gene-locus (7),
    gene-description (8),
    gene-maploc (9),
    gene-locus-tag (10),
    gene-synonym (11),
    gene-comment (12),
    tag-peptide (13)
}
5456
-- An RNA feature type together with one Rna-field.
Rna-qual ::= SEQUENCE {
    type    Rna-feat-type,
    field   Rna-field
}
5460
-- An RNA feature type with a from/to pair of Rna-field values.
Rna-qual-pair ::= SEQUENCE {
    type        Rna-feat-type,
    field-from  Rna-field,
    field-to    Rna-field
}
5465
-- Named source qualifiers, numbered from acronym (1) through
-- type-material (100). Used by Source-qual-pair, Source-qual-choice and
-- Source-qual-text-val.
Source-qual ::= ENUMERATED {
    acronym (1),
    anamorph (2),
    authority (3),
    bio-material (4),
    biotype (5),
    biovar (6),
    breed (7),
    cell-line (8),
    cell-type (9),
    chemovar (10),
    chromosome (11),
    clone (12),
    clone-lib (13),
    collected-by (14),
    collection-date (15),
    common (16),
    common-name (17),
    country (18),
    cultivar (19),
    culture-collection (20),
    dev-stage (21),
    division (22),
    dosage (23),
    ecotype (24),
    endogenous-virus-name (25),
    environmental-sample (26),
    forma (27),
    forma-specialis (28),
    frequency (29),
    fwd-primer-name (30),
    fwd-primer-seq (31),
    gb-acronym (32),
    gb-anamorph (33),
    gb-synonym (34),
    genotype (35),
    germline (36),
    group (37),
    haplotype (38),
    identified-by (39),
    insertion-seq-name (40),
    isolate (41),
    isolation-source (42),
    lab-host (43),
    lat-lon (44),
    lineage (45),
    map (46),
    metagenome-source (47),
    metagenomic (48),
    old-lineage (49),
    old-name (50),
    orgmod-note (51),
    nat-host (52),
    pathovar (53),
    plasmid-name (54),
    plastid-name (55),
    pop-variant (56),
    rearranged (57),
    rev-primer-name (58),
    rev-primer-seq (59),
    segment (60),
    serogroup (61),
    serotype (62),
    serovar (63),
    sex (64),
    specimen-voucher (65),
    strain (66),
    subclone (67),
    subgroup (68),
    subsource-note (69),
    sub-species (70),
    substrain (71),
    subtype (72),
    synonym (73),
    taxname (74),
    teleomorph (75),
    tissue-lib (76),
    tissue-type (77),
    transgenic (78),
    transposon-name (79),
    type (80),
    variety (81),
    specimen-voucher-INST (82),
    specimen-voucher-COLL (83),
    specimen-voucher-SpecID (84),
    culture-collection-INST (85),
    culture-collection-COLL (86),
    culture-collection-SpecID (87),
    bio-material-INST (88),
    bio-material-COLL (89),
    bio-material-SpecID (90),
    all-notes (91),
    mating-type (92),
    linkage-group (93),
    haplogroup (94),
    all-quals (95),
    dbxref (96),
    taxid (97),
    all-primers (98),
    altitude (99),
    type-material (100)
}
5568
-- A from/to pair of Source-qual values.
Source-qual-pair ::= SEQUENCE {
    field-from  Source-qual,
    field-to    Source-qual
}
5572
-- Values for the location alternative of Source-qual-choice and
-- Source-qual-val-choice.
Source-location ::= ENUMERATED {
    unknown (0),
    genomic (1),
    chloroplast (2),
    chromoplast (3),
    kinetoplast (4),
    mitochondrion (5),
    plastid (6),
    macronuclear (7),
    extrachrom (8),
    plasmid (9),
    transposon (10),
    insertion-seq (11),
    cyanelle (12),
    proviral (13),
    virion (14),
    nucleomorph (15),
    apicoplast (16),
    leucoplast (17),
    proplastid (18),
    endogenous-virus (19),
    hydrogenosome (20),
    chromosome (21),
    chromatophore (22)
}
5597
-- Values for the origin alternative of Source-qual-choice and
-- Source-qual-val-choice. Note that other is numbered 255, not 6.
Source-origin ::= ENUMERATED {
    unknown (0),
    natural (1),
    natmut (2),
    mut (3),
    artificial (4),
    synthetic (5),
    other (255)
}
5606
-- Selects one kind of source qualifier: a text qualifier, a location,
-- an origin, or one of two INTEGER alternatives (gcode, mgcode).
Source-qual-choice ::= CHOICE {
    textqual    Source-qual,
    location    Source-location,
    origin      Source-origin,
    gcode       INTEGER,
    mgcode      INTEGER
}
5613
-- A Source-qual paired with a VisibleString value.
Source-qual-text-val ::= SEQUENCE {
    srcqual Source-qual,
    val     VisibleString
}
5617
-- Same alternatives as Source-qual-choice, except that textqual carries a
-- Source-qual-text-val (qualifier plus value) instead of a bare Source-qual.
Source-qual-val-choice ::= CHOICE {
    textqual    Source-qual-text-val,
    location    Source-location,
    origin      Source-origin,
    gcode       INTEGER,
    mgcode      INTEGER
}
5624
-- Unordered collection of Source-qual-val-choice entries.
Source-qual-val-set ::= SET OF Source-qual-val-choice
5626
-- Field selector used by CDSGeneProt-field-pair and
-- CDSGeneProt-constraint-field.
CDSGeneProt-field ::= ENUMERATED {
    cds-comment (1),
    gene-locus (2),
    gene-description (3),
    gene-comment (4),
    gene-allele (5),
    gene-maploc (6),
    gene-locus-tag (7),
    gene-synonym (8),
    gene-old-locus-tag (9),
    mrna-product (10),
    mrna-comment (11),
    prot-name (12),
    prot-description (13),
    prot-ec-number (14),
    prot-activity (15),
    prot-comment (16),
    mat-peptide-name (17),
    mat-peptide-description (18),
    mat-peptide-ec-number (19),
    mat-peptide-activity (20),
    mat-peptide-comment (21),
    cds-inference (22),
    gene-inference (23),
    codon-start (24)
}
5652
-- A from/to pair of CDSGeneProt-field values.
CDSGeneProt-field-pair ::= SEQUENCE {
    field-from  CDSGeneProt-field,
    field-to    CDSGeneProt-field
}
5656
-- Values for Molinfo-field.molecule and Molinfo-molecule-pair.
-- The catch-all value here is named macro-other (11).
Molecule-type ::= ENUMERATED {
    unknown (0),
    genomic (1),
    precursor-RNA (2),
    mRNA (3),
    rRNA (4),
    tRNA (5),
    genomic-mRNA (6),
    cRNA (7),
    transcribed-RNA (8),
    ncRNA (9),
    transfer-messenger-RNA (10),
    macro-other (11)
}
5670
-- Values for Molinfo-field.technique and Molinfo-technique-pair.
Technique-type ::= ENUMERATED {
    unknown (0),
    standard (1),
    est (2),
    sts (3),
    survey (4),
    genetic-map (5),
    physical-map (6),
    derived (7),
    concept-trans (8),
    seq-pept (9),
    both (10),
    seq-pept-overlap (11),
    seq-pept-homol (12),
    concept-trans-a (13),
    htgs-1 (14),
    htgs-2 (15),
    htgs-3 (16),
    fli-cDNA (17),
    htgs-0 (18),
    htc (19),
    wgs (20),
    barcode (21),
    composite-wgs-htgs (22),
    tsa (23),
    targeted (24),
    other (25)
}
5698
-- Values for Molinfo-field.completedness and Molinfo-completedness-pair.
-- Every item of an ENUMERATED type needs its own number: "other" previously
-- reused 6, the number already assigned to has-left, which made the two
-- values indistinguishable by number. It now takes the next free value, 8.
-- NOTE(review): this changes the number bound to "other"; regenerate any
-- code produced from this specification and confirm no consumer depends on
-- the old number.
Completedness-type ::= ENUMERATED {
    unknown (0),
    complete (1),
    partial (2),
    no-left (3),
    no-right (4),
    no-ends (5),
    has-left (6),
    has-right (7),
    other (8)
}
5709
-- Values for Molinfo-field.mol-class and Molinfo-mol-class-pair.
Molecule-class-type ::= ENUMERATED {
    unknown (0),
    dna (1),
    rna (2),
    protein (3),
    nucleotide (4),
    other (5)
}
5717
-- Values for Molinfo-field.topology and Molinfo-topology-pair.
Topology-type ::= ENUMERATED {
    unknown (0),
    linear (1),
    circular (2),
    tandem (3),
    other (4)
}
5724
-- Values for Molinfo-field.strand and Molinfo-strand-pair.
Strand-type ::= ENUMERATED {
    unknown (0),
    single (1),
    double (2),
    mixed (3),
    mixed-rev (4),
    other (5)
}
5732
-- One molinfo value, tagged by which of the six enumerations it comes from.
Molinfo-field ::= CHOICE {
    molecule        Molecule-type,
    technique       Technique-type,
    completedness   Completedness-type,
    mol-class       Molecule-class-type,
    topology        Topology-type,
    strand          Strand-type
}
5740
-- A from/to pair of Molecule-type values.
Molinfo-molecule-pair ::= SEQUENCE {
    from    Molecule-type,
    to      Molecule-type
}
5744
-- A from/to pair of Technique-type values.
Molinfo-technique-pair ::= SEQUENCE {
    from    Technique-type,
    to      Technique-type
}
5748
-- A from/to pair of Completedness-type values.
Molinfo-completedness-pair ::= SEQUENCE {
    from    Completedness-type,
    to      Completedness-type
}
5752
-- A from/to pair of Molecule-class-type values.
Molinfo-mol-class-pair ::= SEQUENCE {
    from    Molecule-class-type,
    to      Molecule-class-type
}
5756
-- A from/to pair of Topology-type values.
Molinfo-topology-pair ::= SEQUENCE {
    from    Topology-type,
    to      Topology-type
}
5760
-- A from/to pair of Strand-type values.
Molinfo-strand-pair ::= SEQUENCE {
    from    Strand-type,
    to      Strand-type
}
5764
-- One from/to pair, tagged the same way as Molinfo-field; each alternative
-- carries the pair type for the corresponding enumeration.
Molinfo-field-pair ::= CHOICE {
    molecule        Molinfo-molecule-pair,
    technique       Molinfo-technique-pair,
    completedness   Molinfo-completedness-pair,
    mol-class       Molinfo-mol-class-pair,
    topology        Molinfo-topology-pair,
    strand          Molinfo-strand-pair
}
5772
-- Unordered collection of Molinfo-field entries.
Molinfo-field-list ::= SET OF Molinfo-field
5774
-- A Molinfo-field with an is-not flag (default FALSE); used as the molinfo
-- alternative of Constraint-choice.
Molinfo-field-constraint ::= SEQUENCE {
    field   Molinfo-field,
    is-not  BOOLEAN DEFAULT FALSE
}
5778
5779-- publication fields --
5780
-- Field selector used by Pub-field-constraint and
-- Pub-field-special-constraint.
Publication-field ::= ENUMERATED {
    cit (1),
    authors (2),
    journal (3),
    volume (4),
    issue (5),
    pages (6),
    date (7),
    serial-number (8),
    title (9),
    affiliation (10),
    affil-div (11),
    affil-city (12),
    affil-sub (13),
    affil-country (14),
    affil-street (15),
    affil-email (16),
    affil-fax (17),
    affil-phone (18),
    affil-zipcode (19),
    authors-initials (20),
    pmid (21),
    pub-class (22)
}
5805
5806-- structured comment fields --
5807
-- Structured comment field selector: database and field-name are NULL
-- alternatives, while named carries a VisibleString.
Structured-comment-field ::= CHOICE {
    database    NULL,
    named       VisibleString,
    field-name  NULL
}
5813
-- A from/to pair of Structured-comment-field values.
Structured-comment-field-pair ::= SEQUENCE {
    from    Structured-comment-field,
    to      Structured-comment-field
}
5818
5819-- misc fields --
5820-- these would not appear in pairs --
-- Miscellaneous field selector with four values.
Misc-field ::= ENUMERATED {
    genome-project-id (1),
    comment-descriptor (2),
    defline (3),
    keyword (4)
}
5827
5828-- dblink fields --
-- Field selector used by DBLink-field-pair.
-- NOTE(review): "sequence-read-archve" looks like a misspelling of
-- "archive". The identifier is part of the value notation, so it is kept
-- as written; confirm with consumers before renaming it.
DBLink-field-type ::= ENUMERATED {
    trace-assembly (1),
    bio-sample (2),
    probe-db (3),
    sequence-read-archve (4),
    bio-project (5),
    assembly (6)
}
5836
-- A from/to pair of DBLink-field-type values.
DBLink-field-pair ::= SEQUENCE {
    from    DBLink-field-type,
    to      DBLink-field-type
}
5841
5842-- complex constraints --
5843
-- Values for Publication-constraint.type.
Pub-type ::= ENUMERATED {
    any (0),
    published (1),
    unpublished (2),
    in-press (3),
    submitter-block (4)
}
5850
-- A publication field together with the String-constraint applied to it.
Pub-field-constraint ::= SEQUENCE {
    field       Publication-field,
    constraint  String-constraint
}
5854
-- Kind of special constraint; every alternative is NULL, so only the
-- selected alternative name carries information.
Pub-field-special-constraint-type ::= CHOICE {
    is-present      NULL,
    is-not-present  NULL,
    is-all-caps     NULL,
    is-all-lower    NULL,
    is-all-punct    NULL
}
5861
-- A publication field together with a special constraint type.
Pub-field-special-constraint ::= SEQUENCE {
    field       Publication-field,
    constraint  Pub-field-special-constraint-type
}
5865
-- Publication constraint: a mandatory Pub-type plus an optional field
-- constraint and an optional special-field constraint.
Publication-constraint ::= SEQUENCE {
    type            Pub-type,
    field           Pub-field-constraint OPTIONAL,
    special-field   Pub-field-special-constraint OPTIONAL
}
5870
-- Source constraint; all four components are OPTIONAL.
Source-constraint ::= SEQUENCE {
    field1          Source-qual-choice OPTIONAL,
    field2          Source-qual-choice OPTIONAL,
    constraint      String-constraint OPTIONAL,
    type-constraint Object-type-constraint OPTIONAL
}
5876
-- Values for CDSGeneProt-pseudo-constraint.feature.
CDSGeneProt-feature-type-constraint ::= ENUMERATED {
    gene (1),
    mRNA (2),
    cds (3),
    prot (4),
    exon (5),
    mat-peptide (6)
}
5884
-- A feature type with an is-pseudo flag; note the flag defaults to TRUE.
CDSGeneProt-pseudo-constraint ::= SEQUENCE {
    feature     CDSGeneProt-feature-type-constraint,
    is-pseudo   BOOLEAN DEFAULT TRUE
}
5888
-- CHOICE with a single alternative, field.
CDSGeneProt-constraint-field ::= CHOICE {
    field   CDSGeneProt-field
}
5891
-- CDS/gene/protein qualifier constraint; all three components are OPTIONAL.
CDSGeneProt-qual-constraint ::= SEQUENCE {
    field1      CDSGeneProt-constraint-field OPTIONAL,
    field2      CDSGeneProt-constraint-field OPTIONAL,
    constraint  String-constraint OPTIONAL
}
5896
-- A Field-type together with the String-constraint applied to it.
-- Field-type is defined further down in this module.
Field-constraint ::= SEQUENCE {
    field               Field-type,
    string-constraint   String-constraint
}
5900
-- Values carried by the rna alternative of
-- Sequence-constraint-mol-type-constraint.
Sequence-constraint-rnamol ::= ENUMERATED {
    any (0),
    genomic (1),
    precursor-RNA (2),
    mRNA (3),
    rRNA (4),
    tRNA (5),
    genomic-mRNA (6),
    cRNA (7),
    transcribed-RNA (8),
    ncRNA (9),
    transfer-messenger-RNA (10)
}
5913
-- Molecule type selector for Sequence-constraint.seqtype. All alternatives
-- are NULL except rna, which carries a Sequence-constraint-rnamol.
Sequence-constraint-mol-type-constraint ::= CHOICE {
    any         NULL,
    nucleotide  NULL,
    dna         NULL,
    rna         Sequence-constraint-rnamol,
    protein     NULL
}
5920
-- An INTEGER tagged as equals, greater-than or less-than.
Quantity-constraint ::= CHOICE {
    equals          INTEGER,
    greater-than    INTEGER,
    less-than       INTEGER
}
5925
-- Values for Sequence-constraint.strandedness (which defaults to any).
Feature-strandedness-constraint ::= ENUMERATED {
    any (0),
    minus-only (1),
    plus-only (2),
    at-least-one-minus (3),
    at-least-one-plus (4),
    no-minus (5),
    no-plus (6)
}
5934
-- Sequence constraint. feature is the only mandatory component;
-- strandedness defaults to any and the rest are OPTIONAL.
Sequence-constraint ::= SEQUENCE {
    seqtype             Sequence-constraint-mol-type-constraint OPTIONAL,
    id                  String-constraint OPTIONAL,
    feature             Macro-feature-type,
    num-type-features   Quantity-constraint OPTIONAL,
    num-features        Quantity-constraint OPTIONAL,
    length              Quantity-constraint OPTIONAL,
    strandedness        Feature-strandedness-constraint DEFAULT any
}
5943
-- Match-type-constraint: three-state flag (dont-care, yes, no).
-- dont-care (0) is the DEFAULT of the internal-stops component of
-- Translation-constraint.
Match-type-constraint ::= ENUMERATED {
  dont-care (0) ,
  yes (1) ,
  no (2) }
5948
-- Translation-constraint: two mandatory string-constraint sets, a
-- three-state internal-stops flag (defaults to dont-care when omitted) and
-- an OPTIONAL mismatch count constraint.
-- Referenced by the translation alternative of Constraint-choice.
-- NOTE(review): actual-strings and transl-strings presumably apply to the
-- stored and the computed translation respectively; confirm.
Translation-constraint ::= SEQUENCE {
  actual-strings String-constraint-set ,
  transl-strings String-constraint-set ,
  internal-stops Match-type-constraint DEFAULT dont-care ,
  num-mismatches Quantity-constraint OPTIONAL }
5954
-- Constraint-choice: a single constraint of any supported kind.  Each
-- alternative wraps the constraint type of that kind.
-- field carries a Field-constraint, whereas field-missing carries only a
-- Field-type (no string constraint).
-- Collected into Constraint-choice-set.
Constraint-choice ::= CHOICE {
    string String-constraint ,
    location Location-constraint ,
    field  Field-constraint ,
    source Source-constraint ,
    cdsgeneprot-qual CDSGeneProt-qual-constraint ,
    cdsgeneprot-pseudo CDSGeneProt-pseudo-constraint ,
    sequence Sequence-constraint ,
    pub Publication-constraint ,
    molinfo Molinfo-field-constraint ,
    field-missing Field-type ,
    translation Translation-constraint }
5967
-- Constraint-choice-set: unordered collection of Constraint-choice values.
-- Used as the constraint component of most action types in this section.
-- NOTE(review): presumably every member must be satisfied; confirm.
Constraint-choice-set ::= SET OF Constraint-choice
5969
-- Text-marker: a boundary inside a text.  free-text carries a literal
-- string; digits and letters are NULL alternatives with no payload.
-- Used by left-marker and right-marker of Text-portion.
Text-marker ::= CHOICE {
    free-text VisibleString ,
    digits NULL ,
    letters NULL }
5974
-- Text-portion: describes a part of a text delimited by an optional left
-- marker and an optional right marker.
-- include-left, include-right and inside are mandatory BOOLEANs;
-- case-sensitive and whole-word take the value FALSE when omitted.
-- NOTE(review): presumably include-left / include-right say whether the
-- marker text itself belongs to the portion, and inside selects the text
-- between the markers rather than outside them; confirm.
Text-portion ::= SEQUENCE {
    left-marker Text-marker  OPTIONAL ,
    include-left BOOLEAN ,
    right-marker Text-marker  OPTIONAL ,
    include-right BOOLEAN ,
    inside BOOLEAN ,
    case-sensitive BOOLEAN DEFAULT FALSE ,
    whole-word BOOLEAN DEFAULT FALSE }
5983
-- Field-edit-location: position selector for Field-edit.
-- anywhere (0) is the DEFAULT of the location component of Field-edit.
Field-edit-location ::= ENUMERATED {
    anywhere (0) ,
    beginning (1) ,
    end (2) }
5988
-- Field-edit: find and replace description.
-- find-txt is mandatory; repl-txt is OPTIONAL; location takes the value
-- anywhere and case-insensitive the value FALSE when omitted.
-- Note the polarity: this flag is case-insensitive, while Text-portion
-- uses case-sensitive.
-- NOTE(review): an absent repl-txt presumably means the matched text is
-- deleted; confirm.
Field-edit ::= SEQUENCE {
    find-txt VisibleString ,
    repl-txt VisibleString OPTIONAL ,
    location Field-edit-location DEFAULT anywhere ,
    case-insensitive BOOLEAN DEFAULT FALSE }
5994
-- Field-type: selects one field, grouped by category.  The payload types
-- are declared outside this section.
-- Used by Field-constraint, Constraint-choice (field-missing),
-- Apply-action, Edit-action, Remove-action, Remove-outside-action and
-- Sort-fields-action.
Field-type ::= CHOICE {
    source-qual Source-qual-choice ,
    feature-field Feature-field ,
    rna-field Rna-qual ,
    cds-gene-prot CDSGeneProt-field ,
    molinfo-field Molinfo-field ,
    pub Publication-field ,
    struc-comment-field Structured-comment-field ,
    misc Misc-field ,
    dblink DBLink-field-type }
6005
-- Field-pair-type: selects a pair of fields, grouped by category.
-- The alternative names mirror those of Field-type, except that there is
-- no pub and no misc alternative here.
-- Used by Convert-action, Copy-action, Swap-action and AECRParse-action.
Field-pair-type ::= CHOICE {
    source-qual Source-qual-pair ,
    feature-field Feature-field-pair ,
    rna-field Rna-qual-pair ,
    cds-gene-prot CDSGeneProt-field-pair ,
    molinfo-field Molinfo-field-pair ,
    struc-comment-field Structured-comment-field-pair ,
    dblink DBLink-field-pair}
6014
-- ExistingTextOption: values 1 to 13; no value 0 is defined.
-- Used as the mandatory existing-text component of Apply-action,
-- Convert-action, Copy-action, AECRParse-action and Parse-action.
-- NOTE(review): the names suggest how a new value is combined with text
-- already present in the destination (replace, append or prefix with a
-- separator, leave untouched, add another qualifier); confirm.
ExistingTextOption ::= ENUMERATED {
  replace-old (1) ,
  append-semi (2) ,
  append-space (3) ,
  append-colon (4) ,
  append-comma (5) ,
  append-none (6) ,
  prefix-semi (7) ,
  prefix-space (8) ,
  prefix-colon (9) ,
  prefix-comma (10) ,
  prefix-none (11) ,
  leave-old (12) ,
  add-qual (13) }
6029
-- Apply-action: a target field, the text value to apply and the
-- ExistingTextOption to use.  All three components are mandatory.
-- Referenced by the apply alternative of Action-choice.
Apply-action ::= SEQUENCE {
    field Field-type ,
    value VisibleString ,
    existing-text ExistingTextOption }
6034
-- Edit-action: a Field-edit (find and replace description) and the field
-- it targets.  Both components are mandatory.
-- Referenced by the edit alternative of Action-choice.
Edit-action ::= SEQUENCE {
    edit Field-edit ,
    field Field-type }
6038
-- Cap-change: capitalization modes, values 0 to 7.
-- none (0) is the DEFAULT of the capitalization component of
-- Convert-action and Parse-action; also carried by the caps alternative
-- of Text-transform.
-- The identifier for value 4 has no hyphens while the one for value 5
-- does.  Identifiers are part of the specification's interface, so the
-- spelling is left exactly as is.
Cap-change ::= ENUMERATED {
    none (0) ,
    tolower (1) ,
    toupper (2) ,
    firstcap (3) ,
    firstcaprestnochange (4) ,
    firstlower-restnochange (5) ,
    cap-word-space (6) ,
    cap-word-space-punc (7)
    }
6049
-- Text-transform: one text transformation, expressed as a Field-edit, a
-- Cap-change or a Text-portion (the remove alternative).
-- Collected into Text-transform-set.
Text-transform ::= CHOICE {
  edit Field-edit ,
  caps Cap-change ,
  remove Text-portion }
6054
-- Text-transform-set: collection of Text-transform values; used as the
-- OPTIONAL transform component of AECRParse-action and Parse-action.
-- NOTE(review): declared as SET OF, which does not promise an order; if
-- the order of transformations matters, confirm how consumers treat it.
Text-transform-set ::= SET OF Text-transform
6056
-- Convert-action: parameters for the convert alternative of
-- Action-choice.
-- fields and existing-text are mandatory.  strip-name and keep-original
-- take the value FALSE, and capitalization the value none, when omitted.
Convert-action ::= SEQUENCE {
    fields Field-pair-type ,
    strip-name BOOLEAN DEFAULT FALSE ,
    keep-original BOOLEAN DEFAULT FALSE ,
    capitalization Cap-change DEFAULT none ,
    existing-text ExistingTextOption }
6063
-- Copy-action: a field pair and the ExistingTextOption to use.  Both
-- components are mandatory.
-- Referenced by the copy alternative of Action-choice.
Copy-action ::= SEQUENCE {
    fields Field-pair-type ,
    existing-text ExistingTextOption }
6067
-- Swap-action: a single mandatory field pair.
-- Referenced by the swap alternative of Action-choice.
Swap-action ::= SEQUENCE {
    fields Field-pair-type }
6070
-- AECRParse-action: parameters for the parse alternative of
-- Action-choice.
-- portion, fields and existing-text are mandatory.  The three remove
-- flags take the value FALSE when omitted; transform is OPTIONAL.
-- Not to be confused with Parse-action, which is a separate alternative
-- of Macro-action-choice with explicit src and dest components.
AECRParse-action ::= SEQUENCE {
    portion Text-portion ,
    fields Field-pair-type ,
    remove-from-parsed BOOLEAN DEFAULT FALSE ,
    remove-left BOOLEAN DEFAULT FALSE ,
    remove-right BOOLEAN DEFAULT FALSE ,
    transform Text-transform-set OPTIONAL ,
    existing-text ExistingTextOption }
6079
-- Remove-action: a single mandatory field selector.
-- Referenced by the remove alternative of Action-choice.
Remove-action ::= SEQUENCE {
    field Field-type }
6082
-- Remove-outside-action: a text portion and the field it applies to.
-- remove-if-not-found takes the value FALSE when omitted.
-- Referenced by the remove-outside alternative of Action-choice.
-- NOTE(review): presumably text outside the described portion is removed
-- from the field; confirm.
Remove-outside-action ::= SEQUENCE {
    portion Text-portion ,
    field Field-type ,
    remove-if-not-found BOOLEAN DEFAULT FALSE }
6087
-- Action-choice: one field-level action.  Each alternative wraps the
-- parameter type of that action.
-- Used as the action component of AECR-action.
Action-choice ::= CHOICE {
    apply Apply-action ,
    edit Edit-action ,
    convert Convert-action ,
    copy Copy-action ,
    swap Swap-action ,
    remove Remove-action ,
    parse AECRParse-action ,
    remove-outside Remove-outside-action }
6097
-- AECR-action: an Action-choice plus an OPTIONAL constraint set.
-- also-change-mrna takes the value FALSE when omitted.
-- Referenced by the aecr alternative of Macro-action-choice.
AECR-action ::= SEQUENCE {
    action Action-choice ,
    also-change-mrna BOOLEAN DEFAULT FALSE ,
    constraint Constraint-choice-set OPTIONAL }
6102
-- Parse-src-org-choice: either a Source-qual (declared outside this
-- section) or the payload-free taxname-after-binomial alternative.
-- Used as the field component of Parse-src-org.
Parse-src-org-choice ::= CHOICE {
    source-qual Source-qual ,
    taxname-after-binomial NULL }
6106
-- Parse-src-org: organism-related parse source.  field is mandatory; type
-- may be omitted and then takes the value any of Object-type-constraint.
-- Referenced by the org alternative of Parse-src.
Parse-src-org ::= SEQUENCE {
    field Parse-src-org-choice ,
    type Object-type-constraint DEFAULT any }
6110
-- For Parse-src-general-id tag, specify the db of the id from which you
-- want to retrieve the tag.  If empty or null, any db will do.
-- Of the three alternatives only tag carries data (a VisibleString);
-- whole-text and db are NULL.
-- Referenced by the general-id alternative of Parse-src.
Parse-src-general-id ::= CHOICE {
    whole-text NULL ,
    db NULL ,
    tag VisibleString }
6117
-- Parse-src: where Parse-action takes its text from.
-- org, structured-comment and general-id carry data; every other
-- alternative is NULL.
-- NOTE(review): the VisibleString of structured-comment presumably names
-- the structured-comment field to read; confirm.
Parse-src ::= CHOICE {
    defline NULL ,
    flatfile NULL ,
    local-id NULL ,
    org Parse-src-org ,
    comment NULL ,
    bankit-comment NULL ,
    structured-comment VisibleString ,
    file-id NULL ,
    general-id Parse-src-general-id }
6128
-- Parse-dst-org: organism-related parse destination.  field is mandatory;
-- type may be omitted and then takes the value any of
-- Object-type-constraint.
-- Referenced by the org alternative of Parse-dest.
Parse-dst-org ::= SEQUENCE {
    field Source-qual-choice ,
    type Object-type-constraint DEFAULT any }
6132
-- Parse-dest: where Parse-action puts the parsed text.
-- defline and comment-descriptor are NULL; org, featqual and dbxref carry
-- data.
-- NOTE(review): the VisibleString of dbxref presumably names the database;
-- confirm.
Parse-dest ::= CHOICE {
    defline NULL ,
    org Parse-dst-org ,
    featqual Feature-field-legal ,
    comment-descriptor NULL ,
    dbxref VisibleString }
6139
-- Parse-action: parameters for the parse alternative of
-- Macro-action-choice.
-- portion, src, dest and existing-text are mandatory.  capitalization
-- takes the value none and remove-from-parsed the value FALSE when
-- omitted; transform is OPTIONAL.
Parse-action ::= SEQUENCE {
    portion Text-portion ,
    src Parse-src ,
    dest Parse-dest ,
    capitalization Cap-change DEFAULT none ,
    remove-from-parsed BOOLEAN DEFAULT FALSE ,
    transform Text-transform-set OPTIONAL ,
    existing-text ExistingTextOption }
6148
-- Location-interval: two mandatory INTEGER endpoints.
-- Referenced by the interval alternative of Location-choice.
-- NOTE(review): coordinate base (0 or 1) and whether from may exceed to
-- are not stated here; confirm in the consuming code.
Location-interval ::= SEQUENCE {
    from INTEGER ,
    to INTEGER  }
6152
-- Location-choice: an interval, the whole sequence (NULL, no payload) or
-- a single INTEGER point.
-- Used as the location component of Apply-feature-action.
Location-choice ::= CHOICE {
    interval Location-interval ,
    whole-sequence NULL ,
    point INTEGER }
6157
-- Sequence-list: unordered collection of strings; carried by the list
-- alternative of Sequence-list-choice.
-- NOTE(review): the strings are presumably sequence identifiers; confirm.
Sequence-list ::= SET OF VisibleString
-- Sequence-list-choice: either an explicit Sequence-list or the
-- payload-free all alternative.
-- Used as the seq-list component of Apply-feature-action.
Sequence-list-choice ::= CHOICE {
    list Sequence-list ,
    all NULL }
6162
-- Apply-feature-action: parameters for the add-feature alternative of
-- Macro-action-choice.
-- Mandatory: type, location, seq-list.
-- Defaults when omitted: partial5 FALSE, partial3 FALSE, plus-strand TRUE,
-- add-redundant TRUE, add-mrna FALSE, apply-to-parts FALSE,
-- only-seg-num -1.  fields and src-fields are OPTIONAL.
-- NOTE(review): only-seg-num -1 presumably means no restriction to a
-- particular segment; confirm.
Apply-feature-action ::= SEQUENCE {
    type Macro-feature-type ,
    partial5 BOOLEAN DEFAULT FALSE ,
    partial3 BOOLEAN DEFAULT FALSE ,
    plus-strand BOOLEAN DEFAULT TRUE ,
    location Location-choice ,
    seq-list Sequence-list-choice ,
    add-redundant BOOLEAN DEFAULT TRUE ,
    add-mrna BOOLEAN DEFAULT FALSE ,
    apply-to-parts BOOLEAN DEFAULT FALSE ,
    only-seg-num INTEGER DEFAULT -1 ,
    fields Feat-qual-legal-set OPTIONAL,
    src-fields Source-qual-val-set OPTIONAL }
6176
-- Remove-feature-action: a mandatory feature type and an OPTIONAL
-- constraint set.
-- Referenced by the remove-feature alternative of Macro-action-choice.
Remove-feature-action ::= SEQUENCE {
    type Macro-feature-type ,
    constraint Constraint-choice-set OPTIONAL }
6180
-- for convert features --
-- Convert-from-CDS-options: three mandatory BOOLEAN flags.
-- Carried by the cds alternative of Convert-feature-src-options.
Convert-from-CDS-options ::= SEQUENCE {
  remove-mRNA BOOLEAN ,
  remove-gene BOOLEAN ,
  remove-transcript-id BOOLEAN }
6186
-- Convert-feature-src-options: CHOICE with a single alternative, cds,
-- carrying Convert-from-CDS-options.
-- Used as the OPTIONAL src-options component of Convert-feature-action.
Convert-feature-src-options ::= CHOICE {
  cds Convert-from-CDS-options }
6189
-- Bond-type: bond kinds, values 1 to 5; no value 0 is defined.
-- Carried by the bond alternative of Convert-feature-dst-options.
Bond-type ::= ENUMERATED {
  disulfide (1) ,
  thioester (2) ,
  crosslink (3) ,
  thioether (4) ,
  other (5) }
6196
-- Site-type: site kinds, values 1 to 27; no value 0 is defined and
-- other is the last value (27).
-- Carried by the site alternative of Convert-feature-dst-options.
-- Identifier spellings are part of the specification's interface and are
-- kept exactly as defined.
Site-type ::= ENUMERATED {
  active (1) ,
  binding (2) ,
  cleavage (3) ,
  inhibit (4) ,
  modified (5) ,
  glycosylation (6) ,
  myristoylation (7) ,
  mutagenized (8) ,
  metal-binding (9) ,
  phosphorylation (10) ,
  acetylation (11) ,
  amidation (12) ,
  methylation (13) ,
  hydroxylation (14) ,
  sulfatation (15) ,
  oxidative-deamination (16) ,
  pyrrolidone-carboxylic-acid (17) ,
  gamma-carboxyglutamic-acid (18) ,
  blocked (19) ,
  lipid-binding (20) ,
  np-binding (21) ,
  dna-binding (22) ,
  signal-peptide (23) ,
  transit-peptide (24) ,
  transmembrane-region (25) ,
  nitrosylation (26) ,
  other (27) }
6225
-- other choice is to create protein sequences, skipping bad --
-- Region-type: a single mandatory BOOLEAN, create-nucleotide.
-- Carried by the region alternative of Convert-feature-dst-options.
-- NOTE(review): per the remark above, FALSE presumably selects creation
-- on protein sequences instead; confirm.
Region-type ::= SEQUENCE {
  create-nucleotide BOOLEAN }
6229
-- Convert-feature-dst-options: destination-specific option for a feature
-- conversion.  Exactly one alternative is present in a value.
-- Used as the OPTIONAL dst-options component of Convert-feature-action.
Convert-feature-dst-options ::= CHOICE {
  bond Bond-type ,
  site Site-type ,
  region Region-type ,
  ncrna-class VisibleString ,
  remove-original BOOLEAN }
6236
-- Convert-feature-action: parameters for the convert-feature alternative
-- of Macro-action-choice.
-- type-from, type-to and leave-original are mandatory; the source and
-- destination options and the constraint set are OPTIONAL.
Convert-feature-action ::= SEQUENCE {
  type-from Macro-feature-type ,
  type-to Macro-feature-type ,
  src-options Convert-feature-src-options OPTIONAL ,
  dst-options Convert-feature-dst-options OPTIONAL ,
  leave-original BOOLEAN ,
  src-feat-constraint Constraint-choice-set OPTIONAL }
6244
-- Feature-location-strand-from: strand selector for the strand-from
-- component of Edit-location-strand.  Values 0 to 4; plus, minus, unknown
-- and both have the same numbers as in Feature-location-strand-to.
Feature-location-strand-from ::= ENUMERATED {
  any (0) ,
  plus (1) ,
  minus (2) ,
  unknown (3) ,
  both (4) }
6251
-- Feature-location-strand-to: strand selector for the strand-to component
-- of Edit-location-strand.  Values 1 to 5: there is no any (0) here, and
-- reverse (5) has no counterpart in Feature-location-strand-from.
Feature-location-strand-to ::= ENUMERATED {
  plus (1) ,
  minus (2) ,
  unknown (3) ,
  both (4) ,
  reverse (5) }
6258
-- Edit-location-strand: a from and a to strand selector, both mandatory.
-- Carried by the strand alternative of Location-edit-type.
Edit-location-strand ::= SEQUENCE {
  strand-from Feature-location-strand-from ,
  strand-to   Feature-location-strand-to }
6262
-- Partial-5-set-constraint: condition selector, values 0 to 3, used as
-- the constraint component of Partial-5-set-action.
Partial-5-set-constraint ::= ENUMERATED {
  all (0) ,
  at-end (1) ,
  bad-start (2) ,
  frame-not-one (3) }
6268
-- Partial-5-set-action: a condition selector and a mandatory extend flag.
-- Carried by the set-5-partial alternative of Location-edit-type.
Partial-5-set-action ::= SEQUENCE {
  constraint Partial-5-set-constraint ,
  extend BOOLEAN }
6272
-- Partial-5-clear-constraint: condition selector, values 0 to 2.
-- Carried directly (no wrapping SEQUENCE) by the clear-5-partial
-- alternative of Location-edit-type.
Partial-5-clear-constraint ::= ENUMERATED {
  all (0) ,
  not-at-end (1) ,
  good-start (2) }
6277
-- Partial-3-set-constraint: condition selector, values 0 to 2, used as
-- the constraint component of Partial-3-set-action.
Partial-3-set-constraint ::= ENUMERATED {
  all (0) ,
  at-end (1) ,
  bad-end (2) }
6282
-- Partial-3-set-action: a condition selector and a mandatory extend flag.
-- Carried by the set-3-partial alternative of Location-edit-type.
Partial-3-set-action ::= SEQUENCE {
  constraint Partial-3-set-constraint ,
  extend BOOLEAN }
6286
-- Partial-3-clear-constraint: condition selector, values 0 to 2.
-- Carried directly by the clear-3-partial alternative of
-- Location-edit-type.
Partial-3-clear-constraint ::= ENUMERATED {
  all (0) ,
  not-at-end (1) ,
  good-end (2) }
6291
-- Partial-both-set-constraint: condition selector with two values, used
-- as the constraint component of Partial-both-set-action.
Partial-both-set-constraint ::= ENUMERATED {
  all (0) ,
  at-end (1) }
6295
-- Partial-both-set-action: a condition selector and a mandatory extend
-- flag.
-- Carried by the set-both-partial alternative of Location-edit-type.
Partial-both-set-action ::= SEQUENCE {
  constraint Partial-both-set-constraint ,
  extend BOOLEAN }
6299
-- Partial-both-clear-constraint: condition selector with two values.
-- Carried directly by the clear-both-partial alternative of
-- Location-edit-type.
Partial-both-clear-constraint ::= ENUMERATED {
  all (0) ,
  not-at-end (1) }
6303
-- Convert-location-type: values 1 to 3; no value 0 is defined.
-- Carried by the convert alternative of Location-edit-type.
Convert-location-type ::= ENUMERATED {
  join (1) ,
  order (2) ,
  merge (3) }
6308
-- Extend-to-feature: a feature type, a mandatory include-feat flag and an
-- OPTIONAL distance constraint.
-- Carried by the extend-5-to-feat and extend-3-to-feat alternatives of
-- Location-edit-type.
-- NOTE(review): include-feat presumably decides whether the extension
-- covers the target feature itself or stops next to it; confirm.
Extend-to-feature ::= SEQUENCE {
  type Macro-feature-type ,
  include-feat BOOLEAN ,
  distance Quantity-constraint OPTIONAL }
6313
-- Location-edit-type: one location edit.
-- The set-*-partial alternatives carry a SEQUENCE (constraint plus extend
-- flag) while the clear-*-partial alternatives carry only the constraint
-- enumeration.  extend-5 and extend-3 are NULL and take no parameters.
-- Used as the action component of Edit-feature-location-action.
Location-edit-type ::= CHOICE {
  strand Edit-location-strand ,
  set-5-partial Partial-5-set-action ,
  clear-5-partial Partial-5-clear-constraint ,
  set-3-partial Partial-3-set-action ,
  clear-3-partial Partial-3-clear-constraint ,
  set-both-partial Partial-both-set-action ,
  clear-both-partial Partial-both-clear-constraint ,
  convert Convert-location-type ,
  extend-5 NULL ,
  extend-3 NULL ,
  extend-5-to-feat Extend-to-feature ,
  extend-3-to-feat Extend-to-feature }
6327
-- Edit-feature-location-action: parameters for the edit-location
-- alternative of Macro-action-choice.
-- type and action are mandatory.  retranslate-cds and also-edit-gene are
-- OPTIONAL rather than DEFAULT, so an absent flag is distinct from an
-- explicit FALSE.
-- NOTE(review): how an absent flag is treated is up to the consumer.
Edit-feature-location-action ::= SEQUENCE {
  type Macro-feature-type ,
  action Location-edit-type ,
  retranslate-cds BOOLEAN OPTIONAL ,
  also-edit-gene BOOLEAN OPTIONAL ,
  constraint Constraint-choice-set OPTIONAL }
6334
-- Molinfo-block: a mandatory to-list, an OPTIONAL from-list and an
-- OPTIONAL constraint set.
-- Referenced by the apply-molinfo-block alternative of
-- Macro-action-choice.
-- NOTE(review): presumably to-list holds the values to set and from-list
-- the values that must currently be present; confirm.
Molinfo-block ::= SEQUENCE {
    to-list Molinfo-field-list  ,
    from-list Molinfo-field-list OPTIONAL ,
    constraint Constraint-choice-set OPTIONAL }
6339
-- Descriptor-type: descriptor kinds, values 0 to 11, with all as value 0.
-- Used as the type component of Remove-descriptor-action.
Descriptor-type ::= ENUMERATED {
  all (0) ,
  title (1) ,
  source (2) ,
  publication (3) ,
  comment (4) ,
  genbank (5) ,
  user (6) ,
  create-date (7) ,
  update-date (8) ,
  mol-info (9) ,
  structured-comment (10) ,
  genome-project-id (11) }
6353
-- Remove-descriptor-action: a mandatory descriptor kind and an OPTIONAL
-- constraint set.
-- Referenced by the remove-descriptor alternative of Macro-action-choice.
Remove-descriptor-action ::= SEQUENCE {
  type Descriptor-type ,
  constraint Constraint-choice-set OPTIONAL }
6357
-- Autodef-list-type: values 1 to 4; no value 0 is defined.
-- Used as the mandatory clause-list-type component of Autodef-action.
Autodef-list-type ::= ENUMERATED {
  feature-list (1) ,
  complete-sequence (2) ,
  complete-genome (3) ,
  sequence (4) }
6363
-- Autodef-misc-feat-parse-rule: two rules, values 1 and 2.
-- look-for-noncoding-products (2) is the DEFAULT of the
-- misc-feat-parse-rule component of Autodef-action.
Autodef-misc-feat-parse-rule ::= ENUMERATED {
  use-comment-before-first-semicolon (1) ,
  look-for-noncoding-products (2) }
6367
-- Autodef-action: parameters for the autodef alternative of
-- Macro-action-choice.
-- modifiers is an OPTIONAL inline SET OF Source-qual; clause-list-type is
-- mandatory; misc-feat-parse-rule takes the value
-- look-for-noncoding-products when omitted.
Autodef-action ::= SEQUENCE {
  modifiers SET OF Source-qual OPTIONAL ,
  clause-list-type Autodef-list-type ,
  misc-feat-parse-rule Autodef-misc-feat-parse-rule DEFAULT look-for-noncoding-products }
6372
-- Fix-pub-caps-action: parameters for the fix-pub-caps alternative of
-- Macro-action-choice and the pub alternative of Fix-caps-action.
-- The four part flags are OPTIONAL BOOLEANs (absent is distinct from
-- FALSE); punct-only takes the value FALSE when omitted.
-- NOTE(review): how an absent part flag is treated is up to the consumer.
Fix-pub-caps-action ::= SEQUENCE {
  title BOOLEAN OPTIONAL ,
  authors BOOLEAN OPTIONAL ,
  affiliation BOOLEAN OPTIONAL ,
  affil-country BOOLEAN OPTIONAL ,
  punct-only BOOLEAN DEFAULT FALSE ,
  constraint Constraint-choice-set OPTIONAL }
6380
-- Sort-order: values 1 to 3; no value 0 is defined.
-- Used as the mandatory order component of Sort-fields-action.
Sort-order ::= ENUMERATED {
  short-to-long (1) ,
  long-to-short (2) ,
  alphabetical (3) }
6385
-- Sort-fields-action: a field selector, a sort order and an OPTIONAL
-- constraint set.
-- Referenced by the sort-fields alternative of Macro-action-choice.
Sort-fields-action ::= SEQUENCE {
  field Field-type ,
  order Sort-order ,
  constraint Constraint-choice-set OPTIONAL }
6390
-- Fix-author-caps: a single mandatory BOOLEAN, last-name-only.
-- Carried by the author alternative of Fix-caps-action.
Fix-author-caps ::= SEQUENCE {
  last-name-only BOOLEAN }
6393
-- Fix-caps-action: selects what to fix capitalization on.
-- src-country and mouse-strain are NULL; pub, src-qual and author carry
-- parameters.
-- Referenced by the fix-caps alternative of Macro-action-choice.
Fix-caps-action ::= CHOICE {
  pub Fix-pub-caps-action ,
  src-country NULL ,
  mouse-strain NULL ,
  src-qual Source-qual ,
  author Fix-author-caps }
6400
-- Fix-format-action: selects what to fix the format of.  Every
-- alternative is NULL, so the chosen tag is the whole value.
-- Referenced by the fix-format alternative of Macro-action-choice.
Fix-format-action ::= CHOICE {
  collection-date NULL ,
  lat-lon NULL ,
  primers NULL ,
  protein-name NULL }
6406
-- Remove-duplicate-feature-action: parameters for the
-- remove-duplicate-features alternative of Macro-action-choice.
-- The feature type and the three BOOLEAN flags are mandatory.  The
-- OPTIONAL constraint set is named rd-constraint here, not constraint as
-- in most other action types.
Remove-duplicate-feature-action ::= SEQUENCE {
  type Macro-feature-type ,
  ignore-partials BOOLEAN ,
  case-sensitive BOOLEAN ,
  remove-proteins BOOLEAN ,
  rd-constraint Constraint-choice-set OPTIONAL }
6413
-- Gene-xref-suppression-type: three values, with any as value 0.
-- Used as the suppression component of Gene-xref-type.
Gene-xref-suppression-type ::= ENUMERATED {
  any (0) ,
  suppressing (1) ,
  non-suppressing (2) }
6418
-- Gene-xref-necessary-type: three values, with any as value 0.
-- Used as the necessary component of Gene-xref-type.
Gene-xref-necessary-type ::= ENUMERATED {
  any (0) ,
  necessary (1) ,
  unnecessary (2) }
6423
-- Gene-xref-type: a feature type plus suppression and necessity
-- selectors.  All three components are mandatory.
-- Carried by the gene alternative of Xref-type.
Gene-xref-type ::= SEQUENCE {
  feature Macro-feature-type ,
  suppression Gene-xref-suppression-type ,
  necessary Gene-xref-necessary-type }
6428
-- Xref-type: CHOICE with a single alternative, gene, carrying a
-- Gene-xref-type.
-- Used as the xref-type component of Remove-xrefs-action.
Xref-type ::= CHOICE {
  gene Gene-xref-type }
6431
-- Remove-xrefs-action: a mandatory xref selector and an OPTIONAL
-- constraint set.
-- Referenced by the remove-xrefs alternative of Macro-action-choice.
Remove-xrefs-action ::= SEQUENCE {
  xref-type Xref-type ,
  constraint Constraint-choice-set OPTIONAL }
6435
-- Make-gene-xref-action: a mandatory feature type and an OPTIONAL
-- constraint set.
-- Referenced by the make-gene-xrefs alternative of Macro-action-choice.
Make-gene-xref-action ::= SEQUENCE {
  feature Macro-feature-type ,
  constraint Constraint-choice-set OPTIONAL }
6439
-- Author-fix-type: values 1 to 3; no value 0 is defined.
-- Used as the fix-type component of Author-fix-action.
Author-fix-type ::= ENUMERATED {
  truncate-middle-initials (1) ,
  strip-suffix (2) ,
  move-middle-to-first (3) }
6444
-- Author-fix-action: a mandatory fix kind and an OPTIONAL constraint set.
-- Referenced by the fix-author alternative of Macro-action-choice.
Author-fix-action ::= SEQUENCE {
  fix-type Author-fix-type ,
  constraint Constraint-choice-set OPTIONAL }
6448
-- Update-sequences-action: a mandatory file name; add-cit-subs takes the
-- value FALSE when omitted.
-- Referenced by the update-sequences alternative of Macro-action-choice.
Update-sequences-action ::= SEQUENCE {
  filename VisibleString ,
  add-cit-subs BOOLEAN DEFAULT FALSE }
6452
-- Create-TSA-ids-src: either the payload-free local-id alternative or a
-- defline alternative carrying a Text-portion.
-- Used as the src component of Create-TSA-ids-action.
Create-TSA-ids-src ::= CHOICE {
  local-id NULL ,
  defline Text-portion
}
6457
-- Create-TSA-ids-action: a mandatory source plus an OPTIONAL suffix string
-- and an OPTIONAL text portion.
-- Referenced by the create-tsa-ids alternative of Macro-action-choice.
Create-TSA-ids-action ::= SEQUENCE {
  src Create-TSA-ids-src ,
  suffix VisibleString OPTIONAL ,
  id-text-portion Text-portion OPTIONAL }
6462
-- Autofix-action: a single mandatory string, test-name.
-- Referenced by the perform-autofix alternative of Macro-action-choice.
Autofix-action ::= SEQUENCE {
  test-name VisibleString }
6465
-- Fix-sets-action: selects one set fix.  Every alternative is NULL, so
-- the chosen tag is the whole value.
-- Referenced by the fix-sets alternative of Macro-action-choice.
Fix-sets-action ::= CHOICE {
  remove-single-item-set NULL ,
  renormalize-nuc-prot-sets NULL ,
  fix-pop-to-phy NULL
}
6471
-- Table-match-type: selects what a table is matched on.  Only src-qual
-- carries data (a Source-qual-choice); every other alternative is NULL.
-- Used as the match-type component of Table-match.
Table-match-type ::= CHOICE {
  feature-id NULL ,
  gene-locus-tag NULL ,
  protein-id NULL,
  dbxref NULL ,
  nuc-id NULL ,
  src-qual Source-qual-choice ,
  protein-name NULL ,
  bioproject NULL ,
  any NULL
}
6483
-- Table-match: a mandatory match selector; match-location takes the
-- String-location value equals when omitted (String-location is declared
-- outside this section).
-- Used as the match-type component of Apply-table-action.
Table-match ::= SEQUENCE {
  match-type Table-match-type ,
  match-location String-location DEFAULT equals
}
6488
-- Apply-table-extra-data: CHOICE with a single NULL alternative, table.
-- Used as the OPTIONAL in-memory-table component of Apply-table-action
-- and Add-file-action.
-- NOTE(review): nothing is encoded for it; presumably a placeholder that
-- lets a program attach an in-memory table.  Confirm.
Apply-table-extra-data ::= CHOICE {
  table NULL }
6491
-- Apply-table-action: parameters for the apply-table alternative of
-- Macro-action-choice.
-- filename and match-type are mandatory.  The match-type component here
-- is a Table-match (not a Table-match-type).  also-change-mrna takes the
-- value FALSE and skip-blanks the value TRUE when omitted.
Apply-table-action ::= SEQUENCE {
  filename VisibleString ,
  match-type Table-match ,
  in-memory-table Apply-table-extra-data OPTIONAL ,
  also-change-mrna BOOLEAN DEFAULT FALSE ,
  skip-blanks BOOLEAN DEFAULT TRUE
}
6499
-- Add-file-action: a mandatory file name and an OPTIONAL
-- Apply-table-extra-data.
-- Used as the descriptor-list component of Add-descriptor-list-action.
Add-file-action ::= SEQUENCE {
  filename VisibleString ,
  in-memory-table Apply-table-extra-data OPTIONAL
}
6504
-- Add-descriptor-list-action: a mandatory Add-file-action and an OPTIONAL
-- constraint set.
-- Referenced by the add-file-descriptors alternative of
-- Macro-action-choice.
Add-descriptor-list-action ::= SEQUENCE {
  descriptor-list Add-file-action ,
  constraint Constraint-choice-set OPTIONAL
}
6509
-- Remove-sequences-action: a single constraint set.  Unlike most other
-- action types in this section, the constraint here is mandatory.
-- Referenced by the remove-sequences alternative of Macro-action-choice.
Remove-sequences-action ::= SEQUENCE {
  constraint Constraint-choice-set
}
6513
-- Update-replaced-ec-numbers-action: three mandatory BOOLEAN flags.
-- Referenced by the update-replaced-ecnumbers alternative of
-- Macro-action-choice.
Update-replaced-ec-numbers-action ::= SEQUENCE {
  delete-improper-format BOOLEAN ,
  delete-unrecognized BOOLEAN ,
  delete-multiple-replacement BOOLEAN
}
6519
-- Retranslate-cds-action: a single mandatory BOOLEAN, obey-stop-codon.
-- Referenced by the retranslate-cds alternative of Macro-action-choice.
Retranslate-cds-action ::= SEQUENCE {
  obey-stop-codon BOOLEAN
}
6523
-- Truncated-ends-partial-type: values 1 to 3; no value 0 is defined.
-- Used as the make-truncated-ends-partial component of
-- Adjust-features-for-gaps-action.
Truncated-ends-partial-type ::= ENUMERATED {
  always (1) ,
  unless-pseudo (2) ,
  never (3) }
6528
-- Adjust-features-for-gaps-action: parameters for the
-- adjust-features-for-gaps alternative of Macro-action-choice.
-- Every component is mandatory; there are no defaults and no constraint
-- set.
Adjust-features-for-gaps-action ::= SEQUENCE {
  type Macro-feature-type ,
  adjust-for-unknown-length-gaps BOOLEAN ,
  adjust-for-known-length-gaps BOOLEAN ,
  make-truncated-ends-partial Truncated-ends-partial-type ,
  trim-ends-in-gaps BOOLEAN ,
  split-for-internal-gaps BOOLEAN ,
  even-when-gaps-are-in-introns BOOLEAN
}
6538
-- Macro-action-choice: one macro action.  There is one alternative per
-- action; alternatives typed NULL take no parameters, the others carry
-- the parameter type of that action.
-- Collected into Macro-action-list.
-- NOTE(review): no explicit tags are written on the alternatives, so
-- whether alternative order is significant depends on the module's
-- tagging rules and encoding in use; confirm before inserting or
-- reordering alternatives.
Macro-action-choice ::= CHOICE {
  aecr AECR-action ,
  parse Parse-action ,
  add-feature Apply-feature-action ,
  remove-feature Remove-feature-action ,
  convert-feature Convert-feature-action ,
  edit-location Edit-feature-location-action ,
  remove-descriptor Remove-descriptor-action ,
  autodef Autodef-action ,
  removesets NULL ,
  trim-junk-from-primer-seq NULL ,
  trim-stop-from-complete-cds NULL ,
  fix-usa-and-states NULL ,
  synchronize-cds-partials NULL ,
  adjust-for-consensus-splice NULL ,
  fix-pub-caps Fix-pub-caps-action ,
  remove-seg-gaps NULL ,
  sort-fields Sort-fields-action ,
  apply-molinfo-block Molinfo-block ,
  fix-caps Fix-caps-action ,
  fix-format Fix-format-action ,
  fix-spell NULL ,
  remove-duplicate-features Remove-duplicate-feature-action ,
  remove-lineage-notes NULL ,
  remove-xrefs Remove-xrefs-action ,
  make-gene-xrefs Make-gene-xref-action ,
  make-bold-xrefs NULL ,
  fix-author Author-fix-action ,
  update-sequences Update-sequences-action ,
  add-trans-splicing NULL ,
  remove-invalid-ecnumbers NULL ,
  create-tsa-ids Create-TSA-ids-action ,
  perform-autofix Autofix-action ,
  fix-sets Fix-sets-action ,
  apply-table Apply-table-action ,
  remove-sequences Remove-sequences-action ,
  propagate-sequence-technology NULL ,
  add-file-descriptors Add-descriptor-list-action ,
  propagate-missing-old-name NULL ,
  autoapply-structured-comments NULL ,
  reorder-structured-comments NULL ,
  remove-duplicate-structured-comments NULL ,
  lookup-taxonomy NULL ,
  lookup-pubs NULL ,
  trim-terminal-ns NULL ,
  update-replaced-ecnumbers Update-replaced-ec-numbers-action ,
  instantiate-protein-titles NULL ,
  retranslate-cds Retranslate-cds-action ,
  add-selenocysteine-except NULL ,
  join-short-trnas NULL ,
  adjust-features-for-gaps Adjust-features-for-gaps-action }
6590
-- Macro-action-list: collection of Macro-action-choice values.
-- NOTE(review): declared as SET OF, which does not promise an order; if
-- actions must run in file order, confirm that consumers preserve it.
Macro-action-list ::= SET OF Macro-action-choice
6592
-- Search-func: one text test.  Used by the find and except components of
-- Suspect-rule.
-- Alternatives typed NULL take no parameter; the others carry a string
-- constraint, an INTEGER or a VisibleString.
-- NOTE(review): the INTEGER of n-or-more-brackets-or-parentheses is
-- presumably the minimum count and that of too-long a length limit;
-- confirm.
Search-func ::= CHOICE {
  string-constraint String-constraint ,
  contains-plural NULL ,
  n-or-more-brackets-or-parentheses INTEGER ,
  three-numbers NULL ,
  underscore NULL ,
  prefix-and-numbers VisibleString ,
  all-caps NULL ,
  unbalanced-paren NULL ,
  too-long INTEGER ,
  has-term VisibleString }
6604
-- Simple-replace: an OPTIONAL replacement string plus two flags that take
-- the value FALSE when omitted.
-- Carried by the simple-replace alternative of Replace-func.
Simple-replace ::= SEQUENCE {
  replace VisibleString OPTIONAL,
  whole-string BOOLEAN DEFAULT FALSE ,
  weasel-to-putative BOOLEAN DEFAULT FALSE }
6609
-- Replace-func: either a Simple-replace or a haem-replace carrying a
-- VisibleString.
-- Used as the replace-func component of Replace-rule.
Replace-func ::= CHOICE {
  simple-replace Simple-replace ,
  haem-replace VisibleString }
6613
-- Replace-rule: a mandatory Replace-func; move-to-note takes the value
-- FALSE when omitted.
-- Used as the OPTIONAL replace component of Suspect-rule.
Replace-rule ::= SEQUENCE {
  replace-func Replace-func ,
  move-to-note BOOLEAN DEFAULT FALSE }
6617
-- Fix-type: rule categories, values 0 to 14.
-- none (0) is the DEFAULT of the rule-type component of Suspect-rule.
Fix-type ::= ENUMERATED {
  none (0) ,
  typo (1) ,
  putative-typo (2) ,
  quickfix (3) ,
  no-organelle-for-prokaryote (4),
  might-be-nonfunctional (5),
  database (6),
  remove-organism-name (7),
  inappropriate-symbol (8),
  evolutionary-relationship (9),
  use-protein (10),
  hypothetical (11),
  british (12),
  description (13),
  gene (14) }
6634
-- Suspect-rule: one rule, collected into Suspect-rule-set.
-- find is the only mandatory component.  rule-type takes the value none
-- and fatal the value FALSE when omitted; the rest are OPTIONAL.
-- NOTE(review): presumably a text matches the rule when find succeeds and
-- except (if present) does not; confirm in the consuming code.
Suspect-rule ::= SEQUENCE {
  find Search-func ,
  except Search-func OPTIONAL ,
  feat-constraint Constraint-choice-set OPTIONAL ,
  rule-type Fix-type DEFAULT none ,
  replace Replace-rule OPTIONAL ,
  description VisibleString OPTIONAL ,
  fatal BOOLEAN DEFAULT FALSE }
6643
-- Suspect-rule-set: unordered collection of Suspect-rule values.
Suspect-rule-set ::= SET OF Suspect-rule
6645
6646
6647END
6648