-- ncbi/asn/asn.all

--$Revision: 6.0 $
--**********************************************************************
--
--  asn.all
--  this file contains all NCBI ASN.1 specifications together
--
--  by James Ostell, 1990
--
--**********************************************************************

--$Revision: 6.6 $
--**********************************************************************
--
--  NCBI General Data elements
--  by James Ostell, 1990
--  Version 3.0 - June 1994
--
--**********************************************************************

NCBI-General DEFINITIONS ::=
BEGIN

EXPORTS Date, Person-id, Object-id, Dbtag, Int-fuzz, User-object, User-field;

-- StringStore is really a VisibleString.  It is used to define very
--   long strings which may need to be stored by the receiving program
--   in special structures, such as a ByteStore, but it's just a hint.
--   AsnTool stores StringStores in ByteStore structures.
-- OCTET STRINGs are also stored in ByteStores by AsnTool
--
-- typedef struct bsunit {             /* for building multiline strings */
   -- Nlm_Handle str;            /* the string piece */
   -- Nlm_Int2 len_avail,
       -- len;
   -- struct bsunit PNTR next; }       /* the next one */
-- Nlm_BSUnit, PNTR Nlm_BSUnitPtr;
--
-- typedef struct bytestore {
   -- Nlm_Int4 seekptr,       /* current position */
      -- totlen,             /* total stored data length in bytes */
      -- chain_offset;       /* offset in ByteStore of first byte in curchain */
   -- Nlm_BSUnitPtr chain,       /* chain of elements */
      -- curchain;           /* the BSUnit containing seekptr */
-- } Nlm_ByteStore, PNTR Nlm_ByteStorePtr;
--
-- AsnTool incorporates this as a primitive type, so the definition
--   is here just for completeness
--
--  StringStore ::= [APPLICATION 1] IMPLICIT OCTET STRING
--

-- BigInt is really an INTEGER. It is used to warn the receiving code to expect
--   a value bigger than Int4 (actually Int8). It will be stored in DataVal.bigintvalue
--
--   Like StringStore, AsnTool incorporates it as a primitive. The definition would be:
--   BigInt ::= [APPLICATION 2] IMPLICIT INTEGER
--

-- Date is used to replace the (overly complex) UTCTime, GeneralizedTime
--  of ASN.1
--  It stores only a date, either as free text that was not parsed (str)
--  or as a structured Date-std (std)
--

Date ::= CHOICE {
    str VisibleString ,        -- for those unparsed dates
    std Date-std }             -- use this if you can

-- Date-std is the structured form of a Date. Only year is mandatory;
--   every other field is OPTIONAL and may be left out
Date-std ::= SEQUENCE {        -- NOTE: this is NOT a unix tm struct
    year INTEGER ,             -- full year (including 1900), eg. 1994 not 94
    month INTEGER OPTIONAL ,   -- month (1-12)
    day INTEGER OPTIONAL ,     -- day of month (1-31)
    season VisibleString OPTIONAL ,  -- for "spring", "may-june", etc
    hour INTEGER OPTIONAL ,    -- hour of day (0-23)
    minute INTEGER OPTIONAL ,  -- minute of hour (0-59)
    second INTEGER OPTIONAL }  -- second of minute (0-59)

-- Dbtag is generalized for tagging: it pairs the name of a database or
--   system (db) with an identifier inside it (tag)
-- eg. { "Social Security", str "023-79-8841" }
-- or  { "member", id 8882224 }

Dbtag ::= SEQUENCE {
    db VisibleString ,          -- name of database or system
    tag Object-id }         -- appropriate tag

-- Object-id can tag or name anything, either by number (id) or by
--   string (str)
--

Object-id ::= CHOICE {
    id INTEGER ,
    str VisibleString }

-- Person-id is to define a std element for people
--   a person is identified by exactly one of the alternatives below
--

Person-id ::= CHOICE {
    dbtag Dbtag ,               -- any defined database tag
    name Name-std ,             -- structured name
    ml VisibleString ,          -- MEDLINE name (semi-structured)
                                --    eg. "Jones RM"
    str VisibleString,          -- unstructured name
    consortium VisibleString }  -- consortium name

-- Name-std is a personal name broken into parts; only last is mandatory
Name-std ::= SEQUENCE { -- Structured names
    last VisibleString ,
    first VisibleString OPTIONAL ,
    middle VisibleString OPTIONAL ,
    full VisibleString OPTIONAL ,    -- full name eg. "J. John Smith, Esq"
    initials VisibleString OPTIONAL,  -- first + middle initials
    suffix VisibleString OPTIONAL ,   -- Jr, Sr, III
    title VisibleString OPTIONAL }    -- Dr., Sister, etc

--**** Int-fuzz **********************************************
--*
--*   uncertainties in integer values
--*   exactly one alternative says how the associated integer is uncertain

Int-fuzz ::= CHOICE {
    p-m INTEGER ,                    -- plus or minus fixed amount
    range SEQUENCE {                 -- max to min
        max INTEGER ,
        min INTEGER } ,
    pct INTEGER ,                    -- % plus or minus (x10) 0-1000
    lim ENUMERATED {                 -- some limit value
        unk (0) ,                    -- unknown
        gt (1) ,                     -- greater than
        lt (2) ,                     -- less than
        tr (3) ,                     -- space to right of position
        tl (4) ,                     -- space to left of position
        circle (5) ,                 -- artificial break at origin of circle
        other (255) } ,              -- something else
    alt SET OF INTEGER }             -- set of alternatives for the integer


--**** User-object **********************************************
--*
--*   a general object for a user defined structured data item
--*    used by Seq-feat and Seq-descr
--*   the content is an ordered list of User-field entries

User-object ::= SEQUENCE {
    class VisibleString OPTIONAL ,   -- endeavor which designed this object
    type Object-id ,                 -- type of object within class
    data SEQUENCE OF User-field }    -- the object itself

-- User-field is one labelled entry of a User-object. Its data is a single
--   value, a list of values, or nested User-field / User-object structures,
--   so the two types together can describe trees of any depth
User-field ::= SEQUENCE {
    label Object-id ,                -- field label
    num INTEGER OPTIONAL ,           -- required for strs, ints, reals, oss
                                     -- NOTE(review): presumably the number of
                                     -- elements in that list; confirm
    data CHOICE {                    -- field contents
        str UTF8String ,
        int INTEGER ,
        real REAL ,
        bool BOOLEAN ,
        os OCTET STRING ,
        object User-object ,         -- for using other definitions
        strs SEQUENCE OF UTF8String ,
        ints SEQUENCE OF INTEGER ,
        reals SEQUENCE OF REAL ,
        oss SEQUENCE OF OCTET STRING ,
        fields SEQUENCE OF User-field ,
        objects SEQUENCE OF User-object } }


END

--$Revision: 6.3 $
--****************************************************************
--
--  NCBI Bibliographic data elements
--  by James Ostell, 1990
--
--  Taken from the American National Standard for
--      Bibliographic References
--      ANSI Z39.29-1977
--  Version 3.0 - June 1994
--  PubMedId added in 1996
--  ArticleIds and eprint elements added in 1999
--
--****************************************************************

NCBI-Biblio DEFINITIONS ::=
BEGIN

EXPORTS Cit-art, Cit-jour, Cit-book, Cit-pat, Cit-let, Id-pat, Cit-gen,
        Cit-proc, Cit-sub, Title, Author, PubMedId, DOI;

IMPORTS Person-id, Date, Dbtag FROM NCBI-General;

    -- Article Ids

-- ArticleId is a single identifier for an article, drawn from one of
-- several id systems. An article may have many; see ArticleIdSet.
ArticleId ::= CHOICE {
    pubmed PubMedId ,          -- see types below
    medline MedlineUID ,
    doi DOI ,
    pii PII ,
    pmcid PmcID ,
    pmcpid PmcPid ,
    pmpid PmPid ,
    other Dbtag }              -- generic catch all

-- Identifier types used by the alternatives of ArticleId
PubMedId ::= INTEGER           -- Id from the PubMed database at NCBI
MedlineUID ::= INTEGER         -- Id from MEDLINE
DOI ::= VisibleString          -- Document Object Identifier
PII ::= VisibleString          -- Controlled Publisher Identifier
PmcID ::= INTEGER              -- PubMed Central Id
PmcPid ::= VisibleString       -- Publisher Id supplied to PubMed Central
PmPid ::= VisibleString        -- Publisher Id supplied to PubMed

-- ArticleIdSet collects the ids known for one article
ArticleIdSet ::= SET OF ArticleId

    -- Status Dates

-- PubStatus names a point in the life of a publication. It is an INTEGER
-- with named numbers, not an ENUMERATED, so the list does not restrict
-- the values that may be encoded
PubStatus ::= INTEGER {            -- points of publication
    received  (1) ,            -- date manuscript received for review
    accepted  (2) ,            -- accepted for publication
    epublish  (3) ,            -- published electronically by publisher
    ppublish  (4) ,            -- published in print by publisher
    revised   (5) ,            -- article revised by publisher/author
    pmc       (6) ,            -- article first appeared in PubMed Central
    pmcr      (7) ,            -- article revision in PubMed Central
    pubmed    (8) ,            -- article citation first appeared in PubMed
    pubmedr   (9) ,            -- article citation revision in PubMed
    aheadofprint (10),         -- epublish, but will be followed by print
    premedline (11),           -- date into PreMedline status
    medline    (12),           -- date made a MEDLINE record
    other    (255) }

-- PubStatusDate records the date on which one PubStatus was reached
PubStatusDate ::= SEQUENCE {   -- done as a structure so fields can be added
    pubstatus PubStatus ,
    date Date }                -- time may be added later

-- PubStatusDateSet is the collection of status dates for one record
PubStatusDateSet ::= SET OF PubStatusDate

    -- Citation Types

-- Cit-art cites an article by the journal, book or proceedings it
-- appeared in (from), with optional title, authors and ids
Cit-art ::= SEQUENCE {                  -- article in journal or book
    title Title OPTIONAL ,              -- title of paper (ANSI requires)
    authors Auth-list OPTIONAL ,        -- authors (ANSI requires)
    from CHOICE {                       -- journal or book
        journal Cit-jour ,
        book Cit-book ,
        proc Cit-proc } ,
    ids ArticleIdSet OPTIONAL }         -- lots of ids

-- Cit-jour cites a journal: its title plus the imprint details
Cit-jour ::= SEQUENCE {             -- Journal citation
    title Title ,                   -- title of journal
    imp Imprint }

-- Cit-book cites a book. It is also reused as the body of Cit-proc
-- and Cit-let
Cit-book ::= SEQUENCE {              -- Book citation
    title Title ,                    -- Title of book
    coll Title OPTIONAL ,            -- part of a collection
    authors Auth-list,               -- authors
    imp Imprint }

-- Cit-proc cites meeting proceedings: a book citation plus the meeting
Cit-proc ::= SEQUENCE {             -- Meeting proceedings
    book Cit-book ,                 -- citation to meeting
    meet Meeting }                  -- time and location of meeting

    -- Patent number and date-issue were made optional in 1997 to
    --   support patent applications being issued from the USPTO
    --   Semantically a Cit-pat must have either a patent number or
    --   an application number (or both) to be valid
    --   That rule is not expressed in the type itself: both number and
    --   app-number are OPTIONAL below

Cit-pat ::= SEQUENCE {                  -- patent citation
    title VisibleString ,
    authors Auth-list,                  -- author/inventor
    country VisibleString ,             -- Patent Document Country
    doc-type VisibleString ,            -- Patent Document Type
    number VisibleString OPTIONAL,      -- Patent Document Number
    date-issue Date OPTIONAL,           -- Patent Issue/Pub Date
    class SEQUENCE OF VisibleString OPTIONAL ,      -- Patent Doc Class Code
    app-number VisibleString OPTIONAL , -- Patent Doc Appl Number
    app-date Date OPTIONAL ,            -- Patent Appl File Date
    applicants Auth-list OPTIONAL ,     -- Applicants
    assignees Auth-list OPTIONAL ,      -- Assignees
    priority SEQUENCE OF Patent-priority OPTIONAL , -- Priorities
    abstract VisibleString OPTIONAL }   -- abstract of patent

-- Patent-priority is one entry of the priority list of a Cit-pat
Patent-priority ::= SEQUENCE {
    country VisibleString ,             -- Patent country code
    number VisibleString ,              -- number assigned in that country
    date Date }                         -- date of application

-- Id-pat identifies a patent by country plus either its document number
-- or its application number, without the full citation of Cit-pat
Id-pat ::= SEQUENCE {                   -- just to identify a patent
    country VisibleString ,             -- Patent Document Country
    id CHOICE {
        number VisibleString ,          -- Patent Document Number
        app-number VisibleString } ,    -- Patent Doc Appl Number
    doc-type VisibleString OPTIONAL }   -- Patent Doc Type

-- Cit-let cites an unpublished work using the fields of a book citation;
-- type says which kind of work it is
Cit-let ::= SEQUENCE {                  -- letter, thesis, or manuscript
    cit Cit-book ,                      -- same fields as a book
    man-id VisibleString OPTIONAL ,     -- Manuscript identifier
    type ENUMERATED {
        manuscript (1) ,
        letter (2) ,
        thesis (3) } OPTIONAL }
                                -- NOTE: this is just to cite a
                                -- direct data submission, see NCBI-Submit
                                -- for the form of a sequence submission
-- Cit-sub records who submitted data, how, and when. The date field is
-- the intended replacement for imp.
Cit-sub ::= SEQUENCE {              -- citation for a direct submission
    authors Auth-list ,             -- not necessarily authors of the paper
    imp Imprint OPTIONAL ,          -- only used to get the date, will go away
    medium ENUMERATED {             -- medium of submission
        paper (1) ,
        tape (2) ,
        floppy (3) ,
        email (4) ,
        other (255) } OPTIONAL ,
    date Date OPTIONAL ,            -- replaces imp, will become required
    descr VisibleString OPTIONAL }  -- description of changes for public view

-- Cit-gen is the catchall citation for anything the structured citation
-- types cannot hold. Every field is OPTIONAL.
Cit-gen ::= SEQUENCE {                 -- NOT from ANSI, this is a catchall
    cit VisibleString OPTIONAL ,       -- anything, not parsable
    authors Auth-list OPTIONAL ,
    muid INTEGER OPTIONAL ,            -- medline uid
    journal Title OPTIONAL ,
    volume VisibleString OPTIONAL ,
    issue VisibleString OPTIONAL ,
    pages VisibleString OPTIONAL ,
    date Date OPTIONAL ,
    serial-number INTEGER OPTIONAL ,   -- for GenBank style references
    title VisibleString OPTIONAL ,     -- eg. cit="unpublished",title="title"
    pmid PubMedId OPTIONAL }           -- PubMed Id


    -- Authorship Group
-- Auth-list holds the names of a group of authors in one of three
-- representations, plus an optional affiliation
Auth-list ::= SEQUENCE {
    names CHOICE {
        std SEQUENCE OF Author ,          -- full citations
        ml SEQUENCE OF VisibleString ,    -- MEDLINE, semi-structured
        str SEQUENCE OF VisibleString } , -- free for all
    affil Affil OPTIONAL }                -- author affiliation

-- Author is one contributor in the std form of an Auth-list. Only name
-- is mandatory
Author ::= SEQUENCE {
    name Person-id ,                        -- Author, Primary or Secondary
    level ENUMERATED {
        primary (1),
        secondary (2) } OPTIONAL ,
    role ENUMERATED {                   -- Author Role Indicator
        compiler (1),
        editor (2),
        patent-assignee (3),
        translator (4) } OPTIONAL ,
    affil Affil OPTIONAL ,
    is-corr BOOLEAN OPTIONAL }          -- TRUE if corresponding author

-- Affil is an affiliation, given either as one unparsed string or broken
-- into the parts of the std SEQUENCE, all of which are OPTIONAL
Affil ::= CHOICE {
    str VisibleString ,                     -- unparsed string
    std SEQUENCE {                          -- std representation
        affil VisibleString OPTIONAL ,      -- Author Affiliation, Name
        div VisibleString OPTIONAL ,        -- Author Affiliation, Division
        city VisibleString OPTIONAL ,       -- Author Affiliation, City
        sub VisibleString OPTIONAL ,        -- Author Affiliation, County Sub
        country VisibleString OPTIONAL ,    -- Author Affiliation, Country
        street VisibleString OPTIONAL ,     -- street address, not ANSI
        email VisibleString OPTIONAL ,
        fax VisibleString OPTIONAL ,
        phone VisibleString OPTIONAL ,
        postal-code VisibleString OPTIONAL } }

    -- Title Group
    -- Valid for:  A = Analytic (Cit-art)
    --             J = Journals (Cit-jour)
    --             B = Book (Cit-book)
    -- A Title is a SET OF, so one work can carry several kinds of title
                                                 -- Valid for:
Title ::= SET OF CHOICE {
    name VisibleString ,    -- Title, Anal,Coll,Mono    AJB
    tsub VisibleString ,    -- Title, Subordinate       A B
    trans VisibleString ,   -- Title, Translated        AJB
    jta VisibleString ,     -- Title, Abbreviated        J
    iso-jta VisibleString , -- specifically ISO jta      J
    ml-jta VisibleString ,  -- specifically MEDLINE jta  J
    coden VisibleString ,   -- a coden                   J
    issn VisibleString ,    -- ISSN                      J
    abr VisibleString ,     -- Title, Abbreviated         B
    isbn VisibleString }    -- ISBN                       B

-- Imprint holds the publication details shared by journal and book
-- citations. Only date is mandatory; language falls back to "ENG"
-- when absent
Imprint ::= SEQUENCE {                  -- Imprint group
    date Date ,                         -- date of publication
    volume VisibleString OPTIONAL ,
    issue VisibleString OPTIONAL ,
    pages VisibleString OPTIONAL ,
    section VisibleString OPTIONAL ,
    pub Affil OPTIONAL,                     -- publisher, required for book
    cprt Date OPTIONAL,                     -- copyright date, "    "   "
    part-sup VisibleString OPTIONAL ,       -- part/sup of volume
    language VisibleString DEFAULT "ENG" ,  -- put here for simplicity
    prepub ENUMERATED {                     -- for prepublication citations
        submitted (1) ,                     -- submitted, not accepted
        in-press (2) ,                      -- accepted, not published
        other (255)  } OPTIONAL ,
    part-supi VisibleString OPTIONAL ,      -- part/sup on issue
    retract CitRetract OPTIONAL ,           -- retraction info
    pubstatus PubStatus OPTIONAL ,          -- current status of this publication
    history PubStatusDateSet OPTIONAL }     -- dates for this record

-- CitRetract flags a citation as retracted or corrected, or as being the
-- notice or erratum itself
CitRetract ::= SEQUENCE {
    type ENUMERATED {                    -- retraction of an entry
        retracted (1) ,               -- this citation retracted
        notice (2) ,                  -- this citation is a retraction notice
        in-error (3) ,                -- an erratum was published about this
        erratum (4) } ,               -- this is a published erratum
    exp VisibleString OPTIONAL }      -- citation and/or explanation

-- Meeting describes the meeting whose proceedings a Cit-proc cites
Meeting ::= SEQUENCE {
    number VisibleString ,
    date Date ,
    place Affil OPTIONAL }


END


--$Revision: 6.0 $
--**********************************************************************
--
--  MEDLINE data definitions
--  James Ostell, 1990
--
--  enhanced in 1996 to support PubMed records as well by simply adding
--    the PubMedId and making MedlineId optional
--
--**********************************************************************

NCBI-Medline DEFINITIONS ::=
BEGIN

EXPORTS Medline-entry, Medline-si;

IMPORTS Cit-art, PubMedId FROM NCBI-Biblio
        Date FROM NCBI-General;

                                -- a MEDLINE or PubMed entry
-- Only em and cit are mandatory. status falls back to medline when absent.
Medline-entry ::= SEQUENCE {
    uid INTEGER OPTIONAL ,                   -- MEDLINE UID, sometimes not yet available if from PubMed
    em Date ,                                -- Entry Month
    cit Cit-art ,                            -- article citation
    abstract VisibleString OPTIONAL ,
    mesh SET OF Medline-mesh OPTIONAL ,
    substance SET OF Medline-rn OPTIONAL ,
    xref SET OF Medline-si OPTIONAL ,
    idnum SET OF VisibleString OPTIONAL ,    -- ID Number (grants, contracts)
    gene SET OF VisibleString OPTIONAL ,
    pmid PubMedId OPTIONAL ,                 -- MEDLINE records may include the PubMedId
    pub-type SET OF VisibleString OPTIONAL , -- may show publication types (review, etc)
    mlfield SET OF Medline-field OPTIONAL ,  -- additional Medline field types
    status INTEGER {
        publisher (1) ,                      -- record as supplied by publisher
        premedline (2) ,                     -- premedline record
        medline (3) } DEFAULT medline }      -- regular medline record

-- Medline-mesh is one MeSH term of an entry with its optional qualifiers
Medline-mesh ::= SEQUENCE {
    mp BOOLEAN DEFAULT FALSE ,       -- TRUE if main point (*)
    term VisibleString ,                   -- the MeSH term
    qual SET OF Medline-qual OPTIONAL }    -- qualifiers

-- Medline-qual is one qualifier (subheading) attached to a Medline-mesh
Medline-qual ::= SEQUENCE {
    mp BOOLEAN DEFAULT FALSE ,       -- TRUE if main point
    subh VisibleString }             -- the subheading

-- Medline-rn names a substance and, when type is cas or ec, may carry
-- the matching number in cit
Medline-rn ::= SEQUENCE {       -- medline substance records
    type ENUMERATED {           -- type of record
        nameonly (0) ,
        cas (1) ,               -- CAS number
        ec (2) } ,              -- EC number
    cit VisibleString OPTIONAL ,  -- CAS or EC number if present
    name VisibleString }          -- name (always present)

-- Medline-si points from a MEDLINE entry into another database: type
-- names the database and cit holds the citation or accession there
Medline-si ::= SEQUENCE {       -- medline cross reference records
    type ENUMERATED {           -- type of xref
        ddbj (1) ,              -- DNA Data Bank of Japan
        carbbank (2) ,          -- Carbohydrate Structure Database
        embl (3) ,              -- EMBL Data Library
        hdb (4) ,               -- Hybridoma Data Bank
        genbank (5) ,           -- GenBank
        hgml (6) ,              -- Human Gene Map Library
        mim (7) ,               -- Mendelian Inheritance in Man
        msd (8) ,               -- Microbial Strains Database
        pdb (9) ,               -- Protein Data Bank (Brookhaven)
        pir (10) ,              -- Protein Identification Resource
        prfseqdb (11) ,         -- Protein Research Foundation (Japan)
        psd (12) ,              -- Protein Sequence Database (Japan)
        swissprot (13) ,        -- SwissProt
        gdb (14) } ,            -- Genome Data Base
    cit VisibleString OPTIONAL }    -- the citation/accession number

-- Medline-field is an additional typed text field of a Medline-entry,
-- optionally pointing at related documents
Medline-field ::= SEQUENCE {
    type INTEGER {                     -- Keyed type
        other (0) ,                    -- look in line code
        comment (1) ,                  -- comment line
        erratum (2) } ,                -- retracted, corrected, etc
    str VisibleString ,                -- the text
    ids SEQUENCE OF DocRef OPTIONAL }  -- pointers relevant to this text

-- DocRef is an integer uid together with the id system it belongs to
DocRef ::= SEQUENCE {           -- reference to a document
    type INTEGER {
        medline (1) ,
        pubmed (2) ,
        ncbigi (3) } ,
    uid INTEGER }

END

--$Revision: 6.0 $
--**********************************************************************
--
--  PUBMED data definitions
--
--**********************************************************************

NCBI-PubMed DEFINITIONS ::=
BEGIN

EXPORTS Pubmed-entry, Pubmed-url;

IMPORTS PubMedId FROM NCBI-Biblio
        Medline-entry FROM NCBI-Medline;

-- Only pmid is mandatory; everything else may be absent
Pubmed-entry ::= SEQUENCE {        -- a PubMed entry
    -- PUBMED records must include the PubMedId
    pmid PubMedId,

    -- Medline entry information
    medent Medline-entry OPTIONAL,

    -- Publisher name
    publisher VisibleString OPTIONAL,

    -- List of publisher URLs for this entry (see Pubmed-url)
    urls SET OF Pubmed-url OPTIONAL,

    -- Publisher's article identifier
    pubid VisibleString OPTIONAL
}

-- Pubmed-url is one entry of the urls set of a Pubmed-entry
Pubmed-url ::= SEQUENCE {
    location VisibleString OPTIONAL, -- Location code
    url VisibleString                -- Selected URL for location
}

END
--$Revision: 6.0 $
--**********************************************************************
--
--  MEDLARS data definitions
--  Grigoriy Starchenko, 1997
--
--**********************************************************************

NCBI-Medlars DEFINITIONS ::=
BEGIN

EXPORTS Medlars-entry, Medlars-record;

IMPORTS PubMedId FROM NCBI-Biblio;

-- A Medlars-entry is a PubMed id plus the set of unit records for it
Medlars-entry ::= SEQUENCE {     -- a MEDLARS entry
    pmid PubMedId,               -- All entries in PubMed must have it
    muid INTEGER OPTIONAL,       -- Medline(OCCS) id
    recs SET OF Medlars-record   -- List of Medlars records
}

-- Medlars-record is one unit record: the field type, as an integer code
-- and optionally as an abbreviation, together with the field data
Medlars-record ::= SEQUENCE {
    code INTEGER,                -- Unit record field type integer form
    abbr VisibleString OPTIONAL, -- Unit record field type abbreviation form
    data VisibleString           -- Unit record data
}

END
--$Revision: 6.0 $
--********************************************************************
--
--  Publication common set
--  James Ostell, 1990
--
--  These are the base class definitions for Publications of all sorts
--
--  support for PubMedId added in 1996
--********************************************************************

NCBI-Pub DEFINITIONS ::=
BEGIN

EXPORTS Pub, Pub-set, Pub-equiv;

IMPORTS Medline-entry FROM NCBI-Medline
        Cit-art, Cit-jour, Cit-book, Cit-proc, Cit-pat, Id-pat, Cit-gen,
        Cit-let, Cit-sub, PubMedId FROM NCBI-Biblio;

-- Pub is a single publication reference in any one of the supported
-- citation forms, from a bare id up to a full citation
Pub ::= CHOICE {
    gen Cit-gen ,             -- general or generic unparsed
    sub Cit-sub ,             -- submission
    medline Medline-entry ,
    muid INTEGER ,            -- medline uid
    article Cit-art ,
    journal Cit-jour ,
    book Cit-book ,
    proc Cit-proc ,           -- proceedings of a meeting
    patent Cit-pat ,
    pat-id Id-pat ,           -- identify a patent
    man Cit-let ,             -- manuscript, thesis, or letter
    equiv Pub-equiv ,         -- to cite a variety of ways
    pmid PubMedId }           -- PubMedId

-- Pub-equiv groups several Pubs that all refer to the same citation
Pub-equiv ::= SET OF Pub   -- equivalent identifiers for same citation

-- Pub-set is a collection of publications, either mixed (pub) or all of
-- one citation type
Pub-set ::= CHOICE {
    pub SET OF Pub ,
    medline SET OF Medline-entry ,
    article SET OF Cit-art ,
    journal SET OF Cit-jour ,
    book SET OF Cit-book ,
    proc SET OF Cit-proc ,      -- proceedings of a meeting
    patent SET OF Cit-pat }

END

--$Revision: 6.7 $
--**********************************************************************
--
--  NCBI Sequence location and identifier elements
--  by James Ostell, 1990
--
--  Version 3.0 - 1994
--
--**********************************************************************

NCBI-Seqloc DEFINITIONS ::=
BEGIN

EXPORTS Seq-id, Seq-loc, Seq-interval, Packed-seqint, Seq-point, Packed-seqpnt,
        Na-strand, Giimport-id;

IMPORTS Object-id, Int-fuzz, Dbtag, Date FROM NCBI-General
        Id-pat FROM NCBI-Biblio
        Feat-id FROM NCBI-Seqfeat;

--*** Sequence identifiers ********************************
--*

-- Seq-id is one identifier for a sequence. The alternative chosen names
-- the id system; most of them share the Textseq-id layout
Seq-id ::= CHOICE {
    local Object-id ,            -- local use
    gibbsq INTEGER ,             -- Geninfo backbone seqid
    gibbmt INTEGER ,             -- Geninfo backbone moltype
    giim Giimport-id ,           -- Geninfo import id
    genbank Textseq-id ,
    embl Textseq-id ,
    pir Textseq-id ,
    swissprot Textseq-id ,
    patent Patent-seq-id ,
    other Textseq-id ,           -- for historical reasons, 'other' = 'refseq'
    general Dbtag ,              -- for other databases
    gi INTEGER ,                 -- GenInfo Integrated Database
    ddbj Textseq-id ,            -- DDBJ
    prf Textseq-id ,             -- PRF SEQDB
    pdb PDB-seq-id ,             -- PDB sequence
    tpg Textseq-id ,             -- Third Party Annot/Seq Genbank
    tpe Textseq-id ,             -- Third Party Annot/Seq EMBL
    tpd Textseq-id ,             -- Third Party Annot/Seq DDBJ
    gpipe Textseq-id ,           -- Internal NCBI genome pipeline processing ID
    named-annot-track Textseq-id -- Internal named annotation tracking ID
}

-- Seq-id-set is a collection of Seq-ids
Seq-id-set ::= SET OF Seq-id


-- Patent-seq-id identifies a sequence by its number within a patent
Patent-seq-id ::= SEQUENCE {
    seqid INTEGER ,         -- number of sequence in patent
    cit Id-pat }           -- patent citation

-- Textseq-id is the text-based identifier layout shared by many Seq-id
-- alternatives. Every field is OPTIONAL, so the type itself does not
-- force any of them to be present
Textseq-id ::= SEQUENCE {
    name VisibleString OPTIONAL ,
    accession VisibleString OPTIONAL ,
    release VisibleString OPTIONAL ,
    version INTEGER OPTIONAL }

-- Giimport-id is the Geninfo import id used by the giim alternative
-- of Seq-id
Giimport-id ::= SEQUENCE {
    id INTEGER ,                     -- the id to use here
    db VisibleString OPTIONAL ,      -- dbase used in
    release VisibleString OPTIONAL } -- the release

-- PDB-seq-id identifies one chain of a PDB molecule.
-- Must set either chain or chain-id, but not both
PDB-seq-id ::= SEQUENCE {
    mol PDB-mol-id ,                    -- the molecule name
    chain INTEGER DEFAULT 32 ,          -- Deprecated : For single ASCII character
                                        -- chain identifiers of pre-2015 PDB structures
                                        -- (the default 32 is the ASCII code for space)
    rel Date OPTIONAL ,                 -- release date, month and year
    chain-id VisibleString OPTIONAL }   -- chain identifier

-- The 4 character length is a convention only; the type carries no
-- SIZE constraint
PDB-mol-id ::= VisibleString  -- name of mol, 4 chars

--*** Sequence locations **********************************
--*

-- Seq-loc is a location on one or more sequences. It is recursive: mix
-- and equiv are themselves collections of Seq-loc
Seq-loc ::= CHOICE {
    null NULL ,           -- not placed
    empty Seq-id ,        -- to NULL one Seq-id in a collection
    whole Seq-id ,        -- whole sequence
    int Seq-interval ,    -- from to
    packed-int Packed-seqint ,
    pnt Seq-point ,
    packed-pnt Packed-seqpnt ,
    mix Seq-loc-mix ,
    equiv Seq-loc-equiv ,  -- equivalent sets of locations
    bond Seq-bond ,
    feat Feat-id }         -- indirect, through a Seq-feat


-- Seq-interval is a from/to range on the sequence named by id, with
-- optional strand and optional uncertainty on either end
Seq-interval ::= SEQUENCE {
    from INTEGER ,
    to INTEGER ,
    strand Na-strand OPTIONAL ,
    id Seq-id ,    -- WARNING: this used to be optional
    fuzz-from Int-fuzz OPTIONAL ,
    fuzz-to Int-fuzz OPTIONAL }

-- Packed-seqint is an ordered list of intervals; each one carries its
-- own Seq-id
Packed-seqint ::= SEQUENCE OF Seq-interval

-- Seq-point is a single position on the sequence named by id
Seq-point ::= SEQUENCE {
    point INTEGER ,
    strand Na-strand OPTIONAL ,
    id Seq-id ,     -- WARNING: this used to be optional
    fuzz Int-fuzz OPTIONAL }

-- Packed-seqpnt is a list of positions that share one id, strand and fuzz
Packed-seqpnt ::= SEQUENCE {
    strand Na-strand OPTIONAL ,
    id Seq-id ,
    fuzz Int-fuzz OPTIONAL ,
    points SEQUENCE OF INTEGER }

-- Na-strand is the strand value used by the strand field of the
-- location types in this module
Na-strand ::= ENUMERATED {          -- strand of nucleic acid
    unknown (0) ,
    plus (1) ,
    minus (2) ,
    both (3) ,                -- in forward orientation
    both-rev (4) ,            -- in reverse orientation
    other (255) }

-- Seq-bond joins residue a to residue b; b may be omitted
Seq-bond ::= SEQUENCE {         -- bond between residues
    a Seq-point ,           -- connection to at least one residue
    b Seq-point OPTIONAL }  -- other end may not be available

-- Seq-loc-mix is an ordered list (SEQUENCE OF) of locations of any kind
Seq-loc-mix ::= SEQUENCE OF Seq-loc   -- this will hold anything

-- Seq-loc-equiv is an unordered collection (SET OF) of locations
Seq-loc-equiv ::= SET OF Seq-loc      -- for a set of equivalent locations

END


--$Revision: 6.26 $
--**********************************************************************
--
--  NCBI Sequence elements
--  by James Ostell, 1990
--  Version 3.0 - June 1994
--
--**********************************************************************

NCBI-Sequence DEFINITIONS ::=
BEGIN

-- Types this module makes available to other modules; each symbol is
-- listed exactly once
EXPORTS Annotdesc, Annot-descr, Bioseq, GIBB-mol, Heterogen, MolInfo,
        Numbering, Pubdesc, Seq-annot, Seq-data, Seqdesc, Seq-descr, Seq-ext,
        Seq-hist, Seq-inst, Seq-literal, Delta-ext, Seq-gap;

IMPORTS Date, Int-fuzz, Dbtag, Object-id, User-object FROM NCBI-General
        Seq-align FROM NCBI-Seqalign
        Seq-feat, ModelEvidenceSupport FROM NCBI-Seqfeat
        Seq-graph FROM NCBI-Seqres
        Pub-equiv FROM NCBI-Pub
        Org-ref FROM NCBI-Organism
        BioSource FROM NCBI-BioSource
        Seq-id, Seq-loc FROM NCBI-Seqloc
        GB-block FROM GenBank-General
        PIR-block FROM PIR-General
        EMBL-block FROM EMBL-General
        SP-block FROM SP-General
        PRF-block FROM PRF-General
        PDB-block FROM PDB-General
        Seq-table FROM NCBI-SeqTable;

--*** Sequence ********************************
--*

-- Bioseq is a single sequence: its identifiers, optional descriptors,
-- the sequence data itself, and optional annotations
Bioseq ::= SEQUENCE {
    id SET OF Seq-id ,            -- equivalent identifiers
    descr Seq-descr OPTIONAL , -- descriptors
    inst Seq-inst ,            -- the sequence data
    annot SET OF Seq-annot OPTIONAL }

--*** Descriptors *****************************
--*

-- Seq-descr is the collection of descriptors attached to a sequence
Seq-descr ::= SET OF Seqdesc

-- Seqdesc is a single descriptor; the alternative chosen says what kind
-- of information it carries. mol-type, modif, method and org are slated
-- for removal, see the NOTE that follows this definition
Seqdesc ::= CHOICE {
    mol-type GIBB-mol ,          -- type of molecule
    modif SET OF GIBB-mod ,             -- modifiers
    method GIBB-method ,         -- sequencing method
    name VisibleString ,         -- a name for this sequence
    title VisibleString ,        -- a title for this sequence
    org Org-ref ,                -- if all from one organism
    comment VisibleString ,      -- a more extensive comment
    num Numbering ,              -- a numbering system
    maploc Dbtag ,               -- map location of this sequence
    pir PIR-block ,              -- PIR specific info
    genbank GB-block ,           -- GenBank specific info
    pub Pubdesc ,                -- a reference to the publication
    region VisibleString ,       -- overall region (globin locus)
    user User-object ,           -- user defined object
    sp SP-block ,                -- SWISSPROT specific info
    dbxref Dbtag ,               -- xref to other databases
    embl EMBL-block ,            -- EMBL specific information
    create-date Date ,           -- date entry first created/released
    update-date Date ,           -- date of last update
    prf PRF-block ,              -- PRF specific information
    pdb PDB-block ,              -- PDB specific information
    het Heterogen ,              -- cofactor, etc associated but not bound
    source BioSource ,           -- source of materials, includes Org-ref
    molinfo MolInfo ,            -- info on the molecule and techniques
    modelev ModelEvidenceSupport -- model evidence for XM records
}

--******* NOTE:
--*       mol-type, modif, method, and org are consolidated and expanded
--*       in Org-ref, BioSource, and MolInfo in this specification. They
--*       will be removed in later specifications. Do not use them in
--*       the future. Instead expect the new structures.
--*
--***************************

--********************************************************************
--
-- MolInfo gives information on the
-- classification of the type and quality of the sequence
--
-- WARNING: this will replace GIBB-mol, GIBB-mod, GIBB-method
--
--********************************************************************

-- MolInfo fields:
--   biomol       - kind of molecule; DEFAULT unknown.  Values 0-10 carry the
--                  same numbers as GIBB-mol (2 is named pre-RNA here and
--                  pre-mRNA there); 11-15 exist only here.
--   tech         - technique class; DEFAULT unknown.  Values 8-13 repeat the
--                  GIBB-method names (numbered 1-6 there) in the same order.
--   techexp      - free text used when tech alone does not say enough.
--   completeness - which ends are present or missing; DEFAULT unknown.
--   gbmoltype    - free text identifying a particular ncRNA.
-- Every field is either OPTIONAL or has a DEFAULT, so an empty MolInfo is a
-- legal value.
MolInfo ::= SEQUENCE {
    biomol INTEGER {
        unknown (0) ,
        genomic (1) ,
        pre-RNA (2) ,              -- precursor RNA of any sort really
        mRNA (3) ,
        rRNA (4) ,
        tRNA (5) ,
        snRNA (6) ,
        scRNA (7) ,
        peptide (8) ,
        other-genetic (9) ,      -- other genetic material
        genomic-mRNA (10) ,      -- reported a mix of genomic and cdna sequence
        cRNA (11) ,              -- viral RNA genome copy intermediate
        snoRNA (12) ,            -- small nucleolar RNA
        transcribed-RNA (13) ,   -- transcribed RNA other than existing classes
        ncRNA (14) ,
        tmRNA (15) ,
        other (255) } DEFAULT unknown ,
    tech INTEGER {
        unknown (0) ,
        standard (1) ,          -- standard sequencing
        est (2) ,               -- Expressed Sequence Tag
        sts (3) ,               -- Sequence Tagged Site
        survey (4) ,            -- one-pass genomic sequence
        genemap (5) ,           -- from genetic mapping techniques
        physmap (6) ,           -- from physical mapping techniques
        derived (7) ,           -- derived from other data, not a primary entity
        concept-trans (8) ,     -- conceptual translation
        seq-pept (9) ,          -- peptide was sequenced
        both (10) ,             -- concept transl. w/ partial pept. seq.
        seq-pept-overlap (11) , -- sequenced peptide, ordered by overlap
        seq-pept-homol (12) ,   -- sequenced peptide, ordered by homology
        concept-trans-a (13) ,  -- conceptual transl. supplied by author
        htgs-1 (14) ,           -- unordered High Throughput sequence contig
        htgs-2 (15) ,           -- ordered High Throughput sequence contig
        htgs-3 (16) ,           -- finished High Throughput sequence
        fli-cdna (17) ,         -- full length insert cDNA
        htgs-0 (18) ,           -- single genomic reads for coordination
        htc (19) ,              -- high throughput cDNA
        wgs (20) ,              -- whole genome shotgun sequencing
        barcode (21) ,          -- barcode of life project
        composite-wgs-htgs (22) , -- composite of WGS and HTGS
        tsa (23) ,              -- transcriptome shotgun assembly
        targeted (24) ,         -- targeted locus sets/studies
        other (255) }           -- explain in the techexp field below
               DEFAULT unknown ,
    techexp VisibleString OPTIONAL ,   -- explanation if tech not enough
    --
    -- Completeness is not indicated in most records.  For genomes, assume
    -- the sequences are incomplete unless specifically marked as complete.
    -- For mRNAs, assume the ends are not known exactly unless marked as
    -- having the left or right end.
    --
    completeness INTEGER {
      unknown (0) ,
      complete (1) ,                   -- complete biological entity
      partial (2) ,                    -- partial but no details given
      no-left (3) ,                    -- missing 5' or NH3 end
      no-right (4) ,                   -- missing 3' or COOH end
      no-ends (5) ,                    -- missing both ends
      has-left (6) ,                   -- 5' or NH3 end present
      has-right (7) ,                  -- 3' or COOH end present
      other (255) } DEFAULT unknown ,
    gbmoltype VisibleString OPTIONAL } -- identifies particular ncRNA


-- Legacy molecule-type enumeration; the banner above MolInfo warns that
-- MolInfo will replace it.  MolInfo.biomol reuses these numbers, naming
-- value 2 pre-RNA instead of pre-mRNA.
GIBB-mol ::= ENUMERATED {       -- type of molecule represented
    unknown (0) ,
    genomic (1) ,
    pre-mRNA (2) ,              -- precursor RNA of any sort really
    mRNA (3) ,
    rRNA (4) ,
    tRNA (5) ,
    snRNA (6) ,
    scRNA (7) ,
    peptide (8) ,
    other-genetic (9) ,      -- other genetic material
    genomic-mRNA (10) ,      -- reported a mix of genomic and cdna sequence
    other (255) }

-- Legacy modifier enumeration; the banner above MolInfo warns that MolInfo
-- will replace it.
-- NOTE(review): est, sts, survey, genemap and physmap reappear (with other
-- numbers) in MolInfo.tech, and partial, complete, no-left and no-right in
-- MolInfo.completeness.  The remaining names have no counterpart inside
-- MolInfo itself.
GIBB-mod ::= ENUMERATED {        -- GenInfo Backbone modifiers
    dna (0) ,
    rna (1) ,
    extrachrom (2) ,
    plasmid (3) ,
    mitochondrial (4) ,
    chloroplast (5) ,
    kinetoplast (6) ,
    cyanelle (7) ,
    synthetic (8) ,
    recombinant (9) ,
    partial (10) ,
    complete (11) ,
    mutagen (12) ,    -- subject of mutagenesis ?
    natmut (13) ,     -- natural mutant ?
    transposon (14) ,
    insertion-seq (15) ,
    no-left (16) ,    -- missing left end (5' for na, NH2 for aa)
    no-right (17) ,   -- missing right end (3' or COOH)
    macronuclear (18) ,
    proviral (19) ,
    est (20) ,        -- expressed sequence tag
    sts (21) ,        -- sequence tagged site
    survey (22) ,     -- one pass survey sequence
    chromoplast (23) ,
    genemap (24) ,    -- is a genetic map
    restmap (25) ,    -- is an ordered restriction map
    physmap (26) ,    -- is a physical map (not ordered restriction map)
    other (255) }

-- Legacy sequencing-method enumeration; the banner above MolInfo warns that
-- MolInfo will replace it.  There is no value 0.  The six named methods
-- reappear, in the same order, as MolInfo.tech values 8-13.
GIBB-method ::= ENUMERATED {        -- sequencing methods
    concept-trans (1) ,    -- conceptual translation
    seq-pept (2) ,         -- peptide was sequenced
    both (3) ,             -- concept transl. w/ partial pept. seq.
    seq-pept-overlap (4) , -- sequenced peptide, ordered by overlap
    seq-pept-homol (5) ,   -- sequenced peptide, ordered by homology
    concept-trans-a (6) ,  -- conceptual transl. supplied by author
    other (255) }

-- Exactly one of four numbering schemes.  Each alternative wraps its own
-- type (Num-cont, Num-enum, Num-ref, Num-real).
Numbering ::= CHOICE {           -- any display numbering system
    cont Num-cont ,              -- continuous numbering
    enum Num-enum ,              -- enumerated names for residues
    ref Num-ref ,                -- by reference to another sequence
    real Num-real }              -- supports mapping to a float system

-- All three fields have defaults, so an empty Num-cont means: the first
-- residue is numbered 1, zero is not used, and numbers ascend.
Num-cont ::= SEQUENCE {          -- continuous display numbering system
    refnum INTEGER DEFAULT 1,         -- number assigned to first residue
    has-zero BOOLEAN DEFAULT FALSE ,  -- 0 used?
    ascending BOOLEAN DEFAULT TRUE }  -- ascending numbers?

-- Both fields are required.
-- NOTE(review): num presumably equals the number of entries in names;
-- the schema carries no constraint that enforces this.
Num-enum ::= SEQUENCE {          -- any tags to residues
    num INTEGER ,                        -- number of tags to follow
    names SEQUENCE OF VisibleString }    -- the tags

-- type is required.  The enumerator named aligns and the field named aligns
-- are two different things that share a name.
-- NOTE(review): the aligns field is presumably supplied when type = aligns;
-- the schema itself only marks it OPTIONAL.
Num-ref ::= SEQUENCE {           -- by reference to other sequences
    type ENUMERATED {            -- type of reference
        not-set (0) ,
        sources (1) ,            -- by segmented or const seq sources
        aligns (2) } ,           -- by alignments given below
    aligns Seq-align OPTIONAL }

-- Linear map from the integer positions used by Bioseq to real numbers.
-- a and b are required; units is an optional free-text label (presumably
-- the unit of the mapped value).
Num-real ::= SEQUENCE {          -- mapping to floating point system
    a REAL ,                     -- from an integer system used by Bioseq
    b REAL ,                     -- position = (a * int_position) + b
    units VisibleString OPTIONAL }

-- A citation plus optional details of how the sequence appeared in it.
-- pub is the only required field; reftype falls back to seq when absent.
Pubdesc ::= SEQUENCE {              -- how sequence presented in pub
    pub Pub-equiv ,                 -- the citation(s)
    name VisibleString OPTIONAL ,   -- name used in paper
    fig VisibleString OPTIONAL ,    -- figure in paper
    num Numbering OPTIONAL ,        -- numbering from paper
    numexc BOOLEAN OPTIONAL ,       -- numbering problem with paper
    poly-a BOOLEAN OPTIONAL ,       -- poly A tail indicated in figure?
    maploc VisibleString OPTIONAL , -- map location reported in paper
    seq-raw StringStore OPTIONAL ,  -- original sequence from paper
    align-group INTEGER OPTIONAL ,  -- this seq aligned with others in paper
    comment VisibleString OPTIONAL, -- any comment on this pub in context
    reftype INTEGER {           -- type of reference in a GenBank record
        seq (0) ,               -- refers to sequence
        sites (1) ,             -- refers to unspecified features
        feats (2) ,             -- refers to specified features
        no-target (3) }         -- nothing specified (EMBL)
        DEFAULT seq }

-- Plain string alias; the schema imposes no controlled vocabulary on it.
Heterogen ::= VisibleString       -- cofactor, prosthetic group, inhibitor, etc

--*** Instances of sequences *******************************
--*

-- repr and mol are the only required fields.  topology defaults to linear;
-- strand has no schema default (see the comment on that field).
-- The repr names seg, ref, map and delta match the alternatives of Seq-ext.
-- NOTE(review): which repr values call for seq-data or ext is not expressed
-- in the schema; both are simply OPTIONAL.
Seq-inst ::= SEQUENCE {            -- the sequence data itself
    repr ENUMERATED {              -- representation class
        not-set (0) ,              -- empty
        virtual (1) ,              -- no seq data
        raw (2) ,                  -- continuous sequence
        seg (3) ,                  -- segmented sequence
        const (4) ,                -- constructed sequence
        ref (5) ,                  -- reference to another sequence
        consen (6) ,               -- consensus sequence or pattern
        map (7) ,                  -- ordered map of any kind
        delta (8) ,              -- sequence made by changes (delta) to others
        other (255) } ,
    mol ENUMERATED {               -- molecule class in living organism
        not-set (0) ,              --   > cdna = rna
        dna (1) ,
        rna (2) ,
        aa (3) ,
        na (4) ,                   -- just a nucleic acid
        other (255) } ,
    length INTEGER OPTIONAL ,      -- length of sequence in residues
    fuzz Int-fuzz OPTIONAL ,       -- length uncertainty
    topology ENUMERATED {          -- topology of molecule
        not-set (0) ,
        linear (1) ,
        circular (2) ,
        tandem (3) ,               -- some part of tandem repeat
        other (255) } DEFAULT linear ,
    strand ENUMERATED {            -- strandedness in living organism
        not-set (0) ,
        ss (1) ,                   -- single strand
        ds (2) ,                   -- double strand
        mixed (3) ,
        other (255) } OPTIONAL ,   -- default ds for DNA, ss for RNA, pept
    seq-data Seq-data OPTIONAL ,   -- the sequence
    ext Seq-ext OPTIONAL ,         -- extensions for special types
    hist Seq-hist OPTIONAL }       -- sequence history

--*** Sequence Extensions **********************************
--*  for representing more complex types
--*  const type uses Seq-hist.assembly

-- The alternatives are named after the Seq-inst.repr values seg, ref, map
-- and delta.
Seq-ext ::= CHOICE {
    seg Seg-ext ,        -- segmented sequences
    ref Ref-ext ,        -- hot link to another sequence (a view)
    map Map-ext ,        -- ordered map of markers
    delta Delta-ext }    -- list of Delta-seq pieces

-- Payload types for the four Seq-ext alternatives.
Seg-ext ::= SEQUENCE OF Seq-loc      -- ordered list of locations

Ref-ext ::= Seq-loc                  -- a single location

Map-ext ::= SEQUENCE OF Seq-feat     -- ordered list of features

Delta-ext ::= SEQUENCE OF Delta-seq  -- ordered list of Delta-seq pieces

-- One piece of a Delta-ext: either a location on some sequence or a
-- Seq-literal carried inline.
Delta-seq ::= CHOICE {
    loc Seq-loc ,       -- point to a sequence
    literal Seq-literal }   -- a piece of sequence

-- length is required even when seq-data is omitted; fuzz and seq-data are
-- optional.
Seq-literal ::= SEQUENCE {
    length INTEGER ,         -- must give a length in residues
    fuzz Int-fuzz OPTIONAL , -- could be unsure
    seq-data Seq-data OPTIONAL } -- may have the data

--*** Sequence History Record ***********************************
--** assembly = records how seq was assembled from others
--** replaces = records sequences made obsolete by this one
--** replaced-by = this seq is made obsolete by another(s)

-- All four fields are optional.  deleted is given either as a bare flag
-- (bool) or as a Date (date).
Seq-hist ::= SEQUENCE {
    assembly SET OF Seq-align OPTIONAL ,-- how was this assembled?
    replaces Seq-hist-rec OPTIONAL ,    -- seq makes these seqs obsolete
    replaced-by Seq-hist-rec OPTIONAL , -- these seqs make this one obsolete
    deleted CHOICE {
        bool BOOLEAN ,
        date Date } OPTIONAL }

-- A set of sequence ids with an optional date.  Used by both the replaces
-- and the replaced-by fields of Seq-hist.
Seq-hist-rec ::= SEQUENCE {
    date Date OPTIONAL ,
    ids SET OF Seq-id }

--*** Various internal sequence representations ************
--*      all are controlled, fixed length forms

-- Exactly one alternative is present.  Every alternative except gap wraps
-- the like-named encoding type defined later in this module; gap carries a
-- Seq-gap instead of residues.
Seq-data ::= CHOICE {              -- sequence representations
    iupacna IUPACna ,              -- IUPAC 1 letter nuc acid code
    iupacaa IUPACaa ,              -- IUPAC 1 letter amino acid code
    ncbi2na NCBI2na ,              -- 2 bit nucleic acid code
    ncbi4na NCBI4na ,              -- 4 bit nucleic acid code
    ncbi8na NCBI8na ,              -- 8 bit extended nucleic acid code
    ncbipna NCBIpna ,              -- nucleic acid probabilities
    ncbi8aa NCBI8aa ,              -- 8 bit extended amino acid codes
    ncbieaa NCBIeaa ,              -- extended ASCII 1 letter aa codes
    ncbipaa NCBIpaa ,              -- amino acid probabilities
    ncbistdaa NCBIstdaa,           -- consecutive codes for std aas
    gap Seq-gap                    -- gap types
}

-- Payload of the gap alternative of Seq-data.  type is required; linkage
-- and linkage-evidence are optional.
Seq-gap ::= SEQUENCE {
    type INTEGER {
        unknown (0) ,
        fragment (1) ,             -- Deprecated. Used only for AGP 1.1
        clone (2) ,                -- Deprecated. Used only for AGP 1.1
        short-arm (3) ,
        heterochromatin (4) ,
        centromere (5) ,
        telomere (6) ,
        repeat (7) ,
        contig (8) ,
        scaffold (9) ,
        other (255) } ,
    linkage INTEGER {
        unlinked (0) ,
        linked (1) ,
        other (255) } OPTIONAL ,
    linkage-evidence SET OF Linkage-evidence OPTIONAL }

-- One item of the linkage-evidence set in Seq-gap.  The single field, type,
-- is required and has no DEFAULT.
Linkage-evidence ::= SEQUENCE {
    type INTEGER {
        paired-ends (0) ,
        align-genus (1) ,
        align-xgenus (2) ,
        align-trnscpt (3) ,
        within-clone (4) ,
        clone-contig (5) ,
        map (6) ,
        strobe (7) ,
        unspecified (8) ,
        pcr (9) ,
        other (255) } }

-- Concrete encodings referenced by Seq-data.  The StringStore-based types
-- hold one letter per residue; the OCTET STRING types are binary, laid out
-- as described beside each one.
IUPACna ::= StringStore       -- IUPAC 1 letter codes, no spaces
IUPACaa ::= StringStore       -- IUPAC 1 letter codes, no spaces
NCBI2na ::= OCTET STRING      -- 00=A, 01=C, 10=G, 11=T
NCBI4na ::= OCTET STRING      -- 1 bit each for agct
                              -- 0001=A, 0010=C, 0100=G, 1000=T/U
                              -- 0101=Purine, 1010=Pyrimidine, etc
NCBI8na ::= OCTET STRING      -- for modified nucleic acids
NCBIpna ::= OCTET STRING      -- 5 octets/base, prob for a,c,g,t,n
                              -- probabilities are coded 0-255 = 0.0-1.0
NCBI8aa ::= OCTET STRING      -- for modified amino acids
NCBIeaa ::= StringStore       -- ASCII extended 1 letter aa codes
                              -- IUPAC codes + U=selenocysteine
NCBIpaa ::= OCTET STRING      -- 25 octets/aa, prob for IUPAC aas in order:
                              -- A-Y,B,Z,X,(ter),anything
                              -- probabilities are coded 0-255 = 0.0-1.0
NCBIstdaa ::= OCTET STRING    -- codes 0-25, 1 per byte

--*** Sequence Annotation *************************************
--*

-- Textannot-id replicates Textseq-id.  It is kept as a separate type so that
-- ids assigned to annotations stay semantically distinct from ids assigned
-- to sequences.  All four fields are optional.
Textannot-id ::= SEQUENCE {
    name VisibleString OPTIONAL ,
    accession VisibleString OPTIONAL ,
    release VisibleString OPTIONAL ,
    version INTEGER OPTIONAL }

-- Identifier carried in Seq-annot.id; exactly one of the four forms is
-- present.
Annot-id ::= CHOICE {
    local Object-id ,            -- as an Object-id
    ncbi INTEGER ,               -- as a plain integer
    general Dbtag ,              -- as a Dbtag
    other Textannot-id }         -- as text fields, see Textannot-id

-- Unordered collection of Annotdesc items; this is the type of
-- Seq-annot.desc.
Annot-descr ::= SET OF Annotdesc

-- One descriptor of an annotation collection; exactly one alternative is
-- present per item.  Several items are combined through Annot-descr.
Annotdesc ::= CHOICE {
    name VisibleString ,         -- a short name for this collection
    title VisibleString ,        -- a title for this collection
    comment VisibleString ,      -- a more extensive comment
    pub Pubdesc ,                -- a reference to the publication
    user User-object ,           -- user defined object
    create-date Date ,           -- date entry first created/released
    update-date Date ,           -- date of last update
    src Seq-id ,                 -- source sequence from which annot came
    align Align-def,             -- definition of the SeqAligns
    region Seq-loc }             -- all contents cover this region

-- align-type is required; it has no DEFAULT and no value 0.  ids is
-- optional.
Align-def ::= SEQUENCE {
    align-type INTEGER {         -- class of align Seq-annot
      ref (1) ,                  -- set of alignments to the same sequence
      alt (2) ,                  -- set of alternate alignments of the same seqs
      blocks (3) ,               -- set of aligned blocks in the same seqs
      other (255) } ,
    ids SET OF Seq-id OPTIONAL } -- used for the one ref seqid for now

-- A collection of annotations of a single kind.  data is the only required
-- field; its CHOICE fixes the kind (features, alignments, graphs, ids,
-- locations or a Seq-table) for the whole collection.
Seq-annot ::= SEQUENCE {
    id SET OF Annot-id OPTIONAL ,
    db INTEGER {                 -- source of annotation
        genbank (1) ,
        embl (2) ,
        ddbj (3) ,
        pir  (4) ,
        sp   (5) ,
        bbone (6) ,
        pdb   (7) ,
        other (255) } OPTIONAL ,
    name VisibleString OPTIONAL ,-- source if "other" above
    desc Annot-descr OPTIONAL ,  -- used only for stand alone Seq-annots
    data CHOICE {
        ftable SET OF Seq-feat ,
        align SET OF Seq-align ,
        graph SET OF Seq-graph ,
        ids SET OF Seq-id ,      -- used for communication between tools
        locs SET OF Seq-loc ,    -- used for communication between tools
        seq-table Seq-table } }  -- features in table form

END


--$Revision: 6.6 $
--**********************************************************************
--
--  NCBI Sequence Collections
--  by James Ostell, 1990
--
--  Version 3.0 - 1994
--
--**********************************************************************

-- Module NCBI-Seqset: recursive containers of sequences.  A Bioseq-set holds
-- Seq-entry values, and a Seq-entry is either a single Bioseq or another
-- Bioseq-set, so sets can nest.
NCBI-Seqset DEFINITIONS ::=
BEGIN

EXPORTS Bioseq-set, Seq-entry;

IMPORTS Bioseq, Seq-annot, Seq-descr FROM NCBI-Sequence
        Object-id, Dbtag, Date FROM NCBI-General;

--*** Sequence Collections ********************************
--*

-- seq-set is the only required field.  class falls back to not-set when
-- absent; every other field is OPTIONAL.
Bioseq-set ::= SEQUENCE {      -- just a collection
    id Object-id OPTIONAL ,
    coll Dbtag OPTIONAL ,          -- to identify a collection
    level INTEGER OPTIONAL ,       -- nesting level
    class ENUMERATED {
        not-set (0) ,
        nuc-prot (1) ,              -- nuc acid and coded proteins
        segset (2) ,                -- segmented sequence + parts
        conset (3) ,                -- constructed sequence + parts
        parts (4) ,                 -- parts for 2 or 3
        gibb (5) ,                  -- geninfo backbone
        gi (6) ,                    -- geninfo
        genbank (7) ,               -- converted genbank
        pir (8) ,                   -- converted pir
        pub-set (9) ,               -- all the seqs from a single publication
        equiv (10) ,                -- a set of equivalent maps or seqs
        swissprot (11) ,            -- converted SWISSPROT
        pdb-entry (12) ,            -- a complete PDB entry
        mut-set (13) ,              -- set of mutations
        pop-set (14) ,              -- population study
        phy-set (15) ,              -- phylogenetic study
        eco-set (16) ,              -- ecological sample study
        gen-prod-set (17) ,         -- genomic products, chrom+mRNA+protein
        wgs-set (18) ,              -- whole genome shotgun project
        named-annot (19) ,          -- named annotation set
        named-annot-prod (20) ,     -- with instantiated mRNA+protein
        read-set (21) ,             -- set from a single read
        paired-end-reads (22) ,     -- paired sequences within a read-set
        small-genome-set (23) ,     -- viral segments or mitochondrial minicircles
        other (255) } DEFAULT not-set ,
    release VisibleString OPTIONAL ,
    date Date OPTIONAL ,
    descr Seq-descr OPTIONAL ,
    seq-set SEQUENCE OF Seq-entry ,
    annot SET OF Seq-annot OPTIONAL }

-- Either one sequence (seq) or a nested collection (set).
Seq-entry ::= CHOICE {
        seq Bioseq ,
        set Bioseq-set }

END

--$Revision: 6.0 $
--  *********************************************************************
--
--  These are code and conversion tables for NCBI sequence codes
--  ASN.1 for the sequences themselves is defined in seq.asn
--
--  Seq-map-table and Seq-code-table REQUIRE that codes start with 0
--    and increase continuously.  So IUPAC codes, which are upper case
--    letters, will always have 65 cells of 0 before the codes begin.  This
--    allows all codes to be used for indexed lookups.
--
--  Valid names for code tables are:
--    IUPACna
--    IUPACaa
--    IUPACeaa
--    IUPACaa3     3 letter amino acid codes : parallels IUPACeaa
--                   display only, not a data exchange type
--    NCBI2na
--    NCBI4na
--    NCBI8na
--    NCBI8aa
--    NCBIstdaa
--     probability types map to IUPAC types for display as characters

-- Module NCBI-SeqCode: tables describing the sequence codes and the
-- conversions between them.
-- NOTE(review): Seq-code-type is used by the exported table types but is
-- not itself listed in EXPORTS; confirm whether that is intended.
NCBI-SeqCode DEFINITIONS ::=
BEGIN

EXPORTS Seq-code-table, Seq-map-table, Seq-code-set;

-- Names a code.  There is no value 0.  The list carries iupacaa3, which the
-- comment marks as display only.
Seq-code-type ::= ENUMERATED {              -- sequence representations
    iupacna (1) ,              -- IUPAC 1 letter nuc acid code
    iupacaa (2) ,              -- IUPAC 1 letter amino acid code
    ncbi2na (3) ,              -- 2 bit nucleic acid code
    ncbi4na (4) ,              -- 4 bit nucleic acid code
    ncbi8na (5) ,              -- 8 bit extended nucleic acid code
    ncbipna (6) ,              -- nucleic acid probabilities
    ncbi8aa (7) ,              -- 8 bit extended amino acid codes
    ncbieaa (8) ,              -- extended ASCII 1 letter aa codes
    ncbipaa (9) ,              -- amino acid probabilities
    iupacaa3 (10) ,            -- 3 letter code only for display
    ncbistdaa (11) }           -- consecutive codes for std aas, 0-25

-- Conversion table from one code to another.  start-at falls back to 0 when
-- absent; all other fields are required.
Seq-map-table ::= SEQUENCE { -- for tables of sequence mappings
    from Seq-code-type ,      -- code to map from
    to Seq-code-type ,        -- code to map to
    num INTEGER ,             -- number of rows in table
    start-at INTEGER DEFAULT 0 ,   -- index offset of first element
    table SEQUENCE OF INTEGER }  -- table of values, in from-to order

-- Symbol and name for each value of one code.  start-at falls back to 0 when
-- absent; comps is optional; all other fields are required.
Seq-code-table ::= SEQUENCE { -- for names of coded values
    code Seq-code-type ,      -- name of code
    num INTEGER ,             -- number of rows in table
    one-letter BOOLEAN ,   -- symbol is ALWAYS 1 letter?
    start-at INTEGER DEFAULT 0 ,   -- index offset of first element
    table SEQUENCE OF
        SEQUENCE {
            symbol VisibleString ,      -- the printed symbol or letter
            name VisibleString } ,      -- an explanatory name or string
    comps SEQUENCE OF INTEGER OPTIONAL } -- pointers to complement nuc acid

-- Bundle of code tables and map tables; both sets are optional.
Seq-code-set ::= SEQUENCE {    -- for distribution
    codes SET OF Seq-code-table OPTIONAL ,
    maps SET OF Seq-map-table OPTIONAL }

END

--$Revision: 6.0 $
--*********************************************************************
--
-- 1990 - J.Ostell
-- Version 3.0 - June 1994
--
--*********************************************************************
--*********************************************************************
--
--  EMBL specific data
--  This block of specifications was developed by Reiner Fuchs of EMBL
--  Updated by J.Ostell, 1994
--
--*********************************************************************

-- Module EMBL-General: the EMBL-specific descriptor block and the
-- cross-reference types it uses.
EMBL-General DEFINITIONS ::=
BEGIN

EXPORTS EMBL-dbname, EMBL-xref, EMBL-block;

IMPORTS Date, Object-id FROM NCBI-General;

-- A database, named either by a code from the fixed list or by free text.
EMBL-dbname ::= CHOICE {
    code ENUMERATED {
        embl (0) ,
        genbank (1) ,
        ddbj (2) ,
        geninfo (3) ,
        medline (4) ,
        swissprot (5) ,
        pir (6) ,
        pdb (7) ,
        epd (8) ,
        ecd (9) ,
        tfd (10) ,
        flybase (11) ,
        prosite (12) ,
        enzyme (13) ,
        mim (14) ,
        ecoseq (15) ,
        hiv (16) ,
        other (255) } ,
    name VisibleString }

-- One cross-reference: a database plus a list of ids within it.
EMBL-xref ::= SEQUENCE {
    dbname EMBL-dbname ,
    id SEQUENCE OF Object-id }

-- creation-date and update-date are required.  class falls back to standard
-- when absent; the remaining fields are optional.
EMBL-block ::= SEQUENCE {
    class ENUMERATED {
        not-set (0) ,
        standard (1) ,
        unannotated (2) ,
        other (255) } DEFAULT standard ,
    div ENUMERATED {
        fun (0) ,
        inv (1) ,
        mam (2) ,
        org (3) ,
        phg (4) ,
        pln (5) ,
        pri (6) ,
        pro (7) ,
        rod (8) ,
        syn (9) ,
        una (10) ,
        vrl (11) ,
        vrt (12) ,
        pat (13) ,
        est (14) ,
        sts (15) ,
        other (255) } OPTIONAL ,
    creation-date Date ,
    update-date Date ,
    extra-acc SEQUENCE OF VisibleString OPTIONAL ,
    keywords SEQUENCE OF VisibleString OPTIONAL ,
    xref SEQUENCE OF EMBL-xref OPTIONAL }

END

--*********************************************************************
--
--  SWISSPROT specific data
--  This block of specifications was developed by Mark Cavanaugh of
--      NCBI working with Amos Bairoch of SWISSPROT
--
--*********************************************************************

-- Module SP-General: the SWISSPROT-specific descriptor block.
SP-General DEFINITIONS ::=
BEGIN

EXPORTS SP-block;

IMPORTS Date, Dbtag FROM NCBI-General
        Seq-id FROM NCBI-Seqloc;

-- class is the only required field and has no DEFAULT.  imeth falls back to
-- FALSE when absent; every other field is OPTIONAL.
SP-block ::= SEQUENCE {         -- SWISSPROT specific descriptions
    class ENUMERATED {
        not-set (0) ,
        standard (1) ,      -- conforms to all SWISSPROT checks
        prelim (2) ,        -- only seq and biblio checked
        other (255) } ,
    extra-acc SET OF VisibleString OPTIONAL ,  -- old SWISSPROT ids
    imeth BOOLEAN DEFAULT FALSE ,  -- seq known to start with Met
    plasnm SET OF VisibleString OPTIONAL,  -- plasmid names carrying gene
    seqref SET OF Seq-id OPTIONAL,         -- xref to other sequences
    dbref SET OF Dbtag OPTIONAL ,          -- xref to non-sequence dbases
    keywords SET OF VisibleString OPTIONAL , -- keywords
    created Date OPTIONAL ,         -- creation date
    sequpd Date OPTIONAL ,          -- sequence update
    annotupd Date OPTIONAL }        -- annotation update

END

--*********************************************************************
--
--  PIR specific data
--  This block of specifications was developed by Jim Ostell of
--      NCBI
--
--*********************************************************************

-- Module PIR-General: the PIR-specific descriptor block.
PIR-General DEFINITIONS ::=
BEGIN

EXPORTS PIR-block;

IMPORTS Seq-id FROM NCBI-Seqloc;

-- Every field is OPTIONAL, so an empty PIR-block is a legal value.  Apart
-- from had-punct, keywords and seqref, all fields are plain strings; note
-- that date is a VisibleString here, not a Date.
PIR-block ::= SEQUENCE {          -- PIR specific descriptions
    had-punct BOOLEAN OPTIONAL ,      -- had punctuation in sequence ?
    host VisibleString OPTIONAL ,
    source VisibleString OPTIONAL ,     -- source line
    summary VisibleString OPTIONAL ,
    genetic VisibleString OPTIONAL ,
    includes VisibleString OPTIONAL ,
    placement VisibleString OPTIONAL ,
    superfamily VisibleString OPTIONAL ,
    keywords SEQUENCE OF VisibleString OPTIONAL ,
    cross-reference VisibleString OPTIONAL ,
    date VisibleString OPTIONAL ,
    seq-raw VisibleString OPTIONAL ,  -- seq with punctuation
    seqref SET OF Seq-id OPTIONAL }         -- xref to other sequences

END

--*********************************************************************
--
--  GenBank specific data
--  This block of specifications was developed by Jim Ostell of
--      NCBI
--
--*********************************************************************

-- Module GenBank-General: the GenBank-specific descriptor block.
GenBank-General DEFINITIONS ::=
BEGIN

EXPORTS GB-block;

IMPORTS Date FROM NCBI-General;

-- Every field is OPTIONAL.  The entry date exists twice: date is the
-- obsolete string form and entry-date is the structured Date that
-- replaces it.
GB-block ::= SEQUENCE {          -- GenBank specific descriptions
    extra-accessions SEQUENCE OF VisibleString OPTIONAL ,
    source VisibleString OPTIONAL ,     -- source line
    keywords SEQUENCE OF VisibleString OPTIONAL ,
    origin VisibleString OPTIONAL,
    date VisibleString OPTIONAL ,       -- OBSOLETE old form Entry Date
    entry-date Date OPTIONAL ,          -- replaces date
    div VisibleString OPTIONAL ,        -- GenBank division
    taxonomy VisibleString OPTIONAL }   -- continuation line of organism

END

--**********************************************************************
-- PRF specific definition
--    PRF is a protein sequence database created and maintained by
--    Protein Research Foundation, Minoo-city, Osaka, Japan.
--
--    Written by A.Ogiwara, Inst.Chem.Res. (Dr.Kanehisa's Lab),
--            Kyoto Univ., Japan
--
--**********************************************************************

-- Module PRF-General: the PRF-specific descriptor block.  It has no IMPORTS
-- clause; both types use only built-in ASN.1 types and each other.
PRF-General DEFINITIONS ::=
BEGIN

EXPORTS PRF-block;

-- Both fields are optional.
PRF-block ::= SEQUENCE {
    extra-src PRF-ExtraSrc OPTIONAL ,
    keywords SEQUENCE OF VisibleString OPTIONAL }

-- Five optional free-text fields; not exported, used only by PRF-block.
PRF-ExtraSrc ::= SEQUENCE {
    host VisibleString OPTIONAL ,
    part VisibleString OPTIONAL ,
    state VisibleString OPTIONAL ,
    strain VisibleString OPTIONAL ,
    taxon VisibleString OPTIONAL }

END

--*********************************************************************
--
--  PDB specific data
--  This block of specifications was developed by Jim Ostell and
--      Steve Bryant of NCBI
--
--*********************************************************************

-- Module PDB-General: the PDB-specific descriptor block.
PDB-General DEFINITIONS ::=
BEGIN

EXPORTS PDB-block;

IMPORTS Date FROM NCBI-General;

-- deposition, class, compound and source are required; exp-method and
-- replace are optional.
PDB-block ::= SEQUENCE {          -- PDB specific descriptions
    deposition Date ,         -- deposition date  month,year
    class VisibleString ,
    compound SEQUENCE OF VisibleString ,
    source SEQUENCE OF VisibleString ,
    exp-method VisibleString OPTIONAL ,  -- present if NOT X-ray diffraction
    replace PDB-replace OPTIONAL } -- replacement history

-- Replacement record used by PDB-block.replace; both fields are required.
PDB-replace ::= SEQUENCE {
    date Date ,
    ids SEQUENCE OF VisibleString }   -- entry ids replaced by this one

END

--$Revision: 6.53 $
--**********************************************************************
--
--  NCBI Sequence Feature elements
--  by James Ostell, 1990
--  Version 3.0 - June 1994
--
--**********************************************************************

NCBI-Seqfeat DEFINITIONS ::=
BEGIN

EXPORTS Seq-feat, Feat-id, Genetic-code, ModelEvidenceSupport;

IMPORTS Gene-ref FROM NCBI-Gene
        Prot-ref FROM NCBI-Protein
        Org-ref FROM NCBI-Organism
        Variation-ref FROM NCBI-Variation
        BioSource FROM NCBI-BioSource
        RNA-ref FROM NCBI-RNA
        Seq-id, Seq-loc, Giimport-id FROM NCBI-Seqloc
        Pubdesc, Numbering, Heterogen FROM NCBI-Sequence
        Rsite-ref FROM NCBI-Rsite
        Txinit FROM NCBI-TxInit
        DOI, PubMedId FROM NCBI-Biblio
        Pub-set FROM NCBI-Pub
        Object-id, Dbtag, User-object FROM NCBI-General;

--*** Feature identifiers ********************************
--*

-- Identifier of a feature; exactly one of the four forms is present.
-- Used by Seq-feat (id, ids) and by SeqFeatXref (id).
Feat-id ::= CHOICE {
    gibb INTEGER ,            -- geninfo backbone
    giim Giimport-id ,        -- geninfo import
    local Object-id ,         -- for local software use
    general Dbtag }           -- for use by various databases

--*** Seq-feat *******************************************
--*  sequence feature generalization

-- data and location are the only required fields; everything else is
-- OPTIONAL and no field has a DEFAULT.
-- Per the comments on the fields, ids and exts are the multi-valued
-- successors of id and ext; both the old and the new form are still in the
-- schema.
Seq-feat ::= SEQUENCE {
    id Feat-id OPTIONAL ,
    data SeqFeatData ,           -- the specific data
    partial BOOLEAN OPTIONAL ,    -- incomplete in some way?
    except BOOLEAN OPTIONAL ,     -- something funny about this?
    comment VisibleString OPTIONAL ,
    product Seq-loc OPTIONAL ,    -- product of process
    location Seq-loc ,            -- feature made from
    qual SEQUENCE OF Gb-qual OPTIONAL ,  -- qualifiers
    title VisibleString OPTIONAL ,   -- for user defined label
    ext User-object OPTIONAL ,    -- user defined structure extension
    cit Pub-set OPTIONAL ,        -- citations for this feature
    exp-ev ENUMERATED {           -- evidence for existence of feature
        experimental (1) ,        -- any reasonable experimental check
        not-experimental (2) } OPTIONAL , -- similarity, pattern, etc
    xref SET OF SeqFeatXref OPTIONAL ,   -- cite other relevant features
    dbxref SET OF Dbtag OPTIONAL ,  -- support for xref to other databases
    pseudo BOOLEAN OPTIONAL ,     -- annotated on pseudogene?
    except-text VisibleString OPTIONAL , -- explain if except=TRUE
    ids SET OF Feat-id OPTIONAL ,       -- set of Ids; will replace 'id' field
    exts SET OF User-object OPTIONAL , -- set of extensions; will replace 'ext' field
    support SeqFeatSupport OPTIONAL  -- will replace /experiment, /inference, model-evidence
}

-- The kind of a feature together with its kind-specific payload; exactly
-- one alternative is present.  The inline enumerations bond, site and
-- psec-str have no value 0.
-- NOTE(review): Cdregion, Imp-feat and Clone-ref are not in the IMPORTS
-- list of this module, so they are presumably defined further down in it.
SeqFeatData ::= CHOICE {
    gene Gene-ref ,
    org Org-ref ,
    cdregion Cdregion ,
    prot Prot-ref ,
    rna RNA-ref ,
    pub Pubdesc ,              -- publication applies to this seq
    seq Seq-loc ,              -- to annotate origin from another seq
    imp Imp-feat ,
    region VisibleString,      -- named region (globin locus)
    comment NULL ,             -- just a comment
    bond ENUMERATED {
        disulfide (1) ,
        thiolester (2) ,
        xlink (3) ,
        thioether (4) ,
        other (255) } ,
    site ENUMERATED {
        active (1) ,
        binding (2) ,
        cleavage (3) ,
        inhibit (4) ,
        modified (5),
        glycosylation (6) ,
        myristoylation (7) ,
        mutagenized (8) ,
        metal-binding (9) ,
        phosphorylation (10) ,
        acetylation (11) ,
        amidation (12) ,
        methylation (13) ,
        hydroxylation (14) ,
        sulfatation (15) ,
        oxidative-deamination (16) ,
        pyrrolidone-carboxylic-acid (17) ,
        gamma-carboxyglutamic-acid (18) ,
        blocked (19) ,
        lipid-binding (20) ,
        np-binding (21) ,
        dna-binding (22) ,
        signal-peptide (23) ,
        transit-peptide (24) ,
        transmembrane-region (25) ,
        nitrosylation (26) ,
        other (255) } ,
    rsite Rsite-ref ,       -- restriction site  (for maps really)
    user User-object ,      -- user defined structure
    txinit Txinit ,         -- transcription initiation
    num Numbering ,         -- a numbering system
    psec-str ENUMERATED {   -- protein secondary structure
        helix (1) ,         -- any helix
        sheet (2) ,         -- beta sheet
        turn  (3) } ,       -- beta or gamma turn
    non-std-residue VisibleString ,  -- non-standard residue here in seq
    het Heterogen ,         -- cofactor, prosthetic grp, etc, bound to seq
    biosrc BioSource,
    clone Clone-ref,
    variation Variation-ref
}

-- SeqFeatXref: cross-reference to a feature, given as a feature id, as
-- feature data, or as both.
SeqFeatXref ::= SEQUENCE {       -- both optional because can have one or both
    id Feat-id OPTIONAL ,        -- the feature copied
    data SeqFeatData OPTIONAL }  -- the specific data

-- SeqFeatSupport: container for three independent, optional sets of
-- supporting evidence.  All three fields may be absent.
SeqFeatSupport ::= SEQUENCE {
  experiment SET OF ExperimentSupport OPTIONAL ,      -- see ExperimentSupport
  inference SET OF InferenceSupport OPTIONAL ,        -- see InferenceSupport
  model-evidence SET OF ModelEvidenceSupport OPTIONAL -- see ModelEvidenceSupport
}

-- EvidenceCategory: named values used by the 'category' field of
-- ExperimentSupport and InferenceSupport.
-- This is an INTEGER with named numbers, not an ENUMERATED, so the type
-- itself does not restrict values to the four names listed here.
EvidenceCategory ::= INTEGER {
  not-set (0) ,
  coordinates (1) ,
  description (2) ,
  existence (3)
}

-- ExperimentSupport: one item of SeqFeatSupport.experiment.
-- 'explanation' is the only mandatory field.
ExperimentSupport ::= SEQUENCE {
  category EvidenceCategory OPTIONAL ,  -- see EvidenceCategory
  explanation VisibleString ,           -- mandatory free text
  pmids SET OF PubMedId OPTIONAL ,      -- supporting PubMed ids
  dois SET OF DOI OPTIONAL              -- supporting DOIs
}

-- Program-id: identifies a program by name, optionally with a version.
-- Used by EvidenceBasis.programs.
Program-id ::= SEQUENCE {
  name VisibleString ,              -- mandatory
  version VisibleString OPTIONAL
}

-- EvidenceBasis: the programs and/or sequence ids an inference rests on.
-- Both fields are optional.  Used by InferenceSupport.basis.
EvidenceBasis ::= SEQUENCE {
  programs SET OF Program-id OPTIONAL ,
  accessions SET OF Seq-id OPTIONAL
}

-- InferenceSupport: one item of SeqFeatSupport.inference.
-- 'basis' is the only field that must always be present; 'type' and
-- 'same-species' fall back to their DEFAULT values when omitted.
InferenceSupport ::= SEQUENCE {
  category EvidenceCategory OPTIONAL ,
  type INTEGER {
    not-set (0) ,
    similar-to-sequence (1) ,
    similar-to-aa (2) ,
    similar-to-dna (3) ,
    similar-to-rna (4) ,
    similar-to-mrna (5) ,
    -- NOTE(review): the next identifier is spelled 'similiar' (extra 'i').
    -- Renaming it would presumably affect anything that refers to the value
    -- by name, so it is left unchanged here; confirm before correcting.
    similiar-to-est (6) ,
    similar-to-other-rna (7) ,
    profile (8) ,
    nucleotide-motif (9) ,
    protein-motif (10) ,
    ab-initio-prediction (11) ,
    alignment (12) ,
    other (255)
  } DEFAULT not-set ,
  other-type VisibleString OPTIONAL ,   -- presumably names the type when
                                        -- type = other; TODO confirm
  same-species BOOLEAN DEFAULT FALSE ,
  basis EvidenceBasis ,                 -- mandatory, see EvidenceBasis
  pmids SET OF PubMedId OPTIONAL ,
  dois SET OF DOI OPTIONAL
}

-- ModelEvidenceItem: one sequence, identified by 'id', listed in the mrna,
-- est or protein sets of ModelEvidenceSupport.  'id' is mandatory; the two
-- BOOLEAN fields default to FALSE when omitted.
ModelEvidenceItem ::= SEQUENCE {
  id Seq-id ,
  exon-count INTEGER OPTIONAL ,
  exon-length INTEGER OPTIONAL ,
  full-length BOOLEAN DEFAULT FALSE ,
  supports-all-exon-combo BOOLEAN DEFAULT FALSE
}

-- ModelEvidenceSupport: one item of SeqFeatSupport.model-evidence.
-- No field is mandatory.  The last four fields have the same names and
-- types as the corresponding fields of ModelEvidenceItem.
ModelEvidenceSupport ::= SEQUENCE {
  method VisibleString OPTIONAL ,
  mrna SET OF ModelEvidenceItem OPTIONAL ,
  est SET OF ModelEvidenceItem OPTIONAL ,
  protein SET OF ModelEvidenceItem OPTIONAL ,
  identification Seq-id OPTIONAL ,
  dbxref SET OF Dbtag OPTIONAL ,
  exon-count INTEGER OPTIONAL ,
  exon-length INTEGER OPTIONAL ,
  full-length BOOLEAN DEFAULT FALSE ,
  supports-all-exon-combo BOOLEAN DEFAULT FALSE
}

--*** CdRegion ***********************************************
--*
--*  Instructions to translate from a nucleic acid to a peptide
--*    conflict means it's supposed to translate but doesn't
--*


-- Cdregion: every field is OPTIONAL except 'frame', which takes the value
-- not-set when omitted.
Cdregion ::= SEQUENCE {
    orf BOOLEAN OPTIONAL ,             -- just an ORF ?
    frame ENUMERATED {
        not-set (0) ,                  -- not set, code uses one
        one (1) ,
        two (2) ,
        three (3) } DEFAULT not-set ,      -- reading frame
    conflict BOOLEAN OPTIONAL ,        -- conflict
    gaps INTEGER OPTIONAL ,            -- number of gaps on conflict/except
    mismatch INTEGER OPTIONAL ,        -- number of mismatches on above
    code Genetic-code OPTIONAL ,       -- genetic code used
    code-break SEQUENCE OF Code-break OPTIONAL ,   -- individual exceptions
    stops INTEGER OPTIONAL }           -- number of stop codons on above

                    -- each code is 64 cells long, in the order where
                    -- T=0,C=1,A=2,G=3, TTT=0, TTC=1, TCT=4, etc
                    -- NOTE: this order does NOT correspond to a Seq-data
                    -- encoding.  It is "natural" to codon usage instead.
                    -- the value in each cell is the AA coded for
                    -- start= AA coded only if first in peptide
                    --   in start array, if codon is not a legitimate start
                    --   codon, that cell will have the "gap" symbol for
                    --   that alphabet.  Otherwise it will have the AA
                    --   encoded when that codon is used at the start.

-- Genetic-code: an unordered collection (SET OF) of elements, each element
-- being exactly one of the alternatives below.  The alternatives whose names
-- start with 's' are the start arrays, in the same three alphabets as the
-- plain arrays.
Genetic-code ::= SET OF CHOICE {
    name VisibleString ,               -- name of a code
    id INTEGER ,                       -- id in dbase
    ncbieaa VisibleString ,            -- indexed to IUPAC extended
    ncbi8aa OCTET STRING ,             -- indexed to NCBI8aa
    ncbistdaa OCTET STRING ,           -- indexed to NCBIstdaa
    sncbieaa VisibleString ,            -- start, indexed to IUPAC extended
    sncbi8aa OCTET STRING ,             -- start, indexed to NCBI8aa
    sncbistdaa OCTET STRING }           -- start, indexed to NCBIstdaa

-- Code-break: both fields are mandatory.  'aa' is a CHOICE, so the amino
-- acid is given in exactly one of the three alphabets.
-- Used by Cdregion.code-break.
Code-break ::= SEQUENCE {              -- specific codon exceptions
    loc Seq-loc ,                      -- location of exception
    aa CHOICE {                        -- the amino acid
        ncbieaa INTEGER ,              -- ASCII value of NCBIeaa code
        ncbi8aa INTEGER ,              -- NCBI8aa code
        ncbistdaa INTEGER } }           -- NCBIstdaa code

-- Genetic-code-table: unordered collection of Genetic-code values.
Genetic-code-table ::= SET OF Genetic-code     -- table of genetic codes

--*** Import ***********************************************
--*
--*  Features imported from other databases
--*

-- Imp-feat: a feature imported from another database, kept as strings.
-- 'key' is the only mandatory field.
Imp-feat ::= SEQUENCE {
    key VisibleString ,                  -- NOTE(review): presumably the feature
                                         -- key used by the source database;
                                         -- confirm
    loc VisibleString OPTIONAL ,         -- original location string
    descr VisibleString OPTIONAL }       -- text description

-- Gb-qual: a pair of strings, 'qual' and 'val'; both are mandatory.
-- NOTE(review): presumably a qualifier name and its value; confirm.
Gb-qual ::= SEQUENCE {
    qual VisibleString ,
    val VisibleString }


--*** Clone-ref ***********************************************
--*
--*  Specification of clone features
--*

-- Clone-ref: 'name' is the only mandatory field.  'concordant' and 'unique'
-- take the value FALSE when omitted; everything else is OPTIONAL.
Clone-ref ::= SEQUENCE {
    name VisibleString,        -- Official clone symbol
    library VisibleString OPTIONAL,     -- Library name

    concordant BOOLEAN DEFAULT FALSE, -- OPTIONAL?
    unique BOOLEAN DEFAULT FALSE, -- OPTIONAL?
    -- how the clone was placed; INTEGER with named numbers
    placement-method INTEGER {
        end-seq (0),           -- Clone placed by end sequence
        insert-alignment (1),  -- Clone placed by insert alignment
        sts (2),               -- Clone placed by STS
        fish (3),
        fingerprint (4),
        end-seq-insert-alignment (5), -- combined end-seq and insert align
        external (253),           -- Placement provided externally
        curated (254),            -- Human placed or approved
        other (255)
    } OPTIONAL,
    clone-seq Clone-seq-set OPTIONAL  -- see Clone-seq-set / Clone-seq
}

-- Clone-seq-set: unordered collection of Clone-seq values; used by
-- Clone-ref.clone-seq.
Clone-seq-set ::= SET OF Clone-seq


-- Clone-seq: one element of a Clone-seq-set.  'type' and 'location' are
-- mandatory; the remaining fields are OPTIONAL.
Clone-seq ::= SEQUENCE {
    type INTEGER {
        insert (0),
        end (1),
        other (255)
    },
    confidence INTEGER {
        multiple (0),        -- Multiple hits
        na (1),              -- Unspecified
        nohit-rep (2),       -- No hits, end flagged repetitive
        nohitnorep (3),      -- No hits, end not flagged repetitive
        other-chrm (4),      -- Hit on different chromosome
        unique (5),
        virtual (6),         -- Virtual (hasn't been sequenced)
        multiple-rep (7),    -- Multiple hits, end flagged repetitive
        multiplenorep (8),   -- Multiple hits, end not flagged repetitive
        no-hit (9),          -- No hits
        other (255)
    } OPTIONAL,
    location Seq-loc,        -- location on sequence
    seq Seq-loc OPTIONAL,    -- clone sequence location
    align-id Dbtag OPTIONAL, -- internal alignment identifier
    support INTEGER {
        prototype (0),       -- sequence used to place clone
        supporting (1),      -- sequence supports placement
        supports-other(2),   -- supports a different placement
        non-supporting (3)   -- does not support any placement
    } OPTIONAL
}

END


--*** Variation-ref ***********************************************
--*
--*  Specification of variation features
--*

NCBI-Variation DEFINITIONS ::=
BEGIN

EXPORTS Variation-ref, Variation-inst, VariantProperties,
        Population-data, Phenotype;

IMPORTS Int-fuzz, User-object, Object-id, Dbtag FROM NCBI-General
        Seq-literal FROM NCBI-Sequence
        SubSource FROM NCBI-BioSource
        Seq-loc FROM NCBI-Seqloc
        Pub FROM NCBI-Pub;


-- --------------------------------------------------------------------------
-- Historically, the dbSNP definitions document data structures used in the
-- processing and annotation of variations by the dbSNP group.  The intention
-- is to provide information to clients that reflects internal information
-- produced during the mapping of SNPs.
-- --------------------------------------------------------------------------

-- VariantProperties: 'version' is the only mandatory field; every other
-- field is OPTIONAL.  Most INTEGER fields are bit flags (see the NOTE below);
-- map-weight and allele-state are explicitly marked as not being bit fields.
VariantProperties ::= SEQUENCE {
    version INTEGER,

    -- NOTE:
    -- The format for most of these values is as an integer
    -- Unless otherwise noted, these integers represent a bitwise OR (= simple
    -- sum) of the possible values, and as such, these values represent the
    -- specific bit flags that may be set for each of the possible attributes
    -- here.

    resource-link INTEGER {
        preserved        (1), -- Clinical, Pubmed, Cited, (0x01)
        provisional      (2), -- Provisional Third Party Annotations (0x02)
        has3D            (4), -- Has 3D structure SNP3D table (0x04)
        submitterLinkout (8), -- SNP->SubSNP->Batch link_out (0x08)
        clinical        (16), -- Clinical if LSDB, OMIM, TPA, Diagnostic (0x10)
        genotypeKit     (32)  -- Marker exists on high density genotyping kit
                              -- (0x20)
    } OPTIONAL,

    gene-location INTEGER {
        in-gene         (1), -- Sequence intervals covered by a gene ID but not
                             -- having an aligned transcript (0x01)
        near-gene-5     (2), -- Within 2kb of the 5' end of a gene feature
                             -- (0x02)
        near-gene-3     (4), -- Within 0.5kb of the 3' end of a gene feature
                             -- (0x04)
        intron          (8), -- In Intron (0x08)
        donor          (16), -- In donor splice-site (0x10)
        acceptor       (32), -- In acceptor splice-site (0x20)
        utr-5          (64), -- In 5' UTR (0x40)
        utr-3         (128), -- In 3' UTR (0x80)
        in-start-codon(256), -- the variant is observed in a start codon
                             -- (0x100)
        in-stop-codon (512), -- the variant is observed in a stop codon
                             -- (0x200)
        intergenic   (1024), -- variant located between genes (0x400)
        conserved-noncoding(2048) -- variant is located in a conserved
                                  -- non-coding region (0x800)
    } OPTIONAL,

    effect INTEGER {
        no-change      (0), -- known to cause no functional changes
                            -- since 0 does not combine with any other bit
                            -- value, 'no-change' specifically implies that
                            -- there are no consequences
        synonymous     (1), -- one allele in the set does not change the encoded
                            -- amino acid (0x1)
        nonsense       (2), -- one allele in the set changes to STOP codon
                            -- (TER).  (0x2)
        missense       (4), -- one allele in the set changes protein peptide
                            -- (0x4)
        frameshift     (8), -- one allele in the set changes all downstream
                            -- amino acids (0x8)

        up-regulator  (16), -- the variant causes increased transcription
                            -- (0x10)
        down-regulator(32), -- the variant causes decreased transcription
                            -- (0x20)
        methylation   (64), -- (0x40)
        stop-gain     (128), -- reference codon is not stop codon, but the snp
                             -- variant allele changes the codon to a
                             -- terminating codon. (0x80)
        stop-loss     (256)  -- reverse of STOP-GAIN: reference codon is a
                             -- stop codon, but a snp variant allele changes
                             -- the codon to a non-terminating codon. (0x100)
    } OPTIONAL,

    mapping INTEGER {
        has-other-snp         (1), -- Another SNP has the same mapped positions
                                   -- on reference assembly (0x01)
        has-assembly-conflict (2), -- Weight 1 or 2 SNPs that map to different
                                   -- chromosomes on different assemblies (0x02)
        is-assembly-specific  (4)  -- Only maps to 1 assembly (0x04)
    } OPTIONAL,

    -- map-weight captures specificity of placement
    -- NOTE: This is *NOT* a bitfield
    map-weight INTEGER {
        is-uniquely-placed(1),
        placed-twice-on-same-chrom(2),
        placed-twice-on-diff-chrom(3),
        many-placements(10)
    } OPTIONAL,

    frequency-based-validation INTEGER {
        is-mutation       (1), -- low frequency variation that is cited in
                               -- journal or other reputable sources (0x01)
        above-5pct-all    (2), -- >5% minor allele freq in each and all
                               -- populations (0x02)
        above-5pct-1plus  (4), -- >5% minor allele freq in 1+ populations (0x04)
        validated         (8), -- Bit is set if the variant has a minor allele
                               -- observed in two or more separate chromosomes
                               -- (0x08)
        above-1pct-all   (16), -- >1% minor allele freq in each and all
                               -- populations (0x10)
        above-1pct-1plus (32)  -- >1% minor allele freq in 1+ populations (0x20)
    } OPTIONAL,

    genotype INTEGER {
        in-haplotype-set (1), -- Exists in a haplotype tagging set (0x01)
        has-genotypes    (2)  -- SNP has individual genotype (0x02)
    } OPTIONAL,

    -- project IDs are IDs from BioProjects
    -- in order to report information about project relationships, we
    -- require projects to be registered
    -- This field in many ways duplicates dbxrefs; however, the
    -- intention of this field is to more adequately reflect
    -- ownership and data source
    --
    -- 11/9/2010: DO NOT USE
    -- This field was changed in the spec in a breaking way; using it will
    -- break clients.  We are officially suppressing / abandoning this field.
    -- Clients who need to use this should instead place the data in
    -- Seq-feat.dbxref, using the db name 'BioProject'
    project-data SET OF INTEGER OPTIONAL,

    quality-check INTEGER {
        contig-allele-missing   (1), -- Reference sequence allele at the mapped
                                     -- position is not present in the SNP
                                     -- allele list, adjusted for orientation
                                     -- (0x01)
        withdrawn-by-submitter  (2), -- One member SS is withdrawn by submitter
                                     -- (0x02)
        non-overlapping-alleles (4), -- RS set has 2+ alleles from different
                                     -- submissions and these sets share no
                                     -- alleles in common (0x04)
        strain-specific         (8), -- Strain specific fixed difference (0x08)
        genotype-conflict      (16)  -- Has Genotype Conflict (0x10)
    } OPTIONAL,

    -- NOTE(review): the values 0, 1 and 255 do not look like bit flags;
    -- confirm whether the general bit-field note above applies to this field.
    confidence INTEGER {
        unknown         (0),
        likely-artifact (1),
        other           (255)
    } OPTIONAL,

    -- has this variant been validated?
    -- While a boolean flag offers no subtle distinctions of validation
    -- methods, occasionally it is only known as a single boolean value
    -- NOTE: this flag is redundant and should be omitted if more comprehensive
    -- validation information is present
    other-validation BOOLEAN OPTIONAL,

    -- origin of this allele, if known
    -- note that these are powers-of-two, and represent bits; thus, we can
    -- represent more than one state simultaneously through a bitwise OR
    allele-origin INTEGER {
        unknown         (0),
        germline        (1),
        somatic         (2),
        inherited       (4),
        paternal        (8),
        maternal        (16),
        de-novo         (32),
        biparental      (64),
        uniparental     (128),
        not-tested      (256),
        tested-inconclusive (512),
        not-reported   (1024),

        -- stopper - 2^30 (1073741824 = 0x40000000)
        other           (1073741824)
    } OPTIONAL,

    -- observed allele state, if known
    -- NOTE: THIS IS NOT A BITFIELD!
    allele-state INTEGER {
        unknown         (0),
        homozygous      (1),
        heterozygous    (2),
        hemizygous      (3),
        nullizygous     (4),
        other           (255)
    } OPTIONAL,

    -- NOTE:
    -- 'allele-frequency' here refers to the minor allele frequency of the
    -- default population
    allele-frequency REAL OPTIONAL,

    -- is this variant the ancestral allele?
    is-ancestral-allele BOOLEAN OPTIONAL
}

-- Phenotype: all four fields are OPTIONAL.  Used by Variation-ref.phenotype.
Phenotype ::= SEQUENCE {
    source VisibleString OPTIONAL,
    term VisibleString OPTIONAL,
    xref SET OF Dbtag OPTIONAL,      -- cross-references as Dbtag values

    -- does this variant have known clinical significance?
    -- (sequential values, 255 reserved for 'other')
    clinical-significance INTEGER {
        unknown                 (0),
        untested                (1),
        non-pathogenic          (2),
        probable-non-pathogenic (3),
        probable-pathogenic     (4),
        pathogenic              (5),
        drug-response           (6),
        histocompatibility      (7),
        other                   (255)
    } OPTIONAL
}

-- Population-data: 'population' is the only mandatory field.
-- Used by Variation-ref.population-data, which is marked DEPRECATED there.
Population-data ::= SEQUENCE {
    -- assayed population (e.g. HAPMAP-CEU)
    population VisibleString,
    genotype-frequency REAL OPTIONAL,
    chromosomes-tested INTEGER OPTIONAL,
    sample-ids SET OF Object-id OPTIONAL,
    allele-frequency REAL OPTIONAL,

    -- This field is an explicit bit-field
    -- Valid values should be a bitwise combination (= simple sum)
    -- of any of the values below
    flags INTEGER {
        is-default-population   (1),   -- (0x01)
        is-minor-allele         (2),   -- (0x02)
        is-rare-allele          (4)    -- (0x04)
    } OPTIONAL
}

-- Ext-loc: a location paired with an Object-id; both fields are mandatory.
-- Used by Variation-ref.ext-locs, which is marked DEPRECATED there.
Ext-loc ::= SEQUENCE {
    id Object-id,
    location Seq-loc
}


-- Variation-ref: 'data' is the only mandatory field; every other field is
-- OPTIONAL.  The type is recursive: data.set.variations and
-- consequence.variation themselves contain Variation-ref values.
-- Several fields are marked DEPRECATED below, with the replacement named
-- where one exists.
Variation-ref ::= SEQUENCE {
    -- ids (i.e., SNP rsid / ssid, dbVar nsv/nssv)
    -- expected values include 'dbSNP|rs12334', 'dbSNP|ss12345', 'dbVar|nsv1'
    --
    -- we relate three kinds of IDs here:
    --  - our current object's id
    --  - the id of this object's parent, if it exists
    --  - the sample ID that this item originates from
    id        Dbtag OPTIONAL,
    parent-id Dbtag OPTIONAL,
    sample-id Object-id OPTIONAL,
    other-ids SET OF Dbtag OPTIONAL,

    -- names and synonyms
    -- some variants have well-known canonical names and possible accepted
    -- synonyms
    name VisibleString OPTIONAL,
    synonyms SET OF VisibleString OPTIONAL,

    -- tag for comment and descriptions
    description VisibleString OPTIONAL,

    -- phenotype
    phenotype SET OF Phenotype OPTIONAL,

    -- sequencing / acquisition method
    method SET OF INTEGER {
        unknown             (0),
        bac-acgh            (1),
        computational       (2),
        curated             (3),
        digital-array       (4),
        expression-array    (5),
        fish                (6),
        flanking-sequence   (7),
        maph                (8),
        mcd-analysis        (9),
        mlpa                (10),
        oea-assembly        (11),
        oligo-acgh          (12),
        paired-end          (13),
        pcr                 (14),
        qpcr                (15),
        read-depth          (16),
        roma                (17),
        rt-pcr              (18),
        sage                (19),
        sequence-alignment  (20),
        sequencing          (21),
        snp-array           (22),
        -- NOTE(review): the next identifier is spelled 'genoytyping'.
        -- Renaming it would presumably affect anything that refers to the
        -- value by name, so it is left unchanged here; confirm before
        -- correcting.
        snp-genoytyping     (23),
        southern            (24),
        western             (25),
        optical-mapping     (26),

        other               (255)
    } OPTIONAL,

    -- Note about SNP representation and pertinent fields: allele-frequency,
    -- population, quality-codes:
    -- The case of multiple alleles for a SNP would be described by
    -- parent-feature of type Variation-set.diff-alleles, where the child
    -- features of type Variation-inst, all at the same location, would
    -- describe individual alleles.

    -- population data
    -- DEPRECATED - do not use
    population-data SET OF Population-data OPTIONAL,

    -- variant properties bit fields
    variant-prop VariantProperties OPTIONAL,

    -- has this variant been validated?
    -- DEPRECATED: new field = VariantProperties.other-validation
    validated BOOLEAN OPTIONAL,

    -- link-outs to GeneTests database
    -- DEPRECATED - do not use
    clinical-test SET OF Dbtag OPTIONAL,

    -- origin of this allele, if known
    -- note that these are powers-of-two, and represent bits; thus, we can
    -- represent more than one state simultaneously through a bitwise OR
    -- DEPRECATED: new field = VariantProperties.allele-origin
    -- NOTE(review): VariantProperties.allele-origin additionally names
    -- not-reported (1024); that name is absent from this list.
    allele-origin INTEGER {
        unknown         (0),
        germline        (1),
        somatic         (2),
        inherited       (4),
        paternal        (8),
        maternal        (16),
        de-novo         (32),
        biparental      (64),
        uniparental     (128),
        not-tested      (256),
        tested-inconclusive (512),

        -- stopper - 2^30 (1073741824 = 0x40000000)
        other           (1073741824)
    } OPTIONAL,

    -- observed allele state, if known
    -- DEPRECATED: new field = VariantProperties.allele-state
    allele-state INTEGER {
        unknown         (0),
        homozygous      (1),
        heterozygous    (2),
        hemizygous      (3),
        nullizygous     (4),
        other           (255)
    } OPTIONAL,

    -- NOTE:
    -- 'allele-frequency' here refers to the minor allele frequency of the
    -- default population
    -- DEPRECATED: new field = VariantProperties.allele-frequency
    allele-frequency REAL OPTIONAL,

    -- is this variant the ancestral allele?
    -- DEPRECATED: new field = VariantProperties.is-ancestral-allele
    is-ancestral-allele BOOLEAN OPTIONAL,

    -- publication support.
    -- Note: made this pub instead of pub-equiv, since
    -- Pub can be pub-equiv and pub-equiv is a set of pubs, but it looks like
    -- Pub is more often used as top-level container
    -- DEPRECATED - do not use; use Seq-feat.dbxref instead
    pub Pub OPTIONAL,

    -- mandatory: exactly one of the alternatives below describes the variation
    data CHOICE {
        unknown NULL,
        note    VisibleString, --free-form
        uniparental-disomy NULL,

        -- actual sequence-edit at feat.location
        instance        Variation-inst,

        -- Set of related Variations.
        -- Location of the set equals to the union of member locations
        set SEQUENCE {
            type INTEGER {
                unknown     (0),
                compound    (1), -- complex change at the same location on the
                                 -- same molecule
                products    (2), -- different products arising from the same
                                 -- variation in a precursor, e.g. r.[13g>a,
                                 -- 13_88del]
                haplotype   (3), -- changes on the same allele, e.g
                                 -- r.[13g>a;15u>c]
                genotype    (4), -- changes on different alleles in the same
                                 -- genotype, e.g. g.[476C>T]+[476C>T]
                mosaic      (5), -- different genotypes in the same individual
                individual  (6), -- same organism; allele relationship unknown,
                                 -- e.g. g.[476C>T(+)183G>C]
                population  (7), -- population
                alleles     (8), -- set represents a set of observed alleles
                package     (9), -- set represents a package of observations at
                                 -- a given location, generally containing
                                 -- asserted + reference
                other       (255)
            },
            variations SET OF Variation-ref,   -- members of the set (recursive)
            name  VisibleString OPTIONAL
        },

        -- variant is a complex and undescribed change at the location
        -- This type of variant is known to occur in dbVar submissions
        complex NULL
    },

    consequence SET OF CHOICE {
        unknown     NULL,
        splicing    NULL, --some effect on splicing
        note        VisibleString,  --freeform

        -- Describe resulting variation in the product, e.g. missense,
        -- nonsense, silent, neutral, etc in a protein, that arises from
        -- THIS variation.
        variation   Variation-ref,

        -- see http://www.hgvs.org/mutnomen/recs-prot.html
        frameshift SEQUENCE {
            phase INTEGER OPTIONAL,
            x-length INTEGER OPTIONAL
        },

        loss-of-heterozygosity SEQUENCE {
            -- In germline comparison, it will be reference genome assembly
            -- (default) or reference/normal population. In somatic mutation,
            -- it will be a name of the normal tissue.
            reference VisibleString OPTIONAL,

            -- Name of the testing subject type or the testing tissue.
            test VisibleString OPTIONAL
        }
    } OPTIONAL,

    -- Observed location, if different from the parent set or feature.location.
    -- DEPRECATED - do not use
    location        Seq-loc OPTIONAL,

    -- reference other locs, e.g. mapped source
    -- DEPRECATED - do not use
    ext-locs SET OF Ext-loc OPTIONAL,

    -- DEPRECATED - do not use; use Seq-feat.exts instead
    ext             User-object OPTIONAL,

    somatic-origin SET OF SEQUENCE {
        -- description of the somatic origin itself
        source SubSource OPTIONAL,
        -- condition related to this origin's type
        condition SEQUENCE {
            description VisibleString OPTIONAL,
            -- reference to BioTerm / other descriptive database
            object-id SET OF Dbtag OPTIONAL
        } OPTIONAL
    } OPTIONAL

}


-- Delta-item: one element of Variation-inst.delta.  No field is mandatory:
-- seq, multiplier and multiplier-fuzz are OPTIONAL and action takes the
-- value morph when omitted.
Delta-item ::= SEQUENCE {
    -- the sequence involved, given in exactly one of three forms
    seq CHOICE {
        literal Seq-literal,
        loc Seq-loc,
        this NULL --same location as variation-ref itself
    } OPTIONAL,

    -- Multiplier allows representing a tandem, e.g.  ATATAT as AT*3
    -- This allows describing CNV/SSR where delta=self  with a
    -- multiplier which specifies the count of the repeat unit.

    multiplier          INTEGER OPTIONAL, --assumed 1 if not specified.
    multiplier-fuzz     Int-fuzz OPTIONAL,

    action INTEGER {

        -- replace len(seq) positions starting with location.start with seq
        morph      (0),

        -- go downstream by distance specified by multiplier (upstream if < 0),
        -- in genomic context.
        offset     (1),

        -- excise sequence at location
        -- if multiplier is specified, delete len(location)*multiplier
        -- positions downstream
        del-at     (2),

        -- insert seq before the location.start
        ins-before (3)

    } DEFAULT morph
}


-- Variation instance
-- Variation-inst: 'type' and 'delta' are mandatory, 'observation' is
-- OPTIONAL.  The comment next to each type value sketches the expected
-- content of 'delta' for that type.
Variation-inst ::= SEQUENCE {
    type INTEGER {
        unknown         (0),    -- delta=[]
        identity        (1),    -- delta=[]
        inv             (2),    -- delta=[del, ins.seq=
                                -- RevComp(variation-location)]
        snv             (3),    -- delta=[morph of length 1]
                                -- NOTE: this is snV not snP; the latter
                                -- requires frequency-based validation to be
                                -- established in VariantProperties
                                -- the strict definition of SNP is an SNV with
                                -- an established population frequency of at
                                -- least 1% in at least 1 population
        mnp             (4),    -- delta=[morph of length >1]
        delins          (5),    -- delta=[del, ins]
        del             (6),    -- delta=[del]
        ins             (7),    -- delta=[ins]
        microsatellite  (8),    -- delta=[del, ins.seq= repeat-unit with fuzzy
                                -- multiplier]
                                -- variation-location is the microsat expansion
                                -- on the sequence
        transposon      (9),    -- delta=[del, ins.seq= known donor or 'this']
                                -- variation-location is equiv of transposon
                                -- locs.
        cnv             (10),   -- delta=[del, ins= 'this' with fuzzy
                                -- multiplier]
        direct-copy     (11),   -- delta=[ins.seq= upstream location on the
                                -- same strand]
        rev-direct-copy (12),   -- delta=[ins.seq= downstream location on the
                                -- same strand]
        inverted-copy   (13),   -- delta=[ins.seq= upstream location on the
                                -- opposite strand]
        everted-copy    (14),   -- delta=[ins.seq= downstream location on the
                                -- opposite strand]
        translocation   (15),   -- delta=like delins
        prot-missense   (16),   -- delta=[morph of length 1]
        prot-nonsense   (17),   -- delta=[del]; variation-location is the tail
                                -- of the protein being truncated
        prot-neutral    (18),   -- delta=[morph of length 1]
        prot-silent     (19),   -- delta=[morph of length 1, same AA as at
                                -- variation-location]
        prot-other      (20),   -- delta=any

        other           (255)   -- delta=any
    },

    -- Sequence that replaces the location, in biological order.
    delta SEQUENCE OF Delta-item,

    -- 'observation' is used to label items in a Variation-ref package
    -- This field is explicitly a bit-field, so the bitwise OR (= sum) of any
    -- of the values may be observed.
    observation INTEGER {
        asserted        (1),   -- inst represents the asserted base at a
                               -- position
        reference       (2),   -- inst represents the reference base at the
                               -- position
        variant         (4)    -- inst represent the observed variant at a
                               -- given position
    } OPTIONAL
}

END


--**********************************************************************
--
--  NCBI Restriction Sites
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-Rsite DEFINITIONS ::=
BEGIN

EXPORTS Rsite-ref;

IMPORTS Dbtag FROM NCBI-General;

-- Rsite-ref: reference to a restriction site, given either as a string or
-- as a database tag (exactly one of the two).
Rsite-ref ::= CHOICE {
    str VisibleString ,     -- may be unparsable
    db  Dbtag }             -- pointer to a restriction site database

END

--**********************************************************************
--
--  NCBI RNAs
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-RNA DEFINITIONS ::=
BEGIN

EXPORTS RNA-ref, Trna-ext, RNA-gen, RNA-qual, RNA-qual-set;

IMPORTS Seq-loc FROM NCBI-Seqloc;

--*** rnas ***********************************************
--*
--*  various rnas
--*
                         -- minimal RNA sequence
-- RNA-ref: 'type' is mandatory; 'pseudo' and 'ext' are OPTIONAL.
-- When present, 'ext' holds exactly one of its three alternatives.
RNA-ref ::= SEQUENCE {
    type ENUMERATED {            -- type of RNA feature
        unknown (0) ,
        premsg (1) ,
        mRNA (2) ,
        tRNA (3) ,
        rRNA (4) ,
        snRNA (5) ,              -- will become ncRNA, with RNA-gen.class = snRNA
        scRNA (6) ,              -- will become ncRNA, with RNA-gen.class = scRNA
        snoRNA (7) ,             -- will become ncRNA, with RNA-gen.class = snoRNA
        ncRNA (8) ,              -- non-coding RNA; subsumes snRNA, scRNA, snoRNA
        tmRNA (9) ,
        miscRNA (10) ,
        other (255) } ,
    pseudo BOOLEAN OPTIONAL ,
    ext CHOICE {
        name VisibleString ,        -- for naming "other" type
        tRNA Trna-ext ,             -- for tRNAs
        gen RNA-gen } OPTIONAL      -- generic fields for ncRNA, tmRNA, miscRNA
    }

-- Trna-ext: all three fields are OPTIONAL.  When present, 'aa' gives the
-- amino acid as an INTEGER in exactly one of four alphabets.
-- Used by the tRNA alternative of RNA-ref.ext.
Trna-ext ::= SEQUENCE {                 -- tRNA feature extensions
    aa CHOICE {                         -- aa this carries
        iupacaa INTEGER ,
        ncbieaa INTEGER ,
        ncbi8aa INTEGER ,
        ncbistdaa INTEGER } OPTIONAL ,
    codon SET OF INTEGER OPTIONAL ,     -- codon(s) as in Genetic-code
    anticodon Seq-loc OPTIONAL }        -- location of anticodon

-- RNA-gen: generic RNA fields; all three are OPTIONAL.
-- Used by the gen alternative of RNA-ref.ext.
RNA-gen ::= SEQUENCE {
    class VisibleString OPTIONAL ,      -- for ncRNAs, the class of non-coding RNA:
                                        -- examples: antisense_RNA, guide_RNA, snRNA
    product VisibleString OPTIONAL ,
    quals RNA-qual-set OPTIONAL         -- e.g., tag_peptide qualifier for tmRNAs
}

-- RNA-qual: one tag/value pair; both strings are required
RNA-qual ::= SEQUENCE {                 -- Additional data values for RNA-gen,
    qual VisibleString ,                -- in a tag (qual), value (val) format
    val VisibleString }

-- RNA-qual-set: ordered list of RNA-qual entries (the type of RNA-gen.quals)
RNA-qual-set ::= SEQUENCE OF RNA-qual

END

--**********************************************************************
--
--  NCBI Genes
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-Gene DEFINITIONS ::=
BEGIN

EXPORTS Gene-ref, Gene-nomenclature;

IMPORTS Dbtag FROM NCBI-General;

--*** Gene ***********************************************
--*
--*  reference to a gene
--*

-- Gene-ref: reference to a gene. Every member is OPTIONAL except pseudo,
--   which defaults to FALSE when absent.
Gene-ref ::= SEQUENCE {
    locus VisibleString OPTIONAL ,        -- Official gene symbol
    allele VisibleString OPTIONAL ,       -- Official allele designation
    desc VisibleString OPTIONAL ,         -- descriptive name
    maploc VisibleString OPTIONAL ,       -- descriptive map location
    pseudo BOOLEAN DEFAULT FALSE ,        -- pseudogene
    db SET OF Dbtag OPTIONAL ,            -- ids in other dbases
    syn SET OF VisibleString OPTIONAL ,   -- synonyms for locus
    locus-tag VisibleString OPTIONAL ,    -- systematic gene name (e.g., MI0001, ORF0069)
    -- structured nomenclature record, see Gene-nomenclature
    formal-name Gene-nomenclature OPTIONAL
}

-- Gene-nomenclature: a required status plus optional symbol, name, and
--   source tag (the type of Gene-ref.formal-name)
Gene-nomenclature ::= SEQUENCE {
    status ENUMERATED {
        unknown (0) ,
        official (1) ,
        interim (2)
    } ,
    symbol VisibleString OPTIONAL ,
    name VisibleString OPTIONAL ,
    source Dbtag OPTIONAL
}

END


--**********************************************************************
--
--  NCBI Organism
--  by James Ostell, 1994
--  version 3.0
--
--**********************************************************************

NCBI-Organism DEFINITIONS ::=
BEGIN

EXPORTS Org-ref;

IMPORTS Dbtag FROM NCBI-General;

--*** Org-ref ***********************************************
--*
--*  Reference to an organism
--*     defines only the organism.. lower levels of detail for biological
--*     molecules are provided by the Source object
--*

-- Org-ref: reference to an organism; every member is OPTIONAL.
--   Structured naming detail, when present, is carried in orgname.
Org-ref ::= SEQUENCE {
    taxname VisibleString OPTIONAL ,   -- preferred formal name
    common VisibleString OPTIONAL ,    -- common name
    mod SET OF VisibleString OPTIONAL , -- unstructured modifiers
    db SET OF Dbtag OPTIONAL ,         -- ids in taxonomic or culture dbases
    syn SET OF VisibleString OPTIONAL ,  -- synonyms for taxname or common
    orgname OrgName OPTIONAL }


-- OrgName: structured organism name; every member is OPTIONAL.
--   name selects one of five naming forms; mod holds structured modifiers
--   (OrgMod), as opposed to the free-text Org-ref.mod.
OrgName ::= SEQUENCE {
    name CHOICE {
        binomial BinomialOrgName ,         -- genus/species type name
        virus VisibleString ,              -- virus names are different
        hybrid MultiOrgName ,              -- hybrid between organisms
        namedhybrid BinomialOrgName ,      -- some hybrids have genus x species name
        partial PartialOrgName } OPTIONAL , -- when genus not known
    attrib VisibleString OPTIONAL ,        -- attribution of name
    mod SEQUENCE OF OrgMod OPTIONAL ,
    lineage VisibleString OPTIONAL ,       -- lineage with semicolon separators
    gcode INTEGER OPTIONAL ,               -- genetic code (see CdRegion)
    mgcode INTEGER OPTIONAL ,              -- mitochondrial genetic code
    div VisibleString OPTIONAL ,           -- GenBank division code
    pgcode INTEGER OPTIONAL }              -- plastid genetic code


-- OrgMod: one structured organism modifier: a subtype code, its required
--   text value (subname), and an optional attribution.
--   subtype is a named INTEGER, not ENUMERATED; named values start at 2
--   and jump from 38 to the reserved 253..255 range.
OrgMod ::= SEQUENCE {
    subtype INTEGER {
        strain (2) ,
        substrain (3) ,
        type (4) ,
        subtype (5) ,
        variety (6) ,
        serotype (7) ,
        serogroup (8) ,
        serovar (9) ,
        cultivar (10) ,
        pathovar (11) ,
        chemovar (12) ,
        biovar (13) ,
        biotype (14) ,
        group (15) ,
        subgroup (16) ,
        isolate (17) ,
        common (18) ,
        acronym (19) ,
        dosage (20) ,          -- chromosome dosage of hybrid
        nat-host (21) ,        -- natural host of this specimen
        sub-species (22) ,
        specimen-voucher (23) ,
        authority (24) ,
        forma (25) ,
        forma-specialis (26) ,
        ecotype (27) ,
        synonym (28) ,
        anamorph (29) ,
        teleomorph (30) ,
        breed (31) ,
        gb-acronym (32) ,       -- used by taxonomy database
        gb-anamorph (33) ,      -- used by taxonomy database
        gb-synonym (34) ,       -- used by taxonomy database
        culture-collection (35) ,
        bio-material (36) ,
        metagenome-source (37) ,
        type-material (38) ,
        old-lineage (253) ,
        old-name (254) ,
        other (255) } ,         -- old-name (254), once planned for a later spec (ASN5), is now defined above
    subname VisibleString ,
    attrib VisibleString OPTIONAL }  -- attribution/source of name

-- BinomialOrgName: genus / species / subspecies name parts; used by the
--   binomial and namedhybrid variants of OrgName.name
BinomialOrgName ::= SEQUENCE {
    genus VisibleString ,               -- required
    species VisibleString OPTIONAL ,    -- species required if subspecies used
    subspecies VisibleString OPTIONAL }

-- MultiOrgName: ordered list of OrgName (recursive through OrgName.name.hybrid)
MultiOrgName ::= SEQUENCE OF OrgName   -- the first will be used to assign division

-- PartialOrgName: ordered list of taxonomic elements
PartialOrgName ::= SEQUENCE OF TaxElement  -- when we don't know the genus

-- TaxElement: one named taxonomic element. fixed-level gives the rank as a
--   code; with other (0) the rank is supplied as text in level instead.
TaxElement ::= SEQUENCE {
    fixed-level INTEGER {
       other (0) ,                     -- level must be set in string
       family (1) ,
       order (2) ,
       class (3) } ,
    level VisibleString OPTIONAL ,
    name VisibleString }

END


--**********************************************************************
--
--  NCBI BioSource
--  by James Ostell, 1994
--  version 3.0
--
--**********************************************************************

NCBI-BioSource DEFINITIONS ::=
BEGIN

EXPORTS BioSource, SubSource;

IMPORTS Org-ref FROM NCBI-Organism;

--********************************************************************
--
-- BioSource gives the source of the biological material
--   for sequences
--
--********************************************************************

-- BioSource: source of the biological material for a sequence.
--   org is the only required member; genome and origin both default to
--   unknown (0) when absent.
BioSource ::= SEQUENCE {
    genome INTEGER {         -- biological context
        unknown (0) ,
        genomic (1) ,
        chloroplast (2) ,
        chromoplast (3) ,
        kinetoplast (4) ,
        mitochondrion (5) ,
        plastid (6) ,
        macronuclear (7) ,
        extrachrom (8) ,
        plasmid (9) ,
        transposon (10) ,
        insertion-seq (11) ,
        cyanelle (12) ,
        proviral (13) ,
        virion (14) ,
        nucleomorph (15) ,
        apicoplast (16) ,
        leucoplast (17) ,
        proplastid (18) ,
        endogenous-virus (19) ,
        hydrogenosome (20) ,
        chromosome (21) ,
        chromatophore (22) ,
        plasmid-in-mitochondrion (23) ,
        plasmid-in-plastid (24)
      } DEFAULT unknown ,
    origin INTEGER {
      unknown (0) ,
      natural (1) ,                    -- normal biological entity
      natmut (2) ,                     -- naturally occurring mutant
      mut (3) ,                        -- artificially mutagenized
      artificial (4) ,                 -- artificially engineered
      synthetic (5) ,                  -- purely synthetic
      other (255)
    } DEFAULT unknown ,
    org Org-ref ,
    -- structured source modifiers, see SubSource
    subtype SEQUENCE OF SubSource OPTIONAL ,
    -- is-focus is a NULL: its presence alone is the flag
    is-focus NULL OPTIONAL ,           -- to distinguish biological focus
    pcr-primers PCRReactionSet OPTIONAL }

-- PCRReactionSet: unordered collection of PCR reactions (BioSource.pcr-primers)
PCRReactionSet ::= SET OF PCRReaction

-- PCRReaction: the forward and reverse primer sets of one reaction;
--   either side may be absent
PCRReaction ::= SEQUENCE {
    forward PCRPrimerSet OPTIONAL ,
    reverse PCRPrimerSet OPTIONAL }

-- PCRPrimerSet: unordered collection of primers
PCRPrimerSet ::= SET OF PCRPrimer

-- PCRPrimer: a primer given by sequence and/or name; both are OPTIONAL
PCRPrimer ::= SEQUENCE {
    seq PCRPrimerSeq OPTIONAL ,
    name PCRPrimerName OPTIONAL }

-- PCRPrimerSeq: primer sequence as text; a distinct name for VisibleString
PCRPrimerSeq ::= VisibleString

-- PCRPrimerName: primer name as text; a distinct name for VisibleString
PCRPrimerName ::= VisibleString

-- SubSource: one structured source modifier: a subtype code, its required
--   text value (name), and an optional attribution.
--   subtype is a named INTEGER, not ENUMERATED; named values run 1..43
--   plus other (255).
SubSource ::= SEQUENCE {
    subtype INTEGER {
        chromosome (1) ,
        map (2) ,
        clone (3) ,
        subclone (4) ,
        haplotype (5) ,
        genotype (6) ,
        sex (7) ,
        cell-line (8) ,
        cell-type (9) ,
        tissue-type (10) ,
        clone-lib (11) ,
        dev-stage (12) ,
        frequency (13) ,
        germline (14) ,
        rearranged (15) ,
        lab-host (16) ,
        pop-variant (17) ,
        tissue-lib (18) ,
        plasmid-name (19) ,
        transposon-name (20) ,
        insertion-seq-name (21) ,
        plastid-name (22) ,
        country (23) ,
        segment (24) ,
        endogenous-virus-name (25) ,
        transgenic (26) ,
        environmental-sample (27) ,
        isolation-source (28) ,
        lat-lon (29) ,          -- +/- decimal degrees
        collection-date (30) ,  -- DD-MMM-YYYY format
        collected-by (31) ,     -- name of person who collected the sample
        identified-by (32) ,    -- name of person who identified the sample
        fwd-primer-seq (33) ,   -- sequence (possibly more than one; semicolon-separated)
        rev-primer-seq (34) ,   -- sequence (possibly more than one; semicolon-separated)
        fwd-primer-name (35) ,
        rev-primer-name (36) ,
        metagenomic (37) ,
        mating-type (38) ,
        linkage-group (39) ,
        haplogroup (40) ,
        whole-replicon (41) ,
        phenotype (42) ,
        altitude (43) ,
        other (255) } ,
    name VisibleString ,
    attrib VisibleString OPTIONAL }    -- attribution/source of this name

END

--**********************************************************************
--
--  NCBI Protein
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-Protein DEFINITIONS ::=
BEGIN

EXPORTS Prot-ref;

IMPORTS Dbtag FROM NCBI-General;

--*** Prot-ref ***********************************************
--*
--*  Reference to a protein name
--*

-- Prot-ref: reference to a protein name. Every member is OPTIONAL except
--   processed, which defaults to not-set (0) when absent.
Prot-ref ::= SEQUENCE {
    name SET OF VisibleString OPTIONAL ,      -- protein name
    desc VisibleString OPTIONAL ,      -- description (instead of name)
    ec SET OF VisibleString OPTIONAL , -- E.C. number(s)
    activity SET OF VisibleString OPTIONAL ,  -- activities
    db SET OF Dbtag OPTIONAL ,         -- ids in other dbases
    processed ENUMERATED {             -- processing status
       not-set (0) ,
       preprotein (1) ,
       mature (2) ,
       signal-peptide (3) ,
       transit-peptide (4) ,
       propeptide (5) } DEFAULT not-set }

END
--********************************************************************
--
--  Transcription Initiation Site Feature Data Block
--  James Ostell, 1991
--  Philip Bucher, David Ghosh
--  version 1.1
--
--
--
--********************************************************************

NCBI-TxInit DEFINITIONS ::=
BEGIN

EXPORTS Txinit;

IMPORTS Gene-ref FROM NCBI-Gene
        Prot-ref FROM NCBI-Protein
        Org-ref FROM NCBI-Organism;

-- Txinit: description of a transcription initiation site.
--   name and txsystem are required; mapping-precise and location-accurate
--   default to FALSE; everything else is OPTIONAL.
--   Note that "rna" is both a member name and a txsystem value name.
Txinit ::= SEQUENCE {
    name VisibleString ,    -- descriptive name of initiation site
    syn SEQUENCE OF VisibleString OPTIONAL ,   -- synonyms
    gene SEQUENCE OF Gene-ref OPTIONAL ,  -- gene(s) transcribed
    protein SEQUENCE OF Prot-ref OPTIONAL ,   -- protein(s) produced
    rna SEQUENCE OF VisibleString OPTIONAL ,  -- rna(s) produced
    expression VisibleString OPTIONAL ,  -- tissue/time of expression
    txsystem ENUMERATED {       -- transcription apparatus used at this site
        unknown (0) ,
        pol1 (1) ,      -- eukaryotic Pol I
        pol2 (2) ,      -- eukaryotic Pol II
        pol3 (3) ,      -- eukaryotic Pol III
        bacterial (4) ,
        viral (5) ,
        rna (6) ,       -- RNA replicase
        organelle (7) ,
        other (255) } ,
    txdescr VisibleString OPTIONAL ,   -- modifiers on txsystem
    txorg Org-ref OPTIONAL ,  -- organism supplying transcription apparatus
    mapping-precise BOOLEAN DEFAULT FALSE ,  -- mapping precise or approx
    location-accurate BOOLEAN DEFAULT FALSE , -- does Seq-loc reflect mapping
    inittype ENUMERATED {
        unknown (0) ,
        single (1) ,
        multiple (2) ,
        region (3) } OPTIONAL ,
    -- supporting experiments, see Tx-evidence
    evidence SET OF Tx-evidence OPTIONAL }

-- Tx-evidence: one piece of experimental evidence for a Txinit.
--   exp-code is required; expression-system defaults to physiological (1);
--   the two BOOLEAN flags default to FALSE.
Tx-evidence ::= SEQUENCE {
    exp-code ENUMERATED {
        unknown (0) ,
        rna-seq (1) ,   -- direct RNA sequencing
        rna-size (2) ,  -- RNA length measurement
        np-map (3) ,    -- nuclease protection mapping with homologous sequence ladder
        np-size (4) ,   -- nuclease protected fragment length measurement
        pe-seq (5) ,    -- dideoxy RNA sequencing
        cDNA-seq (6) ,  -- full-length cDNA sequencing
        pe-map (7) ,    -- primer extension mapping with homologous sequence ladder
        pe-size (8) ,   -- primer extension product length measurement
        pseudo-seq (9) , -- full-length processed pseudogene sequencing
        rev-pe-map (10) ,   -- see NOTE (1) below
        other (255) } ,
    expression-system ENUMERATED {
        unknown (0) ,
        physiological (1) ,
        in-vitro (2) ,
        oocyte (3) ,
        transfection (4) ,
        transgenic (5) ,
        other (255) } DEFAULT physiological ,
    low-prec-data BOOLEAN DEFAULT FALSE ,
    from-homolog BOOLEAN DEFAULT FALSE }     -- experiment actually done on
                                             --  close homolog

    -- NOTE (1) length measurement of a reverse direction primer-extension
    --          product (blocked  by  RNA  5'end)  by  comparison with
    --          homologous sequence ladder (J. Mol. Biol. 199, 587)

END

--$Revision: 1.10 $
--  ----------------------------------------------------------------------------
--
--                            PUBLIC DOMAIN NOTICE
--                National Center for Biotechnology Information
--
--  This software/database is a "United States Government Work" under the terms
--  of the United States Copyright Act.  It was written as part of the author's
--  official duties as a United States Government employee and thus cannot be
--  copyrighted.  This software/database is freely available to the public for
--  use.  The National Library of Medicine and the U.S. Government have not
--  placed any restriction on its use or reproduction.
--
--  Although all reasonable efforts have been taken to ensure the accuracy and
--  reliability of the software and data, the NLM and the U.S. Government do not
--  and cannot warrant the performance or results that may be obtained by using
--  this software or data.  The NLM and the U.S. Government disclaim all
--  warranties, express or implied, including warranties of performance,
--  merchantability or fitness for any particular purpose.
--
--  Please cite the authors in any work or product based on this material.
--
--  ----------------------------------------------------------------------------
--
--  Authors: Mike DiCuccio, Eugene Vasilchenko
--
--  ASN.1 interface to table readers
--
--  ----------------------------------------------------------------------------

NCBI-SeqTable DEFINITIONS ::=

BEGIN

EXPORTS
    SeqTable-column-info, SeqTable-column, Seq-table;

IMPORTS
    Seq-id, Seq-loc, Seq-interval   FROM NCBI-Seqloc;


-- SeqTable-column-info: says what a table column holds, via a known
--   field-id code and/or a text field-name locator. All three members are
--   OPTIONAL. field-id is a named INTEGER, so values outside the listed
--   names are syntactically permitted.
SeqTable-column-info ::= SEQUENCE {
    -- user friendly column name, can be skipped
    title VisibleString OPTIONAL,

    -- identification of the column data in the objects described by the table
    field-id INTEGER { -- known column data types
        -- position types
        location        (0), -- location as Seq-loc
        location-id     (1), -- location Seq-id
        location-gi     (2), -- gi
        location-from   (3), -- interval from
        location-to     (4), -- interval to
        location-strand (5), -- location strand
        location-fuzz-from-lim (6),
        location-fuzz-to-lim   (7),

        product         (10), -- product as Seq-loc
        product-id      (11), -- product Seq-id
        product-gi      (12), -- product gi
        product-from    (13), -- product interval from
        product-to      (14), -- product interval to
        product-strand  (15), -- product strand
        product-fuzz-from-lim (16),
        product-fuzz-to-lim   (17),

        -- main feature fields
        id-local        (20), -- id.local.id
        xref-id-local   (21), -- xref.id.local.id
        partial         (22),
        comment         (23),
        title           (24),
        ext             (25), -- field-name must be "E.xxx", see below
        qual            (26), -- field-name must be "Q.xxx", see below
        dbxref          (27), -- field-name must be "D.xxx", see below

        -- various data fields
        data-imp-key        (30),
        data-region         (31),
        data-cdregion-frame (32),

        -- extra fields, see also special values for str below
        ext-type        (40),
        qual-qual       (41),
        qual-val        (42),
        dbxref-db       (43),
        dbxref-tag      (44)
    } OPTIONAL,

    -- any column can be identified by ASN.1 text locator string
    -- with omitted object type.
    -- examples:
    --   "data.gene.locus" for Seq-feat.data.gene.locus
    --   "data.imp.key" for Seq-feat.data.imp.key
    --   "qual.qual"
    --    - Seq-feat.qual is SEQUENCE so several columns are allowed
    --      see also "Q.xxx" special value for shorter qual representation
    --   "ext.type.str"
    --   "ext.data.label.str"
    --   "ext.data.data.int"
    --      see also "E.xxx" special value for shorter ext representation
    -- special values start with capital letter:
    --   "E.xxx" - ext.data.label.str = xxx, ext.data.data = data
    --    - Seq-feat.ext.data is SEQUENCE so several columns are allowed
    --   "Q.xxx" - qual.qual = xxx, qual.val = data
    --    - Seq-feat.qual is SEQUENCE so several columns are allowed
    --   "D.xxx" - dbxref.id = xxx, dbxref.tag = data
    --    - Seq-feat.dbxref is SET so several columns are allowed
    field-name  VisibleString OPTIONAL
}


-- CommonString-table: dictionary-style column of strings. Each row stores
--   an index into a shared list of distinct values instead of the value.
CommonString-table ::= SEQUENCE {
    -- set of possible values
    strings     SEQUENCE OF UTF8String,

    -- indexes of values in array 'strings' for each data row
    indexes     SEQUENCE OF INTEGER
}


-- CommonBytes-table: dictionary-style column of byte arrays. Each row stores
--   an index into a shared list of distinct values instead of the value.
CommonBytes-table ::= SEQUENCE {
    -- set of possible values
    bytes       SEQUENCE OF OCTET STRING,

    -- indexes of values in array 'bytes' for each data row
    indexes     SEQUENCE OF INTEGER
}


-- Scaled-int-multi-data: integer column stored as a linear transform
--   (mul, add) applied to an inner SeqTable-multi-data column
Scaled-int-multi-data ::= SEQUENCE {
    -- output data[i] = data[i]*mul+add
    mul     INTEGER,
    add     INTEGER,
    data    SeqTable-multi-data,

    -- min/max scaled value
    -- should be set if scaled values may not fit in 32-bit signed integer
    min     INTEGER OPTIONAL,
    max     INTEGER OPTIONAL
}


-- Scaled-real-multi-data: real-valued column stored as a linear transform
--   (mul, add) applied to an inner SeqTable-multi-data column
Scaled-real-multi-data ::= SEQUENCE {
    -- output data[i] = data[i]*mul+add
    mul     REAL,
    add     REAL,
    data    SeqTable-multi-data
}


-- Class for serializing bm::bvector<>
-- see include/util/bitset/bm.h
-- Since bvector<> serialization doesn't keep size we have to add it explicitly
-- Both members are required: the explicit size and the serialized bytes.
BVector-data ::= SEQUENCE {
    size    INTEGER,
    data    OCTET STRING
}


-- SeqTable-multi-data: the values of one column, in exactly one of several
--   storage forms. The type is recursive: int-delta holds another
--   SeqTable-multi-data directly, and int-scaled / real-scaled wrap one
--   inside their own structures.
SeqTable-multi-data ::= CHOICE {
    -- a set of 4-byte integers, one per row
    int         SEQUENCE OF INTEGER,

    -- a set of reals, one per row
    real        SEQUENCE OF REAL,

    -- a set of strings, one per row
    string      SEQUENCE OF UTF8String,

    -- a set of byte arrays, one per row
    bytes       SEQUENCE OF OCTET STRING,

    -- a set of strings with a small set of possible values
    common-string   CommonString-table,

    -- a set of byte arrays with a small set of possible values
    common-bytes    CommonBytes-table,

    -- a set of bits, one per row
    -- Most-significant bit in each octet comes first.
    bit         OCTET STRING,

    -- a set of locations, one per row
    loc         SEQUENCE OF Seq-loc,
    id          SEQUENCE OF Seq-id,
    interval    SEQUENCE OF Seq-interval,

    -- delta-encoded data (int/bit -> int)
    int-delta   SeqTable-multi-data,

    -- scaled data (int/bit -> int)
    int-scaled  Scaled-int-multi-data,

    -- scaled data (int/bit -> real)
    real-scaled Scaled-real-multi-data,

    -- a set of bits, represented as a serialized bvector,
    -- see include/util/bitset/bm.h
    bit-bvector BVector-data,

    -- a set of signed 1-byte integers encoded as sequential octets
    int1        OCTET STRING,

    -- a set of signed 2-byte integers
    int2        SEQUENCE OF INTEGER,

    -- a set of signed 8-byte integers
    int8        SEQUENCE OF INTEGER
}


-- SeqTable-single-data: a single value of one of the supported kinds; used
--   for SeqTable-column.default and SeqTable-column.sparse-other
SeqTable-single-data ::= CHOICE {
    -- integer
    int         INTEGER,

    -- real
    real        REAL,

    -- string
    string      UTF8String,

    -- byte array
    bytes       OCTET STRING,

    -- bit
    bit         BOOLEAN,

    -- location
    loc         Seq-loc,
    id          Seq-id,
    interval    Seq-interval,

    -- NOTE(review): presumably an 8-byte integer, by analogy with the int8
    -- variant of SeqTable-multi-data; confirm
    int8        INTEGER
}


-- SeqTable-sparse-index: identifies which rows of a sparse column have
--   values, in one of four encodings
SeqTable-sparse-index ::= CHOICE {
    -- Indexes of rows with values
    indexes SEQUENCE OF INTEGER,

    -- Bitset of rows with values, set bit means the row has value.
    -- Most-significant bit in each octet comes first.
    bit-set OCTET STRING,

    -- Indexes of rows with values, delta-encoded
    indexes-delta SEQUENCE OF INTEGER,

    -- Bitset of rows with values, as serialized bvector<>,
    -- see include/util/bitset/bm.h
    bit-set-bvector BVector-data
}


-- SeqTable-column: one column of a Seq-table. Only header is required;
--   the row data, sparse index, and the two fallback values are OPTIONAL.
SeqTable-column ::= SEQUENCE {
    -- column description or reference to previously defined info
    header      SeqTable-column-info,   -- information about data

    -- row data
    data        SeqTable-multi-data OPTIONAL,

    -- in case not all rows contain data this field will contain sparse info
    sparse      SeqTable-sparse-index OPTIONAL,

    -- default value for sparse table, or if row data is too short
    default     SeqTable-single-data OPTIONAL,

    -- single value for indexes not listed in sparse table
    sparse-other SeqTable-single-data OPTIONAL
}


-- Seq-table: a table of features stored column-wise. feat-type, num-rows
--   and columns are required; feat-subtype is OPTIONAL.
Seq-table ::= SEQUENCE {
    -- type of features in this table, equal to Seq-feat.data variant index
    feat-type   INTEGER,

    -- subtype of features in this table, defined in header SeqFeatData.hpp
    feat-subtype INTEGER OPTIONAL,

    -- number of rows
    num-rows    INTEGER,

    -- data in columns
    columns     SEQUENCE OF SeqTable-column
}


END
--$Revision: 6.4 $
--**********************************************************************
--
--  NCBI Sequence Alignment elements
--  by James Ostell, 1990
--
--**********************************************************************

NCBI-Seqalign DEFINITIONS ::=
BEGIN

EXPORTS Seq-align, Score, Score-set, Seq-align-set;

IMPORTS Seq-id, Seq-loc , Na-strand FROM NCBI-Seqloc
        User-object, Object-id FROM NCBI-General;

--*** Sequence Alignment ********************************
--*

-- Seq-align-set: unordered collection of alignments; also the type of the
--   "disc" variant of Seq-align.segs, which makes Seq-align recursive
Seq-align-set ::= SET OF Seq-align

-- Seq-align: one sequence alignment. type and segs are required; all other
--   members are OPTIONAL. segs carries the alignment in exactly one of
--   seven segment representations.
--   Note that "disc" is both a type value name and a segs variant name.
Seq-align ::= SEQUENCE {
    type ENUMERATED {
        not-set (0) ,
        global (1) ,
        diags (2) ,     -- unbroken, but not ordered, diagonals
        partial (3) ,   -- mapping pieces together
        disc (4) ,      -- discontinuous alignment
        other (255) } ,
    dim INTEGER OPTIONAL ,     -- dimensionality
    score SET OF Score OPTIONAL ,   -- for whole alignment
    segs CHOICE {                   -- alignment data
        dendiag SEQUENCE OF Dense-diag ,
        denseg              Dense-seg ,
        std     SEQUENCE OF Std-seg ,
        packed              Packed-seg ,
        disc                Seq-align-set,
        spliced             Spliced-seg,
        sparse              Sparse-seg
    } ,

    -- regions of sequence over which align
    --  was computed
    bounds SET OF Seq-loc OPTIONAL,

    -- alignment id
    id SEQUENCE OF Object-id OPTIONAL,

    --extra info
    ext SEQUENCE OF User-object OPTIONAL
}

-- Dense-diag: a single ungapped segment shared by all listed sequences:
--   one start per id and one common length. dim defaults to 2.
Dense-diag ::= SEQUENCE {         -- for (multiway) diagonals
    dim INTEGER DEFAULT 2 ,    -- dimensionality
    ids SEQUENCE OF Seq-id ,   -- sequences in order
    starts SEQUENCE OF INTEGER ,  -- start OFFSETS in ids order
    len INTEGER ,                 -- len of aligned segments
    strands SEQUENCE OF Na-strand OPTIONAL ,
    scores SET OF Score OPTIONAL }

    -- Dense-seg: the densest packing for sequence alignments only.
    --            a start of -1 indicates a gap for that sequence of
    --            length lens.
    --
    -- id=100  AAGGCCTTTTAGAGATGATGATGATGATGA
    -- id=200  AAGGCCTTTTAG.......GATGATGATGA
    -- id=300  ....CCTTTTAGAGATGATGAT....ATGA
    --
    -- dim = 3, numseg = 6, ids = { 100, 200, 300 }
    -- starts = { 0,0,-1, 4,4,0, 12,-1,8, 19,12,15, 22,15,-1, 26,19,18 }
    -- lens = { 4, 8, 7, 3, 4, 4 }
    --

-- Dense-seg: starts holds one entry per sequence for each segment, in ids
--   order within each segment; lens holds one length per segment.
--   dim defaults to 2.
Dense-seg ::= SEQUENCE {          -- for (multiway) global or partial alignments
    dim INTEGER DEFAULT 2 ,       -- dimensionality
    numseg INTEGER ,              -- number of segments here
    ids SEQUENCE OF Seq-id ,      -- sequences in order
    starts SEQUENCE OF INTEGER ,  -- start OFFSETS in ids order within segs
    lens SEQUENCE OF INTEGER ,    -- lengths in ids order within segs
    strands SEQUENCE OF Na-strand OPTIONAL ,
    scores SEQUENCE OF Score OPTIONAL }  -- score for each seg

-- Packed-seg: like Dense-seg in its members, but starts are given for the
--   whole alignment and a separate "present" octet string records which
--   sequences take part in each segment. dim defaults to 2.
Packed-seg ::= SEQUENCE {         -- for (multiway) global or partial alignments
    dim INTEGER DEFAULT 2 ,       -- dimensionality
    numseg INTEGER ,              -- number of segments here
    ids SEQUENCE OF Seq-id ,      -- sequences in order
    starts SEQUENCE OF INTEGER ,  -- start OFFSETS in ids order for whole alignment
    present OCTET STRING ,        -- Boolean if each sequence present or absent in
                                  --   each segment
    lens SEQUENCE OF INTEGER ,    -- length of each segment
    strands SEQUENCE OF Na-strand OPTIONAL ,
    scores SEQUENCE OF Score OPTIONAL }  -- score for each segment

-- Std-seg: one alignment segment expressed as a list of Seq-loc values.
--   loc is the only required member; dim defaults to 2.
Std-seg ::= SEQUENCE {
    dim INTEGER DEFAULT 2 ,       -- dimensionality
    ids SEQUENCE OF Seq-id OPTIONAL ,
    loc SEQUENCE OF Seq-loc ,
    scores SET OF Score OPTIONAL }


-- Spliced-seg: alignment of a product (transcript or protein) to a genomic
--   sequence as a list of exons. product-type and exons are required; ids,
--   strands, poly-a, product-length and modifiers are OPTIONAL.
Spliced-seg ::= SEQUENCE {
    -- product is either protein or transcript (cDNA)
    product-id Seq-id OPTIONAL,
    genomic-id Seq-id OPTIONAL,

    -- should be 'plus' or 'minus'
    product-strand Na-strand OPTIONAL ,
    genomic-strand Na-strand OPTIONAL ,

    product-type ENUMERATED {
        transcript(0),
        protein(1)
    },

    -- set of segments involved
    -- each segment corresponds to one exon
    -- exons are always in biological order
    exons SEQUENCE OF Spliced-exon ,

    -- optional poly(A) tail
    poly-a INTEGER OPTIONAL,

    -- length of the product, in bases/residues
    -- from this, a 3' unaligned length can be extracted; this also captures
    -- the case in which a protein aligns leaving a partial codon alignment
    -- at the 3' end
    product-length INTEGER OPTIONAL,

    -- alignment descriptors / modifiers
    -- this provides us a set for extension
    modifiers SET OF Spliced-seg-modifier OPTIONAL
}

-- Spliced-seg-modifier: one boolean descriptor attached to a Spliced-seg
--   through its modifiers set
Spliced-seg-modifier ::= CHOICE {
    -- protein aligns from the start and the first codon
    -- on both product and genomic is start codon
    start-codon-found BOOLEAN,

    -- protein aligns to its end and there is a stop codon
    -- on the genomic right after the alignment
    stop-codon-found BOOLEAN
}


-- complete or partial exon
-- two consecutive Spliced-exons may belong to one exon
-- The four start/end coordinates are required; everything else is OPTIONAL.
-- Several members (ids, strands) have the same names as members of
-- Spliced-seg.
Spliced-exon ::= SEQUENCE {
    -- product-end >= product-start
    product-start Product-pos ,
    product-end Product-pos ,

    -- genomic-end >= genomic-start
    genomic-start INTEGER ,
    genomic-end INTEGER ,

    -- product is either protein or transcript (cDNA)
    product-id Seq-id OPTIONAL ,
    genomic-id Seq-id OPTIONAL ,

    -- should be 'plus' or 'minus'
    product-strand Na-strand OPTIONAL ,

    -- genomic-strand represents the strand of translation
    genomic-strand Na-strand OPTIONAL ,

    -- basic segments always are in biologic order
    parts SEQUENCE OF Spliced-exon-chunk OPTIONAL ,

    -- scores for this exon
    scores Score-set OPTIONAL ,

    -- splice sites
    acceptor-before-exon Splice-site OPTIONAL,
    donor-after-exon Splice-site OPTIONAL,

    -- flag: is this exon complete or partial?
    partial BOOLEAN OPTIONAL,

    --extra info
    ext SEQUENCE OF User-object OPTIONAL
}


-- Product-pos: a position on the product, either a plain integer (nucpos)
--   or a structured protein position (protpos)
Product-pos ::= CHOICE {
    nucpos INTEGER,
    protpos Prot-pos
}


-- codon based position on protein (1/3 of amino acid)
Prot-pos ::= SEQUENCE {
    -- standard protein position
    amin INTEGER ,

    -- 0, 1, 2, or 3 as for Cdregion
    -- 0 = not set
    -- 1, 2, 3 = actual frame
    frame INTEGER DEFAULT 0
}


-- Spliced-exon-chunk: piece of an exon
-- lengths are given in nucleotide bases (1/3 of amino acid when product is a
-- protein)
-- Every variant carries a single INTEGER; the variant name gives its kind.
Spliced-exon-chunk ::= CHOICE {
    -- both sequences represented, product and genomic sequences match
    match INTEGER ,

    -- both sequences represented, product and genomic sequences do not match
    mismatch INTEGER ,

    -- both sequences are represented, there is sufficient similarity
    -- between product and genomic sequences. Can be used to replace stretches
    -- of matches and mismatches, mostly for protein to genomic where
    -- definition of match or mismatch depends on translation table
    diag INTEGER ,

     -- insertion in product sequence (i.e. gap in the genomic sequence)
    product-ins INTEGER ,

     -- insertion in genomic sequence (i.e. gap in the product sequence)
    genomic-ins INTEGER
}


-- site involved in splice
-- Used for Spliced-exon.acceptor-before-exon and .donor-after-exon.
Splice-site ::= SEQUENCE {
    -- typically two bases in the intronic region, always
    -- in IUPAC format
    bases VisibleString
}


-- ==========================================================================
--
-- Sparse-seg follows the semantics of dense-seg and is more optimal for
-- representing sparse multiple alignments
--
-- ==========================================================================


-- Sparse-seg: a multiple alignment stored as a set of pairwise alignments.
--   rows is the only required member.
Sparse-seg ::= SEQUENCE {
    master-id Seq-id OPTIONAL,

    -- pairwise alignments constituting this multiple alignment
    rows SET OF Sparse-align,

    -- per-row scores
    row-scores SET OF Score OPTIONAL,

    -- index of extra items
    ext  SET OF Sparse-seg-ext OPTIONAL
}

-- Sparse-align: one pairwise alignment between two sequences, stored as
--   parallel per-segment lists. Strand information is provided for the
--   second sequence only; it and seg-scores are OPTIONAL.
Sparse-align ::= SEQUENCE {
    first-id Seq-id,
    second-id Seq-id,

    numseg INTEGER,                      --number of segments
    first-starts SEQUENCE OF INTEGER ,   --starts on the first sequence [numseg]
    second-starts SEQUENCE OF INTEGER ,  --starts on the second sequence [numseg]
    lens SEQUENCE OF INTEGER ,           --lengths of segments [numseg]
    second-strands SEQUENCE OF Na-strand OPTIONAL ,

    -- per-segment scores
    seg-scores SET OF Score OPTIONAL
}

-- Sparse-seg-ext: currently carries only an index. The commented-out lines
--   inside the braces sketch an index-plus-User-field form that is not
--   part of the active definition.
Sparse-seg-ext ::= SEQUENCE {
    --seg-ext SET OF {
    --    index INTEGER,
    --    data User-field
    -- }
    index INTEGER
}


-- use of Score is discouraged for external ASN.1 specifications
-- A Score is a required value (real or integer) with an optional
-- identifying Object-id.
Score ::= SEQUENCE {
    id Object-id OPTIONAL ,
    value CHOICE {
        real REAL ,
        int INTEGER
    }
}

-- use of Score-set is encouraged for external ASN.1 specifications
-- An unordered collection of Score values.
Score-set ::= SET OF Score

END

--$Revision: 6.0 $
--**********************************************************************
--
--  NCBI Sequence Analysis Results (other than alignments)
--  by James Ostell, 1990
--
--**********************************************************************

NCBI-Seqres DEFINITIONS ::=
BEGIN

EXPORTS Seq-graph;

IMPORTS Seq-loc FROM NCBI-Seqloc;

--*** Sequence Graph ********************************
--*
--*   for values mapped by residue or range to sequence
--*

-- Seq-graph: values mapped by residue or range onto a sequence location.
--   loc, numval and graph are required; titles, comment, compression and
--   the a/b scaling factors are OPTIONAL. graph holds the values in one of
--   three numeric representations.
Seq-graph ::= SEQUENCE {
    title VisibleString OPTIONAL ,
    comment VisibleString OPTIONAL ,
    loc Seq-loc ,                       -- region this applies to
    title-x VisibleString OPTIONAL ,    -- title for x-axis
    title-y VisibleString OPTIONAL ,
    comp INTEGER OPTIONAL ,             -- compression (residues/value)
    a REAL OPTIONAL ,                   -- for scaling values
    b REAL OPTIONAL ,                   -- display = (a x value) + b
    numval INTEGER ,                    -- number of values in graph
    graph CHOICE {
        real Real-graph ,
        int Int-graph ,
        byte Byte-graph } }

-- Real-graph: graph values as REALs, with display bounds and axis position;
--   all four members are required
Real-graph ::= SEQUENCE {
    max REAL ,                          -- top of graph
    min REAL ,                          -- bottom of graph
    axis REAL ,                         -- value to draw axis on
    values SEQUENCE OF REAL }

Int-graph ::= SEQUENCE {
    max INTEGER ,
    min INTEGER ,
    axis INTEGER ,
    values SEQUENCE OF INTEGER }

Byte-graph ::= SEQUENCE {              -- integer from 0-255
    max INTEGER ,
    min INTEGER ,
    axis INTEGER ,
    values OCTET STRING }

END

--$Revision: 6.1 $
--********************************************************************
--
--  Direct Submission of Sequence Data
--  James Ostell, 1991
--
--  This is a trial specification for direct submission of sequence
--    data worked out between NCBI and EMBL
--  Later revised to reflect work with GenBank and Integrated database
--
--  Version 3.0, 1994
--    This is the official NCBI sequence submission format now.
--
--********************************************************************

NCBI-Submit DEFINITIONS ::=
BEGIN

EXPORTS Seq-submit, Contact-info;

IMPORTS Cit-sub, Author FROM NCBI-Biblio
        Date, Object-id FROM NCBI-General
        Seq-annot FROM NCBI-Sequence
        Seq-id FROM NCBI-Seqloc
        Seq-entry FROM NCBI-Seqset;

-- Seq-submit: a Submit-block header followed by the submitted data, which
-- is exactly one of: entries, annotations, or a list of ids to delete.
Seq-submit ::= SEQUENCE {
    sub Submit-block,
    data CHOICE {
        entrys SET OF Seq-entry,        -- sequence(s)
        annots SET OF Seq-annot,        -- annotation(s)
        delete SET OF Seq-id            -- deletions of entries
    }
}

-- Submit-block: who submitted, the citation, release control and the
-- kind of submission.
Submit-block ::= SEQUENCE {
    contact Contact-info,               -- who to contact
    cit     Cit-sub,                    -- citation for this submission
    hup     BOOLEAN DEFAULT FALSE,      -- hold until publish
    reldate Date OPTIONAL,              -- release by date
    subtype INTEGER {                   -- type of submission
        new (1),                        -- new data
        update (2),                     -- update by author
        revision (3),                   -- 3rd party (non-author) update
        other (255)
    } OPTIONAL,
    tool     VisibleString OPTIONAL,    -- tool used to make submission
    user-tag VisibleString OPTIONAL,    -- user supplied id for this submission
    comment  VisibleString OPTIONAL     -- user comments/advice to database
}

-- Contact-info: who to contact to discuss the submission.
-- Every field is OPTIONAL.  The free-text name is marked obsolete; the
-- last-name / first-name / middle-initial fields are its structured
-- replacement, and the Author-typed contact field is flagged as the
-- eventual replacement for the fields listed before it.
Contact-info ::= SEQUENCE {
    name           VisibleString OPTIONAL,      -- OBSOLETE: will be removed
    address        SEQUENCE OF VisibleString OPTIONAL,
    phone          VisibleString OPTIONAL,
    fax            VisibleString OPTIONAL,
    email          VisibleString OPTIONAL,
    telex          VisibleString OPTIONAL,
    owner-id       Object-id OPTIONAL,          -- for owner accounts
    password       OCTET STRING OPTIONAL,
    last-name      VisibleString OPTIONAL,      -- structured to replace name above
    first-name     VisibleString OPTIONAL,
    middle-initial VisibleString OPTIONAL,
    contact        Author OPTIONAL              -- WARNING: this will replace the above
}

END

--$Revision: 1.15 $
--**********************************************************************
--
--  Definitions for Cn3D-specific data (rendering settings,
--    user annotations, etc.)
--
--  by Paul Thiessen
--
--  National Center for Biotechnology Information
--  National Institutes of Health
--  Bethesda, MD 20894 USA
--
-- asntool -m cn3d.asn -w 100 -o cn3d.h
-- asntool -B objcn3d -m cn3d.asn -G -w 100 -K cn3d.h -I mapcn3d.h \
--   -M ../mmdb1.asn,../mmdb2.asn,../mmdb3.asn
--**********************************************************************

NCBI-Cn3d DEFINITIONS ::=
-- Cn3D-specific information

BEGIN

EXPORTS  Cn3d-style-dictionary, Cn3d-user-annotations;

IMPORTS  Biostruc-id FROM MMDB
         Molecule-id, Residue-id FROM MMDB-Chemical-graph;


-- values of enumerations must match those in cn3d/style_manager.hpp!
-- (So the numbers below are the contract; do not renumber when editing.)

Cn3d-backbone-type ::= ENUMERATED {     -- for different types of backbones
    off (1),
    trace (2),
    partial (3),
    complete (4)
}

Cn3d-drawing-style ::= ENUMERATED {     -- atom/bond/object rendering styles
    -- for atoms and bonds
    wire (1),
    tubes (2),
    ball-and-stick (3),
    space-fill (4),
    wire-worm (5),
    tube-worm (6),
    -- for 3d-objects
    with-arrows (7),
    without-arrows (8)
}

-- Note that the numeric values below are not in listing order: residue is
-- 20 and the block-*-fit schemes are 17-19, interleaved with lower numbers.
Cn3d-color-scheme ::= ENUMERATED {  -- available color schemes (not all
                                    -- necessarily applicable to all objects)
    element (1),
    object (2),
    molecule (3),
    domain (4),
    residue (20),
    secondary-structure (5),
    user-select (6),
    -- different alignment conservation coloring (currently only for proteins)
    aligned (7),
    identity (8),
    variety (9),
    weighted-variety (10),
    information-content (11),
    fit (12),
    block-fit (17),
    block-z-fit (18),
    block-row-fit (19),
    -- other schemes
    temperature (13),
    hydrophobicity (14),
    charge (15),
    rainbow (16)
}

-- RGB triplet, interpreted (after division by the scale-factor) as floating
-- point values which should range from [0..1]. The default scale-factor is
-- 255, so that one can conveniently set integer byte values [0..255] for
-- colors with the scale-factor already set appropriately to map to [0..1].
--    An alpha value is allowed, but is currently ignored by Cn3D.
Cn3d-color ::= SEQUENCE {
    scale-factor INTEGER DEFAULT 255,
    red INTEGER,
    green INTEGER,
    blue INTEGER,
    alpha INTEGER DEFAULT 255
}

-- Backbone style: which backbone type to show, how to draw it, and how to
-- color it (a scheme plus a user color).
Cn3d-backbone-style ::= SEQUENCE {  -- style blob for backbones only
    type Cn3d-backbone-type,
    style Cn3d-drawing-style,
    color-scheme Cn3d-color-scheme,
    user-color Cn3d-color
}

-- Same shape as Cn3d-backbone-style, except that an on/off BOOLEAN takes
-- the place of the backbone type.
Cn3d-general-style ::= SEQUENCE {   -- style blob for other objects
    is-on BOOLEAN,
    style Cn3d-drawing-style,
    color-scheme Cn3d-color-scheme,
    user-color Cn3d-color
}

Cn3d-backbone-label-style ::= SEQUENCE { -- style blob for backbone labels
    spacing INTEGER,        -- zero means none
    type ENUMERATED {
        one-letter (1),
        three-letter (2)
    },
    number ENUMERATED {
        none (0),
        sequential (1),     -- from 1, by residues present, to match sequence
        pdb (2)             -- use number assigned by PDB
    },
    termini BOOLEAN,
    white BOOLEAN           -- all white, or (if false) color of alpha carbon
}

-- rendering settings for Cn3D (mirrors StyleSettings class)
Cn3d-style-settings ::= SEQUENCE {
    name VisibleString OPTIONAL,                -- a name (for favorites)
    protein-backbone Cn3d-backbone-style,       -- backbone styles
    nucleotide-backbone Cn3d-backbone-style,
    protein-sidechains Cn3d-general-style,      -- styles for other stuff
    nucleotide-sidechains Cn3d-general-style,
    heterogens Cn3d-general-style,
    solvents Cn3d-general-style,
    connections Cn3d-general-style,
    helix-objects Cn3d-general-style,
    strand-objects Cn3d-general-style,
    virtual-disulfides-on BOOLEAN,              -- virtual disulfides
    virtual-disulfide-color Cn3d-color,
    hydrogens-on BOOLEAN,                       -- hydrogens
    background-color Cn3d-color,                -- background
    -- floating point parameters - scale-factor applies to all the following:
    scale-factor INTEGER,
    space-fill-proportion INTEGER,
    ball-radius INTEGER,
    stick-radius INTEGER,
    tube-radius INTEGER,
    tube-worm-radius INTEGER,
    helix-radius INTEGER,
    strand-width INTEGER,
    strand-thickness INTEGER,
    -- backbone labels (no labels if not present)
    protein-labels Cn3d-backbone-label-style OPTIONAL,
    nucleotide-labels Cn3d-backbone-label-style OPTIONAL,
    -- ion labels
    ion-labels BOOLEAN OPTIONAL
}

-- an unordered collection (SET OF) of style settings
Cn3d-style-settings-set ::= SET OF Cn3d-style-settings

-- integer key for a style-table entry; used as the id of a
-- Cn3d-style-table-item and as the style-id of a Cn3d-user-annotation
Cn3d-style-table-id ::= INTEGER

-- one style-table entry: an id paired with the style settings it names
Cn3d-style-table-item ::= SEQUENCE {
    id Cn3d-style-table-id,
    style Cn3d-style-settings
}

-- the global settings, and a lookup table of styles for user annotations.
Cn3d-style-dictionary ::= SEQUENCE {
    global-style Cn3d-style-settings,
    style-table SEQUENCE OF Cn3d-style-table-item OPTIONAL
}

-- a range of residues in a chain, identified by MMDB residue-id
-- (e.g., numbered from 1)
Cn3d-residue-range ::= SEQUENCE {
    from Residue-id,
    to Residue-id
}

-- set of locations on a particular chain
Cn3d-molecule-location ::= SEQUENCE {
    molecule-id Molecule-id,    -- MMDB molecule id
    -- which residues; whole molecule implied if absent
    residues SEQUENCE OF Cn3d-residue-range OPTIONAL
}

-- set of locations on a particular structure object (e.g., a PDB/MMDB
-- structure), which may include multiple ranges of residues each on
-- multiple chains.
Cn3d-object-location ::= SEQUENCE {
    structure-id Biostruc-id,
    residues SEQUENCE OF Cn3d-molecule-location
}

-- information for an individual user annotation
Cn3d-user-annotation ::= SEQUENCE {
    name VisibleString,                 -- a (short) name for this annotation
    description VisibleString OPTIONAL, -- an optional longer description
    style-id Cn3d-style-table-id,       -- how to draw this annotation
    residues SEQUENCE OF Cn3d-object-location,  -- which residues to cover
    is-on BOOLEAN   -- whether this annotation is to be turned on in Cn3D
}

-- a GL-ordered transformation matrix
-- (sixteen REAL components, m0 through m15)
Cn3d-GL-matrix ::= SEQUENCE {
    m0  REAL, m1  REAL, m2  REAL, m3  REAL,
    m4  REAL, m5  REAL, m6  REAL, m7  REAL,
    m8  REAL, m9  REAL, m10 REAL, m11 REAL,
    m12 REAL, m13 REAL, m14 REAL, m15 REAL
}

-- a floating point 3d vector
Cn3d-vector ::= SEQUENCE {
    x REAL,
    y REAL,
    z REAL
}

-- parameters used to set up the camera in Cn3D
Cn3d-view-settings ::= SEQUENCE {
    camera-distance REAL,       -- camera on +Z axis this distance from origin
    camera-angle-rad REAL,      -- camera angle
    camera-look-at-X REAL,      -- X,Y of point in Z=0 plane camera points at
    camera-look-at-Y REAL,
    camera-clip-near REAL,      -- distance of clipping planes from camera
    camera-clip-far REAL,
    matrix Cn3d-GL-matrix,      -- transformation of objects in the scene
    rotation-center Cn3d-vector -- center of rotation of whole scene
}

-- The list of annotations for a given CDD/mime. If residue regions overlap
-- between annotations that are turned on, the last annotation in this list
-- that contains these residues will be used as the display style for these
-- residues.
--   Also contains the current viewpoint, so that user's camera angle
-- can be stored and reproduced, for illustrations, on-line figures, etc.
Cn3d-user-annotations ::= SEQUENCE {
    annotations SEQUENCE OF Cn3d-user-annotation OPTIONAL,
    view Cn3d-view-settings OPTIONAL
}

END

--$Revision: 6.3 $
--****************************************************************
--
--  NCBI Project Definition Module
--  by Jim Ostell and Jonathan Kans, 1998
--
--****************************************************************

NCBI-Project DEFINITIONS ::=
BEGIN

EXPORTS Project, Project-item;

IMPORTS Date FROM NCBI-General
        PubMedId FROM NCBI-Biblio
        Seq-id, Seq-loc FROM NCBI-Seqloc
        Seq-annot, Pubdesc FROM NCBI-Sequence
        Seq-entry FROM NCBI-Seqset
        Pubmed-entry FROM NCBI-PubMed;

-- Project: an optional description plus the project's data.
Project ::= SEQUENCE {
    descr Project-descr OPTIONAL,
    data  Project-item
}

-- Project-item: the data of a Project, as exactly one of the alternatives
-- below.  Most alternatives are sets; structid and structent are declared
-- NULL and therefore carry no data.  The proj alternative nests Projects.
Project-item ::= CHOICE {
    -- integer uids
    pmuid      SET OF INTEGER,
    protuid    SET OF INTEGER,
    nucuid     SET OF INTEGER,
    sequid     SET OF INTEGER,
    genomeuid  SET OF INTEGER,
    structuid  SET OF INTEGER,
    -- ids
    pmid       SET OF PubMedId,
    protid     SET OF Seq-id,
    nucid      SET OF Seq-id,
    seqid      SET OF Seq-id,
    genomeid   SET OF Seq-id,
    structid   NULL,
    -- entries
    pment      SET OF Pubmed-entry,
    protent    SET OF Seq-entry,
    nucent     SET OF Seq-entry,
    seqent     SET OF Seq-entry,
    genomeent  SET OF Seq-entry,
    structent  NULL,
    -- annotations, locations and nested projects
    seqannot   SET OF Seq-annot,
    loc        SET OF Seq-loc,
    proj       SET OF Project
}

-- Project-descr: one or more ids, an optional name and optional
-- descriptors for a Project.
Project-descr ::= SEQUENCE {
    id    SET OF Project-id,
    name  VisibleString OPTIONAL,
    descr SET OF Projdesc OPTIONAL
}

-- Projdesc: a single descriptor, one of publication, date, comment or
-- title.
Projdesc ::= CHOICE {
    pub     Pubdesc,
    date    Date,
    comment VisibleString,
    title   VisibleString
}

-- Project-id: a project identifier, carried as a string.
Project-id ::= VisibleString

END


--$Revision: 6.0 $
--*********************************************************************
--
--  access.asn
--
--     messages for data access
--
--*********************************************************************

NCBI-Access DEFINITIONS ::=
BEGIN

EXPORTS Link-set;

-- Link-set: a count plus optional parallel lists of uids and weights.
--   links between same class = neighbors
--   links between other classes = links
Link-set ::= SEQUENCE {
    num     INTEGER,                        -- number of links to this doc type
    uids    SEQUENCE OF INTEGER OPTIONAL,   -- the links
    weights SEQUENCE OF INTEGER OPTIONAL    -- the weights
}

END
--$Revision: 6.0 $
--**********************************************************************
--
--  NCBI Sequence Feature Definition Module
--  by James Ostell, 1994
--
--**********************************************************************

NCBI-FeatDef DEFINITIONS ::=
BEGIN

EXPORTS FeatDef, FeatDefSet, FeatDispGroup, FeatDispGroupSet;

-- FeatDef: definition of one feature type (labels, keys and groupings).
FeatDef ::= SEQUENCE {
    typelabel    VisibleString,     -- short label for type eg "CDS"
    menulabel    VisibleString,     -- label for a menu eg "Coding Region"
    featdef-key  INTEGER,           -- unique for this feature definition
    seqfeat-key  INTEGER,           -- SeqFeat.data.choice from objfeat.h
    entrygroup   INTEGER,           -- Group for data entry
    displaygroup INTEGER,           -- Group for data display
    molgroup     FeatMolType        -- Type of molecule (aa, na or both)
}

-- FeatMolType: molecule class for a FeatDef.
-- NOTE(review): this type is not listed in EXPORTS although the exported
-- FeatDef refers to it.
FeatMolType ::= ENUMERATED {
    aa (1),                         -- proteins
    na (2),                         -- nucleic acids
    both (3)                        -- both
}

-- collections of definitions
FeatDefSet ::= SEQUENCE OF FeatDef

-- FeatDispGroup: a display group, as an integer key and a name.
FeatDispGroup ::= SEQUENCE {
    groupkey  INTEGER,
    groupname VisibleString
}

FeatDispGroupSet ::= SEQUENCE OF FeatDispGroup

-- FeatDefGroupSet: the display groups together with the feature
-- definitions.
-- NOTE(review): not listed in EXPORTS.
FeatDefGroupSet ::= SEQUENCE {
    groups FeatDispGroupSet,
    defs   FeatDefSet
}

END


--$Revision: 6.12 $
--****************************************************************
--
--  NCBI MIME type (chemical/ncbi-asn1-ascii and chemical/ncbi-asn1-binary)
--  by Jonathan Epstein, February 1996
--
--****************************************************************

NCBI-Mime DEFINITIONS ::=
BEGIN

EXPORTS Ncbi-mime-asn1;

IMPORTS Biostruc, Biostruc-annot-set FROM MMDB
        Cdd FROM NCBI-Cdd
        Seq-entry FROM NCBI-Seqset
        Seq-annot FROM NCBI-Sequence
        Medline-entry FROM NCBI-Medline
        Cn3d-style-dictionary, Cn3d-user-annotations FROM NCBI-Cn3d;

-- Ncbi-mime-asn1: the top-level object; exactly one of the bundles below.
Ncbi-mime-asn1 ::= CHOICE {
    entrez     Entrez-general,              -- just a structure
    alignstruc Biostruc-align,              -- structures & sequences & alignments
    alignseq   Biostruc-align-seq,          -- sequence alignment
    strucseq   Biostruc-seq,                -- structure & sequences
    strucseqs  Biostruc-seqs,               -- structure & sequences & alignments
    general    Biostruc-seqs-aligns-cdd     -- all-purpose "grab bag"
    -- others may be added here in the future
}

-- generic bundle of sequence and alignment info
Bundle-seqs-aligns ::= SEQUENCE {
    sequences        SET OF Seq-entry OPTIONAL,         -- sequences
    seqaligns        SET OF Seq-annot OPTIONAL,         -- sequence alignments
    strucaligns      Biostruc-annot-set OPTIONAL,       -- structure alignments
    imports          SET OF Seq-annot OPTIONAL,         -- imports (updates in Cn3D)
    style-dictionary Cn3d-style-dictionary OPTIONAL,    -- Cn3D stuff
    user-annotations Cn3d-user-annotations OPTIONAL
}

-- Biostruc-seqs-aligns-cdd: sequence/alignment data (as a bundle or a CDD)
-- plus optional structures.
Biostruc-seqs-aligns-cdd ::= SEQUENCE {
    seq-align-data CHOICE {
        bundle Bundle-seqs-aligns,          -- either seqs + alignments
        cdd    Cdd                          -- or CDD (which contains these)
    },
    structures SET OF Biostruc OPTIONAL,    -- structures
    -- type of structures to load if not present; meanings and values are
    -- same as MMDB's Model-type
    structure-type ENUMERATED {
        ncbi-backbone (2),
        ncbi-all-atom (3),
        pdb-model (4)
    } OPTIONAL
}

-- Biostruc-align: a master structure, its slaves, and the structure and
-- sequence alignments between them.
Biostruc-align ::= SEQUENCE {
    master           Biostruc,
    slaves           SET OF Biostruc,
    alignments       Biostruc-annot-set,    -- structure alignments
    sequences        SET OF Seq-entry,      -- sequences
    seqalign         SET OF Seq-annot,
    style-dictionary Cn3d-style-dictionary OPTIONAL,
    user-annotations Cn3d-user-annotations OPTIONAL
}

-- display seq structure align only
Biostruc-align-seq ::= SEQUENCE {
    sequences        SET OF Seq-entry,      -- sequences
    seqalign         SET OF Seq-annot,
    style-dictionary Cn3d-style-dictionary OPTIONAL,
    user-annotations Cn3d-user-annotations OPTIONAL
}

-- display  structure seq added by yanli
Biostruc-seq ::= SEQUENCE {
    structure        Biostruc,
    sequences        SET OF Seq-entry,
    style-dictionary Cn3d-style-dictionary OPTIONAL,
    user-annotations Cn3d-user-annotations OPTIONAL
}

-- display blast alignment along with neighbor's structure added by yanli
Biostruc-seqs ::= SEQUENCE {
    structure        Biostruc,
    sequences        SET OF Seq-entry,      -- sequences
    seqalign         SET OF Seq-annot,
    style-dictionary Cn3d-style-dictionary OPTIONAL,
    user-annotations Cn3d-user-annotations OPTIONAL
}

-- Entrez-style: the presentation style named by an Entrez-general.
Entrez-style ::= ENUMERATED {
    docsum (1),
    genbank (2),
    genpept (3),
    fasta (4),
    asn1 (5),
    graphic (6),
    alignment (7),
    globalview (8),
    report (9),
    medlars (10),
    embl (11),
    pdb (12),
    kinemage (13)
}

-- Entrez-general: one data item (one of six kinds) with its style, plus an
-- optional title and location string.
Entrez-general ::= SEQUENCE {
    title VisibleString OPTIONAL,
    data CHOICE {
        ml         Medline-entry,
        prot       Seq-entry,
        nuc        Seq-entry,
        genome     Seq-entry,
        structure  Biostruc,
        strucAnnot Biostruc-annot-set
    },
    style    Entrez-style,
    location VisibleString OPTIONAL
}
END
--$Revision: 6.0 $
--********************************************************************
--
--  Print Templates
--  James Ostell, 1993
--
--
--********************************************************************

NCBI-ObjPrt DEFINITIONS ::=
BEGIN

EXPORTS PrintTemplate, PrintTemplateSet;

-- PrintTemplate: a named print format, with an optional path from which a
-- label is taken.
PrintTemplate ::= SEQUENCE {
    name      TemplateName,             -- name for this template
    labelfrom VisibleString OPTIONAL,   -- ASN.1 path to get label from
    format    PrintFormat
}

TemplateName ::= VisibleString

PrintTemplateSet ::= SEQUENCE OF PrintTemplate

-- PrintFormat: how to print the item at one ASN.1 partial path.  It is
-- recursive through PrintForm.block, whose components are PrintFormats.
PrintFormat ::= SEQUENCE {
    asn1   VisibleString,               -- ASN.1 partial path for this
    label  VisibleString OPTIONAL,      -- printable label
    prefix VisibleString OPTIONAL,
    suffix VisibleString OPTIONAL,
    form   PrintForm
}

-- Forms for various ASN.1 components
PrintForm ::= CHOICE {
    block        PrintFormBlock,
    boolean      PrintFormBoolean,
    enum         PrintFormEnum,
    text         PrintFormText,
    use-template TemplateName,
    user         UserFormat,
    null         NULL                   -- rarely used
}

-- UserFormat: a print function name with an optional default function
-- name.
UserFormat ::= SEQUENCE {
    printfunc   VisibleString,
    defaultfunc VisibleString OPTIONAL
}

-- for SEQUENCE, SET
PrintFormBlock ::= SEQUENCE {
    separator  VisibleString OPTIONAL,
    components SEQUENCE OF PrintFormat
}

-- PrintFormBoolean: optional strings for the true and false cases.
PrintFormBoolean ::= SEQUENCE {
    true  VisibleString OPTIONAL,
    false VisibleString OPTIONAL
}

-- PrintFormEnum: an optional list of strings.
PrintFormEnum ::= SEQUENCE {
    values SEQUENCE OF VisibleString OPTIONAL
}

-- PrintFormText: an optional text function name.
PrintFormText ::= SEQUENCE {
    textfunc VisibleString OPTIONAL
}

END

--$Revision: 6.11 $
--*********************************************************
--
-- ASN.1 and XML for the components of a GenBank format sequence
-- J.Ostell 2002
-- Updated 25 May 2010
--
--*********************************************************

NCBI-GBSeq DEFINITIONS ::=
BEGIN

--********
--  GBSeq represents the elements in a GenBank style report
--    of a sequence with some small additions to structure and support
--    for protein (GenPept) versions of GenBank format as seen in
--    Entrez. While this represents the simplification, reduction of
--    detail, and flattening to a single sequence perspective of GenBank
--    format (compared with the full ASN.1 or XML from which GenBank and
--    this format is derived at NCBI), it is presented in ASN.1 or XML for
--    automated parsing and processing. It is hoped that this compromise
--    will be useful for those bulk processing at the GenBank format level
--    of detail today. Since it is a compromise, a number of pragmatic
--    decisions have been made.
--
--  In pursuit of simplicity and familiarity a number of
--    fields do not have full substructure defined here where there is
--    already a standard GenBank format string. For example:
--
--   Date  DD-Mon-YYYY
--   Authors   LastName, Initials (with periods)
--   Journal   JournalName Volume (issue), page-range (year)
--   FeatureLocations as per GenBank feature table, but FeatureIntervals
--    may also be provided as a convenience
--   FeatureQualifiers  as per GenBank feature table
--   Primary has a string that represents a table to construct
--    a third party (TPA) sequence.
--   other-seqids can have strings with the "vertical bar format" sequence
--    identifiers used in BLAST for example, when they are non-genbank types.
--    Currently in GenBank format you only see GI, but there are others, like
--    patents, submitter clone names, etc which will appear here, as they
--    always have in the ASN.1 format, and full XML format.
--   source-db is a formatted text block for peptides in GenPept format that
--    carries information from the source protein database.
--
--  There are also a number of elements that could have been
--   more exactly specified, but in the interest of simplicity
--   have been simply left as options. For example..
--
--  accession and accession.version will always appear in a GenBank record
--   they are optional because this format can also be used for non-GenBank
--   sequences, and in that case will have only "other-seqids".
--
--  sequences will normally all have "sequence" filled in. But contig records
--    will have a "join" statement in the "contig" slot, and no "sequence".
--    We also may consider a retrieval option with no sequence of any kind
--     and no feature table to quickly check minimal values.
--
--  a reference may have an author list, or be from a consortium, or both.
--
--  some fields, such as taxonomy, do appear as separate elements in GenBank
--    format but without a specific linetype (in GenBank format this comes
--    under ORGANISM). Another example is the separation of primary accession
--    from the list of secondary accessions. In GenBank format primary
--    accession is just the first one on the list that includes all secondaries
--    after it.
--
--  create-date deserves special comment. The date you see on the right hand
--    side of the LOCUS line in GenBank format is actually the last date
--    the record was modified (or the update-date). The date the record was
--    first submitted to GenBank appears in the first submission citation in
--    the reference section. Internally in the databases and ASN.1 NCBI keeps
--    the first date the record was released into the sequence database at
--    NCBI as create-date. For records from EMBL, which supports create-date,
--    it is the date provided by EMBL. For DDBJ records, which do not supply
--    a create-date (same as GenBank format) the create-date is the first date
--    NCBI saw the record from DDBJ. For older GenBank records, before NCBI
--    took responsibility for GenBank, it is just the first date NCBI saw the
--    record. Create-date can be very useful, so we expose it here, but users
--    must understand it is only an approximation and comes from many sources,
--    and with many exceptions and caveats. It does NOT tell you the first
--    date the public might have seen this record and thus is NOT an accurate
--    measure for legal issues of precedence.
--
--********

-- GBSet: an ordered list of GBSeq records.
GBSet ::= SEQUENCE OF GBSeq

-- GBSeq: one sequence record.  Only length and moltype are mandatory;
-- every other field is OPTIONAL.
GBSeq ::= SEQUENCE {
    locus VisibleString OPTIONAL,
    length INTEGER,
    strandedness VisibleString OPTIONAL,
    moltype VisibleString,
    topology VisibleString OPTIONAL,
    division VisibleString OPTIONAL,
    update-date VisibleString OPTIONAL,
    create-date VisibleString OPTIONAL,
    update-release VisibleString OPTIONAL,
    create-release VisibleString OPTIONAL,
    definition VisibleString OPTIONAL,
    primary-accession VisibleString OPTIONAL,
    entry-version VisibleString OPTIONAL,
    accession-version VisibleString OPTIONAL,
    other-seqids SEQUENCE OF GBSeqid OPTIONAL,
    secondary-accessions SEQUENCE OF GBSecondary-accn OPTIONAL,
    project VisibleString OPTIONAL,
    keywords SEQUENCE OF GBKeyword OPTIONAL,
    segment VisibleString OPTIONAL,
    source VisibleString OPTIONAL,
    organism VisibleString OPTIONAL,
    taxonomy VisibleString OPTIONAL,
    references SEQUENCE OF GBReference OPTIONAL,
    comment VisibleString OPTIONAL,
    comment-set SEQUENCE OF GBComment OPTIONAL,
    struc-comments SEQUENCE OF GBStrucComment OPTIONAL,
    primary VisibleString OPTIONAL,
    source-db VisibleString OPTIONAL,
    database-reference VisibleString OPTIONAL,
    feature-table SEQUENCE OF GBFeature OPTIONAL,
    feature-set SEQUENCE OF GBFeatureSet OPTIONAL,
    sequence VisibleString OPTIONAL,    -- Optional for contig, wgs, etc.
    contig VisibleString OPTIONAL,
    alt-seq SEQUENCE OF GBAltSeqData OPTIONAL,
    xrefs SEQUENCE OF GBXref OPTIONAL
}

-- element type of GBSeq.other-seqids
GBSeqid ::= VisibleString

-- element type of GBSeq.secondary-accessions
GBSecondary-accn ::= VisibleString

-- element type of GBSeq.keywords
GBKeyword ::= VisibleString

-- GBReference: one entry of GBSeq.references.  The reference and journal
-- strings are mandatory; the rest is OPTIONAL.
GBReference ::= SEQUENCE {
    reference VisibleString,
    position VisibleString OPTIONAL,
    authors SEQUENCE OF GBAuthor OPTIONAL,
    consortium VisibleString OPTIONAL,
    title VisibleString OPTIONAL,
    journal VisibleString,
    xref SEQUENCE OF GBXref OPTIONAL,
    pubmed INTEGER OPTIONAL,
    remark VisibleString OPTIONAL
}

-- element type of GBReference.authors
GBAuthor ::= VisibleString

-- GBXref: a cross-reference, as a database name and an id within it.
GBXref ::= SEQUENCE {
    dbname VisibleString,
    id VisibleString
}

-- GBComment: an optionally typed comment made of paragraphs.
GBComment ::= SEQUENCE {
    type VisibleString OPTIONAL,
    paragraphs SEQUENCE OF GBCommentParagraph
}

-- element type of GBComment.paragraphs
GBCommentParagraph ::= VisibleString

-- GBStrucComment: an optionally named list of structured comment items.
GBStrucComment ::= SEQUENCE {
    name VisibleString OPTIONAL,
    items SEQUENCE OF GBStrucCommentItem
}

-- GBStrucCommentItem: tag, value and url, all OPTIONAL.
GBStrucCommentItem ::= SEQUENCE {
    tag VisibleString OPTIONAL,
    value VisibleString OPTIONAL,
    url VisibleString OPTIONAL
}

-- GBFeatureSet: a list of features with an optional annotation source.
GBFeatureSet ::= SEQUENCE {
    annot-source VisibleString OPTIONAL,
    features SEQUENCE OF GBFeature
}

-- GBFeature: one feature.  The key and location strings are mandatory;
-- intervals, qualifiers and cross-references are OPTIONAL.
GBFeature ::= SEQUENCE {
    key VisibleString,
    location VisibleString,
    intervals SEQUENCE OF GBInterval OPTIONAL,
    operator VisibleString OPTIONAL,
    partial5 BOOLEAN OPTIONAL,
    partial3 BOOLEAN OPTIONAL,
    quals SEQUENCE OF GBQualifier OPTIONAL,
    xrefs SEQUENCE OF GBXref OPTIONAL
}

-- GBInterval: accession is the only mandatory field; from, to, point and
-- the two flags are all OPTIONAL.
GBInterval ::= SEQUENCE {
    from INTEGER OPTIONAL,
    to INTEGER OPTIONAL,
    point INTEGER OPTIONAL,
    iscomp BOOLEAN OPTIONAL,
    interbp BOOLEAN OPTIONAL,
    accession VisibleString
}

-- GBQualifier: a qualifier name with an optional value.
GBQualifier ::= SEQUENCE {
    name VisibleString,
    value VisibleString OPTIONAL
}

-- GBAltSeqData: a named list of alternative-sequence items.
GBAltSeqData ::= SEQUENCE {
    name VisibleString,     -- e.g., contig, wgs, scaffold, cage, genome
    items SEQUENCE OF GBAltSeqItem OPTIONAL
}

-- GBAltSeqItem: every field is OPTIONAL.
GBAltSeqItem ::= SEQUENCE {
    interval GBInterval OPTIONAL,
    isgap BOOLEAN OPTIONAL,
    gap-length INTEGER OPTIONAL,
    gap-type VisibleString OPTIONAL,
    gap-linkage VisibleString OPTIONAL,
    gap-comment VisibleString OPTIONAL,
    first-accn VisibleString OPTIONAL,
    last-accn VisibleString OPTIONAL,
    value VisibleString OPTIONAL
}

END

--$Revision: 1.9 $
--************************************************************************
--
-- ASN.1 and XML for the components of a GenBank/EMBL/DDBJ sequence record
-- The International Nucleotide Sequence Database (INSD) collaboration
-- Version 1.6, 25 May 2010
--
--************************************************************************

INSD-INSDSeq DEFINITIONS ::=
BEGIN

--  INSDSeq provides the elements of a sequence as presented in the
--    GenBank/EMBL/DDBJ-style flatfile formats, with a small amount of
--    additional structure.
--    Although this single perspective of the three flatfile formats
--    provides a useful simplification, it hides to some extent the
--    details of the actual data underlying those formats. Nevertheless,
--    the XML version of INSD-Seq is being provided with
--    the hopes that it will prove useful to those who bulk-process
--    sequence data at the flatfile-format level of detail. Further
--    documentation regarding the content and conventions of those formats
--    can be found at:
--
--    URLs for the DDBJ, EMBL, and GenBank Feature Table Document:
--    http://www.ddbj.nig.ac.jp/FT/full_index.html
--    http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html
--    http://www.ncbi.nlm.nih.gov/projects/collab/FT/index.html
--
--    URLs for DDBJ, EMBL, and GenBank Release Notes :
--    ftp://ftp.ddbj.nig.ac.jp/database/ddbj/ddbjrel.txt
--    http://www.ebi.ac.uk/embl/Documentation/Release_notes/current/relnotes.html
--    ftp://ftp.ncbi.nih.gov/genbank/gbrel.txt
--
--    Because INSDSeq is a compromise, a number of pragmatic decisions have
--    been made:
--
--  In pursuit of simplicity and familiarity a number of fields do not
--    have full substructure defined here where there is already a
--    standard flatfile format string. For example:
--
--   Dates:      DD-MON-YYYY (eg 10-JUN-2003)
--
--   Author:     LastName, Initials  (eg Smith, J.N.)
--            or Lastname Initials   (eg Smith J.N.)
--
--   Journal:    JournalName Volume (issue), page-range (year)
--            or JournalName Volume(issue):page-range(year)
--            eg Appl. Environ. Microbiol. 61 (4), 1646-1648 (1995)
--               Appl. Environ. Microbiol. 61(4):1646-1648(1995).
--
--  FeatureLocations are represented as in the flatfile feature table,
--    but FeatureIntervals may also be provided as a convenience
--
--  FeatureQualifiers are represented as in the flatfile feature table.
--
--  Primary has a string that represents a table to construct
--    a third party (TPA) sequence.
--
--  other-seqids can have strings with the "vertical bar format" sequence
--    identifiers used in BLAST for example, when they are non-INSD types.
--
--  Currently in flatfile format you only see Accession numbers, but there
--    are others, like patents, submitter clone names, etc which will
--    appear here
--
--  There are also a number of elements that could have been more exactly
--    specified, but in the interest of simplicity have been simply left as
--    optional. For example:
--
--  All publicly accessible sequence records in INSDSeq format will
--    include accession and accession.version. However, these elements are
--    optional in INSDSeq so that this format can also be used
--    for non-public sequence data, prior to the assignment of accessions and
--    version numbers. In such cases, records will have only "other-seqids".
--
--  sequences will normally all have "sequence" filled in. But contig records
--    will have a "join" statement in the "contig" slot, and no "sequence".
--    We also may consider a retrieval option with no sequence of any kind
--    and no feature table to quickly check minimal values.
--
--  Four (optional) elements are specific to records represented via the EMBL
--    sequence database: INSDSeq_update-release, INSDSeq_create-release,
--    INSDSeq_entry-version, and INSDSeq_database-reference.
--
--  One (optional) element is specific to records originating at the GenBank
--    and DDBJ sequence databases: INSDSeq_segment.
--
--********

-- INSDSet: an ordered list (SEQUENCE OF) of INSDSeq records.
INSDSet ::= SEQUENCE OF INSDSeq

-- INSDSeq: one sequence record.  Only length and moltype are mandatory;
-- every other field is OPTIONAL.
INSDSeq ::= SEQUENCE {
    locus VisibleString OPTIONAL,
    length INTEGER,
    strandedness VisibleString OPTIONAL,
    moltype VisibleString,
    topology VisibleString OPTIONAL,
    division VisibleString OPTIONAL,
    update-date VisibleString OPTIONAL,
    create-date VisibleString OPTIONAL,
    update-release VisibleString OPTIONAL,
    create-release VisibleString OPTIONAL,
    definition VisibleString OPTIONAL,
    primary-accession VisibleString OPTIONAL,
    entry-version VisibleString OPTIONAL,
    accession-version VisibleString OPTIONAL,
    other-seqids SEQUENCE OF INSDSeqid OPTIONAL,
    secondary-accessions SEQUENCE OF INSDSecondary-accn OPTIONAL,

    -- INSDSeq_project has been deprecated in favor of INSDSeq_xrefs .
    -- This element may be removed from a future version of this DTD.
    project VisibleString OPTIONAL,

    keywords SEQUENCE OF INSDKeyword OPTIONAL,
    segment VisibleString OPTIONAL,
    source VisibleString OPTIONAL,
    organism VisibleString OPTIONAL,
    taxonomy VisibleString OPTIONAL,
    references SEQUENCE OF INSDReference OPTIONAL,
    comment VisibleString OPTIONAL,
    comment-set SEQUENCE OF INSDComment OPTIONAL,
    struc-comments SEQUENCE OF INSDStrucComment OPTIONAL,
    primary VisibleString OPTIONAL,
    source-db VisibleString OPTIONAL,
    database-reference VisibleString OPTIONAL,
    feature-table SEQUENCE OF INSDFeature OPTIONAL,
    feature-set SEQUENCE OF INSDFeatureSet OPTIONAL,
    sequence VisibleString OPTIONAL,    -- Optional for contig, wgs, etc.
    contig VisibleString OPTIONAL,
    alt-seq SEQUENCE OF INSDAltSeqData OPTIONAL,

    -- INSDSeq_xrefs provides cross-references from a sequence record
    -- to other database resources. These cross-references are at the
    -- level of the entire record, rather than at the level of a specific
    -- feature. These cross-references can include: BioProject, BioSample,
    -- Sequence Read Archive, etc.
    xrefs SEQUENCE OF INSDXref OPTIONAL
}

-- INSDSeqid: string element type of INSDSeq other-seqids.
INSDSeqid ::= VisibleString

-- INSDSecondary-accn: string element type of INSDSeq secondary-accessions.
INSDSecondary-accn ::= VisibleString

-- INSDKeyword: string element type of INSDSeq keywords.
INSDKeyword ::= VisibleString

-- INSDReference_position contains a string value indicating the
-- basepair span(s) to which a reference applies. The allowable
-- formats are:
--
--   X..Y  : Where X and Y are integers separated by two periods,
--           X >= 1 , Y <= sequence length, and X <= Y
--
--           Multiple basepair spans can exist, separated by a
--           semi-colon and a space. For example : 10..20; 100..500
--
--   sites : The string literal 'sites', indicating that a reference
--           provides sequence annotation information, but the specific
--           basepair spans are either not captured, or were too numerous
--           to record.
--
--           The 'sites' literal string is singly occurring, and
--            cannot be used in conjunction with any X..Y basepair spans.
--
--           'sites' is a convention utilized by GenBank, and might
--           not be presented in XML provided by EMBL and DDBJ.
--
--   References that lack an INSDReference_position element are not
--   attributed to any particular region of the sequence.

INSDReference ::= SEQUENCE {
    reference  VisibleString,                    -- mandatory
    position   VisibleString OPTIONAL,           -- basepair span(s) "X..Y" or 'sites'
    authors    SEQUENCE OF INSDAuthor OPTIONAL,
    consortium VisibleString OPTIONAL,
    title      VisibleString OPTIONAL,
    journal    VisibleString,                    -- mandatory
    xref       SEQUENCE OF INSDXref OPTIONAL,
    pubmed     INTEGER OPTIONAL,
    remark     VisibleString OPTIONAL
}

-- INSDAuthor: string element type of INSDReference authors.
INSDAuthor ::= VisibleString

-- INSDXref provides a method for referring to records in
-- other databases. INSDXref_dbname is a string value that
-- provides the name of the database, and INSDXref_id
-- is a string value that provides the record's identifier
-- in that database.

INSDXref ::= SEQUENCE {
    dbname VisibleString,    -- name of the referenced database
    id     VisibleString     -- the record's identifier in that database
}

-- INSDComment: element type of INSDSeq comment-set. Carries an optional
-- type string and a mandatory list of paragraphs.
INSDComment ::= SEQUENCE {
    type       VisibleString OPTIONAL,
    paragraphs SEQUENCE OF INSDCommentParagraph
}

-- INSDCommentParagraph: string element type of INSDComment paragraphs.
INSDCommentParagraph ::= VisibleString

-- INSDStrucComment: element type of INSDSeq struc-comments. Carries an
-- optional name and a mandatory list of items.
INSDStrucComment ::= SEQUENCE {
    name  VisibleString OPTIONAL,
    items SEQUENCE OF INSDStrucCommentItem
}

-- INSDStrucCommentItem: one entry of INSDStrucComment items.
-- All three members are OPTIONAL.
INSDStrucCommentItem ::= SEQUENCE {
    tag   VisibleString OPTIONAL,
    value VisibleString OPTIONAL,
    url   VisibleString OPTIONAL
}

-- INSDFeature_operator contains a string value describing
-- the relationship among a set of INSDInterval within
-- INSDFeature_intervals. The allowable formats are:
--
--   join :  The string literal 'join' indicates that the
--           INSDInterval intervals are biologically joined
--           together into a contiguous molecule.
--
--   order : The string literal 'order' indicates that the
--           INSDInterval intervals are in the presented
--           order, but they are not necessarily contiguous.
--
--   Either 'join' or 'order' is required if INSDFeature_intervals
--   is comprised of more than one INSDInterval .

-- INSDFeatureSet: element type of INSDSeq feature-set. Groups a list of
-- INSDFeature under an optional annot-source string.
INSDFeatureSet ::= SEQUENCE {
    annot-source VisibleString OPTIONAL,
    features     SEQUENCE OF INSDFeature
}

-- INSDFeature: one feature; key and location are mandatory.
INSDFeature ::= SEQUENCE {
    key       VisibleString,
    location  VisibleString,
    intervals SEQUENCE OF INSDInterval OPTIONAL,
    operator  VisibleString OPTIONAL,    -- 'join' or 'order'; required when
                                         -- intervals has more than one entry
    partial5  BOOLEAN OPTIONAL,
    partial3  BOOLEAN OPTIONAL,
    quals     SEQUENCE OF INSDQualifier OPTIONAL,
    xrefs     SEQUENCE OF INSDXref OPTIONAL
}

-- INSDInterval_iscomp is a boolean indicating whether
-- an INSDInterval_from / INSDInterval_to location
-- represents a location on the complement strand.
-- When INSDInterval_iscomp is TRUE, it essentially
-- confirms that a 'from' value which is greater than
-- a 'to' value is intentional, because the location
-- is on the opposite strand of the presented sequence.

-- INSDInterval_interbp is a boolean indicating whether
-- a feature (such as a restriction site) is located
-- between two adjacent basepairs. When INSDInterval_interbp
-- is TRUE, the 'from' and 'to' values will differ by
-- exactly one base for linear molecules. For circular
-- molecules, if the inter-basepair position falls between
-- the last and the first base, then 'from' will be the
-- final base (equal to the length of the sequence), and
-- 'to' will have a value of 1.

INSDInterval ::= SEQUENCE {
    from      INTEGER OPTIONAL,
    to        INTEGER OPTIONAL,
    point     INTEGER OPTIONAL,
    iscomp    BOOLEAN OPTIONAL,    -- TRUE: location is on the complement strand
    interbp   BOOLEAN OPTIONAL,    -- TRUE: feature lies between two adjacent basepairs
    accession VisibleString        -- the only mandatory member
}

-- INSDQualifier: element type of INSDFeature quals. A mandatory name
-- with an optional value.
INSDQualifier ::= SEQUENCE {
    name  VisibleString,
    value VisibleString OPTIONAL
}

-- INSDAltSeqData provides for sequence representations other than
-- literal basepair abbreviations (INSDSeq_sequence), such as the
-- CONTIG/CO linetype of the GenBank and EMBL flatfile formats.
-- It also accommodates the specification of accession-number ranges,
-- which are presented on a WGS master record (for the contigs and
-- scaffolds of a WGS project).

INSDAltSeqData ::= SEQUENCE {
    name  VisibleString,    -- e.g., contig, wgs, scaffold, cage, genome
    items SEQUENCE OF INSDAltSeqItem OPTIONAL
}

-- INSDAltSeqItem: one entry of INSDAltSeqData items.
-- Every member is OPTIONAL.
INSDAltSeqItem ::= SEQUENCE {
    interval    INSDInterval OPTIONAL,
    isgap       BOOLEAN OPTIONAL,
    gap-length  INTEGER OPTIONAL,
    gap-type    VisibleString OPTIONAL,
    gap-linkage VisibleString OPTIONAL,
    gap-comment VisibleString OPTIONAL,
    first-accn  VisibleString OPTIONAL,
    last-accn   VisibleString OPTIONAL,
    value       VisibleString OPTIONAL
}

END

--$Revision: 6.1 $
--**********************************************************************
--
--  ASN.1 for a tiny Bioseq in XML
--    basically a structured FASTA file with a few extras
--    in this case we drop all modularity of components
--      All ids are Optional - simpler structure, less checking
--      Components of organism are hard coded - can't easily add or change
--      sequence is just string whether DNA or protein
--  by James Ostell, 2000
--
--**********************************************************************

NCBI-TSeq DEFINITIONS ::=
BEGIN

-- TSeq: one sequence record. seqtype, defline, length and sequence are
-- mandatory; all identifiers and organism members are OPTIONAL.
TSeq ::= SEQUENCE {
    seqtype  ENUMERATED {
        nucleotide (1),
        protein    (2)
    },
    gi       INTEGER OPTIONAL,
    accver   VisibleString OPTIONAL,
    sid      VisibleString OPTIONAL,
    local    VisibleString OPTIONAL,
    taxid    INTEGER OPTIONAL,
    orgname  VisibleString OPTIONAL,
    defline  VisibleString,
    length   INTEGER,
    sequence VisibleString
}

-- TSeqSet: a bunch of TSeq records
TSeqSet ::= SEQUENCE OF TSeq

END

--$Id: scoremat.asn,v 1.14 2011/12/21 15:29:33 kazimird Exp $
-- ===========================================================================
--
--                            PUBLIC DOMAIN NOTICE
--               National Center for Biotechnology Information
--
--  This software/database is a "United States Government Work" under the
--  terms of the United States Copyright Act.  It was written as part of
--  the author's official duties as a United States Government employee and
--  thus cannot be copyrighted.  This software/database is freely available
--  to the public for use. The National Library of Medicine and the U.S.
--  Government have not placed any restriction on its use or reproduction.
--
--  Although all reasonable efforts have been taken to ensure the accuracy
--  and reliability of the software and data, the NLM and the U.S.
--  Government do not and cannot warrant the performance or results that
--  may be obtained by using this software or data. The NLM and the U.S.
--  Government disclaim all warranties, express or implied, including
--  warranties of performance, merchantability or fitness for any particular
--  purpose.
--
--  Please cite the author in any work or product based on this material.
--
-- ===========================================================================
--
-- Author:  Christiam Camacho
--
-- File Description:
--      ASN.1 definitions for scoring matrix
--
-- ===========================================================================

NCBI-ScoreMat DEFINITIONS ::= BEGIN

EXPORTS    Pssm, PssmIntermediateData, PssmFinalData,
           PssmParameters, PssmWithParameters;

IMPORTS    Object-id   FROM NCBI-General
           Seq-entry   FROM NCBI-Seqset;

-- a rudimentary block/core-model, to be used with block-based alignment
-- routines and threading

-- BlockProperty: a typed property of a block, with an optional integer
-- value and an optional text value.
BlockProperty ::= SEQUENCE {
    type INTEGER {
        unassigned  (0),
        threshold   (1),      -- score threshold for heuristics
        minscore    (2),      -- observed minimum score in CD
        maxscore    (3),      -- observed maximum score in CD
        meanscore   (4),      -- observed mean score in CD
        variance    (5),      -- observed score variance
        name        (10),     -- just name the block
        is-optional (20),     -- block may not have to be used
        other       (255)
    },
    intvalue  INTEGER OPTIONAL,
    textvalue VisibleString OPTIONAL
}

-- CoreBlock: one block of a CoreDef, located on the query.
CoreBlock ::= SEQUENCE {
    start    INTEGER,                           -- begin of block on query
    stop     INTEGER,                           -- end of block on query
    minstart INTEGER OPTIONAL,                  -- optional N-terminal extension
    maxstop  INTEGER OPTIONAL,                  -- optional C-terminal extension
    property SEQUENCE OF BlockProperty OPTIONAL
}

-- LoopConstraint: length bounds for an unaligned region.
LoopConstraint ::= SEQUENCE {
    minlength INTEGER DEFAULT 0,         -- minimum length of unaligned region
    maxlength INTEGER DEFAULT 100000     -- maximum length of unaligned region
}

-- CoreDef: a core model made of blocks and the loop constraints
-- around them.
CoreDef ::= SEQUENCE {
    nblocks         INTEGER,                      -- number of core elements/blocks
    blocks          SEQUENCE OF CoreBlock,        -- nblocks locations
    loops           SEQUENCE OF LoopConstraint,   -- (nblocks+1) constraints
    isDiscontinuous BOOLEAN OPTIONAL,             -- is it a discontinuous domain
    insertions      SEQUENCE OF INTEGER OPTIONAL  -- positions of long insertions
}

-- Site-annot: an annotation covering a start/stop range of the PSSM.
Site-annot ::= SEQUENCE {

    -- location of the annotation: start and stop position in the PSSM
    startPosition INTEGER,
    stopPosition  INTEGER,

    -- holds description or names that can be used for labels in
    -- visualization
    description   VisibleString OPTIONAL,

    -- type of the annotated feature, similarly to Align-annot in NCBI-Cdd
    type          INTEGER OPTIONAL,

    -- additional names for the annotation
    aliases       SEQUENCE OF VisibleString OPTIONAL,

    -- motif to validate mapping of sites
    motif         VisibleString OPTIONAL,

    -- 0 for validation
    -- 1 for motif in seqloc
    -- 2 for multiple motifs in seqloc
    motifuse      INTEGER OPTIONAL
}

-- Site-annot-set: a list of Site-annot; the type of
-- PssmParameters annotatedSites.
Site-annot-set ::= SEQUENCE OF Site-annot

-- ===========================================================================
-- PSI-BLAST, formatrpsdb, RPS-BLAST workflow:
-- ===========================================
--
-- Two possible inputs to PSI-BLAST and formatrpsdb:
-- 1) PssmWithParams where pssm field contains intermediate PSSM data (matrix
--    of frequency ratios)
-- 2) PssmWithParams where pssm field contains final PSSM data (matrix of
--    scores and statistical parameters) - such as written by cddumper
--
-- In case 1, PSI-BLAST's PSSM engine is invoked to create the PSSM and perform
-- the PSI-BLAST search or build the PSSM to then build the RPS-BLAST database.
-- In case 2, PSI-BLAST's PSSM engine is not invoked; the matrix of scores
-- and statistical parameters is used to perform the search in PSI-BLAST, and
-- the same data plus the data in PssmWithParams::params::rpsdbparams is used
-- to build the PSSM and ultimately the RPS-BLAST database
--
--
--                 reads    ++++++++++++++ writes
-- PssmWithParams  ====>    + PSI-BLAST  + =====> PssmWithParams
--                          ++++++++++++++             |  ^
--         ^                                           |  |
--         |                                           |  |
--         +===========================================+  |
--                                                     |  |
--         +===========================================+  |
--         |                                              |
-- reads   |                                              |
--         v                                              |
--  +++++++++++++++ writes +++++++++++++++++++++++        |
--  | formatrpsdb | =====> | RPS-BLAST databases |        |
--  +++++++++++++++        +++++++++++++++++++++++        |
--                                   ^                    |
--                                   |                    |
--                                   | reads              |
--                             +++++++++++++              |
--                             | RPS-BLAST |              |
--                             +++++++++++++              |
--                                                        |
--       reads  ++++++++++++               writes         |
--  Cdd ======> | cddumper | =============================+
--              ++++++++++++
--
-- ===========================================================================

-- Final form of a PSSM: its scores plus the associated statistical
-- parameters. The scores must use the same dimensions and storage order
-- as given by Pssm::numRows, Pssm::numColumns, and Pssm::byRow.
PssmFinalData ::= SEQUENCE {

    -- PSSM's scores
    scores         SEQUENCE OF INTEGER,

    -- Karlin & Altschul parameter produced during the PSSM's calculation
    lambda         REAL,

    -- Karlin & Altschul parameter produced during the PSSM's calculation
    kappa          REAL,

    -- Karlin & Altschul parameter produced during the PSSM's calculation
    h              REAL,

    -- Scaling factor used to obtain more precision when building the PSSM
    -- (i.e.: scores are scaled by this value). By default, PSI-BLAST's
    -- PSSM engine generates PSSMs which are not scaled-up; however, if
    -- PSI-BLAST is given a scaled-up PSSM (indicated by a scalingFactor
    -- greater than 1), it will scale the PSSM down to perform the initial
    -- stages of the search with it.
    -- N.B.: When building RPS-BLAST databases, if formatrpsdb is provided
    -- scaled-up PSSMs, it will ensure that all PSSMs used to build the
    -- RPS-BLAST database are scaled by the same factor (otherwise,
    -- RPS-BLAST will silently produce incorrect results).
    scalingFactor  INTEGER DEFAULT 1,

    -- Karlin & Altschul parameter produced during the PSSM's calculation
    lambdaUngapped REAL OPTIONAL,

    -- Karlin & Altschul parameter produced during the PSSM's calculation
    kappaUngapped  REAL OPTIONAL,

    -- Karlin & Altschul parameter produced during the PSSM's calculation
    hUngapped      REAL OPTIONAL
}

-- Intermediate data from which the PSSM's scores and statistical
-- parameters are created. Stored values must use the same dimensions and
-- storage order as given by Pssm::numRows, Pssm::numColumns, and
-- Pssm::byRow.
-- Members marked "diagnostics only" are needed for diagnostics
-- information only (i.e.: -out_ascii_pssm option in psiblast).
PssmIntermediateData ::= SEQUENCE {

    -- observed residue frequencies (or counts) per position of the PSSM
    -- (prior to application of pseudocounts)
    resFreqsPerPos         SEQUENCE OF INTEGER OPTIONAL,

    -- Weighted observed residue frequencies per position of the PSSM.
    -- (N.B.: each position's weights should add up to 1.0).
    -- This field corresponds to f_i (f sub i) in equation 2 of
    -- Nucleic Acids Res. 2001 Jul 15;29(14):2994-3005.
    -- NOTE: diagnostics only
    weightedResFreqsPerPos SEQUENCE OF REAL OPTIONAL,

    -- PSSM's frequency ratios (the only mandatory member)
    freqRatios             SEQUENCE OF REAL,

    -- Information content per position of the PSSM
    -- NOTE: diagnostics only
    informationContent     SEQUENCE OF REAL OPTIONAL,

    -- Relative weight for columns of the PSSM without gaps to pseudocounts
    -- NOTE: diagnostics only
    gaplessColumnWeights   SEQUENCE OF REAL OPTIONAL,

    -- Used in sequence weights computation
    -- NOTE: diagnostics only
    sigma                  SEQUENCE OF REAL OPTIONAL,

    -- Length of the aligned regions per position of the query sequence
    -- NOTE: diagnostics only
    intervalSizes          SEQUENCE OF INTEGER OPTIONAL,

    -- Number of matching sequences per position of the PSSM (including
    -- the query)
    -- NOTE: diagnostics only
    numMatchingSeqs        SEQUENCE OF INTEGER OPTIONAL,

    -- Number of independent observations per position of the PSSM
    -- NOTE: this is needed for building CDD database for DELTA-BLAST
    numIndeptObsr          SEQUENCE OF REAL OPTIONAL
}

-- Position-specific scoring matrix
--
-- Column indices on the PSSM refer to the positions corresponding to the
-- query/master sequence, i.e. the number of columns (N) is the same
-- as the length of the query/master sequence.
-- Row indices refer to individual amino acid types, i.e. the number of
-- rows (M) is the same as the number of different residues in the
-- alphabet we use. Consequently, row labels are amino acid identifiers.
--
-- PSSMs are stored as linear arrays of integers. By default, we store
-- them column-by-column, M values for the first column followed by M
-- values for the second column, and so on. In order to provide
-- flexibility for external applications, the boolean field "byRow" is
-- provided to specify the storage order.
Pssm ::= SEQUENCE {

    -- Is this a protein or nucleotide scoring matrix?
    isProtein        BOOLEAN DEFAULT TRUE,

    -- PSSM identifier
    identifier       Object-id OPTIONAL,

    -- The dimensions of the matrix are returned so the client can
    -- verify that all data was received.
    numRows          INTEGER,    -- number of rows
    numColumns       INTEGER,    -- number of columns

    -- rowLabels is given to note the order of residue types so that it can
    -- be cross-checked between applications.
    -- If this field is not given, the matrix values are presented in the
    -- order of the alphabet: ncbistdaa for protein, ncbi4na for nucleotide.
    -- For proteins the values returned correspond to
    -- (-,-), (-,A), (-,B), (-,C) ... (A,-), (A,A), (A,B), (A,C) ...
    rowLabels        SEQUENCE OF VisibleString OPTIONAL,

    -- are matrices stored row by row?
    byRow            BOOLEAN DEFAULT FALSE,

    -- PSSM representative sequence (master)
    query            Seq-entry OPTIONAL,

    -- Both intermediateData and finalData can be provided, but at least
    -- one of them must be provided.
    -- N.B.: by default PSI-BLAST will return the PSSM in its
    -- PssmIntermediateData representation.

    -- Intermediate data for the PSSM
    intermediateData PssmIntermediateData OPTIONAL,

    -- Final representation for the PSSM
    finalData        PssmFinalData OPTIONAL
}

-- Parameters set at creation time of the PSSM; this structure is used to
-- create the RPS-BLAST database auxiliary file (*.aux).
-- formatrpsdb also uses the matrixName field to build a PSSM from a Pssm
-- structure which only contains PssmIntermediateData.
FormatRpsDbParameters ::= SEQUENCE {

    -- name of the underlying score matrix whose frequency ratios were
    -- used in PSSM construction (e.g.: BLOSUM62)
    matrixName VisibleString,

    -- gap opening penalty corresponding to the matrix above
    gapOpen    INTEGER OPTIONAL,

    -- gap extension penalty corresponding to the matrix above
    gapExtend  INTEGER OPTIONAL
}

-- Populated by the PSSM engine of PSI-BLAST; the original source for these
-- values is the PSI-BLAST options specified using the BLAST options API.
-- Every member is OPTIONAL.
PssmParameters ::= SEQUENCE {

    -- pseudocount constant used for PSSM. This field corresponds to beta
    -- in equation 2 of Nucleic Acids Res. 2001 Jul 15;29(14):2994-3005.
    pseudocount    INTEGER OPTIONAL,

    -- data needed by formatrpsdb to create RPS-BLAST databases.
    -- matrixName is populated by PSI-BLAST
    rpsdbparams    FormatRpsDbParameters OPTIONAL,

    -- alignment constraints needed by sequence-structure threader
    -- and other global or local block-alignment algorithms
    constraints    CoreDef OPTIONAL,

    -- bit score threshold for specific conserved domain hits
    bitScoreThresh REAL OPTIONAL,

    -- conserved functional sites with annotations
    annotatedSites Site-annot-set OPTIONAL
}

-- Envelope containing a PSSM and the parameters used to create it.
-- Provided for use in PSI-BLAST, formatrpsdb, and for the structure group.
PssmWithParameters ::= SEQUENCE {

    -- This field is applicable to PSI-BLAST and formatrpsdb.
    -- When both the intermediate and final PSSM data are provided in this
    -- field, the final data (matrix of scores and associated statistical
    -- parameters) takes precedence and that data is used for further
    -- processing. The rationale for this is that the PSSM's scores and
    -- statistical parameters might have been calculated by other
    -- applications and it might not be possible to recreate them by using
    -- PSI-BLAST's PSSM engine.
    pssm   Pssm,

    -- This field's rpsdbparams is used to specify the values of options
    -- for processing by formatrpsdb. If these are not set, the command
    -- line defaults of formatrpsdb are applied. This field is used
    -- by PSI-BLAST to verify that the underlying score matrix used to
    -- build the PSSM is the same as the one being specified through the
    -- BLAST Options API. If this field is omitted, no verification will be
    -- performed, so be careful to keep track of what matrix was used to
    -- build the PSSM or else the results produced by PSI-BLAST will be
    -- unreliable.
    params PssmParameters OPTIONAL
}

END
--$Revision: 1.167 $
--**********************************************************************
--
--  NCBI ASN.1 macro editing language specifications
--
--  by Colleen Bollin, 2007
--
--**********************************************************************

NCBI-Macro DEFINITIONS ::=
BEGIN

EXPORTS AECR-action, Parse-action, Macro-action-list, Suspect-rule-set;

-- simple constraints --

-- String-location: the type of String-constraint match-location.
String-location ::= ENUMERATED {
    contains (1),
    equals   (2),
    starts   (3),
    ends     (4),
    inlist   (5)
}

-- Word-substitution: a word together with a set of synonyms and two
-- matching flags, both FALSE by default.
Word-substitution ::= SEQUENCE {
    word           VisibleString OPTIONAL,
    synonyms       SET OF VisibleString OPTIONAL,
    case-sensitive BOOLEAN DEFAULT FALSE,
    whole-word     BOOLEAN DEFAULT FALSE
}

-- Word-substitution-set: the type of String-constraint ignore-words.
Word-substitution-set ::= SET OF Word-substitution

-- String-constraint: a text-matching constraint. match-location defaults
-- to contains and every BOOLEAN flag defaults to FALSE.
String-constraint ::= SEQUENCE {
    match-text        VisibleString OPTIONAL,
    match-location    String-location DEFAULT contains,
    case-sensitive    BOOLEAN DEFAULT FALSE,
    ignore-space      BOOLEAN DEFAULT FALSE,
    ignore-punct      BOOLEAN DEFAULT FALSE,
    ignore-words      Word-substitution-set OPTIONAL,
    whole-word        BOOLEAN DEFAULT FALSE,
    not-present       BOOLEAN DEFAULT FALSE,
    is-all-caps       BOOLEAN DEFAULT FALSE,
    is-all-lower      BOOLEAN DEFAULT FALSE,
    is-all-punct      BOOLEAN DEFAULT FALSE,
    ignore-weasel     BOOLEAN DEFAULT FALSE,
    is-first-cap      BOOLEAN DEFAULT FALSE,
    is-first-each-cap BOOLEAN DEFAULT FALSE
}

-- String-constraint-set: an unordered collection of String-constraint.
String-constraint-set ::= SET OF String-constraint

-- Strand-constraint: the type of Location-constraint strand.
Strand-constraint ::= ENUMERATED {
    any   (0),
    plus  (1),
    minus (2)
}

-- Seqtype-constraint: the type of Location-constraint seq-type.
Seqtype-constraint ::= ENUMERATED {
    any  (0),
    nuc  (1),
    prot (2)
}

-- Partial-constraint: the type of Location-constraint partial5 and
-- partial3.
Partial-constraint ::= ENUMERATED {
    either   (0),
    partial  (1),
    complete (2)
}

-- Location-type-constraint: the type of Location-constraint location-type.
Location-type-constraint ::= ENUMERATED {
    any             (0),
    single-interval (1),
    joined          (2),
    ordered         (3)
}

-- Location-pos-constraint: the type of Location-constraint end5 and end3.
-- Exactly one of the three integer alternatives is present.
Location-pos-constraint ::= CHOICE {
    dist-from-end     INTEGER,
    max-dist-from-end INTEGER,
    min-dist-from-end INTEGER
}

-- Location-constraint: combines the strand, sequence-type, partialness,
-- location-type and end-position constraints. Each enumerated member
-- defaults to its value 0 (any / either); end5 and end3 are OPTIONAL.
Location-constraint ::= SEQUENCE {
    strand        Strand-constraint DEFAULT any,
    seq-type      Seqtype-constraint DEFAULT any,
    partial5      Partial-constraint DEFAULT either,
    partial3      Partial-constraint DEFAULT either,
    location-type Location-type-constraint DEFAULT any,
    end5          Location-pos-constraint OPTIONAL,
    end3          Location-pos-constraint OPTIONAL
}

-- Object-type-constraint: selects any object, features, or descriptors.
Object-type-constraint ::= ENUMERATED {
    any        (0),
    feature    (1),
    descriptor (2)
}

-- feature values --

-- Macro-feature-type: feature type selector; the type of the "type"
-- member of Feature-field, Feature-field-legal and Feature-field-pair.
Macro-feature-type ::= ENUMERATED {
    any (0),
    gene (1),
    org (2),
    cds (3),
    prot (4),
    preRNA (5),
    mRNA (6),
    tRNA (7),
    rRNA (8),
    snRNA (9),
    scRNA (10),
    otherRNA (11),
    pub (12),
    seq (13),
    imp (14),
    allele (15),
    attenuator (16),
    c-region (17),
    caat-signal (18),
    imp-CDS (19),
    conflict (20),
    d-loop (21),
    d-segment (22),
    enhancer (23),
    exon (24),
    gC-signal (25),
    iDNA (26),
    intron (27),
    j-segment (28),
    ltr (29),
    mat-peptide (30),
    misc-binding (31),
    misc-difference (32),
    misc-feature (33),
    misc-recomb (34),
    misc-RNA (35),
    misc-signal (36),
    misc-structure (37),
    modified-base (38),
    mutation (39),
    n-region (40),
    old-sequence (41),
    polyA-signal (42),
    polyA-site (43),
    precursor-RNA (44),
    prim-transcript (45),
    primer-bind (46),
    promoter (47),
    protein-bind (48),
    rbs (49),
    repeat-region (50),
    rep-origin (51),
    s-region (52),
    sig-peptide (53),
    source (54),
    stem-loop (55),
    sts (56),
    tata-signal (57),
    terminator (58),
    transit-peptide (59),
    unsure (60),
    v-region (61),
    v-segment (62),
    variation (63),
    virion (64),
    n3clip (65),
    n3UTR (66),
    n5clip (67),
    n5UTR (68),
    n10-signal (69),
    n35-signal (70),
    site-ref (71),
    region (72),
    comment (73),
    bond (74),
    site (75),
    rsite (76),
    user (77),
    txinit (78),
    num (79),
    psec-str (80),
    non-std-residue (81),
    het (82),
    biosrc (83),
    preprotein (84),
    mat-peptide-aa (85),
    sig-peptide-aa (86),
    transit-peptide-aa (87),
    snoRNA (88),
    gap (89),
    operon (90),
    oriT (91),
    ncRNA (92),
    tmRNA (93),
    mobile-element (94),
    regulatory (95)
}

-- Feat-qual-legal: the enumerated set of recognized feature qualifiers.
-- Used by Feat-qual-legal-val, Feat-qual-choice and Feature-field-legal.
Feat-qual-legal ::= ENUMERATED {
    allele (1),
    activity (2),
    anticodon (3),
    bound-moiety (4),
    chromosome (5),
    citation (6),
    codon (7),
    codon-start (8),
    codons-recognized (9),
    compare (10),
    cons-splice (11),
    db-xref (12),
    description (13),
    direction (14),
    ec-number (15),
    environmental-sample (16),
    evidence (17),
    exception (18),
    experiment (19),
    focus (20),
    frequency (21),
    function (22),
    gene (23),
    gene-description (24),
    inference (25),
    label (26),
    locus-tag (27),
    map (28),
    mobile-element (29),
    mod-base (30),
    mol-type (31),
    ncRNA-class (32),
    note (33),
    number (34),
    old-locus-tag (35),
    operon (36),
    organism (37),
    organelle (38),
    partial (39),
    phenotype (40),
    plasmid (41),
    product (42),
    protein-id (43),
    pseudo (44),
    rearranged (45),
    replace (46),
    rpt-family (47),
    rpt-type (48),
    rpt-unit (49),
    rpt-unit-seq (50),
    rpt-unit-range (51),
    segment (52),
    sequenced-mol (53),
    standard-name (54),
    synonym (55),
    transcript-id (56),
    transgenic (57),
    translation (58),
    transl-except (59),
    transl-table (60),
    usedin (61),
    mobile-element-type (62),
    mobile-element-name (63),
    gene-comment (64),
    satellite (65),
    satellite-type (66),
    satellite-name (67),
    location (68),
    tag-peptide (69),
    mobile-element-type-type (70),
    name (71),
    pcr-conditions (72),
    regulatory-class (73)
}

-- Feat-qual-legal-val: a recognized qualifier paired with a string value.
Feat-qual-legal-val ::= SEQUENCE {
    qual Feat-qual-legal,
    val  VisibleString
}

-- Feat-qual-legal-val-choice: a CHOICE with a single alternative;
-- the element type of Feat-qual-legal-set.
Feat-qual-legal-val-choice ::= CHOICE {
    qual Feat-qual-legal-val
}

-- Feat-qual-legal-set: an unordered collection of
-- Feat-qual-legal-val-choice.
Feat-qual-legal-set ::= SET OF Feat-qual-legal-val-choice

-- Feat-qual-choice: either a recognized qualifier (legal-qual) or a
-- String-constraint (illegal-qual).
Feat-qual-choice ::= CHOICE {
    legal-qual   Feat-qual-legal,
    illegal-qual String-constraint
}

-- Feature-field: a feature type plus a qualifier choice.
Feature-field ::= SEQUENCE {
    type  Macro-feature-type,
    field Feat-qual-choice
}

-- Feature-field-legal: a feature type plus a recognized qualifier.
Feature-field-legal ::= SEQUENCE {
    type  Macro-feature-type,
    field Feat-qual-legal
}

-- Feature-field-pair: a feature type plus a from/to pair of qualifier
-- choices.
Feature-field-pair ::= SEQUENCE {
    type       Macro-feature-type,
    field-from Feat-qual-choice,
    field-to   Feat-qual-choice
}

-- Rna-feat-type: RNA feature selector. Only the ncRNA alternative
-- carries a string; every other alternative is NULL.
Rna-feat-type ::= CHOICE {
    any     NULL,
    preRNA  NULL,
    mRNA    NULL,
    tRNA    NULL,
    rRNA    NULL,
    ncRNA   VisibleString,
    tmRNA   NULL,
    miscRNA NULL
}

-- Rna-field: field selector used by Rna-qual and Rna-qual-pair.
Rna-field ::= ENUMERATED {
    product (1),
    comment (2),
    codons-recognized (3),
    ncrna-class (4),
    anticodon (5),
    transcript-id (6),
    gene-locus (7),
    gene-description (8),
    gene-maploc (9),
    gene-locus-tag (10),
    gene-synonym (11),
    gene-comment (12),
    tag-peptide (13)
}

-- Rna-qual: an RNA feature type plus one RNA field.
Rna-qual ::= SEQUENCE {
    type  Rna-feat-type,
    field Rna-field
}

-- Rna-qual-pair: an RNA feature type plus a from/to pair of RNA fields.
Rna-qual-pair ::= SEQUENCE {
    type       Rna-feat-type,
    field-from Rna-field,
    field-to   Rna-field
}

-- Source-qual: the enumerated set of source qualifiers.
-- Used by Source-qual-pair.
Source-qual ::= ENUMERATED {
    acronym (1),
    anamorph (2),
    authority (3),
    bio-material (4),
    biotype (5),
    biovar (6),
    breed (7),
    cell-line (8),
    cell-type (9),
    chemovar (10),
    chromosome (11),
    clone (12),
    clone-lib (13),
    collected-by (14),
    collection-date (15),
    common (16),
    common-name (17),
    country (18),
    cultivar (19),
    culture-collection (20),
    dev-stage (21),
    division (22),
    dosage (23),
    ecotype (24),
    endogenous-virus-name (25),
    environmental-sample (26),
    forma (27),
    forma-specialis (28),
    frequency (29),
    fwd-primer-name (30),
    fwd-primer-seq (31),
    gb-acronym (32),
    gb-anamorph (33),
    gb-synonym (34),
    genotype (35),
    germline (36),
    group (37),
    haplotype (38),
    identified-by (39),
    insertion-seq-name (40),
    isolate (41),
    isolation-source (42),
    lab-host (43),
    lat-lon (44),
    lineage (45),
    map (46),
    metagenome-source (47),
    metagenomic (48),
    old-lineage (49),
    old-name (50),
    orgmod-note (51),
    nat-host (52),
    pathovar (53),
    plasmid-name (54),
    plastid-name (55),
    pop-variant (56),
    rearranged (57),
    rev-primer-name (58),
    rev-primer-seq (59),
    segment (60),
    serogroup (61),
    serotype (62),
    serovar (63),
    sex (64),
    specimen-voucher (65),
    strain (66),
    subclone (67),
    subgroup (68),
    subsource-note (69),
    sub-species (70),
    substrain (71),
    subtype (72),
    synonym (73),
    taxname (74),
    teleomorph (75),
    tissue-lib (76),
    tissue-type (77),
    transgenic (78),
    transposon-name (79),
    type (80),
    variety (81),
    specimen-voucher-INST (82),
    specimen-voucher-COLL (83),
    specimen-voucher-SpecID (84),
    culture-collection-INST (85),
    culture-collection-COLL (86),
    culture-collection-SpecID (87),
    bio-material-INST (88),
    bio-material-COLL (89),
    bio-material-SpecID (90),
    all-notes (91),
    mating-type (92),
    linkage-group (93),
    haplogroup (94),
    all-quals (95),
    dbxref (96),
    taxid (97),
    all-primers (98),
    altitude (99),
    type-material (100)
}

-- Source-qual-pair: a from/to pair of Source-qual values. Used as the
-- source-qual alternative of Field-pair-type.
Source-qual-pair ::= SEQUENCE {
    field-from Source-qual ,
    field-to Source-qual }

-- Source-location: source location values (0 to 22, with 0 = unknown).
-- Referenced by the location alternative of Source-qual-choice and
-- Source-qual-val-choice.
Source-location ::= ENUMERATED {
    unknown (0) ,
    genomic (1) ,
    chloroplast (2) ,
    chromoplast (3) ,
    kinetoplast (4) ,
    mitochondrion (5) ,
    plastid (6) ,
    macronuclear (7) ,
    extrachrom (8) ,
    plasmid (9) ,
    transposon (10) ,
    insertion-seq (11) ,
    cyanelle (12) ,
    proviral (13) ,
    virion (14) ,
    nucleomorph (15) ,
    apicoplast (16) ,
    leucoplast (17) ,
    proplastid (18) ,
    endogenous-virus (19) ,
    hydrogenosome (20) ,
    chromosome (21) ,
    chromatophore (22) }

-- Source-origin: source origin values. Numbers 0 to 5 are contiguous;
-- other is set apart at 255.
Source-origin ::= ENUMERATED {
    unknown (0) ,
    natural (1) ,
    natmut (2) ,
    mut (3) ,
    artificial (4) ,
    synthetic (5) ,
    other (255) }

-- Source-qual-choice: identifies one source field. It is either a text
-- qualifier, the location, the origin, or one of the two INTEGER
-- alternatives gcode and mgcode.
-- NOTE(review): gcode / mgcode presumably refer to genetic codes; confirm.
Source-qual-choice ::= CHOICE {
    textqual Source-qual ,
    location Source-location,
    origin Source-origin ,
    gcode INTEGER  ,
    mgcode INTEGER  }

-- Source-qual-text-val: a Source-qual together with its text value.
Source-qual-text-val ::= SEQUENCE {
    srcqual Source-qual ,
    val VisibleString }

-- Source-qual-val-choice: has the same alternatives as Source-qual-choice,
-- except that textqual carries a qualifier plus its text value
-- (Source-qual-text-val) instead of the bare qualifier.
Source-qual-val-choice ::= CHOICE {
    textqual Source-qual-text-val ,
    location Source-location,
    origin Source-origin ,
    gcode INTEGER ,
    mgcode INTEGER }

-- Source-qual-val-set: unordered collection of source field values.
-- Used by Apply-feature-action (src-fields).
Source-qual-val-set ::= SET OF Source-qual-val-choice

-- CDSGeneProt-field: fields addressable across a CDS / gene / mRNA /
-- protein / mat-peptide group (values 1 to 24). The identifier prefix
-- (cds-, gene-, mrna-, prot-, mat-peptide-) names the part the field
-- belongs to; codon-start (24) has no such prefix.
CDSGeneProt-field ::= ENUMERATED {
    cds-comment (1) ,
    gene-locus (2) ,
    gene-description (3) ,
    gene-comment (4) ,
    gene-allele (5) ,
    gene-maploc (6) ,
    gene-locus-tag (7) ,
    gene-synonym (8) ,
    gene-old-locus-tag (9) ,
    mrna-product (10) ,
    mrna-comment (11) ,
    prot-name (12) ,
    prot-description (13) ,
    prot-ec-number (14) ,
    prot-activity (15) ,
    prot-comment (16) ,
    mat-peptide-name (17) ,
    mat-peptide-description (18) ,
    mat-peptide-ec-number (19) ,
    mat-peptide-activity (20) ,
    mat-peptide-comment (21) ,
    cds-inference (22) ,
    gene-inference (23) ,
    codon-start (24) }

-- CDSGeneProt-field-pair: a from/to pair of CDSGeneProt-field values.
-- Used as the cds-gene-prot alternative of Field-pair-type.
CDSGeneProt-field-pair ::= SEQUENCE {
    field-from CDSGeneProt-field ,
    field-to CDSGeneProt-field }

-- Molecule-type: molecule values for Molinfo-field (0 to 11, with
-- 0 = unknown). Values 1 to 10 use the same names and numbers as
-- Sequence-constraint-rnamol; macro-other (11) exists only here.
Molecule-type ::= ENUMERATED {
  unknown (0) ,
  genomic (1) ,
  precursor-RNA (2) ,
  mRNA (3) ,
  rRNA (4) ,
  tRNA (5) ,
  genomic-mRNA (6) ,
  cRNA (7) ,
  transcribed-RNA (8) ,
  ncRNA (9) ,
  transfer-messenger-RNA (10) ,
  macro-other (11) }

-- Technique-type: technique values for Molinfo-field (0 to 25, with
-- 0 = unknown and other as the last value, 25).
Technique-type ::= ENUMERATED {
  unknown (0) ,
  standard (1) ,
  est (2) ,
  sts (3) ,
  survey (4) ,
  genetic-map (5) ,
  physical-map (6) ,
  derived (7) ,
  concept-trans (8) ,
  seq-pept (9) ,
  both (10) ,
  seq-pept-overlap (11) ,
  seq-pept-homol (12) ,
  concept-trans-a (13) ,
  htgs-1 (14) ,
  htgs-2 (15) ,
  htgs-3 (16) ,
  fli-cDNA (17) ,
  htgs-0 (18) ,
  htc (19) ,
  wgs (20) ,
  barcode (21) ,
  composite-wgs-htgs (22) ,
  tsa (23) ,
  targeted (24) ,
  other (25) }

-- Completedness-type: completeness values for Molinfo-field (0 to 8, with
-- 0 = unknown and other as the last value).
-- Every identifier must map to a distinct number in an ENUMERATED type.
-- other used to be numbered 6, which collided with has-left (6) and made
-- the two indistinguishable by number; it now takes the next free value,
-- matching how the sibling enumerations number their trailing other.
-- Identifiers are unchanged, so values written by name are unaffected.
Completedness-type ::= ENUMERATED {
  unknown (0) ,
  complete (1) ,
  partial (2) ,
  no-left (3) ,
  no-right (4) ,
  no-ends (5) ,
  has-left (6) ,
  has-right (7) ,
  other (8) }

-- Molecule-class-type: molecule class values for Molinfo-field (0 to 5,
-- with 0 = unknown and 5 = other).
Molecule-class-type ::= ENUMERATED {
  unknown (0) ,
  dna (1) ,
  rna (2) ,
  protein (3) ,
  nucleotide (4),
  other (5) }

-- Topology-type: topology values for Molinfo-field (0 to 4, with
-- 0 = unknown and 4 = other).
Topology-type ::= ENUMERATED {
  unknown (0) ,
  linear (1) ,
  circular (2) ,
  tandem (3) ,
  other (4) }

-- Strand-type: strand values for Molinfo-field (0 to 5, with
-- 0 = unknown and 5 = other).
Strand-type ::= ENUMERATED {
  unknown (0) ,
  single (1) ,
  double (2) ,
  mixed (3) ,
  mixed-rev (4) ,
  other (5) }

-- Molinfo-field: one molinfo attribute together with its value. Each
-- alternative carries the matching enumeration. Its alternatives have
-- the same names and order as those of Molinfo-field-pair.
Molinfo-field ::= CHOICE {
    molecule Molecule-type ,
    technique Technique-type ,
    completedness Completedness-type ,
    mol-class Molecule-class-type ,
    topology Topology-type ,
    strand Strand-type }

-- Molinfo-molecule-pair: from/to pair of Molecule-type values.
Molinfo-molecule-pair ::= SEQUENCE {
    from Molecule-type ,
    to Molecule-type }

-- Molinfo-technique-pair: from/to pair of Technique-type values.
Molinfo-technique-pair ::= SEQUENCE {
    from Technique-type ,
    to Technique-type }

-- Molinfo-completedness-pair: from/to pair of Completedness-type values.
Molinfo-completedness-pair ::= SEQUENCE {
    from Completedness-type ,
    to Completedness-type }

-- Molinfo-mol-class-pair: from/to pair of Molecule-class-type values.
Molinfo-mol-class-pair ::= SEQUENCE {
    from Molecule-class-type ,
    to Molecule-class-type }

-- Molinfo-topology-pair: from/to pair of Topology-type values.
Molinfo-topology-pair ::= SEQUENCE {
    from Topology-type ,
    to Topology-type }

-- Molinfo-strand-pair: from/to pair of Strand-type values.
Molinfo-strand-pair ::= SEQUENCE {
    from Strand-type ,
    to Strand-type }

-- Molinfo-field-pair: selects which molinfo attribute a from/to pair
-- applies to. Both members of a pair always come from the same
-- enumeration. Used as the molinfo-field alternative of Field-pair-type.
Molinfo-field-pair ::= CHOICE {
    molecule Molinfo-molecule-pair ,
    technique Molinfo-technique-pair ,
    completedness Molinfo-completedness-pair ,
    mol-class Molinfo-mol-class-pair ,
    topology Molinfo-topology-pair ,
    strand Molinfo-strand-pair }

-- Molinfo-field-list: unordered collection of molinfo attribute values.
-- Used by Molinfo-block (to-list, from-list).
Molinfo-field-list ::= SET OF Molinfo-field

-- Molinfo-field-constraint: a molinfo attribute value with a negation
-- flag. is-not defaults to FALSE.
-- NOTE(review): presumably is-not TRUE means the value must differ from
-- field; confirm against the consuming code.
Molinfo-field-constraint ::= SEQUENCE {
    field Molinfo-field ,
    is-not BOOLEAN DEFAULT FALSE }

-- publication fields --

-- Publication-field: fields addressable on a publication (values 1 to 22).
-- Entries 10 to 19 are affiliation-related; entry 10 is spelled out
-- (affiliation) and entries 11 to 19 carry the affil- prefix.
Publication-field ::=  ENUMERATED {
    cit (1) ,
    authors (2) ,
    journal (3) ,
    volume (4) ,
    issue (5) ,
    pages (6) ,
    date (7) ,
    serial-number (8) ,
    title (9) ,
    affiliation (10) ,
    affil-div (11) ,
    affil-city (12) ,
    affil-sub (13) ,
    affil-country (14) ,
    affil-street (15) ,
    affil-email (16) ,
    affil-fax (17) ,
    affil-phone (18) ,
    affil-zipcode (19),
    authors-initials (20),
    pmid (21),
    pub-class (22)
    }

-- structured comment fields --

-- Structured-comment-field: selects part of a structured comment.
-- database and field-name are NULL markers; named carries a
-- VisibleString (NOTE(review): presumably the field name to match;
-- confirm).
Structured-comment-field ::= CHOICE {
  database NULL ,
  named VisibleString ,
  field-name NULL
  }

-- Structured-comment-field-pair: from/to pair of structured comment
-- fields. Used as the struc-comment-field alternative of Field-pair-type.
Structured-comment-field-pair ::= SEQUENCE {
  from Structured-comment-field ,
  to Structured-comment-field
  }

-- misc fields --
-- these would not appear in pairs --
-- Misc-field: miscellaneous fields (values 1 to 4). Available through
-- Field-type (misc) but has no counterpart in Field-pair-type.
Misc-field ::= ENUMERATED {
    genome-project-id (1) ,
    comment-descriptor (2) ,
    defline (3) ,
    keyword (4)
    }

-- dblink fields --
-- DBLink-field-type: DBLink fields (values 1 to 6).
-- NOTE(review): sequence-read-archve is spelled without the i. The
-- identifier is part of the text encoding, so renaming it would
-- invalidate existing data; coordinate with consumers before changing.
DBLink-field-type ::= ENUMERATED {
  trace-assembly (1) ,
  bio-sample (2) ,
  probe-db (3) ,
  sequence-read-archve (4) ,
  bio-project (5) ,
  assembly (6) }

-- DBLink-field-pair: from/to pair of DBLink fields. Used as the dblink
-- alternative of Field-pair-type.
DBLink-field-pair ::= SEQUENCE {
  from DBLink-field-type ,
  to DBLink-field-type
  }

-- complex constraints --

-- Pub-type: publication status used by Publication-constraint
-- (values 0 to 4, with 0 = any).
Pub-type ::= ENUMERATED {
  any (0) ,
  published (1) ,
  unpublished (2) ,
  in-press (3) ,
  submitter-block (4) }

-- Pub-field-constraint: a String-constraint applied to one publication
-- field.
Pub-field-constraint ::= SEQUENCE {
  field Publication-field ,
  constraint String-constraint }

-- Pub-field-special-constraint-type: predefined tests on a publication
-- field. All alternatives are NULL markers, so the choice itself is the
-- only information carried.
Pub-field-special-constraint-type ::= CHOICE {
  is-present NULL ,
  is-not-present NULL ,
  is-all-caps NULL ,
  is-all-lower NULL ,
  is-all-punct NULL }

-- Pub-field-special-constraint: one predefined test applied to one
-- publication field.
Pub-field-special-constraint ::= SEQUENCE {
  field Publication-field ,
  constraint Pub-field-special-constraint-type }

-- Publication-constraint: a required publication status plus two
-- optional field tests (string-based and predefined).
Publication-constraint ::= SEQUENCE {
  type Pub-type ,
  field Pub-field-constraint OPTIONAL ,
  special-field Pub-field-special-constraint OPTIONAL }

-- Source-constraint: constraint on source fields. Every member is
-- OPTIONAL, so an empty value is syntactically valid.
-- NOTE(review): how field1, field2 and constraint combine (for example
-- comparing the two fields when both are present) is not stated here;
-- confirm against the consuming code.
Source-constraint ::= SEQUENCE {
  field1 Source-qual-choice OPTIONAL ,
  field2 Source-qual-choice OPTIONAL ,
  constraint String-constraint OPTIONAL ,
  type-constraint Object-type-constraint OPTIONAL }

-- CDSGeneProt-feature-type-constraint: which member of a CDS / gene /
-- protein group a pseudo constraint refers to (values 1 to 6).
CDSGeneProt-feature-type-constraint ::= ENUMERATED {
    gene (1) ,
    mRNA (2) ,
    cds (3) ,
    prot (4) ,
    exon (5) ,
    mat-peptide (6) }

-- CDSGeneProt-pseudo-constraint: pseudo-status test on one member of a
-- CDS / gene / protein group. is-pseudo defaults to TRUE.
CDSGeneProt-pseudo-constraint ::= SEQUENCE {
    feature CDSGeneProt-feature-type-constraint ,
    is-pseudo BOOLEAN DEFAULT TRUE }

-- CDSGeneProt-constraint-field: single-alternative CHOICE wrapping a
-- CDSGeneProt-field.
CDSGeneProt-constraint-field ::= CHOICE {
  field CDSGeneProt-field }

-- CDSGeneProt-qual-constraint: constraint on CDS / gene / protein
-- fields. Same shape as Source-constraint without the type-constraint
-- member; every member is OPTIONAL.
CDSGeneProt-qual-constraint ::= SEQUENCE {
  field1 CDSGeneProt-constraint-field OPTIONAL ,
  field2 CDSGeneProt-constraint-field OPTIONAL ,
  constraint String-constraint OPTIONAL }

-- Field-constraint: a String-constraint applied to any Field-type.
-- Both members are required.
Field-constraint ::= SEQUENCE {
  field Field-type ,
  string-constraint String-constraint }

-- Sequence-constraint-rnamol: RNA molecule restriction for a sequence
-- constraint (values 0 to 10, with 0 = any). Values 1 to 10 use the same
-- names and numbers as Molecule-type.
Sequence-constraint-rnamol ::= ENUMERATED {
  any (0) ,
  genomic (1) ,
  precursor-RNA (2) ,
  mRNA (3) ,
  rRNA (4) ,
  tRNA (5) ,
  genomic-mRNA (6) ,
  cRNA (7) ,
  transcribed-RNA (8) ,
  ncRNA (9) ,
  transfer-messenger-RNA (10) }

-- Sequence-constraint-mol-type-constraint: sequence type restriction.
-- Only the rna alternative carries data (the RNA molecule restriction);
-- the others are NULL markers.
Sequence-constraint-mol-type-constraint ::= CHOICE {
  any NULL ,
  nucleotide NULL ,
  dna NULL ,
  rna Sequence-constraint-rnamol ,
  protein NULL }

-- Quantity-constraint: a numeric comparison. The alternative selects the
-- operator and the INTEGER is the operand.
Quantity-constraint ::= CHOICE {
  equals INTEGER ,
  greater-than INTEGER ,
  less-than INTEGER }

-- Feature-strandedness-constraint: restriction on the strands of
-- features (values 0 to 6). any (0) is the default in
-- Sequence-constraint.
Feature-strandedness-constraint ::= ENUMERATED {
  any (0) ,
  minus-only (1) ,
  plus-only (2) ,
  at-least-one-minus (3) ,
  at-least-one-plus (4) ,
  no-minus (5) ,
  no-plus (6) }

-- Sequence-constraint: constraint on a sequence. feature is the only
-- required member; strandedness defaults to any and all other members
-- are OPTIONAL.
-- NOTE(review): the difference between num-type-features and
-- num-features is not stated here; confirm against the consuming code.
Sequence-constraint ::= SEQUENCE {
    seqtype Sequence-constraint-mol-type-constraint OPTIONAL ,
    id String-constraint OPTIONAL ,
    feature Macro-feature-type ,
    num-type-features Quantity-constraint OPTIONAL ,
    num-features Quantity-constraint OPTIONAL ,
    length Quantity-constraint OPTIONAL ,
    strandedness Feature-strandedness-constraint DEFAULT any }

-- Match-type-constraint: three-state match flag (dont-care, yes, no).
Match-type-constraint ::= ENUMERATED {
  dont-care (0) ,
  yes (1) ,
  no (2) }

-- Translation-constraint: constraint on a translation. Two string
-- constraint sets are required; internal-stops defaults to dont-care and
-- num-mismatches is OPTIONAL.
-- NOTE(review): presumably actual-strings applies to the stored protein
-- and transl-strings to the computed translation; confirm.
Translation-constraint ::= SEQUENCE {
  actual-strings String-constraint-set ,
  transl-strings String-constraint-set ,
  internal-stops Match-type-constraint DEFAULT dont-care ,
  num-mismatches Quantity-constraint OPTIONAL }

-- Constraint-choice: one constraint of any supported kind. Each
-- alternative wraps the matching constraint type, except field-missing,
-- which carries only the Field-type.
Constraint-choice ::= CHOICE {
    string String-constraint ,
    location Location-constraint ,
    field  Field-constraint ,
    source Source-constraint ,
    cdsgeneprot-qual CDSGeneProt-qual-constraint ,
    cdsgeneprot-pseudo CDSGeneProt-pseudo-constraint ,
    sequence Sequence-constraint ,
    pub Publication-constraint ,
    molinfo Molinfo-field-constraint ,
    field-missing Field-type ,
    translation Translation-constraint }

-- Constraint-choice-set: unordered collection of constraints. This is
-- the type of the optional constraint member on most action types below.
Constraint-choice-set ::= SET OF Constraint-choice

-- Text-marker: what delimits a portion of text. It is either literal
-- text (free-text) or one of the NULL markers digits and letters.
Text-marker ::= CHOICE {
    free-text VisibleString ,
    digits NULL ,
    letters NULL }

-- Text-portion: selects part of a string by optional left and right
-- markers. include-left, include-right and inside are required;
-- case-sensitive and whole-word default to FALSE.
-- NOTE(review): presumably include-left / include-right decide whether
-- the marker text is part of the selection, and inside chooses between
-- the text between the markers and the text outside them; confirm.
Text-portion ::= SEQUENCE {
    left-marker Text-marker  OPTIONAL ,
    include-left BOOLEAN ,
    right-marker Text-marker  OPTIONAL ,
    include-right BOOLEAN ,
    inside BOOLEAN ,
    case-sensitive BOOLEAN DEFAULT FALSE ,
    whole-word BOOLEAN DEFAULT FALSE }

-- Field-edit-location: where in the field a Field-edit looks for its
-- text. anywhere (0) is the default in Field-edit.
Field-edit-location ::= ENUMERATED {
    anywhere (0) ,
    beginning (1) ,
    end (2) }

-- Field-edit: a find / replace specification. repl-txt is OPTIONAL,
-- location defaults to anywhere and case-insensitive defaults to FALSE.
-- NOTE(review): presumably a missing repl-txt deletes the found text;
-- confirm.
Field-edit ::= SEQUENCE {
    find-txt VisibleString ,
    repl-txt VisibleString OPTIONAL ,
    location Field-edit-location DEFAULT anywhere ,
    case-insensitive BOOLEAN DEFAULT FALSE }

-- Field-type: identifies a single field of any supported family. The
-- pub and misc alternatives have no counterpart in Field-pair-type.
Field-type ::= CHOICE {
    source-qual Source-qual-choice ,
    feature-field Feature-field ,
    rna-field Rna-qual ,
    cds-gene-prot CDSGeneProt-field ,
    molinfo-field Molinfo-field ,
    pub Publication-field ,
    struc-comment-field Structured-comment-field ,
    misc Misc-field ,
    dblink DBLink-field-type }

-- Field-pair-type: identifies a from/to pair of fields within one
-- family. Used by Convert-action, Copy-action, Swap-action and
-- AECRParse-action. Unlike Field-type it has no pub or misc alternative.
Field-pair-type ::= CHOICE {
    source-qual Source-qual-pair ,
    feature-field Feature-field-pair ,
    rna-field Rna-qual-pair ,
    cds-gene-prot CDSGeneProt-field-pair ,
    molinfo-field Molinfo-field-pair ,
    struc-comment-field Structured-comment-field-pair ,
    dblink DBLink-field-pair}

-- ExistingTextOption: what to do when the destination already holds
-- text (values 1 to 13). Values 2 to 6 are the append- variants and
-- values 7 to 11 the prefix- variants, each listing the separators
-- semi, space, colon, comma and none in that order.
ExistingTextOption ::= ENUMERATED {
  replace-old (1) ,
  append-semi (2) ,
  append-space (3) ,
  append-colon (4) ,
  append-comma (5) ,
  append-none (6) ,
  prefix-semi (7) ,
  prefix-space (8) ,
  prefix-colon (9) ,
  prefix-comma (10) ,
  prefix-none (11) ,
  leave-old (12) ,
  add-qual (13) }

-- Apply-action: sets a field to a given text value. existing-text says
-- how to treat text already present. All members are required.
Apply-action ::= SEQUENCE {
    field Field-type ,
    value VisibleString ,
    existing-text ExistingTextOption }

-- Edit-action: applies a find / replace specification to one field.
Edit-action ::= SEQUENCE {
    edit Field-edit ,
    field Field-type }

-- Cap-change: capitalization transforms (values 0 to 7, with 0 = none).
-- Note the inconsistent hyphenation between firstcaprestnochange (4) and
-- firstlower-restnochange (5). Both identifiers are part of the text
-- encoding and must be written exactly as declared.
Cap-change ::= ENUMERATED {
    none (0) ,
    tolower (1) ,
    toupper (2) ,
    firstcap (3) ,
    firstcaprestnochange (4) ,
    firstlower-restnochange (5) ,
    cap-word-space (6) ,
    cap-word-space-punc (7)
    }

-- Text-transform: one text transformation. It is a find / replace edit,
-- a capitalization change, or removal of a text portion.
Text-transform ::= CHOICE {
  edit Field-edit ,
  caps Cap-change ,
  remove Text-portion }

-- Text-transform-set: collection of text transformations. Used by
-- AECRParse-action and Parse-action (transform).
-- NOTE(review): declared as SET OF, so the type itself does not
-- guarantee an application order; confirm how consumers order them.
Text-transform-set ::= SET OF Text-transform

-- Convert-action: converts between the two fields of a pair. strip-name
-- and keep-original default to FALSE, capitalization defaults to none;
-- fields and existing-text are required.
Convert-action ::= SEQUENCE {
    fields Field-pair-type ,
    strip-name BOOLEAN DEFAULT FALSE ,
    keep-original BOOLEAN DEFAULT FALSE ,
    capitalization Cap-change DEFAULT none ,
    existing-text ExistingTextOption }

-- Copy-action: copies between the two fields of a pair. existing-text
-- says how to treat text already present in the destination.
Copy-action ::= SEQUENCE {
    fields Field-pair-type ,
    existing-text ExistingTextOption }

-- Swap-action: swaps the two fields of a pair. It takes no
-- existing-text option, unlike Copy-action and Convert-action.
Swap-action ::= SEQUENCE {
    fields Field-pair-type }

-- AECRParse-action: parses a text portion between the two fields of a
-- pair. The three remove- flags default to FALSE and transform is
-- OPTIONAL; portion, fields and existing-text are required.
AECRParse-action ::= SEQUENCE {
    portion Text-portion ,
    fields Field-pair-type ,
    remove-from-parsed BOOLEAN DEFAULT FALSE ,
    remove-left BOOLEAN DEFAULT FALSE ,
    remove-right BOOLEAN DEFAULT FALSE ,
    transform Text-transform-set OPTIONAL ,
    existing-text ExistingTextOption }

-- Remove-action: removes one field.
Remove-action ::= SEQUENCE {
    field Field-type }

-- Remove-outside-action: removes field text relative to a text portion.
-- remove-if-not-found defaults to FALSE.
-- NOTE(review): presumably text outside the selected portion is removed,
-- as the type name suggests; confirm.
Remove-outside-action ::= SEQUENCE {
    portion Text-portion ,
    field Field-type ,
    remove-if-not-found BOOLEAN DEFAULT FALSE }

-- Action-choice: one of the eight field-level actions. It is wrapped by
-- AECR-action, which adds the shared options and constraints.
Action-choice ::= CHOICE {
    apply Apply-action ,
    edit Edit-action ,
    convert Convert-action ,
    copy Copy-action ,
    swap Swap-action ,
    remove Remove-action ,
    parse AECRParse-action ,
    remove-outside Remove-outside-action }

-- AECR-action: a field-level action plus its shared options.
-- also-change-mrna defaults to FALSE and constraint is OPTIONAL.
-- Used as the aecr alternative of Macro-action-choice.
AECR-action ::= SEQUENCE {
    action Action-choice ,
    also-change-mrna BOOLEAN DEFAULT FALSE ,
    constraint Constraint-choice-set OPTIONAL }

-- Parse-src-org-choice: organism-related parse source. It is either a
-- source qualifier or the NULL marker taxname-after-binomial.
Parse-src-org-choice ::= CHOICE {
    source-qual Source-qual ,
    taxname-after-binomial NULL }

-- Parse-src-org: organism parse source plus an object type restriction.
-- type defaults to any.
Parse-src-org ::= SEQUENCE {
    field Parse-src-org-choice ,
    type Object-type-constraint DEFAULT any }

-- Parse-src-general-id: which part of a general id to parse from.
-- whole-text and db are NULL markers; tag carries a VisibleString.
-- For the tag alternative, specify the db of the id from which you
-- want to retrieve the tag.  If empty or null, any db will do.
Parse-src-general-id ::= CHOICE {
    whole-text NULL ,
    db NULL ,
    tag VisibleString }

-- Parse-src: where a Parse-action reads its text from. Most alternatives
-- are NULL markers. org and general-id carry a nested selector, and
-- structured-comment carries a VisibleString
-- (NOTE(review): presumably the structured comment field name; confirm).
Parse-src ::= CHOICE {
    defline NULL ,
    flatfile NULL ,
    local-id NULL ,
    org Parse-src-org ,
    comment NULL ,
    bankit-comment NULL ,
    structured-comment VisibleString ,
    file-id NULL ,
    general-id Parse-src-general-id }

-- Parse-dst-org: organism parse destination. It holds a source field
-- plus an object type restriction; type defaults to any.
Parse-dst-org ::= SEQUENCE {
    field Source-qual-choice ,
    type Object-type-constraint DEFAULT any }

-- Parse-dest: where a Parse-action writes its result. defline and
-- comment-descriptor are NULL markers; dbxref carries a VisibleString
-- (NOTE(review): presumably the database name; confirm).
Parse-dest ::= CHOICE {
    defline NULL ,
    org Parse-dst-org ,
    featqual Feature-field-legal ,
    comment-descriptor NULL ,
    dbxref VisibleString }

-- Parse-action: parses a text portion from a source into a destination.
-- capitalization defaults to none, remove-from-parsed defaults to FALSE
-- and transform is OPTIONAL; the other members are required.
-- Used as the parse alternative of Macro-action-choice.
Parse-action ::= SEQUENCE {
    portion Text-portion ,
    src Parse-src ,
    dest Parse-dest ,
    capitalization Cap-change DEFAULT none ,
    remove-from-parsed BOOLEAN DEFAULT FALSE ,
    transform Text-transform-set OPTIONAL ,
    existing-text ExistingTextOption }

-- Location-interval: an interval given by two INTEGER endpoints.
-- NOTE(review): whether positions are 0-based or 1-based and whether
-- the end is inclusive is not stated here; confirm.
Location-interval ::= SEQUENCE {
    from INTEGER ,
    to INTEGER  }

-- Location-choice: a location given as an interval, as the whole
-- sequence (NULL marker), or as a single INTEGER point.
Location-choice ::= CHOICE {
    interval Location-interval ,
    whole-sequence NULL ,
    point INTEGER }

-- Sequence-list: unordered collection of strings
-- (NOTE(review): presumably sequence identifiers; confirm).
Sequence-list ::= SET OF VisibleString
-- Sequence-list-choice: either an explicit Sequence-list or the NULL
-- marker all.
Sequence-list-choice ::= CHOICE {
    list Sequence-list ,
    all NULL }

-- Apply-feature-action: adds a feature of the given type. type, location
-- and seq-list are required. Note that the defaults are mixed:
-- plus-strand and add-redundant default to TRUE, the other BOOLEANs to
-- FALSE, and only-seg-num to -1. fields and src-fields are OPTIONAL.
-- NOTE(review): presumably -1 means no segment restriction; confirm.
Apply-feature-action ::= SEQUENCE {
    type Macro-feature-type ,
    partial5 BOOLEAN DEFAULT FALSE ,
    partial3 BOOLEAN DEFAULT FALSE ,
    plus-strand BOOLEAN DEFAULT TRUE ,
    location Location-choice ,
    seq-list Sequence-list-choice ,
    add-redundant BOOLEAN DEFAULT TRUE ,
    add-mrna BOOLEAN DEFAULT FALSE ,
    apply-to-parts BOOLEAN DEFAULT FALSE ,
    only-seg-num INTEGER DEFAULT -1 ,
    fields Feat-qual-legal-set OPTIONAL,
    src-fields Source-qual-val-set OPTIONAL }

-- Remove-feature-action: removes features of the given type, optionally
-- restricted by a constraint set.
Remove-feature-action ::= SEQUENCE {
    type Macro-feature-type ,
    constraint Constraint-choice-set OPTIONAL }

-- for convert features --
-- Convert-from-CDS-options: three required flags for conversions whose
-- source is a CDS. None of them has a default.
Convert-from-CDS-options ::= SEQUENCE {
  remove-mRNA BOOLEAN ,
  remove-gene BOOLEAN ,
  remove-transcript-id BOOLEAN }

-- Convert-feature-src-options: source-side conversion options. cds is
-- currently the only alternative.
Convert-feature-src-options ::= CHOICE {
  cds Convert-from-CDS-options }

-- Bond-type: bond kinds for the bond alternative of
-- Convert-feature-dst-options (values 1 to 5, with 5 = other).
Bond-type ::= ENUMERATED {
  disulfide (1) ,
  thioester (2) ,
  crosslink (3) ,
  thioether (4) ,
  other (5) }

-- Site-type: site kinds for the site alternative of
-- Convert-feature-dst-options (values 1 to 27, with 27 = other).
Site-type ::= ENUMERATED {
  active (1) ,
  binding (2) ,
  cleavage (3) ,
  inhibit (4) ,
  modified (5) ,
  glycosylation (6) ,
  myristoylation (7) ,
  mutagenized (8) ,
  metal-binding (9) ,
  phosphorylation (10) ,
  acetylation (11) ,
  amidation (12) ,
  methylation (13) ,
  hydroxylation (14) ,
  sulfatation (15) ,
  oxidative-deamination (16) ,
  pyrrolidone-carboxylic-acid (17) ,
  gamma-carboxyglutamic-acid (18) ,
  blocked (19) ,
  lipid-binding (20) ,
  np-binding (21) ,
  dna-binding (22) ,
  signal-peptide (23) ,
  transit-peptide (24) ,
  transmembrane-region (25) ,
  nitrosylation (26) ,
  other (27) }

-- Region-type: option for the region alternative of
-- Convert-feature-dst-options; holds one required flag.
-- other choice is to create protein sequences, skipping bad --
-- NOTE(review): the line above presumably describes create-nucleotide
-- being FALSE; confirm.
Region-type ::= SEQUENCE {
  create-nucleotide BOOLEAN }

-- Convert-feature-dst-options: destination-side conversion options.
-- Exactly one alternative applies per value.
Convert-feature-dst-options ::= CHOICE {
  bond Bond-type ,
  site Site-type ,
  region Region-type ,
  ncrna-class VisibleString ,
  remove-original BOOLEAN }

-- Convert-feature-action: converts features from one type to another.
-- type-from, type-to and leave-original are required; the option and
-- constraint members are OPTIONAL.
-- NOTE(review): leave-original here and remove-original in
-- Convert-feature-dst-options look like overlapping controls; confirm
-- which one takes precedence.
Convert-feature-action ::= SEQUENCE {
  type-from Macro-feature-type ,
  type-to Macro-feature-type ,
  src-options Convert-feature-src-options OPTIONAL ,
  dst-options Convert-feature-dst-options OPTIONAL ,
  leave-original BOOLEAN ,
  src-feat-constraint Constraint-choice-set OPTIONAL }

-- Feature-location-strand-from: the strand to match before a strand
-- edit (values 0 to 4). any (0) exists only on the from side.
Feature-location-strand-from ::= ENUMERATED {
  any (0) ,
  plus (1) ,
  minus (2) ,
  unknown (3) ,
  both (4) }

-- Feature-location-strand-to: the strand to set in a strand edit
-- (values 1 to 5). Values 1 to 4 match Feature-location-strand-from;
-- reverse (5) exists only on the to side and there is no value 0.
Feature-location-strand-to ::= ENUMERATED {
  plus (1) ,
  minus (2) ,
  unknown (3) ,
  both (4) ,
  reverse (5) }

-- Edit-location-strand: a strand edit, given as the strand to match and
-- the strand to set. Used as the strand alternative of
-- Location-edit-type.
Edit-location-strand ::= SEQUENCE {
  strand-from Feature-location-strand-from ,
  strand-to   Feature-location-strand-to }

-- Partial-5-set-constraint: when to set the 5-prime partial
-- (values 0 to 3, with 0 = all).
Partial-5-set-constraint ::= ENUMERATED {
  all (0) ,
  at-end (1) ,
  bad-start (2) ,
  frame-not-one (3) }

-- Partial-5-set-action: sets the 5-prime partial under the given
-- condition. extend is a required flag
-- (NOTE(review): presumably whether to also extend the location;
-- confirm).
Partial-5-set-action ::= SEQUENCE {
  constraint Partial-5-set-constraint ,
  extend BOOLEAN }

-- Partial-5-clear-constraint: when to clear the 5-prime partial
-- (values 0 to 2, with 0 = all).
Partial-5-clear-constraint ::= ENUMERATED {
  all (0) ,
  not-at-end (1) ,
  good-start (2) }

-- Partial-3-set-constraint: when to set the 3-prime partial
-- (values 0 to 2, with 0 = all).
Partial-3-set-constraint ::= ENUMERATED {
  all (0) ,
  at-end (1) ,
  bad-end (2) }

-- Partial-3-set-action: sets the 3-prime partial under the given
-- condition. extend is a required flag
-- (NOTE(review): presumably whether to also extend the location;
-- confirm).
Partial-3-set-action ::= SEQUENCE {
  constraint Partial-3-set-constraint ,
  extend BOOLEAN }

-- Partial-3-clear-constraint: when to clear the 3-prime partial
-- (values 0 to 2, with 0 = all).
Partial-3-clear-constraint ::= ENUMERATED {
  all (0) ,
  not-at-end (1) ,
  good-end (2) }

-- Partial-both-set-constraint: when to set both partials
-- (0 = all, 1 = at-end).
Partial-both-set-constraint ::= ENUMERATED {
  all (0) ,
  at-end (1) }

-- Partial-both-set-action: sets both partials under the given
-- condition. extend is a required flag
-- (NOTE(review): presumably whether to also extend the location;
-- confirm).
Partial-both-set-action ::= SEQUENCE {
  constraint Partial-both-set-constraint ,
  extend BOOLEAN }

-- Partial-both-clear-constraint: when to clear both partials
-- (0 = all, 1 = not-at-end).
Partial-both-clear-constraint ::= ENUMERATED {
  all (0) ,
  not-at-end (1) }

-- Convert-location-type: target form for the convert alternative of
-- Location-edit-type (values 1 to 3).
Convert-location-type ::= ENUMERATED {
  join (1) ,
  order (2) ,
  merge (3) }

-- Extend-to-feature: parameters for extending a location towards a
-- feature of the given type. include-feat is required and distance is
-- OPTIONAL.
-- NOTE(review): presumably include-feat decides whether the target
-- feature itself is covered, and distance limits how far away it may
-- be; confirm.
Extend-to-feature ::= SEQUENCE {
  type Macro-feature-type ,
  include-feat BOOLEAN ,
  distance Quantity-constraint OPTIONAL }

-- Location-edit-type: one kind of location edit. The set- alternatives
-- carry an action (constraint plus extend flag) while the clear-
-- alternatives carry only the constraint. extend-5 and extend-3 are
-- NULL markers; the -to-feat variants carry Extend-to-feature.
Location-edit-type ::= CHOICE {
  strand Edit-location-strand ,
  set-5-partial Partial-5-set-action ,
  clear-5-partial Partial-5-clear-constraint ,
  set-3-partial Partial-3-set-action ,
  clear-3-partial Partial-3-clear-constraint ,
  set-both-partial Partial-both-set-action ,
  clear-both-partial Partial-both-clear-constraint ,
  convert Convert-location-type ,
  extend-5 NULL ,
  extend-3 NULL ,
  extend-5-to-feat Extend-to-feature ,
  extend-3-to-feat Extend-to-feature }

-- Edit-feature-location-action: applies a location edit to features of
-- the given type. retranslate-cds and also-edit-gene are OPTIONAL with
-- no default, so an absent value is distinct from FALSE.
Edit-feature-location-action ::= SEQUENCE {
  type Macro-feature-type ,
  action Location-edit-type ,
  retranslate-cds BOOLEAN OPTIONAL ,
  also-edit-gene BOOLEAN OPTIONAL ,
  constraint Constraint-choice-set OPTIONAL }

-- Molinfo-block: a set of molinfo values to apply. to-list is required;
-- from-list and constraint are OPTIONAL.
-- NOTE(review): presumably from-list restricts the change to records
-- currently holding those values; confirm.
Molinfo-block ::= SEQUENCE {
    to-list Molinfo-field-list  ,
    from-list Molinfo-field-list OPTIONAL ,
    constraint Constraint-choice-set OPTIONAL }

-- Descriptor-type: descriptor kinds for Remove-descriptor-action
-- (values 0 to 11, with 0 = all).
Descriptor-type ::= ENUMERATED {
  all (0) ,
  title (1) ,
  source (2) ,
  publication (3) ,
  comment (4) ,
  genbank (5) ,
  user (6) ,
  create-date (7) ,
  update-date (8) ,
  mol-info (9) ,
  structured-comment (10) ,
  genome-project-id (11) }

-- Remove-descriptor-action: removes descriptors of the given kind,
-- optionally restricted by a constraint set.
Remove-descriptor-action ::= SEQUENCE {
  type Descriptor-type ,
  constraint Constraint-choice-set OPTIONAL }

-- Autodef-list-type: clause list style for Autodef-action
-- (values 1 to 4).
Autodef-list-type ::= ENUMERATED {
  feature-list (1) ,
  complete-sequence (2) ,
  complete-genome (3) ,
  sequence (4) }

-- Autodef-misc-feat-parse-rule: how Autodef-action handles misc
-- features. look-for-noncoding-products (2) is the default there.
Autodef-misc-feat-parse-rule ::= ENUMERATED {
  use-comment-before-first-semicolon (1) ,
  look-for-noncoding-products (2) }

-- Autodef-action: parameters for automatic definition-line generation.
-- modifiers is an OPTIONAL set of source qualifiers, clause-list-type is
-- required, and misc-feat-parse-rule defaults to
-- look-for-noncoding-products.
Autodef-action ::= SEQUENCE {
  modifiers SET OF Source-qual OPTIONAL ,
  clause-list-type Autodef-list-type ,
  misc-feat-parse-rule Autodef-misc-feat-parse-rule DEFAULT look-for-noncoding-products }

-- Fix-pub-caps-action: selects which publication parts get their
-- capitalization fixed. The four part flags are OPTIONAL with no
-- default; punct-only defaults to FALSE.
Fix-pub-caps-action ::= SEQUENCE {
  title BOOLEAN OPTIONAL ,
  authors BOOLEAN OPTIONAL ,
  affiliation BOOLEAN OPTIONAL ,
  affil-country BOOLEAN OPTIONAL ,
  punct-only BOOLEAN DEFAULT FALSE ,
  constraint Constraint-choice-set OPTIONAL }

-- Sort-order: ordering used by Sort-fields-action (values 1 to 3).
Sort-order ::= ENUMERATED {
  short-to-long (1) ,
  long-to-short (2) ,
  alphabetical (3) }

-- Sort-fields-action: sorts the values of one field in the given
-- order, optionally restricted by a constraint set.
Sort-fields-action ::= SEQUENCE {
  field Field-type ,
  order Sort-order ,
  constraint Constraint-choice-set OPTIONAL }

-- Fix-author-caps: option for the author alternative of
-- Fix-caps-action; holds one required flag.
Fix-author-caps ::= SEQUENCE {
  last-name-only BOOLEAN }

-- Fix-caps-action: what to fix capitalization on. src-country and
-- mouse-strain are NULL markers; the other alternatives carry their
-- own parameters.
Fix-caps-action ::= CHOICE {
  pub Fix-pub-caps-action ,
  src-country NULL ,
  mouse-strain NULL ,
  src-qual Source-qual ,
  author Fix-author-caps }

-- Fix-format-action: what to fix the format of. All alternatives are
-- NULL markers.
Fix-format-action ::= CHOICE {
  collection-date NULL ,
  lat-lon NULL ,
  primers NULL ,
  protein-name NULL }

-- Remove-duplicate-feature-action: removes duplicate features of the
-- given type. The three flags are required and have no default.
-- The constraint member is named rd-constraint here, not constraint as
-- in the sibling action types.
Remove-duplicate-feature-action ::= SEQUENCE {
  type Macro-feature-type ,
  ignore-partials BOOLEAN ,
  case-sensitive BOOLEAN ,
  remove-proteins BOOLEAN ,
  rd-constraint Constraint-choice-set OPTIONAL }

-- Gene-xref-suppression-type: filter on whether a gene xref is
-- suppressing (values 0 to 2, with 0 = any).
Gene-xref-suppression-type ::= ENUMERATED {
  any (0) ,
  suppressing (1) ,
  non-suppressing (2) }

-- Gene-xref-necessary-type: filter on whether a gene xref is necessary
-- (values 0 to 2, with 0 = any).
Gene-xref-necessary-type ::= ENUMERATED {
  any (0) ,
  necessary (1) ,
  unnecessary (2) }

-- Gene-xref-type: selects gene xrefs by feature type, suppression state
-- and necessity. All members are required.
Gene-xref-type ::= SEQUENCE {
  feature Macro-feature-type ,
  suppression Gene-xref-suppression-type ,
  necessary Gene-xref-necessary-type }

-- Xref-type: kind of xref to act on. gene is currently the only
-- alternative.
Xref-type ::= CHOICE {
  gene Gene-xref-type }

-- Remove-xrefs-action: removes xrefs of the selected kind, optionally
-- restricted by a constraint set.
Remove-xrefs-action ::= SEQUENCE {
  xref-type Xref-type ,
  constraint Constraint-choice-set OPTIONAL }

-- Make-gene-xref-action: creates gene xrefs on features of the given
-- type, optionally restricted by a constraint set.
Make-gene-xref-action ::= SEQUENCE {
  feature Macro-feature-type ,
  constraint Constraint-choice-set OPTIONAL }

-- Author-fix-type: author name fixes for Author-fix-action
-- (values 1 to 3).
Author-fix-type ::= ENUMERATED {
  truncate-middle-initials (1) ,
  strip-suffix (2) ,
  move-middle-to-first (3) }

-- Author-fix-action: applies one author name fix, optionally restricted
-- by a constraint set.
Author-fix-action ::= SEQUENCE {
  fix-type Author-fix-type ,
  constraint Constraint-choice-set OPTIONAL }

-- Update-sequences-action: updates sequences from the named file.
-- add-cit-subs defaults to FALSE.
Update-sequences-action ::= SEQUENCE {
  filename VisibleString ,
  add-cit-subs BOOLEAN DEFAULT FALSE }

-- Create-TSA-ids-src: where TSA id text comes from. It is either the
-- local id (NULL marker) or a portion of the defline.
Create-TSA-ids-src ::= CHOICE {
  local-id NULL ,
  defline Text-portion
}

-- Create-TSA-ids-action: creates TSA ids from the given source. suffix
-- and id-text-portion are OPTIONAL.
Create-TSA-ids-action ::= SEQUENCE {
  src Create-TSA-ids-src ,
  suffix VisibleString OPTIONAL ,
  id-text-portion Text-portion OPTIONAL }

-- Autofix-action: runs the autofix identified by a test name.
-- NOTE(review): the set of valid test-name strings is not defined in
-- this specification; confirm against the consuming code.
Autofix-action ::= SEQUENCE {
  test-name VisibleString }

-- Fix-sets-action: which set fix to perform. All alternatives are NULL
-- markers.
Fix-sets-action ::= CHOICE {
  remove-single-item-set NULL ,
  renormalize-nuc-prot-sets NULL ,
  fix-pop-to-phy NULL
}

-- Table-match-type: what a table row is matched against. Every
-- alternative is a NULL marker except src-qual, which names the source
-- field to match on.
Table-match-type ::= CHOICE {
  feature-id NULL ,
  gene-locus-tag NULL ,
  protein-id NULL,
  dbxref NULL ,
  nuc-id NULL ,
  src-qual Source-qual-choice ,
  protein-name NULL ,
  bioproject NULL ,
  any NULL
}

-- Table-match: how table rows are matched. It gives the thing to match
-- against plus the string location rule, which defaults to equals.
Table-match ::= SEQUENCE {
  match-type Table-match-type ,
  match-location String-location DEFAULT equals
}

-- Apply-table-extra-data: single-alternative CHOICE whose only member,
-- table, is a NULL marker. It is the type of the in-memory-table member
-- of Apply-table-action and Add-file-action.
-- NOTE(review): presumably a placeholder that the consuming program
-- fills with runtime data; confirm.
Apply-table-extra-data ::= CHOICE {
  table NULL }

-- Apply-table-action: applies values from the named table file.
-- filename and match-type are required and in-memory-table is OPTIONAL.
-- also-change-mrna defaults to FALSE but skip-blanks defaults to TRUE.
Apply-table-action ::= SEQUENCE {
  filename VisibleString ,
  match-type Table-match ,
  in-memory-table Apply-table-extra-data OPTIONAL ,
  also-change-mrna BOOLEAN DEFAULT FALSE ,
  skip-blanks BOOLEAN DEFAULT TRUE
}

-- Add-file-action: names a file to add, with an OPTIONAL in-memory
-- table. Wrapped by Add-descriptor-list-action.
Add-file-action ::= SEQUENCE {
  filename VisibleString ,
  in-memory-table Apply-table-extra-data OPTIONAL
}

-- Add-descriptor-list-action: adds descriptors from a file, optionally
-- restricted by a constraint set.
Add-descriptor-list-action ::= SEQUENCE {
  descriptor-list Add-file-action ,
  constraint Constraint-choice-set OPTIONAL
}

-- Remove-sequences-action: removes sequences selected by a constraint
-- set. Unlike the other action types, constraint is required here.
Remove-sequences-action ::= SEQUENCE {
  constraint Constraint-choice-set
}

-- Update-replaced-ec-numbers-action: three required flags choosing
-- which problematic EC numbers are deleted during the update.
Update-replaced-ec-numbers-action ::= SEQUENCE {
  delete-improper-format BOOLEAN ,
  delete-unrecognized BOOLEAN ,
  delete-multiple-replacement BOOLEAN
}

-- Retranslate-cds-action: option for CDS retranslation; holds one
-- required flag.
Retranslate-cds-action ::= SEQUENCE {
  obey-stop-codon BOOLEAN
}

-- Truncated-ends-partial-type: when truncated ends are made partial in
-- Adjust-features-for-gaps-action (values 1 to 3).
Truncated-ends-partial-type ::= ENUMERATED {
  always (1) ,
  unless-pseudo (2) ,
  never (3) }

-- Adjust-features-for-gaps-action: adjusts features of the given type
-- around gaps. Every member is required; none has a default.
Adjust-features-for-gaps-action ::= SEQUENCE {
  type Macro-feature-type ,
  adjust-for-unknown-length-gaps BOOLEAN ,
  adjust-for-known-length-gaps BOOLEAN ,
  make-truncated-ends-partial Truncated-ends-partial-type ,
  trim-ends-in-gaps BOOLEAN ,
  split-for-internal-gaps BOOLEAN ,
  even-when-gaps-are-in-introns BOOLEAN
}

-- Macro-action-choice: one macro action. Alternatives with a NULL type
-- take no parameters; the others carry the parameter type defined for
-- that action.
-- Two alternative names differ only slightly from member names used
-- elsewhere: retranslate-cds is also a member of
-- Edit-feature-location-action, and fix-pub-caps duplicates what the
-- pub alternative of Fix-caps-action offers.
Macro-action-choice ::= CHOICE {
  aecr AECR-action ,
  parse Parse-action ,
  add-feature Apply-feature-action ,
  remove-feature Remove-feature-action ,
  convert-feature Convert-feature-action ,
  edit-location Edit-feature-location-action ,
  remove-descriptor Remove-descriptor-action ,
  autodef Autodef-action ,
  removesets NULL ,
  trim-junk-from-primer-seq NULL ,
  trim-stop-from-complete-cds NULL ,
  fix-usa-and-states NULL ,
  synchronize-cds-partials NULL ,
  adjust-for-consensus-splice NULL ,
  fix-pub-caps Fix-pub-caps-action ,
  remove-seg-gaps NULL ,
  sort-fields Sort-fields-action ,
  apply-molinfo-block Molinfo-block ,
  fix-caps Fix-caps-action ,
  fix-format Fix-format-action ,
  fix-spell NULL ,
  remove-duplicate-features Remove-duplicate-feature-action ,
  remove-lineage-notes NULL ,
  remove-xrefs Remove-xrefs-action ,
  make-gene-xrefs Make-gene-xref-action ,
  make-bold-xrefs NULL ,
  fix-author Author-fix-action ,
  update-sequences Update-sequences-action ,
  add-trans-splicing NULL ,
  remove-invalid-ecnumbers NULL ,
  create-tsa-ids Create-TSA-ids-action ,
  perform-autofix Autofix-action ,
  fix-sets Fix-sets-action ,
  apply-table Apply-table-action ,
  remove-sequences Remove-sequences-action ,
  propagate-sequence-technology NULL ,
  add-file-descriptors Add-descriptor-list-action ,
  propagate-missing-old-name NULL ,
  autoapply-structured-comments NULL ,
  reorder-structured-comments NULL ,
  remove-duplicate-structured-comments NULL ,
  lookup-taxonomy NULL ,
  lookup-pubs NULL ,
  trim-terminal-ns NULL ,
  update-replaced-ecnumbers Update-replaced-ec-numbers-action ,
  instantiate-protein-titles NULL ,
  retranslate-cds Retranslate-cds-action ,
  add-selenocysteine-except NULL ,
  join-short-trnas NULL ,
  adjust-features-for-gaps Adjust-features-for-gaps-action }

-- Macro-action-list: a collection of macro actions.
-- NOTE(review): declared as SET OF, so the type itself does not
-- guarantee an execution order; confirm how consumers order actions.
Macro-action-list ::= SET OF Macro-action-choice

-- Search-func: one search test for a Suspect-rule (members find and
-- except). NULL alternatives take no parameter. The INTEGER
-- alternatives carry a number (NOTE(review): presumably a count for
-- n-or-more-brackets-or-parentheses and a length limit for too-long;
-- confirm), and the VisibleString alternatives carry text.
Search-func ::= CHOICE {
  string-constraint String-constraint ,
  contains-plural NULL ,
  n-or-more-brackets-or-parentheses INTEGER ,
  three-numbers NULL ,
  underscore NULL ,
  prefix-and-numbers VisibleString ,
  all-caps NULL ,
  unbalanced-paren NULL ,
  too-long INTEGER ,
  has-term VisibleString }

-- Simple-replace: a plain replacement. replace is OPTIONAL; whole-string
-- and weasel-to-putative default to FALSE.
Simple-replace ::= SEQUENCE {
  replace VisibleString OPTIONAL,
  whole-string BOOLEAN DEFAULT FALSE ,
  weasel-to-putative BOOLEAN DEFAULT FALSE }

-- Replace-func: the replacement to perform. It is either a
-- Simple-replace or haem-replace, which carries a VisibleString.
Replace-func ::= CHOICE {
  simple-replace Simple-replace ,
  haem-replace VisibleString }

-- Replace-rule: a replacement plus the move-to-note flag, which
-- defaults to FALSE. Used by Suspect-rule (replace).
Replace-rule ::= SEQUENCE {
  replace-func Replace-func ,
  move-to-note BOOLEAN DEFAULT FALSE }

-- Fix-type: category of a Suspect-rule (values 0 to 14). none (0) is
-- the default for the rule-type member there.
Fix-type ::= ENUMERATED {
  none (0) ,
  typo (1) ,
  putative-typo (2) ,
  quickfix (3) ,
  no-organelle-for-prokaryote (4),
  might-be-nonfunctional (5),
  database (6),
  remove-organism-name (7),
  inappropriate-symbol (8),
  evolutionary-relationship (9),
  use-protein (10),
  hypothetical (11),
  british (12),
  description (13),
  gene (14) }

-- Suspect-rule: one rule for flagging suspect text. find is the only
-- required member. rule-type defaults to none and fatal to FALSE;
-- except, feat-constraint, replace and description are OPTIONAL.
-- NOTE(review): presumably except suppresses the rule when it also
-- matches; confirm.
Suspect-rule ::= SEQUENCE {
  find Search-func ,
  except Search-func OPTIONAL ,
  feat-constraint Constraint-choice-set OPTIONAL ,
  rule-type Fix-type DEFAULT none ,
  replace Replace-rule OPTIONAL ,
  description VisibleString OPTIONAL ,
  fatal BOOLEAN DEFAULT FALSE }

-- Suspect-rule-set: unordered collection of suspect rules.
Suspect-rule-set ::= SET OF Suspect-rule


END