1--$Revision: 97143 $
2--**********************************************************************
3--
4--  NCBI General Data elements
5--  by James Ostell, 1990
6--  Version 3.0 - June 1994
7--
8--**********************************************************************
9
10NCBI-General DEFINITIONS ::=
11BEGIN
12
13EXPORTS Date, Person-id, Object-id, Dbtag, Int-fuzz, User-object;
14
15-- StringStore is really a VisibleString.  It is used to define very
16--   long strings which may need to be stored by the receiving program
17--   in special structures, such as a ByteStore, but it's just a hint.
18--   AsnTool stores StringStores in ByteStore structures.
19-- OCTET STRINGs are also stored in ByteStores by AsnTool
20--
21-- typedef struct bsunit {             /* for building multiline strings */
22   -- Nlm_Handle str;            /* the string piece */
23   -- Nlm_Int2 len_avail,
24       -- len;
25   -- struct bsunit PNTR next; }       /* the next one */
26-- Nlm_BSUnit, PNTR Nlm_BSUnitPtr;
27--
28-- typedef struct bytestore {
29   -- Nlm_Int4 seekptr,       /* current position */
30      -- totlen,             /* total stored data length in bytes */
31      -- chain_offset;       /* offset in ByteStore of first byte in curchain */
32   -- Nlm_BSUnitPtr chain,       /* chain of elements */
33      -- curchain;           /* the BSUnit containing seekptr */
34-- } Nlm_ByteStore, PNTR Nlm_ByteStorePtr;
35--
36-- AsnTool incorporates this as a primitive type, so the definition
37--   is here just for completeness
38--
39--  StringStore ::= [APPLICATION 1] IMPLICIT OCTET STRING
40--
41
42-- BigInt is really an INTEGER. It is used to warn the receiving code to expect
43--   a value bigger than Int4 (actually Int8). It will be stored in DataVal.bigintvalue
44--
45--   Like StringStore, AsnTool incorporates it as a primitive. The definition would be:
46--   BigInt ::= [APPLICATION 2] IMPLICIT INTEGER
47--
48
49-- Date is used to replace the (overly complex) UTCTime, GeneralizedTime
50--  of ASN.1
51--  It stores a date, optionally with a time of day (see Date-std)
52--
53
54Date ::= CHOICE {              -- a date in exactly one of two forms
55    str VisibleString ,        -- free text, for those unparsed dates
56    std Date-std }             -- structured form, use this if you can
57
58Date-std ::= SEQUENCE {        -- structured date; NOTE: this is NOT a unix tm struct
59    year INTEGER ,             -- full year (including 1900); the only required field
60    month INTEGER OPTIONAL ,   -- month (1-12)
61    day INTEGER OPTIONAL ,     -- day of month (1-31)
62    season VisibleString OPTIONAL ,  -- free text, for "spring", "may-june", etc
63    hour INTEGER OPTIONAL ,    -- hour of day (0-23)
64    minute INTEGER OPTIONAL ,  -- minute of hour (0-59)
65    second INTEGER OPTIONAL }  -- second of minute (0-59)
66
67-- Dbtag is generalized for tagging
68-- eg. { "Social Security", str "023-79-8841" }
69-- or  { "member", id 8882224 }
70
71Dbtag ::= SEQUENCE {            -- a tag together with the name of its source
72    db VisibleString ,          -- name of database or system
73    tag Object-id }         -- appropriate tag, integer or string (see Object-id)
74
75-- Object-id can tag or name anything
76--
77
78Object-id ::= CHOICE {      -- an identifier in exactly one of two forms
79    id INTEGER ,            -- integer form
80    str VisibleString }     -- string form
81
82-- Person-id is to define a std element for people
83--
84
85Person-id ::= CHOICE {          -- a person, identified in exactly one of these ways
86    dbtag Dbtag ,               -- any defined database tag
87    name Name-std ,             -- structured name
88    ml VisibleString ,          -- MEDLINE name (semi-structured)
89                                --    eg. "Jones RM"
90    str VisibleString }         -- unstructured name
91
92Name-std ::= SEQUENCE { -- Structured names; only "last" is required
93    last VisibleString ,             -- last name
94    first VisibleString OPTIONAL ,   -- first name
95    middle VisibleString OPTIONAL ,  -- middle name
96    full VisibleString OPTIONAL ,    -- full name eg. "J. John Poop, Esq"
97    initials VisibleString OPTIONAL,  -- first + middle initials
98    suffix VisibleString OPTIONAL ,   -- Jr, Sr, III
99    title VisibleString OPTIONAL }    -- Dr., Sister, etc
100
101--**** Int-fuzz **********************************************
102--*
103--*   uncertainties in integer values
104
105Int-fuzz ::= CHOICE {                -- uncertainty of an integer value, one of:
106    p-m INTEGER ,                    -- plus or minus fixed amount
107    range SEQUENCE {                 -- max to min
108        max INTEGER ,                -- upper end of the range
109        min INTEGER } ,              -- lower end of the range
110    pct INTEGER ,                    -- % plus or minus (x10) 0-1000, i.e. tenths of a percent
111    lim ENUMERATED {                 -- some limit value
112        unk (0) ,                    -- unknown
113        gt (1) ,                     -- greater than
114        lt (2) ,                     -- less than
115        tr (3) ,                     -- space to right of position
116        tl (4) ,                     -- space to left of position
117        circle (5) ,                 -- artificial break at origin of circle
118        other (255) } ,              -- something else
119    alt SET OF INTEGER }             -- set of alternatives for the integer
120
121
122--**** User-object **********************************************
123--*
124--*   a general object for a user defined structured data item
125--*    used by Seq-feat and Seq-descr
126
127User-object ::= SEQUENCE {           -- a user defined structured data item
128    class VisibleString OPTIONAL ,   -- endeavor which designed this object
129    type Object-id ,                 -- type of object within class
130    data SEQUENCE OF User-field }    -- the object itself, as an ordered list of fields
131
132User-field ::= SEQUENCE {            -- one labeled field of a User-object
133    label Object-id ,                -- field label
134    num INTEGER OPTIONAL ,           -- required for strs, ints, reals, oss (presumably the element count, confirm)
135    data CHOICE {                    -- field contents, exactly one of:
136        str VisibleString ,          -- a single string
137        int INTEGER ,                -- a single integer
138        real REAL ,                  -- a single real
139        bool BOOLEAN ,               -- a single boolean
140        os OCTET STRING ,            -- a single octet string
141        object User-object ,         -- for using other definitions
142        strs SEQUENCE OF VisibleString ,   -- list of strings
143        ints SEQUENCE OF INTEGER ,         -- list of integers
144        reals SEQUENCE OF REAL ,           -- list of reals
145        oss SEQUENCE OF OCTET STRING ,     -- list of octet strings
146        fields SEQUENCE OF User-field ,    -- nested fields (recursive)
147        objects SEQUENCE OF User-object } }   -- nested objects (recursive)
148
149
150
151END
152
153--$Revision: 97143 $
154--****************************************************************
155--
156--  NCBI Bibliographic data elements
157--  by James Ostell, 1990
158--
159--  Taken from the American National Standard for
160--      Bibliographic References
161--      ANSI Z39.29-1977
162--  Version 3.0 - June 1994
163--  PubMedId added in 1996
164--  ArticleIds and eprint elements added in 1999
165--
166--****************************************************************
167
168NCBI-Biblio DEFINITIONS ::=
169BEGIN
170
171EXPORTS Cit-art, Cit-jour, Cit-book, Cit-pat, Cit-let, Id-pat, Cit-gen,
172        Cit-proc, Cit-sub, Title, Author, PubMedId;
173
174IMPORTS Person-id, Date, Dbtag FROM NCBI-General;
175
176    -- Article Ids
177
178ArticleId ::= CHOICE {      -- one identifier; an article can carry many of these
179    pubmed PubMedId ,       -- PubMed id (the id types are defined below)
180    medline MedlineUID ,    -- MEDLINE id
181    doi DOI ,               -- Document Object Identifier
182    pii PII ,               -- Controlled Publisher Identifier
183    pmcid PmcID ,           -- PubMed Central Id
184    pmcpid PmcPid ,         -- Publisher Id supplied to PubMed Central
185    pmpid PmPid ,           -- Publisher Id supplied to PubMed
186    other Dbtag }           -- generic catch all
187
188PubMedId ::= INTEGER           -- Id from the PubMed database at NCBI
189MedlineUID ::= INTEGER         -- Id from MEDLINE
190DOI ::= VisibleString          -- Document Object Identifier
191PII ::= VisibleString          -- Controlled Publisher Identifier
192PmcID ::= INTEGER              -- PubMed Central Id
193PmcPid ::= VisibleString       -- Publisher Id supplied to PubMed Central
194PmPid ::= VisibleString        -- Publisher Id supplied to PubMed
195
196ArticleIdSet ::= SET OF ArticleId   -- unordered collection of ids for an article
197
198    -- Status Dates
199
200PubStatus ::= INTEGER {            -- points of publication
201    received  (1) ,            -- date manuscript received for review
202    accepted  (2) ,            -- accepted for publication
203    epublish  (3) ,            -- published electronically by publisher
204    ppublish  (4) ,            -- published in print by publisher
205    revised   (5) ,            -- article revised by publisher/author
206    pmc       (6) ,            -- article first appeared in PubMed Central
207    pmcr      (7) ,            -- article revision in PubMed Central
208    pubmed    (8) ,            -- article citation first appeared in PubMed
209    pubmedr   (9) ,            -- article citation revision in PubMed
210    aheadofprint (10),         -- epublish, but will be followed by print
211    premedline (11),           -- date into PreMedline status
212    medline    (12),           -- date made a MEDLINE record
213    other    (255) }           -- a point of publication not named above
214
215PubStatusDate ::= SEQUENCE {   -- done as a structure so fields can be added
216    pubstatus PubStatus ,      -- the point of publication this date belongs to
217    date Date }                -- time may be added later
218
219PubStatusDateSet ::= SET OF PubStatusDate   -- unordered collection of status dates
220
221    -- Citation Types
222
223Cit-art ::= SEQUENCE {                  -- article in journal or book
224    title Title OPTIONAL ,              -- title of paper (ANSI requires)
225    authors Auth-list OPTIONAL ,        -- authors (ANSI requires)
226    from CHOICE {                       -- journal, book, or proceedings
227        journal Cit-jour ,              -- article from a journal
228        book Cit-book ,                 -- article from a book
229        proc Cit-proc } ,               -- article from meeting proceedings
230    ids ArticleIdSet OPTIONAL }         -- lots of ids
231
232Cit-jour ::= SEQUENCE {             -- Journal citation
233    title Title ,                   -- title of journal
234    imp Imprint }                   -- imprint group (date, volume, issue, pages, ...)
235
236Cit-book ::= SEQUENCE {              -- Book citation
237    title Title ,                    -- Title of book
238    coll Title OPTIONAL ,            -- part of a collection
239    authors Auth-list,               -- authors
240    imp Imprint }                    -- imprint group (date, publisher, ...)
241
242Cit-proc ::= SEQUENCE {             -- Meeting proceedings
243    book Cit-book ,                 -- citation to meeting
244    meet Meeting }                  -- number, date and place of meeting
245
246    -- Patent number and date-issue were made optional in 1997 to
247    --   support patent applications being issued from the USPTO
248    --   Semantically a Cit-pat must have either a patent number or
249    --   an application number (or both) to be valid
250
251Cit-pat ::= SEQUENCE {                  -- patent citation
252    title VisibleString ,               -- title
253    authors Auth-list,                  -- author/inventor
254    country VisibleString ,             -- Patent Document Country
255    doc-type VisibleString ,            -- Patent Document Type
256    number VisibleString OPTIONAL,      -- Patent Document Number
257    date-issue Date OPTIONAL,           -- Patent Issue/Pub Date
258    class SEQUENCE OF VisibleString OPTIONAL ,      -- Patent Doc Class Code
259    app-number VisibleString OPTIONAL , -- Patent Doc Appl Number
260    app-date Date OPTIONAL ,            -- Patent Appl File Date
261    applicants Auth-list OPTIONAL ,     -- Applicants
262    assignees Auth-list OPTIONAL ,      -- Assignees
263    priority SEQUENCE OF Patent-priority OPTIONAL , -- Priorities
264    abstract VisibleString OPTIONAL }   -- abstract of patent
265
266Patent-priority ::= SEQUENCE {         -- one priority entry of a patent
267    country VisibleString ,             -- Patent country code
268    number VisibleString ,              -- number assigned in that country
269    date Date }                         -- date of application
270
271Id-pat ::= SEQUENCE {                   -- just to identify a patent
272    country VisibleString ,             -- Patent Document Country
273    id CHOICE {                         -- exactly one of the two numbers
274        number VisibleString ,          -- Patent Document Number
275        app-number VisibleString } ,    -- Patent Doc Appl Number
276    doc-type VisibleString OPTIONAL }   -- Patent Doc Type
277
278Cit-let ::= SEQUENCE {                  -- letter, thesis, or manuscript
279    cit Cit-book ,                      -- same fields as a book
280    man-id VisibleString OPTIONAL ,     -- Manuscript identifier
281    type ENUMERATED {                   -- which of the three kinds this is
282        manuscript (1) ,
283        letter (2) ,
284        thesis (3) } OPTIONAL }
285                                -- NOTE: this is just to cite a
286                                -- direct data submission, see NCBI-Submit
287                                -- for the form of a sequence submission
288Cit-sub ::= SEQUENCE {              -- citation for a direct submission
289    authors Auth-list ,             -- not necessarily authors of the paper
290    imp Imprint OPTIONAL ,          -- this only used to get date.. will go
291    medium ENUMERATED {             -- medium of submission
292        paper (1) ,
293        tape (2) ,
294        floppy (3) ,
295        email (4) ,
296        other (255) } OPTIONAL ,
297    date Date OPTIONAL ,            -- replaces imp, will become required
298    descr VisibleString OPTIONAL }  -- description of changes for public view
299
300Cit-gen ::= SEQUENCE {                 -- NOT from ANSI, this is a catchall
301    cit VisibleString OPTIONAL ,       -- anything, not parsable
302    authors Auth-list OPTIONAL ,
303    muid INTEGER OPTIONAL ,            -- medline uid
304    journal Title OPTIONAL ,
305    volume VisibleString OPTIONAL ,
306    issue VisibleString OPTIONAL ,
307    pages VisibleString OPTIONAL ,
308    date Date OPTIONAL ,
309    serial-number INTEGER OPTIONAL ,   -- for GenBank style references
310    title VisibleString OPTIONAL ,     -- eg. cit="unpublished",title="title"
311    pmid PubMedId OPTIONAL }           -- PubMed Id
312
313
314    -- Authorship Group
315Auth-list ::= SEQUENCE {                  -- the authors of a citation
316    names CHOICE {                        -- all names, in one of three forms:
317        std SEQUENCE OF Author ,          -- full citations
318        ml SEQUENCE OF VisibleString ,    -- MEDLINE, semi-structured
319        str SEQUENCE OF VisibleString } , -- free for all
320    affil Affil OPTIONAL }                -- author affiliation
321
322Author ::= SEQUENCE {                   -- one author of a citation
323    name Person-id ,                        -- Author, Primary or Secondary
324    level ENUMERATED {                  -- primary or secondary author
325        primary (1),
326        secondary (2) } OPTIONAL ,
327    role ENUMERATED {                   -- Author Role Indicator
328        compiler (1),
329        editor (2),
330        patent-assignee (3),
331        translator (4) } OPTIONAL ,
332    affil Affil OPTIONAL ,              -- affiliation of this author
333    is-corr BOOLEAN OPTIONAL }          -- TRUE if corresponding author
334
335Affil ::= CHOICE {                          -- an affiliation, in one of two forms
336    str VisibleString ,                     -- unparsed string
337    std SEQUENCE {                          -- std representation
338        affil VisibleString OPTIONAL ,      -- Author Affiliation, Name
339        div VisibleString OPTIONAL ,        -- Author Affiliation, Division
340        city VisibleString OPTIONAL ,       -- Author Affiliation, City
341        sub VisibleString OPTIONAL ,        -- Author Affiliation, County Sub
342        country VisibleString OPTIONAL ,    -- Author Affiliation, Country
343        street VisibleString OPTIONAL ,     -- street address, not ANSI
344        email VisibleString OPTIONAL ,
345        fax VisibleString OPTIONAL ,
346        phone VisibleString OPTIONAL ,
347        postal-code VisibleString OPTIONAL } }
348
349    -- Title Group
350    -- Valid for = A = Analytic (Cit-art)
351    --             J = Journals (Cit-jour)
352    --             B = Book (Cit-book)
353                                                 -- Valid for:
354Title ::= SET OF CHOICE {    -- unordered set of title forms; A, J, B per the legend above
355    name VisibleString ,    -- Title, Anal,Coll,Mono    AJB
356    tsub VisibleString ,    -- Title, Subordinate       A B
357    trans VisibleString ,   -- Title, Translated        AJB
358    jta VisibleString ,     -- Title, Abbreviated        J
359    iso-jta VisibleString , -- specifically ISO jta      J
360    ml-jta VisibleString ,  -- specifically MEDLINE jta  J
361    coden VisibleString ,   -- a coden                   J
362    issn VisibleString ,    -- ISSN                      J
363    abr VisibleString ,     -- Title, Abbreviated         B
364    isbn VisibleString }    -- ISBN                       B
365
366Imprint ::= SEQUENCE {                  -- Imprint group
367    date Date ,                         -- date of publication
368    volume VisibleString OPTIONAL ,
369    issue VisibleString OPTIONAL ,
370    pages VisibleString OPTIONAL ,
371    section VisibleString OPTIONAL ,
372    pub Affil OPTIONAL,                     -- publisher, required for book
373    cprt Date OPTIONAL,                     -- copyright date, required for book
374    part-sup VisibleString OPTIONAL ,       -- part/sup of volume
375    language VisibleString DEFAULT "ENG" ,  -- put here for simplicity; "ENG" when absent
376    prepub ENUMERATED {                     -- for prepublication citations
377        submitted (1) ,                     -- submitted, not accepted
378        in-press (2) ,                      -- accepted, not published
379        other (255)  } OPTIONAL ,
380    part-supi VisibleString OPTIONAL ,      -- part/sup on issue
381    retract CitRetract OPTIONAL ,           -- retraction info
382    pubstatus PubStatus OPTIONAL ,          -- current status of this publication
383    history PubStatusDateSet OPTIONAL }     -- dates for this record
384
385CitRetract ::= SEQUENCE {             -- retraction or erratum info for a citation
386    type ENUMERATED {                    -- retraction of an entry
387        retracted (1) ,               -- this citation retracted
388        notice (2) ,                  -- this citation is a retraction notice
389        in-error (3) ,                -- an erratum was published about this
390        erratum (4) } ,               -- this is a published erratum
391    exp VisibleString OPTIONAL }      -- citation and/or explanation
392
393Meeting ::= SEQUENCE {      -- the meeting cited by a Cit-proc
394    number VisibleString ,  -- meeting number, as text
395    date Date ,             -- date of the meeting
396    place Affil OPTIONAL }  -- place of the meeting
397
398
399END
400
401
402--$Revision: 97143 $
403--**********************************************************************
404--
405--  MEDLINE data definitions
406--  James Ostell, 1990
407--
408--  enhanced in 1996 to support PubMed records as well by simply adding
409--    the PubMedId and making MedlineId optional
410--
411--**********************************************************************
412
413NCBI-Medline DEFINITIONS ::=
414BEGIN
415
416EXPORTS Medline-entry, Medline-si;
417
418IMPORTS Cit-art, PubMedId FROM NCBI-Biblio
419        Date FROM NCBI-General;
420
421                                -- a MEDLINE or PubMed entry
422Medline-entry ::= SEQUENCE {                 -- one MEDLINE or PubMed record
423    uid INTEGER OPTIONAL ,                   -- MEDLINE UID, sometimes not yet available if from PubMed
424    em Date ,                                -- Entry Month
425    cit Cit-art ,                            -- article citation
426    abstract VisibleString OPTIONAL ,
427    mesh SET OF Medline-mesh OPTIONAL ,      -- MeSH terms
428    substance SET OF Medline-rn OPTIONAL ,   -- substance records
429    xref SET OF Medline-si OPTIONAL ,        -- cross reference records
430    idnum SET OF VisibleString OPTIONAL ,    -- ID Number (grants, contracts)
431    gene SET OF VisibleString OPTIONAL ,
432    pmid PubMedId OPTIONAL ,                 -- MEDLINE records may include the PubMedId
433    pub-type SET OF VisibleString OPTIONAL,  -- may show publication types (review, etc)
434    mlfield SET OF Medline-field OPTIONAL ,  -- additional Medline field types
435    status INTEGER {                         -- which kind of record this is
436        publisher (1) ,                      -- record as supplied by publisher
437        premedline (2) ,                     -- premedline record
438        medline (3) } DEFAULT medline }      -- regular medline record (the default)
439
440Medline-mesh ::= SEQUENCE {          -- one MeSH term with its qualifiers
441    mp BOOLEAN DEFAULT FALSE ,       -- TRUE if main point (*)
442    term VisibleString ,                   -- the MeSH term
443    qual SET OF Medline-qual OPTIONAL }    -- qualifiers
444
445Medline-qual ::= SEQUENCE {          -- one qualifier of a MeSH term
446    mp BOOLEAN DEFAULT FALSE ,       -- TRUE if main point
447    subh VisibleString }             -- the subheading
448
449Medline-rn ::= SEQUENCE {       -- medline substance records
450    type ENUMERATED {           -- type of record
451        nameonly (0) ,          -- name only, no number
452        cas (1) ,               -- CAS number
453        ec (2) } ,              -- EC number
454    cit VisibleString OPTIONAL ,  -- CAS or EC number if present
455    name VisibleString }          -- name (always present)
456
457Medline-si ::= SEQUENCE {       -- medline cross reference records
458    type ENUMERATED {           -- type of xref: the database referred to
459        ddbj (1) ,              -- DNA Data Bank of Japan
460        carbbank (2) ,          -- Carbohydrate Structure Database
461        embl (3) ,              -- EMBL Data Library
462        hdb (4) ,               -- Hybridoma Data Bank
463        genbank (5) ,           -- GenBank
464        hgml (6) ,              -- Human Gene Map Library
465        mim (7) ,               -- Mendelian Inheritance in Man
466        msd (8) ,               -- Microbial Strains Database
467        pdb (9) ,               -- Protein Data Bank (Brookhaven)
468        pir (10) ,              -- Protein Identification Resource
469        prfseqdb (11) ,         -- Protein Research Foundation (Japan)
470        psd (12) ,              -- Protein Sequence Database (Japan)
471        swissprot (13) ,        -- SwissProt
472        gdb (14) } ,            -- Genome Data Base
473    cit VisibleString OPTIONAL }    -- the citation/accession number in that database
474
475Medline-field ::= SEQUENCE {           -- an additional typed text field
476    type INTEGER {                     -- Keyed type
477        other (0) ,                    -- look in line code
478        comment (1) ,                  -- comment line
479        erratum (2) } ,                -- retracted, corrected, etc
480    str VisibleString ,                -- the text
481    ids SEQUENCE OF DocRef OPTIONAL }  -- pointers relevant to this text
482
483DocRef ::= SEQUENCE {           -- reference to a document
484    type INTEGER {              -- presumably says which kind of id uid is (confirm)
485        medline (1) ,
486        pubmed (2) ,
487        ncbigi (3) } ,
488    uid INTEGER }               -- the integer id of the document
489
490END
491
492--$Revision: 97143 $
493--**********************************************************************
494--
495--  MEDLARS data definitions
496--  Grigoriy Starchenko, 1997
497--
498--**********************************************************************
499
500NCBI-Medlars DEFINITIONS ::=
501BEGIN
502
503EXPORTS Medlars-entry, Medlars-record;
504
505IMPORTS PubMedId FROM NCBI-Biblio;
506
507Medlars-entry ::= SEQUENCE {     -- a MEDLARS entry
508    pmid PubMedId,               -- All entries in PubMed must have it (required)
509    muid INTEGER OPTIONAL,       -- Medline(OCCS) id
510    recs SET OF Medlars-record   -- List of Medlars records (SET OF, so unordered)
511}
512
513Medlars-record ::= SEQUENCE {    -- one unit record of a MEDLARS entry
514    code INTEGER,                -- Unit record field type integer form
515    abbr VisibleString OPTIONAL, -- Unit record field type abbreviation form
516    data VisibleString           -- Unit record data
517}
518
519END
520--$Revision: 97143 $
521--********************************************************************
522--
523--  Publication common set
524--  James Ostell, 1990
525--
526--  This is the base class definitions for Publications of all sorts
527--
528--  support for PubMedId added in 1996
529--********************************************************************
530
531NCBI-Pub DEFINITIONS ::=
532BEGIN
533
534EXPORTS Pub, Pub-set, Pub-equiv;
535
536IMPORTS Medline-entry FROM NCBI-Medline
537        Cit-art, Cit-jour, Cit-book, Cit-proc, Cit-pat, Id-pat, Cit-gen,
538        Cit-let, Cit-sub, PubMedId FROM NCBI-Biblio;
539
540Pub ::= CHOICE {             -- a publication, in exactly one of these forms
541    gen Cit-gen ,            -- general or generic unparsed
542    sub Cit-sub ,            -- submission
543    medline Medline-entry ,
544    muid INTEGER ,           -- medline uid
545    article Cit-art ,
546    journal Cit-jour ,
547    book Cit-book ,
548    proc Cit-proc ,          -- proceedings of a meeting
549    patent Cit-pat ,
550    pat-id Id-pat ,          -- identify a patent
551    man Cit-let ,            -- manuscript, thesis, or letter
552    equiv Pub-equiv,         -- to cite a variety of ways
553    pmid PubMedId }          -- PubMedId
554
555Pub-equiv ::= SET OF Pub   -- equivalent identifiers for same citation (unordered)
556
557Pub-set ::= CHOICE {            -- a set of publications, all of one form
558    pub SET OF Pub ,            -- any mixture of Pub alternatives
559    medline SET OF Medline-entry ,
560    article SET OF Cit-art ,
561    journal SET OF Cit-jour ,
562    book SET OF Cit-book ,
563    proc SET OF Cit-proc ,      -- proceedings of a meeting
564    patent SET OF Cit-pat }
565
566END
567
568--$Revision: 97143 $
569--**********************************************************************
570--
571--  PUBMED data definitions
572--
573--**********************************************************************
574
575NCBI-PubMed DEFINITIONS ::=
576BEGIN
577
578EXPORTS Pubmed-entry, Pubmed-url;
579
580IMPORTS PubMedId FROM NCBI-Biblio
581        Medline-entry FROM NCBI-Medline;
582
583Pubmed-entry ::= SEQUENCE {        -- a PubMed entry
584    -- PUBMED records must include the PubMedId (the only required field)
585    pmid PubMedId,
586
587    -- Medline entry information
588    medent Medline-entry OPTIONAL,
589
590    -- Publisher name
591    publisher VisibleString OPTIONAL,
592
593    -- List of URLs to the publisher site
594    urls SET OF Pubmed-url OPTIONAL,
595
596    -- Publisher's article identifier
597    pubid VisibleString OPTIONAL
598}
599
600Pubmed-url ::= SEQUENCE {            -- one URL of a Pubmed-entry
601    location VisibleString OPTIONAL, -- Location code
602    url VisibleString                -- Selected URL for location
603}
604
605END
606--$Revision: 97143 $
607--**********************************************************************
608--
609--  NCBI Sequence location and identifier elements
610--  by James Ostell, 1990
611--
612--  Version 3.0 - 1994
613--
614--**********************************************************************
615
616NCBI-Seqloc DEFINITIONS ::=
617BEGIN
618
619EXPORTS Seq-id, Seq-loc, Seq-interval, Packed-seqint, Seq-point, Packed-seqpnt,
620        Na-strand, Giimport-id;
621
622IMPORTS Object-id, Int-fuzz, Dbtag, Date FROM NCBI-General
623        Id-pat FROM NCBI-Biblio
624        Feat-id FROM NCBI-Seqfeat;
625
626--*** Sequence identifiers ********************************
627--*
628
629Seq-id ::= CHOICE {         -- a sequence identifier, exactly one of:
630    local Object-id ,      -- local use
631    gibbsq INTEGER ,         -- Geninfo backbone seqid
632    gibbmt INTEGER ,         -- Geninfo backbone moltype
633    giim Giimport-id ,       -- Geninfo import id
634    genbank Textseq-id ,     -- GenBank
635    embl Textseq-id ,        -- EMBL
636    pir Textseq-id ,         -- PIR
637    swissprot Textseq-id ,   -- SwissProt
638    patent Patent-seq-id ,   -- sequence cited from a patent
639    other Textseq-id ,       -- catch all
640    general Dbtag ,          -- for other databases
641    gi INTEGER ,             -- GenInfo Integrated Database
642    ddbj Textseq-id ,        -- DDBJ
643    prf Textseq-id ,         -- PRF SEQDB
644    pdb PDB-seq-id }         -- PDB sequence
645
646Patent-seq-id ::= SEQUENCE {   -- identifies a sequence within a patent
647    seqid INTEGER ,         -- number of sequence in patent
648    cit Id-pat }           -- patent citation
649
650Textseq-id ::= SEQUENCE {          -- text based sequence id; every field is optional
651    name VisibleString OPTIONAL ,
652    accession VisibleString OPTIONAL ,
653    release VisibleString OPTIONAL ,
654    version INTEGER OPTIONAL }
655
656Giimport-id ::= SEQUENCE {       -- Geninfo import id
657    id INTEGER ,               -- the id to use here
658    db VisibleString OPTIONAL ,  -- dbase used in
659    release VisibleString OPTIONAL }   -- the release
660
661PDB-seq-id ::= SEQUENCE {     -- PDB sequence identifier
662    mol PDB-mol-id ,          -- the molecule name
663    chain INTEGER DEFAULT 32 ,-- a single ASCII character, chain id (32 is the space character)
664    rel Date OPTIONAL }   -- release date, month and year
665
666PDB-mol-id ::= VisibleString  -- name of mol, 4 chars (length is not enforced by this type)
667
668--*** Sequence locations **********************************
669--*
670
671Seq-loc ::= CHOICE {      -- a location on sequence(s), exactly one of:
672    null NULL ,           -- not placed
673    empty Seq-id ,        -- to NULL one Seq-id in a collection
674    whole Seq-id ,        -- whole sequence
675    int Seq-interval ,    -- from to
676    packed-int Packed-seqint ,   -- list of intervals
677    pnt Seq-point ,              -- a single point
678    packed-pnt Packed-seqpnt ,   -- several points on one Seq-id
679    mix Seq-loc-mix ,            -- ordered list of any locations
680    equiv Seq-loc-equiv ,  -- equivalent sets of locations
681    bond Seq-bond ,        -- bond between residues
682    feat Feat-id }         -- indirect, through a Seq-feat
683
684
685Seq-interval ::= SEQUENCE {     -- a from-to interval on one sequence
686    from INTEGER ,
687    to INTEGER ,
688    strand Na-strand OPTIONAL ,
689    id Seq-id ,    -- WARNING: this used to be optional
690    fuzz-from Int-fuzz OPTIONAL ,   -- uncertainty of from
691    fuzz-to Int-fuzz OPTIONAL }     -- uncertainty of to
692
693Packed-seqint ::= SEQUENCE OF Seq-interval   -- ordered list of intervals
694
695Seq-point ::= SEQUENCE {        -- a single point on one sequence
696    point INTEGER ,
697    strand Na-strand OPTIONAL ,
698    id Seq-id ,     -- WARNING: this used to be optional
699    fuzz Int-fuzz OPTIONAL }    -- uncertainty of point
700
701Packed-seqpnt ::= SEQUENCE {    -- several points sharing one id, strand and fuzz
702    strand Na-strand OPTIONAL ,
703    id Seq-id ,
704    fuzz Int-fuzz OPTIONAL ,
705    points SEQUENCE OF INTEGER }    -- the points, as an ordered list
706
707Na-strand ::= ENUMERATED {          -- strand of nucleic acid
708    unknown (0) ,
709    plus (1) ,
710    minus (2) ,
711    both (3) ,                -- in forward orientation
712    both-rev (4) ,            -- in reverse orientation
713    other (255) }
714
715Seq-bond ::= SEQUENCE {         -- bond between residues
716    a Seq-point ,           -- connection to at least one residue
717    b Seq-point OPTIONAL }  -- other end may not be available
718
719Seq-loc-mix ::= SEQUENCE OF Seq-loc   -- ordered list; this will hold anything
720
721Seq-loc-equiv ::= SET OF Seq-loc      -- for a set of equivalent locations (unordered)
722
723END
724
725
726--$Revision: 97143 $
727--**********************************************************************
728--
729--  NCBI Sequence Alignment elements
730--  by James Ostell, 1990
731--
732--**********************************************************************
733
734NCBI-Seqalign DEFINITIONS ::=
735BEGIN
736
737EXPORTS Seq-align, Score, Score-set, Seq-align-set;
738
739IMPORTS Seq-id, Seq-loc , Na-strand FROM NCBI-Seqloc
740        Object-id FROM NCBI-General;
741
742--*** Sequence Alignment ********************************
743--*
744
745Seq-align-set ::= SET OF Seq-align   -- unordered collection of alignments
746
747Seq-align ::= SEQUENCE {              -- one sequence alignment
748    type ENUMERATED {                 -- kind of alignment
749        not-set (0) ,
750        global (1) ,
751        diags (2) ,                   -- unbroken, but not ordered, diagonals
752        partial (3) ,                 -- mapping pieces together
753        disc (4) ,                    -- discontinuous alignment
754        other (255) } ,
755    dim INTEGER OPTIONAL ,            -- dimensionality
756    score SET OF Score OPTIONAL ,     -- for whole alignment
757    segs CHOICE {                     -- alignment data
758        dendiag SEQUENCE OF Dense-diag ,
759        denseg Dense-seg ,
760        std SEQUENCE OF Std-seg ,
761        packed Packed-seg ,
762        disc Seq-align-set } ,
763    bounds SET OF Seq-loc OPTIONAL }  -- regions of sequence over which align
764                                      --  was computed
765
766Dense-diag ::= SEQUENCE {         -- for (multiway) diagonals
767    dim INTEGER DEFAULT 2 ,    -- dimensionality
768    ids SEQUENCE OF Seq-id ,   -- sequences in order
769    starts SEQUENCE OF INTEGER ,  -- start OFFSETS in ids order
770    len INTEGER ,                 -- len of aligned segments
771    strands SEQUENCE OF Na-strand OPTIONAL ,   -- presumably in ids order (confirm)
772    scores SET OF Score OPTIONAL }             -- scores for this diagonal
773
774    -- Dense-seg: the densest packing for sequence alignments only.
775    --            a start of -1 indicates a gap for that sequence of
776    --            length lens.
777    --
778    -- id=100  AAGGCCTTTTAGAGATGATGATGATGATGA
779    -- id=200  AAGGCCTTTTAG.......GATGATGATGA
780    -- id=300  ....CCTTTTAGAGATGATGAT....ATGA
781    --
782    -- dim = 3, numseg = 6, ids = { 100, 200, 300 }
783    -- starts = { 0,0,-1, 4,4,0, 12,-1,8, 19,12,15, 22,15,-1, 26,19,18 }
784    -- lens = { 4, 8, 7, 3, 4, 4 }
785    --
786
787Dense-seg ::= SEQUENCE {          -- for (multiway) global or partial alignments
788    dim INTEGER DEFAULT 2 ,       -- dimensionality
789    numseg INTEGER ,              -- number of segments here
790    ids SEQUENCE OF Seq-id ,      -- sequences in order
791    starts SEQUENCE OF INTEGER ,  -- start OFFSETS in ids order within segs; -1 marks a gap
792    lens SEQUENCE OF INTEGER ,    -- lengths in ids order within segs
793    strands SEQUENCE OF Na-strand OPTIONAL ,
794    scores SEQUENCE OF Score OPTIONAL }  -- score for each seg
795
796Packed-seg ::= SEQUENCE {         -- for (multiway) global or partial alignments
797    dim INTEGER DEFAULT 2 ,       -- dimensionality
798    numseg INTEGER ,              -- number of segments here
799    ids SEQUENCE OF Seq-id ,      -- sequences in order
800    starts SEQUENCE OF INTEGER ,  -- start OFFSETS in ids order for whole alignment
801    present OCTET STRING ,        -- Boolean if each sequence present or absent in
802                                  --   each segment (packed into octets)
803    lens SEQUENCE OF INTEGER ,    -- length of each segment
804    strands SEQUENCE OF Na-strand OPTIONAL ,
805    scores SEQUENCE OF Score OPTIONAL }  -- score for each segment
806
-- Std-seg: a segment given as a list of Seq-loc; ids and scores may be omitted.
Std-seg ::= SEQUENCE {
    dim    INTEGER DEFAULT 2,                -- dimensionality (2 when omitted)
    ids    SEQUENCE OF Seq-id OPTIONAL,
    loc    SEQUENCE OF Seq-loc,
    scores SET OF Score OPTIONAL
}
812
-- use of Score is discouraged for external ASN.1 specifications
-- Score: an optional id plus a value that is either a REAL or an INTEGER.
Score ::= SEQUENCE {
    id    Object-id OPTIONAL,
    value CHOICE {
        real REAL,
        int  INTEGER
    }
}
819
-- use of Score-set is encouraged for external ASN.1 specifications
-- Score-set: an unordered collection of Score values.
Score-set ::=
    SET OF Score
822
823END
824
825--$Revision: 97143 $
826--*********************************************************************
827--
828-- 1990 - J.Ostell
829-- Version 3.0 - June 1994
830--
831--*********************************************************************
832--*********************************************************************
833--
834--  EMBL specific data
835--  This block of specifications was developed by Reiner Fuchs of EMBL
836--  Updated by J.Ostell, 1994
837--
838--*********************************************************************
839
EMBL-General DEFINITIONS ::=
BEGIN

EXPORTS
    EMBL-dbname,
    EMBL-xref,
    EMBL-block;

IMPORTS
    Date, Object-id FROM NCBI-General;

-- EMBL-dbname: a database is named either by one of the enumerated
--   codes or by a free-text name.
EMBL-dbname ::= CHOICE {
    code ENUMERATED {
        embl      (0),
        genbank   (1),
        ddbj      (2),
        geninfo   (3),
        medline   (4),
        swissprot (5),
        pir       (6),
        pdb       (7),
        epd       (8),
        ecd       (9),
        tfd       (10),
        flybase   (11),
        prosite   (12),
        enzyme    (13),
        mim       (14),
        ecoseq    (15),
        hiv       (16),
        other     (255)
    },
    name VisibleString
}

-- EMBL-xref: a database name together with a sequence of Object-ids.
EMBL-xref ::= SEQUENCE {
    dbname EMBL-dbname,
    id     SEQUENCE OF Object-id
}

-- EMBL-block: EMBL specific data.  class is standard when omitted;
--   div, extra-acc, keywords and xref may each be left out.
EMBL-block ::= SEQUENCE {
    class ENUMERATED {
        not-set     (0),
        standard    (1),
        unannotated (2),
        other       (255)
    } DEFAULT standard,
    div ENUMERATED {
        fun   (0),
        inv   (1),
        mam   (2),
        org   (3),
        phg   (4),
        pln   (5),
        pri   (6),
        pro   (7),
        rod   (8),
        syn   (9),
        una   (10),
        vrl   (11),
        vrt   (12),
        pat   (13),
        est   (14),
        sts   (15),
        other (255)
    } OPTIONAL,
    creation-date Date,
    update-date   Date,
    extra-acc     SEQUENCE OF VisibleString OPTIONAL,
    keywords      SEQUENCE OF VisibleString OPTIONAL,
    xref          SEQUENCE OF EMBL-xref OPTIONAL
}

END
904
905--*********************************************************************
906--
907--  SWISSPROT specific data
908--  This block of specifications was developed by Mark Cavanaugh of
909--      NCBI working with Amos Bairoch of SWISSPROT
910--
911--*********************************************************************
912
SP-General DEFINITIONS ::=
BEGIN

EXPORTS
    SP-block;

IMPORTS
    Date, Dbtag FROM NCBI-General
    Seq-id      FROM NCBI-Seqloc;

-- SP-block: SWISSPROT specific descriptions.
SP-block ::= SEQUENCE {
    class ENUMERATED {
        not-set  (0),
        standard (1),                          -- conforms to all SWISSPROT checks
        prelim   (2),                          -- only seq and biblio checked
        other    (255)
    },
    extra-acc SET OF VisibleString OPTIONAL,   -- old SWISSPROT ids
    imeth     BOOLEAN DEFAULT FALSE,           -- seq known to start with Met
    plasnm    SET OF VisibleString OPTIONAL,   -- plasmid names carrying gene
    seqref    SET OF Seq-id OPTIONAL,          -- xref to other sequences
    dbref     SET OF Dbtag OPTIONAL,           -- xref to non-sequence dbases
    keywords  SET OF VisibleString OPTIONAL,   -- keywords
    created   Date OPTIONAL,                   -- creation date
    sequpd    Date OPTIONAL,                   -- sequence update
    annotupd  Date OPTIONAL                    -- annotation update
}

END
938
939--*********************************************************************
940--
941--  PIR specific data
942--  This block of specifications was developed by Jim Ostell of
943--      NCBI
944--
945--*********************************************************************
946
PIR-General DEFINITIONS ::=
BEGIN

EXPORTS
    PIR-block;

IMPORTS
    Seq-id FROM NCBI-Seqloc;

-- PIR-block: PIR specific descriptions.  Every element is OPTIONAL.
PIR-block ::= SEQUENCE {
    had-punct       BOOLEAN OPTIONAL,                    -- had punctuation in sequence ?
    host            VisibleString OPTIONAL,
    source          VisibleString OPTIONAL,              -- source line
    summary         VisibleString OPTIONAL,
    genetic         VisibleString OPTIONAL,
    includes        VisibleString OPTIONAL,
    placement       VisibleString OPTIONAL,
    superfamily     VisibleString OPTIONAL,
    keywords        SEQUENCE OF VisibleString OPTIONAL,
    cross-reference VisibleString OPTIONAL,
    date            VisibleString OPTIONAL,
    seq-raw         VisibleString OPTIONAL,              -- seq with punctuation
    seqref          SET OF Seq-id OPTIONAL               -- xref to other sequences
}

END
970
971--*********************************************************************
972--
973--  GenBank specific data
974--  This block of specifications was developed by Jim Ostell of
975--      NCBI
976--
977--*********************************************************************
978
GenBank-General DEFINITIONS ::=
BEGIN

EXPORTS
    GB-block;

IMPORTS
    Date FROM NCBI-General;

-- GB-block: GenBank specific descriptions.  Every element is OPTIONAL.
GB-block ::= SEQUENCE {
    extra-accessions SEQUENCE OF VisibleString OPTIONAL,
    source           VisibleString OPTIONAL,              -- source line
    keywords         SEQUENCE OF VisibleString OPTIONAL,
    origin           VisibleString OPTIONAL,
    date             VisibleString OPTIONAL,              -- OBSOLETE old form Entry Date
    entry-date       Date OPTIONAL,                       -- replaces date
    div              VisibleString OPTIONAL,              -- GenBank division
    taxonomy         VisibleString OPTIONAL               -- continuation line of organism
}

END
997
998--**********************************************************************
999-- PRF specific definition
--    PRF is a protein sequence database created and maintained by
1001--    Protein Research Foundation, Minoo-city, Osaka, Japan.
1002--
1003--    Written by A.Ogiwara, Inst.Chem.Res. (Dr.Kanehisa's Lab),
1004--            Kyoto Univ., Japan
1005--
1006--**********************************************************************
1007
PRF-General DEFINITIONS ::=
BEGIN

EXPORTS
    PRF-block;

-- PRF-block: PRF specific data; both elements are OPTIONAL.
PRF-block ::= SEQUENCE {
    extra-src PRF-ExtraSrc OPTIONAL,
    keywords  SEQUENCE OF VisibleString OPTIONAL }

-- PRF-ExtraSrc: free-text source details; every element is OPTIONAL.
PRF-ExtraSrc ::= SEQUENCE {
    host   VisibleString OPTIONAL,
    part   VisibleString OPTIONAL,
    state  VisibleString OPTIONAL,
    strain VisibleString OPTIONAL,
    taxon  VisibleString OPTIONAL }

END
1027
1028--*********************************************************************
1029--
1030--  PDB specific data
1031--  This block of specifications was developed by Jim Ostell and
1032--      Steve Bryant of NCBI
1033--
1034--*********************************************************************
1035
PDB-General DEFINITIONS ::=
BEGIN

EXPORTS
    PDB-block;

IMPORTS
    Date FROM NCBI-General;

-- PDB-block: PDB specific descriptions.
PDB-block ::= SEQUENCE {
    deposition Date,                          -- deposition date  month,year
    class      VisibleString,
    compound   SEQUENCE OF VisibleString,
    source     SEQUENCE OF VisibleString,
    exp-method VisibleString OPTIONAL,        -- present if NOT X-ray diffraction
    replace    PDB-replace OPTIONAL           -- replacement history
}

-- PDB-replace: one replacement event, a date plus the affected entry ids.
PDB-replace ::= SEQUENCE {
    date Date,
    ids  SEQUENCE OF VisibleString            -- entry ids replaced by this one
}

END
1056
1057--$Revision: 97143 $
1058--**********************************************************************
1059--
1060--  NCBI Sequence Feature elements
1061--  by James Ostell, 1990
1062--  Version 3.0 - June 1994
1063--
1064--**********************************************************************
1065
NCBI-Seqfeat DEFINITIONS ::=
BEGIN

EXPORTS
    Seq-feat,
    Feat-id,
    Genetic-code;

IMPORTS
    Gene-ref                      FROM NCBI-Gene
    Prot-ref                      FROM NCBI-Protein
    Org-ref                       FROM NCBI-Organism
    BioSource                     FROM NCBI-BioSource
    RNA-ref                       FROM NCBI-RNA
    Seq-loc, Giimport-id          FROM NCBI-Seqloc
    Pubdesc, Numbering, Heterogen FROM NCBI-Sequence
    Rsite-ref                     FROM NCBI-Rsite
    Txinit                        FROM NCBI-TxInit
    Pub-set                       FROM NCBI-Pub
    Object-id, Dbtag, User-object FROM NCBI-General;

--*** Feature identifiers ********************************
--*

Feat-id ::= CHOICE {
    gibb    INTEGER,                 -- geninfo backbone
    giim    Giimport-id,             -- geninfo import
    local   Object-id,               -- for local software use
    general Dbtag                    -- for use by various databases
}

--*** Seq-feat *******************************************
--*  sequence feature generalization

Seq-feat ::= SEQUENCE {
    id       Feat-id OPTIONAL,
    data     SeqFeatData,                   -- the specific data
    partial  BOOLEAN OPTIONAL,              -- incomplete in some way?
    except   BOOLEAN OPTIONAL,              -- something funny about this?
    comment  VisibleString OPTIONAL,
    product  Seq-loc OPTIONAL,              -- product of process
    location Seq-loc,                       -- feature made from
    qual     SEQUENCE OF Gb-qual OPTIONAL,  -- qualifiers
    title    VisibleString OPTIONAL,        -- for user defined label
    ext      User-object OPTIONAL,          -- user defined structure extension
    cit      Pub-set OPTIONAL,              -- citations for this feature
    exp-ev   ENUMERATED {                   -- evidence for existence of feature
        experimental     (1),               -- any reasonable experimental check
        not-experimental (2)                -- similarity, pattern, etc
    } OPTIONAL,
    xref     SET OF SeqFeatXref OPTIONAL,   -- cite other relevant features
    dbxref   SET OF Dbtag OPTIONAL,         -- support for xref to other databases
    pseudo   BOOLEAN OPTIONAL,              -- annotated on pseudogene?
    except-text VisibleString OPTIONAL      -- explain if except=TRUE
}

-- SeqFeatData: the kind-specific payload of a feature; exactly one
--   alternative is chosen.
SeqFeatData ::= CHOICE {
    gene     Gene-ref,
    org      Org-ref,
    cdregion Cdregion,
    prot     Prot-ref,
    rna      RNA-ref,
    pub      Pubdesc,                -- publication applies to this seq
    seq      Seq-loc,                -- to annotate origin from another seq
    imp      Imp-feat,
    region   VisibleString,          -- named region (globin locus)
    comment  NULL,                   -- just a comment
    bond     ENUMERATED {
        disulfide  (1),
        thiolester (2),
        xlink      (3),
        thioether  (4),
        other      (255)
    },
    site     ENUMERATED {
        active                      (1),
        binding                     (2),
        cleavage                    (3),
        inhibit                     (4),
        modified                    (5),
        glycosylation               (6),
        myristoylation              (7),
        mutagenized                 (8),
        metal-binding               (9),
        phosphorylation             (10),
        acetylation                 (11),
        amidation                   (12),
        methylation                 (13),
        hydroxylation               (14),
        sulfatation                 (15),
        oxidative-deamination       (16),
        pyrrolidone-carboxylic-acid (17),
        gamma-carboxyglutamic-acid  (18),
        blocked                     (19),
        lipid-binding               (20),
        np-binding                  (21),
        dna-binding                 (22),
        signal-peptide              (23),
        transit-peptide             (24),
        transmembrane-region        (25),
        other                       (255)
    },
    rsite    Rsite-ref,              -- restriction site  (for maps really)
    user     User-object,            -- user defined structure
    txinit   Txinit,                 -- transcription initiation
    num      Numbering,              -- a numbering system
    psec-str ENUMERATED {            -- protein secondary structure
        helix (1),                   -- any helix
        sheet (2),                   -- beta sheet
        turn  (3)                    -- beta or gamma turn
    },
    non-std-residue VisibleString,   -- non-standard residue here in seq
    het      Heterogen,              -- cofactor, prosthetic grp, etc, bound to seq
    biosrc   BioSource
}

-- SeqFeatXref: both elements are optional because can have one or both.
SeqFeatXref ::= SEQUENCE {
    id   Feat-id OPTIONAL,           -- the feature copied
    data SeqFeatData OPTIONAL        -- the specific data
}

--*** CdRegion ***********************************************
--*
--*  Instructions to translate from a nucleic acid to a peptide
--*    conflict means it's supposed to translate but doesn't
--*


Cdregion ::= SEQUENCE {
    orf        BOOLEAN OPTIONAL,                 -- just an ORF ?
    frame      ENUMERATED {                      -- reading frame
        not-set (0),                             -- not set, code uses one
        one     (1),
        two     (2),
        three   (3)
    } DEFAULT not-set,
    conflict   BOOLEAN OPTIONAL,                 -- conflict
    gaps       INTEGER OPTIONAL,                 -- number of gaps on conflict/except
    mismatch   INTEGER OPTIONAL,                 -- number of mismatches on above
    code       Genetic-code OPTIONAL,            -- genetic code used
    code-break SEQUENCE OF Code-break OPTIONAL,  -- individual exceptions
    stops      INTEGER OPTIONAL                  -- number of stop codons on above
}

-- Genetic-code layout:
--   each code is 64 cells long, in the order where T=0,C=1,A=2,G=3,
--   so that TTT=0, TTC=1, TCT=4, TCA=6, etc.
--   NOTE: this order does NOT correspond to a Seq-data encoding.
--   It is "natural" to codon usage instead.
--   The value in each cell is the AA coded for.
--   start= AA coded only if first in peptide:
--     in a start array, if a codon is not a legitimate start codon,
--     that cell will have the "gap" symbol for that alphabet.
--     Otherwise it will have the AA encoded when that codon is used
--     at the start.

Genetic-code ::= SET OF CHOICE {
    name       VisibleString,        -- name of a code
    id         INTEGER,              -- id in dbase
    ncbieaa    VisibleString,        -- indexed to IUPAC extended
    ncbi8aa    OCTET STRING,         -- indexed to NCBI8aa
    ncbistdaa  OCTET STRING,         -- indexed to NCBIstdaa
    sncbieaa   VisibleString,        -- start, indexed to IUPAC extended
    sncbi8aa   OCTET STRING,         -- start, indexed to NCBI8aa
    sncbistdaa OCTET STRING          -- start, indexed to NCBIstdaa
}

-- Code-break: specific codon exceptions.
Code-break ::= SEQUENCE {
    loc Seq-loc,                     -- location of exception
    aa  CHOICE {                     -- the amino acid
        ncbieaa   INTEGER,           -- ASCII value of NCBIeaa code
        ncbi8aa   INTEGER,           -- NCBI8aa code
        ncbistdaa INTEGER            -- NCBIstdaa code
    }
}

-- Genetic-code-table: table of genetic codes.
Genetic-code-table ::=
    SET OF Genetic-code

--*** Import ***********************************************
--*
--*  Features imported from other databases
--*

Imp-feat ::= SEQUENCE {
    key   VisibleString,
    loc   VisibleString OPTIONAL,    -- original location string
    descr VisibleString OPTIONAL     -- text description
}

-- Gb-qual: a qualifier given as a pair of strings, qual and val.
Gb-qual ::= SEQUENCE {
    qual VisibleString,
    val  VisibleString
}

END
1241
1242--**********************************************************************
1243--
1244--  NCBI Restriction Sites
1245--  by James Ostell, 1990
1246--  version 0.8
1247--
1248--**********************************************************************
1249
NCBI-Rsite DEFINITIONS ::=
BEGIN

EXPORTS
    Rsite-ref;

IMPORTS
    Dbtag FROM NCBI-General;

-- Rsite-ref: a restriction site given either as a string or as a Dbtag.
Rsite-ref ::= CHOICE {
    str VisibleString,               -- may be unparsable
    db  Dbtag                        -- pointer to a restriction site database
}

END
1262
1263--**********************************************************************
1264--
1265--  NCBI RNAs
1266--  by James Ostell, 1990
1267--  version 0.8
1268--
1269--**********************************************************************
1270
NCBI-RNA DEFINITIONS ::=
BEGIN

EXPORTS
    RNA-ref,
    Trna-ext;

IMPORTS
    Seq-loc FROM NCBI-Seqloc;

--*** rnas ***********************************************
--*
--*  various rnas
--*

-- RNA-ref: minimal RNA sequence.
RNA-ref ::= SEQUENCE {
    type   ENUMERATED {              -- type of RNA feature
        unknown (0),
        premsg  (1),
        mRNA    (2),
        tRNA    (3),
        rRNA    (4),
        snRNA   (5),
        scRNA   (6),
        other   (255)
    },
    pseudo BOOLEAN OPTIONAL,
    ext    CHOICE {
        name VisibleString,          -- for naming "other" type
        tRNA Trna-ext                -- for tRNAs
    } OPTIONAL
}

-- Trna-ext: tRNA feature extensions.  Every element is OPTIONAL.
Trna-ext ::= SEQUENCE {
    aa        CHOICE {               -- aa this carries
        iupacaa   INTEGER,
        ncbieaa   INTEGER,
        ncbi8aa   INTEGER,
        ncbistdaa INTEGER
    } OPTIONAL,
    codon     SET OF INTEGER OPTIONAL,   -- codon(s) as in Genetic-code
    anticodon Seq-loc OPTIONAL           -- location of anticodon
}

END
1308
1309--**********************************************************************
1310--
1311--  NCBI Genes
1312--  by James Ostell, 1990
1313--  version 0.8
1314--
1315--**********************************************************************
1316
NCBI-Gene DEFINITIONS ::=
BEGIN

EXPORTS
    Gene-ref;

IMPORTS
    Dbtag FROM NCBI-General;

--*** Gene ***********************************************
--*
--*  reference to a gene
--*

Gene-ref ::= SEQUENCE {
    locus  VisibleString OPTIONAL,           -- Official gene symbol
    allele VisibleString OPTIONAL,           -- Official allele designation
    desc   VisibleString OPTIONAL,           -- descriptive name
    maploc VisibleString OPTIONAL,           -- descriptive map location
    pseudo BOOLEAN DEFAULT FALSE,            -- pseudogene
    db     SET OF Dbtag OPTIONAL,            -- ids in other dbases
    syn    SET OF VisibleString OPTIONAL     -- synonyms for locus
}

END
1339
1340
1341--**********************************************************************
1342--
1343--  NCBI Organism
1344--  by James Ostell, 1994
1345--  version 3.0
1346--
1347--**********************************************************************
1348
NCBI-Organism DEFINITIONS ::=
BEGIN

EXPORTS
    Org-ref;

IMPORTS
    Dbtag FROM NCBI-General;

--*** Org-ref ***********************************************
--*
--*  Reference to an organism
--*     defines only the organism.. lower levels of detail for biological
--*     molecules are provided by the Source object
--*

Org-ref ::= SEQUENCE {
    taxname VisibleString OPTIONAL,          -- preferred formal name
    common  VisibleString OPTIONAL,          -- common name
    mod     SET OF VisibleString OPTIONAL,   -- unstructured modifiers
    db      SET OF Dbtag OPTIONAL,           -- ids in taxonomic or culture dbases
    syn     SET OF VisibleString OPTIONAL,   -- synonyms for taxname or common
    orgname OrgName OPTIONAL
}


-- OrgName: a structured organism name.  Every element is OPTIONAL.
OrgName ::= SEQUENCE {
    name    CHOICE {
        binomial    BinomialOrgName,         -- genus/species type name
        virus       VisibleString,           -- virus names are different
        hybrid      MultiOrgName,            -- hybrid between organisms
        namedhybrid BinomialOrgName,         -- some hybrids have genus x species name
        partial     PartialOrgName           -- when genus not known
    } OPTIONAL,
    attrib  VisibleString OPTIONAL,          -- attribution of name
    mod     SEQUENCE OF OrgMod OPTIONAL,
    lineage VisibleString OPTIONAL,          -- lineage with semicolon separators
    gcode   INTEGER OPTIONAL,                -- genetic code (see CdRegion)
    mgcode  INTEGER OPTIONAL,                -- mitochondrial genetic code
    div     VisibleString OPTIONAL           -- GenBank division code
}


-- OrgMod: one modifier, a subtype code plus its name.
OrgMod ::= SEQUENCE {
    subtype INTEGER {
        strain           (2),
        substrain        (3),
        type             (4),
        subtype          (5),
        variety          (6),
        serotype         (7),
        serogroup        (8),
        serovar          (9),
        cultivar         (10),
        pathovar         (11),
        chemovar         (12),
        biovar           (13),
        biotype          (14),
        group            (15),
        subgroup         (16),
        isolate          (17),
        common           (18),
        acronym          (19),
        dosage           (20),       -- chromosome dosage of hybrid
        nat-host         (21),       -- natural host of this specimen
        sub-species      (22),
        specimen-voucher (23),
        authority        (24),
        forma            (25),
        forma-specialis  (26),
        ecotype          (27),
        synonym          (28),
        anamorph         (29),
        teleomorph       (30),
        breed            (31),
        old-lineage      (253),
        old-name         (254),      -- ASN5: once announced for the next spec, now defined here
        other            (255)
    },
    subname VisibleString,
    attrib  VisibleString OPTIONAL   -- attribution/source of name
}

BinomialOrgName ::= SEQUENCE {
    genus      VisibleString,            -- required
    species    VisibleString OPTIONAL,   -- species required if subspecies used
    subspecies VisibleString OPTIONAL
}

-- MultiOrgName: the first will be used to assign division.
MultiOrgName ::=
    SEQUENCE OF OrgName

-- PartialOrgName: when we don't know the genus.
PartialOrgName ::=
    SEQUENCE OF TaxElement

TaxElement ::= SEQUENCE {
    fixed-level INTEGER {
        other  (0),                  -- level must be set in string
        family (1),
        order  (2),
        class  (3)
    },
    level       VisibleString OPTIONAL,
    name        VisibleString
}

END
1444
1445
1446--**********************************************************************
1447--
1448--  NCBI BioSource
1449--  by James Ostell, 1994
1450--  version 3.0
1451--
1452--**********************************************************************
1453
NCBI-BioSource DEFINITIONS ::=
BEGIN

EXPORTS
    BioSource;

IMPORTS
    Org-ref FROM NCBI-Organism;

--********************************************************************
--
-- BioSource gives the source of the biological material
--   for sequences
--
--********************************************************************

BioSource ::= SEQUENCE {
    genome   INTEGER {                       -- biological context
        unknown          (0),
        genomic          (1),
        chloroplast      (2),
        chromoplast      (3),
        kinetoplast      (4),
        mitochondrion    (5),
        plastid          (6),
        macronuclear     (7),
        extrachrom       (8),
        plasmid          (9),
        transposon       (10),
        insertion-seq    (11),
        cyanelle         (12),
        proviral         (13),
        virion           (14),
        -- the four values below were once only announced as coming
        -- in this spec; they are now defined
        nucleomorph      (15),
        apicoplast       (16),
        leucoplast       (17),
        proplastid       (18),
        endogenous-virus (19)
    } DEFAULT unknown,
    origin   INTEGER {
        unknown    (0),
        natural    (1),                      -- normal biological entity
        natmut     (2),                      -- naturally occurring mutant
        mut        (3),                      -- artificially mutagenized
        artificial (4),                      -- artificially engineered
        synthetic  (5),                      -- purely synthetic
        other      (255)
    } DEFAULT unknown,
    org      Org-ref,
    subtype  SEQUENCE OF SubSource OPTIONAL,
    is-focus NULL OPTIONAL                   -- to distinguish biological focus
}

-- SubSource: one source qualifier, a subtype code plus its name.
SubSource ::= SEQUENCE {
    subtype INTEGER {
        chromosome            (1),
        map                   (2),
        clone                 (3),
        subclone              (4),
        haplotype             (5),
        genotype              (6),
        sex                   (7),
        cell-line             (8),
        cell-type             (9),
        tissue-type           (10),
        clone-lib             (11),
        dev-stage             (12),
        frequency             (13),
        germline              (14),
        rearranged            (15),
        lab-host              (16),
        pop-variant           (17),
        tissue-lib            (18),
        plasmid-name          (19),
        transposon-name       (20),
        insertion-seq-name    (21),
        plastid-name          (22),
        country               (23),
        segment               (24),
        endogenous-virus-name (25),
        other                 (255)
    },
    name    VisibleString,
    attrib  VisibleString OPTIONAL           -- attribution/source of this name
}

END
1540
1541--**********************************************************************
1542--
1543--  NCBI Protein
1544--  by James Ostell, 1990
1545--  version 0.8
1546--
1547--**********************************************************************
1548
NCBI-Protein DEFINITIONS ::=
BEGIN

EXPORTS
    Prot-ref;

IMPORTS
    Dbtag FROM NCBI-General;

--*** Prot-ref ***********************************************
--*
--*  Reference to a protein name
--*

Prot-ref ::= SEQUENCE {
    name      SET OF VisibleString OPTIONAL,   -- protein name
    desc      VisibleString OPTIONAL,          -- description (instead of name)
    ec        SET OF VisibleString OPTIONAL,   -- E.C. number(s)
    activity  SET OF VisibleString OPTIONAL,   -- activities
    db        SET OF Dbtag OPTIONAL,           -- ids in other dbases
    processed ENUMERATED {                     -- processing status
        not-set         (0),
        preprotein      (1),
        mature          (2),
        signal-peptide  (3),
        transit-peptide (4)
    } DEFAULT not-set
}

END
1577--********************************************************************
1578--
1579--  Transcription Initiation Site Feature Data Block
1580--  James Ostell, 1991
1581--  Philip Bucher, David Ghosh
1582--  version 1.1
1583--
1584--
1585--
1586--********************************************************************
1587
NCBI-TxInit DEFINITIONS ::=
BEGIN

EXPORTS
    Txinit;

IMPORTS
    Gene-ref FROM NCBI-Gene
    Prot-ref FROM NCBI-Protein
    Org-ref  FROM NCBI-Organism;

-- Txinit: transcription initiation site feature data.
Txinit ::= SEQUENCE {
    name       VisibleString,                        -- descriptive name of initiation site
    syn        SEQUENCE OF VisibleString OPTIONAL,   -- synonyms
    gene       SEQUENCE OF Gene-ref OPTIONAL,        -- gene(s) transcribed
    protein    SEQUENCE OF Prot-ref OPTIONAL,        -- protein(s) produced
    rna        SEQUENCE OF VisibleString OPTIONAL,   -- rna(s) produced
    expression VisibleString OPTIONAL,               -- tissue/time of expression
    txsystem   ENUMERATED {          -- transcription apparatus used at this site
        unknown   (0),
        pol1      (1),               -- eukaryotic Pol I
        pol2      (2),               -- eukaryotic Pol II
        pol3      (3),               -- eukaryotic Pol III
        bacterial (4),
        viral     (5),
        rna       (6),               -- RNA replicase
        organelle (7),
        other     (255)
    },
    txdescr    VisibleString OPTIONAL,               -- modifiers on txsystem
    txorg      Org-ref OPTIONAL,     -- organism supplying transcription apparatus
    mapping-precise   BOOLEAN DEFAULT FALSE,         -- mapping precise or approx
    location-accurate BOOLEAN DEFAULT FALSE,         -- does Seq-loc reflect mapping
    inittype   ENUMERATED {
        unknown  (0),
        single   (1),
        multiple (2),
        region   (3)
    } OPTIONAL,
    evidence   SET OF Tx-evidence OPTIONAL
}

-- Tx-evidence: one piece of experimental evidence for an initiation site.
Tx-evidence ::= SEQUENCE {
    exp-code ENUMERATED {
        unknown    (0),
        rna-seq    (1),              -- direct RNA sequencing
        rna-size   (2),              -- RNA length measurement
        np-map     (3),              -- nuclease protection mapping with homologous sequence ladder
        np-size    (4),              -- nuclease protected fragment length measurement
        pe-seq     (5),              -- dideoxy RNA sequencing
        cDNA-seq   (6),              -- full-length cDNA sequencing
        pe-map     (7),              -- primer extension mapping with homologous sequence ladder
        pe-size    (8),              -- primer extension product length measurement
        pseudo-seq (9),              -- full-length processed pseudogene sequencing
        rev-pe-map (10),             -- see NOTE (1) below
        other      (255)
    },
    expression-system ENUMERATED {
        unknown       (0),
        physiological (1),
        in-vitro      (2),
        oocyte        (3),
        transfection  (4),
        transgenic    (5),
        other         (255)
    } DEFAULT physiological,
    low-prec-data BOOLEAN DEFAULT FALSE,
    from-homolog  BOOLEAN DEFAULT FALSE      -- experiment actually done on
                                             --  close homolog
}

    -- NOTE (1) length measurement of a reverse direction primer-extension
    --          product (blocked by RNA 5'end) by comparison with
    --          homologous sequence ladder (J. Mol. Biol. 199, 587)


END
1657
1658--$Revision: 97143 $
1659--**********************************************************************
1660--
1661--  NCBI Sequence Analysis Results (other than alignments)
1662--  by James Ostell, 1990
1663--
1664--**********************************************************************
1665
1666NCBI-Seqres DEFINITIONS ::=
1667BEGIN
1668
1669EXPORTS Seq-graph;
1670
1671IMPORTS Seq-loc FROM NCBI-Seqloc;
1672
1673--*** Sequence Graph ********************************
1674--*
1675--*   for values mapped by residue or range to sequence
1676--*
1677
1678Seq-graph ::= SEQUENCE {
1679    title VisibleString OPTIONAL ,
1680    comment VisibleString OPTIONAL ,
1681    loc Seq-loc ,                       -- region this applies to
1682    title-x VisibleString OPTIONAL ,    -- title for x-axis
1683    title-y VisibleString OPTIONAL ,
1684    comp INTEGER OPTIONAL ,             -- compression (residues/value)
1685    a REAL OPTIONAL ,                   -- for scaling values
1686    b REAL OPTIONAL ,                   -- display = (a x value) + b
1687    numval INTEGER ,                    -- number of values in graph
1688    graph CHOICE {
1689        real Real-graph ,
1690        int Int-graph ,
1691        byte Byte-graph } }
1692
1693Real-graph ::= SEQUENCE {
1694    max REAL ,                          -- top of graph
1695    min REAL ,                          -- bottom of graph
1696    axis REAL ,                         -- value to draw axis on
1697    values SEQUENCE OF REAL }
1698
-- Int-graph: same field layout as Real-graph, but with INTEGER values.
-- Field names match Real-graph; presumably max/min/axis carry the same
-- meaning (top, bottom, axis position).
Int-graph ::= SEQUENCE {
    max INTEGER ,
    min INTEGER ,
    axis INTEGER ,
    values SEQUENCE OF INTEGER }        -- the graph values
1704
-- Byte-graph: graph values packed into an OCTET STRING.  max, min and
-- axis are still plain INTEGERs, as in Int-graph.
Byte-graph ::= SEQUENCE {              -- integer from 0-255
    max INTEGER ,
    min INTEGER ,
    axis INTEGER ,
    values OCTET STRING }
1710
1711END
1712
1713--$Revision: 97143 $
1714--**********************************************************************
1715--
1716--  NCBI Sequence Collections
1717--  by James Ostell, 1990
1718--
1719--  Version 3.0 - 1994
1720--
1721--**********************************************************************
1722
1723NCBI-Seqset DEFINITIONS ::=
1724BEGIN
1725
1726EXPORTS Bioseq-set, Seq-entry;
1727
1728IMPORTS Bioseq, Seq-annot, Seq-descr FROM NCBI-Sequence
1729        Object-id, Dbtag, Date FROM NCBI-General;
1730
1731--*** Sequence Collections ********************************
1732--*
1733
-- Bioseq-set: a collection of Seq-entry items (seq-set), optionally with
-- its own identifiers, descriptors and annotations.  Only seq-set is
-- mandatory; class says what kind of collection this is and defaults to
-- not-set.
Bioseq-set ::= SEQUENCE {      -- just a collection
    id Object-id OPTIONAL ,
    coll Dbtag OPTIONAL ,          -- to identify a collection
    level INTEGER OPTIONAL ,       -- nesting level
    class ENUMERATED {
        not-set (0) ,
        nuc-prot (1) ,              -- nuc acid and coded proteins
        segset (2) ,                -- segmented sequence + parts
        conset (3) ,                -- constructed sequence + parts
        parts (4) ,                 -- parts for segset (2) or conset (3)
        gibb (5) ,                  -- geninfo backbone
        gi (6) ,                    -- geninfo
        genbank (7) ,               -- converted genbank
        pir (8) ,                   -- converted pir
        pub-set (9) ,               -- all the seqs from a single publication
        equiv (10) ,                -- a set of equivalent maps or seqs
        swissprot (11) ,            -- converted SWISSPROT
        pdb-entry (12) ,            -- a complete PDB entry
        mut-set (13) ,              -- set of mutations
        pop-set (14) ,              -- population study
        phy-set (15) ,              -- phylogenetic study
	eco-set (16) ,              -- ecological sample study
        gen-prod-set (17) ,         -- genomic products, chrom+mRNA+protein
        other (255) } DEFAULT not-set ,
    release VisibleString OPTIONAL ,
    date Date OPTIONAL ,
    descr Seq-descr OPTIONAL ,
    seq-set SEQUENCE OF Seq-entry ,
    annot SET OF Seq-annot OPTIONAL }
1763
-- Seq-entry: either a single Bioseq or a Bioseq-set.  Since
-- Bioseq-set.seq-set is a SEQUENCE OF Seq-entry, the two types are
-- mutually recursive and sets can nest.
Seq-entry ::= CHOICE {
        seq Bioseq ,
        set Bioseq-set }
1767
1768END
1769
1770--$Revision: 97143 $
1771--**********************************************************************
1772--
1773--  NCBI Sequence elements
1774--  by James Ostell, 1990
1775--  Version 3.0 - June 1994
1776--
1777--**********************************************************************
1778
1779NCBI-Sequence DEFINITIONS ::=
1780BEGIN
1781
1782EXPORTS Bioseq, Seq-annot, Pubdesc, Seq-descr, Seqdesc, Numbering, Heterogen,
1783        Seq-hist, GIBB-mol;
1784
1785IMPORTS Date, Int-fuzz, Dbtag, Object-id, User-object FROM NCBI-General
1786        Seq-align FROM NCBI-Seqalign
1787        Seq-feat FROM NCBI-Seqfeat
1788        Seq-graph FROM NCBI-Seqres
1789        Pub-equiv FROM NCBI-Pub
1790        Org-ref FROM NCBI-Organism
1791        BioSource FROM NCBI-BioSource
1792        Seq-id, Seq-loc FROM NCBI-Seqloc
1793        GB-block FROM GenBank-General
1794        PIR-block FROM PIR-General
1795        EMBL-block FROM EMBL-General
1796        SP-block FROM SP-General
1797        PRF-block FROM PRF-General
1798        PDB-block FROM PDB-General;
1799
1800--*** Sequence ********************************
1801--*
1802
-- Bioseq: one sequence.  id and inst are required; descr and annot are
-- optional.
Bioseq ::= SEQUENCE {
    id SET OF Seq-id ,            -- equivalent identifiers
    descr Seq-descr OPTIONAL , -- descriptors
    inst Seq-inst ,            -- the sequence data
    annot SET OF Seq-annot OPTIONAL }
1808
1809--*** Descriptors *****************************
1810--*
1811
-- Seq-descr: an unordered set of descriptors, each a Seqdesc.
Seq-descr ::= SET OF Seqdesc
1813
-- Seqdesc: a single descriptor; exactly one alternative is present.
-- mol-type, modif, method and org are the older forms that this
-- specification consolidates into Org-ref, BioSource and MolInfo.
Seqdesc ::= CHOICE {
    mol-type GIBB-mol ,          -- type of molecule
    modif SET OF GIBB-mod ,             -- modifiers
    method GIBB-method ,         -- sequencing method
    name VisibleString ,         -- a name for this sequence
    title VisibleString ,        -- a title for this sequence
    org Org-ref ,                -- if all from one organism
    comment VisibleString ,      -- a more extensive comment
    num Numbering ,              -- a numbering system
    maploc Dbtag ,               -- map location of this sequence
    pir PIR-block ,              -- PIR specific info
    genbank GB-block ,           -- GenBank specific info
    pub Pubdesc ,                -- a reference to the publication
    region VisibleString ,       -- overall region (globin locus)
    user User-object ,           -- user defined object
    sp SP-block ,                -- SWISSPROT specific info
    dbxref Dbtag ,               -- xref to other databases
    embl EMBL-block ,            -- EMBL specific information
    create-date Date ,           -- date entry first created/released
    update-date Date ,           -- date of last update
    prf PRF-block ,              -- PRF specific information
    pdb PDB-block ,              -- PDB specific information
    het Heterogen ,              -- cofactor, etc associated but not bound
    source BioSource ,           -- source of materials, includes Org-ref
    molinfo MolInfo }            -- info on the molecule and techniques
1839
1840--******* NOTE:
1841--*       mol-type, modif, method, and org are consolidated and expanded
1842--*       in Org-ref, BioSource, and MolInfo in this specification. They
--*       will be removed in later specifications. Do not use them in
--*       the future. Instead expect the new structures.
1845--*
1846--***************************
1847
1848--********************************************************************
1849--
1850-- MolInfo gives information on the
1851-- classification of the type and quality of the sequence
1852--
1853-- WARNING: this will replace GIBB-mol, GIBB-mod, GIBB-method
1854--
1855--********************************************************************
1856
-- MolInfo: classifies a sequence by molecule type (biomol), by the
-- technique that produced it (tech, with free-text techexp) and by how
-- complete it is (completeness).  All three enumerated fields default to
-- unknown.
MolInfo ::= SEQUENCE {
    biomol INTEGER {
        unknown (0) ,
        genomic (1) ,
        pre-RNA (2) ,              -- precursor RNA of any sort really
        mRNA (3) ,
        rRNA (4) ,
        tRNA (5) ,
        snRNA (6) ,
        scRNA (7) ,
        peptide (8) ,
        other-genetic (9) ,      -- other genetic material
        genomic-mRNA (10) ,      -- reported a mix of genomic and cdna sequence
	cRNA (11) ,              -- viral RNA genome copy intermediate
        other (255) } DEFAULT unknown ,
    tech INTEGER {
        unknown (0) ,
        standard (1) ,          -- standard sequencing
        est (2) ,               -- Expressed Sequence Tag
        sts (3) ,               -- Sequence Tagged Site
        survey (4) ,            -- one-pass genomic sequence
        genemap (5) ,           -- from genetic mapping techniques
        physmap (6) ,           -- from physical mapping techniques
        derived (7) ,           -- derived from other data, not a primary entity
        concept-trans (8) ,     -- conceptual translation
        seq-pept (9) ,          -- peptide was sequenced
        both (10) ,             -- concept transl. w/ partial pept. seq.
        seq-pept-overlap (11) , -- sequenced peptide, ordered by overlap
        seq-pept-homol (12) ,   -- sequenced peptide, ordered by homology
        concept-trans-a (13) ,  -- conceptual transl. supplied by author
        htgs-1 (14) ,           -- unordered High Throughput sequence contig
        htgs-2 (15) ,           -- ordered High Throughput sequence contig
        htgs-3 (16) ,           -- finished High Throughput sequence
	fli-cdna (17) ,         -- full length insert cDNA
	htgs-0 (18) ,           -- single genomic reads for coordination
	htc (19) ,              -- high throughput cDNA
        other (255) }           -- explain in techexp (next field)
               DEFAULT unknown ,
    techexp VisibleString OPTIONAL ,   -- explanation if tech not enough
    completeness INTEGER {
      unknown (0) ,
      complete (1) ,                   -- complete biological entity
      partial (2) ,                    -- partial but no details given
      no-left (3),                     -- missing 5' or NH2 end
      no-right (4) ,                   -- missing 3' or COOH end
      no-ends (5) ,                    -- missing both ends
      other (255) } DEFAULT unknown }
1904
1905
-- GIBB-mol: older molecule-type enumeration, to be replaced by
-- MolInfo.biomol.  Values 0-10 and 255 parallel MolInfo.biomol, except
-- that value 2 is named pre-mRNA here and pre-RNA there; biomol also
-- has cRNA (11), which has no counterpart here.
GIBB-mol ::= ENUMERATED {       -- type of molecule represented
    unknown (0) ,
    genomic (1) ,
    pre-mRNA (2) ,              -- precursor RNA of any sort really
    mRNA (3) ,
    rRNA (4) ,
    tRNA (5) ,
    snRNA (6) ,
    scRNA (7) ,
    peptide (8) ,
    other-genetic (9) ,      -- other genetic material
    genomic-mRNA (10) ,      -- reported a mix of genomic and cdna sequence
    other (255) }
1919
-- GIBB-mod: older modifier enumeration, used as SET OF GIBB-mod in
-- Seqdesc.modif.  It mixes several kinds of information in one list
-- (e.g. dna/rna, organelle, partial/complete, est/sts/survey, map
-- kinds); the file marks it as due to be replaced by MolInfo.
GIBB-mod ::= ENUMERATED {        -- GenInfo Backbone modifiers
    dna (0) ,
    rna (1) ,
    extrachrom (2) ,
    plasmid (3) ,
    mitochondrial (4) ,
    chloroplast (5) ,
    kinetoplast (6) ,
    cyanelle (7) ,
    synthetic (8) ,
    recombinant (9) ,
    partial (10) ,
    complete (11) ,
    mutagen (12) ,    -- subject of mutagenesis ?
    natmut (13) ,     -- natural mutant ?
    transposon (14) ,
    insertion-seq (15) ,
    no-left (16) ,    -- missing left end (5' for na, NH2 for aa)
    no-right (17) ,   -- missing right end (3' or COOH)
    macronuclear (18) ,
    proviral (19) ,
    est (20) ,        -- expressed sequence tag
    sts (21) ,        -- sequence tagged site
    survey (22) ,     -- one pass survey sequence
    chromoplast (23) ,
    genemap (24) ,    -- is a genetic map
    restmap (25) ,    -- is an ordered restriction map
    physmap (26) ,    -- is a physical map (not ordered restriction map)
    other (255) }
1949
-- GIBB-method: older sequencing-method enumeration, to be replaced by
-- MolInfo.tech.  There is no value 0 here; names 1-6 reappear in
-- MolInfo.tech under the numbers 8-13.
GIBB-method ::= ENUMERATED {        -- sequencing methods
    concept-trans (1) ,    -- conceptual translation
    seq-pept (2) ,         -- peptide was sequenced
    both (3) ,             -- concept transl. w/ partial pept. seq.
    seq-pept-overlap (4) , -- sequenced peptide, ordered by overlap
    seq-pept-homol (5) ,   -- sequenced peptide, ordered by homology
    concept-trans-a (6) ,  -- conceptual transl. supplied by author
    other (255) }
1958
-- Numbering: how residues are numbered for display.  Exactly one of the
-- four schemes below is chosen.
Numbering ::= CHOICE {           -- any display numbering system
    cont Num-cont ,              -- continuous numbering
    enum Num-enum ,              -- enumerated names for residues
    ref Num-ref ,                -- by reference to another sequence
    real Num-real }              -- supports mapping to a float system
1964
-- Num-cont: continuous numbering.  Every field has a DEFAULT, so an
-- empty value means: start at 1, skip 0, count upwards.
Num-cont ::= SEQUENCE {          -- continuous display numbering system
    refnum INTEGER DEFAULT 1,         -- number assigned to first residue
    has-zero BOOLEAN DEFAULT FALSE ,  -- 0 used?
    ascending BOOLEAN DEFAULT TRUE }  -- ascending numbers?
1969
-- Num-enum: numbering by explicit tags.  num is stored separately from
-- the list; nothing in the schema forces it to equal the length of
-- names.
Num-enum ::= SEQUENCE {          -- any tags to residues
    num INTEGER ,                        -- number of tags to follow
    names SEQUENCE OF VisibleString }    -- the tags
1973
-- Num-ref: numbering taken from other sequences.  Note that "aligns" is
-- both an enumeration value of type and the name of the optional field
-- holding the alignment it refers to.
Num-ref ::= SEQUENCE {           -- by reference to other sequences
    type ENUMERATED {            -- type of reference
        not-set (0) ,
        sources (1) ,            -- by segmented or const seq sources
        aligns (2) } ,           -- by alignments given below
    aligns Seq-align OPTIONAL }
1980
-- Num-real: linear mapping from integer positions to a floating point
-- coordinate, position = (a * int_position) + b, with optional units.
Num-real ::= SEQUENCE {          -- mapping to floating point system
    a REAL ,                     -- from an integer system used by Bioseq
    b REAL ,                     -- position = (a * int_position) + b
    units VisibleString OPTIONAL }
1985
-- Pubdesc: a citation (pub) plus details of how the sequence was
-- presented in that publication.  Only pub is mandatory; reftype
-- defaults to seq.
Pubdesc ::= SEQUENCE {              -- how sequence presented in pub
    pub Pub-equiv ,                 -- the citation(s)
    name VisibleString OPTIONAL ,   -- name used in paper
    fig VisibleString OPTIONAL ,    -- figure in paper
    num Numbering OPTIONAL ,        -- numbering from paper
    numexc BOOLEAN OPTIONAL ,       -- numbering problem with paper
    poly-a BOOLEAN OPTIONAL ,       -- poly A tail indicated in figure?
    maploc VisibleString OPTIONAL , -- map location reported in paper
    seq-raw StringStore OPTIONAL ,  -- original sequence from paper
    align-group INTEGER OPTIONAL ,  -- this seq aligned with others in paper
    comment VisibleString OPTIONAL, -- any comment on this pub in context
	reftype INTEGER {           -- type of reference in a GenBank record
		seq (0) ,               -- refers to sequence
		sites (1) ,             -- refers to unspecified features
		feats (2) ,             -- refers to specified features
		no-target (3) }         -- nothing specified (EMBL)
		DEFAULT seq }
2003
Heterogen ::= VisibleString       -- cofactor, prosthetic group, inhibitor, etc
2005
2006--*** Instances of sequences *******************************
2007--*
2008
-- Seq-inst: the instance of a sequence.  repr and mol are required;
-- topology defaults to linear; everything else is optional.  seq-data
-- holds the residues, ext holds the extension for the representation
-- classes that need one (see Seq-ext), hist holds the history.
Seq-inst ::= SEQUENCE {            -- the sequence data itself
    repr ENUMERATED {              -- representation class
        not-set (0) ,              -- empty
        virtual (1) ,              -- no seq data
        raw (2) ,                  -- continuous sequence
        seg (3) ,                  -- segmented sequence
        const (4) ,                -- constructed sequence
        ref (5) ,                  -- reference to another sequence
        consen (6) ,               -- consensus sequence or pattern
        map (7) ,                  -- ordered map of any kind
        delta (8) ,              -- sequence made by changes (delta) to others
        other (255) } ,
    mol ENUMERATED {               -- molecule class in living organism
        not-set (0) ,              --   > cdna = rna
        dna (1) ,
        rna (2) ,
        aa (3) ,
        na (4) ,                   -- just a nucleic acid
        other (255) } ,
    length INTEGER OPTIONAL ,      -- length of sequence in residues
    fuzz Int-fuzz OPTIONAL ,       -- length uncertainty
    topology ENUMERATED {          -- topology of molecule
        not-set (0) ,
        linear (1) ,
        circular (2) ,
        tandem (3) ,               -- some part of tandem repeat
        other (255) } DEFAULT linear ,
    strand ENUMERATED {            -- strandedness in living organism
        not-set (0) ,
        ss (1) ,                   -- single strand
        ds (2) ,                   -- double strand
        mixed (3) ,
        other (255) } OPTIONAL ,   -- default ds for DNA, ss for RNA, pept
    seq-data Seq-data OPTIONAL ,   -- the sequence
    ext Seq-ext OPTIONAL ,         -- extensions for special types
    hist Seq-hist OPTIONAL }       -- sequence history
2045
2046--*** Sequence Extensions **********************************
2047--*  for representing more complex types
2048--*  const type uses Seq-hist.assembly
2049
-- Seq-ext: extension data for a Seq-inst.  The alternative names match
-- the Seq-inst.repr values seg, ref, map and delta.
Seq-ext ::= CHOICE {
    seg Seg-ext ,        -- segmented sequences
    ref Ref-ext ,        -- hot link to another sequence (a view)
    map Map-ext ,        -- ordered map of markers
    delta Delta-ext }    -- list of Delta-seq pieces
2055
-- The four extension payloads used by Seq-ext.

-- Seg-ext: ordered list of locations making up a segmented sequence.
Seg-ext ::= SEQUENCE OF Seq-loc

-- Ref-ext: a single location on another sequence.
Ref-ext ::= Seq-loc

-- Map-ext: ordered list of features (the markers of a map).
Map-ext ::= SEQUENCE OF Seq-feat

-- Delta-ext: ordered list of Delta-seq pieces.
Delta-ext ::= SEQUENCE OF Delta-seq
2063
-- Delta-seq: one piece of a delta sequence, either a pointer to an
-- existing sequence region or a literal piece carried inline.
Delta-seq ::= CHOICE {
    loc Seq-loc ,       -- point to a sequence
    literal Seq-literal }   -- a piece of sequence
2067
-- Seq-literal: an inline piece of sequence.  The length is mandatory
-- even when the residues themselves (seq-data) are omitted.
Seq-literal ::= SEQUENCE {
    length INTEGER ,         -- must give a length in residues
    fuzz Int-fuzz OPTIONAL , -- could be unsure
    seq-data Seq-data OPTIONAL } -- may have the data
2072
2073--*** Sequence History Record ***********************************
2074--** assembly = records how seq was assembled from others
2075--** replaces = records sequences made obsolete by this one
2076--** replaced-by = this seq is made obsolete by another(s)
2077
-- Seq-hist: history of a sequence.  Every field is optional.
Seq-hist ::= SEQUENCE {
    assembly SET OF Seq-align OPTIONAL ,-- how was this assembled?
    replaces Seq-hist-rec OPTIONAL ,    -- seq makes these seqs obsolete
    replaced-by Seq-hist-rec OPTIONAL , -- these seqs make this one obsolete
    deleted CHOICE {                    -- deletion recorded as a flag or as a date
        bool BOOLEAN ,
        date Date } OPTIONAL }
2085
-- Seq-hist-rec: one replacement record, as used by Seq-hist.replaces
-- and Seq-hist.replaced-by: the ids involved and an optional date.
Seq-hist-rec ::= SEQUENCE {
    date Date OPTIONAL ,
    ids SET OF Seq-id }
2089
2090--*** Various internal sequence representations ************
2091--*      all are controlled, fixed length forms
2092
-- Seq-data: the residues of a sequence in exactly one of the encodings
-- below.  Each alternative is a thin alias over StringStore or OCTET
-- STRING, defined right after this type.
Seq-data ::= CHOICE {              -- sequence representations
    iupacna IUPACna ,              -- IUPAC 1 letter nuc acid code
    iupacaa IUPACaa ,              -- IUPAC 1 letter amino acid code
    ncbi2na NCBI2na ,              -- 2 bit nucleic acid code
    ncbi4na NCBI4na ,              -- 4 bit nucleic acid code
    ncbi8na NCBI8na ,              -- 8 bit extended nucleic acid code
    ncbipna NCBIpna ,              -- nucleic acid probabilities
    ncbi8aa NCBI8aa ,              -- 8 bit extended amino acid codes
    ncbieaa NCBIeaa ,              -- extended ASCII 1 letter aa codes
    ncbipaa NCBIpaa ,              -- amino acid probabilities
    ncbistdaa NCBIstdaa }          -- consecutive codes for std aas
2104
2105
-- Encoding aliases used by Seq-data.  The letter-code forms are
-- StringStore; the packed and probability forms are OCTET STRING.
IUPACna ::= StringStore       -- IUPAC 1 letter codes, no spaces
IUPACaa ::= StringStore       -- IUPAC 1 letter codes, no spaces
NCBI2na ::= OCTET STRING      -- 00=A, 01=C, 10=G, 11=T
NCBI4na ::= OCTET STRING      -- 1 bit each for agct
                              -- 0001=A, 0010=C, 0100=G, 1000=T/U
                              -- 0101=Purine, 1010=Pyrimidine, etc
NCBI8na ::= OCTET STRING      -- for modified nucleic acids
NCBIpna ::= OCTET STRING      -- 5 octets/base, prob for a,c,g,t,n
                              -- probabilities are coded 0-255 = 0.0-1.0
NCBI8aa ::= OCTET STRING      -- for modified amino acids
NCBIeaa ::= StringStore       -- ASCII extended 1 letter aa codes
                              -- IUPAC codes + U=selenocysteine
NCBIpaa ::= OCTET STRING      -- 25 octets/aa, prob for IUPAC aas in order:
                              -- A-Y,B,Z,X,(ter),anything
                              -- probabilities are coded 0-255 = 0.0-1.0
NCBIstdaa ::= OCTET STRING    -- codes 0-25, 1 per byte
2122
2123--*** Sequence Annotation *************************************
2124--*
2125
-- Annot-id: identifier for a Seq-annot, in one of three forms.
Annot-id ::= CHOICE {
    local Object-id ,
    ncbi INTEGER ,
    general Dbtag }
2130
-- Annot-descr: an unordered set of annotation descriptors (Annotdesc).
Annot-descr ::= SET OF Annotdesc
2132
-- Annotdesc: a single descriptor for a Seq-annot; exactly one
-- alternative is present.
Annotdesc ::= CHOICE {
    name VisibleString ,         -- a short name for this collection
    title VisibleString ,        -- a title for this collection
    comment VisibleString ,      -- a more extensive comment
    pub Pubdesc ,                -- a reference to the publication
    user User-object ,           -- user defined object
    create-date Date ,           -- date entry first created/released
    update-date Date ,           -- date of last update
    src Seq-id ,                 -- source sequence from which annot came
    align Align-def,             -- definition of the SeqAligns
    region Seq-loc }             -- all contents cover this region
2144
-- Align-def: describes what kind of alignment set a Seq-annot holds.
-- align-type has no value 0 and no DEFAULT, so it must always be given.
Align-def ::= SEQUENCE {
    align-type INTEGER {         -- class of align Seq-annot
      ref (1) ,                  -- set of alignments to the same sequence
      alt (2) ,                  -- set of alternate alignments of the same seqs
      blocks (3) ,               -- set of aligned blocks in the same seqs
      other (255) } ,
    ids SET OF Seq-id OPTIONAL } -- used for the one ref seqid for now
2152
-- Seq-annot: a block of annotation.  Only data is mandatory; it holds a
-- set of one kind of item (features, alignments, graphs, ids or
-- locations).
Seq-annot ::= SEQUENCE {
    id SET OF Annot-id OPTIONAL ,
    db INTEGER {                 -- source of annotation
        genbank (1) ,
        embl (2) ,
        ddbj (3) ,
        pir  (4) ,
        sp   (5) ,
        bbone (6) ,
        pdb   (7) ,
        other (255) } OPTIONAL ,
    name VisibleString OPTIONAL ,-- source if "other" above
    desc Annot-descr OPTIONAL ,  -- used only for stand alone Seq-annots
    data CHOICE {
        ftable SET OF Seq-feat ,
        align SET OF Seq-align ,
        graph SET OF Seq-graph ,
        ids SET OF Seq-id ,        -- used for communication between tools
        locs SET OF Seq-loc } }    -- used for communication between tools
2172
2173END
2174
2175
2176--$Revision: 97143 $
2177--********************************************************************
2178--
2179--  Direct Submission of Sequence Data
2180--  James Ostell, 1991
2181--
2182--  This is a trial specification for direct submission of sequence
2183--    data worked out between NCBI and EMBL
2184--  Later revised to reflect work with GenBank and Integrated database
2185--
2186--  Version 3.0, 1994
2187--    This is the official NCBI sequence submission format now.
2188--
2189--********************************************************************
2190
2191NCBI-Submit DEFINITIONS ::=
2192BEGIN
2193
2194EXPORTS Seq-submit, Contact-info;
2195
2196IMPORTS Cit-sub, Author FROM NCBI-Biblio
2197        Date, Object-id FROM NCBI-General
2198        Seq-annot FROM NCBI-Sequence
2199        Seq-id FROM NCBI-Seqloc
2200        Seq-entry FROM NCBI-Seqset;
2201
-- Seq-submit: one submission, made of the administrative block (sub)
-- and a payload that is either new entries, annotations, or a list of
-- ids to delete.
Seq-submit ::= SEQUENCE {
    sub Submit-block ,
    data CHOICE {
        entrys  SET OF Seq-entry ,  -- sequence(s)
        annots  SET OF Seq-annot ,  -- annotation(s)
        delete  SET OF Seq-id } } -- deletions of entries
2208
-- Submit-block: administrative data for a submission.  contact and cit
-- are required; hup defaults to FALSE; the rest is optional.
Submit-block ::= SEQUENCE {
    contact Contact-info ,        -- who to contact
    cit Cit-sub ,                 -- citation for this submission
    hup BOOLEAN DEFAULT FALSE ,   -- hold until publish
    reldate Date OPTIONAL ,       -- release by date
    subtype INTEGER {             -- type of submission
        new (1) ,                 -- new data
        update (2) ,              -- update by author
        revision (3) ,            -- 3rd party (non-author) update
        other (255) } OPTIONAL ,
    tool VisibleString OPTIONAL,  -- tool used to make submission
    user-tag VisibleString OPTIONAL, -- user supplied id for this submission
    comment VisibleString OPTIONAL } -- user comments/advice to database
2222
-- Contact-info: contact details for a submission.  Every field is
-- optional.  The flat name/address/phone style fields are the older
-- form; the trailing contact field (an Author) is marked as their
-- replacement.
-- NOTE(review): password is a raw OCTET STRING; nothing here says
-- whether it is hashed or encrypted. Confirm before relying on it.
Contact-info ::= SEQUENCE {      -- who to contact to discuss the submission
    name VisibleString OPTIONAL ,        -- OBSOLETE: will be removed
    address SEQUENCE OF VisibleString OPTIONAL ,
    phone VisibleString OPTIONAL ,
    fax VisibleString OPTIONAL ,
    email VisibleString OPTIONAL ,
    telex VisibleString OPTIONAL ,
    owner-id Object-id OPTIONAL ,         -- for owner accounts
    password OCTET STRING OPTIONAL ,
    last-name VisibleString OPTIONAL ,  -- structured to replace name above
    first-name VisibleString OPTIONAL ,
    middle-initial VisibleString OPTIONAL ,
    contact Author OPTIONAL }           -- WARNING: this will replace the above
2236
2237END
2238
2239--$Revision: 97143 $
2240--****************************************************************
2241--
2242--  NCBI Project Definition Module
2243--  by Jim Ostell and Jonathan Kans, 1998
2244--
2245--****************************************************************
2246
2247NCBI-Project DEFINITIONS ::=
2248BEGIN
2249
2250EXPORTS Project, Project-item;
2251
2252IMPORTS Date FROM NCBI-General
2253        PubMedId FROM NCBI-Biblio
2254        Seq-id, Seq-loc FROM NCBI-Seqloc
2255        Seq-annot, Pubdesc FROM NCBI-Sequence
2256        Seq-entry FROM NCBI-Seqset
2257        Pubmed-entry FROM NCBI-PubMed;
2258
-- Project: an optional description plus exactly one Project-item
-- payload.
Project ::= SEQUENCE {
    descr Project-descr OPTIONAL ,
    data Project-item }
2262
-- Project-item: the payload of a Project; exactly one alternative is
-- present.  The alternatives fall into name groups by suffix:
--   ...uid  sets of INTEGER identifiers
--   ...id   sets of typed identifiers (PubMedId or Seq-id)
--   ...ent  sets of full entries (Pubmed-entry or Seq-entry)
-- structid and structent are typed NULL, so they carry no data;
-- presumably placeholders for structure ids/entries (confirm).
-- proj nests further Projects, making the type recursive via Project.
Project-item ::= CHOICE {
    pmuid SET OF INTEGER ,
    protuid SET OF INTEGER ,
    nucuid SET OF INTEGER ,
    sequid SET OF INTEGER ,
    genomeuid SET OF INTEGER ,
    structuid SET OF INTEGER ,
    pmid SET OF PubMedId ,
    protid SET OF Seq-id ,
    nucid SET OF Seq-id ,
    seqid SET OF Seq-id ,
    genomeid SET OF Seq-id ,
    structid NULL ,
    pment SET OF Pubmed-entry ,
    protent SET OF Seq-entry ,
    nucent SET OF Seq-entry ,
    seqent SET OF Seq-entry ,
    genomeent SET OF Seq-entry ,
    structent NULL ,
    seqannot SET OF Seq-annot ,
    loc SET OF Seq-loc ,
    proj SET OF Project
}
2286
-- Project-descr: identifies and describes a Project.  id is required;
-- name and the descriptor set are optional.
Project-descr ::= SEQUENCE {
    id SET OF Project-id ,
    name VisibleString OPTIONAL ,
    descr SET OF Projdesc OPTIONAL }
2291
-- Projdesc: a single project descriptor; exactly one alternative is
-- present.
Projdesc ::= CHOICE {
    pub Pubdesc ,
    date Date ,
    comment VisibleString ,
    title VisibleString
}
2298
-- Project-id: a project identifier, stored as plain text.
Project-id ::= VisibleString
2300
2301END
2302
2303
2304--$Revision: 97143 $
2305--**********************************************************************
2306--
2307--  Biological Macromolecule 3-D Structure Data Types for MMDB,
2308--                A Molecular Modeling Database
2309--
2310--  Definitions for a biomolecular assembly and the MMDB database
2311--
2312--  By Hitomi Ohkawa, Jim Ostell, Chris Hogue, and Steve Bryant
2313--
2314--  National Center for Biotechnology Information
2315--  National Institutes of Health
2316--  Bethesda, MD 20894 USA
2317--
2318--  July 1995
2319--
2320--**********************************************************************
2321
2322-- Contents of the MMDB database are currently based on files distributed by
2323-- the Protein Data Bank, PDB.  These data are changed in form, as described
2324-- in this specification. To some extent they are also changed in content, in
2325-- that many data items implicit in PDB are made explicit, and others are
2326-- corrected or omitted as a consequence of validation checks.  The semantics
2327-- of MMDB data items are indicated by comments within the specification below.
2328-- These comments explain in detail the manner in which data items from  PDB
2329-- have been mapped into MMDB.
2330
2331MMDB DEFINITIONS ::=
2332
2333BEGIN
2334
2335EXPORTS Biostruc, Biostruc-id, Biostruc-set, Biostruc-annot-set,
2336	Biostruc-residue-graph-set;
2337
2338IMPORTS Biostruc-graph, Biomol-descr, Residue-graph FROM MMDB-Chemical-graph
2339	Biostruc-model FROM MMDB-Structural-model
2340	Biostruc-feature-set FROM MMDB-Features
2341	Pub FROM NCBI-Pub
2342	Date, Object-id, Dbtag FROM NCBI-General;
2343
2344-- A structure report or "biostruc" describes the components of a biomolecular
2345-- assembly in terms of their names and descriptions, and a chemical graph
2346-- giving atomic formula, connectivity and chirality. It also gives one or more
2347-- three-dimensional model structures, literally a mapping of the atoms,
2348-- residues and/or molecules of each component into a measured three-
2349-- dimensional space. Structure may also be described by named features, which
2350-- associate nodes in the chemical graph, or regions in space, with text or
2351-- numeric descriptors.
2352
2353-- Note that a biostruc may also contain cross references to other databases,
2354-- including citations to relevant scientific literature. These cross
2355-- references use object types from other NCBI data specifications, which are
2356-- "imported" into MMDB, and not repeated in this specification.
2357
-- Biostruc: id and chemical-graph are required; descriptors, features
-- and models are optional lists.
Biostruc ::= SEQUENCE {
	id			SEQUENCE OF Biostruc-id,
	descr			SEQUENCE OF Biostruc-descr OPTIONAL,
	chemical-graph		Biostruc-graph,
	features		SEQUENCE OF Biostruc-feature-set OPTIONAL,
	model			SEQUENCE OF Biostruc-model OPTIONAL }
2364
-- A Biostruc-id is one identifier for the molecular assembly; a Biostruc
-- carries a list of them.  Mmdb-id's are NCBI-assigned, and are intended to
-- be unique and stable identifiers.  The other alternatives (other-database,
-- local-id) are synonyms.

Biostruc-id ::= CHOICE {
	mmdb-id			Mmdb-id,
	other-database		Dbtag,
	local-id		Object-id }
2373
-- Mmdb-id: the NCBI-assigned identifier, stored as a plain INTEGER.
Mmdb-id ::= INTEGER
2375
2376
2377-- The description of a biostruc refers to both the reported chemical and
2378-- spatial structure of a biomolecular assembly.  PDB-derived descriptors
2379-- which refer specifically to the chemical components or spatial structure
2380-- are not provided here, but instead as descriptors of the biostruc-graph or
2381-- biostruc-model. For PDB-derived structures the biostruc name is the PDB
2382-- id-code.  PDB-derived citations appear as publications within the biostruc
2383-- description, and include a data-submission citation derived from PDB AUTHOR
2384-- records.  Citations are described using the NCBI Pub specification.
2385
-- Biostruc-descr: a single descriptor; exactly one alternative is
-- present.  A Biostruc carries a list of these.
Biostruc-descr ::= CHOICE {
	name			VisibleString,
	pdb-comment		VisibleString,
	other-comment		VisibleString,
	history			Biostruc-history,
	attribution		Pub }
2392
2393
-- The history of a biostruc indicates its origin and its update history
-- within MMDB, the NCBI-maintained molecular structure database.
-- All three fields are optional.

Biostruc-history ::= SEQUENCE {
	replaces		Biostruc-replace OPTIONAL,
	replaced-by		Biostruc-replace OPTIONAL,
	data-source		Biostruc-source OPTIONAL }
2401
-- Biostruc-replace: one replacement record, used by both
-- Biostruc-history.replaces and Biostruc-history.replaced-by: the
-- identifier of the other biostruc and a date (both required).
Biostruc-replace ::= SEQUENCE {
	id			Biostruc-id,
	date			Date }
2405
2406-- The origin of a biostruc is a reference to another database.  PDB release
2407-- date and PDB-assigned id codes are recorded here, as are the PDB-assigned
2408-- entry date and replacement history.
2409
-- Biostruc-source: the database name, entry id and entry date are
-- required.  The database version (given either as a release date or as
-- a release code) and the entry history lines are optional.
Biostruc-source ::= SEQUENCE {
	name-of-database	VisibleString,
	version-of-database	CHOICE {
		release-date		Date,
		release-code		VisibleString } OPTIONAL,
	database-entry-id	Biostruc-id,
	database-entry-date	Date,
	database-entry-history	SEQUENCE OF VisibleString OPTIONAL}
2418
2419
-- A biostruc set is a means to collect ASN.1 data for many biostrucs in
-- one file, as convenient for application programs.  The object type is not
-- intended to imply similarity of the biostrucs grouped together.
-- Only the biostrucs list is required; id and descr are optional.

Biostruc-set ::= SEQUENCE {
	id		SEQUENCE OF Biostruc-id OPTIONAL,
	descr		SEQUENCE OF Biostruc-descr OPTIONAL,
	biostrucs	SEQUENCE OF Biostruc }
2428
2429
2430-- A biostruc annotation set is a means to collect ASN.1 data for biostruc
2431-- features into one file. The object type is intended as a means to store
2432-- feature annotation of similar type, such as "core" definitions for a
2433-- threading program, or structure-structure alignments for a structure-
2434-- similarity browser.
2435
-- Biostruc-annot-set: only the features list is required; id and descr
-- are optional.
Biostruc-annot-set ::= SEQUENCE {
	id		SEQUENCE OF Biostruc-id OPTIONAL,
	descr		SEQUENCE OF Biostruc-descr OPTIONAL,
	features	SEQUENCE OF Biostruc-feature-set }
2440
2441
2442-- A biostruc residue graph set is a collection of residue graphs.  The object
2443-- type is intended as a means to record dictionaries containing the chemical
2444-- subgraphs of "standard" residue types, which are used as a means to
2445-- simplify discription of the covalent structure of a biomolecular assembly.
2446-- The standard residue graph dictionary supplied with the MMDB database
2447-- contains 20 standard L amino acids and 8 standard ribonucleotide groups.
2448-- These graphs are complete, including explicit hydrogen atoms and separate
2449-- instances for the terminal polypeptide and polynucleotide residues.
2450
2451Biostruc-residue-graph-set ::= SEQUENCE {	-- dictionary of residue graphs, e.g. for "standard" residue types
2452	id			SEQUENCE OF Biostruc-id OPTIONAL,
2453	descr			SEQUENCE OF Biomol-descr OPTIONAL,	-- note: Biomol-descr here, not Biostruc-descr
2454	residue-graphs		SEQUENCE OF Residue-graph }	-- only required field
2455
2456END
2457
2458
2459
2460--**********************************************************************
2461--
2462--  Biological Macromolecule 3-D Structure Data Types for MMDB,
2463--                A Molecular Modeling Database
2464--
2465--  Definitions for a chemical graph
2466--
2467--  By Hitomi Ohkawa, Jim Ostell, Chris Hogue and Steve Bryant
2468--
2469--  National Center for Biotechnology Information
2470--  National Institutes of Health
2471--  Bethesda, MD 20894 USA
2472--
2473--  July, 1995
2474--
2475--**********************************************************************
2476
2477MMDB-Chemical-graph DEFINITIONS ::=
2478
2479BEGIN
2480
2481EXPORTS Biostruc-graph, Biomol-descr, Residue-graph,
2482	Molecule-id, Residue-id, Atom-id;
2483
2484IMPORTS Pub FROM NCBI-Pub
2485	BioSource FROM NCBI-BioSource
2486	Seq-id FROM NCBI-Seqloc
2487	Biostruc-id FROM MMDB;
2488
2489-- A biostruc graph contains the complete chemical graph of the biomolecular
2490-- assembly.  The assembly graph is defined hierarchically, in terms of
2491-- subgraphs of component molecules.  For PDB-derived biostrucs,
2492-- the molecules forming the assembly are the individual biopolymer chains and
2493-- any non-polymer or "heterogen" groups which are present.
2494
2495-- The PDB-derived  "compound name" field appears as the name within the
2496-- biostruc-graph description.  PDB "class" and "source" fields appear as
2497-- explicit attributes.  PDB-derived structures are assigned an assembly type
2498-- of "other" unless they have been further classified as the "physiological
2499-- form" or "crystallographic cell" contents.  If they have, the source of the
2500-- type classification appears as a citation within the  assembly description.
2501
2502-- Note that the biostruc-graph also includes as literals the subgraphs of
2503-- any nonstandard residues present within it. For PDB-derived biostrucs these
2504-- subgraphs are constructed automatically, with validation as described below.
2505
2506Biostruc-graph ::= SEQUENCE {	-- complete chemical graph of the biomolecular assembly
2507	descr			SEQUENCE OF Biomol-descr OPTIONAL,	-- for PDB, carries compound name, class and source
2508	molecule-graphs		SEQUENCE OF Molecule-graph,	-- component molecules; only required field
2509	inter-molecule-bonds	SEQUENCE OF Inter-residue-bond OPTIONAL,
2510	residue-graphs		SEQUENCE OF Residue-graph OPTIONAL }	-- literal subgraphs of any nonstandard residues present
2511
2512-- A biomolecule description refers to the chemical structure of a molecule or
2513-- component substructures.  This descriptor type is used at the level of
2514-- assemblies, molecules and residues, and also for residue-graph dictionaries.
2515-- The BioSource object type is drawn from NCBI taxonomy data specifications,
2516-- and is not repeated here.
2517
2518Biomol-descr ::= CHOICE {	-- one descriptor; used for assemblies, molecules, residues and residue-graph dictionaries
2519	name			VisibleString,
2520	pdb-class		VisibleString,
2521	pdb-source		VisibleString,
2522	pdb-comment		VisibleString,
2523	other-comment		VisibleString,
2524	organism		BioSource,	-- type imported from NCBI-BioSource
2525	attribution		Pub,	-- type imported from NCBI-Pub
2526	assembly-type		INTEGER {	physiological-form(1),
2527						crystallographic-cell(2),
2528						other(255) },	-- PDB-derived structures get "other" unless further classified
2529	molecule-type		INTEGER {	dna(1),
2530						rna(2),
2531						protein(3),
2532						other-biopolymer(4),
2533						solvent(5),
2534						other-nonpolymer(6),
2535						other(255) } }	-- for PDB-derived molecules, assigned by "majority rule" over residues
2536
2537-- A molecule chemical graph is defined by a sequence of residues.  Nonpolymers
2538-- are described in the same way, but may contain only a single residue.
2539
2540-- Biopolymer molecules are identified within PDB entries according to their
2541-- appearance on SEQRES records, which formally define a biopolymer as such.
2542-- Biopolymers are defined by the distinction between ATOM and HETATM
2543-- coordinate records only in cases where the chemical sequence from SEQRES
2544-- is in conflict with coordinate data. The PDB-assigned chain code appears as
2545-- the name within the molecule descriptions of the biopolymers.
2546
2547-- Nonpolymer molecules from PDB correspond to individual HETEROGEN groups,
2548-- excluding any HETEROGEN groups which represent modified biopolymer residues.
2549-- These molecules are named according to the chain, residue type and residue
2550-- number fields as assigned by PDB. Any description appearing in the PDB HET
2551-- record appears as a pdb-comment within the molecule description.
2552
2553-- Molecule types for PDB-derived molecule graphs are assigned by matching
2554-- residue and atom names against the PDB-documented standard types for protein,
2555-- DNA and RNA, and against residue codes commonly used to indicate solvent.
2556-- Classification is by "majority rule". If more than half of the residues in
2557-- a biopolymer are standard groups of one type, then the molecule is of that
2558-- type, and otherwise classified as "other". Note that this classification does
2559-- not preclude the presence of modified residues, but insists they constitute
2560-- less than half the biopolymer. Non-polymers are classified only as "solvent"
2561-- or "other".
2562
2563-- Note that a molecule graph may also contain a set of cross references
2564-- to biopolymer sequence databases.  All biopolymer molecules in MMDB contain
2565-- appropriate identifiers for the corresponding entry in the NCBI-Sequences
2566-- database, in particular the NCBI "gi" number, which may be used for sequence
2567-- retrieval. The Seq-id object type is defined in the NCBI molecular sequence
2568-- specification, and not repeated here.
2569
2570Molecule-graph ::= SEQUENCE {	-- chemical graph of one molecule, defined by a sequence of residues
2571	id			Molecule-id,
2572	descr			SEQUENCE OF Biomol-descr OPTIONAL,	-- for PDB biopolymers the chain code appears as the name
2573	seq-id			Seq-id OPTIONAL,	-- single optional sequence database reference; type imported from NCBI-Seqloc
2574	residue-sequence	SEQUENCE OF Residue,	-- nonpolymers may contain only a single residue
2575	inter-residue-bonds	SEQUENCE OF Inter-residue-bond OPTIONAL }
2576
2577Molecule-id ::= INTEGER	-- exported; type of Molecule-graph.id and Atom-pntr.molecule-id
2578
2579-- Residues may be assigned a text-string name as well as an id number. PDB
2580-- assigned residue numbers appear as the residue name.
2581
2582Residue ::= SEQUENCE {	-- one residue of a molecule, with a pointer to its defining graph
2583	id			Residue-id,
2584	name			VisibleString OPTIONAL,	-- PDB-assigned residue numbers appear here
2585	residue-graph		Residue-graph-pntr }	-- local, other-biostruc or standard-dictionary graph
2586
2587Residue-id ::= INTEGER	-- exported; type of Residue.id and Atom-pntr.residue-id
2588
2589
2590-- Residue graphs from different sources may be referenced within a molecule
2591-- graph.  The allowed choices are the nonstandard residue graphs included in
2592-- the present biostruc, residue graphs within other biostrucs, or residue
2593-- graphs within tables of standard residue definitions.
2594
2595Residue-graph-pntr ::= CHOICE {	-- where the residue graph for a residue is found
2596	local			Residue-graph-id,	-- nonstandard residue graph included in the present biostruc
2597	biostruc		Biostruc-graph-pntr,	-- residue graph within another biostruc
2598	standard		Biostruc-residue-graph-set-pntr }	-- residue graph within a table of standard residue definitions
2599
2600Biostruc-graph-pntr ::= SEQUENCE {	-- the "biostruc" alternative of Residue-graph-pntr
2601	biostruc-id		Biostruc-id,	-- type imported from MMDB
2602	residue-graph-id	Residue-graph-id }
2603
2604Biostruc-residue-graph-set-pntr ::= SEQUENCE {	-- the "standard" alternative of Residue-graph-pntr
2605	biostruc-residue-graph-set-id	Biostruc-id,	-- note: typed as Biostruc-id
2606	residue-graph-id		Residue-graph-id }
2607
2608
2609-- Residue graphs define atomic formulae, connectivity, chirality, and names.
2610-- For standard residue graphs from the MMDB dictionary the PDB-assigned
2611-- residue-type code appears as the name within the residue graph description,
2612-- and the full trivial name of the residue as a comment within that
2613-- description.  For any nonstandard residue graphs provided with an MMDB
2614-- biostruc the PDB-assigned residue-type code similarly appears as the name
2615-- within the description, and any information provided on PDB HET records as
2616-- a pdb-comment within that description.
2617
2618-- Note that nonstandard residue graphs for a PDB-derived biostruc may be
2619-- incomplete. Current PDB format cannot represent connectivity for groups
2620-- which are disordered, and for which no coordinates are given.  In these
2621-- cases the residue graph defined in MMDB represents only the subgraph that
2622-- could be identified from available ATOM, HETATM and CONECT records.
2623
2624Residue-graph ::= SEQUENCE {	-- atomic formula, connectivity, chirality and names for one residue graph
2625	id			Residue-graph-id,
2626	descr			SEQUENCE OF Biomol-descr OPTIONAL,	-- PDB residue-type code appears as the name
2627	residue-type		INTEGER {	deoxyribonucleotide(1),
2628						ribonucleotide(2),
2629						amino-acid(3),
2630						other(255) } OPTIONAL,
2631	iupac-code		SEQUENCE OF VisibleString OPTIONAL,
2632	atoms			SEQUENCE OF Atom,	-- required
2633	bonds			SEQUENCE OF Intra-residue-bond,	-- required; may be incomplete for PDB-derived nonstandard residues
2634	chiral-centers		SEQUENCE OF Chiral-center OPTIONAL }	-- not assigned automatically for PDB-derived nonstandard residues
2635
2636Residue-graph-id ::= INTEGER	-- not exported; type of Residue-graph.id and of the residue graph pointer fields
2637
2638-- Atoms in residue graphs are defined by elemental symbols and names.  PDB-
2639-- assigned atom names appear here in the name field, except in cases of known
2640-- PDB synonyms.  In these cases atom names are mapped to the names used in the
2641-- MMDB standard dictionary. This occurs primarily for hydrogen atoms, where
2642-- PDB practice allows synonyms for several atom types.  For PDB atoms the
2643-- elemental symbol is obtained by parsing the PDB atom name field, allowing
2644-- for known special-semantics cases where the atom name does not follow the
2645-- documented encoding rule.  Ionizable protons are identified within standard
2646-- residue graphs in the MMDB dictionary, but not within automatically-defined
2647-- nonstandard graphs.
2648
2649Atom ::= SEQUENCE {	-- one atom of a residue graph, defined by elemental symbol and name
2650	id			Atom-id,
2651	name			VisibleString OPTIONAL,	-- PDB atom name, or the MMDB dictionary name for known PDB synonyms
2652	iupac-code		SEQUENCE OF VisibleString OPTIONAL,
2653	element			ENUMERATED {	-- values 1..103 follow the element symbols in order; presumably atomic numbers
2654				h(1),   he(2),  li(3),  be(4),  b(5),
2655				c(6),   n(7),   o(8),   f(9),   ne(10),
2656				na(11), mg(12), al(13), si(14), p(15),
2657				s(16),  cl(17), ar(18), k(19),  ca(20),
2658				sc(21), ti(22), v(23),  cr(24), mn(25),
2659				fe(26), co(27), ni(28), cu(29), zn(30),
2660				ga(31), ge(32), as(33), se(34), br(35),
2661				kr(36), rb(37), sr(38), y(39),  zr(40),
2662				nb(41), mo(42), tc(43), ru(44), rh(45),
2663				pd(46), ag(47), cd(48), in(49), sn(50),
2664				sb(51), te(52), i(53),  xe(54), cs(55),
2665				ba(56), la(57), ce(58), pr(59), nd(60),
2666				pm(61), sm(62), eu(63), gd(64), tb(65),
2667				dy(66), ho(67), er(68), tm(69), yb(70),
2668				lu(71), hf(72), ta(73), w(74),  re(75),
2669				os(76), ir(77), pt(78), au(79), hg(80),
2670				tl(81), pb(82), bi(83), po(84), at(85),
2671				rn(86), fr(87), ra(88), ac(89), th(90),
2672				pa(91), u(92),  np(93), pu(94), am(95),
2673				cm(96), bk(97), cf(98), es(99),
2674				fm(100), md(101), no(102), lr(103),
2675				other(254), unknown(255) },	-- note: "other" is 254 here, not 255
2676	ionizable-proton	ENUMERATED {	-- identified in standard MMDB dictionary graphs only
2677					true(1),
2678					false(2),
2679					unknown(255) } OPTIONAL }
2680
2681Atom-id ::= INTEGER	-- exported; used by Atom, Intra-residue-bond, Chiral-center and Atom-pntr
2682
2683-- Intra-residue-bond specifies connectivity between atoms in Residue-graph.
2684-- Unlike Inter-residue-bond defined later, its participating atoms are part of
2685-- a residue subgraph dictionary, not part of a specific biostruc-graph.
2686
2687-- For residue graphs in the standard MMDB dictionary bonds are defined from
2688-- the known chemical structures of amino acids and nucleotides.  For
2689-- nonstandard residue graphs bonds are defined from PDB CONECT records, with
2690-- validation for consistency with coordinate data, and from stereochemical
2691-- calculation to identify unreported bonds.  Validation and bond identification
2692-- are based on comparison of inter-atomic distances to the sum of covalent
2693-- radii for the corresponding elements.
2694
2695Intra-residue-bond ::= SEQUENCE {	-- bond between two atoms of the same Residue-graph
2696	atom-id-1		Atom-id,	-- plain Atom-id; the atoms belong to the residue subgraph dictionary
2697	atom-id-2		Atom-id,
2698	bond-order		INTEGER {	-- same named values as Inter-residue-bond.bond-order
2699					single(1),
2700					partial-double(2),
2701					aromatic(3),
2702					double(4),
2703					triple(5),
2704					other(6),
2705					unknown(255)} OPTIONAL }
2706
2707-- Chiral centers are atoms with tetrahedral geometry.  Chirality is defined
2708-- by a chiral volume involving the chiral center and 3 other atoms bonded to
2709-- it.  For any coordinates assigned to atoms c, n1, n2, and n3, the vector
2710-- triple product (n1-c) dot ( (n2-c) cross (n3-c) ) must have the indicated
2711-- sign.  The calculation assumes an orthogonal right-handed coordinate system
2712-- as is used for MMDB model structures.
2713
2714-- Chirality is defined for standard residues in the MMDB dictionary, but is
2715-- not assigned automatically for PDB-derived nonstandard residues. If assigned
2716-- for nonstandard residues, the source of chirality information is described
2717-- by a citation within the residue description.
2718
2719Chiral-center ::= SEQUENCE {	-- chirality as the sign of a chiral volume
2720	c			Atom-id,	-- the chiral center atom
2721	n1			Atom-id,	-- n1, n2, n3: three other atoms bonded to c
2722	n2			Atom-id,
2723	n3			Atom-id,
2724	sign			ENUMERATED { positive(1),	-- required sign of (n1-c) dot ( (n2-c) cross (n3-c) )
2725					     negative(2) } }
2726
2727-- Inter-residue bonds are defined by a reference to two atoms. For PDB-derived
2728-- structures bonds are identified from biopolymer connectivity according to
2729-- SEQRES and from other connectivity information on SSBOND and CONECT
2730-- records. These data are validated and unreported bonds identified by
2731-- stereochemical calculation, using the same criteria as for intra-residue
2732-- bonds.
2733
2734Inter-residue-bond ::= SEQUENCE {	-- bond defined by a reference to two atoms
2735	atom-id-1		Atom-pntr,	-- despite the field name this is an Atom-pntr, not an Atom-id
2736	atom-id-2		Atom-pntr,
2737	bond-order		INTEGER {	-- same named values as Intra-residue-bond.bond-order
2738					single(1),
2739					partial-double(2),
2740					aromatic(3),
2741					double(4),
2742					triple(5),
2743					other(6),
2744					unknown(255)} OPTIONAL }
2745
2746-- Atoms, residues and molecules within the current biostruc are referenced
2747-- by hierarchical pointers.
2748
2749Atom-pntr ::= SEQUENCE {	-- hierarchical pointer to one atom within the current biostruc
2750	molecule-id		Molecule-id,
2751	residue-id		Residue-id,
2752	atom-id			Atom-id }
2753
2754Atom-pntr-set ::= SEQUENCE OF Atom-pntr	-- NOTE(review): not in this module's EXPORTS and not referenced in the module; confirm use
2755
2756END
2757--$Revision: 97143 $
2758--**********************************************************************
2759--
2760--  Biological Macromolecule 3-D Structure Data Types for MMDB,
2761--                A Molecular Modeling Database
2762--
2763--  Definitions for structural models
2764--
2765--  By Hitomi Ohkawa, Jim Ostell, Chris Hogue and Steve Bryant
2766--
2767--  National Center for Biotechnology Information
2768--  National Institutes of Health
2769--  Bethesda, MD 20894 USA
2770--
2771--  July, 1996
2772--
2773--**********************************************************************
2774
2775MMDB-Structural-model DEFINITIONS ::=
2776
2777BEGIN
2778
2779EXPORTS Biostruc-model, Model-id, Model-coordinate-set-id;
2780
2781IMPORTS Chem-graph-pntrs, Atom-pntrs, Chem-graph-alignment,
2782	Sphere, Cone, Cylinder, Brick, Transform FROM MMDB-Features
2783	Biostruc-id FROM MMDB
2784	Pub FROM NCBI-Pub;
2785
2786-- A structural model maps chemical components into a measured three-
2787-- dimensional space. PDB-derived biostrucs generally contain 4 models,
2788-- corresponding to "views" of the structure of a biomolecular assembly with
2789-- increasing levels of complexity.  Model types indicate the complexity of the
2790-- view.
2791
2792-- The model named "NCBI all atom" represents a view suitable for most
2793-- computational biology applications.  It provides complete atomic coordinate
2794-- data for a "single best" model, omitting statistical disorder information
2795-- and/or ensemble structure descriptions provided in the source PDB file.
2796-- Construction of the single best model is based on the assumption that the
2797-- contents of the "alternate conformation" field from pdb imply no correlation
2798-- among the occupancies of multiple sites assigned to sets of atoms: the best
2799-- site is chosen only on the basis of highest occupancy. Note, however, that
2800-- alternate conformation sets where correlation is implied are generally
2801-- constrained in crystallographic refinement to have uniform occupancy, and
2802-- will thus be selected as a set. For ensemble models the model which assigns
2803-- coordinates to the most atoms is chosen.  If numbers of coordinates are the
2804-- same, the model occurring first in the PDB file is selected.  The single
2805-- best model includes complete coordinates for all nonpolymer components, but
2806-- omits those classified as "solvent".  Model type is 3 for this model.
2807
2808-- The model named "NCBI backbone" represents a simple view intended for
2809-- graphic displays and rapid transmission over a network.  It includes only
2810-- alpha carbon or backbone phosphate coordinates for biopolymers. It is based
2811-- on selection of alpha-carbon and backbone phosphate atoms from the "NCBI
2812-- all atom" model. The model type is set to 2.  An even simpler model gives
2813-- only a cartoon representation, using cylinders corresponding to secondary
2814-- structure elements.  This is named "NCBI vector", and has model type 1.
2815
2816-- The models named "PDB Model 1", "PDB Model 2", etc. represent the complete
2817-- information provided by PDB, including full descriptions of statistical
2818-- disorder.  The name of the model is based on the contents of the PDB MODEL
2819-- record, with a default name of "PDB Model 1" for PDB files which contain
2820-- only a single model.  Construction of these models is based on the
2821-- assumption that contents of the PDB "alternate conformation" field are
2822-- intended to imply correlation among the occupancies of atom sets flagged by
2823-- the same identifier.  The special flag " " (blank) is assumed to indicate
2824-- sites occupied in all alternate conformations, and sites flagged otherwise,
2825-- together with " ", to indicate a distinct member of an ensemble of
2826-- alternate conformations.  Note that construction of ensemble members
2827-- according to these assumptions requires two validation checks on PDB
2828-- "alternate conformation" flags: they must be unique among sites assigned to
2829-- the same atom, and the special " " flag must occur only for unique
2830-- sites.  Sites which violate the first check are flagged as "u", for
2831-- "unknown"; they are omitted from all ensemble definitions but are
2832-- nonetheless retained in the coordinate list.  Sites which violate the second
2833-- check are flagged "b" for "blank", and are included in an appropriately
2834-- named ensemble.  The model type for pdb all models is 4.
2835
2836-- Note that in the MMDB database models are stored in the ASN.1 stream in
2837-- order of increasing model type value.  Since models occur as the last item
2838-- in a biostruc, parsers may avoid reading the entire stream if the desired
2839-- model is one of the simplified types, which occur first in the stream. This
2840-- can save considerable I/O time, particularly for large ensemble models from
2841-- NMR determinations.
2842
2843Biostruc-model ::= SEQUENCE {	-- maps chemical components into a measured three-dimensional space
2844	id			Model-id,
2845	type			Model-type,	-- in MMDB, models are stored in order of increasing type value
2846	descr			SEQUENCE OF Model-descr OPTIONAL,
2847	model-space		Model-space OPTIONAL,	-- measurement units and any external reference frame
2848	model-coordinates	SEQUENCE OF Model-coordinate-set OPTIONAL }	-- PDB-derived models contain a single atomic coordinate set
2849
2850Model-id ::= INTEGER	-- exported; type of Biostruc-model.id
2851
2852Model-type ::= INTEGER {	-- indicates the complexity of the view a model gives
2853	ncbi-vector(1),	-- "NCBI vector": cartoon using cylinders for secondary structure elements
2854	ncbi-backbone(2),	-- "NCBI backbone": alpha carbon or backbone phosphate coordinates only
2855	ncbi-all-atom(3),	-- "NCBI all atom": complete coordinates for a "single best" model
2856	pdb-model(4),	-- "PDB Model N": complete PDB information, including disorder
2857	other(255)}
2858
2859Model-descr ::= CHOICE {	-- one descriptor of a model or of a model coordinate set
2860	name			VisibleString,	-- e.g. "NCBI all atom" or "PDB Model 1"
2861	pdb-reso                VisibleString,	-- NOTE(review): presumably the PDB resolution text; confirm
2862	pdb-method              VisibleString,	-- NOTE(review): presumably the PDB experimental method text; confirm
2863	pdb-comment		VisibleString,
2864	other-comment		VisibleString,
2865	attribution		Pub }	-- type imported from NCBI-Pub
2866
2867-- The model space defines measurement units and any external reference frame.
2868-- Coordinates refer to a right-handed orthogonal system defined on axes
2869-- tagged x, y and z in the coordinate and feature definitions of a biostruc.
2870-- Coordinates from PDB-derived structures are reported without change, in
2871-- angstrom units.  The units of temperature and occupancy factors are not
2872-- defined explicitly in PDB, but are inferred from their value range.
2873
2874Model-space ::= SEQUENCE {	-- measurement units and any external reference frame
2875	coordinate-units	ENUMERATED {	-- only required field
2876					angstroms(1),	-- PDB-derived coordinates are reported in angstrom units
2877					nanometers(2),
2878					other(3),
2879					unknown(255)},
2880	thermal-factor-units	ENUMERATED {	-- for PDB, inferred from the value range
2881					b(1),
2882					u(2),
2883					other(3),
2884					unknown(255)} OPTIONAL,
2885	occupancy-factor-units	ENUMERATED {	-- for PDB, inferred from the value range
2886					fractional(1),
2887					electrons(2),
2888					other(3),
2889					unknown(255)} OPTIONAL,
2890	density-units		ENUMERATED {
2891					electrons-per-unit-volume(1),
2892					arbitrary-scale(2),
2893					other(3),
2894					unknown(255)} OPTIONAL,
2895	reference-frame		Reference-frame OPTIONAL }	-- not present for structures from PDB
2896
2897-- An external reference frame is a pointer to another biostruc, with an
2898-- optional operator to rotate and translate coordinates into its model space.
2899-- This item is intended for representation of homology-derived model
2900-- structures, and is not present for structures from PDB.
2901
2902Reference-frame ::= SEQUENCE {	-- pointer to another biostruc used as external reference frame
2903	biostruc-id		Biostruc-id,
2904	rotation-translation	Transform OPTIONAL }	-- rotates and translates coordinates into its model space; type from MMDB-Features
2905
2906-- Atomic coordinates may be assigned literally or by reference to another
2907-- biostruc.  The reference coordinate type is used to represent homology-
2908-- derived model structures.  PDB-derived structures have literal coordinates.
2909
2910-- Referenced coordinates identify another biostruc, any transformation to be
2911-- applied to coordinates from that biostruc, and a mapping of the chemical
2912-- graph of the present biostruc onto that of the referenced biostruc.  They
2913-- give an "alignment" of atoms in the current biostruc with those in another,
2914-- from which the coordinates of matched atoms may be retrieved.  For non-
2915-- atomic models "alignment" may also be represented by molecule and residue
2916-- equivalence lists.  Referenced coordinates are a data item intended for
2917-- representation of homology models, with an explicit pointer to their source
2918-- information. They do not occur in PDB-derived models.
2919
2920Model-coordinate-set ::= SEQUENCE {	-- one set of coordinates, given literally or by reference
2921	id			Model-coordinate-set-id OPTIONAL,
2922	descr			SEQUENCE OF Model-descr OPTIONAL,
2923	coordinates		CHOICE {
2924		literal			Coordinates,	-- used by PDB-derived structures
2925		reference		Chem-graph-alignment } }	-- for homology models; type imported from MMDB-Features
2926
2927Model-coordinate-set-id ::= INTEGER	-- exported; type of Model-coordinate-set.id
2928
2929
2930-- Literal coordinates map chemical components into the model space.  Three
2931-- mapping types are allowed, atomic coordinate models, density-grid models,
2932-- and surface models. A model consists of a sequence of such coordinate sets,
2933-- and may thus combine coordinate subsets which have a different source.
2934-- PDB-derived models contain a single atomic coordinate set, as they by
2935-- definition represent information from a single source.
2936
2937Coordinates ::= CHOICE {	-- the three literal mapping types into the model space
2938	atomic			Atomic-coordinates,	-- atomic coordinate model
2939	surface			Surface-coordinates,	-- surface model
2940	density			Density-coordinates }	-- density-grid model
2941
2942-- Literal atomic coordinate values give location, occupancy and order
2943-- parameters, and a pointer to a specific atom defined in the biostruc graph.
2944-- Temperature and occupancy factors have their conventional crystallographic
2945-- definitions, with units defined in the model space declaration.  Atoms,
2946-- sites, temperature-factors, occupancies and alternate-conformation-ids
2947-- are parallel arrays, i.e. they have the same number of values as given by
2948-- number-of-points. Conformation ensembles represent distinct correlated-
2949-- disorder subsets of the coordinates.  They will be present only for certain
2950-- "views" of PDB structures, as described above. Their derivation from PDB-
2951-- supplied "alternate-conformation" ids is described below.
2952
2953Atomic-coordinates ::= SEQUENCE {	-- literal atomic coordinates stored as parallel arrays
2954	number-of-points	INTEGER,	-- number of values in each of the parallel arrays below
2955	atoms			Atom-pntrs,	-- which atom each point describes; type imported from MMDB-Features
2956	sites			Model-space-points,	-- scaled integer x, y, z locations
2957	temperature-factors	Atomic-temperature-factors OPTIONAL,
2958	occupancies		Atomic-occupancies OPTIONAL,
2959	alternate-conf-ids	Alternate-conformation-ids OPTIONAL,
2960	conf-ensembles		SEQUENCE OF Conformation-ensemble OPTIONAL }	-- correlated-disorder subsets; only for certain views
2961
2962-- The atoms whose location is described by each coordinate are identified
2963-- via a hierarchical pointer to the chemical graph of the biomolecular
2964-- assembly.  Coordinates may be matched with atoms in the chemical structure
2965-- by the values of the molecule, residue and atom id's given here,  which
2966-- match exactly the items of the same type defined in Biostruc-graph.
2967
2968-- Coordinates are given as integer values, with a scale factor to convert
2969-- to real values for each x, y or z, in the units indicated in model-space.
2970-- Integer values must be divided by the scale factor.  This use of integer
2971-- values reduces the ASN.1 stream size. The scale factors for temperature
2972-- factors and occupancies are given separately, but must be used in the same
2973-- fashion to produce properly scaled real values.
2974
2975Model-space-points ::= SEQUENCE {	-- coordinates as scaled integers, in the units given in model-space
2976	scale-factor		INTEGER,	-- divide each integer x, y, z by this to get the real value
2977	x			SEQUENCE OF INTEGER,
2978	y			SEQUENCE OF INTEGER,
2979	z			SEQUENCE OF INTEGER }
2980
2981Atomic-temperature-factors ::= CHOICE {	-- temperature factors in one of two forms
2982	isotropic		Isotropic-temperature-factors,	-- one value array
2983	anisotropic		Anisotropic-temperature-factors }	-- six component arrays
2984
2985Isotropic-temperature-factors ::= SEQUENCE {
2986	scale-factor		INTEGER,	-- divide the integer values by this, as for coordinates
2987	b			SEQUENCE OF INTEGER }	-- scaled integer temperature factor values
2988
2989Anisotropic-temperature-factors ::= SEQUENCE {
2990	scale-factor		INTEGER,	-- divide the integer values by this, as for coordinates
2991	b-11			SEQUENCE OF INTEGER,	-- six arrays b-11 .. b-33; presumably the unique elements of a symmetric tensor, confirm
2992	b-12			SEQUENCE OF INTEGER,
2993	b-13			SEQUENCE OF INTEGER,
2994	b-22			SEQUENCE OF INTEGER,
2995	b-23			SEQUENCE OF INTEGER,
2996	b-33			SEQUENCE OF INTEGER }
2997
2998Atomic-occupancies ::= SEQUENCE {
2999	scale-factor		INTEGER,	-- divide the integer values by this, as for coordinates
3000	o			SEQUENCE OF INTEGER }	-- scaled integer occupancy values
3001
3002-- An alternate conformation id is optionally associated with each coordinate.
3003-- Aside from corrections due to the validation checks described above, the
3004-- contents of MMDB Alternate-conformation-ids are identical to the PDB
3005-- "alternate conformation" field.
3006
3007Alternate-conformation-ids ::= SEQUENCE OF Alternate-conformation-id	-- parallel to the coordinate arrays of Atomic-coordinates
3008
3009Alternate-conformation-id ::= VisibleString	-- PDB "alternate conformation" flag, after validation corrections
3010
3011-- Correlated disorder ensemble is defined by a set of alternate conformation
3012-- id's which identify coordinates relevant to that ensemble. These are
3013-- defined from the validated and corrected contents of the PDB "alternate
3014-- conformation" field as described above.  A given ensemble, for example, may
3015-- consist of atom sites flagged by " " and "A" Alternate-conformation-ids.
3016-- Names for ensembles are constructed from these flags. This example would be
3017-- named, in its description, "PDB Ensemble blank plus A".
3018
3019-- Note that this interpretation is consistent with common PDB usage of the
3020-- "alternate conformation" field, but that PDB specifications do not formally
3021-- distinguish between correlated and uncorrelated disorder in crystallographic
3022-- models. Ensembles identified in MMDB thus may not correspond to the meaning
3023-- intended by PDB or the depositor.  No information is lost, however, and
3024-- if the intended meaning is known alternative ensemble descriptions may be
3025-- reconstructed directly from the Alternate-conformation-ids.
3026
3027-- Note that correlated disorder as defined here is allowed within an atomic
3028-- coordinate set but not between the multiple sets which may define a model.
3029-- Multiple sets within the same model are intended as a means to represent
3030-- assemblies modeled from different experimentally determined structures,
3031-- where correlated disorder between coordinate sets is not relevant.
3032
3033Conformation-ensemble ::= SEQUENCE {	-- one correlated disorder ensemble within an atomic coordinate set
3034	name		VisibleString,	-- built from the flags, e.g. "PDB Ensemble blank plus A"
3035	alt-conf-ids	SEQUENCE OF Alternate-conformation-id }	-- ids identifying the coordinates relevant to this ensemble
3036
3037
3038-- Literal surface coordinates define the chemical components whose structure
3039-- is described by a surface, and the surface itself.  The surface may be
3040-- either a regular geometric solid or a triangle-mesh of arbitrary shape.
3041
3042Surface-coordinates ::= SEQUENCE {	-- chemical components described by a surface, plus the surface itself
3043	contents		Chem-graph-pntrs,	-- the components described; type imported from MMDB-Features
3044	surface			CHOICE {	sphere		Sphere,	-- Sphere, Cone, Cylinder, Brick are imported from MMDB-Features
3045						cone		Cone,
3046						cylinder	Cylinder,
3047						brick		Brick,
3048						tmesh		T-mesh,	-- T-mesh and Triangles are defined in this module
3049						triangles	Triangles } }
3050T-mesh ::= SEQUENCE {	-- the "tmesh" alternative of Surface-coordinates.surface
3051	number-of-points	INTEGER,
3052	scale-factor		INTEGER,	-- NOTE(review): presumably divides x, y, z as in Model-space-points; confirm
3053	swap			SEQUENCE OF BOOLEAN,	-- NOTE(review): meaning not stated in this file; confirm
3054	x			SEQUENCE OF INTEGER,
3055	y			SEQUENCE OF INTEGER,
3056	z		        SEQUENCE OF INTEGER }
3057
3058Triangles ::= SEQUENCE {	-- the "triangles" alternative of Surface-coordinates.surface
3059	number-of-points	INTEGER,
3060	scale-factor		INTEGER,	-- NOTE(review): presumably divides x, y, z as in Model-space-points; confirm
3061	x			SEQUENCE OF INTEGER,
3062	y			SEQUENCE OF INTEGER,
3063	z			SEQUENCE OF INTEGER,
3064	number-of-triangles     INTEGER,
3065	v1			SEQUENCE OF INTEGER,	-- NOTE(review): v1, v2, v3 presumably index the points above, one entry per triangle; confirm
3066	v2			SEQUENCE OF INTEGER,
3067	v3			SEQUENCE OF INTEGER }
3068
3069
3070-- Literal density coordinates define the chemical components whose structure
3071-- is described by a density grid, parameters of this grid, and density values.
3072
3073Density-coordinates ::= SEQUENCE {	-- components described by a density grid, grid parameters, and density values
3074	contents		Chem-graph-pntrs,	-- the components described; type imported from MMDB-Features
3075	grid-corners		Brick,	-- type imported from MMDB-Features
3076	grid-steps-x		INTEGER,
3077	grid-steps-y		INTEGER,
3078	grid-steps-z		INTEGER,
3079	fastest-varying		ENUMERATED {	-- NOTE(review): presumably the axis order of the density array; confirm
3080					x(1),
3081					y(2),
3082					z(3)},
3083	slowest-varying		ENUMERATED {
3084					x(1),
3085					y(2),
3086					z(3)},
3087	scale-factor		INTEGER,	-- NOTE(review): presumably divides the density values; confirm
3088	density			SEQUENCE OF INTEGER }
3089
3090
3091END
3092--$Revision: 97143 $
3093--**********************************************************************
3094--
3095--  Biological Macromolecule 3-D Structure Data Types for MMDB,
3096--                A Molecular Modeling Database
3097--
3098--  Definitions for structural features and biostruc addressing
3099--
3100--  By Hitomi Ohkawa, Jim Ostell, Chris Hogue and Steve Bryant
3101--
3102--  National Center for Biotechnology Information
3103--  National Institutes of Health
3104--  Bethesda, MD 20894 USA
3105--
3106--  July, 1996
3107--
3108--**********************************************************************
3109
3110MMDB-Features DEFINITIONS ::=
3111
3112BEGIN
3113
3114EXPORTS Biostruc-feature-set, Chem-graph-pntrs, Atom-pntrs,
3115	Chem-graph-alignment, Sphere, Cone, Cylinder, Brick, Transform,
3116	Biostruc-feature-set-id, Biostruc-feature-id;
3117
3118IMPORTS Biostruc-id FROM MMDB
3119	Molecule-id, Residue-id, Atom-id FROM MMDB-Chemical-graph
3120	Model-id, Model-coordinate-set-id FROM MMDB-Structural-model
3121	User-object FROM NCBI-General
3122	Pub FROM NCBI-Pub;
3123
3124-- Named model features refer to sets of residues or atoms, or a region in
3125-- the model space.  A few specific feature types are allowed for compatibility
3126-- with PDB usage, but the purpose of a named model feature is simply to
3127-- associate various types of information with a set of atoms or
3128-- residues, or a spatially-defined region of the model structure.  They also
3129-- support association of various properties with each residue or atom of a
3130-- set.
3131
3132-- PDB-derived secondary structure defines a single feature, represented as a
3133-- sequence of residue motifs, as are the contents of PDB SITE and
3134-- FTNOTE records.  NCBI-assigned core and secondary structure descriptions
3135-- are also represented as a sequence of residue motifs.
3136
3137Biostruc-feature-set ::= SEQUENCE {	-- a described collection of features
3138	id		Biostruc-feature-set-id,
3139	descr		SEQUENCE OF Biostruc-feature-set-descr OPTIONAL,
3140	features	SEQUENCE OF Biostruc-feature }	-- required
3141
3142Biostruc-feature-set-id ::= INTEGER	-- exported; also used by Other-feature.set
3143
3144Biostruc-feature-set-descr ::= CHOICE {	-- one descriptor of a feature set
3145	name			VisibleString,
3146	pdb-comment		VisibleString,
3147	other-comment		VisibleString,
3148	attribution		Pub }	-- type imported from NCBI-Pub
3149
3150-- An explicitly specified type in Biostruc-feature allows for
3151-- efficient extraction and indexing of feature sets of a specific type.
3152-- Special types are provided for coloring and rendering,
3153-- as needed by molecular graphics programs.
3154
3155Biostruc-feature ::= SEQUENCE {	-- one named feature; every field is OPTIONAL
3156	id		Biostruc-feature-id OPTIONAL,
3157	name		VisibleString OPTIONAL,
3158	type	INTEGER {	helix(1),
3159				strand(2),
3160				sheet(3),
3161				turn(4),
3162				site(5),
3163				footnote(6),
3164				comment(7),      -- new
3165				subgraph(100),   -- NCBI domain reserved
3166				region(101),
3167				core(102),       -- user core definition
3168				supercore(103),  -- NCBI reserved
3169				color(150),      -- new
3170				render(151),     -- new
3171				label(152),      -- new
3172				transform(153),  -- new
3173				camera(154),     -- new
3174				script(155),      -- for scripts
3175				alignment(200),  -- VAST reserved
3176				similarity(201),
3177				multalign(202),  -- multiple alignment
3178                                indirect(203),   -- new
3179				cn3dstate(254),  -- Cn3D reserved
3180				other(255) } OPTIONAL,
3181	property	CHOICE {	-- information associated with the location
3182				color		Color-prop,
3183				render		Render-prop,
3184				transform	Transform,
3185				camera		Camera,
3186				script		Biostruc-script,
3187				user		User-object } OPTIONAL,
3188	location	CHOICE {	-- atoms, residues or region the feature applies to
3189				subgraph	Chem-graph-pntrs,
3190				region		Region-pntrs,
3191				alignment	Chem-graph-alignment,
3192				similarity	Region-similarity,
3193				indirect	Other-feature } OPTIONAL } -- new
3194
3195-- Other-feature allows for specifying location via reference to another
3196-- Biostruc-feature and its location.
3197
3198Other-feature ::= SEQUENCE {	-- reference to another Biostruc-feature
3199	biostruc-id		Biostruc-id,	-- Biostruc-id is imported from MMDB
3200	set			Biostruc-feature-set-id,
3201	feature			Biostruc-feature-id }
3202
3203Biostruc-feature-id ::= INTEGER	-- feature id; used by Biostruc-feature.id and Other-feature.feature
3204
3205-- Atom, residue or molecule motifs describe a substructure defined by a set
3206-- of nodes from the chemical graph. PDB secondary structure features are
3207-- described as a residue motif, since they are not associated with any one of
3208-- the multiple models that may be provided in a PDB file.  NCBI-assigned
3209-- secondary structure is represented in the same way, even though it is
3210-- model specific, since this allows for simple mapping of the structural
3211-- feature onto a sequence-only representation. This addressing mode may also
3212-- be used to describe features to be associated with particular atoms,
3213-- as, for example, the chemical shift observed in an NMR experiment.
3214
3215Chem-graph-pntrs ::= CHOICE {	-- a set of chemical graph nodes, at one of three levels
3216	atoms			Atom-pntrs,
3217	residues		Residue-pntrs,
3218	molecules		Molecule-pntrs }
3219
3220Atom-pntrs ::= SEQUENCE {	-- atoms addressed by molecule, residue and atom ids
3221	number-of-ptrs		INTEGER,	-- presumably the length of each list below; TODO confirm
3222	molecule-ids		SEQUENCE OF Molecule-id,
3223	residue-ids		SEQUENCE OF Residue-id,
3224	atom-ids		SEQUENCE OF Atom-id }	-- id types are imported from MMDB-Chemical-graph
3225
3226Residue-pntrs ::= CHOICE {	-- residues given either explicitly or as intervals
3227	explicit		Residue-explicit-pntrs,
3228	interval		SEQUENCE OF Residue-interval-pntr }
3229
3230Residue-explicit-pntrs ::= SEQUENCE {	-- residues addressed by molecule and residue ids
3231	number-of-ptrs		INTEGER,	-- presumably the length of each list below; TODO confirm
3232	molecule-ids		SEQUENCE OF Molecule-id,
3233	residue-ids		SEQUENCE OF Residue-id }
3234
3235Residue-interval-pntr ::= SEQUENCE {	-- a from..to range of residues within one molecule
3236	molecule-id		Molecule-id,
3237	from			Residue-id,
3238	to			Residue-id }	-- NOTE(review): bounds presumably inclusive; confirm
3239
3240Molecule-pntrs ::= SEQUENCE {	-- whole molecules addressed by molecule id
3241	number-of-ptrs		INTEGER,	-- presumably the length of molecule-ids; TODO confirm
3242	molecule-ids		SEQUENCE OF Molecule-id }
3243
3244-- Region motifs describe features defined by spatial location, such as the
3245-- site specified by a coordinate value, or a region within a bounding volume.
3246
3247Region-pntrs ::= SEQUENCE {	-- a region of space within one model
3248	model-id	Model-id,	-- Model-id is imported from MMDB-Structural-model
3249	region		CHOICE {
3250				site		SEQUENCE OF Region-coordinates,	-- by coordinate indices
3251				boundary	SEQUENCE OF Region-boundary } }	-- by bounding solids
3252
3253-- Coordinate sites describe a region in space by reference to individual
3254-- coordinates, in a particular model.  These coordinates may be either the
3255-- x, y and z values of atomic coordinates, the triangles of a surface mesh,
3256-- or the grid points of a density model. All are addressed in the same manner,
3257-- as coordinate indices which give offsets from the beginning of the
3258-- coordinate data arrays.  A coordinate-index of 5, for example, refers to
3259-- the 5th x, y and z values of an atomic coordinate set, the 5th v1, v2, and v3
3260-- values of a triangle mesh, or the 5th value in a density grid.
3261
3262-- PDB SITE and FTNOTE records refer to particular atomic coordinates, and they
3263-- are represented as a region motif with addresses of type Region-coordinates.
3264-- Any names or descriptions provided by PDB are thus associated with the
3265-- indicated sites, in the indicated model.
3266
3267Region-coordinates ::= SEQUENCE {	-- coordinates picked out of one coordinate set
3268	model-coord-set-id	Model-coordinate-set-id,
3269	number-of-coords	INTEGER OPTIONAL,
3270	coordinate-indices	SEQUENCE OF INTEGER OPTIONAL }	-- offsets from the start of the coordinate arrays
3271
3272-- Region boundaries are defined by regular solids located in the model space.
3273
3274Region-boundary ::=	CHOICE {	sphere		Sphere,	-- one regular solid in model space
3275					cone		Cone,
3276					cylinder	Cylinder,
3277					brick		Brick }
3278
3279-- A biostruc alignment establishes an equivalence of nodes in the chemical
3280-- graphs of two or more biostrucs. This may be mapped to a sequence
3281-- alignment in the case of biopolymers.
3282-- The 'dimension' component indicates the number of participants
3283-- in the alignment.  For pairwise alignments, such as VAST
3284-- structure-structure alignments, the dimension will be always 2, with
3285-- biostruc-ids, alignment, and domain each containing two entries for an
3286-- aligned pair.  The 'alignment' component contains a pair of Chem-graph-pntrs
3287-- specifying a like number of corresponding residues in each structure.
3288-- The 'domain' component specifies a region of each structure considered
3289-- in the alignment.  Only one transform (for the second structure) and
3290-- one aligndata (for the pair) are provided for each VAST alignment.
3291--
3292-- For multiple alignments, a set of components are treated as
3293-- parallel arrays of length 'dimension'.
3294-- The 'transform' component moves each structure to align it with
3295-- the structure specified as the first element in the "parallel" array,
3296-- so necessarily the first transform is a NULL transform.
3297-- Align-stats are placeholders for scores.
3298
3299Chem-graph-alignment ::= SEQUENCE {	-- equivalence of graph nodes between biostrucs
3300	dimension		INTEGER DEFAULT 2,	-- number of participants; always 2 for VAST pairs
3301	biostruc-ids		SEQUENCE OF Biostruc-id,
3302	alignment		SEQUENCE OF Chem-graph-pntrs,	-- corresponding residues in each structure
3303	domain			SEQUENCE OF Chem-graph-pntrs OPTIONAL,	-- region of each structure considered
3304	transform		SEQUENCE OF Transform OPTIONAL,	-- aligns a structure with the first one
3305	aligndata		SEQUENCE OF Align-stats OPTIONAL }	-- scores
3306
3307Align-stats ::= SEQUENCE {	-- placeholders for alignment scores; all OPTIONAL
3308	descr		VisibleString OPTIONAL,
3309	scale-factor	INTEGER OPTIONAL,	-- presumably scales the INTEGER scores below; TODO confirm
3310	vast-score	INTEGER OPTIONAL,
3311	vast-mlogp	INTEGER OPTIONAL,
3312	align-res	INTEGER OPTIONAL,
3313 	rmsd		INTEGER OPTIONAL,
3314	blast-score	INTEGER OPTIONAL,
3315	blast-mlogp	INTEGER OPTIONAL,
3316	other-score	INTEGER OPTIONAL }
3317
3318-- A biostruc similarity describes spatial features which are similar between
3319-- two or more biostrucs.  Similarities are model dependent, and the model and
3320-- coordinate set ids of the biostrucs must be specified.  They do not
3321-- necessarily map to a sequence alignment, as the regions referenced may
3322-- be pieces of a surface or grid, and thus not uniquely mappable to particular
3323-- chemical components.
3324
3325Region-similarity ::= SEQUENCE {	-- similar spatial regions in two or more biostrucs
3326	dimension		INTEGER DEFAULT 2,
3327	biostruc-ids		SEQUENCE OF Biostruc-id,
3328	similarity		SEQUENCE OF Region-pntrs,	-- each Region-pntrs carries its own model-id
3329	transform		SEQUENCE OF Transform }	-- mandatory here; OPTIONAL in Chem-graph-alignment
3330
3331-- Geometrical primitives are used in the definition of region motifs, and
3332-- also non-atomic coordinates.  Spheres, cones, cylinders and bricks are
3333-- defined by a few points in the model space.
3334
3335Sphere ::= SEQUENCE {	-- sphere given by a center point and a radius
3336	center			Model-space-point,
3337	radius			RealValue }
3338
3339Cone ::= SEQUENCE {	-- cone given by its axis end points and one radius
3340	axis-top		Model-space-point,
3341	axis-bottom		Model-space-point,
3342	radius-bottom		RealValue }	-- only the bottom radius is specified
3343
3344Cylinder ::= SEQUENCE {	-- cylinder given by its axis end points and a radius
3345	axis-top		Model-space-point,
3346	axis-bottom		Model-space-point,
3347	radius			RealValue }
3348
3349-- A brick is defined by the coordinates of eight corners.  These are assumed
3350-- to appear in the order 000, 001, 010, 011, 100, 101, 110, 111, where the
3351-- digits 0 and 1 refer respectively to the x, y and z axes of a unit cube.
3352-- Opposite edges are assumed to be parallel.
3353
3354Brick ::= SEQUENCE {	-- eight corner points, in the order 000 through 111
3355	corner-000		Model-space-point,
3356	corner-001		Model-space-point,
3357	corner-010		Model-space-point,
3358	corner-011		Model-space-point,
3359	corner-100		Model-space-point,
3360	corner-101		Model-space-point,
3361	corner-110		Model-space-point,
3362	corner-111		Model-space-point }
3363
3364Model-space-point ::= SEQUENCE {	-- a point in model space, carried as integers
3365	scale-factor		INTEGER,	-- presumably x, y, z are real values times this factor; TODO confirm
3366	x			INTEGER,
3367	y			INTEGER,
3368	z			INTEGER }
3369
3370RealValue ::= SEQUENCE {	-- a real number carried as two integers
3371	scale-factor		INTEGER,	-- NOTE(review): multiply or divide is not stated in this file; confirm
3372	scaled-integer-value	INTEGER }
3373
3374
3375Transform ::=  SEQUENCE {	-- an identified list of moves
3376            id  INTEGER,
3377            moves SEQUENCE OF Move }	-- NOTE(review): presumably applied in list order; confirm
3378
3379Move ::= CHOICE {	-- a single rotation or a single translation
3380	rotate		Rot-matrix,
3381	translate	Trans-matrix }
3382
3383-- A rotation matrix is defined by 9 numbers, given by row, i.e.,
3384-- with column indices varying fastest.
3385-- Coordinates, as a matrix with columns x, y, and z, are rotated
3386-- via multiplication with the rotation matrix.
3387-- A translation matrix is defined by 3 numbers, which are added to
3388-- the rotated coordinates to apply the specified amount of translation.
3389
3390Rot-matrix ::= SEQUENCE {	-- 9 numbers listed row by row, column index varying fastest
3391	scale-factor		INTEGER,	-- presumably the rot values are reals times this factor; TODO confirm
3392	rot-11			INTEGER,
3393	rot-12			INTEGER,
3394	rot-13			INTEGER,
3395	rot-21			INTEGER,
3396	rot-22			INTEGER,
3397	rot-23			INTEGER,
3398	rot-31			INTEGER,
3399	rot-32			INTEGER,
3400	rot-33			INTEGER }
3401
3402Trans-matrix ::= SEQUENCE {	-- 3 numbers added to the rotated coordinates
3403	scale-factor		INTEGER,	-- presumably the tran values are reals times this factor; TODO confirm
3404	tran-1			INTEGER,
3405	tran-2			INTEGER,
3406	tran-3			INTEGER }
3407
3408-- The camera is a position relative to the world coordinates
3409-- of the structure referred to by a location.
3410-- this is used to set the initial position of the
3411-- camera using OpenGL.  scale is the value used to scale the
3412-- other values from floating point to integer
3413
3414Camera ::= SEQUENCE {	-- initial camera position for OpenGL
3415	x		INTEGER,
3416	y		INTEGER,
3417	distance	INTEGER,
3418	angle		INTEGER,
3419	scale		INTEGER,	-- scales the other values from floating point to integer
3420    modelview   GL-matrix }
3421
3422
3423GL-matrix ::= SEQUENCE {	-- 16 matrix elements plus a scale, all integers
3424	scale		INTEGER,	-- NOTE(review): presumably used like Camera.scale; confirm
3425	m11			INTEGER,	-- NOTE(review): row or column meaning of the two digits is not stated here
3426	m12			INTEGER,
3427	m13			INTEGER,
3428	m14			INTEGER,
3429	m21			INTEGER,
3430	m22			INTEGER,
3431	m23			INTEGER,
3432	m24			INTEGER,
3433	m31			INTEGER,
3434	m32			INTEGER,
3435	m33			INTEGER,
3436	m34			INTEGER,
3437	m41			INTEGER,
3438	m42			INTEGER,
3439	m43			INTEGER,
3440	m44			INTEGER }
3441
3442
3443Color-prop ::= SEQUENCE {	-- color property; every component is OPTIONAL
3444	r		INTEGER OPTIONAL,	-- NOTE(review): value range of r, g, b is not stated in this file
3445	g		INTEGER OPTIONAL,
3446	b		INTEGER OPTIONAL,
3447	name		VisibleString OPTIONAL }
3448
3449-- Note that Render-prop is compatible with the Annmm specification,
3450-- i.e., its numbering schemes do not clash with those in Annmm.
3451
3452Render-prop ::= INTEGER {	-- named rendering and coloring choices
3453	default		(0),  -- Default view
3454	wire		(1),  -- use wireframe
3455	space		(2),  -- use spacefill
3456	stick		(3),  -- use stick model (thin cylinders)
3457	ballNStick	(4),  -- use ball & stick model
3458	thickWire	(5),  -- thicker wireframe
3459	hide		(9),  -- don't show this
3460	name		(10), -- display its name next to it
3461	number 		(11), -- display its number next to it
3462	pdbNumber	(12), -- display its PDB number next to it
3463	objWireFrame	(150), -- display MMDB surface object as wireframe
3464	objPolygons	(151), -- display MMDB surface object as polygons
3465	colorsetCPK	(225), -- color atoms like CPK models
3466	colorsetbyChain	(226), -- color each chain different
3467	colorsetbyTemp	(227), -- color using isotropic Temp factors
3468	colorsetbyRes	(228), -- color using residue properties
3469	colorsetbyLen	(229), -- color changes along chain length
3470	colorsetbySStru	(230), -- color by secondary structure
3471	colorsetbyHydro (231), -- color by hydrophobicity
3472	colorsetbyObject(246), -- color each object differently
3473	colorsetbyDomain(247), -- color each domain differently
3474	other           (255)  -- anything not named above
3475	}
3476
3477--  When a Biostruc-Feature with a Biostruc-script is initiated,
3478--  it should play the specified steps one at a time, setting the feature-do
3479--  list as the active display.
3480--  The camera can be set using a feature-do,
3481--  but it may be moved independently with
3482--  camera-move, which specifies how to move
3483--  the camera dynamically during the step along the path defined (e.g.,
3484--  a zoom, a rotate).
3485--  Any value of pause (in tenths of a second) will force a pause
3486--  after an image is shown.
3487--  If waitevent is TRUE, it will await a mouse or keypress and ignore
3488--  the pause value.
3489
3490Biostruc-script ::= SEQUENCE OF Biostruc-script-step	-- steps are played one at a time
3491
3492Biostruc-script-step ::= SEQUENCE {	-- one step of a Biostruc-script
3493	step-id			Step-id,
3494	step-name		VisibleString OPTIONAL,
3495	feature-do		SEQUENCE OF Other-feature OPTIONAL,	-- features set as the active display
3496	camera-move		Transform OPTIONAL,	-- camera motion during the step
3497	pause			INTEGER DEFAULT 10,	-- in tenths of a second, so the default is 1 second
3498	waitevent		BOOLEAN,	-- TRUE: await mouse or keypress and ignore pause
3499	extra			INTEGER,	-- NOTE(review): purpose is not documented in this file
3500	jump			Step-id OPTIONAL }	-- NOTE(review): presumably the step to continue with; confirm
3501
3502Step-id ::= INTEGER	-- step identifier; type of step-id and jump in Biostruc-script-step
3503
3504END
3505--$Revision: 97143 $
3506--**********************************************************************
3507--
3508--  Definitions for CDD's
3509--
3510--  NCBI Structure Group
3511--
3512--  National Center for Biotechnology Information
3513--  National Institutes of Health
3514--  Bethesda, MD 20894 USA
3515--
3516--  October 1999
3517--
3518--  asntool -m cdd.asn -w 100 -o cdd.h
3519--  asntool -B objcdd -m cdd.asn -G -w 100 -I objseq.h objsset.h -K cdd.h -M asn.all
3520--**********************************************************************
3521
3522NCBI-Cdd DEFINITIONS ::=
3523-- NCBI Conserved Domain Definition
3524
3525
3526BEGIN
3527
3528EXPORTS  Cdd-id, Cdd-id-set, Cdd, Cdd-set, Cdd-tree, Cdd-tree-set;
3529
3530IMPORTS  Date                 FROM NCBI-General
3531         Pub                  FROM NCBI-Pub
3532         Biostruc-annot-set   FROM MMDB
3533         Bioseq,              -- listed together: a module may be named only once in IMPORTS
3534         Seq-annot            FROM NCBI-Sequence
3535         Seq-entry            FROM NCBI-Seqset
3536         Org-ref              FROM NCBI-Organism
3537         Seq-interval         FROM NCBI-Seqloc
3538         Score-set            FROM NCBI-Seqalign;
3539
3540-- Cdd's should not exist without a unique integer id, but alternative
3541-- id's may be present as well.
3542
3543Global-id ::= SEQUENCE {                         -- accession based id, the gid form of Cdd-id
3544             accession      VisibleString,
3545             release        VisibleString OPTIONAL,
3546             version        INTEGER       OPTIONAL, -- version 0 is the seed
3547             database       VisibleString OPTIONAL  -- this is NOT the source!
3548             }                                      -- rather the database the
3549                                                    -- object resides in
3550
3551Cdd-id ::= CHOICE {
3552             uid            INTEGER,                -- integer id
3553             gid            Global-id               -- alternative, accession based id
3554             }
3555
3556Cdd-id-set ::= SEQUENCE OF Cdd-id    -- id list; type of the id fields of Cdd and Cdd-tree
3557
3558-- The description of CDD's refers to the specific set of aligned sequences,
3559-- the region that is being aligned and the information contained in the
3560-- alignment. It may contain a lengthy comment
3561-- describing the function of the domain as well as its origin and all
3562-- other anecdotal information that can't be pressed into a rigid scheme.
3563-- Crosslinks to reference papers available in PubMed are possible as well.
3564-- There can be as many of these as you want in the CDD.
3565
3566Cdd-descr ::= CHOICE {                           -- one descriptive item of a CDD
3567                othername   VisibleString, -- alternative names for the CDD
3568                category    VisibleString, -- intracellular, extracellular, etc.
3569                comment     VisibleString, -- this is where annotations go
3570                reference   Pub,           -- a citation
3571                create-date Date,          -- valid for the current version
3572                tax-source  Org-ref,       -- holds the highest common node
3573                source      VisibleString, -- the database the seeds were created
3574                                           -- from, e.g. SMART, PFAM, etc..
3575                status      INTEGER { unassigned(0),
3576                                      finished-ok(1),     -- to indicate
3577                                      pending-release(2), -- processing status
3578                                      other-asis(3),      -- or final type
3579                                      matrix-only(4),     --
3580                                      other(255) }        -- for CD production
3581              }
3582
3583Cdd-descr-set ::= SET OF Cdd-descr    -- any number of descriptive items, unordered (SET OF)
3584
3585-- the Cdd-tree contains the hierarchy of CDDs.  This object is separate from
3586-- the Cdd's themselves to allow it to be retrieved separately and to
3587-- operate as an index.
3588
3589Cdd-tree ::= SEQUENCE {                          -- hierarchy entry, kept apart from the Cdd itself
3590            name          VisibleString,
3591            id            Cdd-id-set,
3592            description   Cdd-descr-set OPTIONAL,
3593            parents       Cdd-id-set OPTIONAL,     -- related entries are given by id only
3594            children      Cdd-id-set OPTIONAL,
3595            siblings      Cdd-id-set OPTIONAL
3596            }
3597
3598Cdd-tree-set ::= SEQUENCE OF Cdd-tree    -- a list of hierarchy entries
3599
3600-- Matrix definitions, these are supposed to store PSSMs and corresponding
3601-- matrices of relative residue frequencies.
3602-- the number of columns and rows is listed explicitly, values in columns
3603-- are stored column by column, i.e. in groups of nrows values for each column
3604
3605Matrix ::= SEQUENCE {                -- PSSM or relative residue frequency matrix
3606  ncolumns      INTEGER,
3607  nrows         INTEGER,
3608  row-labels    SEQUENCE OF VisibleString OPTIONAL,
3609  scale-factor  INTEGER,               -- presumably scales the INTEGER values below; TODO confirm
3610  columns       SEQUENCE OF INTEGER    -- column by column, nrows values per column
3611}
3612
3613-- definition for matrix of pairwise "distances", stored as the upper
3614-- triangle of a squared n x n matrix (excluding the diagonal), this is
3615-- supposed to store pairwise percentages of identical residues, pairwise
3616-- alignment scores or E-values from pairwise BLAST sequence comparisons
3617
3618Triangle ::= SEQUENCE {              -- upper triangle of pairwise values, diagonal excluded
3619  nelements     INTEGER,             -- NOTE(review): presumably n of the n x n matrix; confirm
3620  scores        Score-set            -- Score-set is imported from NCBI-Seqalign
3621}
3622
3623-- the Cdd is the basic ASN.1 object storing an annotated and curated
3624-- set of alignments (formulated as a set of pairwise master-slave
3625-- alignments).
3626-- The alignment data are contained in Seq-align-sets and Biostruc-feature-sets.
3627-- Version numbers in Global-ids are meant to be updated every time the Cdd is changed
3628-- in a way that does not require Global-ids to be changed (sequences added in update
3629-- cycle, annotation changed)
3630
3631Cdd ::= SEQUENCE {                            -- annotated, curated set of alignments
3632  name          VisibleString,
3633  id            Cdd-id-set,
3634  description   Cdd-descr-set OPTIONAL,
3635  seqannot      SEQUENCE OF Seq-annot OPTIONAL, -- contains the alignment
3636  features      Biostruc-annot-set OPTIONAL, -- contains structure alignments
3637                                             -- or "core" definitions
3638  sequences     Seq-entry OPTIONAL,          -- store as bioseq-set inside seq-entry
3639  profile-range Seq-interval OPTIONAL,       -- profile for this region only
3640                                             -- also stores the Seq-id of the master
3641  trunc-master  Bioseq OPTIONAL,             -- holds the truncated master
3642                                             -- which may be something like a
3643                                             -- consensus, but still refers to the
3644                                             -- sequence coord. frame in profile-range
3645  posfreq       Matrix OPTIONAL,             -- relative residue frequencies
3646  scoremat      Matrix OPTIONAL,             -- Position dependent score matrix
3647  distance      Triangle OPTIONAL            -- pairwise distances for all seqs.
3648}
3649
3650Cdd-set ::= SET OF Cdd    -- an unordered collection of Cdd objects
3651
3652END
3653--$Revision: 97143 $
3654--****************************************************************
3655--
3656--  NCBI MIME type (chemical/ncbi-asn1-ascii and chemical/ncbi-asn1-binary)
3657--  by Jonathan Epstein, February 1996
3658--
3659--****************************************************************
3660
3661NCBI-Mime DEFINITIONS ::=
3662BEGIN
3663
3664EXPORTS Ncbi-mime-asn1;
3665IMPORTS Biostruc, Biostruc-annot-set FROM MMDB
3666	Seq-entry FROM NCBI-Seqset
3667	Seq-annot FROM NCBI-Sequence
3668	Medline-entry FROM NCBI-Medline;
3669
3670Ncbi-mime-asn1 ::= CHOICE {	-- the single type exported by NCBI-Mime
3671	entrez	Entrez-general,			-- just a structure
3672	alignstruc	Biostruc-align,			-- structures & sequences & alignments
3673	alignseq	Biostruc-align-seq,	-- sequence alignment
3674    strucseq    Biostruc-seq,	-- structure & sequences
3675    strucseqs   Biostruc-seqs	    -- structure & sequences & alignments
3676	-- others may be added here in the future
3677}
3678
3679Biostruc-align ::= SEQUENCE {	-- master and slave structures with their alignments
3680	master	Biostruc,
3681	slaves	SET OF Biostruc,
3682	alignments	Biostruc-annot-set,	-- structure alignments
3683	sequences SET OF Seq-entry,	-- sequences
3684	seqalign SET OF Seq-annot }	-- Seq-annot is imported from NCBI-Sequence
3685
3686Biostruc-align-seq ::= SEQUENCE {	-- display seq structure align only
3687	sequences SET OF Seq-entry,	-- sequences
3688	seqalign SET OF Seq-annot }	-- same two components as the tail of Biostruc-align
3689
3690Biostruc-seq ::= SEQUENCE {	-- display  structure seq added by yanli
3691    structure Biostruc,
3692	sequences SET OF Seq-entry }	-- sequences; no alignment component in this type
3693
3694Biostruc-seqs ::= SEQUENCE { -- display blast alignment along with neighbor's structure added by yanli
3695    structure Biostruc,
3696	sequences SET OF Seq-entry,	-- sequences
3697	seqalign SET OF Seq-annot }	-- Biostruc-seq plus this alignment component
3698
3699Entrez-style ::= ENUMERATED {	-- type of the style field of Entrez-general
3700	docsum (1),
3701	genbank (2) ,
3702	genpept (3) ,
3703	fasta (4) ,
3704	asn1 (5) ,
3705	graphic (6) ,
3706	alignment (7) ,
3707	globalview (8) ,
3708	report (9) ,
3709	medlars (10) ,
3710	embl (11) ,
3711	pdb (12) ,
3712	kinemage (13) }
3713
3714Entrez-general ::= SEQUENCE {	-- one data item plus its style
3715	title VisibleString OPTIONAL,
3716	data CHOICE {	-- exactly one payload
3717		ml	Medline-entry ,	-- Medline-entry is imported from NCBI-Medline
3718		prot	Seq-entry ,	-- prot, nuc and genome all carry a Seq-entry
3719		nuc	Seq-entry ,
3720		genome	Seq-entry ,
3721		structure Biostruc ,
3722		strucAnnot Biostruc-annot-set } ,
3723	style Entrez-style ,
3724	location VisibleString OPTIONAL }
3725END
3726--$Revision: 97143 $
3727--*********************************************************************
3728--
3729--  access.asn
3730--
3731--     messages for data access
3732--
3733--*********************************************************************
3734
3735NCBI-Access DEFINITIONS ::=
3736BEGIN
3737
3738EXPORTS Link-set;
3739
3740    -- links between same class = neighbors
3741    -- links between other classes = links
3742
3743Link-set ::= SEQUENCE {                 -- the single type exported by NCBI-Access
3744    num INTEGER ,                         -- number of links to this doc type
3745    uids SEQUENCE OF INTEGER OPTIONAL ,     -- the links
3746    weights SEQUENCE OF INTEGER OPTIONAL }  -- the weights
3747
3748
3749END
3750--$Revision: 97143 $
3751--**********************************************************************
3752--
3753--  NCBI Sequence Feature Definition Module
3754--  by James Ostell, 1994
3755--
3756--**********************************************************************
3757
3758NCBI-FeatDef DEFINITIONS ::=
3759BEGIN
3760
3761EXPORTS FeatDef, FeatDefSet, FeatDispGroup, FeatDispGroupSet;
3762
3763
3764FeatDef ::= SEQUENCE {          -- definition of one feature type
3765    typelabel VisibleString ,	   -- short label for type eg "CDS"
3766    menulabel VisibleString ,	   -- label for a menu eg "Coding Region"
3767    featdef-key INTEGER ,		   -- unique for this feature definition
3768    seqfeat-key INTEGER ,		   -- SeqFeat.data.choice from objfeat.h
3769    entrygroup INTEGER ,		   -- Group for data entry
3770    displaygroup INTEGER ,		   -- Group for data display
3771    molgroup FeatMolType           -- Type of Molecule used for (NOTE(review): original comment is cut off here)
3772}
3773
3774FeatMolType ::= ENUMERATED {    -- type of the molgroup field of FeatDef
3775	aa (1),  -- proteins
3776    na (2),  -- nucleic acids
3777    both (3) }  -- both
3778
3779FeatDefSet ::= SEQUENCE OF FeatDef   -- collections of definitions
3780
3781FeatDispGroup ::= SEQUENCE {    -- a display group: integer key plus name
3782	groupkey INTEGER ,          -- NOTE(review): presumably matched by FeatDef.displaygroup; confirm
3783    groupname VisibleString }
3784
3785FeatDispGroupSet ::= SEQUENCE OF FeatDispGroup    -- a list of display groups
3786
3787FeatDefGroupSet ::= SEQUENCE {    -- groups and definitions together; not in the EXPORTS list
3788	groups FeatDispGroupSet ,
3789	defs FeatDefSet }
3790
3791END
3792
3793
3794--$Revision: 97143 $
3795--********************************************************************
3796--
3797--  Print Templates
3798--  James Ostell, 1993
3799--
3800--
3801--********************************************************************
3802
3803NCBI-ObjPrt DEFINITIONS ::=
3804BEGIN
3805
3806EXPORTS PrintTemplate, PrintTemplateSet;
3807
3808PrintTemplate ::= SEQUENCE {      -- a named print format
3809    name TemplateName ,  -- name for this template
3810    labelfrom VisibleString OPTIONAL,    -- ASN.1 path to get label from
3811    format PrintFormat }
3812
3813TemplateName ::= VisibleString    -- used by PrintTemplate.name and PrintForm.use-template
3814
3815PrintTemplateSet ::= SEQUENCE OF PrintTemplate    -- a list of templates
3816
3817PrintFormat ::= SEQUENCE {        -- how to print one ASN.1 component
3818    asn1 VisibleString ,    -- ASN.1 partial path for this
3819    label VisibleString OPTIONAL ,   -- printable label
3820    prefix VisibleString OPTIONAL,
3821    suffix VisibleString OPTIONAL,
3822    form PrintForm }
3823
3824PrintForm ::=   CHOICE {      -- Forms for various ASN.1 components
3825    block PrintFormBlock,     -- for SEQUENCE, SET
3826    boolean PrintFormBoolean,
3827    enum PrintFormEnum,
3828    text PrintFormText,
3829    use-template TemplateName,    -- refers to a template by its name
3830    user UserFormat ,
3831    null NULL }               -- rarely used
3832
3833UserFormat ::= SEQUENCE {         -- the user alternative of PrintForm
3834    printfunc VisibleString ,     -- NOTE(review): presumably a function name; confirm
3835    defaultfunc VisibleString OPTIONAL }
3836
3837PrintFormBlock ::= SEQUENCE {  -- for SEQUENCE, SET
3838    separator VisibleString OPTIONAL ,
3839    components SEQUENCE OF PrintFormat }    -- nested formats, so PrintFormat is recursive
3840
3841PrintFormBoolean ::= SEQUENCE {    -- the boolean alternative of PrintForm
3842    true VisibleString OPTIONAL ,    -- NOTE(review): presumably the text printed for TRUE; confirm
3843    false VisibleString OPTIONAL }
3844
3845PrintFormEnum ::= SEQUENCE {    -- the enum alternative of PrintForm
3846    values SEQUENCE OF VisibleString OPTIONAL }
3847
3848PrintFormText ::= SEQUENCE {    -- the text alternative of PrintForm
3849    textfunc VisibleString OPTIONAL }
3850
3851END
3852
3853--$Revision: 97143 $
3854--  *********************************************************************
3855--
3856--  These are code and conversion tables for NCBI sequence codes
3857--  ASN.1 for the sequences themselves are defined in seq.asn
3858--
3859--  Seq-map-table and Seq-code-table REQUIRE that codes start with 0
3860--    and increase continuously.  So IUPAC codes, which are upper case
3861--    letters will always have 65 0 cells before the codes begin.  This
3862--    allows all codes to do indexed lookups for things
3863--
3864--  Valid names for code tables are:
3865--    IUPACna
3866--    IUPACaa
3867--    IUPACeaa
3868--    IUPACaa3     3 letter amino acid codes : parallels IUPACeaa
3869--                   display only, not a data exchange type
3870--    NCBI2na
3871--    NCBI4na
3872--    NCBI8na
3873--    NCBI8aa
3874--    NCBIstdaa
3875--     probability types map to IUPAC types for display as characters
3876
3877NCBI-SeqCode DEFINITIONS ::=
3878BEGIN
3879
3880EXPORTS Seq-code-table, Seq-map-table, Seq-code-set;
3881
3882Seq-code-type ::= ENUMERATED {              -- sequence representations
3883    iupacna (1) ,              -- IUPAC 1 letter nuc acid code
3884    iupacaa (2) ,              -- IUPAC 1 letter amino acid code
3885    ncbi2na (3) ,              -- 2 bit nucleic acid code
3886    ncbi4na (4) ,              -- 4 bit nucleic acid code
3887    ncbi8na (5) ,              -- 8 bit extended nucleic acid code
3888    ncbipna (6) ,              -- nucleic acid probabilities
3889    ncbi8aa (7) ,              -- 8 bit extended amino acid codes
3890    ncbieaa (8) ,              -- extended ASCII 1 letter aa codes
3891    ncbipaa (9) ,              -- amino acid probabilities
3892    iupacaa3 (10) ,            -- 3 letter code only for display
3893    ncbistdaa (11) }           -- consecutive codes for std aas, 0-25
3894
3895Seq-map-table ::= SEQUENCE { -- for tables of sequence mappings
3896    from Seq-code-type ,      -- code to map from
3897    to Seq-code-type ,        -- code to map to
3898    num INTEGER ,             -- number of rows in table
3899    start-at INTEGER DEFAULT 0 ,   -- index offset of first element
3900    table SEQUENCE OF INTEGER }  -- table of values, in from-to order
3901
3902Seq-code-table ::= SEQUENCE { -- for names of coded values
3903    code Seq-code-type ,      -- name of code
3904    num INTEGER ,             -- number of rows in table
3905    one-letter BOOLEAN ,   -- symbol is ALWAYS 1 letter?
3906    start-at INTEGER DEFAULT 0 ,   -- index offset of first element
3907    table SEQUENCE OF             -- one symbol and name pair per row
3908        SEQUENCE {
3909            symbol VisibleString ,      -- the printed symbol or letter
3910            name VisibleString } ,      -- an explanatory name or string
3911    comps SEQUENCE OF INTEGER OPTIONAL } -- pointers to complement nuc acid
3912
3913Seq-code-set ::= SEQUENCE {    -- for distribution
3914    codes SET OF Seq-code-table OPTIONAL ,    -- code tables; may be absent
3915    maps SET OF Seq-map-table OPTIONAL }      -- mapping tables; may be absent
3916
3917END
3918
3919