--$Revision: 97143 $
--**********************************************************************
--
--  NCBI General Data elements
--  by James Ostell, 1990
--  Version 3.0 - June 1994
--
--**********************************************************************

NCBI-General DEFINITIONS ::=
BEGIN

EXPORTS Date, Person-id, Object-id, Dbtag, Int-fuzz, User-object;

-- StringStore is really a VisibleString.  It is used to define very
--   long strings which may need to be stored by the receiving program
--   in special structures, such as a ByteStore, but it's just a hint.
--   AsnTool stores StringStores in ByteStore structures.
-- OCTET STRINGs are also stored in ByteStores by AsnTool
--
-- typedef struct bsunit {             /* for building multiline strings */
--    Nlm_Handle str;                  /* the string piece */
--    Nlm_Int2 len_avail,
--        len;
--    struct bsunit PNTR next; }       /* the next one */
-- Nlm_BSUnit, PNTR Nlm_BSUnitPtr;
--
-- typedef struct bytestore {
--    Nlm_Int4 seekptr,       /* current position */
--       totlen,              /* total stored data length in bytes */
--       chain_offset;        /* offset in ByteStore of first byte in curchain */
--    Nlm_BSUnitPtr chain,    /* chain of elements */
--       curchain;            /* the BSUnit containing seekptr */
-- } Nlm_ByteStore, PNTR Nlm_ByteStorePtr;
--
-- AsnTool incorporates this as a primitive type, so the definition
--   is here just for completeness
--
--  StringStore ::= [APPLICATION 1] IMPLICIT OCTET STRING
--

-- BigInt is really an INTEGER. It is used to warn the receiving code to expect
--   a value bigger than Int4 (actually Int8). It will be stored in DataVal.bigintvalue
--
--   Like StringStore, AsnTool incorporates it as a primitive. The definition would be:
--   BigInt ::= [APPLICATION 2] IMPLICIT INTEGER
--

-- Date is used to replace the (overly complex) UTCTime, GeneralizedTime
--   of ASN.1
--   It stores only a date
--

Date ::= CHOICE {
    str VisibleString ,                 -- for those unparsed dates
    std Date-std }                      -- use this if you can

Date-std ::= SEQUENCE {                 -- NOTE: this is NOT a unix tm struct
    year INTEGER ,                      -- full year (including 1900)
    month INTEGER OPTIONAL ,            -- month (1-12)
    day INTEGER OPTIONAL ,              -- day of month (1-31)
    season VisibleString OPTIONAL ,     -- for "spring", "may-june", etc
    hour INTEGER OPTIONAL ,             -- hour of day (0-23)
    minute INTEGER OPTIONAL ,           -- minute of hour (0-59)
    second INTEGER OPTIONAL }           -- second of minute (0-59)

-- Dbtag is generalized for tagging
-- eg. { "Social Security", str "023-79-8841" }
-- or  { "member", id 8882224 }

Dbtag ::= SEQUENCE {
    db VisibleString ,                  -- name of database or system
    tag Object-id }                     -- appropriate tag

-- Object-id can tag or name anything
--

Object-id ::= CHOICE {
    id INTEGER ,
    str VisibleString }

-- Person-id is to define a std element for people
--

Person-id ::= CHOICE {
    dbtag Dbtag ,                       -- any defined database tag
    name Name-std ,                     -- structured name
    ml VisibleString ,                  -- MEDLINE name (semi-structured)
                                        --    eg. "Jones RM"
    str VisibleString }                 -- unstructured name

Name-std ::= SEQUENCE {                 -- Structured names
    last VisibleString ,
    first VisibleString OPTIONAL ,
    middle VisibleString OPTIONAL ,
    full VisibleString OPTIONAL ,       -- full name eg. "J. John Poop, Esq"
    initials VisibleString OPTIONAL,    -- first + middle initials
    suffix VisibleString OPTIONAL ,     -- Jr, Sr, III
    title VisibleString OPTIONAL }      -- Dr., Sister, etc

--**** Int-fuzz **********************************************
--*
--*   uncertainties in integer values

Int-fuzz ::= CHOICE {
    p-m INTEGER ,                       -- plus or minus fixed amount
    range SEQUENCE {                    -- max to min
        max INTEGER ,
        min INTEGER } ,
    pct INTEGER ,                       -- % plus or minus (x10) 0-1000
    lim ENUMERATED {                    -- some limit value
        unk (0) ,                       -- unknown
        gt (1) ,                        -- greater than
        lt (2) ,                        -- less than
        tr (3) ,                        -- space to right of position
        tl (4) ,                        -- space to left of position
        circle (5) ,                    -- artificial break at origin of circle
        other (255) } ,                 -- something else
    alt SET OF INTEGER }                -- set of alternatives for the integer


--**** User-object **********************************************
--*
--*   a general object for a user defined structured data item
--*    used by Seq-feat and Seq-descr

User-object ::= SEQUENCE {
    class VisibleString OPTIONAL ,      -- endeavor which designed this object
    type Object-id ,                    -- type of object within class
    data SEQUENCE OF User-field }       -- the object itself

User-field ::= SEQUENCE {
    label Object-id ,                   -- field label
    num INTEGER OPTIONAL ,              -- required for strs, ints, reals, oss
    data CHOICE {                       -- field contents
        str VisibleString ,
        int INTEGER ,
        real REAL ,
        bool BOOLEAN ,
        os OCTET STRING ,
        object User-object ,            -- for using other definitions
        strs SEQUENCE OF VisibleString ,
        ints SEQUENCE OF INTEGER ,
        reals SEQUENCE OF REAL ,
        oss SEQUENCE OF OCTET STRING ,
        fields SEQUENCE OF User-field ,
        objects SEQUENCE OF User-object } }



END

--$Revision: 97143 $
--****************************************************************
--
--  NCBI Bibliographic data elements
--  by James Ostell, 1990
--
--  Taken from the American National Standard for
--      Bibliographic References
--      ANSI Z39.29-1977
--  Version 3.0 - June 1994
--  PubMedId added in 1996
--  ArticleIds and eprint elements added in 1999
--
--****************************************************************

NCBI-Biblio DEFINITIONS ::=
BEGIN

EXPORTS Cit-art, Cit-jour, Cit-book, Cit-pat, Cit-let, Id-pat, Cit-gen,
        Cit-proc, Cit-sub, Title, Author, PubMedId;

IMPORTS Person-id, Date, Dbtag FROM NCBI-General;

    -- Article Ids

ArticleId ::= CHOICE {                  -- can be many ids for an article
    pubmed PubMedId ,                   -- see types below
    medline MedlineUID ,
    doi DOI ,
    pii PII ,
    pmcid PmcID ,
    pmcpid PmcPid ,
    pmpid PmPid ,
    other Dbtag }                       -- generic catch all

PubMedId ::= INTEGER                    -- Id from the PubMed database at NCBI
MedlineUID ::= INTEGER                  -- Id from MEDLINE
DOI ::= VisibleString                   -- Document Object Identifier
PII ::= VisibleString                   -- Controlled Publisher Identifier
PmcID ::= INTEGER                       -- PubMed Central Id
PmcPid ::= VisibleString                -- Publisher Id supplied to PubMed Central
PmPid ::= VisibleString                 -- Publisher Id supplied to PubMed

ArticleIdSet ::= SET OF ArticleId

    -- Status Dates

PubStatus ::= INTEGER {                 -- points of publication
    received  (1) ,                     -- date manuscript received for review
    accepted  (2) ,                     -- accepted for publication
    epublish  (3) ,                     -- published electronically by publisher
    ppublish  (4) ,                     -- published in print by publisher
    revised   (5) ,                     -- article revised by publisher/author
    pmc       (6) ,                     -- article first appeared in PubMed Central
    pmcr      (7) ,                     -- article revision in PubMed Central
    pubmed    (8) ,                     -- article citation first appeared in PubMed
    pubmedr   (9) ,                     -- article citation revision in PubMed
    aheadofprint (10),                  -- epublish, but will be followed by print
    premedline (11),                    -- date into PreMedline status
    medline    (12),                    -- date made a MEDLINE record
    other    (255) }

PubStatusDate ::= SEQUENCE {            -- done as a structure so fields can be added
    pubstatus PubStatus ,
    date Date }                         -- time may be added later

PubStatusDateSet ::= SET OF PubStatusDate

    -- Citation Types

Cit-art ::= SEQUENCE {                  -- article in journal or book
    title Title OPTIONAL ,              -- title of paper (ANSI requires)
    authors Auth-list OPTIONAL ,        -- authors (ANSI requires)
    from CHOICE {                       -- journal or book
        journal Cit-jour ,
        book Cit-book ,
        proc Cit-proc } ,
    ids ArticleIdSet OPTIONAL }         -- lots of ids

Cit-jour ::= SEQUENCE {                 -- Journal citation
    title Title ,                       -- title of journal
    imp Imprint }

Cit-book ::= SEQUENCE {                 -- Book citation
    title Title ,                       -- Title of book
    coll Title OPTIONAL ,               -- part of a collection
    authors Auth-list,                  -- authors
    imp Imprint }

Cit-proc ::= SEQUENCE {                 -- Meeting proceedings
    book Cit-book ,                     -- citation to meeting
    meet Meeting }                      -- time and location of meeting

    -- Patent number and date-issue were made optional in 1997 to
    --   support patent applications being issued from the USPTO
    --   Semantically a Cit-pat must have either a patent number or
    --   an application number (or both) to be valid

Cit-pat ::= SEQUENCE {                  -- patent citation
    title VisibleString ,
    authors Auth-list,                  -- author/inventor
    country VisibleString ,             -- Patent Document Country
    doc-type VisibleString ,            -- Patent Document Type
    number VisibleString OPTIONAL,      -- Patent Document Number
    date-issue Date OPTIONAL,           -- Patent Issue/Pub Date
    class SEQUENCE OF VisibleString OPTIONAL ,      -- Patent Doc Class Code
    app-number VisibleString OPTIONAL , -- Patent Doc Appl Number
    app-date Date OPTIONAL ,            -- Patent Appl File Date
    applicants Auth-list OPTIONAL ,     -- Applicants
    assignees Auth-list OPTIONAL ,      -- Assignees
    priority SEQUENCE OF Patent-priority OPTIONAL , -- Priorities
    abstract VisibleString OPTIONAL }   -- abstract of patent

Patent-priority ::= SEQUENCE {
    country VisibleString ,             -- Patent country code
    number VisibleString ,              -- number assigned in that country
    date Date }                         -- date of application

Id-pat ::= SEQUENCE {                   -- just to identify a patent
    country VisibleString ,             -- Patent Document Country
    id CHOICE {
        number VisibleString ,          -- Patent Document Number
        app-number VisibleString } ,    -- Patent Doc Appl Number
    doc-type VisibleString OPTIONAL }   -- Patent Doc Type

Cit-let ::= SEQUENCE {                  -- letter, thesis, or manuscript
    cit Cit-book ,                      -- same fields as a book
    man-id VisibleString OPTIONAL ,     -- Manuscript identifier
    type ENUMERATED {
        manuscript (1) ,
        letter (2) ,
        thesis (3) } OPTIONAL }
                                -- NOTE: this is just to cite a
                                -- direct data submission, see NCBI-Submit
                                -- for the form of a sequence submission
Cit-sub ::= SEQUENCE {                  -- citation for a direct submission
    authors Auth-list ,                 -- not necessarily authors of the paper
    imp Imprint OPTIONAL ,              -- this only used to get date.. will go
    medium ENUMERATED {                 -- medium of submission
        paper   (1) ,
        tape    (2) ,
        floppy  (3) ,
        email   (4) ,
        other   (255) } OPTIONAL ,
    date Date OPTIONAL ,                -- replaces imp, will become required
    descr VisibleString OPTIONAL }      -- description of changes for public view

Cit-gen ::= SEQUENCE {                  -- NOT from ANSI, this is a catchall
    cit VisibleString OPTIONAL ,        -- anything, not parsable
    authors Auth-list OPTIONAL ,
    muid INTEGER OPTIONAL ,             -- medline uid
    journal Title OPTIONAL ,
    volume VisibleString OPTIONAL ,
    issue VisibleString OPTIONAL ,
    pages VisibleString OPTIONAL ,
    date Date OPTIONAL ,
    serial-number INTEGER OPTIONAL ,    -- for GenBank style references
    title VisibleString OPTIONAL ,      -- eg. cit="unpublished",title="title"
    pmid PubMedId OPTIONAL }            -- PubMed Id


    -- Authorship Group
Auth-list ::= SEQUENCE {
    names CHOICE {
        std SEQUENCE OF Author ,            -- full citations
        ml SEQUENCE OF VisibleString ,      -- MEDLINE, semi-structured
        str SEQUENCE OF VisibleString } ,   -- free for all
    affil Affil OPTIONAL }                  -- author affiliation

Author ::= SEQUENCE {
    name Person-id ,                    -- Author, Primary or Secondary
    level ENUMERATED {
        primary (1),
        secondary (2) } OPTIONAL ,
    role ENUMERATED {                   -- Author Role Indicator
        compiler (1),
        editor (2),
        patent-assignee (3),
        translator (4) } OPTIONAL ,
    affil Affil OPTIONAL ,
    is-corr BOOLEAN OPTIONAL }          -- TRUE if corresponding author

Affil ::= CHOICE {
    str VisibleString ,                 -- unparsed string
    std SEQUENCE {                      -- std representation
        affil VisibleString OPTIONAL ,      -- Author Affiliation, Name
        div VisibleString OPTIONAL ,        -- Author Affiliation, Division
        city VisibleString OPTIONAL ,       -- Author Affiliation, City
        sub VisibleString OPTIONAL ,        -- Author Affiliation, County Sub
        country VisibleString OPTIONAL ,    -- Author Affiliation, Country
        street VisibleString OPTIONAL ,     -- street address, not ANSI
        email VisibleString OPTIONAL ,
        fax VisibleString OPTIONAL ,
        phone VisibleString OPTIONAL ,
        postal-code VisibleString OPTIONAL }}

    -- Title Group
    -- Valid for = A = Analytic (Cit-art)
    --             J = Journals (Cit-jour)
    --             B = Book (Cit-book)
    --                                                 Valid for:
Title ::= SET OF CHOICE {
    name VisibleString ,                -- Title, Anal,Coll,Mono    AJB
    tsub VisibleString ,                -- Title, Subordinate       A B
    trans VisibleString ,               -- Title, Translated        AJB
    jta VisibleString ,                 -- Title, Abbreviated        J
    iso-jta VisibleString ,             -- specifically ISO jta      J
    ml-jta VisibleString ,              -- specifically MEDLINE jta  J
    coden VisibleString ,               -- a coden                   J
    issn VisibleString ,                -- ISSN                      J
    abr VisibleString ,                 -- Title, Abbreviated         B
    isbn VisibleString }                -- ISBN                       B

Imprint ::= SEQUENCE {                  -- Imprint group
    date Date ,                         -- date of publication
    volume VisibleString OPTIONAL ,
    issue VisibleString OPTIONAL ,
    pages VisibleString OPTIONAL ,
    section VisibleString OPTIONAL ,
    pub Affil OPTIONAL,                 -- publisher, required for book
    cprt Date OPTIONAL,                 -- copyright date, "    "   "
    part-sup VisibleString OPTIONAL ,   -- part/sup of volume
    language VisibleString DEFAULT "ENG" ,  -- put here for simplicity
    prepub ENUMERATED {                 -- for prepublication citations
        submitted (1) ,                 -- submitted, not accepted
        in-press (2) ,                  -- accepted, not published
        other (255)  } OPTIONAL ,
    part-supi VisibleString OPTIONAL ,  -- part/sup on issue
    retract CitRetract OPTIONAL ,       -- retraction info
    pubstatus PubStatus OPTIONAL ,      -- current status of this publication
    history PubStatusDateSet OPTIONAL } -- dates for this record

CitRetract ::= SEQUENCE {
    type ENUMERATED {                   -- retraction of an entry
        retracted (1) ,                 -- this citation retracted
        notice (2) ,                    -- this citation is a retraction notice
        in-error (3) ,                  -- an erratum was published about this
        erratum (4) } ,                 -- this is a published erratum
    exp VisibleString OPTIONAL }        -- citation and/or explanation

Meeting ::= SEQUENCE {
    number VisibleString ,
    date Date ,
    place Affil OPTIONAL }


END


--$Revision: 97143 $
--**********************************************************************
--
--  MEDLINE data definitions
--  James Ostell, 1990
--
--  enhanced in 1996 to support PubMed records as well by simply adding
--    the PubMedId and making MedlineId optional
--
--**********************************************************************

NCBI-Medline DEFINITIONS ::=
BEGIN

EXPORTS Medline-entry, Medline-si;

IMPORTS Cit-art, PubMedId FROM NCBI-Biblio
        Date FROM NCBI-General;

                                -- a MEDLINE or PubMed entry
Medline-entry ::= SEQUENCE {
    uid INTEGER OPTIONAL ,              -- MEDLINE UID, sometimes not yet available if from PubMed
    em Date ,                           -- Entry Month
    cit Cit-art ,                       -- article citation
    abstract VisibleString OPTIONAL ,
    mesh SET OF Medline-mesh OPTIONAL ,
    substance SET OF Medline-rn OPTIONAL ,
    xref SET OF Medline-si OPTIONAL ,
    idnum SET OF VisibleString OPTIONAL ,   -- ID Number (grants, contracts)
    gene SET OF VisibleString OPTIONAL ,
    pmid PubMedId OPTIONAL ,                -- MEDLINE records may include the PubMedId
    pub-type SET OF VisibleString OPTIONAL, -- may show publication types (review, etc)
    mlfield SET OF Medline-field OPTIONAL , -- additional Medline field types
    status INTEGER {
        publisher (1) ,                 -- record as supplied by publisher
        premedline (2) ,                -- premedline record
        medline (3) } DEFAULT medline } -- regular medline record

Medline-mesh ::= SEQUENCE {
    mp BOOLEAN DEFAULT FALSE ,          -- TRUE if main point (*)
    term VisibleString ,                -- the MeSH term
    qual SET OF Medline-qual OPTIONAL } -- qualifiers

Medline-qual ::= SEQUENCE {
    mp BOOLEAN DEFAULT FALSE ,          -- TRUE if main point
    subh VisibleString }                -- the subheading

Medline-rn ::= SEQUENCE {               -- medline substance records
    type ENUMERATED {                   -- type of record
        nameonly (0) ,
        cas (1) ,                       -- CAS number
        ec (2) } ,                      -- EC number
    cit VisibleString OPTIONAL ,        -- CAS or EC number if present
    name VisibleString }                -- name (always present)

Medline-si ::= SEQUENCE {               -- medline cross reference records
    type ENUMERATED {                   -- type of xref
        ddbj (1) ,                      -- DNA Data Bank of Japan
        carbbank (2) ,                  -- Carbohydrate Structure Database
        embl (3) ,                      -- EMBL Data Library
        hdb (4) ,                       -- Hybridoma Data Bank
        genbank (5) ,                   -- GenBank
        hgml (6) ,                      -- Human Gene Map Library
        mim (7) ,                       -- Mendelian Inheritance in Man
        msd (8) ,                       -- Microbial Strains Database
        pdb (9) ,                       -- Protein Data Bank (Brookhaven)
        pir (10) ,                      -- Protein Identification Resource
        prfseqdb (11) ,                 -- Protein Research Foundation (Japan)
        psd (12) ,                      -- Protein Sequence Database (Japan)
        swissprot (13) ,                -- SwissProt
        gdb (14) } ,                    -- Genome Data Base
    cit VisibleString OPTIONAL }        -- the citation/accession number

Medline-field ::= SEQUENCE {
    type INTEGER {                      -- Keyed type
        other (0) ,                     -- look in line code
        comment (1) ,                   -- comment line
        erratum (2) } ,                 -- retracted, corrected, etc
    str VisibleString ,                 -- the text
    ids SEQUENCE OF DocRef OPTIONAL }   -- pointers relevant to this text

DocRef ::= SEQUENCE {                   -- reference to a document
    type INTEGER {
        medline (1) ,
        pubmed (2) ,
        ncbigi (3) } ,
    uid INTEGER }

END

--$Revision: 97143 $
--**********************************************************************
--
--  MEDLARS data definitions
--  Grigoriy Starchenko, 1997
--
--**********************************************************************

NCBI-Medlars DEFINITIONS ::=
BEGIN

EXPORTS Medlars-entry, Medlars-record;

IMPORTS PubMedId FROM NCBI-Biblio;

Medlars-entry ::= SEQUENCE {            -- a MEDLARS entry
    pmid PubMedId,                      -- All entries in PubMed must have it
    muid INTEGER OPTIONAL,              -- Medline(OCCS) id
    recs SET OF Medlars-record          -- List of Medlars records
}

Medlars-record ::= SEQUENCE {
    code INTEGER,                       -- Unit record field type integer form
    abbr VisibleString OPTIONAL,        -- Unit record field type abbreviation form
    data VisibleString                  -- Unit record data
}

END
--$Revision: 97143 $
--********************************************************************
--
--  Publication common set
--  James Ostell, 1990
--
--  This is the base class definitions for Publications of all sorts
--
--  support for PubMedId added in 1996
--********************************************************************

NCBI-Pub DEFINITIONS ::=
BEGIN

EXPORTS Pub, Pub-set, Pub-equiv;
IMPORTS Medline-entry FROM NCBI-Medline
        Cit-art, Cit-jour, Cit-book, Cit-proc, Cit-pat, Id-pat, Cit-gen,
        Cit-let, Cit-sub, PubMedId FROM NCBI-Biblio;

Pub ::= CHOICE {
    gen Cit-gen ,                       -- general or generic unparsed
    sub Cit-sub ,                       -- submission
    medline Medline-entry ,
    muid INTEGER ,                      -- medline uid
    article Cit-art ,
    journal Cit-jour ,
    book Cit-book ,
    proc Cit-proc ,                     -- proceedings of a meeting
    patent Cit-pat ,
    pat-id Id-pat ,                     -- identify a patent
    man Cit-let ,                       -- manuscript, thesis, or letter
    equiv Pub-equiv,                    -- to cite a variety of ways
    pmid PubMedId }                     -- PubMedId

Pub-equiv ::= SET OF Pub                -- equivalent identifiers for same citation

Pub-set ::= CHOICE {
    pub SET OF Pub ,
    medline SET OF Medline-entry ,
    article SET OF Cit-art ,
    journal SET OF Cit-jour ,
    book SET OF Cit-book ,
    proc SET OF Cit-proc ,              -- proceedings of a meeting
    patent SET OF Cit-pat }

END

--$Revision: 97143 $
--**********************************************************************
--
--  PUBMED data definitions
--
--**********************************************************************

NCBI-PubMed DEFINITIONS ::=
BEGIN

EXPORTS Pubmed-entry, Pubmed-url;

IMPORTS PubMedId FROM NCBI-Biblio
        Medline-entry FROM NCBI-Medline;

Pubmed-entry ::= SEQUENCE {             -- a PubMed entry
    -- PUBMED records must include the PubMedId
    pmid PubMedId,

    -- Medline entry information
    medent Medline-entry OPTIONAL,

    -- Publisher name
    publisher VisibleString OPTIONAL,

    -- List of URL to publisher cite
    urls SET OF Pubmed-url OPTIONAL,

    -- Publisher's article identifier
    pubid VisibleString OPTIONAL
}

Pubmed-url ::= SEQUENCE {
    location VisibleString OPTIONAL,    -- Location code
    url VisibleString                   -- Selected URL for location
}

END
--$Revision: 97143 $
--**********************************************************************
--
--  NCBI Sequence location and identifier elements
--  by James Ostell, 1990
--
--  Version 3.0 - 1994
--
--**********************************************************************

NCBI-Seqloc DEFINITIONS ::=
BEGIN

EXPORTS Seq-id, Seq-loc, Seq-interval, Packed-seqint, Seq-point, Packed-seqpnt,
        Na-strand, Giimport-id;

IMPORTS Object-id, Int-fuzz, Dbtag, Date FROM NCBI-General
        Id-pat FROM NCBI-Biblio
        Feat-id FROM NCBI-Seqfeat;

--*** Sequence identifiers ********************************
--*

Seq-id ::= CHOICE {
    local Object-id ,                   -- local use
    gibbsq INTEGER ,                    -- Geninfo backbone seqid
    gibbmt INTEGER ,                    -- Geninfo backbone moltype
    giim Giimport-id ,                  -- Geninfo import id
    genbank Textseq-id ,
    embl Textseq-id ,
    pir Textseq-id ,
    swissprot Textseq-id ,
    patent Patent-seq-id ,
    other Textseq-id ,                  -- catch all
    general Dbtag ,                     -- for other databases
    gi INTEGER ,                        -- GenInfo Integrated Database
    ddbj Textseq-id ,                   -- DDBJ
    prf Textseq-id ,                    -- PRF SEQDB
    pdb PDB-seq-id }                    -- PDB sequence

Patent-seq-id ::= SEQUENCE {
    seqid INTEGER ,                     -- number of sequence in patent
    cit Id-pat }                        -- patent citation

Textseq-id ::= SEQUENCE {
    name VisibleString OPTIONAL ,
    accession VisibleString OPTIONAL ,
    release VisibleString OPTIONAL ,
    version INTEGER OPTIONAL }

Giimport-id ::= SEQUENCE {
    id INTEGER ,                        -- the id to use here
    db VisibleString OPTIONAL ,         -- dbase used in
    release VisibleString OPTIONAL }    -- the release

PDB-seq-id ::= SEQUENCE {
    mol PDB-mol-id ,                    -- the molecule name
    chain INTEGER DEFAULT 32 ,          -- a single ASCII character, chain id
    rel Date OPTIONAL }                 -- release date, month and year

PDB-mol-id ::= VisibleString            -- name of mol, 4 chars

--*** Sequence locations **********************************
--*
Seq-loc ::= CHOICE {
    null NULL ,                         -- not placed
    empty Seq-id ,                      -- to NULL one Seq-id in a collection
    whole Seq-id ,                      -- whole sequence
    int Seq-interval ,                  -- from to
    packed-int Packed-seqint ,
    pnt Seq-point ,
    packed-pnt Packed-seqpnt ,
    mix Seq-loc-mix ,
    equiv Seq-loc-equiv ,               -- equivalent sets of locations
    bond Seq-bond ,
    feat Feat-id }                      -- indirect, through a Seq-feat


Seq-interval ::= SEQUENCE {
    from INTEGER ,
    to INTEGER ,
    strand Na-strand OPTIONAL ,
    id Seq-id ,                         -- WARNING: this used to be optional
    fuzz-from Int-fuzz OPTIONAL ,
    fuzz-to Int-fuzz OPTIONAL }

Packed-seqint ::= SEQUENCE OF Seq-interval

Seq-point ::= SEQUENCE {
    point INTEGER ,
    strand Na-strand OPTIONAL ,
    id Seq-id ,                         -- WARNING: this used to be optional
    fuzz Int-fuzz OPTIONAL }

Packed-seqpnt ::= SEQUENCE {
    strand Na-strand OPTIONAL ,
    id Seq-id ,
    fuzz Int-fuzz OPTIONAL ,
    points SEQUENCE OF INTEGER }

Na-strand ::= ENUMERATED {              -- strand of nucleic acid
    unknown (0) ,
    plus (1) ,
    minus (2) ,
    both (3) ,                          -- in forward orientation
    both-rev (4) ,                      -- in reverse orientation
    other (255) }

Seq-bond ::= SEQUENCE {                 -- bond between residues
    a Seq-point ,                       -- connection to at least one residue
    b Seq-point OPTIONAL }              -- other end may not be available

Seq-loc-mix ::= SEQUENCE OF Seq-loc     -- this will hold anything

Seq-loc-equiv ::= SET OF Seq-loc        -- for a set of equivalent locations

END


--$Revision: 97143 $
--**********************************************************************
--
--  NCBI Sequence Alignment elements
--  by James Ostell, 1990
--
--**********************************************************************

NCBI-Seqalign DEFINITIONS ::=
BEGIN

EXPORTS Seq-align, Score, Score-set, Seq-align-set;

IMPORTS Seq-id, Seq-loc , Na-strand FROM NCBI-Seqloc
        Object-id FROM NCBI-General;

--*** Sequence Alignment ********************************
--*

Seq-align-set ::= SET OF Seq-align

Seq-align ::= SEQUENCE {
    type ENUMERATED {
        not-set (0) ,
        global (1) ,
        diags (2) ,                     -- unbroken, but not ordered, diagonals
        partial (3) ,                   -- mapping pieces together
        disc (4) ,                      -- discontinuous alignment
        other (255) } ,
    dim INTEGER OPTIONAL ,              -- dimensionality
    score SET OF Score OPTIONAL ,       -- for whole alignment
    segs CHOICE {                       -- alignment data
        dendiag SEQUENCE OF Dense-diag ,
        denseg Dense-seg ,
        std SEQUENCE OF Std-seg ,
        packed Packed-seg ,
        disc Seq-align-set } ,
    bounds SET OF Seq-loc OPTIONAL }    -- regions of sequence over which align
                                        --  was computed

Dense-diag ::= SEQUENCE {               -- for (multiway) diagonals
    dim INTEGER DEFAULT 2 ,             -- dimensionality
    ids SEQUENCE OF Seq-id ,            -- sequences in order
    starts SEQUENCE OF INTEGER ,        -- start OFFSETS in ids order
    len INTEGER ,                       -- len of aligned segments
    strands SEQUENCE OF Na-strand OPTIONAL ,
    scores SET OF Score OPTIONAL }

    -- Dense-seg: the densest packing for sequence alignments only.
    --            a start of -1 indicates a gap for that sequence of
    --            length lens.
    --
    -- id=100  AAGGCCTTTTAGAGATGATGATGATGATGA
    -- id=200  AAGGCCTTTTAG.......GATGATGATGA
    -- id=300  ....CCTTTTAGAGATGATGAT....ATGA
    --
    -- dim = 3, numseg = 6, ids = { 100, 200, 300 }
    -- starts = { 0,0,-1, 4,4,0, 12,-1,8, 19,12,15, 22,15,-1, 26,19,18 }
    -- lens = { 4, 8, 7, 3, 4, 4 }
    --

Dense-seg ::= SEQUENCE {                -- for (multiway) global or partial alignments
    dim INTEGER DEFAULT 2 ,             -- dimensionality
    numseg INTEGER ,                    -- number of segments here
    ids SEQUENCE OF Seq-id ,            -- sequences in order
    starts SEQUENCE OF INTEGER ,        -- start OFFSETS in ids order within segs
    lens SEQUENCE OF INTEGER ,          -- lengths in ids order within segs
    strands SEQUENCE OF Na-strand OPTIONAL ,
    scores SEQUENCE OF Score OPTIONAL } -- score for each seg

Packed-seg ::= SEQUENCE {               -- for (multiway) global or partial alignments
    dim INTEGER DEFAULT 2 ,             -- dimensionality
    numseg INTEGER ,                    -- number of segments here
    ids SEQUENCE OF Seq-id ,            -- sequences in order
    starts SEQUENCE OF INTEGER ,        -- start OFFSETS in ids order for whole alignment
    present OCTET STRING ,              -- Boolean if each sequence present or absent in
                                        --   each segment
    lens SEQUENCE OF INTEGER ,          -- length of each segment
    strands SEQUENCE OF Na-strand OPTIONAL ,
    scores SEQUENCE OF Score OPTIONAL } -- score for each segment

Std-seg ::= SEQUENCE {
    dim INTEGER DEFAULT 2 ,             -- dimensionality
    ids SEQUENCE OF Seq-id OPTIONAL ,
    loc SEQUENCE OF Seq-loc ,
    scores SET OF Score OPTIONAL }

-- use of Score is discouraged for external ASN.1 specifications
Score ::= SEQUENCE {
    id Object-id OPTIONAL ,
    value CHOICE {
        real REAL ,
        int INTEGER } }

-- use of Score-set is encouraged for external ASN.1 specifications
Score-set ::= SET OF Score

END

--$Revision: 97143 $
--*********************************************************************
--
-- 1990 - J.Ostell
-- Version 3.0 - June 1994
--
--*********************************************************************
--*********************************************************************
--
--  EMBL specific data
--  This block of specifications was developed by Reiner Fuchs of EMBL
--  Updated by J.Ostell, 1994
--
--*********************************************************************

EMBL-General DEFINITIONS ::=
BEGIN

EXPORTS EMBL-dbname, EMBL-xref, EMBL-block;

IMPORTS Date, Object-id FROM NCBI-General;

EMBL-dbname ::= CHOICE {
    code ENUMERATED {
        embl(0),
        genbank(1),
        ddbj(2),
        geninfo(3),
        medline(4),
        swissprot(5),
        pir(6),
        pdb(7),
        epd(8),
        ecd(9),
        tfd(10),
        flybase(11),
        prosite(12),
        enzyme(13),
        mim(14),
        ecoseq(15),
        hiv(16) ,
        other (255) } ,
    name VisibleString }

EMBL-xref ::= SEQUENCE {
    dbname EMBL-dbname,
    id SEQUENCE OF Object-id }

EMBL-block ::= SEQUENCE {
    class ENUMERATED {
        not-set(0),
        standard(1),
        unannotated(2),
        other(255) } DEFAULT standard,
    div ENUMERATED {
        fun(0),
        inv(1),
        mam(2),
        org(3),
        phg(4),
        pln(5),
        pri(6),
        pro(7),
        rod(8),
        syn(9),
        una(10),
        vrl(11),
        vrt(12),
        pat(13),
        est(14),
        sts(15),
        other (255) } OPTIONAL,
    creation-date Date,
    update-date Date,
    extra-acc SEQUENCE OF VisibleString OPTIONAL,
    keywords SEQUENCE OF VisibleString OPTIONAL,
    xref SEQUENCE OF EMBL-xref OPTIONAL }

END

--*********************************************************************
--
--  SWISSPROT specific data
--  This block of specifications was developed by Mark Cavanaugh of
--      NCBI working with Amos Bairoch of SWISSPROT
--
--*********************************************************************

SP-General DEFINITIONS ::=
BEGIN

EXPORTS SP-block;

IMPORTS Date, Dbtag FROM NCBI-General
        Seq-id FROM NCBI-Seqloc;
SP-block ::= SEQUENCE {                 -- SWISSPROT specific descriptions
    class ENUMERATED {
        not-set (0) ,
        standard (1) ,                  -- conforms to all SWISSPROT checks
        prelim (2) ,                    -- only seq and biblio checked
        other (255) } ,
    extra-acc SET OF VisibleString OPTIONAL ,   -- old SWISSPROT ids
    imeth BOOLEAN DEFAULT FALSE ,               -- seq known to start with Met
    plasnm SET OF VisibleString OPTIONAL,       -- plasmid names carrying gene
    seqref SET OF Seq-id OPTIONAL,              -- xref to other sequences
    dbref SET OF Dbtag OPTIONAL ,               -- xref to non-sequence dbases
    keywords SET OF VisibleString OPTIONAL ,    -- keywords
    created Date OPTIONAL ,                     -- creation date
    sequpd Date OPTIONAL ,                      -- sequence update
    annotupd Date OPTIONAL }                    -- annotation update

END

--*********************************************************************
--
--  PIR specific data
--  This block of specifications was developed by Jim Ostell of
--      NCBI
--
--*********************************************************************

PIR-General DEFINITIONS ::=
BEGIN

EXPORTS PIR-block;

IMPORTS Seq-id FROM NCBI-Seqloc;

PIR-block ::= SEQUENCE {                -- PIR specific descriptions
    had-punct BOOLEAN OPTIONAL ,        -- had punctuation in sequence ?
    host VisibleString OPTIONAL ,
    source VisibleString OPTIONAL ,     -- source line
    summary VisibleString OPTIONAL ,
    genetic VisibleString OPTIONAL ,
    includes VisibleString OPTIONAL ,
    placement VisibleString OPTIONAL ,
    superfamily VisibleString OPTIONAL ,
    keywords SEQUENCE OF VisibleString OPTIONAL ,
    cross-reference VisibleString OPTIONAL ,
    date VisibleString OPTIONAL ,
    seq-raw VisibleString OPTIONAL ,    -- seq with punctuation
    seqref SET OF Seq-id OPTIONAL }     -- xref to other sequences

END

--*********************************************************************
--
--  GenBank specific data
--  This block of specifications was developed by Jim Ostell of
--      NCBI
--
--*********************************************************************

GenBank-General DEFINITIONS ::=
BEGIN

EXPORTS GB-block;

IMPORTS Date FROM NCBI-General;

GB-block ::= SEQUENCE {                 -- GenBank specific descriptions
    extra-accessions SEQUENCE OF VisibleString OPTIONAL ,
    source VisibleString OPTIONAL ,     -- source line
    keywords SEQUENCE OF VisibleString OPTIONAL ,
    origin VisibleString OPTIONAL,
    date VisibleString OPTIONAL ,       -- OBSOLETE old form Entry Date
    entry-date Date OPTIONAL ,          -- replaces date
    div VisibleString OPTIONAL ,        -- GenBank division
    taxonomy VisibleString OPTIONAL }   -- continuation line of organism

END

--**********************************************************************
-- PRF specific definition
--    PRF is a protein sequence database created and maintained by
--    Protein Research Foundation, Minoo-city, Osaka, Japan.
--
--    Written by A.Ogiwara, Inst.Chem.Res. (Dr.Kanehisa's Lab),
--            Kyoto Univ., Japan
--
--**********************************************************************

PRF-General DEFINITIONS ::=
BEGIN

EXPORTS PRF-block;

PRF-block ::= SEQUENCE {
    extra-src PRF-ExtraSrc OPTIONAL,
    keywords SEQUENCE OF VisibleString OPTIONAL
}

PRF-ExtraSrc ::= SEQUENCE {
    host VisibleString OPTIONAL,
    part VisibleString OPTIONAL,
    state VisibleString OPTIONAL,
    strain VisibleString OPTIONAL,
    taxon VisibleString OPTIONAL
}

END

--*********************************************************************
--
--  PDB specific data
--  This block of specifications was developed by Jim Ostell and
--      Steve Bryant of NCBI
--
--*********************************************************************

PDB-General DEFINITIONS ::=
BEGIN

EXPORTS PDB-block;

IMPORTS Date FROM NCBI-General;

PDB-block ::= SEQUENCE {                -- PDB specific descriptions
    deposition Date ,                   -- deposition date  month,year
    class VisibleString ,
    compound SEQUENCE OF VisibleString ,
    source SEQUENCE OF VisibleString ,
    exp-method VisibleString OPTIONAL , -- present if NOT X-ray diffraction
    replace PDB-replace OPTIONAL }      -- replacement history

PDB-replace ::= SEQUENCE {
    date Date ,
    ids SEQUENCE OF VisibleString }     -- entry ids replaced by this one

END

--$Revision: 97143 $
--**********************************************************************
--
--  NCBI Sequence Feature elements
--  by James Ostell, 1990
--  Version 3.0 - June 1994
--
--**********************************************************************

NCBI-Seqfeat DEFINITIONS ::=
BEGIN

EXPORTS Seq-feat, Feat-id, Genetic-code;

IMPORTS Gene-ref FROM NCBI-Gene
        Prot-ref FROM NCBI-Protein
        Org-ref FROM NCBI-Organism
        BioSource FROM NCBI-BioSource
        RNA-ref FROM NCBI-RNA
        Seq-loc, Giimport-id FROM NCBI-Seqloc
        Pubdesc, Numbering, Heterogen FROM NCBI-Sequence
        Rsite-ref FROM NCBI-Rsite
        Txinit FROM NCBI-TxInit
        Pub-set FROM NCBI-Pub
        Object-id, Dbtag, User-object FROM NCBI-General;

--*** Feature identifiers ********************************
--*

Feat-id ::= CHOICE {
    gibb INTEGER ,                      -- geninfo backbone
    giim Giimport-id ,                  -- geninfo import
    local Object-id ,                   -- for local software use
    general Dbtag }                     -- for use by various databases

--*** Seq-feat *******************************************
--*  sequence feature generalization

Seq-feat ::= SEQUENCE {
    id Feat-id OPTIONAL ,
    data SeqFeatData ,                  -- the specific data
    partial BOOLEAN OPTIONAL ,          -- incomplete in some way?
    except BOOLEAN OPTIONAL ,           -- something funny about this?
    comment VisibleString OPTIONAL ,
    product Seq-loc OPTIONAL ,          -- product of process
    location Seq-loc ,                  -- feature made from
    qual SEQUENCE OF Gb-qual OPTIONAL , -- qualifiers
    title VisibleString OPTIONAL ,      -- for user defined label
    ext User-object OPTIONAL ,          -- user defined structure extension
    cit Pub-set OPTIONAL ,              -- citations for this feature
    exp-ev ENUMERATED {                 -- evidence for existence of feature
        experimental (1) ,              -- any reasonable experimental check
        not-experimental (2) } OPTIONAL ,   -- similarity, pattern, etc
    xref SET OF SeqFeatXref OPTIONAL ,  -- cite other relevant features
    dbxref SET OF Dbtag OPTIONAL ,      -- support for xref to other databases
    pseudo BOOLEAN OPTIONAL ,           -- annotated on pseudogene?
1113 except-text VisibleString OPTIONAL } -- explain if except=TRUE 1114 1115SeqFeatData ::= CHOICE { 1116 gene Gene-ref , 1117 org Org-ref , 1118 cdregion Cdregion , 1119 prot Prot-ref , 1120 rna RNA-ref , 1121 pub Pubdesc , -- publication applies to this seq 1122 seq Seq-loc , -- to annotate origin from another seq 1123 imp Imp-feat , 1124 region VisibleString, -- named region (globin locus) 1125 comment NULL , -- just a comment 1126 bond ENUMERATED { 1127 disulfide (1) , 1128 thiolester (2) , 1129 xlink (3) , 1130 thioether (4) , 1131 other (255) } , 1132 site ENUMERATED { 1133 active (1) , 1134 binding (2) , 1135 cleavage (3) , 1136 inhibit (4) , 1137 modified (5), 1138 glycosylation (6) , 1139 myristoylation (7) , 1140 mutagenized (8) , 1141 metal-binding (9) , 1142 phosphorylation (10) , 1143 acetylation (11) , 1144 amidation (12) , 1145 methylation (13) , 1146 hydroxylation (14) , 1147 sulfatation (15) , 1148 oxidative-deamination (16) , 1149 pyrrolidone-carboxylic-acid (17) , 1150 gamma-carboxyglutamic-acid (18) , 1151 blocked (19) , 1152 lipid-binding (20) , 1153 np-binding (21) , 1154 dna-binding (22) , 1155 signal-peptide (23) , 1156 transit-peptide (24) , 1157 transmembrane-region (25) , 1158 other (255) } , 1159 rsite Rsite-ref , -- restriction site (for maps really) 1160 user User-object , -- user defined structure 1161 txinit Txinit , -- transcription initiation 1162 num Numbering , -- a numbering system 1163 psec-str ENUMERATED { -- protein secondary structure 1164 helix (1) , -- any helix 1165 sheet (2) , -- beta sheet 1166 turn (3) } , -- beta or gamma turn 1167 non-std-residue VisibleString , -- non-standard residue here in seq 1168 het Heterogen , -- cofactor, prosthetic grp, etc, bound to seq 1169 biosrc BioSource } 1170 1171SeqFeatXref ::= SEQUENCE { -- both optional because can have one or both 1172 id Feat-id OPTIONAL , -- the feature copied 1173 data SeqFeatData OPTIONAL } -- the specific data 1174 1175--*** CdRegion 
*********************************************** 1176--* 1177--* Instructions to translate from a nucleic acid to a peptide 1178--* conflict means it's supposed to translate but doesn't 1179--* 1180 1181 1182Cdregion ::= SEQUENCE { 1183 orf BOOLEAN OPTIONAL , -- just an ORF ? 1184 frame ENUMERATED { 1185 not-set (0) , -- not set, code uses one 1186 one (1) , 1187 two (2) , 1188 three (3) } DEFAULT not-set , -- reading frame 1189 conflict BOOLEAN OPTIONAL , -- conflict 1190 gaps INTEGER OPTIONAL , -- number of gaps on conflict/except 1191 mismatch INTEGER OPTIONAL , -- number of mismatches on above 1192 code Genetic-code OPTIONAL , -- genetic code used 1193 code-break SEQUENCE OF Code-break OPTIONAL , -- individual exceptions 1194 stops INTEGER OPTIONAL } -- number of stop codons on above 1195 1196 -- each code is 64 cells long, in the order where 1197 -- T=0,C=1,A=2,G=3, TTT=0, TTC=1, TCA=4, etc 1198 -- NOTE: this order does NOT corresspond to a Seq-data 1199 -- encoding. It is "natural" to codon usage instead. 1200 -- the value in each cell is the AA coded for 1201 -- start= AA coded only if first in peptide 1202 -- in start array, if codon is not a legitimate start 1203 -- codon, that cell will have the "gap" symbol for 1204 -- that alphabet. Otherwise it will have the AA 1205 -- encoded when that codon is used at the start. 

Genetic-code ::= SET OF CHOICE {
    name VisibleString ,               -- name of a code
    id INTEGER ,                       -- id in dbase
    ncbieaa VisibleString ,            -- indexed to IUPAC extended
    ncbi8aa OCTET STRING ,             -- indexed to NCBI8aa
    ncbistdaa OCTET STRING ,           -- indexed to NCBIstdaa
    sncbieaa VisibleString ,           -- start, indexed to IUPAC extended
    sncbi8aa OCTET STRING ,            -- start, indexed to NCBI8aa
    sncbistdaa OCTET STRING }          -- start, indexed to NCBIstdaa

Code-break ::= SEQUENCE {              -- specific codon exceptions
    loc Seq-loc ,                      -- location of exception
    aa CHOICE {                        -- the amino acid
        ncbieaa INTEGER ,              -- ASCII value of NCBIeaa code
        ncbi8aa INTEGER ,              -- NCBI8aa code
        ncbistdaa INTEGER } }          -- NCBIstdaa code

Genetic-code-table ::= SET OF Genetic-code     -- table of genetic codes

--*** Import ***********************************************
--*
--*  Features imported from other databases
--*

Imp-feat ::= SEQUENCE {
    key VisibleString ,
    loc VisibleString OPTIONAL ,         -- original location string
    descr VisibleString OPTIONAL }       -- text description

Gb-qual ::= SEQUENCE {                   -- a qualifier (qual) and its value (val)
    qual VisibleString ,
    val VisibleString }

END

--**********************************************************************
--
--  NCBI Restriction Sites
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-Rsite DEFINITIONS ::=
BEGIN

EXPORTS Rsite-ref;

IMPORTS Dbtag FROM NCBI-General;

Rsite-ref ::= CHOICE {
    str VisibleString ,     -- may be unparsable
    db  Dbtag }             -- pointer to a restriction site database

END

--**********************************************************************
--
--  NCBI RNAs
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-RNA DEFINITIONS ::=
BEGIN

EXPORTS RNA-ref, Trna-ext;

IMPORTS Seq-loc FROM NCBI-Seqloc;

--*** rnas ***********************************************
--*
--*  various rnas
--*
                         -- minimal RNA sequence
RNA-ref ::= SEQUENCE {
    type ENUMERATED {            -- type of RNA feature
        unknown (0) ,
        premsg (1) ,
        mRNA (2) ,
        tRNA (3) ,
        rRNA (4) ,
        snRNA (5) ,
        scRNA (6) ,
        other (255) } ,
    pseudo BOOLEAN OPTIONAL ,
    ext CHOICE {
        name VisibleString ,        -- for naming "other" type
        tRNA Trna-ext } OPTIONAL }  -- for tRNAs

Trna-ext ::= SEQUENCE {                 -- tRNA feature extensions
    aa CHOICE {                         -- aa this carries
        iupacaa INTEGER ,
        ncbieaa INTEGER ,
        ncbi8aa INTEGER ,
        ncbistdaa INTEGER } OPTIONAL ,
    codon SET OF INTEGER OPTIONAL ,     -- codon(s) as in Genetic-code
    anticodon Seq-loc OPTIONAL }        -- location of anticodon

END

--**********************************************************************
--
--  NCBI Genes
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-Gene DEFINITIONS ::=
BEGIN

EXPORTS Gene-ref;

IMPORTS Dbtag FROM NCBI-General;

--*** Gene ***********************************************
--*
--*  reference to a gene
--*

Gene-ref ::= SEQUENCE {
    locus VisibleString OPTIONAL ,        -- Official gene symbol
    allele VisibleString OPTIONAL ,       -- Official allele designation
    desc VisibleString OPTIONAL ,         -- descriptive name
    maploc VisibleString OPTIONAL ,       -- descriptive map location
    pseudo BOOLEAN DEFAULT FALSE ,        -- pseudogene
    db SET OF Dbtag OPTIONAL ,            -- ids in other dbases
    syn SET OF VisibleString OPTIONAL }   -- synonyms for locus

END


--**********************************************************************
--
--  NCBI Organism
--  by James Ostell, 1994
--  version 3.0
--
--**********************************************************************

NCBI-Organism DEFINITIONS ::=
BEGIN

EXPORTS Org-ref;

IMPORTS Dbtag FROM NCBI-General;

--*** Org-ref ***********************************************
--*
--*  Reference to an organism
--*     defines only the organism.. lower levels of detail for biological
--*     molecules are provided by the Source object
--*

Org-ref ::= SEQUENCE {
    taxname VisibleString OPTIONAL ,   -- preferred formal name
    common VisibleString OPTIONAL ,    -- common name
    mod SET OF VisibleString OPTIONAL , -- unstructured modifiers
    db SET OF Dbtag OPTIONAL ,         -- ids in taxonomic or culture dbases
    syn SET OF VisibleString OPTIONAL ,  -- synonyms for taxname or common
    orgname OrgName OPTIONAL }


OrgName ::= SEQUENCE {
    name CHOICE {
        binomial BinomialOrgName ,         -- genus/species type name
        virus VisibleString ,              -- virus names are different
        hybrid MultiOrgName ,              -- hybrid between organisms
        namedhybrid BinomialOrgName ,      -- some hybrids have genus x species name
        partial PartialOrgName } OPTIONAL , -- when genus not known
    attrib VisibleString OPTIONAL ,        -- attribution of name
    mod SEQUENCE OF OrgMod OPTIONAL ,
    lineage VisibleString OPTIONAL ,       -- lineage with semicolon separators
    gcode INTEGER OPTIONAL ,               -- genetic code (see CdRegion)
    mgcode INTEGER OPTIONAL ,              -- mitochondrial genetic code
    div VisibleString OPTIONAL }           -- GenBank division code


OrgMod ::= SEQUENCE {
    subtype INTEGER {
        strain (2) ,
        substrain (3) ,
        type (4) ,
        subtype (5) ,
        variety (6) ,
        serotype (7) ,
        serogroup (8) ,
        serovar (9) ,
        cultivar (10) ,
        pathovar (11) ,
        chemovar (12) ,
        biovar (13) ,
        biotype (14) ,
        group (15) ,
        subgroup (16) ,
        isolate (17) ,
        common (18) ,
        acronym (19) ,
        dosage (20) ,          -- chromosome dosage of hybrid
        nat-host (21) ,        -- natural host of this specimen
        sub-species (22) ,
        specimen-voucher (23) ,
        authority (24) ,
        forma (25) ,
        forma-specialis (26) ,
        ecotype (27) ,
        synonym (28) ,
        anamorph (29) ,
        teleomorph (30) ,
        breed (31) ,
        old-lineage (253) ,
        old-name (254) ,
        other (255) } ,         -- ASN5: old-name (254) is now defined in the list above
    subname VisibleString ,
    attrib VisibleString OPTIONAL }  -- attribution/source of name

BinomialOrgName ::= SEQUENCE {
    genus VisibleString ,               -- required
    species VisibleString OPTIONAL ,    -- species required if subspecies used
    subspecies VisibleString OPTIONAL }

MultiOrgName ::= SEQUENCE OF OrgName   -- the first will be used to assign division

PartialOrgName ::= SEQUENCE OF TaxElement  -- when we don't know the genus

TaxElement ::= SEQUENCE {
    fixed-level INTEGER {
       other (0) ,                     -- level must be set in string
       family (1) ,
       order (2) ,
       class (3) } ,
    level VisibleString OPTIONAL ,
    name VisibleString }

END


--**********************************************************************
--
--  NCBI BioSource
--  by James Ostell, 1994
--  version 3.0
--
--**********************************************************************

NCBI-BioSource DEFINITIONS ::=
BEGIN

EXPORTS BioSource;

IMPORTS Org-ref FROM NCBI-Organism;

--********************************************************************
--
-- BioSource gives the source of the biological material
--   for sequences
--
--********************************************************************

BioSource ::= SEQUENCE {
    genome INTEGER {         -- biological context
        unknown (0) ,
        genomic (1) ,
        chloroplast (2) ,
        chromoplast (3) ,
        kinetoplast (4) ,
        mitochondrion (5) ,
        plastid (6) ,
        macronuclear (7) ,
        extrachrom (8) ,
        plasmid (9) ,
        transposon (10) ,
        insertion-seq (11) ,
        cyanelle (12) ,
        proviral (13) ,
        virion (14) ,
        nucleomorph (15) ,
        apicoplast (16) ,
        leucoplast (17) ,
        proplastid (18) ,
        endogenous-virus (19)
      } DEFAULT unknown ,
        -- nucleomorph (15), apicoplast (16), leucoplast (17) and
        -- proplastid (18), formerly announced here as "coming",
        -- are now defined in the list above
    origin INTEGER {
      unknown (0) ,
      natural (1) ,                    -- normal biological entity
      natmut (2) ,                     -- naturally occurring mutant
      mut (3) ,                        -- artificially mutagenized
      artificial (4) ,                 -- artificially engineered
      synthetic (5) ,                  -- purely synthetic
      other (255) } DEFAULT unknown ,
    org Org-ref ,
    subtype SEQUENCE OF SubSource OPTIONAL ,
    is-focus NULL OPTIONAL }           -- to distinguish biological focus

SubSource ::= SEQUENCE {
    subtype INTEGER {
        chromosome (1) ,
        map (2) ,
        clone (3) ,
        subclone (4) ,
        haplotype (5) ,
        genotype (6) ,
        sex (7) ,
        cell-line (8) ,
        cell-type (9) ,
        tissue-type (10) ,
        clone-lib (11) ,
        dev-stage (12) ,
        frequency (13) ,
        germline (14) ,
        rearranged (15) ,
        lab-host (16) ,
        pop-variant (17) ,
        tissue-lib (18) ,
        plasmid-name (19) ,
        transposon-name (20) ,
        insertion-seq-name (21) ,
        plastid-name (22) ,
        country (23) ,
        segment (24) ,
        endogenous-virus-name (25) ,
        other (255) } ,
    name VisibleString ,
    attrib VisibleString OPTIONAL }    -- attribution/source of this name

END

--**********************************************************************
--
--  NCBI Protein
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-Protein DEFINITIONS ::=
BEGIN

EXPORTS Prot-ref;

IMPORTS Dbtag FROM NCBI-General;

--*** Prot-ref ***********************************************
--*
--*  Reference to a protein name
--*

Prot-ref ::= SEQUENCE {
    name SET OF VisibleString OPTIONAL ,      -- protein name
    desc VisibleString OPTIONAL ,      -- description (instead of name)
    ec SET OF VisibleString OPTIONAL , -- E.C. number(s)
    activity SET OF VisibleString OPTIONAL ,  -- activities
    db SET OF Dbtag OPTIONAL ,         -- ids in other dbases
    processed ENUMERATED {             -- processing status
       not-set (0) ,
       preprotein (1) ,
       mature (2) ,
       signal-peptide (3) ,
       transit-peptide (4) } DEFAULT not-set }



END
--********************************************************************
--
--  Transcription Initiation Site Feature Data Block
--  James Ostell, 1991
--  Philip Bucher, David Ghosh
--  version 1.1
--
--
--
--********************************************************************

NCBI-TxInit DEFINITIONS ::=
BEGIN

EXPORTS Txinit;

IMPORTS Gene-ref FROM NCBI-Gene
        Prot-ref FROM NCBI-Protein
        Org-ref FROM NCBI-Organism;

Txinit ::= SEQUENCE {
    name VisibleString ,    -- descriptive name of initiation site
    syn SEQUENCE OF VisibleString OPTIONAL ,   -- synonyms
    gene SEQUENCE OF Gene-ref OPTIONAL ,  -- gene(s) transcribed
    protein SEQUENCE OF Prot-ref OPTIONAL ,   -- protein(s) produced
    rna SEQUENCE OF VisibleString OPTIONAL ,  -- rna(s) produced
    expression VisibleString OPTIONAL ,  -- tissue/time of expression
    txsystem ENUMERATED {       -- transcription apparatus used at this site
        unknown (0) ,
        pol1 (1) ,      -- eukaryotic Pol I
        pol2 (2) ,      -- eukaryotic Pol II
        pol3 (3) ,      -- eukaryotic Pol III
        bacterial (4) ,
        viral (5) ,
        rna (6) ,       -- RNA replicase
        organelle (7) ,
        other (255) } ,
    txdescr VisibleString OPTIONAL ,   -- modifiers on txsystem
    txorg Org-ref OPTIONAL ,  -- organism supplying transcription apparatus
    mapping-precise BOOLEAN DEFAULT FALSE ,  -- mapping precise or approx
    location-accurate BOOLEAN DEFAULT FALSE , -- does Seq-loc reflect mapping
    inittype ENUMERATED {
        unknown (0) ,
        single (1) ,
        multiple (2) ,
        region (3) } OPTIONAL ,
    evidence SET OF Tx-evidence OPTIONAL }

Tx-evidence ::= SEQUENCE {
    exp-code ENUMERATED {
        unknown (0) ,
        rna-seq (1) ,   -- direct RNA sequencing
        rna-size (2) ,  -- RNA length measurement
        np-map (3) ,    -- nuclease protection mapping with homologous sequence ladder
        np-size (4) ,   -- nuclease protected fragment length measurement
        pe-seq (5) ,    -- dideoxy RNA sequencing
        cDNA-seq (6) ,  -- full-length cDNA sequencing
        pe-map (7) ,    -- primer extension mapping with homologous sequence ladder
        pe-size (8) ,   -- primer extension product length measurement
        pseudo-seq (9) , -- full-length processed pseudogene sequencing
        rev-pe-map (10) ,   -- see NOTE (1) below
        other (255) } ,
    expression-system ENUMERATED {
        unknown (0) ,
        physiological (1) ,
        in-vitro (2) ,
        oocyte (3) ,
        transfection (4) ,
        transgenic (5) ,
        other (255) } DEFAULT physiological ,
    low-prec-data BOOLEAN DEFAULT FALSE ,
    from-homolog BOOLEAN DEFAULT FALSE }     -- experiment actually done on
                                             --  close homolog

    -- NOTE (1) length measurement of a reverse direction primer-extension
    --          product (blocked  by RNA 5'end) by comparison with
    --          homologous sequence ladder (J. Mol. Biol. 199, 587)


END

--$Revision: 97143 $
--**********************************************************************
--
--  NCBI Sequence Analysis Results (other than alignments)
--  by James Ostell, 1990
--
--**********************************************************************

NCBI-Seqres DEFINITIONS ::=
BEGIN

EXPORTS Seq-graph;

IMPORTS Seq-loc FROM NCBI-Seqloc;

--*** Sequence Graph ********************************
--*
--*   for values mapped by residue or range to sequence
--*

Seq-graph ::= SEQUENCE {
    title VisibleString OPTIONAL ,
    comment VisibleString OPTIONAL ,
    loc Seq-loc ,                       -- region this applies to
    title-x VisibleString OPTIONAL ,    -- title for x-axis
    title-y VisibleString OPTIONAL ,
    comp INTEGER OPTIONAL ,             -- compression (residues/value)
    a REAL OPTIONAL ,                   -- for scaling values
    b REAL OPTIONAL ,                   -- display = (a x value) + b
    numval INTEGER ,                    -- number of values in graph
    graph CHOICE {
        real Real-graph ,
        int Int-graph ,
        byte Byte-graph } }

Real-graph ::= SEQUENCE {
    max REAL ,                          -- top of graph
    min REAL ,                          -- bottom of graph
    axis REAL ,                         -- value to draw axis on
    values SEQUENCE OF REAL }

Int-graph ::= SEQUENCE {
    max INTEGER ,
    min INTEGER ,
    axis INTEGER ,
    values SEQUENCE OF INTEGER }

Byte-graph ::= SEQUENCE {              -- integer from 0-255
    max INTEGER ,
    min INTEGER ,
    axis INTEGER ,
    values OCTET STRING }

END

--$Revision: 97143 $
--**********************************************************************
--
--  NCBI Sequence Collections
--  by James Ostell, 1990
--
--  Version 3.0 - 1994
--
--**********************************************************************

NCBI-Seqset DEFINITIONS ::=
BEGIN

EXPORTS Bioseq-set, Seq-entry;

IMPORTS Bioseq, Seq-annot, Seq-descr FROM NCBI-Sequence
        Object-id, Dbtag, Date FROM NCBI-General;

--*** Sequence Collections ********************************
--*

Bioseq-set ::= SEQUENCE {      -- just a collection
    id Object-id OPTIONAL ,
    coll Dbtag OPTIONAL ,          -- to identify a collection
    level INTEGER OPTIONAL ,       -- nesting level
    class ENUMERATED {
        not-set (0) ,
        nuc-prot (1) ,              -- nuc acid and coded proteins
        segset (2) ,                -- segmented sequence + parts
        conset (3) ,                -- constructed sequence + parts
        parts (4) ,                 -- parts for 2 or 3
        gibb (5) ,                  -- geninfo backbone
        gi (6) ,                    -- geninfo
        genbank (7) ,               -- converted genbank
        pir (8) ,                   -- converted pir
        pub-set (9) ,               -- all the seqs from a single publication
        equiv (10) ,                -- a set of equivalent maps or seqs
        swissprot (11) ,            -- converted SWISSPROT
        pdb-entry (12) ,            -- a complete PDB entry
        mut-set (13) ,              -- set of mutations
        pop-set (14) ,              -- population study
        phy-set (15) ,              -- phylogenetic study
        eco-set (16) ,              -- ecological sample study
        gen-prod-set (17) ,         -- genomic products, chrom+mRNA+protein
        other (255) } DEFAULT not-set ,
    release VisibleString OPTIONAL ,
    date Date OPTIONAL ,
    descr Seq-descr OPTIONAL ,
    seq-set SEQUENCE OF Seq-entry ,
    annot SET OF Seq-annot OPTIONAL }

Seq-entry ::= CHOICE {          -- either a single Bioseq or a Bioseq-set
        seq Bioseq ,
        set Bioseq-set }

END

--$Revision: 97143 $
--**********************************************************************
--
--  NCBI Sequence elements
--  by James Ostell, 1990
--  Version 3.0 - June 1994
--
--**********************************************************************

NCBI-Sequence DEFINITIONS ::=
BEGIN

EXPORTS Bioseq, Seq-annot, Pubdesc, Seq-descr, Seqdesc, Numbering, Heterogen,
        Seq-hist, GIBB-mol;

IMPORTS Date, Int-fuzz, Dbtag, Object-id, User-object FROM NCBI-General
        Seq-align FROM NCBI-Seqalign
        Seq-feat FROM NCBI-Seqfeat
        Seq-graph FROM NCBI-Seqres
        Pub-equiv FROM NCBI-Pub
        Org-ref FROM NCBI-Organism
        BioSource FROM NCBI-BioSource
        Seq-id, Seq-loc FROM NCBI-Seqloc
        GB-block FROM GenBank-General
        PIR-block FROM PIR-General
        EMBL-block FROM EMBL-General
        SP-block FROM SP-General
        PRF-block FROM PRF-General
        PDB-block FROM PDB-General;

--*** Sequence ********************************
--*

Bioseq ::= SEQUENCE {
    id SET OF Seq-id ,            -- equivalent identifiers
    descr Seq-descr OPTIONAL , -- descriptors
    inst Seq-inst ,            -- the sequence data
    annot SET OF Seq-annot OPTIONAL }

--*** Descriptors *****************************
--*

Seq-descr ::= SET OF Seqdesc

Seqdesc ::= CHOICE {
    mol-type GIBB-mol ,          -- type of molecule
    modif SET OF GIBB-mod ,             -- modifiers
    method GIBB-method ,         -- sequencing method
    name VisibleString ,         -- a name for this sequence
    title VisibleString ,        -- a title for this sequence
    org Org-ref ,                -- if all from one organism
    comment VisibleString ,      -- a more extensive comment
    num Numbering ,              -- a numbering system
    maploc Dbtag ,               -- map location of this sequence
    pir PIR-block ,              -- PIR specific info
    genbank GB-block ,           -- GenBank specific info
    pub Pubdesc ,                -- a reference to the publication
    region VisibleString ,       -- overall region (globin locus)
    user User-object ,           -- user defined object
    sp SP-block ,                -- SWISSPROT specific info
    dbxref Dbtag ,               -- xref to other databases
    embl EMBL-block ,            -- EMBL specific information
    create-date Date ,           -- date entry first created/released
    update-date Date ,           -- date of last update
    prf PRF-block ,              -- PRF specific information
    pdb PDB-block ,              -- PDB specific information
    het Heterogen ,              -- cofactor, etc associated but not bound
    source BioSource ,           -- source of materials, includes Org-ref
    molinfo MolInfo }            -- info on the molecule and techniques

--******* NOTE:
--*       mol-type, modif, method, and org are consolidated and expanded
--*       in Org-ref, BioSource, and MolInfo in this specification. They
--*       will be removed in later specifications. Do not use them in
--*       the future. Instead expect the new structures.
--*
--***************************

--********************************************************************
--
-- MolInfo gives information on the
-- classification of the type and quality of the sequence
--
-- WARNING: this will replace GIBB-mol, GIBB-mod, GIBB-method
--
--********************************************************************

MolInfo ::= SEQUENCE {
    biomol INTEGER {
        unknown (0) ,
        genomic (1) ,
        pre-RNA (2) ,              -- precursor RNA of any sort really
        mRNA (3) ,
        rRNA (4) ,
        tRNA (5) ,
        snRNA (6) ,
        scRNA (7) ,
        peptide (8) ,
        other-genetic (9) ,      -- other genetic material
        genomic-mRNA (10) ,      -- reported a mix of genomic and cdna sequence
        cRNA (11) ,              -- viral RNA genome copy intermediate
        other (255) } DEFAULT unknown ,
    tech INTEGER {
        unknown (0) ,
        standard (1) ,          -- standard sequencing
        est (2) ,               -- Expressed Sequence Tag
        sts (3) ,               -- Sequence Tagged Site
        survey (4) ,            -- one-pass genomic sequence
        genemap (5) ,           -- from genetic mapping techniques
        physmap (6) ,           -- from physical mapping techniques
        derived (7) ,           -- derived from other data, not a primary entity
        concept-trans (8) ,     -- conceptual translation
        seq-pept (9) ,          -- peptide was sequenced
        both (10) ,             -- concept transl. w/ partial pept. seq.
        seq-pept-overlap (11) , -- sequenced peptide, ordered by overlap
        seq-pept-homol (12) ,   -- sequenced peptide, ordered by homology
        concept-trans-a (13) ,  -- conceptual transl. supplied by author
        htgs-1 (14) ,           -- unordered High Throughput sequence contig
        htgs-2 (15) ,           -- ordered High Throughput sequence contig
        htgs-3 (16) ,           -- finished High Throughput sequence
        fli-cdna (17) ,         -- full length insert cDNA
        htgs-0 (18) ,           -- single genomic reads for coordination
        htc (19) ,              -- high throughput cDNA
        other (255) }           -- use Source.techexp
               DEFAULT unknown ,
    techexp VisibleString OPTIONAL ,   -- explanation if tech not enough
    completeness INTEGER {
      unknown (0) ,
      complete (1) ,                   -- complete biological entity
      partial (2) ,                    -- partial but no details given
      no-left (3),                     -- missing 5' or NH2 end
      no-right (4) ,                   -- missing 3' or COOH end
      no-ends (5) ,                    -- missing both ends
      other (255) } DEFAULT unknown }


GIBB-mol ::= ENUMERATED {       -- type of molecule represented
    unknown (0) ,
    genomic (1) ,
    pre-mRNA (2) ,              -- precursor RNA of any sort really
    mRNA (3) ,
    rRNA (4) ,
    tRNA (5) ,
    snRNA (6) ,
    scRNA (7) ,
    peptide (8) ,
    other-genetic (9) ,      -- other genetic material
    genomic-mRNA (10) ,      -- reported a mix of genomic and cdna sequence
    other (255) }

GIBB-mod ::= ENUMERATED {        -- GenInfo Backbone modifiers
    dna (0) ,
    rna (1) ,
    extrachrom (2) ,
    plasmid (3) ,
    mitochondrial (4) ,
    chloroplast (5) ,
    kinetoplast (6) ,
    cyanelle (7) ,
    synthetic (8) ,
    recombinant (9) ,
    partial (10) ,
    complete (11) ,
    mutagen (12) ,    -- subject of mutagenesis ?
    natmut (13) ,     -- natural mutant ?
    transposon (14) ,
    insertion-seq (15) ,
    no-left (16) ,    -- missing left end (5' for na, NH2 for aa)
    no-right (17) ,   -- missing right end (3' or COOH)
    macronuclear (18) ,
    proviral (19) ,
    est (20) ,        -- expressed sequence tag
    sts (21) ,        -- sequence tagged site
    survey (22) ,     -- one pass survey sequence
    chromoplast (23) ,
    genemap (24) ,    -- is a genetic map
    restmap (25) ,    -- is an ordered restriction map
    physmap (26) ,    -- is a physical map (not ordered restriction map)
    other (255) }

GIBB-method ::= ENUMERATED {        -- sequencing methods
    concept-trans (1) ,    -- conceptual translation
    seq-pept (2) ,         -- peptide was sequenced
    both (3) ,             -- concept transl. w/ partial pept. seq.
    seq-pept-overlap (4) , -- sequenced peptide, ordered by overlap
    seq-pept-homol (5) ,   -- sequenced peptide, ordered by homology
    concept-trans-a (6) ,  -- conceptual transl. supplied by author
    other (255) }

Numbering ::= CHOICE {           -- any display numbering system
    cont Num-cont ,              -- continuous numbering
    enum Num-enum ,              -- enumerated names for residues
    ref Num-ref ,                -- by reference to another sequence
    real Num-real }              -- supports mapping to a float system

Num-cont ::= SEQUENCE {          -- continuous display numbering system
    refnum INTEGER DEFAULT 1,         -- number assigned to first residue
    has-zero BOOLEAN DEFAULT FALSE ,  -- 0 used?
    ascending BOOLEAN DEFAULT TRUE }  -- ascending numbers?
1969 1970Num-enum ::= SEQUENCE { -- any tags to residues 1971 num INTEGER , -- number of tags to follow 1972 names SEQUENCE OF VisibleString } -- the tags 1973 1974Num-ref ::= SEQUENCE { -- by reference to other sequences 1975 type ENUMERATED { -- type of reference 1976 not-set (0) , 1977 sources (1) , -- by segmented or const seq sources 1978 aligns (2) } , -- by alignments given below 1979 aligns Seq-align OPTIONAL } 1980 1981Num-real ::= SEQUENCE { -- mapping to floating point system 1982 a REAL , -- from an integer system used by Bioseq 1983 b REAL , -- position = (a * int_position) + b 1984 units VisibleString OPTIONAL } 1985 1986Pubdesc ::= SEQUENCE { -- how sequence presented in pub 1987 pub Pub-equiv , -- the citation(s) 1988 name VisibleString OPTIONAL , -- name used in paper 1989 fig VisibleString OPTIONAL , -- figure in paper 1990 num Numbering OPTIONAL , -- numbering from paper 1991 numexc BOOLEAN OPTIONAL , -- numbering problem with paper 1992 poly-a BOOLEAN OPTIONAL , -- poly A tail indicated in figure? 
1993 maploc VisibleString OPTIONAL , -- map location reported in paper 1994 seq-raw StringStore OPTIONAL , -- original sequence from paper 1995 align-group INTEGER OPTIONAL , -- this seq aligned with others in paper 1996 comment VisibleString OPTIONAL, -- any comment on this pub in context 1997 reftype INTEGER { -- type of reference in a GenBank record 1998 seq (0) , -- refers to sequence 1999 sites (1) , -- refers to unspecified features 2000 feats (2) , -- refers to specified features 2001 no-target (3) } -- nothing specified (EMBL) 2002 DEFAULT seq } 2003 2004Heterogen ::= VisibleString -- cofactor, prosthetic group, inhibitor, etc 2005 2006--*** Instances of sequences ******************************* 2007--* 2008 2009Seq-inst ::= SEQUENCE { -- the sequence data itself 2010 repr ENUMERATED { -- representation class 2011 not-set (0) , -- empty 2012 virtual (1) , -- no seq data 2013 raw (2) , -- continuous sequence 2014 seg (3) , -- segmented sequence 2015 const (4) , -- constructed sequence 2016 ref (5) , -- reference to another sequence 2017 consen (6) , -- consensus sequence or pattern 2018 map (7) , -- ordered map of any kind 2019 delta (8) , -- sequence made by changes (delta) to others 2020 other (255) } , 2021 mol ENUMERATED { -- molecule class in living organism 2022 not-set (0) , -- > cdna = rna 2023 dna (1) , 2024 rna (2) , 2025 aa (3) , 2026 na (4) , -- just a nucleic acid 2027 other (255) } , 2028 length INTEGER OPTIONAL , -- length of sequence in residues 2029 fuzz Int-fuzz OPTIONAL , -- length uncertainty 2030 topology ENUMERATED { -- topology of molecule 2031 not-set (0) , 2032 linear (1) , 2033 circular (2) , 2034 tandem (3) , -- some part of tandem repeat 2035 other (255) } DEFAULT linear , 2036 strand ENUMERATED { -- strandedness in living organism 2037 not-set (0) , 2038 ss (1) , -- single strand 2039 ds (2) , -- double strand 2040 mixed (3) , 2041 other (255) } OPTIONAL , -- default ds for DNA, ss for RNA, pept 2042 seq-data Seq-data OPTIONAL ,
-- the sequence 2043 ext Seq-ext OPTIONAL , -- extensions for special types 2044 hist Seq-hist OPTIONAL } -- sequence history 2045 2046--*** Sequence Extensions ********************************** 2047--* for representing more complex types 2048--* const type uses Seq-hist.assembly 2049 2050Seq-ext ::= CHOICE { 2051 seg Seg-ext , -- segmented sequences 2052 ref Ref-ext , -- hot link to another sequence (a view) 2053 map Map-ext , -- ordered map of markers 2054 delta Delta-ext } 2055 2056Seg-ext ::= SEQUENCE OF Seq-loc 2057 2058Ref-ext ::= Seq-loc 2059 2060Map-ext ::= SEQUENCE OF Seq-feat 2061 2062Delta-ext ::= SEQUENCE OF Delta-seq 2063 2064Delta-seq ::= CHOICE { 2065 loc Seq-loc , -- point to a sequence 2066 literal Seq-literal } -- a piece of sequence 2067 2068Seq-literal ::= SEQUENCE { 2069 length INTEGER , -- must give a length in residues 2070 fuzz Int-fuzz OPTIONAL , -- could be unsure 2071 seq-data Seq-data OPTIONAL } -- may have the data 2072 2073--*** Sequence History Record *********************************** 2074--** assembly = records how seq was assembled from others 2075--** replaces = records sequences made obsolete by this one 2076--** replaced-by = this seq is made obsolete by another(s) 2077 2078Seq-hist ::= SEQUENCE { 2079 assembly SET OF Seq-align OPTIONAL ,-- how was this assembled? 
2080 replaces Seq-hist-rec OPTIONAL , -- seq makes these seqs obsolete 2081 replaced-by Seq-hist-rec OPTIONAL , -- these seqs make this one obsolete 2082 deleted CHOICE { 2083 bool BOOLEAN , 2084 date Date } OPTIONAL } 2085 2086Seq-hist-rec ::= SEQUENCE { 2087 date Date OPTIONAL , 2088 ids SET OF Seq-id } 2089 2090--*** Various internal sequence representations ************ 2091--* all are controlled, fixed length forms 2092 2093Seq-data ::= CHOICE { -- sequence representations 2094 iupacna IUPACna , -- IUPAC 1 letter nuc acid code 2095 iupacaa IUPACaa , -- IUPAC 1 letter amino acid code 2096 ncbi2na NCBI2na , -- 2 bit nucleic acid code 2097 ncbi4na NCBI4na , -- 4 bit nucleic acid code 2098 ncbi8na NCBI8na , -- 8 bit extended nucleic acid code 2099 ncbipna NCBIpna , -- nucleic acid probabilities 2100 ncbi8aa NCBI8aa , -- 8 bit extended amino acid codes 2101 ncbieaa NCBIeaa , -- extended ASCII 1 letter aa codes 2102 ncbipaa NCBIpaa , -- amino acid probabilities 2103 ncbistdaa NCBIstdaa } -- consecutive codes for std aas 2104 2105 2106IUPACna ::= StringStore -- IUPAC 1 letter codes, no spaces 2107IUPACaa ::= StringStore -- IUPAC 1 letter codes, no spaces 2108NCBI2na ::= OCTET STRING -- 00=A, 01=C, 10=G, 11=T 2109NCBI4na ::= OCTET STRING -- 1 bit each for agct 2110 -- 0001=A, 0010=C, 0100=G, 1000=T/U 2111 -- 0101=Purine, 1010=Pyrimidine, etc 2112NCBI8na ::= OCTET STRING -- for modified nucleic acids 2113NCBIpna ::= OCTET STRING -- 5 octets/base, prob for a,c,g,t,n 2114 -- probabilities are coded 0-255 = 0.0-1.0 2115NCBI8aa ::= OCTET STRING -- for modified amino acids 2116NCBIeaa ::= StringStore -- ASCII extended 1 letter aa codes 2117 -- IUPAC codes + U=selenocysteine 2118NCBIpaa ::= OCTET STRING -- 25 octets/aa, prob for IUPAC aas in order: 2119 -- A-Y,B,Z,X,(ter),anything 2120 -- probabilities are coded 0-255 = 0.0-1.0 2121NCBIstdaa ::= OCTET STRING -- codes 0-25, 1 per byte 2122 2123--*** Sequence Annotation ************************************* 2124--* 2125 
2126Annot-id ::= CHOICE { 2127 local Object-id , 2128 ncbi INTEGER , 2129 general Dbtag } 2130 2131Annot-descr ::= SET OF Annotdesc 2132 2133Annotdesc ::= CHOICE { 2134 name VisibleString , -- a short name for this collection 2135 title VisibleString , -- a title for this collection 2136 comment VisibleString , -- a more extensive comment 2137 pub Pubdesc , -- a reference to the publication 2138 user User-object , -- user defined object 2139 create-date Date , -- date entry first created/released 2140 update-date Date , -- date of last update 2141 src Seq-id , -- source sequence from which annot came 2142 align Align-def, -- definition of the SeqAligns 2143 region Seq-loc } -- all contents cover this region 2144 2145Align-def ::= SEQUENCE { 2146 align-type INTEGER { -- class of align Seq-annot 2147 ref (1) , -- set of alignments to the same sequence 2148 alt (2) , -- set of alternate alignments of the same seqs 2149 blocks (3) , -- set of aligned blocks in the same seqs 2150 other (255) } , 2151 ids SET OF Seq-id OPTIONAL } -- used for the one ref seqid for now 2152 2153Seq-annot ::= SEQUENCE { 2154 id SET OF Annot-id OPTIONAL , 2155 db INTEGER { -- source of annotation 2156 genbank (1) , 2157 embl (2) , 2158 ddbj (3) , 2159 pir (4) , 2160 sp (5) , 2161 bbone (6) , 2162 pdb (7) , 2163 other (255) } OPTIONAL , 2164 name VisibleString OPTIONAL ,-- source if "other" above 2165 desc Annot-descr OPTIONAL , -- used only for stand alone Seq-annots 2166 data CHOICE { 2167 ftable SET OF Seq-feat , 2168 align SET OF Seq-align , 2169 graph SET OF Seq-graph , 2170 ids SET OF Seq-id , -- used for communication between tools 2171 locs SET OF Seq-loc } } -- used for communication between tools 2172 2173END 2174 2175 2176--$Revision: 97143 $ 2177--******************************************************************** 2178-- 2179-- Direct Submission of Sequence Data 2180-- James Ostell, 1991 2181-- 2182-- This is a trial specification for direct submission of sequence 2183-- data 
worked out between NCBI and EMBL 2184-- Later revised to reflect work with GenBank and Integrated database 2185-- 2186-- Version 3.0, 1994 2187-- This is the official NCBI sequence submission format now. 2188-- 2189--******************************************************************** 2190 2191NCBI-Submit DEFINITIONS ::= 2192BEGIN 2193 2194EXPORTS Seq-submit, Contact-info; 2195 2196IMPORTS Cit-sub, Author FROM NCBI-Biblio 2197 Date, Object-id FROM NCBI-General 2198 Seq-annot FROM NCBI-Sequence 2199 Seq-id FROM NCBI-Seqloc 2200 Seq-entry FROM NCBI-Seqset; 2201 2202Seq-submit ::= SEQUENCE { 2203 sub Submit-block , 2204 data CHOICE { 2205 entrys SET OF Seq-entry , -- sequence(s) 2206 annots SET OF Seq-annot , -- annotation(s) 2207 delete SET OF Seq-id } } -- deletions of entries 2208 2209Submit-block ::= SEQUENCE { 2210 contact Contact-info , -- who to contact 2211 cit Cit-sub , -- citation for this submission 2212 hup BOOLEAN DEFAULT FALSE , -- hold until publish 2213 reldate Date OPTIONAL , -- release by date 2214 subtype INTEGER { -- type of submission 2215 new (1) , -- new data 2216 update (2) , -- update by author 2217 revision (3) , -- 3rd party (non-author) update 2218 other (255) } OPTIONAL , 2219 tool VisibleString OPTIONAL, -- tool used to make submission 2220 user-tag VisibleString OPTIONAL, -- user supplied id for this submission 2221 comment VisibleString OPTIONAL } -- user comments/advice to database 2222 2223Contact-info ::= SEQUENCE { -- who to contact to discuss the submission 2224 name VisibleString OPTIONAL , -- OBSOLETE: will be removed 2225 address SEQUENCE OF VisibleString OPTIONAL , 2226 phone VisibleString OPTIONAL , 2227 fax VisibleString OPTIONAL , 2228 email VisibleString OPTIONAL , 2229 telex VisibleString OPTIONAL , 2230 owner-id Object-id OPTIONAL , -- for owner accounts 2231 password OCTET STRING OPTIONAL , 2232 last-name VisibleString OPTIONAL , -- structured to replace name above 2233 first-name VisibleString OPTIONAL , 2234 
middle-initial VisibleString OPTIONAL , 2235 contact Author OPTIONAL } -- WARNING: this will replace the above 2236 2237END 2238 2239--$Revision: 97143 $ 2240--**************************************************************** 2241-- 2242-- NCBI Project Definition Module 2243-- by Jim Ostell and Jonathan Kans, 1998 2244-- 2245--**************************************************************** 2246 2247NCBI-Project DEFINITIONS ::= 2248BEGIN 2249 2250EXPORTS Project, Project-item; 2251 2252IMPORTS Date FROM NCBI-General 2253 PubMedId FROM NCBI-Biblio 2254 Seq-id, Seq-loc FROM NCBI-Seqloc 2255 Seq-annot, Pubdesc FROM NCBI-Sequence 2256 Seq-entry FROM NCBI-Seqset 2257 Pubmed-entry FROM NCBI-PubMed; 2258 2259Project ::= SEQUENCE { 2260 descr Project-descr OPTIONAL , 2261 data Project-item } 2262 2263Project-item ::= CHOICE { 2264 pmuid SET OF INTEGER , 2265 protuid SET OF INTEGER , 2266 nucuid SET OF INTEGER , 2267 sequid SET OF INTEGER , 2268 genomeuid SET OF INTEGER , 2269 structuid SET OF INTEGER , 2270 pmid SET OF PubMedId , 2271 protid SET OF Seq-id , 2272 nucid SET OF Seq-id , 2273 seqid SET OF Seq-id , 2274 genomeid SET OF Seq-id , 2275 structid NULL , 2276 pment SET OF Pubmed-entry , 2277 protent SET OF Seq-entry , 2278 nucent SET OF Seq-entry , 2279 seqent SET OF Seq-entry , 2280 genomeent SET OF Seq-entry , 2281 structent NULL , 2282 seqannot SET OF Seq-annot , 2283 loc SET OF Seq-loc , 2284 proj SET OF Project 2285} 2286 2287Project-descr ::= SEQUENCE { 2288 id SET OF Project-id , 2289 name VisibleString OPTIONAL , 2290 descr SET OF Projdesc OPTIONAL } 2291 2292Projdesc ::= CHOICE { 2293 pub Pubdesc , 2294 date Date , 2295 comment VisibleString , 2296 title VisibleString 2297} 2298 2299Project-id ::= VisibleString 2300 2301END 2302 2303 2304--$Revision: 97143 $ 2305--********************************************************************** 2306-- 2307-- Biological Macromolecule 3-D Structure Data Types for MMDB, 2308-- A Molecular Modeling Database 2309-- 2310-- 
Definitions for a biomolecular assembly and the MMDB database 2311-- 2312-- By Hitomi Ohkawa, Jim Ostell, Chris Hogue, and Steve Bryant 2313-- 2314-- National Center for Biotechnology Information 2315-- National Institutes of Health 2316-- Bethesda, MD 20894 USA 2317-- 2318-- July 1995 2319-- 2320--********************************************************************** 2321 2322-- Contents of the MMDB database are currently based on files distributed by 2323-- the Protein Data Bank, PDB. These data are changed in form, as described 2324-- in this specification. To some extent they are also changed in content, in 2325-- that many data items implicit in PDB are made explicit, and others are 2326-- corrected or omitted as a consequence of validation checks. The semantics 2327-- of MMDB data items are indicated by comments within the specification below. 2328-- These comments explain in detail the manner in which data items from PDB 2329-- have been mapped into MMDB. 2330 2331MMDB DEFINITIONS ::= 2332 2333BEGIN 2334 2335EXPORTS Biostruc, Biostruc-id, Biostruc-set, Biostruc-annot-set, 2336 Biostruc-residue-graph-set; 2337 2338IMPORTS Biostruc-graph, Biomol-descr, Residue-graph FROM MMDB-Chemical-graph 2339 Biostruc-model FROM MMDB-Structural-model 2340 Biostruc-feature-set FROM MMDB-Features 2341 Pub FROM NCBI-Pub 2342 Date, Object-id, Dbtag FROM NCBI-General; 2343 2344-- A structure report or "biostruc" describes the components of a biomolecular 2345-- assembly in terms of their names and descriptions, and a chemical graph 2346-- giving atomic formula, connectivity and chirality. It also gives one or more 2347-- three-dimensional model structures, literally a mapping of the atoms, 2348-- residues and/or molecules of each component into a measured three- 2349-- dimensional space. Structure may also be described by named features, which 2350-- associate nodes in the chemical graph, or regions in space, with text or 2351-- numeric descriptors. 
2352 2353-- Note that a biostruc may also contain cross references to other databases, 2354-- including citations to relevant scientific literature. These cross 2355-- references use object types from other NCBI data specifications, which are 2356-- "imported" into MMDB, and not repeated in this specification. 2357 2358Biostruc ::= SEQUENCE { 2359 id SEQUENCE OF Biostruc-id, 2360 descr SEQUENCE OF Biostruc-descr OPTIONAL, 2361 chemical-graph Biostruc-graph, 2362 features SEQUENCE OF Biostruc-feature-set OPTIONAL, 2363 model SEQUENCE OF Biostruc-model OPTIONAL } 2364 2365-- A Biostruc-id is a collection of identifiers for the molecular assembly. 2366-- Mmdb-id's are NCBI-assigned, and are intended to be unique and stable 2367-- identifiers. Other-id's are synonyms. 2368 2369Biostruc-id ::= CHOICE { 2370 mmdb-id Mmdb-id, 2371 other-database Dbtag, 2372 local-id Object-id } 2373 2374Mmdb-id ::= INTEGER 2375 2376 2377-- The description of a biostruc refers to both the reported chemical and 2378-- spatial structure of a biomolecular assembly. PDB-derived descriptors 2379-- which refer specifically to the chemical components or spatial structure 2380-- are not provided here, but instead as descriptors of the biostruc-graph or 2381-- biostruc-model. For PDB-derived structures the biostruc name is the PDB 2382-- id-code. PDB-derived citations appear as publications within the biostruc 2383-- description, and include a data-submission citation derived from PDB AUTHOR 2384-- records. Citations are described using the NCBI Pub specification. 2385 2386Biostruc-descr ::= CHOICE { 2387 name VisibleString, 2388 pdb-comment VisibleString, 2389 other-comment VisibleString, 2390 history Biostruc-history, 2391 attribution Pub } 2392 2393 2394-- The history of a biostruc indicates its origin and its update history 2395-- within MMDB, the NCBI-maintained molecular structure database.
2396 2397Biostruc-history ::= SEQUENCE { 2398 replaces Biostruc-replace OPTIONAL, 2399 replaced-by Biostruc-replace OPTIONAL, 2400 data-source Biostruc-source OPTIONAL } 2401 2402Biostruc-replace ::= SEQUENCE { 2403 id Biostruc-id, 2404 date Date } 2405 2406-- The origin of a biostruc is a reference to another database. PDB release 2407-- date and PDB-assigned id codes are recorded here, as are the PDB-assigned 2408-- entry date and replacement history. 2409 2410Biostruc-source ::= SEQUENCE { 2411 name-of-database VisibleString, 2412 version-of-database CHOICE { 2413 release-date Date, 2414 release-code VisibleString } OPTIONAL, 2415 database-entry-id Biostruc-id, 2416 database-entry-date Date, 2417 database-entry-history SEQUENCE OF VisibleString OPTIONAL} 2418 2419 2420-- A biostruc set is a means to collect ASN.1 data for many biostrucs in 2421-- one file, as convenient for application programs. The object type is not 2422-- intended to imply similarity of the biostrucs grouped together. 2423 2424Biostruc-set ::= SEQUENCE { 2425 id SEQUENCE OF Biostruc-id OPTIONAL, 2426 descr SEQUENCE OF Biostruc-descr OPTIONAL, 2427 biostrucs SEQUENCE OF Biostruc } 2428 2429 2430-- A biostruc annotation set is a means to collect ASN.1 data for biostruc 2431-- features into one file. The object type is intended as a means to store 2432-- feature annotation of similar type, such as "core" definitions for a 2433-- threading program, or structure-structure alignments for a structure- 2434-- similarity browser. 2435 2436Biostruc-annot-set ::= SEQUENCE { 2437 id SEQUENCE OF Biostruc-id OPTIONAL, 2438 descr SEQUENCE OF Biostruc-descr OPTIONAL, 2439 features SEQUENCE OF Biostruc-feature-set } 2440 2441 2442-- A biostruc residue graph set is a collection of residue graphs.
The object 2443-- type is intended as a means to record dictionaries containing the chemical 2444-- subgraphs of "standard" residue types, which are used as a means to 2445-- simplify description of the covalent structure of a biomolecular assembly. 2446-- The standard residue graph dictionary supplied with the MMDB database 2447-- contains 20 standard L amino acids and 8 standard ribonucleotide groups. 2448-- These graphs are complete, including explicit hydrogen atoms and separate 2449-- instances for the terminal polypeptide and polynucleotide residues. 2450 2451Biostruc-residue-graph-set ::= SEQUENCE { 2452 id SEQUENCE OF Biostruc-id OPTIONAL, 2453 descr SEQUENCE OF Biomol-descr OPTIONAL, 2454 residue-graphs SEQUENCE OF Residue-graph } 2455 2456END 2457 2458 2459 2460--********************************************************************** 2461-- 2462-- Biological Macromolecule 3-D Structure Data Types for MMDB, 2463-- A Molecular Modeling Database 2464-- 2465-- Definitions for a chemical graph 2466-- 2467-- By Hitomi Ohkawa, Jim Ostell, Chris Hogue and Steve Bryant 2468-- 2469-- National Center for Biotechnology Information 2470-- National Institutes of Health 2471-- Bethesda, MD 20894 USA 2472-- 2473-- July, 1995 2474-- 2475--********************************************************************** 2476 2477MMDB-Chemical-graph DEFINITIONS ::= 2478 2479BEGIN 2480 2481EXPORTS Biostruc-graph, Biomol-descr, Residue-graph, 2482 Molecule-id, Residue-id, Atom-id; 2483 2484IMPORTS Pub FROM NCBI-Pub 2485 BioSource FROM NCBI-BioSource 2486 Seq-id FROM NCBI-Seqloc 2487 Biostruc-id FROM MMDB; 2488 2489-- A biostruc graph contains the complete chemical graph of the biomolecular 2490-- assembly. The assembly graph is defined hierarchically, in terms of 2491-- subgraphs of component molecules. For PDB-derived biostrucs, 2492-- the molecules forming the assembly are the individual biopolymer chains and 2493-- any non-polymer or "heterogen" groups which are present.
2494 2495-- The PDB-derived "compound name" field appears as the name within the 2496-- biostruc-graph description. PDB "class" and "source" fields appear as 2497-- explicit attributes. PDB-derived structures are assigned an assembly type 2498-- of "other" unless they have been further classified as the "physiological 2499-- form" or "crystallographic cell" contents. If they have, the source of the 2500-- type classification appears as a citation within the assembly description. 2501 2502-- Note that the biostruc-graph also includes as literals the subgraphs of 2503-- any nonstandard residues present within it. For PDB-derived biostrucs these 2504-- subgraphs are constructed automatically, with validation as described below. 2505 2506Biostruc-graph ::= SEQUENCE { 2507 descr SEQUENCE OF Biomol-descr OPTIONAL, 2508 molecule-graphs SEQUENCE OF Molecule-graph, 2509 inter-molecule-bonds SEQUENCE OF Inter-residue-bond OPTIONAL, 2510 residue-graphs SEQUENCE OF Residue-graph OPTIONAL } 2511 2512-- A biomolecule description refers to the chemical structure of a molecule or 2513-- component substructures. This descriptor type is used at the level of 2514-- assemblies, molecules and residues, and also for residue-graph dictionaries. 2515-- The BioSource object type is drawn from NCBI taxonomy data specifications, 2516-- and is not repeated here. 2517 2518Biomol-descr ::= CHOICE { 2519 name VisibleString, 2520 pdb-class VisibleString, 2521 pdb-source VisibleString, 2522 pdb-comment VisibleString, 2523 other-comment VisibleString, 2524 organism BioSource, 2525 attribution Pub, 2526 assembly-type INTEGER { physiological-form(1), 2527 crystallographic-cell(2), 2528 other(255) }, 2529 molecule-type INTEGER { dna(1), 2530 rna(2), 2531 protein(3), 2532 other-biopolymer(4), 2533 solvent(5), 2534 other-nonpolymer(6), 2535 other(255) } } 2536 2537-- A molecule chemical graph is defined by a sequence of residues. 
Nonpolymers 2538-- are described in the same way, but may contain only a single residue. 2539 2540-- Biopolymer molecules are identified within PDB entries according to their 2541-- appearance on SEQRES records, which formally define a biopolymer as such. 2542-- Biopolymers are defined by the distinction between ATOM and HETATM 2543-- coordinate records only in cases where the chemical sequence from SEQRES 2544-- is in conflict with coordinate data. The PDB-assigned chain code appears as 2545-- the name within the molecule descriptions of the biopolymers. 2546 2547-- Nonpolymer molecules from PDB correspond to individual HETEROGEN groups, 2548-- excluding any HETEROGEN groups which represent modified biopolymer residues. 2549-- These molecules are named according to the chain, residue type and residue 2550-- number fields as assigned by PDB. Any description appearing in the PDB HET 2551-- record appears as a pdb-comment within the molecule description. 2552 2553-- Molecule types for PDB-derived molecule graphs are assigned by matching 2554-- residue and atom names against the PDB-documented standard types for protein, 2555-- DNA and RNA, and against residue codes commonly used to indicate solvent. 2556-- Classification is by "majority rule". If more than half of the residues in 2557-- a biopolymer are standard groups of one type, then the molecule is of that 2558-- type, and otherwise classified as "other". Note that this classification does 2559-- not preclude the presence of modified residues, but insists they constitute 2560-- less than half the biopolymer. Non-polymers are classified only as "solvent" 2561-- or "other". 2562 2563-- Note that a molecule graph may also contain a set of cross references 2564-- to biopolymer sequence databases. All biopolymer molecules in MMDB contain 2565-- appropriate identifiers for the corresponding entry in the NCBI-Sequences 2566-- database, in particular the NCBI "gi" number, which may be used for sequence 2567-- retrieval. 
The Seq-id object type is defined in the NCBI molecular sequence 2568-- specification, and not repeated here. 2569 2570Molecule-graph ::= SEQUENCE { 2571 id Molecule-id, 2572 descr SEQUENCE OF Biomol-descr OPTIONAL, 2573 seq-id Seq-id OPTIONAL, 2574 residue-sequence SEQUENCE OF Residue, 2575 inter-residue-bonds SEQUENCE OF Inter-residue-bond OPTIONAL } 2576 2577Molecule-id ::= INTEGER 2578 2579-- Residues may be assigned a text-string name as well as an id number. PDB 2580-- assigned residue numbers appear as the residue name. 2581 2582Residue ::= SEQUENCE { 2583 id Residue-id, 2584 name VisibleString OPTIONAL, 2585 residue-graph Residue-graph-pntr } 2586 2587Residue-id ::= INTEGER 2588 2589 2590-- Residue graphs from different sources may be referenced within a molecule 2591-- graph. The allowed choices are the nonstandard residue graphs included in 2592-- the present biostruc, residue graphs within other biostrucs, or residue 2593-- graphs within tables of standard residue definitions. 2594 2595Residue-graph-pntr ::= CHOICE { 2596 local Residue-graph-id, 2597 biostruc Biostruc-graph-pntr, 2598 standard Biostruc-residue-graph-set-pntr } 2599 2600Biostruc-graph-pntr ::= SEQUENCE { 2601 biostruc-id Biostruc-id, 2602 residue-graph-id Residue-graph-id } 2603 2604Biostruc-residue-graph-set-pntr ::= SEQUENCE { 2605 biostruc-residue-graph-set-id Biostruc-id, 2606 residue-graph-id Residue-graph-id } 2607 2608 2609-- Residue graphs define atomic formulae, connectivity, chirality, and names. 2610-- For standard residue graphs from the MMDB dictionary the PDB-assigned 2611-- residue-type code appears as the name within the residue graph description, 2612-- and the full trivial name of the residue as a comment within that 2613-- description. 
For any nonstandard residue graphs provided with an MMDB 2614-- biostruc the PDB-assigned residue-type code similarly appears as the name 2615-- within the description, and any information provided on PDB HET records as 2616-- a pdb-comment within that description. 2617 2618-- Note that nonstandard residue graphs for a PDB-derived biostruc may be 2619-- incomplete. Current PDB format cannot represent connectivity for groups 2620-- which are disordered, and for which no coordinates are given. In these 2621-- cases the residue graph defined in MMDB represents only the subgraph that 2622-- could be identified from available ATOM, HETATM and CONECT records. 2623 2624Residue-graph ::= SEQUENCE { 2625 id Residue-graph-id, 2626 descr SEQUENCE OF Biomol-descr OPTIONAL, 2627 residue-type INTEGER { deoxyribonucleotide(1), 2628 ribonucleotide(2), 2629 amino-acid(3), 2630 other(255) } OPTIONAL, 2631 iupac-code SEQUENCE OF VisibleString OPTIONAL, 2632 atoms SEQUENCE OF Atom, 2633 bonds SEQUENCE OF Intra-residue-bond, 2634 chiral-centers SEQUENCE OF Chiral-center OPTIONAL } 2635 2636Residue-graph-id ::= INTEGER 2637 2638-- Atoms in residue graphs are defined by elemental symbols and names. PDB- 2639-- assigned atom names appear here in the name field, except in cases of known 2640-- PDB synonyms. In these cases atom names are mapped to the names used in the 2641-- MMDB standard dictionary. This occurs primarily for hydrogen atoms, where 2642-- PDB practice allows synonyms for several atom types. For PDB atoms the 2643-- elemental symbol is obtained by parsing the PDB atom name field, allowing 2644-- for known special-semantics cases where the atom name does not follow the 2645-- documented encoding rule. Ionizable protons are identified within standard 2646-- residue graphs in the MMDB dictionary, but not within automatically-defined 2647-- nonstandard graphs. 
2648 2649Atom ::= SEQUENCE { 2650 id Atom-id, 2651 name VisibleString OPTIONAL, 2652 iupac-code SEQUENCE OF VisibleString OPTIONAL, 2653 element ENUMERATED { 2654 h(1), he(2), li(3), be(4), b(5), 2655 c(6), n(7), o(8), f(9), ne(10), 2656 na(11), mg(12), al(13), si(14), p(15), 2657 s(16), cl(17), ar(18), k(19), ca(20), 2658 sc(21), ti(22), v(23), cr(24), mn(25), 2659 fe(26), co(27), ni(28), cu(29), zn(30), 2660 ga(31), ge(32), as(33), se(34), br(35), 2661 kr(36), rb(37), sr(38), y(39), zr(40), 2662 nb(41), mo(42), tc(43), ru(44), rh(45), 2663 pd(46), ag(47), cd(48), in(49), sn(50), 2664 sb(51), te(52), i(53), xe(54), cs(55), 2665 ba(56), la(57), ce(58), pr(59), nd(60), 2666 pm(61), sm(62), eu(63), gd(64), tb(65), 2667 dy(66), ho(67), er(68), tm(69), yb(70), 2668 lu(71), hf(72), ta(73), w(74), re(75), 2669 os(76), ir(77), pt(78), au(79), hg(80), 2670 tl(81), pb(82), bi(83), po(84), at(85), 2671 rn(86), fr(87), ra(88), ac(89), th(90), 2672 pa(91), u(92), np(93), pu(94), am(95), 2673 cm(96), bk(97), cf(98), es(99), 2674 fm(100), md(101), no(102), lr(103), 2675 other(254), unknown(255) }, 2676 ionizable-proton ENUMERATED { 2677 true(1), 2678 false(2), 2679 unknown(255) } OPTIONAL } 2680 2681Atom-id ::= INTEGER 2682 2683-- Intra-residue-bond specifies connectivity between atoms in Residue-graph. 2684-- Unlike Inter-residue-bond defined later, its participating atoms are part of 2685-- a residue subgraph dictionary, not part of a specific biostruc-graph. 2686 2687-- For residue graphs in the standard MMDB dictionary bonds are defined from 2688-- the known chemical structures of amino acids and nucleotides. For 2689-- nonstandard residue graphs bonds are defined from PDB CONECT records, with 2690-- validation for consistency with coordinate data, and from stereochemical 2691-- calculation to identify unreported bonds. 
Validation and bond identification 2692-- are based on comparison of inter-atomic distances to the sum of covalent 2693-- radii for the corresponding elements. 2694 2695Intra-residue-bond ::= SEQUENCE { 2696 atom-id-1 Atom-id, 2697 atom-id-2 Atom-id, 2698 bond-order INTEGER { 2699 single(1), 2700 partial-double(2), 2701 aromatic(3), 2702 double(4), 2703 triple(5), 2704 other(6), 2705 unknown(255)} OPTIONAL } 2706 2707-- Chiral centers are atoms with tetrahedral geometry. Chirality is defined 2708-- by a chiral volume involving the chiral center and 3 other atoms bonded to 2709-- it. For any coordinates assigned to atoms c, n1, n2, and n3, the vector 2710-- triple product (n1-c) dot ( (n2-c) cross (n3-c) ) must have the indicated 2711-- sign. The calculation assumes an orthogonal right-handed coordinate system 2712-- as is used for MMDB model structures. 2713 2714-- Chirality is defined for standard residues in the MMDB dictionary, but is 2715-- not assigned automatically for PDB-derived nonstandard residues. If assigned 2716-- for nonstandard residues, the source of chirality information is described 2717-- by a citation within the residue description. 2718 2719Chiral-center ::= SEQUENCE { 2720 c Atom-id, 2721 n1 Atom-id, 2722 n2 Atom-id, 2723 n3 Atom-id, 2724 sign ENUMERATED { positive(1), 2725 negative(2) } } 2726 2727-- Inter-residue bonds are defined by a reference to two atoms. For PDB-derived 2728-- structures bonds are identified from biopolymer connectivity according to 2729-- SEQRES and from other connectivity information on SSBOND and CONECT 2730-- records. These data are validated and unreported bonds identified by 2731-- stereochemical calculation, using the same criteria as for intra-residue 2732-- bonds. 
2733 2734Inter-residue-bond ::= SEQUENCE { 2735 atom-id-1 Atom-pntr, 2736 atom-id-2 Atom-pntr, 2737 bond-order INTEGER { 2738 single(1), 2739 partial-double(2), 2740 aromatic(3), 2741 double(4), 2742 triple(5), 2743 other(6), 2744 unknown(255)} OPTIONAL } 2745 2746-- Atoms, residues and molecules within the current biostruc are referenced 2747-- by hierarchical pointers. 2748 2749Atom-pntr ::= SEQUENCE { 2750 molecule-id Molecule-id, 2751 residue-id Residue-id, 2752 atom-id Atom-id } 2753 2754Atom-pntr-set ::= SEQUENCE OF Atom-pntr 2755 2756END 2757--$Revision: 97143 $ 2758--********************************************************************** 2759-- 2760-- Biological Macromolecule 3-D Structure Data Types for MMDB, 2761-- A Molecular Modeling Database 2762-- 2763-- Definitions for structural models 2764-- 2765-- By Hitomi Ohkawa, Jim Ostell, Chris Hogue and Steve Bryant 2766-- 2767-- National Center for Biotechnology Information 2768-- National Institutes of Health 2769-- Bethesda, MD 20894 USA 2770-- 2771-- July, 1996 2772-- 2773--********************************************************************** 2774 2775MMDB-Structural-model DEFINITIONS ::= 2776 2777BEGIN 2778 2779EXPORTS Biostruc-model, Model-id, Model-coordinate-set-id; 2780 2781IMPORTS Chem-graph-pntrs, Atom-pntrs, Chem-graph-alignment, 2782 Sphere, Cone, Cylinder, Brick, Transform FROM MMDB-Features 2783 Biostruc-id FROM MMDB 2784 Pub FROM NCBI-Pub; 2785 2786-- A structural model maps chemical components into a measured three- 2787-- dimensional space. PDB-derived biostrucs generally contain 4 models, 2788-- corresponding to "views" of the structure of a biomolecular assembly with 2789-- increasing levels of complexity. Model types indicate the complexity of the 2790-- view. 2791 2792-- The model named "NCBI all atom" represents a view suitable for most 2793-- computational biology applications.
It provides complete atomic coordinate 2794-- data for a "single best" model, omitting statistical disorder information 2795-- and/or ensemble structure descriptions provided in the source PDB file. 2796-- Construction of the single best model is based on the assumption that the 2797-- contents of the "alternate conformation" field from pdb imply no correlation 2798-- among the occupancies of multiple sites assigned to sets of atoms: the best 2799-- site is chosen only on the basis of highest occupancy. Note, however, that 2800-- alternate conformation sets where correlation is implied are generally 2801-- constrained in crystallographic refinement to have uniform occupancy, and 2802-- will thus be selected as a set. For ensemble models the model which assigns 2803-- coordinates to the most atoms is chosen. If numbers of coordinates are the 2804-- same, the model occurring first in the PDB file is selected. The single 2805-- best model includes complete coordinates for all nonpolymer components, but 2806-- omits those classified as "solvent". Model type is 3 for this model. 2807 2808-- The model named "NCBI backbone" represents a simple view intended for 2809-- graphic displays and rapid transmission over a network. It includes only 2810-- alpha carbon or backbone phosphate coordinates for biopolymers. It is based 2811-- on selection of alpha-carbon and backbone phosphate atoms from the "NCBI 2812-- all atom" model. The model type is set to 2. An even simpler model gives 2813-- only a cartoon representation, using cylinders corresponding to secondary 2814-- structure elements. This is named "NCBI vector", and has model type 1. 2815 2816-- The models named "PDB Model 1", "PDB Model 2", etc. represent the complete 2817-- information provided by PDB, including full descriptions of statistical 2818-- disorder. 
The name of the model is based on the contents of the PDB MODEL 2819-- record, with a default name of "PDB Model 1" for PDB files which contain 2820-- only a single model. Construction of these models is based on the 2821-- assumption that contents of the PDB "alternate conformation" field are 2822-- intended to imply correlation among the occupancies of atom sets flagged by 2823-- the same identifier. The special flag " " (blank) is assumed to indicate 2824-- sites occupied in all alternate conformations, and sites flagged otherwise, 2825-- together with " ", to indicate a distinct member of an ensemble of 2826-- alternate conformations. Note that construction of ensemble members 2827-- according to these assumption requires two validation checks on PDB 2828-- "alternate conformation" flags: they must be unique among sites assigned to 2829-- the same atom, and that the special " " flag must occur only for unique 2830-- sites. Sites which violate the first check are flagged as "u", for 2831-- "unknown"; they are omitted from all ensemble definitions but are 2832-- nontheless retained in the coordinate list. Sites which violate the second 2833-- check are flagged "b" for "blank", and are included in an appropriately 2834-- named ensemble. The model type for pdb all models is 4. 2835 2836-- Note that in the MMDB database models are stored in the ASN.1 stream in 2837-- order of increasing model type value. Since models occur as the last item 2838-- in a biostruc, parsers may avoid reading the entire stream if the desired 2839-- model is one of the simplified types, which occur first in the stream. This 2840-- can save considerable I/O time, particularly for large ensemble models from 2841-- NMR determinations. 
Biostruc-model ::= SEQUENCE {
    id Model-id,
    type Model-type,
    descr SEQUENCE OF Model-descr OPTIONAL,
    model-space Model-space OPTIONAL,
    model-coordinates SEQUENCE OF Model-coordinate-set OPTIONAL }

Model-id ::= INTEGER

-- Model types, in order of increasing complexity of the view (see the
-- descriptions of the named models in the module commentary).

Model-type ::= INTEGER {
    ncbi-vector(1),      -- "NCBI vector": cylinders for secondary structure
    ncbi-backbone(2),    -- "NCBI backbone": alpha carbon / backbone phosphate only
    ncbi-all-atom(3),    -- "NCBI all atom": the "single best" all-atom model
    pdb-model(4),        -- "PDB Model n": complete information provided by PDB
    other(255)}

Model-descr ::= CHOICE {
    name VisibleString,
    pdb-reso VisibleString,
    pdb-method VisibleString,
    pdb-comment VisibleString,
    other-comment VisibleString,
    attribution Pub }

-- The model space defines measurement units and any external reference frame.
-- Coordinates refer to a right-handed orthogonal system defined on axes
-- tagged x, y and z in the coordinate and feature definitions of a biostruc.
-- Coordinates from PDB-derived structures are reported without change, in
-- angstrom units. The units of temperature and occupancy factors are not
-- defined explicitly in PDB, but are inferred from their value range.

Model-space ::= SEQUENCE {
    coordinate-units ENUMERATED {
        angstroms(1),
        nanometers(2),
        other(3),
        unknown(255)},
    thermal-factor-units ENUMERATED {
        b(1),
        u(2),
        other(3),
        unknown(255)} OPTIONAL,
    occupancy-factor-units ENUMERATED {
        fractional(1),
        electrons(2),
        other(3),
        unknown(255)} OPTIONAL,
    density-units ENUMERATED {
        electrons-per-unit-volume(1),
        arbitrary-scale(2),
        other(3),
        unknown(255)} OPTIONAL,
    reference-frame Reference-frame OPTIONAL }

-- An external reference frame is a pointer to another biostruc, with an
-- optional operator to rotate and translate coordinates into its model space.
-- This item is intended for representation of homology-derived model
-- structures, and is not present for structures from PDB.
Reference-frame ::= SEQUENCE {
    biostruc-id Biostruc-id,
    rotation-translation Transform OPTIONAL }

-- Atomic coordinates may be assigned literally or by reference to another
-- biostruc. The reference coordinate type is used to represent homology-
-- derived model structures. PDB-derived structures have literal coordinates.

-- Referenced coordinates identify another biostruc, any transformation to be
-- applied to coordinates from that biostruc, and a mapping of the chemical
-- graph of the present biostruc onto that of the referenced biostruc. They
-- give an "alignment" of atoms in the current biostruc with those in another,
-- from which the coordinates of matched atoms may be retrieved. For non-
-- atomic models "alignment" may also be represented by molecule and residue
-- equivalence lists. Referenced coordinates are a data item intended for
-- representation of homology models, with an explicit pointer to their source
-- information. They do not occur in PDB-derived models.

Model-coordinate-set ::= SEQUENCE {
    id Model-coordinate-set-id OPTIONAL,
    descr SEQUENCE OF Model-descr OPTIONAL,
    coordinates CHOICE {
        literal Coordinates,
        reference Chem-graph-alignment } }

Model-coordinate-set-id ::= INTEGER


-- Literal coordinates map chemical components into the model space. Three
-- mapping types are allowed, atomic coordinate models, density-grid models,
-- and surface models. A model consists of a sequence of such coordinate sets,
-- and may thus combine coordinate subsets which have a different source.
-- PDB-derived models contain a single atomic coordinate set, as they by
-- definition represent information from a single source.
Coordinates ::= CHOICE {
    atomic Atomic-coordinates,
    surface Surface-coordinates,
    density Density-coordinates }

-- Literal atomic coordinate values give location, occupancy and order
-- parameters, and a pointer to a specific atom defined in the biostruc graph.
-- Temperature and occupancy factors have their conventional crystallographic
-- definitions, with units defined in the model space declaration. Atoms,
-- sites, temperature-factors, occupancies and alternate-conformation-ids
-- are parallel arrays, i.e. they have the same number of values as given by
-- number-of-points. Conformation ensembles represent distinct correlated-
-- disorder subsets of the coordinates. They will be present only for certain
-- "views" of PDB structures, as described above. Their derivation from PDB-
-- supplied "alternate-conformation" ids is described below.

Atomic-coordinates ::= SEQUENCE {
    number-of-points INTEGER,
    atoms Atom-pntrs,
    sites Model-space-points,
    temperature-factors Atomic-temperature-factors OPTIONAL,
    occupancies Atomic-occupancies OPTIONAL,
    alternate-conf-ids Alternate-conformation-ids OPTIONAL,
    conf-ensembles SEQUENCE OF Conformation-ensemble OPTIONAL }

-- The atoms whose location is described by each coordinate are identified
-- via a hierarchical pointer to the chemical graph of the biomolecular
-- assembly. Coordinates may be matched with atoms in the chemical structure
-- by the values of the molecule, residue and atom id's given here, which
-- match exactly the items of the same type defined in Biostruc-graph.

-- Coordinates are given as integer values, with a scale factor to convert
-- to real values for each x, y or z, in the units indicated in model-space.
-- Integer values must be divided by the scale factor. This use of integer
-- values reduces the ASN.1 stream size. The scale factors for temperature
-- factors and occupancies are given separately, but must be used in the same
-- fashion to produce properly scaled real values.

Model-space-points ::= SEQUENCE {
    scale-factor INTEGER,
    x SEQUENCE OF INTEGER,
    y SEQUENCE OF INTEGER,
    z SEQUENCE OF INTEGER }

Atomic-temperature-factors ::= CHOICE {
    isotropic Isotropic-temperature-factors,
    anisotropic Anisotropic-temperature-factors }

Isotropic-temperature-factors ::= SEQUENCE {
    scale-factor INTEGER,
    b SEQUENCE OF INTEGER }

Anisotropic-temperature-factors ::= SEQUENCE {
    scale-factor INTEGER,
    b-11 SEQUENCE OF INTEGER,
    b-12 SEQUENCE OF INTEGER,
    b-13 SEQUENCE OF INTEGER,
    b-22 SEQUENCE OF INTEGER,
    b-23 SEQUENCE OF INTEGER,
    b-33 SEQUENCE OF INTEGER }

Atomic-occupancies ::= SEQUENCE {
    scale-factor INTEGER,
    o SEQUENCE OF INTEGER }

-- An alternate conformation id is optionally associated with each coordinate.
-- Aside from corrections due to the validation checks described above, the
-- contents of MMDB Alternate-conformation-ids are identical to the PDB
-- "alternate conformation" field.

Alternate-conformation-ids ::= SEQUENCE OF Alternate-conformation-id

Alternate-conformation-id ::= VisibleString

-- Correlated disorder ensemble is defined by a set of alternate conformation
-- id's which identify coordinates relevant to that ensemble. These are
-- defined from the validated and corrected contents of the PDB "alternate
-- conformation" field as described above. A given ensemble, for example, may
-- consist of atom sites flagged by " " and "A" Alternate-conformation-ids.
-- Names for ensembles are constructed from these flags. This example would be
-- named, in its description, "PDB Ensemble blank plus A".
-- Note that this interpretation is consistent with common PDB usage of the
-- "alternate conformation" field, but that PDB specifications do not formally
-- distinguish between correlated and uncorrelated disorder in crystallographic
-- models. Ensembles identified in MMDB thus may not correspond to the meaning
-- intended by PDB or the depositor. No information is lost, however, and
-- if the intended meaning is known alternative ensemble descriptions may be
-- reconstructed directly from the Alternate-conformation-ids.

-- Note that correlated disorder as defined here is allowed within an atomic
-- coordinate set but not between the multiple sets which may define a model.
-- Multiple sets within the same model are intended as a means to represent
-- assemblies modeled from different experimentally determined structures,
-- where correlated disorder between coordinate sets is not relevant.

Conformation-ensemble ::= SEQUENCE {
    name VisibleString,    -- built from the flags, e.g. "PDB Ensemble blank plus A"
    alt-conf-ids SEQUENCE OF Alternate-conformation-id }
                           -- ids of the coordinates relevant to this ensemble


-- Literal surface coordinates define the chemical components whose structure
-- is described by a surface, and the surface itself. The surface may be
-- either a regular geometric solid or a triangle-mesh of arbitrary shape.
Surface-coordinates ::= SEQUENCE {
    contents Chem-graph-pntrs,
    surface CHOICE {
        sphere Sphere,
        cone Cone,
        cylinder Cylinder,
        brick Brick,
        tmesh T-mesh,
        triangles Triangles } }

-- NOTE(review): T-mesh and Triangles carry no description in this module.
-- Their x, y and z arrays presumably hold number-of-points scaled integer
-- coordinates, as in Model-space-points. The meaning of 'swap' and of the
-- v1, v2 and v3 arrays is not stated here. Confirm against the MMDB
-- documentation before relying on it.

T-mesh ::= SEQUENCE {
    number-of-points INTEGER,
    scale-factor INTEGER,
    swap SEQUENCE OF BOOLEAN,
    x SEQUENCE OF INTEGER,
    y SEQUENCE OF INTEGER,
    z SEQUENCE OF INTEGER }

Triangles ::= SEQUENCE {
    number-of-points INTEGER,
    scale-factor INTEGER,
    x SEQUENCE OF INTEGER,
    y SEQUENCE OF INTEGER,
    z SEQUENCE OF INTEGER,
    number-of-triangles INTEGER,
    v1 SEQUENCE OF INTEGER,
    v2 SEQUENCE OF INTEGER,
    v3 SEQUENCE OF INTEGER }


-- Literal density coordinates define the chemical components whose structure
-- is described by a density grid, parameters of this grid, and density values.

Density-coordinates ::= SEQUENCE {
    contents Chem-graph-pntrs,
    grid-corners Brick,
    grid-steps-x INTEGER,
    grid-steps-y INTEGER,
    grid-steps-z INTEGER,
    fastest-varying ENUMERATED {
        x(1),
        y(2),
        z(3)},
    slowest-varying ENUMERATED {
        x(1),
        y(2),
        z(3)},
    scale-factor INTEGER,
    density SEQUENCE OF INTEGER }


END
--$Revision: 97143 $
--**********************************************************************
--
--  Biological Macromolecule 3-D Structure Data Types for MMDB,
--                A Molecular Modeling Database
--
--  Definitions for structural features and biostruc addressing
--
--  By Hitomi Ohkawa, Jim Ostell, Chris Hogue and Steve Bryant
--
--  National Center for Biotechnology Information
--  National Institutes of Health
--  Bethesda, MD 20894 USA
--
--  July, 1996
--
--**********************************************************************

MMDB-Features DEFINITIONS ::=

BEGIN

EXPORTS Biostruc-feature-set, Chem-graph-pntrs, Atom-pntrs,
        Chem-graph-alignment, Sphere, Cone, Cylinder, Brick, Transform,
        Biostruc-feature-set-id, Biostruc-feature-id;

IMPORTS Biostruc-id FROM MMDB
        Molecule-id, Residue-id, Atom-id FROM MMDB-Chemical-graph
        Model-id, Model-coordinate-set-id FROM MMDB-Structural-model
        User-object FROM NCBI-General
        Pub FROM NCBI-Pub;

-- Named model features refer to sets of residues or atoms, or a region in
-- the model space. A few specific feature types are allowed for compatibility
-- with PDB usage, but the purpose of a named model feature is simply to
-- associate various types of information with a set of atoms or
-- residues, or a spatially-defined region of the model structure. They also
-- support association of various properties with each residue or atom of a
-- set.

-- PDB-derived secondary structure defines a single feature, represented as a
-- sequence of residue motifs, as are the contents of PDB SITE and
-- FTNOTE records. NCBI-assigned core and secondary structure descriptions
-- are also represented as a sequence of residue motifs.

Biostruc-feature-set ::= SEQUENCE {
    id Biostruc-feature-set-id,
    descr SEQUENCE OF Biostruc-feature-set-descr OPTIONAL,
    features SEQUENCE OF Biostruc-feature }

Biostruc-feature-set-id ::= INTEGER

Biostruc-feature-set-descr ::= CHOICE {
    name VisibleString,
    pdb-comment VisibleString,
    other-comment VisibleString,
    attribution Pub }

-- An explicitly specified type in Biostruc-feature allows for
-- efficient extraction and indexing of feature sets of a specific type.
-- Special types are provided for coloring and rendering, as
-- needed by molecular graphics programs.
Biostruc-feature ::= SEQUENCE {
    id Biostruc-feature-id OPTIONAL,
    name VisibleString OPTIONAL,
    type INTEGER {
        helix(1),
        strand(2),
        sheet(3),
        turn(4),
        site(5),
        footnote(6),
        comment(7),       -- new
        subgraph(100),    -- NCBI domain reserved
        region(101),
        core(102),        -- user core definition
        supercore(103),   -- NCBI reserved
        color(150),       -- new
        render(151),      -- new
        label(152),       -- new
        transform(153),   -- new
        camera(154),      -- new
        script(155),      -- for scripts
        alignment(200),   -- VAST reserved
        similarity(201),
        multalign(202),   -- multiple alignment
        indirect(203),    -- new
        cn3dstate(254),   -- Cn3D reserved
        other(255) } OPTIONAL,
    property CHOICE {
        color Color-prop,
        render Render-prop,
        transform Transform,
        camera Camera,
        script Biostruc-script,
        user User-object } OPTIONAL,
    location CHOICE {
        subgraph Chem-graph-pntrs,
        region Region-pntrs,
        alignment Chem-graph-alignment,
        similarity Region-similarity,
        indirect Other-feature } OPTIONAL }   -- new

-- Other-feature allows for specifying location via reference to another
-- Biostruc-feature and its location.

Other-feature ::= SEQUENCE {
    biostruc-id Biostruc-id,
    set Biostruc-feature-set-id,
    feature Biostruc-feature-id }

Biostruc-feature-id ::= INTEGER

-- Atom, residue or molecule motifs describe a substructure defined by a set
-- of nodes from the chemical graph. PDB secondary structure features are
-- described as a residue motif, since they are not associated with any one of
-- the multiple models that may be provided in a PDB file. NCBI-assigned
-- secondary structure is represented in the same way, even though it is
-- model specific, since this allows for simple mapping of the structural
-- feature onto a sequence-only representation. This addressing mode may also
-- be used to describe features to be associated with particular atoms,
-- as, for example, the chemical shift observed in an NMR experiment.

Chem-graph-pntrs ::= CHOICE {
    atoms Atom-pntrs,
    residues Residue-pntrs,
    molecules Molecule-pntrs }

Atom-pntrs ::= SEQUENCE {
    number-of-ptrs INTEGER,
    molecule-ids SEQUENCE OF Molecule-id,
    residue-ids SEQUENCE OF Residue-id,
    atom-ids SEQUENCE OF Atom-id }

Residue-pntrs ::= CHOICE {
    explicit Residue-explicit-pntrs,
    interval SEQUENCE OF Residue-interval-pntr }

Residue-explicit-pntrs ::= SEQUENCE {
    number-of-ptrs INTEGER,
    molecule-ids SEQUENCE OF Molecule-id,
    residue-ids SEQUENCE OF Residue-id }

Residue-interval-pntr ::= SEQUENCE {
    molecule-id Molecule-id,
    from Residue-id,
    to Residue-id }

Molecule-pntrs ::= SEQUENCE {
    number-of-ptrs INTEGER,
    molecule-ids SEQUENCE OF Molecule-id }

-- Region motifs describe features defined by spatial location, such as the
-- site specified by a coordinate value, or a region within a bounding volume.

Region-pntrs ::= SEQUENCE {
    model-id Model-id,
    region CHOICE {
        site SEQUENCE OF Region-coordinates,
        boundary SEQUENCE OF Region-boundary } }

-- Coordinate sites describe a region in space by reference to individual
-- coordinates, in a particular model. These coordinates may be either the
-- x, y and z values of atomic coordinates, the triangles of a surface mesh,
-- or the grid points of a density model. All are addressed in the same manner,
-- as coordinate indices which give offsets from the beginning of the
-- coordinate data arrays. A coordinate-index of 5, for example, refers to
-- the 5th x, y and z values of an atomic coordinate set, the 5th v1, v2, and v3
-- values of a triangle mesh, or the 5th value in a density grid.
-- PDB SITE and FTNOTE records refer to particular atomic coordinates, and they
-- are represented as a region motif with addresses of type Region-coordinates.
-- Any names or descriptions provided by PDB are thus associated with the
-- indicated sites, in the indicated model.

Region-coordinates ::= SEQUENCE {
    model-coord-set-id Model-coordinate-set-id,
    number-of-coords INTEGER OPTIONAL,
    coordinate-indices SEQUENCE OF INTEGER OPTIONAL }
                       -- offsets from the beginning of the coordinate
                       -- data arrays of the referenced coordinate set

-- Region boundaries are defined by regular solids located in the model space.

Region-boundary ::= CHOICE {
    sphere Sphere,
    cone Cone,
    cylinder Cylinder,
    brick Brick }

-- A biostruc alignment establishes an equivalence of nodes in the chemical
-- graphs of two or more biostrucs. This may be mapped to a sequence
-- alignment in the case of biopolymers.
-- The 'dimension' component indicates the number of participants
-- in the alignment. For pairwise alignments, such as VAST
-- structure-structure alignments, the dimension will be always 2, with
-- biostruc-ids, alignment, and domain each containing two entries for an
-- aligned pair. The 'alignment' component contains a pair of Chem-graph-pntrs
-- specifying a like number of corresponding residues in each structure.
-- The 'domain' component specifies a region of each structure considered
-- in the alignment. Only one transform (for the second structure) and
-- one aligndata (for the pair) are provided for each VAST alignment.
--
-- For multiple alignments, a set of components are treated as
-- parallel arrays of length 'dimension'.
-- The 'transform' component moves each structure to align it with
-- the structure specified as the first element in the "parallel" array,
-- so necessarily the first transform is a NULL transform.
-- Align-stats are placeholders for scores.
Chem-graph-alignment ::= SEQUENCE {
    dimension INTEGER DEFAULT 2,
    biostruc-ids SEQUENCE OF Biostruc-id,
    alignment SEQUENCE OF Chem-graph-pntrs,
    domain SEQUENCE OF Chem-graph-pntrs OPTIONAL,
    transform SEQUENCE OF Transform OPTIONAL,
    aligndata SEQUENCE OF Align-stats OPTIONAL }

Align-stats ::= SEQUENCE {
    descr VisibleString OPTIONAL,
    scale-factor INTEGER OPTIONAL,
    vast-score INTEGER OPTIONAL,
    vast-mlogp INTEGER OPTIONAL,
    align-res INTEGER OPTIONAL,
    rmsd INTEGER OPTIONAL,
    blast-score INTEGER OPTIONAL,
    blast-mlogp INTEGER OPTIONAL,
    other-score INTEGER OPTIONAL }

-- A biostruc similarity describes spatial features which are similar between
-- two or more biostrucs. Similarities are model dependent, and the model and
-- coordinate set ids of the biostrucs must be specified. They do not
-- necessarily map to a sequence alignment, as the regions referenced may
-- be pieces of a surface or grid, and thus not uniquely mappable to particular
-- chemical components.

Region-similarity ::= SEQUENCE {
    dimension INTEGER DEFAULT 2,
    biostruc-ids SEQUENCE OF Biostruc-id,
    similarity SEQUENCE OF Region-pntrs,
    transform SEQUENCE OF Transform }

-- Geometrical primitives are used in the definition of region motifs, and
-- also non-atomic coordinates. Spheres, cones, cylinders and bricks are
-- defined by a few points in the model space.

Sphere ::= SEQUENCE {
    center Model-space-point,
    radius RealValue }

Cone ::= SEQUENCE {
    axis-top Model-space-point,
    axis-bottom Model-space-point,
    radius-bottom RealValue }

Cylinder ::= SEQUENCE {
    axis-top Model-space-point,
    axis-bottom Model-space-point,
    radius RealValue }

-- A brick is defined by the coordinates of eight corners. These are assumed
-- to appear in the order 000, 001, 010, 011, 100, 101, 110, 111, where the
-- digits 0 and 1 refer respectively to the x, y and z axes of a unit cube.
-- Opposite edges are assumed to be parallel.

Brick ::= SEQUENCE {
    corner-000 Model-space-point,
    corner-001 Model-space-point,
    corner-010 Model-space-point,
    corner-011 Model-space-point,
    corner-100 Model-space-point,
    corner-101 Model-space-point,
    corner-110 Model-space-point,
    corner-111 Model-space-point }

Model-space-point ::= SEQUENCE {
    scale-factor INTEGER,
    x INTEGER,
    y INTEGER,
    z INTEGER }

RealValue ::= SEQUENCE {
    scale-factor INTEGER,
    scaled-integer-value INTEGER }


Transform ::= SEQUENCE {
    id INTEGER,
    moves SEQUENCE OF Move }

Move ::= CHOICE {
    rotate Rot-matrix,
    translate Trans-matrix }

-- A rotation matrix is defined by 9 numbers, given by row, i.e.,
-- with column indices varying fastest.
-- Coordinates, as a matrix with columns x, y, and z, are rotated
-- via multiplication with the rotation matrix.
-- A translation matrix is defined by 3 numbers, which are added to
-- the rotated coordinates for the specified amount of translation.

Rot-matrix ::= SEQUENCE {
    scale-factor INTEGER,
    rot-11 INTEGER,
    rot-12 INTEGER,
    rot-13 INTEGER,
    rot-21 INTEGER,
    rot-22 INTEGER,
    rot-23 INTEGER,
    rot-31 INTEGER,
    rot-32 INTEGER,
    rot-33 INTEGER }

Trans-matrix ::= SEQUENCE {
    scale-factor INTEGER,
    tran-1 INTEGER,
    tran-2 INTEGER,
    tran-3 INTEGER }

-- The camera is a position relative to the world coordinates
-- of the structure referred to by a location.
-- this is used to set the initial position of the
-- camera using OpenGL.
-- scale is the value used to scale the
-- other values from floating point to integer

Camera ::= SEQUENCE {
    x INTEGER,
    y INTEGER,
    distance INTEGER,
    angle INTEGER,
    scale INTEGER,
    modelview GL-matrix }


GL-matrix ::= SEQUENCE {
    scale INTEGER,
    m11 INTEGER,
    m12 INTEGER,
    m13 INTEGER,
    m14 INTEGER,
    m21 INTEGER,
    m22 INTEGER,
    m23 INTEGER,
    m24 INTEGER,
    m31 INTEGER,
    m32 INTEGER,
    m33 INTEGER,
    m34 INTEGER,
    m41 INTEGER,
    m42 INTEGER,
    m43 INTEGER,
    m44 INTEGER }


Color-prop ::= SEQUENCE {
    r INTEGER OPTIONAL,
    g INTEGER OPTIONAL,
    b INTEGER OPTIONAL,
    name VisibleString OPTIONAL }

-- Note that Render-prop is compatible with the Annmm specification,
-- i.e., its numbering schemes do not clash with those in Render-prop.
-- NOTE(review): the sentence above compares Render-prop with itself. The
-- second name was presumably meant to be the corresponding Annmm type.
-- Confirm against the Annmm specification.

Render-prop ::= INTEGER {
    default         (0),    -- Default view
    wire            (1),    -- use wireframe
    space           (2),    -- use spacefill
    stick           (3),    -- use stick model (thin cylinders)
    ballNStick      (4),    -- use ball & stick model
    thickWire       (5),    -- thicker wireframe
    hide            (9),    -- don't show this
    name            (10),   -- display its name next to it
    number          (11),   -- display its number next to it
    pdbNumber       (12),   -- display its PDB number next to it
    objWireFrame    (150),  -- display MMDB surface object as wireframe
    objPolygons     (151),  -- display MMDB surface object as polygons
    colorsetCPK     (225),  -- color atoms like CPK models
    colorsetbyChain (226),  -- color each chain different
    colorsetbyTemp  (227),  -- color using isotropic Temp factors
    colorsetbyRes   (228),  -- color using residue properties
    colorsetbyLen   (229),  -- color changes along chain length
    colorsetbySStru (230),  -- color by secondary structure
    colorsetbyHydro (231),  -- color by hydrophobicity
    colorsetbyObject(246),  -- color each object differently
    colorsetbyDomain(247),  -- color each domain differently
    other           (255)
    }

-- When a Biostruc-Feature with a Biostruc-script is initiated,
-- it should play the specified steps one at a time, setting the feature-do
-- list as the active display.
-- The camera can be set using a feature-do,
-- but it may be moved independently with
-- camera-move, which specifies how to move
-- the camera dynamically during the step along the path defined (e.g.,
-- a zoom, a rotate).
-- Any value of pause (in 1:10th's of a second) will force a pause
-- after an image is shown.
-- If waitevent is TRUE, it will await a mouse or keypress and ignore
-- the pause value.

Biostruc-script ::= SEQUENCE OF Biostruc-script-step

Biostruc-script-step ::= SEQUENCE {
    step-id Step-id,
    step-name VisibleString OPTIONAL,
    feature-do SEQUENCE OF Other-feature OPTIONAL,
    camera-move Transform OPTIONAL,
    pause INTEGER DEFAULT 10,    -- in tenths of a second, so the default is 1 s
    waitevent BOOLEAN,           -- TRUE: wait for mouse or keypress, ignore pause
    extra INTEGER,               -- NOTE(review): purpose not described in this module
    jump Step-id OPTIONAL }

Step-id ::= INTEGER

END
--$Revision: 97143 $
--**********************************************************************
--
--  Definitions for CDD's
--
--  NCBI Structure Group
--
--  National Center for Biotechnology Information
--  National Institutes of Health
--  Bethesda, MD 20894 USA
--
--  October 1999
--
-- asntool -m cdd.asn -w 100 -o cdd.h
-- asntool -B objcdd -m cdd.asn -G -w 100 -I objseq.h objsset.h -K cdd.h -M asn.all
--**********************************************************************

NCBI-Cdd DEFINITIONS ::=
-- NCBI Conserved Domain Definition


BEGIN

EXPORTS Cdd-id, Cdd-id-set, Cdd, Cdd-set, Cdd-tree, Cdd-tree-set;

IMPORTS Date FROM NCBI-General
        Pub FROM NCBI-Pub
        Biostruc-annot-set FROM MMDB
        Bioseq FROM NCBI-Sequence
        Seq-annot FROM NCBI-Sequence
        Seq-entry FROM NCBI-Seqset
        Org-ref FROM NCBI-Organism
        Seq-interval FROM NCBI-Seqloc
        Score-set FROM NCBI-Seqalign;

-- Cdd's should not exist without a unique integer id, but alternative
-- id's may be present as well.

Global-id ::= SEQUENCE {
    accession VisibleString,
    release VisibleString OPTIONAL,
    version INTEGER OPTIONAL,          -- version 0 is the seed
    database VisibleString OPTIONAL    -- this is NOT the source!
    }                                  -- rather the database the
                                       -- object resides in

Cdd-id ::= CHOICE {
    uid INTEGER,
    gid Global-id
    }

Cdd-id-set ::= SEQUENCE OF Cdd-id

-- The description of CDD's refers to the specific set of aligned sequences,
-- the region that is being aligned and the information contained in the
-- alignment. It may contain a lengthy comment
-- describing the function of the domain as well as its origin and all
-- other anecdotal information that can't be pressed into a rigid scheme.
-- Crosslinks to reference papers available in PubMed are possible as well.
-- There can be as many of these as you want in the CDD.

Cdd-descr ::= CHOICE {
    othername VisibleString,    -- alternative names for the CDD
    category VisibleString,     -- intracellular, extracellular, etc.
    comment VisibleString,      -- this is where annotations go
    reference Pub,              -- a citation
    create-date Date,           -- valid for the current version
    tax-source Org-ref,         -- holds the highest common node
    source VisibleString,       -- the database the seeds were created
                                -- from, e.g. SMART, PFAM, etc..
    status INTEGER {
        unassigned(0),
        finished-ok(1),         -- to indicate
        pending-release(2),     -- processing status
        other-asis(3),          -- or final type
        matrix-only(4),         --
        other(255) }            -- for CD production
    }

Cdd-descr-set ::= SET OF Cdd-descr

-- the Cdd-tree contains the hierarchy of CDDs.
-- This object is separate from
-- the Cdd's themselves to allow it to be retrieved separately and to
-- operate as an index.

Cdd-tree ::= SEQUENCE {
    name VisibleString,
    id Cdd-id-set,
    description Cdd-descr-set OPTIONAL,
    parents Cdd-id-set OPTIONAL,
    children Cdd-id-set OPTIONAL,
    siblings Cdd-id-set OPTIONAL
    }

Cdd-tree-set ::= SEQUENCE OF Cdd-tree

-- Matrix definitions, these are supposed to store PSSMs and corresponding
-- matrices of relative residue frequencies.
-- the number of columns and rows is listed explicitly, values in columns
-- are stored column by column, i.e. in groups of nrows values for each column

Matrix ::= SEQUENCE {
    ncolumns INTEGER,
    nrows INTEGER,
    row-labels SEQUENCE OF VisibleString OPTIONAL,
    scale-factor INTEGER,
    columns SEQUENCE OF INTEGER
}

-- definition for matrix of pairwise "distances", stored as the upper
-- triangle of a squared n x n matrix (excluding the diagonal), this is
-- supposed to store pairwise percentages of identical residues, pairwise
-- alignment scores or E-values from pairwise BLAST sequence comparisons

Triangle ::= SEQUENCE {
    nelements INTEGER,
    scores Score-set
}

-- the Cdd is the basic ASN.1 object storing an annotated and curated
-- set of alignments (formulated as a set of pairwise master-slave
-- alignments).
-- The alignment data are contained in Seq-align-sets and Biostruc-feature-sets.
-- Version numbers in Global-ids are meant to be updated every time the Cdd is changed
-- in a way that does not require Global-ids to be changed (sequences added in update
-- cycle, annotation changed)

Cdd ::= SEQUENCE {
    name VisibleString,
    id Cdd-id-set,
    description Cdd-descr-set OPTIONAL,
    seqannot SEQUENCE OF Seq-annot OPTIONAL,  -- contains the alignment
    features Biostruc-annot-set OPTIONAL,     -- contains structure alignments
                                              -- or "core" definitions
    sequences Seq-entry OPTIONAL,             -- store as bioseq-set inside seq-entry
    profile-range Seq-interval OPTIONAL,      -- profile for this region only
                                              -- also stores the Seq-id of the master
    trunc-master Bioseq OPTIONAL,             -- holds the truncated master
                                              -- which may be something like a
                                              -- consensus, but still refers to the
                                              -- sequence coord. frame in profile-range
    posfreq Matrix OPTIONAL,                  -- relative residue frequencies
    scoremat Matrix OPTIONAL,                 -- Position dependent score matrix
    distance Triangle OPTIONAL                -- pairwise distances for all seqs.
}

Cdd-set ::= SET OF Cdd

END
--$Revision: 97143 $
--****************************************************************
--
--  NCBI MIME type (chemical/ncbi-asn1-ascii and chemical/ncbi-asn1-binary)
--  by Jonathan Epstein, February 1996
--
--****************************************************************

NCBI-Mime DEFINITIONS ::=
BEGIN

EXPORTS Ncbi-mime-asn1;
IMPORTS Biostruc, Biostruc-annot-set FROM MMDB
        Seq-entry FROM NCBI-Seqset
        Seq-annot FROM NCBI-Sequence
        Medline-entry FROM NCBI-Medline;

-- NOTE(review): the "just a structure" remark on 'entrez' looks out of date.
-- Entrez-general.data also admits Medline entries, sequence entries and
-- structure annotations. Confirm the intended wording.

Ncbi-mime-asn1 ::= CHOICE {
    entrez Entrez-general,            -- just a structure
    alignstruc Biostruc-align,        -- structures & sequences & alignments
    alignseq Biostruc-align-seq,      -- sequence alignment
    strucseq Biostruc-seq,            -- structure & sequences
    strucseqs Biostruc-seqs           -- structure & sequences & alignments
    -- others may be added here in the future
}

Biostruc-align ::= SEQUENCE {
    master Biostruc,
    slaves SET OF Biostruc,
    alignments Biostruc-annot-set,    -- structure alignments
    sequences SET OF Seq-entry,       -- sequences
    seqalign SET OF Seq-annot }

Biostruc-align-seq ::= SEQUENCE {     -- display seq structure align only
    sequences SET OF Seq-entry,       -- sequences
    seqalign SET OF Seq-annot }

Biostruc-seq ::= SEQUENCE {           -- display structure seq added by yanli
    structure Biostruc,
    sequences SET OF Seq-entry }

Biostruc-seqs ::= SEQUENCE {  -- display blast alignment along with neighbor's structure added by yanli
    structure Biostruc,
    sequences SET OF Seq-entry,       -- sequences
    seqalign SET OF Seq-annot }

Entrez-style ::= ENUMERATED {
    docsum (1),
    genbank (2) ,
    genpept (3) ,
    fasta (4) ,
    asn1 (5) ,
    graphic (6) ,
    alignment (7) ,
    globalview (8) ,
    report (9) ,
    medlars (10) ,
    embl (11) ,
    pdb (12) ,
    kinemage (13) }

Entrez-general ::= SEQUENCE {
    title     VisibleString OPTIONAL,
    data      CHOICE {
                  ml          Medline-entry,
                  prot        Seq-entry,
                  nuc         Seq-entry,
                  genome      Seq-entry,
                  structure   Biostruc,
                  strucAnnot  Biostruc-annot-set
              },
    style     Entrez-style,
    location  VisibleString OPTIONAL
}
END
--$Revision: 97143 $
--*********************************************************************
--
-- access.asn
--
-- messages for data access
--
--*********************************************************************

NCBI-Access DEFINITIONS ::=
BEGIN

EXPORTS Link-set;

-- links between same class = neighbors
-- links between other classes = links

Link-set ::= SEQUENCE {
    num      INTEGER,                       -- number of links to this doc type
    uids     SEQUENCE OF INTEGER OPTIONAL,  -- the links
    weights  SEQUENCE OF INTEGER OPTIONAL   -- the weights
}


END
--$Revision: 97143 $
--**********************************************************************
--
-- NCBI Sequence Feature Definition Module
-- by James Ostell, 1994
--
--**********************************************************************

NCBI-FeatDef DEFINITIONS ::=
BEGIN

EXPORTS FeatDef, FeatDefSet, FeatDispGroup, FeatDispGroupSet;


FeatDef ::= SEQUENCE {
    typelabel     VisibleString,  -- short label for type eg "CDS"
    menulabel     VisibleString,  -- label for a menu eg "Coding Region"
    featdef-key   INTEGER,        -- unique for this feature definition
    seqfeat-key   INTEGER,        -- SeqFeat.data.choice from objfeat.h
    entrygroup    INTEGER,        -- group for data entry
    displaygroup  INTEGER,        -- group for data display
    molgroup      FeatMolType     -- type of molecule (aa, na or both)
}

FeatMolType ::= ENUMERATED {
    aa    (1),  -- proteins
    na    (2),  -- nucleic acids
    both  (3)   -- both
}

FeatDefSet ::= SEQUENCE OF FeatDef  -- collections of definitions

-- A group entry: an integer key paired with a name.
FeatDispGroup ::= SEQUENCE {
    groupkey   INTEGER,
    groupname  VisibleString
}

FeatDispGroupSet ::= SEQUENCE OF FeatDispGroup

-- Bundles the group list together with the feature definition list.
FeatDefGroupSet ::= SEQUENCE {
    groups  FeatDispGroupSet,
    defs    FeatDefSet
}

END


--$Revision: 97143 $
--********************************************************************
--
-- Print Templates
-- James Ostell, 1993
--
--
--********************************************************************

NCBI-ObjPrt DEFINITIONS ::=
BEGIN

EXPORTS PrintTemplate, PrintTemplateSet;

PrintTemplate ::= SEQUENCE {
    name       TemplateName,            -- name for this template
    labelfrom  VisibleString OPTIONAL,  -- ASN.1 path to get label from
    format     PrintFormat
}

TemplateName ::= VisibleString

PrintTemplateSet ::= SEQUENCE OF PrintTemplate

PrintFormat ::= SEQUENCE {
    asn1    VisibleString,           -- ASN.1 partial path for this
    label   VisibleString OPTIONAL,  -- printable label
    prefix  VisibleString OPTIONAL,
    suffix  VisibleString OPTIONAL,
    form    PrintForm
}

-- Forms for various ASN.1 components
PrintForm ::= CHOICE {
    block         PrintFormBlock,
    boolean       PrintFormBoolean,
    enum          PrintFormEnum,
    text          PrintFormText,
    use-template  TemplateName,
    user          UserFormat,
    null          NULL              -- rarely used
}

UserFormat ::= SEQUENCE {
    printfunc    VisibleString,
    defaultfunc  VisibleString OPTIONAL
}

-- for SEQUENCE, SET
PrintFormBlock ::= SEQUENCE {
    separator   VisibleString OPTIONAL,
    components  SEQUENCE OF PrintFormat
}

PrintFormBoolean ::= SEQUENCE {
    true   VisibleString OPTIONAL,
    false  VisibleString OPTIONAL
}

PrintFormEnum ::= SEQUENCE {
    values  SEQUENCE OF VisibleString OPTIONAL
}

PrintFormText ::= SEQUENCE {
    textfunc  VisibleString OPTIONAL
}

END

--$Revision: 97143 $
--
--  *********************************************************************
--
--  These are code and conversion tables for NCBI sequence codes
--  ASN.1 for the sequences themselves are defined in seq.asn
--
--  Seq-map-table and Seq-code-table REQUIRE that codes start with 0
--  and increase continuously.  So IUPAC codes, which are upper case
--  letters will always have 65 0 cells before the codes begin.  This
--  allows all codes to do indexed lookups for things
--
--  Valid names for code tables are:
--      IUPACna
--      IUPACaa
--      IUPACeaa
--      IUPACaa3     3 letter amino acid codes : parallels IUPACeaa
--                     display only, not a data exchange type
--      NCBI2na
--      NCBI4na
--      NCBI8na
--      NCBI8aa
--      NCBIstdaa
--   probability types map to IUPAC types for display as characters

NCBI-SeqCode DEFINITIONS ::=
BEGIN

EXPORTS Seq-code-table, Seq-map-table, Seq-code-set;

-- sequence representations
Seq-code-type ::= ENUMERATED {
    iupacna    (1),   -- IUPAC 1 letter nuc acid code
    iupacaa    (2),   -- IUPAC 1 letter amino acid code
    ncbi2na    (3),   -- 2 bit nucleic acid code
    ncbi4na    (4),   -- 4 bit nucleic acid code
    ncbi8na    (5),   -- 8 bit extended nucleic acid code
    ncbipna    (6),   -- nucleic acid probabilities
    ncbi8aa    (7),   -- 8 bit extended amino acid codes
    ncbieaa    (8),   -- extended ASCII 1 letter aa codes
    ncbipaa    (9),   -- amino acid probabilities
    iupacaa3   (10),  -- 3 letter code only for display
    ncbistdaa  (11)   -- consecutive codes for std aas, 0-25
}

-- for tables of sequence mappings
Seq-map-table ::= SEQUENCE {
    from      Seq-code-type,       -- code to map from
    to        Seq-code-type,       -- code to map to
    num       INTEGER,             -- number of rows in table
    start-at  INTEGER DEFAULT 0,   -- index offset of first element
    table     SEQUENCE OF INTEGER  -- table of values, in from-to order
}

-- for names of coded values
Seq-code-table ::= SEQUENCE {
    code        Seq-code-type,      -- name of code
    num         INTEGER,            -- number of rows in table
    one-letter  BOOLEAN,            -- symbol is ALWAYS 1 letter?
    start-at    INTEGER DEFAULT 0,  -- index offset of first element
    table       SEQUENCE OF SEQUENCE {
                    symbol  VisibleString,  -- the printed symbol or letter
                    name    VisibleString   -- an explanatory name or string
                },
    comps       SEQUENCE OF INTEGER OPTIONAL  -- pointers to complement nuc acid
}

-- for distribution
Seq-code-set ::= SEQUENCE {
    codes  SET OF Seq-code-table OPTIONAL,
    maps   SET OF Seq-map-table OPTIONAL
}

END

