--$Revision: 6.0 $
--**********************************************************************
--
--  asn.all
--    this file contains all NCBI ASN.1 specifications together
--
--  by James Ostell, 1990
--
--**********************************************************************

--$Revision: 6.6 $
--**********************************************************************
--
--  NCBI General Data elements
--  by James Ostell, 1990
--  Version 3.0 - June 1994
--
--**********************************************************************

NCBI-General DEFINITIONS ::=
BEGIN

EXPORTS Date, Person-id, Object-id, Dbtag, Int-fuzz, User-object, User-field;

-- StringStore is really a VisibleString.  It is used to define very
--   long strings which may need to be stored by the receiving program
--   in special structures, such as a ByteStore, but it's just a hint.
--   AsnTool stores StringStores in ByteStore structures.
-- OCTET STRINGs are also stored in ByteStores by AsnTool
--
-- typedef struct bsunit {             /* for building multiline strings */
--    Nlm_Handle str;                  /* the string piece */
--    Nlm_Int2 len_avail,
--        len;
--    struct bsunit PNTR next; }       /* the next one */
-- Nlm_BSUnit, PNTR Nlm_BSUnitPtr;
--
-- typedef struct bytestore {
--    Nlm_Int4 seekptr,       /* current position */
--        totlen,             /* total stored data length in bytes */
--        chain_offset;       /* offset in ByteStore of first byte in curchain */
--    Nlm_BSUnitPtr chain,    /* chain of elements */
--        curchain;           /* the BSUnit containing seekptr */
-- } Nlm_ByteStore, PNTR Nlm_ByteStorePtr;
--
-- AsnTool incorporates this as a primitive type, so the definition
--   is here just for completeness
--
--  StringStore ::= [APPLICATION 1] IMPLICIT OCTET STRING
--

-- BigInt is really an INTEGER. It is used to warn the receiving code to expect
--   a value bigger than Int4 (actually Int8). It will be stored in DataVal.bigintvalue
--
--   Like StringStore, AsnTool incorporates it as a primitive. The definition would be:
--   BigInt ::= [APPLICATION 2] IMPLICIT INTEGER
--

-- Date is used to replace the (overly complex) UTCTime, GeneralizedTime
--   of ASN.1
--   It stores only a date
--

Date ::= CHOICE {
    str VisibleString ,                 -- for those unparsed dates
    std Date-std }                      -- use this if you can

Date-std ::= SEQUENCE {                 -- NOTE: this is NOT a unix tm struct
    year INTEGER ,                      -- full year (including 1900)
    month INTEGER OPTIONAL ,            -- month (1-12)
    day INTEGER OPTIONAL ,              -- day of month (1-31)
    season VisibleString OPTIONAL ,     -- for "spring", "may-june", etc
    hour INTEGER OPTIONAL ,             -- hour of day (0-23)
    minute INTEGER OPTIONAL ,           -- minute of hour (0-59)
    second INTEGER OPTIONAL }           -- second of minute (0-59)

-- Dbtag is generalized for tagging
-- eg. { "Social Security", str "023-79-8841" }
-- or  { "member", id 8882224 }

Dbtag ::= SEQUENCE {
    db VisibleString ,                  -- name of database or system
    tag Object-id }                     -- appropriate tag

-- Object-id can tag or name anything
--   (either an integer id or a string)

Object-id ::= CHOICE {
    id INTEGER ,
    str VisibleString }

-- Person-id is to define a std element for people
--

Person-id ::= CHOICE {
    dbtag Dbtag ,                       -- any defined database tag
    name Name-std ,                     -- structured name
    ml VisibleString ,                  -- MEDLINE name (semi-structured)
                                        --    eg. "Jones RM"
    str VisibleString,                  -- unstructured name
    consortium VisibleString }          -- consortium name

Name-std ::= SEQUENCE {                 -- Structured names
    last VisibleString ,
    first VisibleString OPTIONAL ,
    middle VisibleString OPTIONAL ,
    full VisibleString OPTIONAL ,       -- full name eg. "J. John Smith, Esq"
    initials VisibleString OPTIONAL,    -- first + middle initials
    suffix VisibleString OPTIONAL ,     -- Jr, Sr, III
    title VisibleString OPTIONAL }      -- Dr., Sister, etc

--**** Int-fuzz **********************************************
--*
--*   uncertainties in integer values

Int-fuzz ::= CHOICE {
    p-m INTEGER ,                       -- plus or minus fixed amount
    range SEQUENCE {                    -- max to min
        max INTEGER ,
        min INTEGER } ,
    pct INTEGER ,                       -- % plus or minus (x10) 0-1000
    lim ENUMERATED {                    -- some limit value
        unk (0) ,                       -- unknown
        gt (1) ,                        -- greater than
        lt (2) ,                        -- less than
        tr (3) ,                        -- space to right of position
        tl (4) ,                        -- space to left of position
        circle (5) ,                    -- artificial break at origin of circle
        other (255) } ,                 -- something else
    alt SET OF INTEGER }                -- set of alternatives for the integer


--**** User-object **********************************************
--*
--*   a general object for a user defined structured data item
--*    used by Seq-feat and Seq-descr

User-object ::= SEQUENCE {
    class VisibleString OPTIONAL ,      -- endeavor which designed this object
    type Object-id ,                    -- type of object within class
    data SEQUENCE OF User-field }       -- the object itself

User-field ::= SEQUENCE {
    label Object-id ,                   -- field label
    num INTEGER OPTIONAL ,              -- required for strs, ints, reals, oss
    data CHOICE {                       -- field contents
        str UTF8String ,
        int INTEGER ,
        real REAL ,
        bool BOOLEAN ,
        os OCTET STRING ,
        object User-object ,            -- for using other definitions
        strs SEQUENCE OF UTF8String ,
        ints SEQUENCE OF INTEGER ,
        reals SEQUENCE OF REAL ,
        oss SEQUENCE OF OCTET STRING ,
        fields SEQUENCE OF User-field ,
        objects SEQUENCE OF User-object } }



END

--$Revision: 6.3 $
--****************************************************************
--
--  NCBI Bibliographic data elements
--  by James Ostell, 1990
--
--  Taken from the American National Standard for
--      Bibliographic References
--      ANSI Z39.29-1977
--  Version 3.0 - June 1994
--  PubMedId added in 1996
--  ArticleIds and eprint elements added in 1999
--
--****************************************************************

NCBI-Biblio DEFINITIONS ::=
BEGIN

EXPORTS Cit-art, Cit-jour, Cit-book, Cit-pat, Cit-let, Id-pat, Cit-gen,
        Cit-proc, Cit-sub, Title, Author, PubMedId, DOI;

IMPORTS Person-id, Date, Dbtag FROM NCBI-General;

    -- Article Ids

ArticleId ::= CHOICE {                  -- can be many ids for an article
    pubmed PubMedId ,                   -- see types below
    medline MedlineUID ,
    doi DOI ,
    pii PII ,
    pmcid PmcID ,
    pmcpid PmcPid ,
    pmpid PmPid ,
    other Dbtag }                       -- generic catch all

PubMedId ::= INTEGER                    -- Id from the PubMed database at NCBI
MedlineUID ::= INTEGER                  -- Id from MEDLINE
DOI ::= VisibleString                   -- Document Object Identifier
PII ::= VisibleString                   -- Controlled Publisher Identifier
PmcID ::= INTEGER                       -- PubMed Central Id
PmcPid ::= VisibleString                -- Publisher Id supplied to PubMed Central
PmPid ::= VisibleString                 -- Publisher Id supplied to PubMed

ArticleIdSet ::= SET OF ArticleId

    -- Status Dates

PubStatus ::= INTEGER {                 -- points of publication
    received (1) ,                      -- date manuscript received for review
    accepted (2) ,                      -- accepted for publication
    epublish (3) ,                      -- published electronically by publisher
    ppublish (4) ,                      -- published in print by publisher
    revised (5) ,                       -- article revised by publisher/author
    pmc (6) ,                           -- article first appeared in PubMed Central
    pmcr (7) ,                          -- article revision in PubMed Central
    pubmed (8) ,                        -- article citation first appeared in PubMed
    pubmedr (9) ,                       -- article citation revision in PubMed
    aheadofprint (10),                  -- epublish, but will be followed by print
    premedline (11),                    -- date into PreMedline status
    medline (12),                       -- date made a MEDLINE record
    other (255) }

PubStatusDate ::= SEQUENCE {            -- done as a structure so fields can be added
    pubstatus PubStatus ,
    date Date }                         -- time may be added later

PubStatusDateSet ::= SET OF PubStatusDate

    -- Citation Types

Cit-art ::= SEQUENCE {                  -- article in journal or book
    title Title OPTIONAL ,              -- title of paper (ANSI requires)
    authors Auth-list OPTIONAL ,        -- authors (ANSI requires)
    from CHOICE {                       -- journal or book
        journal Cit-jour ,
        book Cit-book ,
        proc Cit-proc } ,
    ids ArticleIdSet OPTIONAL }         -- lots of ids

Cit-jour ::= SEQUENCE {                 -- Journal citation
    title Title ,                       -- title of journal
    imp Imprint }

Cit-book ::= SEQUENCE {                 -- Book citation
    title Title ,                       -- Title of book
    coll Title OPTIONAL ,               -- part of a collection
    authors Auth-list,                  -- authors
    imp Imprint }

Cit-proc ::= SEQUENCE {                 -- Meeting proceedings
    book Cit-book ,                     -- citation to meeting
    meet Meeting }                      -- time and location of meeting

    -- Patent number and date-issue were made optional in 1997 to
    --   support patent applications being issued from the USPTO
    --   Semantically a Cit-pat must have either a patent number or
    --   an application number (or both) to be valid

Cit-pat ::= SEQUENCE {                  -- patent citation
    title VisibleString ,
    authors Auth-list,                  -- author/inventor
    country VisibleString ,             -- Patent Document Country
    doc-type VisibleString ,            -- Patent Document Type
    number VisibleString OPTIONAL,      -- Patent Document Number
    date-issue Date OPTIONAL,           -- Patent Issue/Pub Date
    class SEQUENCE OF VisibleString OPTIONAL ,      -- Patent Doc Class Code
    app-number VisibleString OPTIONAL , -- Patent Doc Appl Number
    app-date Date OPTIONAL ,            -- Patent Appl File Date
    applicants Auth-list OPTIONAL ,     -- Applicants
    assignees Auth-list OPTIONAL ,      -- Assignees
    priority SEQUENCE OF Patent-priority OPTIONAL , -- Priorities
    abstract VisibleString OPTIONAL }   -- abstract of patent

Patent-priority ::= SEQUENCE {
    country VisibleString ,             -- Patent country code
    number VisibleString ,              -- number assigned in that country
    date Date }                         -- date of application

Id-pat ::= SEQUENCE {                   -- just to identify a patent
    country VisibleString ,             -- Patent Document Country
    id CHOICE {
        number VisibleString ,          -- Patent Document Number
        app-number VisibleString } ,    -- Patent Doc Appl Number
    doc-type VisibleString OPTIONAL }   -- Patent Doc Type

Cit-let ::= SEQUENCE {                  -- letter, thesis, or manuscript
    cit Cit-book ,                      -- same fields as a book
    man-id VisibleString OPTIONAL ,     -- Manuscript identifier
    type ENUMERATED {
        manuscript (1) ,
        letter (2) ,
        thesis (3) } OPTIONAL }
                                        -- NOTE: this is just to cite a
                                        -- direct data submission, see NCBI-Submit
                                        -- for the form of a sequence submission
Cit-sub ::= SEQUENCE {                  -- citation for a direct submission
    authors Auth-list ,                 -- not necessarily authors of the paper
    imp Imprint OPTIONAL ,              -- this only used to get date.. will go
    medium ENUMERATED {                 -- medium of submission
        paper (1) ,
        tape (2) ,
        floppy (3) ,
        email (4) ,
        other (255) } OPTIONAL ,
    date Date OPTIONAL ,                -- replaces imp, will become required
    descr VisibleString OPTIONAL }      -- description of changes for public view

Cit-gen ::= SEQUENCE {                  -- NOT from ANSI, this is a catchall
    cit VisibleString OPTIONAL ,        -- anything, not parsable
    authors Auth-list OPTIONAL ,
    muid INTEGER OPTIONAL ,             -- medline uid
    journal Title OPTIONAL ,
    volume VisibleString OPTIONAL ,
    issue VisibleString OPTIONAL ,
    pages VisibleString OPTIONAL ,
    date Date OPTIONAL ,
    serial-number INTEGER OPTIONAL ,    -- for GenBank style references
    title VisibleString OPTIONAL ,      -- eg. cit="unpublished",title="title"
    pmid PubMedId OPTIONAL }            -- PubMed Id


    -- Authorship Group
Auth-list ::= SEQUENCE {
    names CHOICE {
        std SEQUENCE OF Author ,        -- full citations
        ml SEQUENCE OF VisibleString ,  -- MEDLINE, semi-structured
        str SEQUENCE OF VisibleString } , -- free for all
    affil Affil OPTIONAL }              -- author affiliation

Author ::= SEQUENCE {
    name Person-id ,                    -- Author, Primary or Secondary
    level ENUMERATED {
        primary (1),
        secondary (2) } OPTIONAL ,
    role ENUMERATED {                   -- Author Role Indicator
        compiler (1),
        editor (2),
        patent-assignee (3),
        translator (4) } OPTIONAL ,
    affil Affil OPTIONAL ,
    is-corr BOOLEAN OPTIONAL }          -- TRUE if corresponding author

Affil ::= CHOICE {
    str VisibleString ,                 -- unparsed string
    std SEQUENCE {                      -- std representation
        affil VisibleString OPTIONAL ,      -- Author Affiliation, Name
        div VisibleString OPTIONAL ,        -- Author Affiliation, Division
        city VisibleString OPTIONAL ,       -- Author Affiliation, City
        sub VisibleString OPTIONAL ,        -- Author Affiliation, County Sub
        country VisibleString OPTIONAL ,    -- Author Affiliation, Country
        street VisibleString OPTIONAL ,     -- street address, not ANSI
        email VisibleString OPTIONAL ,
        fax VisibleString OPTIONAL ,
        phone VisibleString OPTIONAL ,
        postal-code VisibleString OPTIONAL }}

    -- Title Group
    -- Valid for = A = Analytic (Cit-art)
    --             J = Journals (Cit-jour)
    --             B = Book (Cit-book)
                                        -- Valid for:
Title ::= SET OF CHOICE {
    name VisibleString ,                -- Title, Anal,Coll,Mono    AJB
    tsub VisibleString ,                -- Title, Subordinate       A B
    trans VisibleString ,               -- Title, Translated        AJB
    jta VisibleString ,                 -- Title, Abbreviated        J
    iso-jta VisibleString ,             -- specifically ISO jta      J
    ml-jta VisibleString ,              -- specifically MEDLINE jta  J
    coden VisibleString ,               -- a coden                   J
    issn VisibleString ,                -- ISSN                      J
    abr VisibleString ,                 -- Title, Abbreviated         B
    isbn VisibleString }                -- ISBN                       B

Imprint ::= SEQUENCE {                  -- Imprint group
    date Date ,                         -- date of publication
    volume VisibleString OPTIONAL ,
    issue VisibleString OPTIONAL ,
    pages VisibleString OPTIONAL ,
    section VisibleString OPTIONAL ,
    pub Affil OPTIONAL,                 -- publisher, required for book
    cprt Date OPTIONAL,                 -- copyright date, "    "        "
    part-sup VisibleString OPTIONAL ,   -- part/sup of volume
    language VisibleString DEFAULT "ENG" ,  -- put here for simplicity
    prepub ENUMERATED {                 -- for prepublication citations
        submitted (1) ,                 -- submitted, not accepted
        in-press (2) ,                  -- accepted, not published
        other (255) } OPTIONAL ,
    part-supi VisibleString OPTIONAL ,  -- part/sup on issue
    retract CitRetract OPTIONAL ,       -- retraction info
    pubstatus PubStatus OPTIONAL ,      -- current status of this publication
    history PubStatusDateSet OPTIONAL } -- dates for this record

CitRetract ::= SEQUENCE {
    type ENUMERATED {                   -- retraction of an entry
        retracted (1) ,                 -- this citation retracted
        notice (2) ,                    -- this citation is a retraction notice
        in-error (3) ,                  -- an erratum was published about this
        erratum (4) } ,                 -- this is a published erratum
    exp VisibleString OPTIONAL }        -- citation and/or explanation

Meeting ::= SEQUENCE {
    number VisibleString ,
    date Date ,
    place Affil OPTIONAL }


END


--$Revision: 6.0 $
--**********************************************************************
--
--  MEDLINE data definitions
--  James Ostell, 1990
--
--  enhanced in 1996 to support PubMed records as well by simply adding
--    the PubMedId and making MedlineId optional
--
--**********************************************************************

NCBI-Medline DEFINITIONS ::=
BEGIN

EXPORTS Medline-entry, Medline-si;

IMPORTS Cit-art, PubMedId FROM NCBI-Biblio
        Date FROM NCBI-General;

                                        -- a MEDLINE or PubMed entry
Medline-entry ::= SEQUENCE {
    uid INTEGER OPTIONAL ,              -- MEDLINE UID, sometimes not yet available if from PubMed
    em Date ,                           -- Entry Month
    cit Cit-art ,                       -- article citation
    abstract VisibleString OPTIONAL ,
    mesh SET OF Medline-mesh OPTIONAL ,
    substance SET OF Medline-rn OPTIONAL ,
    xref SET OF Medline-si OPTIONAL ,
    idnum SET OF VisibleString OPTIONAL ,       -- ID Number (grants, contracts)
    gene SET OF VisibleString OPTIONAL ,
    pmid PubMedId OPTIONAL ,                    -- MEDLINE records may include the PubMedId
    pub-type SET OF VisibleString OPTIONAL,     -- may show publication types (review, etc)
    mlfield SET OF Medline-field OPTIONAL ,     -- additional Medline field types
    status INTEGER {
        publisher (1) ,                 -- record as supplied by publisher
        premedline (2) ,                -- premedline record
        medline (3) } DEFAULT medline } -- regular medline record

Medline-mesh ::= SEQUENCE {
    mp BOOLEAN DEFAULT FALSE ,          -- TRUE if main point (*)
    term VisibleString ,                -- the MeSH term
    qual SET OF Medline-qual OPTIONAL } -- qualifiers

Medline-qual ::= SEQUENCE {
    mp BOOLEAN DEFAULT FALSE ,          -- TRUE if main point
    subh VisibleString }                -- the subheading

Medline-rn ::= SEQUENCE {               -- medline substance records
    type ENUMERATED {                   -- type of record
        nameonly (0) ,
        cas (1) ,                       -- CAS number
        ec (2) } ,                      -- EC number
    cit VisibleString OPTIONAL ,        -- CAS or EC number if present
    name VisibleString }                -- name (always present)

Medline-si ::= SEQUENCE {               -- medline cross reference records
    type ENUMERATED {                   -- type of xref
        ddbj (1) ,                      -- DNA Data Bank of Japan
        carbbank (2) ,                  -- Carbohydrate Structure Database
        embl (3) ,                      -- EMBL Data Library
        hdb (4) ,                       -- Hybridoma Data Bank
        genbank (5) ,                   -- GenBank
        hgml (6) ,                      -- Human Gene Map Library
        mim (7) ,                       -- Mendelian Inheritance in Man
        msd (8) ,                       -- Microbial Strains Database
        pdb (9) ,                       -- Protein Data Bank (Brookhaven)
        pir (10) ,                      -- Protein Identification Resource
        prfseqdb (11) ,                 -- Protein Research Foundation (Japan)
        psd (12) ,                      -- Protein Sequence Database (Japan)
        swissprot (13) ,                -- SwissProt
        gdb (14) } ,                    -- Genome Data Base
    cit VisibleString OPTIONAL }        -- the citation/accession number

Medline-field ::= SEQUENCE {
    type INTEGER {                      -- Keyed type
        other (0) ,                     -- look in line code
        comment (1) ,                   -- comment line
        erratum (2) } ,                 -- retracted, corrected, etc
    str VisibleString ,                 -- the text
    ids SEQUENCE OF DocRef OPTIONAL }   -- pointers relevant to this text

DocRef ::= SEQUENCE {                   -- reference to a document
    type INTEGER {
        medline (1) ,
        pubmed (2) ,
        ncbigi (3) } ,
    uid INTEGER }

END

--$Revision: 6.0 $
--**********************************************************************
--
--  PUBMED data definitions
--
--**********************************************************************

NCBI-PubMed DEFINITIONS ::=
BEGIN

EXPORTS Pubmed-entry, Pubmed-url;

IMPORTS PubMedId FROM NCBI-Biblio
        Medline-entry FROM NCBI-Medline;

Pubmed-entry ::= SEQUENCE {             -- a PubMed entry
    -- PUBMED records must include the PubMedId
    pmid PubMedId,

    -- Medline entry information
    medent Medline-entry OPTIONAL,

    -- Publisher name
    publisher VisibleString OPTIONAL,

    -- List of URL to publisher cite
    urls SET OF Pubmed-url OPTIONAL,

    -- Publisher's article identifier
    pubid VisibleString OPTIONAL
}

Pubmed-url ::= SEQUENCE {
    location VisibleString OPTIONAL,    -- Location code
    url VisibleString                   -- Selected URL for location
}

END
--$Revision: 6.0 $
--**********************************************************************
--
--  MEDLARS data definitions
--  Grigoriy Starchenko, 1997
--
--**********************************************************************

NCBI-Medlars DEFINITIONS ::=
BEGIN

EXPORTS Medlars-entry, Medlars-record;

IMPORTS PubMedId FROM NCBI-Biblio;

Medlars-entry ::= SEQUENCE {            -- a MEDLARS entry
    pmid PubMedId,                      -- All entries in PubMed must have it
    muid INTEGER OPTIONAL,              -- Medline(OCCS) id
    recs SET OF Medlars-record          -- List of Medlars records
}

Medlars-record ::= SEQUENCE {
    code INTEGER,                       -- Unit record field type integer form
    abbr VisibleString OPTIONAL,        -- Unit record field type abbreviation form
    data VisibleString                  -- Unit record data
}

END
--$Revision: 6.0 $
--********************************************************************
--
--  Publication common set
--  James Ostell, 1990
--
--  This is the base class definitions for Publications of all sorts
--
--  support for PubMedId added in 1996
--********************************************************************

NCBI-Pub DEFINITIONS ::=
BEGIN

EXPORTS Pub, Pub-set, Pub-equiv;

IMPORTS Medline-entry FROM NCBI-Medline
        Cit-art, Cit-jour, Cit-book, Cit-proc, Cit-pat, Id-pat, Cit-gen,
        Cit-let, Cit-sub, PubMedId FROM NCBI-Biblio;

Pub ::= CHOICE {
    gen Cit-gen ,                       -- general or generic unparsed
    sub Cit-sub ,                       -- submission
    medline Medline-entry ,
    muid INTEGER ,                      -- medline uid
    article Cit-art ,
    journal Cit-jour ,
    book Cit-book ,
    proc Cit-proc ,                     -- proceedings of a meeting
    patent Cit-pat ,
    pat-id Id-pat ,                     -- identify a patent
    man Cit-let ,                       -- manuscript, thesis, or letter
    equiv Pub-equiv,                    -- to cite a variety of ways
    pmid PubMedId }                     -- PubMedId

Pub-equiv ::= SET OF Pub                -- equivalent identifiers for same citation

Pub-set ::= CHOICE {
    pub SET OF Pub ,
    medline SET OF Medline-entry ,
    article SET OF Cit-art ,
    journal SET OF Cit-jour ,
    book SET OF Cit-book ,
    proc SET OF Cit-proc ,              -- proceedings of a meeting
    patent SET OF Cit-pat }

END

--$Revision: 6.7 $
--**********************************************************************
--
--  NCBI Sequence location and identifier elements
--  by James Ostell, 1990
--
--  Version 3.0 - 1994
--
--**********************************************************************

NCBI-Seqloc DEFINITIONS ::=
BEGIN

EXPORTS Seq-id, Seq-loc, Seq-interval, Packed-seqint, Seq-point, Packed-seqpnt,
        Na-strand, Giimport-id;

IMPORTS Object-id, Int-fuzz, Dbtag, Date FROM NCBI-General
        Id-pat FROM NCBI-Biblio
        Feat-id FROM NCBI-Seqfeat;

--*** Sequence identifiers ********************************
--*

Seq-id ::= CHOICE {
    local Object-id ,                   -- local use
    gibbsq INTEGER ,                    -- Geninfo backbone seqid
    gibbmt INTEGER ,                    -- Geninfo backbone moltype
    giim Giimport-id ,                  -- Geninfo import id
    genbank Textseq-id ,
    embl Textseq-id ,
    pir Textseq-id ,
    swissprot Textseq-id ,
    patent Patent-seq-id ,
    other Textseq-id ,                  -- for historical reasons, 'other' = 'refseq'
    general Dbtag ,                     -- for other databases
    gi INTEGER ,                        -- GenInfo Integrated Database
    ddbj Textseq-id ,                   -- DDBJ
    prf Textseq-id ,                    -- PRF SEQDB
    pdb PDB-seq-id ,                    -- PDB sequence
    tpg Textseq-id ,                    -- Third Party Annot/Seq Genbank
    tpe Textseq-id ,                    -- Third Party Annot/Seq EMBL
    tpd Textseq-id ,                    -- Third Party Annot/Seq DDBJ
    gpipe Textseq-id ,                  -- Internal NCBI genome pipeline processing ID
    named-annot-track Textseq-id        -- Internal named annotation tracking ID
}

Seq-id-set ::= SET OF Seq-id


Patent-seq-id ::= SEQUENCE {
    seqid INTEGER ,                     -- number of sequence in patent
    cit Id-pat }                        -- patent citation

Textseq-id ::= SEQUENCE {
    name VisibleString OPTIONAL ,
    accession VisibleString OPTIONAL ,
    release VisibleString OPTIONAL ,
    version INTEGER OPTIONAL }

Giimport-id ::= SEQUENCE {
    id INTEGER ,                        -- the id to use here
    db VisibleString OPTIONAL ,         -- dbase used in
    release VisibleString OPTIONAL }    -- the release

PDB-seq-id ::= SEQUENCE {               -- must set either chain or chain-id, but not both
    mol PDB-mol-id ,                    -- the molecule name
    chain INTEGER DEFAULT 32 ,          -- Deprecated : For single ASCII character
                                        -- chain identifiers of pre-2015 PDB structures
    rel Date OPTIONAL ,                 -- release date, month and year
    chain-id VisibleString OPTIONAL }   -- chain identifier

PDB-mol-id ::= VisibleString            -- name of mol, 4 chars

--*** Sequence locations **********************************
--*

Seq-loc ::= CHOICE {
    null NULL ,                         -- not placed
    empty Seq-id ,                      -- to NULL one Seq-id in a collection
    whole Seq-id ,                      -- whole sequence
    int Seq-interval ,                  -- from to
    packed-int Packed-seqint ,
    pnt Seq-point ,
    packed-pnt Packed-seqpnt ,
    mix Seq-loc-mix ,
    equiv Seq-loc-equiv ,               -- equivalent sets of locations
    bond Seq-bond ,
    feat Feat-id }                      -- indirect, through a Seq-feat


Seq-interval ::= SEQUENCE {
    from INTEGER ,
    to INTEGER ,
    strand Na-strand OPTIONAL ,
    id Seq-id ,                         -- WARNING: this used to be optional
    fuzz-from Int-fuzz OPTIONAL ,
    fuzz-to Int-fuzz OPTIONAL }

Packed-seqint ::= SEQUENCE OF Seq-interval

Seq-point ::= SEQUENCE {
    point INTEGER ,
    strand Na-strand OPTIONAL ,
    id Seq-id ,                         -- WARNING: this used to be optional
    fuzz Int-fuzz OPTIONAL }

Packed-seqpnt ::= SEQUENCE {
    strand Na-strand OPTIONAL ,
    id Seq-id ,
    fuzz Int-fuzz OPTIONAL ,
    points SEQUENCE OF INTEGER }

Na-strand ::= ENUMERATED {              -- strand of nucleic acid
    unknown (0) ,
    plus (1) ,
    minus (2) ,
    both (3) ,                          -- in forward orientation
    both-rev (4) ,                      -- in reverse orientation
    other (255) }

Seq-bond ::= SEQUENCE {                 -- bond between residues
    a Seq-point ,                       -- connection to at least one residue
    b Seq-point OPTIONAL }              -- other end may not be available

Seq-loc-mix ::= SEQUENCE OF Seq-loc     -- this will hold anything

Seq-loc-equiv ::= SET OF Seq-loc        -- for a set of equivalent locations

END


--$Revision: 6.26 $
--**********************************************************************
--
--  NCBI Sequence elements
--  by James Ostell, 1990
--  Version 3.0 - June 1994
--
--**********************************************************************

NCBI-Sequence DEFINITIONS ::=
BEGIN

EXPORTS Annotdesc, Annot-descr, Bioseq, GIBB-mol, Heterogen, MolInfo,
        Numbering, Pubdesc, Seq-annot, Seq-data, Seqdesc, Seq-descr, Seq-ext,
        Seq-hist, Seq-inst, Seq-literal, Delta-ext, Seq-gap;

IMPORTS Date, Int-fuzz, Dbtag, Object-id, User-object FROM NCBI-General
        Seq-align FROM NCBI-Seqalign
        Seq-feat, ModelEvidenceSupport FROM NCBI-Seqfeat
        Seq-graph FROM NCBI-Seqres
        Pub-equiv FROM NCBI-Pub
        Org-ref FROM NCBI-Organism
        BioSource FROM NCBI-BioSource
        Seq-id, Seq-loc FROM NCBI-Seqloc
        GB-block FROM GenBank-General
        PIR-block FROM PIR-General
        EMBL-block FROM EMBL-General
        SP-block FROM SP-General
        PRF-block FROM PRF-General
        PDB-block FROM PDB-General
        Seq-table FROM NCBI-SeqTable;

--*** Sequence ********************************
--*

Bioseq ::= SEQUENCE {
    id SET OF Seq-id ,                  -- equivalent identifiers
    descr Seq-descr OPTIONAL ,          -- descriptors
    inst Seq-inst ,                     -- the sequence data
    annot SET OF Seq-annot OPTIONAL }

--*** Descriptors *****************************
--*

Seq-descr ::= SET OF Seqdesc

Seqdesc ::= CHOICE {
    mol-type GIBB-mol ,                 -- type of molecule
    modif SET OF GIBB-mod ,             -- modifiers
    method GIBB-method ,                -- sequencing method
    name VisibleString ,                -- a name for this sequence
    title VisibleString ,               -- a title for this sequence
    org Org-ref ,                       -- if all from one organism
    comment VisibleString ,             -- a more extensive comment
    num Numbering ,                     -- a numbering system
    maploc Dbtag ,                      -- map location of this sequence
    pir PIR-block ,                     -- PIR specific info
    genbank GB-block ,                  -- GenBank specific info
    pub Pubdesc ,                       -- a reference to the publication
    region VisibleString ,              -- overall region (globin locus)
    user User-object ,                  -- user defined object
    sp SP-block ,                       -- SWISSPROT specific info
    dbxref Dbtag ,                      -- xref to other databases
    embl EMBL-block ,                   -- EMBL specific information
    create-date Date ,                  -- date entry first created/released
    update-date Date ,                  -- date of last update
    prf PRF-block ,                     -- PRF specific information
    pdb PDB-block ,                     -- PDB specific information
    het Heterogen ,                     -- cofactor, etc associated but not bound
    source BioSource ,                  -- source of materials, includes Org-ref
    molinfo MolInfo ,                   -- info on the molecule and techniques
    modelev ModelEvidenceSupport        -- model evidence for XM records
}

--******* NOTE:
--*       mol-type, modif, method, and org are consolidated and expanded
--*       in Org-ref, BioSource, and MolInfo in this specification. They
--*       will be removed in later specifications. Do not use them in
--*       the future. Instead expect the new structures.
--*
--***************************

--********************************************************************
--
-- MolInfo gives information on the
-- classification of the type and quality of the sequence
--
-- WARNING: this will replace GIBB-mol, GIBB-mod, GIBB-method
--
--********************************************************************

MolInfo ::= SEQUENCE {
    biomol INTEGER {
        unknown (0) ,
        genomic (1) ,
        pre-RNA (2) ,                   -- precursor RNA of any sort really
        mRNA (3) ,
        rRNA (4) ,
        tRNA (5) ,
        snRNA (6) ,
        scRNA (7) ,
        peptide (8) ,
        other-genetic (9) ,             -- other genetic material
        genomic-mRNA (10) ,             -- reported a mix of genomic and cdna sequence
        cRNA (11) ,                     -- viral RNA genome copy intermediate
        snoRNA (12) ,                   -- small nucleolar RNA
        transcribed-RNA (13) ,          -- transcribed RNA other than existing classes
        ncRNA (14) ,
        tmRNA (15) ,
        other (255) } DEFAULT unknown ,
    tech INTEGER {
        unknown (0) ,
        standard (1) ,                  -- standard sequencing
        est (2) ,                       -- Expressed Sequence Tag
        sts (3) ,                       -- Sequence Tagged Site
        survey (4) ,                    -- one-pass genomic sequence
        genemap (5) ,                   -- from genetic mapping techniques
        physmap (6) ,                   -- from physical mapping techniques
        derived (7) ,                   -- derived from other data, not a primary entity
        concept-trans (8) ,             -- conceptual translation
        seq-pept (9) ,                  -- peptide was sequenced
        both (10) ,                     -- concept transl. w/ partial pept. seq.
        seq-pept-overlap (11) ,         -- sequenced peptide, ordered by overlap
        seq-pept-homol (12) ,           -- sequenced peptide, ordered by homology
        concept-trans-a (13) ,          -- conceptual transl. supplied by author
        htgs-1 (14) ,                   -- unordered High Throughput sequence contig
        htgs-2 (15) ,                   -- ordered High Throughput sequence contig
        htgs-3 (16) ,                   -- finished High Throughput sequence
        fli-cdna (17) ,                 -- full length insert cDNA
        htgs-0 (18) ,                   -- single genomic reads for coordination
        htc (19) ,                      -- high throughput cDNA
        wgs (20) ,                      -- whole genome shotgun sequencing
        barcode (21) ,                  -- barcode of life project
        composite-wgs-htgs (22) ,       -- composite of WGS and HTGS
        tsa (23) ,                      -- transcriptome shotgun assembly
        targeted (24) ,                 -- targeted locus sets/studies
        other (255) }                   -- use MolInfo.techexp
        DEFAULT unknown ,
    techexp VisibleString OPTIONAL ,    -- explanation if tech not enough
    --
    -- Completeness is not indicated in most records.  For genomes, assume
    -- the sequences are incomplete unless specifically marked as complete.
    -- For mRNAs, assume the ends are not known exactly unless marked as
    -- having the left or right end.
    --
    completeness INTEGER {
        unknown (0) ,
        complete (1) ,                  -- complete biological entity
        partial (2) ,                   -- partial but no details given
        no-left (3) ,                   -- missing 5' or NH3 end
        no-right (4) ,                  -- missing 3' or COOH end
        no-ends (5) ,                   -- missing both ends
        has-left (6) ,                  -- 5' or NH3 end present
        has-right (7) ,                 -- 3' or COOH end present
        other (255) } DEFAULT unknown ,
    gbmoltype VisibleString OPTIONAL }  -- identifies particular ncRNA


GIBB-mol ::= ENUMERATED {               -- type of molecule represented
    unknown (0) ,
    genomic (1) ,
    pre-mRNA (2) ,                      -- precursor RNA of any sort really
    mRNA (3) ,
    rRNA (4) ,
    tRNA (5) ,
    snRNA (6) ,
    scRNA (7) ,
    peptide (8) ,
    other-genetic (9) ,                 -- other genetic material
    genomic-mRNA (10) ,                 -- reported a mix of genomic and cdna sequence
    other (255) }

GIBB-mod ::= ENUMERATED {               -- GenInfo Backbone modifiers
    dna (0) ,
    rna (1) ,
    extrachrom (2) ,
    plasmid (3) ,
    mitochondrial (4) ,
    chloroplast (5) ,
    kinetoplast (6) ,
    cyanelle (7) ,
    synthetic (8) ,
    recombinant (9) ,
    partial (10) ,
    complete (11) ,
    mutagen (12) ,                      -- subject of mutagenesis ?
    natmut (13) ,                       -- natural mutant ?
    transposon (14) ,
    insertion-seq (15) ,
    no-left (16) ,                      -- missing left end (5' for na, NH2 for aa)
    no-right (17) ,                     -- missing right end (3' or COOH)
    macronuclear (18) ,
    proviral (19) ,
    est (20) ,                          -- expressed sequence tag
    sts (21) ,                          -- sequence tagged site
    survey (22) ,                       -- one pass survey sequence
    chromoplast (23) ,
    genemap (24) ,                      -- is a genetic map
    restmap (25) ,                      -- is an ordered restriction map
    physmap (26) ,                      -- is a physical map (not ordered restriction map)
    other (255) }

GIBB-method ::= ENUMERATED {            -- sequencing methods
    concept-trans (1) ,                 -- conceptual translation
    seq-pept (2) ,                      -- peptide was sequenced
    both (3) ,                          -- concept transl. w/ partial pept. seq.
    seq-pept-overlap (4) ,              -- sequenced peptide, ordered by overlap
    seq-pept-homol (5) ,                -- sequenced peptide, ordered by homology
    concept-trans-a (6) ,               -- conceptual transl. supplied by author
    other (255) }

Numbering ::= CHOICE {                  -- any display numbering system
    cont Num-cont ,                     -- continuous numbering
    enum Num-enum ,                     -- enumerated names for residues
    ref Num-ref ,                       -- by reference to another sequence
    real Num-real }                     -- supports mapping to a float system

Num-cont ::= SEQUENCE {                 -- continuous display numbering system
    refnum INTEGER DEFAULT 1,           -- number assigned to first residue
    has-zero BOOLEAN DEFAULT FALSE ,    -- 0 used?
    ascending BOOLEAN DEFAULT TRUE }    -- ascending numbers?
Num-enum ::= SEQUENCE {             -- any tags to residues
    num INTEGER ,                   -- number of tags to follow
    names SEQUENCE OF VisibleString }  -- the tags

Num-ref ::= SEQUENCE {              -- by reference to other sequences
    type ENUMERATED {               -- type of reference
        not-set (0) ,
        sources (1) ,               -- by segmented or const seq sources
        aligns (2) } ,              -- by alignments given below
    aligns Seq-align OPTIONAL }

Num-real ::= SEQUENCE {             -- mapping to floating point system
    a REAL ,                        -- from an integer system used by Bioseq
    b REAL ,                        -- position = (a * int_position) + b
    units VisibleString OPTIONAL }

Pubdesc ::= SEQUENCE {              -- how sequence presented in pub
    pub Pub-equiv ,                 -- the citation(s)
    name VisibleString OPTIONAL ,   -- name used in paper
    fig VisibleString OPTIONAL ,    -- figure in paper
    num Numbering OPTIONAL ,        -- numbering from paper
    numexc BOOLEAN OPTIONAL ,       -- numbering problem with paper
    poly-a BOOLEAN OPTIONAL ,       -- poly A tail indicated in figure?
    maploc VisibleString OPTIONAL , -- map location reported in paper
    seq-raw StringStore OPTIONAL ,  -- original sequence from paper
    align-group INTEGER OPTIONAL ,  -- this seq aligned with others in paper
    comment VisibleString OPTIONAL, -- any comment on this pub in context
    reftype INTEGER {               -- type of reference in a GenBank record
        seq (0) ,                   -- refers to sequence
        sites (1) ,                 -- refers to unspecified features
        feats (2) ,                 -- refers to specified features
        no-target (3) }             -- nothing specified (EMBL)
        DEFAULT seq }

Heterogen ::= VisibleString         -- cofactor, prosthetic group, inhibitor, etc

--*** Instances of sequences *******************************
--*

Seq-inst ::= SEQUENCE {             -- the sequence data itself
    repr ENUMERATED {               -- representation class
        not-set (0) ,               -- empty
        virtual (1) ,               -- no seq data
        raw (2) ,                   -- continuous sequence
        seg (3) ,                   -- segmented sequence
        const (4) ,                 -- constructed sequence
        ref (5) ,                   -- reference to another sequence
        consen (6) ,                -- consensus sequence or pattern
        map (7) ,                   -- ordered map of any kind
        delta (8) ,                 -- sequence made by changes (delta) to others
        other (255) } ,
    mol ENUMERATED {                -- molecule class in living organism
        not-set (0) ,               --   > cdna = rna
        dna (1) ,
        rna (2) ,
        aa (3) ,
        na (4) ,                    -- just a nucleic acid
        other (255) } ,
    length INTEGER OPTIONAL ,       -- length of sequence in residues
    fuzz Int-fuzz OPTIONAL ,        -- length uncertainty
    topology ENUMERATED {           -- topology of molecule
        not-set (0) ,
        linear (1) ,
        circular (2) ,
        tandem (3) ,                -- some part of tandem repeat
        other (255) } DEFAULT linear ,
    strand ENUMERATED {             -- strandedness in living organism
        not-set (0) ,
        ss (1) ,                    -- single strand
        ds (2) ,                    -- double strand
        mixed (3) ,
        other (255) } OPTIONAL ,    -- default ds for DNA, ss for RNA, pept
    seq-data Seq-data OPTIONAL ,    -- the sequence
    ext Seq-ext OPTIONAL ,          -- extensions for special types
    hist Seq-hist OPTIONAL }        -- sequence history

--*** Sequence Extensions **********************************
--*  for representing more complex types
--*  const type uses Seq-hist.assembly

Seq-ext ::= CHOICE {
    seg Seg-ext ,                   -- segmented sequences
    ref Ref-ext ,                   -- hot link to another sequence (a view)
    map Map-ext ,                   -- ordered map of markers
    delta Delta-ext }

Seg-ext ::= SEQUENCE OF Seq-loc

Ref-ext ::= Seq-loc

Map-ext ::= SEQUENCE OF Seq-feat

Delta-ext ::= SEQUENCE OF Delta-seq

Delta-seq ::= CHOICE {
    loc Seq-loc ,                   -- point to a sequence
    literal Seq-literal }           -- a piece of sequence

Seq-literal ::= SEQUENCE {
    length INTEGER ,                -- must give a length in residues
    fuzz Int-fuzz OPTIONAL ,        -- could be unsure
    seq-data Seq-data OPTIONAL }    -- may have the data

--*** Sequence History Record ***********************************
--** assembly = records how seq was assembled from others
--** replaces = records sequences made obsolete by this one
--** replaced-by = this seq is made obsolete by another(s)

Seq-hist ::= SEQUENCE {
    assembly SET OF Seq-align OPTIONAL ,-- how was this assembled?
    replaces Seq-hist-rec OPTIONAL ,    -- seq makes these seqs obsolete
    replaced-by Seq-hist-rec OPTIONAL , -- these seqs make this one obsolete
    deleted CHOICE {
        bool BOOLEAN ,
        date Date } OPTIONAL }

-- An optionally dated set of Seq-ids; the type of Seq-hist.replaces and
-- Seq-hist.replaced-by
Seq-hist-rec ::= SEQUENCE {
    date Date OPTIONAL ,
    ids SET OF Seq-id }

--*** Various internal sequence representations ************
--*      all are controlled, fixed length forms

Seq-data ::= CHOICE {               -- sequence representations
    iupacna IUPACna ,               -- IUPAC 1 letter nuc acid code
    iupacaa IUPACaa ,               -- IUPAC 1 letter amino acid code
    ncbi2na NCBI2na ,               -- 2 bit nucleic acid code
    ncbi4na NCBI4na ,               -- 4 bit nucleic acid code
    ncbi8na NCBI8na ,               -- 8 bit extended nucleic acid code
    ncbipna NCBIpna ,               -- nucleic acid probabilities
    ncbi8aa NCBI8aa ,               -- 8 bit extended amino acid codes
    ncbieaa NCBIeaa ,               -- extended ASCII 1 letter aa codes
    ncbipaa NCBIpaa ,               -- amino acid probabilities
    ncbistdaa NCBIstdaa,            -- consecutive codes for std aas
    gap Seq-gap                     -- gap types
}

-- A gap (the "gap" alternative of Seq-data): its type, optional linkage
-- state and optional supporting evidence
Seq-gap ::= SEQUENCE {
    type INTEGER {
        unknown(0),
        fragment(1),                -- Deprecated. Used only for AGP 1.1
        clone(2),                   -- Deprecated. Used only for AGP 1.1
        short-arm(3),
        heterochromatin(4),
        centromere(5),
        telomere(6),
        repeat(7),
        contig(8),
        scaffold(9),
        other(255)
    },
    linkage INTEGER {
        unlinked(0),
        linked(1),
        other(255)
    } OPTIONAL,
    linkage-evidence SET OF Linkage-evidence OPTIONAL
}

-- One kind of evidence for linkage; element type of Seq-gap.linkage-evidence
Linkage-evidence ::= SEQUENCE {
    type INTEGER {
        paired-ends(0),
        align-genus(1),
        align-xgenus(2),
        align-trnscpt(3),
        within-clone(4),
        clone-contig(5),
        map(6),
        strobe(7),
        unspecified(8),
        pcr(9),
        other(255)
    }
}

IUPACna ::= StringStore       -- IUPAC 1 letter codes, no spaces
IUPACaa ::= StringStore       -- IUPAC 1 letter codes, no spaces
NCBI2na ::= OCTET STRING      -- 00=A, 01=C, 10=G, 11=T
NCBI4na ::= OCTET STRING      -- 1 bit each for agct
                              -- 0001=A, 0010=C, 0100=G, 1000=T/U
                              -- 0101=Purine, 1010=Pyrimidine, etc
NCBI8na ::= OCTET STRING      -- for modified nucleic acids
NCBIpna ::= OCTET STRING      -- 5 octets/base, prob for a,c,g,t,n
                              -- probabilities are coded 0-255 = 0.0-1.0
NCBI8aa ::= OCTET STRING      -- for modified amino acids
NCBIeaa ::= StringStore       -- ASCII extended 1 letter aa codes
                              -- IUPAC codes + U=selenocysteine
NCBIpaa ::= OCTET STRING      -- 25 octets/aa, prob for IUPAC aas in order:
                              -- A-Y,B,Z,X,(ter),anything
                              -- probabilities are coded 0-255 = 0.0-1.0
NCBIstdaa ::= OCTET STRING    -- codes 0-25, 1 per byte

--*** Sequence Annotation *************************************
--*

-- This is a replica of Textseq-id
-- This is specific for annotations, and exists to maintain a semantic
-- difference between IDs assigned to annotations and IDs assigned to
-- sequences
Textannot-id ::= SEQUENCE {
    name VisibleString OPTIONAL ,
    accession VisibleString OPTIONAL ,
    release VisibleString OPTIONAL ,
    version INTEGER OPTIONAL
}

-- Identifier of an annotation; element type of Seq-annot.id
Annot-id ::= CHOICE {
    local Object-id ,
    ncbi INTEGER ,
    general Dbtag,
    other Textannot-id
}

Annot-descr ::= SET OF Annotdesc

Annotdesc ::= CHOICE {
    name VisibleString ,            -- a short name for this collection
    title VisibleString ,           -- a title for this collection
    comment VisibleString ,         -- a more extensive comment
    pub Pubdesc ,                   -- a reference to the publication
    user User-object ,              -- user defined object
    create-date Date ,              -- date entry first created/released
    update-date Date ,              -- date of last update
    src Seq-id ,                    -- source sequence from which annot came
    align Align-def,                -- definition of the SeqAligns
    region Seq-loc }                -- all contents cover this region

Align-def ::= SEQUENCE {
    align-type INTEGER {            -- class of align Seq-annot
        ref (1) ,                   -- set of alignments to the same sequence
        alt (2) ,                   -- set of alternate alignments of the same seqs
        blocks (3) ,                -- set of aligned blocks in the same seqs
        other (255) } ,
    ids SET OF Seq-id OPTIONAL }    -- used for the one ref seqid for now

Seq-annot ::= SEQUENCE {
    id SET OF Annot-id OPTIONAL ,
    db INTEGER {                    -- source of annotation
        genbank (1) ,
        embl (2) ,
        ddbj (3) ,
        pir  (4) ,
        sp   (5) ,
        bbone (6) ,
        pdb   (7) ,
        other (255) } OPTIONAL ,
    name VisibleString OPTIONAL ,-- source if "other" above
    desc Annot-descr OPTIONAL ,     -- used only for stand alone Seq-annots
    data CHOICE {
        ftable SET OF Seq-feat ,
        align SET OF Seq-align ,
        graph SET OF Seq-graph ,
        ids SET OF Seq-id ,         -- used for communication between tools
        locs SET OF Seq-loc ,       -- used for communication between tools
        seq-table Seq-table } }     -- features in table form

END


--$Revision: 6.6 $
--**********************************************************************
--
--  NCBI Sequence Collections
--  by James Ostell, 1990
--
--  Version 3.0 - 1994
--
--**********************************************************************

NCBI-Seqset DEFINITIONS ::=
BEGIN

EXPORTS Bioseq-set, Seq-entry;

IMPORTS Bioseq, Seq-annot, Seq-descr FROM NCBI-Sequence
        Object-id, Dbtag, Date FROM NCBI-General;

--*** Sequence Collections ********************************
--*

Bioseq-set ::= SEQUENCE {           -- just a collection
    id Object-id OPTIONAL ,
    coll Dbtag OPTIONAL ,           -- to identify a collection
    level INTEGER OPTIONAL ,        -- nesting level
    class ENUMERATED {
        not-set (0) ,
        nuc-prot (1) ,              -- nuc acid and coded proteins
        segset (2) ,                -- segmented sequence + parts
        conset (3) ,                -- constructed sequence + parts
        parts (4) ,                 -- parts for 2 or 3
        gibb (5) ,                  -- geninfo backbone
        gi (6) ,                    -- geninfo
        genbank (7) ,               -- converted genbank
        pir (8) ,                   -- converted pir
        pub-set (9) ,               -- all the seqs from a single publication
        equiv (10) ,                -- a set of equivalent maps or seqs
        swissprot (11) ,            -- converted SWISSPROT
        pdb-entry (12) ,            -- a complete PDB entry
        mut-set (13) ,              -- set of mutations
        pop-set (14) ,              -- population study
        phy-set (15) ,              -- phylogenetic study
        eco-set (16) ,              -- ecological sample study
        gen-prod-set (17) ,         -- genomic products, chrom+mRNA+protein
        wgs-set (18) ,              -- whole genome shotgun project
        named-annot (19) ,          -- named annotation set
        named-annot-prod (20) ,     -- with instantiated mRNA+protein
        read-set (21) ,             -- set from a single read
        paired-end-reads (22) ,     -- paired sequences within a read-set
        small-genome-set (23) ,     -- viral segments or mitochondrial minicircles
        other (255) } DEFAULT not-set ,
    release VisibleString OPTIONAL ,
    date Date OPTIONAL ,
    descr Seq-descr OPTIONAL ,
    seq-set SEQUENCE OF Seq-entry ,
    annot SET OF Seq-annot OPTIONAL }

-- Either a single Bioseq or a Bioseq-set; because Bioseq-set.seq-set is a
-- SEQUENCE OF Seq-entry, sets can nest
Seq-entry ::= CHOICE {
    seq Bioseq ,
    set Bioseq-set }

END
--$Revision: 6.0 $
--  *********************************************************************
--
--  These are code and conversion tables for NCBI sequence codes
--  ASN.1 for the sequences themselves is defined in seq.asn
--
--  Seq-map-table and Seq-code-table REQUIRE that codes start with 0
--  and increase continuously.  So IUPAC codes, which are upper case
--  letters will always have 65 0 cells before the codes begin.  This
--  allows all codes to do indexed lookups for things
--
--  Valid names for code tables are:
--    IUPACna
--    IUPACaa
--    IUPACeaa
--    IUPACaa3     3 letter amino acid codes : parallels IUPACeaa
--                   display only, not a data exchange type
--    NCBI2na
--    NCBI4na
--    NCBI8na
--    NCBI8aa
--    NCBIstdaa
--     probability types map to IUPAC types for display as characters

NCBI-SeqCode DEFINITIONS ::=
BEGIN

EXPORTS Seq-code-table, Seq-map-table, Seq-code-set;

Seq-code-type ::= ENUMERATED {      -- sequence representations
    iupacna (1) ,                   -- IUPAC 1 letter nuc acid code
    iupacaa (2) ,                   -- IUPAC 1 letter amino acid code
    ncbi2na (3) ,                   -- 2 bit nucleic acid code
    ncbi4na (4) ,                   -- 4 bit nucleic acid code
    ncbi8na (5) ,                   -- 8 bit extended nucleic acid code
    ncbipna (6) ,                   -- nucleic acid probabilities
    ncbi8aa (7) ,                   -- 8 bit extended amino acid codes
    ncbieaa (8) ,                   -- extended ASCII 1 letter aa codes
    ncbipaa (9) ,                   -- amino acid probabilities
    iupacaa3 (10) ,                 -- 3 letter code only for display
    ncbistdaa (11) }                -- consecutive codes for std aas, 0-25

Seq-map-table ::= SEQUENCE {        -- for tables of sequence mappings
    from Seq-code-type ,            -- code to map from
    to Seq-code-type ,              -- code to map to
    num INTEGER ,                   -- number of rows in table
    start-at INTEGER DEFAULT 0 ,    -- index offset of first element
    table SEQUENCE OF INTEGER }     -- table of values, in from-to order

Seq-code-table ::= SEQUENCE {       -- for names of coded values
    code Seq-code-type ,            -- name of code
    num INTEGER ,                   -- number of rows in table
    one-letter BOOLEAN ,            -- symbol is ALWAYS 1 letter?
    start-at INTEGER DEFAULT 0 ,    -- index offset of first element
    table SEQUENCE OF
        SEQUENCE {
            symbol VisibleString ,  -- the printed symbol or letter
            name VisibleString } ,  -- an explanatory name or string
    comps SEQUENCE OF INTEGER OPTIONAL } -- pointers to complement nuc acid

Seq-code-set ::= SEQUENCE {         -- for distribution
    codes SET OF Seq-code-table OPTIONAL ,
    maps SET OF Seq-map-table OPTIONAL }

END

--$Revision: 6.0 $
--*********************************************************************
--
-- 1990 - J.Ostell
-- Version 3.0 - June 1994
--
--*********************************************************************
--*********************************************************************
--
--  EMBL specific data
--  This block of specifications was developed by Reiner Fuchs of EMBL
--  Updated by J.Ostell, 1994
--
--*********************************************************************

EMBL-General DEFINITIONS ::=
BEGIN

EXPORTS EMBL-dbname, EMBL-xref, EMBL-block;

IMPORTS Date, Object-id FROM NCBI-General;

-- A database name: one of the enumerated codes, or a free-text name
EMBL-dbname ::= CHOICE {
    code ENUMERATED {
        embl(0),
        genbank(1),
        ddbj(2),
        geninfo(3),
        medline(4),
        swissprot(5),
        pir(6),
        pdb(7),
        epd(8),
        ecd(9),
        tfd(10),
        flybase(11),
        prosite(12),
        enzyme(13),
        mim(14),
        ecoseq(15),
        hiv(16) ,
        other (255) } ,
    name VisibleString }

-- A cross-reference: a database name plus one or more ids in it
EMBL-xref ::= SEQUENCE {
    dbname EMBL-dbname,
    id SEQUENCE OF Object-id }

EMBL-block ::= SEQUENCE {
    class ENUMERATED {
        not-set(0),
        standard(1),
        unannotated(2),
        other(255) } DEFAULT standard,
    div ENUMERATED {
        fun(0),
        inv(1),
        mam(2),
        org(3),
        phg(4),
        pln(5),
        pri(6),
        pro(7),
        rod(8),
        syn(9),
        una(10),
        vrl(11),
        vrt(12),
        pat(13),
        est(14),
        sts(15),
        other (255) } OPTIONAL,
    creation-date Date,
    update-date Date,
    extra-acc SEQUENCE OF VisibleString OPTIONAL,
    keywords SEQUENCE OF VisibleString OPTIONAL,
    xref SEQUENCE OF EMBL-xref OPTIONAL }

END

--*********************************************************************
--
--  SWISSPROT specific data
--  This block of specifications was developed by Mark Cavanaugh of
--      NCBI working with Amos Bairoch of SWISSPROT
--
--*********************************************************************

SP-General DEFINITIONS ::=
BEGIN

EXPORTS SP-block;

IMPORTS Date, Dbtag FROM NCBI-General
        Seq-id FROM NCBI-Seqloc;

SP-block ::= SEQUENCE {             -- SWISSPROT specific descriptions
    class ENUMERATED {
        not-set (0) ,
        standard (1) ,              -- conforms to all SWISSPROT checks
        prelim (2) ,                -- only seq and biblio checked
        other (255) } ,
    extra-acc SET OF VisibleString OPTIONAL ,  -- old SWISSPROT ids
    imeth BOOLEAN DEFAULT FALSE ,   -- seq known to start with Met
    plasnm SET OF VisibleString OPTIONAL,  -- plasmid names carrying gene
    seqref SET OF Seq-id OPTIONAL,  -- xref to other sequences
    dbref SET OF Dbtag OPTIONAL ,   -- xref to non-sequence dbases
    keywords SET OF VisibleString OPTIONAL , -- keywords
    created Date OPTIONAL ,         -- creation date
    sequpd Date OPTIONAL ,          -- sequence update
    annotupd Date OPTIONAL }        -- annotation update

END

--*********************************************************************
--
--  PIR specific data
--  This block of specifications was developed by Jim Ostell of
--      NCBI
--
--*********************************************************************

PIR-General DEFINITIONS ::=
BEGIN

EXPORTS PIR-block;

IMPORTS Seq-id FROM NCBI-Seqloc;

PIR-block ::= SEQUENCE {            -- PIR specific descriptions
    had-punct BOOLEAN OPTIONAL ,    -- had punctuation in sequence ?
    host VisibleString OPTIONAL ,
    source VisibleString OPTIONAL , -- source line
    summary VisibleString OPTIONAL ,
    genetic VisibleString OPTIONAL ,
    includes VisibleString OPTIONAL ,
    placement VisibleString OPTIONAL ,
    superfamily VisibleString OPTIONAL ,
    keywords SEQUENCE OF VisibleString OPTIONAL ,
    cross-reference VisibleString OPTIONAL ,
    date VisibleString OPTIONAL ,
    seq-raw VisibleString OPTIONAL ,  -- seq with punctuation
    seqref SET OF Seq-id OPTIONAL }   -- xref to other sequences

END

--*********************************************************************
--
--  GenBank specific data
--  This block of specifications was developed by Jim Ostell of
--      NCBI
--
--*********************************************************************

GenBank-General DEFINITIONS ::=
BEGIN

EXPORTS GB-block;

IMPORTS Date FROM NCBI-General;

GB-block ::= SEQUENCE {             -- GenBank specific descriptions
    extra-accessions SEQUENCE OF VisibleString OPTIONAL ,
    source VisibleString OPTIONAL , -- source line
    keywords SEQUENCE OF VisibleString OPTIONAL ,
    origin VisibleString OPTIONAL,
    date VisibleString OPTIONAL ,   -- OBSOLETE old form Entry Date
    entry-date Date OPTIONAL ,      -- replaces date
    div VisibleString OPTIONAL ,    -- GenBank division
    taxonomy VisibleString OPTIONAL }  -- continuation line of organism

END

--**********************************************************************
--  PRF specific definition
--    PRF is a protein sequence database created and maintained by
--    Protein Research Foundation, Minoo-city, Osaka, Japan.
--
--    Written by A.Ogiwara, Inst.Chem.Res.
--               (Dr.Kanehisa's Lab),
--               Kyoto Univ., Japan
--
--**********************************************************************

PRF-General DEFINITIONS ::=
BEGIN

EXPORTS PRF-block;

PRF-block ::= SEQUENCE {
    extra-src PRF-ExtraSrc OPTIONAL,
    keywords SEQUENCE OF VisibleString OPTIONAL
}

-- Optional free-text source details; the type of PRF-block.extra-src
PRF-ExtraSrc ::= SEQUENCE {
    host VisibleString OPTIONAL,
    part VisibleString OPTIONAL,
    state VisibleString OPTIONAL,
    strain VisibleString OPTIONAL,
    taxon VisibleString OPTIONAL
}

END

--*********************************************************************
--
--  PDB specific data
--  This block of specifications was developed by Jim Ostell and
--      Steve Bryant of NCBI
--
--*********************************************************************

PDB-General DEFINITIONS ::=
BEGIN

EXPORTS PDB-block;

IMPORTS Date FROM NCBI-General;

PDB-block ::= SEQUENCE {            -- PDB specific descriptions
    deposition Date ,               -- deposition date  month,year
    class VisibleString ,
    compound SEQUENCE OF VisibleString ,
    source SEQUENCE OF VisibleString ,
    exp-method VisibleString OPTIONAL ,  -- present if NOT X-ray diffraction
    replace PDB-replace OPTIONAL }  -- replacement history

PDB-replace ::= SEQUENCE {
    date Date ,
    ids SEQUENCE OF VisibleString } -- entry ids replaced by this one

END

--$Revision: 6.53 $
--**********************************************************************
--
--  NCBI Sequence Feature elements
--  by James Ostell, 1990
--  Version 3.0 - June 1994
--
--**********************************************************************

NCBI-Seqfeat DEFINITIONS ::=
BEGIN

EXPORTS Seq-feat, Feat-id, Genetic-code, ModelEvidenceSupport;

IMPORTS Gene-ref FROM NCBI-Gene
        Prot-ref FROM NCBI-Protein
        Org-ref FROM NCBI-Organism
        Variation-ref FROM NCBI-Variation
        BioSource FROM NCBI-BioSource
        RNA-ref FROM NCBI-RNA
        Seq-id, Seq-loc, Giimport-id FROM NCBI-Seqloc
        Pubdesc, Numbering, Heterogen FROM NCBI-Sequence
        Rsite-ref FROM NCBI-Rsite
        Txinit FROM NCBI-TxInit
        DOI, PubMedId FROM NCBI-Biblio
        Pub-set FROM NCBI-Pub
        Object-id, Dbtag, User-object FROM NCBI-General;

--*** Feature identifiers ********************************
--*

Feat-id ::= CHOICE {
    gibb INTEGER ,                  -- geninfo backbone
    giim Giimport-id ,              -- geninfo import
    local Object-id ,               -- for local software use
    general Dbtag }                 -- for use by various databases

--*** Seq-feat *******************************************
--*  sequence feature generalization

Seq-feat ::= SEQUENCE {
    id Feat-id OPTIONAL ,
    data SeqFeatData ,              -- the specific data
    partial BOOLEAN OPTIONAL ,      -- incomplete in some way?
    except BOOLEAN OPTIONAL ,       -- something funny about this?
    comment VisibleString OPTIONAL ,
    product Seq-loc OPTIONAL ,      -- product of process
    location Seq-loc ,              -- feature made from
    qual SEQUENCE OF Gb-qual OPTIONAL ,  -- qualifiers
    title VisibleString OPTIONAL ,  -- for user defined label
    ext User-object OPTIONAL ,      -- user defined structure extension
    cit Pub-set OPTIONAL ,          -- citations for this feature
    exp-ev ENUMERATED {             -- evidence for existence of feature
        experimental (1) ,          -- any reasonable experimental check
        not-experimental (2) } OPTIONAL , -- similarity, pattern, etc
    xref SET OF SeqFeatXref OPTIONAL ,   -- cite other relevant features
    dbxref SET OF Dbtag OPTIONAL ,  -- support for xref to other databases
    pseudo BOOLEAN OPTIONAL ,       -- annotated on pseudogene?
    except-text VisibleString OPTIONAL , -- explain if except=TRUE
    ids SET OF Feat-id OPTIONAL ,   -- set of Ids; will replace 'id' field
    exts SET OF User-object OPTIONAL , -- set of extensions; will replace 'ext' field
    support SeqFeatSupport OPTIONAL -- will replace /experiment, /inference, model-evidence
}

SeqFeatData ::= CHOICE {
    gene Gene-ref ,
    org Org-ref ,
    cdregion Cdregion ,
    prot Prot-ref ,
    rna RNA-ref ,
    pub Pubdesc ,                   -- publication applies to this seq
    seq Seq-loc ,                   -- to annotate origin from another seq
    imp Imp-feat ,
    region VisibleString,           -- named region (globin locus)
    comment NULL ,                  -- just a comment
    bond ENUMERATED {
        disulfide (1) ,
        thiolester (2) ,
        xlink (3) ,
        thioether (4) ,
        other (255) } ,
    site ENUMERATED {
        active (1) ,
        binding (2) ,
        cleavage (3) ,
        inhibit (4) ,
        modified (5),
        glycosylation (6) ,
        myristoylation (7) ,
        mutagenized (8) ,
        metal-binding (9) ,
        phosphorylation (10) ,
        acetylation (11) ,
        amidation (12) ,
        methylation (13) ,
        hydroxylation (14) ,
        sulfatation (15) ,
        oxidative-deamination (16) ,
        pyrrolidone-carboxylic-acid (17) ,
        gamma-carboxyglutamic-acid (18) ,
        blocked (19) ,
        lipid-binding (20) ,
        np-binding (21) ,
        dna-binding (22) ,
        signal-peptide (23) ,
        transit-peptide (24) ,
        transmembrane-region (25) ,
        nitrosylation (26) ,
        other (255) } ,
    rsite Rsite-ref ,               -- restriction site  (for maps really)
    user User-object ,              -- user defined structure
    txinit Txinit ,                 -- transcription initiation
    num Numbering ,                 -- a numbering system
    psec-str ENUMERATED {           -- protein secondary structure
        helix (1) ,                 -- any helix
        sheet (2) ,                 -- beta sheet
        turn (3) } ,                -- beta or gamma turn
    non-std-residue VisibleString , -- non-standard residue here in seq
    het Heterogen ,                 -- cofactor, prosthetic grp, etc, bound to seq
    biosrc BioSource,
    clone Clone-ref,
    variation Variation-ref
}

SeqFeatXref ::= SEQUENCE {          -- both optional because can have one or both
    id Feat-id OPTIONAL ,           -- the feature copied
    data SeqFeatData OPTIONAL }     -- the specific data

-- Groups the three optional kinds of supporting evidence for a feature;
-- the type of Seq-feat.support
SeqFeatSupport ::= SEQUENCE {
    experiment SET OF ExperimentSupport OPTIONAL ,
    inference SET OF InferenceSupport OPTIONAL ,
    model-evidence SET OF ModelEvidenceSupport OPTIONAL
}

-- Category shared by ExperimentSupport.category and InferenceSupport.category
EvidenceCategory ::= INTEGER {
    not-set (0) ,
    coordinates (1) ,
    description (2) ,
    existence (3)
}

ExperimentSupport ::= SEQUENCE {
    category EvidenceCategory OPTIONAL ,
    explanation VisibleString ,
    pmids SET OF PubMedId OPTIONAL ,
    dois SET OF DOI OPTIONAL
}

Program-id ::= SEQUENCE {
    name VisibleString ,
    version VisibleString OPTIONAL
}

-- Programs and/or accessions; the type of InferenceSupport.basis
EvidenceBasis ::= SEQUENCE {
    programs SET OF Program-id OPTIONAL ,
    accessions SET OF Seq-id OPTIONAL
}

InferenceSupport ::= SEQUENCE {
    category EvidenceCategory OPTIONAL ,
    type INTEGER {
        not-set (0) ,
        similar-to-sequence (1) ,
        similar-to-aa (2) ,
        similar-to-dna (3) ,
        similar-to-rna (4) ,
        similar-to-mrna (5) ,
        -- NOTE(review): the next name is spelled "similiar" (sic), unlike its
        -- siblings; left unchanged because named numbers appear in ASN.1 text
        -- value notation, so renaming it would break existing data
        similiar-to-est (6) ,
        similar-to-other-rna (7) ,
        profile (8) ,
        nucleotide-motif (9) ,
        protein-motif (10) ,
        ab-initio-prediction (11) ,
        alignment (12) ,
        other (255)
    } DEFAULT not-set ,
    other-type VisibleString OPTIONAL ,
    same-species BOOLEAN DEFAULT FALSE ,
    basis EvidenceBasis ,
    pmids SET OF PubMedId OPTIONAL ,
    dois SET OF DOI OPTIONAL
}

-- One supporting sequence; element type of the mrna, est and protein sets
-- of ModelEvidenceSupport
ModelEvidenceItem ::= SEQUENCE {
    id Seq-id ,
    exon-count INTEGER OPTIONAL ,
    exon-length INTEGER OPTIONAL ,
    full-length BOOLEAN DEFAULT FALSE ,
    supports-all-exon-combo BOOLEAN DEFAULT FALSE
}

ModelEvidenceSupport ::= SEQUENCE {
    method VisibleString OPTIONAL ,
    mrna SET OF ModelEvidenceItem OPTIONAL ,
    est SET OF ModelEvidenceItem OPTIONAL ,
    protein SET OF ModelEvidenceItem OPTIONAL ,
    identification Seq-id OPTIONAL ,
    dbxref SET OF Dbtag OPTIONAL ,
    exon-count INTEGER OPTIONAL ,
    exon-length INTEGER OPTIONAL ,
    full-length BOOLEAN DEFAULT FALSE ,
    supports-all-exon-combo BOOLEAN DEFAULT FALSE
}

--*** CdRegion ***********************************************
--*
--*  Instructions to translate from a nucleic acid to a peptide
--*    conflict means it's supposed to translate but doesn't
--*


Cdregion ::= SEQUENCE {
    orf BOOLEAN OPTIONAL ,          -- just an ORF ?
    frame ENUMERATED {
        not-set (0) ,               -- not set, code uses one
        one (1) ,
        two (2) ,
        three (3) } DEFAULT not-set ,   -- reading frame
    conflict BOOLEAN OPTIONAL ,     -- conflict
    gaps INTEGER OPTIONAL ,         -- number of gaps on conflict/except
    mismatch INTEGER OPTIONAL ,     -- number of mismatches on above
    code Genetic-code OPTIONAL ,    -- genetic code used
    code-break SEQUENCE OF Code-break OPTIONAL ,  -- individual exceptions
    stops INTEGER OPTIONAL }        -- number of stop codons on above

                    -- each code is 64 cells long, in the order where
                    -- T=0,C=1,A=2,G=3, TTT=0, TTC=1, TCA=4, etc
                    -- NOTE: this order does NOT correspond to a Seq-data
                    -- encoding.  It is "natural" to codon usage instead.
                    -- the value in each cell is the AA coded for
                    -- start= AA coded only if first in peptide
                    --   in start array, if codon is not a legitimate start
                    --   codon, that cell will have the "gap" symbol for
                    --   that alphabet.  Otherwise it will have the AA
                    --   encoded when that codon is used at the start.

-- Genetic-code: a set of alternative descriptions of one genetic code.
-- Any combination of name, database id and per-alphabet translation
-- tables (plus the matching "s..." start-codon tables) may be present.
Genetic-code ::= SET OF CHOICE {
    name VisibleString ,               -- name of a code
    id INTEGER ,                       -- id in dbase
    ncbieaa VisibleString ,            -- indexed to IUPAC extended
    ncbi8aa OCTET STRING ,             -- indexed to NCBI8aa
    ncbistdaa OCTET STRING ,           -- indexed to NCBIstdaa
    sncbieaa VisibleString ,           -- start, indexed to IUPAC extended
    sncbi8aa OCTET STRING ,            -- start, indexed to NCBI8aa
    sncbistdaa OCTET STRING }          -- start, indexed to NCBIstdaa

Code-break ::= SEQUENCE {              -- specific codon exceptions
    loc Seq-loc ,                      -- location of exception
    aa CHOICE {                        -- the amino acid
        ncbieaa INTEGER ,              -- ASCII value of NCBIeaa code
        ncbi8aa INTEGER ,              -- NCBI8aa code
        ncbistdaa INTEGER } }          -- NCBIstdaa code

Genetic-code-table ::= SET OF Genetic-code -- table of genetic codes

--*** Import ***********************************************
--*
--*  Features imported from other databases
--*

-- Imp-feat: an imported feature identified by a free-text key, with the
-- original location string and a description kept as plain text.
Imp-feat ::= SEQUENCE {
    key VisibleString ,
    loc VisibleString OPTIONAL ,         -- original location string
    descr VisibleString OPTIONAL }       -- text description

-- Gb-qual: a qualifier name (qual) paired with its value (val),
-- both carried as free text.
Gb-qual ::= SEQUENCE {
    qual VisibleString ,
    val VisibleString }


--*** Clone-ref ***********************************************
--*
--*  Specification of clone features
--*

Clone-ref ::= SEQUENCE {
    name VisibleString,                  -- Official clone symbol
    library VisibleString OPTIONAL,      -- Library name

    -- The "OPTIONAL?" remarks below are open questions left by the original
    -- author: both flags are currently DEFAULT FALSE rather than OPTIONAL.
    concordant BOOLEAN DEFAULT FALSE,    -- OPTIONAL?
    unique BOOLEAN DEFAULT FALSE,        -- OPTIONAL?
    placement-method INTEGER {
        end-seq (0),                     -- Clone placed by end sequence
        insert-alignment (1),            -- Clone placed by insert alignment
        sts (2),                         -- Clone placed by STS
        fish (3),
        fingerprint (4),
        end-seq-insert-alignment (5),    -- combined end-seq and insert align
        external (253),                  -- Placement provided externally
        curated (254),                   -- Human placed or approved
        other (255)
    } OPTIONAL,
    clone-seq Clone-seq-set OPTIONAL
}

Clone-seq-set ::= SET OF Clone-seq


-- Clone-seq: one sequence associated with a clone (insert or end), with
-- its placement confidence and how it supports the clone placement.
Clone-seq ::= SEQUENCE {
    type INTEGER {
        insert (0),
        end (1),
        other (255)
    },
    confidence INTEGER {
        multiple (0),        -- Multiple hits
        na (1),              -- Unspecified
        nohit-rep (2),       -- No hits, end flagged repetitive
        nohitnorep (3),      -- No hits, end not flagged repetitive
        other-chrm (4),      -- Hit on different chromosome
        unique (5),
        virtual (6),         -- Virtual (hasn't been sequenced)
        multiple-rep (7),    -- Multiple hits, end flagged repetitive
        multiplenorep (8),   -- Multiple hits, end not flagged repetitive
        no-hit (9),          -- No hits
        other (255)
    } OPTIONAL,
    location Seq-loc,        -- location on sequence
    seq Seq-loc OPTIONAL,    -- clone sequence location
    align-id Dbtag OPTIONAL, -- internal alignment identifier
    support INTEGER {
        prototype (0),       -- sequence used to place clone
        supporting (1),      -- sequence supports placement
        supports-other(2),   -- supports a different placement
        non-supporting (3)   -- does not support any placement
    } OPTIONAL
}

END


--*** Variation-ref ***********************************************
--*
--*  Specification of variation features
--*

NCBI-Variation DEFINITIONS ::=
BEGIN

EXPORTS Variation-ref, Variation-inst, VariantProperties,
        Population-data, Phenotype;

IMPORTS Int-fuzz, User-object, Object-id, Dbtag FROM NCBI-General
        Seq-literal FROM NCBI-Sequence
        SubSource FROM NCBI-BioSource
        Seq-loc FROM NCBI-Seqloc
        Pub FROM NCBI-Pub;


-- --------------------------------------------------------------------------
-- Historically, the dbSNP definitions document data structures used in the
-- processing and annotation of variations by the dbSNP group.  The intention
-- is to provide information to clients that reflect internal information
-- produced during the mapping of SNPs
-- --------------------------------------------------------------------------

VariantProperties ::= SEQUENCE {
    version INTEGER,

    -- NOTE:
    -- The format for most of these values is as an integer
    -- Unless otherwise noted, these integers represent a bitwise OR (= simple
    -- sum) of the possible values, and as such, these values represent the
    -- specific bit flags that may be set for each of the possible attributes
    -- here.

    resource-link INTEGER {
        preserved        (1), -- Clinical, Pubmed, Cited, (0x01)
        provisional      (2), -- Provisional Third Party Annotations (0x02)
        has3D            (4), -- Has 3D structure SNP3D table (0x04)
        submitterLinkout (8), -- SNP->SubSNP->Batch link_out (0x08)
        clinical        (16), -- Clinical if LSDB, OMIM, TPA, Diagnostic (0x10)
        genotypeKit     (32)  -- Marker exists on high density genotyping kit
                              -- (0x20)
    } OPTIONAL,

    gene-location INTEGER {
        in-gene         (1), -- Sequence intervals covered by a gene ID but not
                             -- having an aligned transcript (0x01)
        near-gene-5     (2), -- Within 2kb of the 5' end of a gene feature
                             -- (0x02)
        near-gene-3     (4), -- Within 0.5kb of the 3' end of a gene feature
                             -- (0x04)
        intron          (8), -- In Intron (0x08)
        donor          (16), -- In donor splice-site (0x10)
        acceptor       (32), -- In acceptor splice-site (0x20)
        utr-5          (64), -- In 5' UTR (0x40)
        utr-3         (128), -- In 3' UTR (0x80)
        in-start-codon(256), -- the variant is observed in a start codon
                             -- (0x100)
        in-stop-codon (512), -- the variant is observed in a stop codon
                             -- (0x200)
        intergenic   (1024), -- variant located between genes (0x400)
        conserved-noncoding(2048) -- variant is located in a conserved
                                  -- non-coding region (0x800)
    } OPTIONAL,

    effect INTEGER {
        no-change      (0), -- known to cause no functional changes
                            -- since 0 does not combine with any other bit
                            -- value, 'no-change' specifically implies that
                            -- there are no consequences
        synonymous     (1), -- one allele in the set does not change the encoded
                            -- amino acid (0x1)
        nonsense       (2), -- one allele in the set changes to STOP codon
                            -- (TER).  (0x2)
        missense       (4), -- one allele in the set changes protein peptide
                            -- (0x4)
        frameshift     (8), -- one allele in the set changes all downstream
                            -- amino acids (0x8)

        up-regulator  (16), -- the variant causes increased transcription
                            -- (0x10)
        down-regulator(32), -- the variant causes decreased transcription
                            -- (0x20)
        methylation   (64), -- (0x40)
        stop-gain    (128), -- reference codon is not stop codon, but the snp
                            -- variant allele changes the codon to a
                            -- terminating codon. (0x80)
        stop-loss    (256)  -- reverse of STOP-GAIN: reference codon is a
                            -- stop codon, but a snp variant allele changes
                            -- the codon to a non-terminating codon. (0x100)
    } OPTIONAL,

    mapping INTEGER {
        has-other-snp         (1), -- Another SNP has the same mapped positions
                                   -- on reference assembly (0x01)
        has-assembly-conflict (2), -- Weight 1 or 2 SNPs that map to different
                                   -- chromosomes on different assemblies (0x02)
        is-assembly-specific  (4)  -- Only maps to 1 assembly (0x04)
    } OPTIONAL,

    -- map-weight captures specificity of placement
    -- NOTE: This is *NOT* a bitfield
    map-weight INTEGER {
        is-uniquely-placed(1),
        placed-twice-on-same-chrom(2),
        placed-twice-on-diff-chrom(3),
        many-placements(10)
    } OPTIONAL,

    frequency-based-validation INTEGER {
        is-mutation      (1), -- low frequency variation that is cited in
                              -- journal or other reputable sources (0x01)
        above-5pct-all   (2), -- >5% minor allele freq in each and all
                              -- populations (0x02)
        above-5pct-1plus (4), -- >5% minor allele freq in 1+ populations (0x04)
        validated        (8), -- Bit is set if the variant has a minor allele
                              -- observed in two or more separate chromosomes
                              -- (0x08)
        above-1pct-all  (16), -- >1% minor allele freq in each and all
                              -- populations (0x10)
        above-1pct-1plus (32) -- >1% minor allele freq in 1+ populations (0x20)
    } OPTIONAL,

    genotype INTEGER {
        in-haplotype-set (1), -- Exists in a haplotype tagging set (0x01)
        has-genotypes    (2)  -- SNP has individual genotype (0x02)
    } OPTIONAL,

    -- project IDs are IDs from BioProjects
    -- in order to report information about project relationships, we
    -- require projects to be registered
    -- This field in many ways duplicates dbxrefs; however, the
    -- intention of this field is to more adequately reflect
    -- ownership and data source
    --
    -- 11/9/2010: DO NOT USE
    -- This field was changed in the spec in a breaking way; using it will
    -- break clients.  We are officially suppressing / abandoning this field.
    -- Clients who need to use this should instead place the data in
    -- Seq-feat.dbxref, using the db name 'BioProject'
    project-data SET OF INTEGER OPTIONAL,

    quality-check INTEGER {
        contig-allele-missing   (1), -- Reference sequence allele at the mapped
                                     -- position is not present in the SNP
                                     -- allele list, adjusted for orientation
                                     -- (0x01)
        withdrawn-by-submitter  (2), -- One member SS is withdrawn by submitter
                                     -- (0x02)
        non-overlapping-alleles (4), -- RS set has 2+ alleles from different
                                     -- submissions and these sets share no
                                     -- alleles in common (0x04)
        strain-specific         (8), -- Strain specific fixed difference (0x08)
        genotype-conflict      (16)  -- Has Genotype Conflict (0x10)
    } OPTIONAL,

    -- NOTE: the values below are sequential codes, not powers of two
    confidence INTEGER {
        unknown         (0),
        likely-artifact (1),
        other           (255)
    } OPTIONAL,

    -- has this variant been validated?
    -- While a boolean flag offers no subtle distinctions of validation
    -- methods, occasionally it is only known as a single boolean value
    -- NOTE: this flag is redundant and should be omitted if more comprehensive
    -- validation information is present
    other-validation BOOLEAN OPTIONAL,

    -- origin of this allele, if known
    -- note that these are powers-of-two, and represent bits; thus, we can
    -- represent more than one state simultaneously through a bitwise OR
    allele-origin INTEGER {
        unknown             (0),
        germline            (1),
        somatic             (2),
        inherited           (4),
        paternal            (8),
        maternal            (16),
        de-novo             (32),
        biparental          (64),
        uniparental         (128),
        not-tested          (256),
        tested-inconclusive (512),
        not-reported        (1024),

        -- stopper; NOTE: 1073741824 is 2^30 (an earlier comment said 2^31)
        other               (1073741824)
    } OPTIONAL,

    -- observed allele state, if known
    -- NOTE: THIS IS NOT A BITFIELD!
    allele-state INTEGER {
        unknown         (0),
        homozygous      (1),
        heterozygous    (2),
        hemizygous      (3),
        nullizygous     (4),
        other           (255)
    } OPTIONAL,

    -- NOTE:
    -- 'allele-frequency' here refers to the minor allele frequency of the
    -- default population
    allele-frequency REAL OPTIONAL,

    -- is this variant the ancestral allele?
    is-ancestral-allele BOOLEAN OPTIONAL
}

-- Phenotype: a phenotype term with its source, cross-references and an
-- optional clinical-significance code.
Phenotype ::= SEQUENCE {
    source VisibleString OPTIONAL,
    term VisibleString OPTIONAL,
    xref SET OF Dbtag OPTIONAL,

    -- does this variant have known clinical significance?
    -- NOTE: sequential codes, not a bit-field
    clinical-significance INTEGER {
        unknown                 (0),
        untested                (1),
        non-pathogenic          (2),
        probable-non-pathogenic (3),
        probable-pathogenic     (4),
        pathogenic              (5),
        drug-response           (6),
        histocompatibility      (7),
        other                   (255)
    } OPTIONAL
}

Population-data ::= SEQUENCE {
    -- assayed population (e.g. HAPMAP-CEU)
    population VisibleString,
    genotype-frequency REAL OPTIONAL,
    chromosomes-tested INTEGER OPTIONAL,
    sample-ids SET OF Object-id OPTIONAL,
    allele-frequency REAL OPTIONAL,

    -- This field is an explicit bit-field
    -- Valid values should be a bitwise combination (= simple sum)
    -- of any of the values below
    flags INTEGER {
        is-default-population   (1),
        is-minor-allele         (2),
        is-rare-allele          (4)
    } OPTIONAL
}

-- Ext-loc: an identified external location (an Object-id paired with a
-- Seq-loc); used by Variation-ref.ext-locs.
Ext-loc ::= SEQUENCE {
    id Object-id,
    location Seq-loc
}


Variation-ref ::= SEQUENCE {
    -- ids (i.e., SNP rsid / ssid, dbVar nsv/nssv)
    -- expected values include 'dbSNP|rs12334', 'dbSNP|ss12345', 'dbVar|nsv1'
    --
    -- we relate three kinds of IDs here:
    --  - our current object's id
    --  - the id of this object's parent, if it exists
    --  - the sample ID that this item originates from
    id        Dbtag OPTIONAL,
    parent-id Dbtag OPTIONAL,
    sample-id Object-id OPTIONAL,
    other-ids SET OF Dbtag OPTIONAL,

    -- names and synonyms
    -- some variants have well-known canonical names and possible accepted
    -- synonyms
    name VisibleString OPTIONAL,
    synonyms SET OF VisibleString OPTIONAL,

    -- tag for comment and descriptions
    description VisibleString OPTIONAL,

    -- phenotype
    phenotype SET OF Phenotype OPTIONAL,

    -- sequencing / acquisition method
    method SET OF INTEGER {
        unknown             (0),
        bac-acgh            (1),
        computational       (2),
        curated             (3),
        digital-array       (4),
        expression-array    (5),
        fish                (6),
        flanking-sequence   (7),
        maph                (8),
        mcd-analysis        (9),
        mlpa                (10),
        oea-assembly        (11),
        oligo-acgh          (12),
        paired-end          (13),
        pcr                 (14),
        qpcr                (15),
        read-depth          (16),
        roma                (17),
        rt-pcr              (18),
        sage                (19),
        sequence-alignment  (20),
        sequencing          (21),
        snp-array           (22),
        -- NOTE(review): the identifier below is misspelled ("genoytyping");
        -- it is left unchanged because renaming a named value would break
        -- existing text-format data and generated code
        snp-genoytyping     (23),
        southern            (24),
        western             (25),
        optical-mapping     (26),

        other               (255)
    } OPTIONAL,

    -- Note about SNP representation and pertinent fields: allele-frequency,
    -- population, quality-codes:
    -- The case of multiple alleles for a SNP would be described by
    -- parent-feature of type Variation-set.diff-alleles, where the child
    -- features of type Variation-inst, all at the same location, would
    -- describe individual alleles.

    -- population data
    -- DEPRECATED - do not use
    population-data SET OF Population-data OPTIONAL,

    -- variant properties bit fields
    variant-prop VariantProperties OPTIONAL,

    -- has this variant been validated?
    -- DEPRECATED: new field = VariantProperties.other-validation
    validated BOOLEAN OPTIONAL,

    -- link-outs to GeneTests database
    -- DEPRECATED - do not use
    clinical-test SET OF Dbtag OPTIONAL,

    -- origin of this allele, if known
    -- note that these are powers-of-two, and represent bits; thus, we can
    -- represent more than one state simultaneously through a bitwise OR
    -- DEPRECATED: new field = VariantProperties.allele-origin
    allele-origin INTEGER {
        unknown         (0),
        germline        (1),
        somatic         (2),
        inherited       (4),
        paternal        (8),
        maternal        (16),
        de-novo         (32),
        biparental      (64),
        uniparental     (128),
        not-tested      (256),
        tested-inconclusive (512),

        -- stopper; NOTE: 1073741824 is 2^30 (an earlier comment said 2^31)
        other           (1073741824)
    } OPTIONAL,

    -- observed allele state, if known
    -- DEPRECATED: new field = VariantProperties.allele-state
    allele-state INTEGER {
        unknown         (0),
        homozygous      (1),
        heterozygous    (2),
        hemizygous      (3),
        nullizygous     (4),
        other           (255)
    } OPTIONAL,

    -- NOTE:
    -- 'allele-frequency' here refers to the minor allele frequency of the
    -- default population
    -- DEPRECATED: new field = VariantProperties.allele-frequency
    allele-frequency REAL OPTIONAL,

    -- is this variant the ancestral allele?
    -- DEPRECATED: new field = VariantProperties.is-ancestral-allele
    is-ancestral-allele BOOLEAN OPTIONAL,

    -- publication support.
    -- Note: made this pub instead of pub-equiv, since
    -- Pub can be pub-equiv and pub-equiv is a set of pubs, but it looks like
    -- Pub is more often used as top-level container
    -- DEPRECATED - do not use; use Seq-feat.dbxref instead
    pub Pub OPTIONAL,

    data CHOICE {
        unknown NULL,
        note    VisibleString, --free-form
        uniparental-disomy NULL,

        -- actual sequence-edit at feat.location
        instance        Variation-inst,

        -- Set of related Variations.
        -- Location of the set equals to the union of member locations
        set SEQUENCE {
            type INTEGER {
                unknown     (0),
                compound    (1), -- complex change at the same location on the
                                 -- same molecule
                products    (2), -- different products arising from the same
                                 -- variation in a precursor, e.g. r.[13g>a,
                                 -- 13_88del]
                haplotype   (3), -- changes on the same allele, e.g.
                                 -- r.[13g>a;15u>c]
                genotype    (4), -- changes on different alleles in the same
                                 -- genotype, e.g. g.[476C>T]+[476C>T]
                mosaic      (5), -- different genotypes in the same individual
                individual  (6), -- same organism; allele relationship unknown,
                                 -- e.g. g.[476C>T(+)183G>C]
                population  (7), -- population
                alleles     (8), -- set represents a set of observed alleles
                package     (9), -- set represents a package of observations at
                                 -- a given location, generally containing
                                 -- asserted + reference
                other       (255)
            },
            variations SET OF Variation-ref,
            name  VisibleString OPTIONAL
        },

        -- variant is a complex and undescribed change at the location
        -- This type of variant is known to occur in dbVar submissions
        complex NULL
    },

    consequence SET OF CHOICE {
        unknown     NULL,
        splicing    NULL, --some effect on splicing
        note        VisibleString,  --freeform

        -- Describe resulting variation in the product, e.g. missense,
        -- nonsense, silent, neutral, etc in a protein, that arises from
        -- THIS variation.
        variation   Variation-ref,

        -- see http://www.hgvs.org/mutnomen/recs-prot.html
        frameshift SEQUENCE {
            phase INTEGER OPTIONAL,
            x-length INTEGER OPTIONAL
        },

        loss-of-heterozygosity SEQUENCE {
            -- In germline comparison, it will be reference genome assembly
            -- (default) or reference/normal population. In somatic mutation,
            -- it will be a name of the normal tissue.
            reference VisibleString OPTIONAL,

            -- Name of the testing subject type or the testing tissue.
            test VisibleString OPTIONAL
        }
    } OPTIONAL,

    -- Observed location, if different from the parent set or feature.location.
    -- DEPRECATED - do not use
    location        Seq-loc OPTIONAL,

    -- reference other locs, e.g. mapped source
    -- DEPRECATED - do not use
    ext-locs SET OF Ext-loc OPTIONAL,

    -- DEPRECATED - do not use; use Seq-feat.exts instead
    ext             User-object OPTIONAL,

    somatic-origin SET OF SEQUENCE {
        -- description of the somatic origin itself
        source SubSource OPTIONAL,
        -- condition related to this origin's type
        condition SEQUENCE {
            description VisibleString OPTIONAL,
            -- reference to BioTerm / other descriptive database
            object-id SET OF Dbtag OPTIONAL
        } OPTIONAL
    } OPTIONAL

}


-- Delta-item: one step of a Variation-inst.delta edit script, i.e. a piece
-- of sequence, an optional multiplier and the action applied with it.
Delta-item ::= SEQUENCE {
    seq CHOICE {
        literal Seq-literal,
        loc Seq-loc,
        this NULL --same location as variation-ref itself
    } OPTIONAL,

    -- Multiplier allows representing a tandem, e.g.  ATATAT as AT*3
    -- This allows describing CNV/SSR where delta=self  with a
    -- multiplier which specifies the count of the repeat unit.

    multiplier          INTEGER OPTIONAL, --assumed 1 if not specified.
    multiplier-fuzz     Int-fuzz OPTIONAL,

    action INTEGER {

        -- replace len(seq) positions starting with location.start with seq
        morph      (0),

        -- go downstream by distance specified by multiplier (upstream if < 0),
        -- in genomic context.
        offset     (1),

        -- excise sequence at location
        -- if multiplier is specified, delete len(location)*multiplier
        -- positions downstream
        del-at     (2),

        -- insert seq before the location.start
        ins-before (3)

    } DEFAULT morph
}


-- Variation instance
Variation-inst ::= SEQUENCE {
    type INTEGER {
        unknown         (0),    -- delta=[]
        identity        (1),    -- delta=[]
        inv             (2),    -- delta=[del, ins.seq=
                                -- RevComp(variation-location)]
        snv             (3),    -- delta=[morph of length 1]
                                -- NOTE: this is snV not snP; the latter
                                -- requires frequency-based validation to be
                                -- established in VariantProperties
                                -- the strict definition of SNP is an SNV with
                                -- an established population frequency of at
                                -- least 1% in at least 1 population
        mnp             (4),    -- delta=[morph of length >1]
        delins          (5),    -- delta=[del, ins]
        del             (6),    -- delta=[del]
        ins             (7),    -- delta=[ins]
        microsatellite  (8),    -- delta=[del, ins.seq= repeat-unit with fuzzy
                                -- multiplier]
                                -- variation-location is the microsat expansion
                                -- on the sequence
        transposon      (9),    -- delta=[del, ins.seq= known donor or 'this']
                                -- variation-location is equiv of transposon
                                -- locs.
        cnv             (10),   -- delta=[del, ins= 'this' with fuzzy
                                -- multiplier]
        direct-copy     (11),   -- delta=[ins.seq= upstream location on the
                                -- same strand]
        rev-direct-copy (12),   -- delta=[ins.seq= downstream location on the
                                -- same strand]
        inverted-copy   (13),   -- delta=[ins.seq= upstream location on the
                                -- opposite strand]
        everted-copy    (14),   -- delta=[ins.seq= downstream location on the
                                -- opposite strand]
        translocation   (15),   -- delta=like delins
        prot-missense   (16),   -- delta=[morph of length 1]
        prot-nonsense   (17),   -- delta=[del]; variation-location is the tail
                                -- of the protein being truncated
        prot-neutral    (18),   -- delta=[morph of length 1]
        prot-silent     (19),   -- delta=[morph of length 1, same AA as at
                                -- variation-location]
        prot-other      (20),   -- delta=any

        other           (255)   -- delta=any
    },

    -- Sequence that replaces the location, in biological order.
    delta SEQUENCE OF Delta-item,

    -- 'observation' is used to label items in a Variation-ref package
    -- This field is explicitly a bit-field, so the bitwise OR (= sum) of any
    -- of the values may be observed.
    observation INTEGER {
        asserted    (1),        -- inst represents the asserted base at a
                                -- position
        reference   (2),        -- inst represents the reference base at the
                                -- position
        variant     (4)         -- inst represents the observed variant at a
                                -- given position
    } OPTIONAL
}

END


--**********************************************************************
--
--  NCBI Restriction Sites
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-Rsite DEFINITIONS ::=
BEGIN

EXPORTS Rsite-ref;

IMPORTS Dbtag FROM NCBI-General;

-- Rsite-ref: a restriction site named either by free text or by a
-- database tag.
Rsite-ref ::= CHOICE {
    str VisibleString ,     -- may be unparsable
    db  Dbtag }             -- pointer to a restriction site database

END

--**********************************************************************
--
--  NCBI RNAs
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-RNA DEFINITIONS ::=
BEGIN

EXPORTS RNA-ref, Trna-ext, RNA-gen, RNA-qual, RNA-qual-set;

IMPORTS Seq-loc FROM NCBI-Seqloc;

--*** rnas ***********************************************
--*
--*  various rnas
--*
                         -- minimal RNA sequence
RNA-ref ::= SEQUENCE {
    type ENUMERATED {            -- type of RNA feature
        unknown (0) ,
        premsg (1) ,
        mRNA (2) ,
        tRNA (3) ,
        rRNA (4) ,
        snRNA (5) ,              -- will become ncRNA, with RNA-gen.class = snRNA
        scRNA (6) ,              -- will become ncRNA, with RNA-gen.class = scRNA
        snoRNA (7) ,             -- will become ncRNA, with RNA-gen.class = snoRNA
        ncRNA (8) ,              -- non-coding RNA; subsumes snRNA, scRNA, snoRNA
        tmRNA (9) ,
        miscRNA (10) ,
        other (255) } ,
    pseudo BOOLEAN OPTIONAL ,
    ext CHOICE {
        name VisibleString ,        -- for naming "other" type
        tRNA Trna-ext ,             -- for tRNAs
        gen RNA-gen } OPTIONAL      -- generic fields for ncRNA, tmRNA, miscRNA
    }

Trna-ext ::= SEQUENCE {                 -- tRNA feature extensions
    aa CHOICE {                         -- aa this carries
        iupacaa INTEGER ,
        ncbieaa INTEGER ,
        ncbi8aa INTEGER ,
        ncbistdaa INTEGER } OPTIONAL ,
    codon SET OF INTEGER OPTIONAL ,     -- codon(s) as in Genetic-code
    anticodon Seq-loc OPTIONAL }        -- location of anticodon

RNA-gen ::= SEQUENCE {
    class VisibleString OPTIONAL ,      -- for ncRNAs, the class of non-coding RNA:
                                        -- examples: antisense_RNA, guide_RNA, snRNA
    product VisibleString OPTIONAL ,
    quals RNA-qual-set OPTIONAL         -- e.g., tag_peptide qualifier for tmRNAs
}

RNA-qual ::= SEQUENCE {                 -- Additional data values for RNA-gen,
    qual VisibleString ,                -- in a tag (qual), value (val) format
    val VisibleString }

RNA-qual-set ::= SEQUENCE OF RNA-qual

END

--**********************************************************************
--
--  NCBI Genes
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-Gene DEFINITIONS ::=
BEGIN

EXPORTS Gene-ref, Gene-nomenclature;

IMPORTS Dbtag FROM NCBI-General;

--*** Gene ***********************************************
--*
--*  reference to a gene
--*

Gene-ref ::= SEQUENCE {
    locus VisibleString OPTIONAL ,        -- Official gene symbol
    allele VisibleString OPTIONAL ,       -- Official allele designation
    desc VisibleString OPTIONAL ,         -- descriptive name
    maploc VisibleString OPTIONAL ,       -- descriptive map location
    pseudo BOOLEAN DEFAULT FALSE ,        -- pseudogene
    db SET OF Dbtag OPTIONAL ,            -- ids in other dbases
    syn SET OF VisibleString OPTIONAL ,   -- synonyms for locus
    locus-tag VisibleString OPTIONAL ,    -- systematic gene name (e.g., MI0001, ORF0069)
    formal-name Gene-nomenclature OPTIONAL
}

-- Gene-nomenclature: a gene symbol and name together with the status of
-- that nomenclature and the database it comes from.
Gene-nomenclature ::= SEQUENCE {
    status ENUMERATED {
        unknown (0) ,
        official (1) ,
        interim (2)
    } ,
    symbol VisibleString OPTIONAL ,
    name VisibleString OPTIONAL ,
    source Dbtag OPTIONAL
}

END


--**********************************************************************
--
--  NCBI Organism
--  by James Ostell, 1994
--  version 3.0
--
--**********************************************************************

NCBI-Organism DEFINITIONS ::=
BEGIN

EXPORTS Org-ref;

IMPORTS Dbtag FROM NCBI-General;

--*** Org-ref ***********************************************
--*
--*  Reference to an organism
--*     defines only the organism.. lower levels of detail for biological
--*     molecules are provided by the Source object
--*

Org-ref ::= SEQUENCE {
    taxname VisibleString OPTIONAL ,   -- preferred formal name
    common VisibleString OPTIONAL ,    -- common name
    mod SET OF VisibleString OPTIONAL , -- unstructured modifiers
    db SET OF Dbtag OPTIONAL ,         -- ids in taxonomic or culture dbases
    syn SET OF VisibleString OPTIONAL , -- synonyms for taxname or common
    orgname OrgName OPTIONAL }


-- OrgName: structured organism name with lineage, division and the
-- genetic-code ids that apply to the organism.
OrgName ::= SEQUENCE {
    name CHOICE {
        binomial BinomialOrgName ,         -- genus/species type name
        virus VisibleString ,              -- virus names are different
        hybrid MultiOrgName ,              -- hybrid between organisms
        namedhybrid BinomialOrgName ,      -- some hybrids have genus x species name
        partial PartialOrgName } OPTIONAL , -- when genus not known
    attrib VisibleString OPTIONAL ,        -- attribution of name
    mod SEQUENCE OF OrgMod OPTIONAL ,
    lineage VisibleString OPTIONAL ,       -- lineage with semicolon separators
    gcode INTEGER OPTIONAL ,               -- genetic code (see CdRegion)
    mgcode INTEGER OPTIONAL ,              -- mitochondrial genetic code
    div VisibleString OPTIONAL ,           -- GenBank division code
    pgcode INTEGER OPTIONAL }              -- plastid genetic code


-- OrgMod: one typed modifier of an organism name.  'subtype' says what
-- kind of modifier it is and 'subname' carries its text.
OrgMod ::= SEQUENCE {
    subtype INTEGER {
        strain (2) ,
        substrain (3) ,
        type (4) ,
        subtype (5) ,
        variety (6) ,
        serotype (7) ,
        serogroup (8) ,
        serovar (9) ,
        cultivar (10) ,
        pathovar (11) ,
        chemovar (12) ,
        biovar (13) ,
        biotype (14) ,
        group (15) ,
        subgroup (16) ,
        isolate (17) ,
        common (18) ,
        acronym (19) ,
        dosage (20) ,          -- chromosome dosage of hybrid
        nat-host (21) ,        -- natural host of this specimen
        sub-species (22) ,
        specimen-voucher (23) ,
        authority (24) ,
        forma (25) ,
        forma-specialis (26) ,
        ecotype (27) ,
        synonym (28) ,
        anamorph (29) ,
        teleomorph (30) ,
        breed (31) ,
        gb-acronym (32) ,       -- used by taxonomy database
        gb-anamorph (33) ,      -- used by taxonomy database
        gb-synonym (34) ,       -- used by taxonomy database
        culture-collection (35) ,
        bio-material (36) ,
        metagenome-source (37) ,
        type-material (38) ,
        old-lineage (253) ,
        old-name (254) ,
        other (255) } ,         -- ASN5: old-name (254) will be added to next spec
                                -- (historical note: old-name (254) is already
                                -- present in the list above)
    subname VisibleString ,
    attrib VisibleString OPTIONAL }  -- attribution/source of name

BinomialOrgName ::= SEQUENCE {
    genus VisibleString ,               -- required
    species VisibleString OPTIONAL ,    -- species required if subspecies used
    subspecies VisibleString OPTIONAL }

MultiOrgName ::= SEQUENCE OF OrgName   -- the first will be used to assign division

PartialOrgName ::= SEQUENCE OF TaxElement  -- when we don't know the genus

-- TaxElement: one named taxonomic level.  When fixed-level is 'other' the
-- level name is carried in the 'level' string instead.
TaxElement ::= SEQUENCE {
    fixed-level INTEGER {
       other (0) ,                     -- level must be set in string
       family (1) ,
       order (2) ,
       class (3) } ,
    level VisibleString OPTIONAL ,
    name VisibleString }

END


--**********************************************************************
--
--  NCBI BioSource
--  by James Ostell, 1994
--  version 3.0
--
--**********************************************************************

NCBI-BioSource DEFINITIONS ::=
BEGIN

EXPORTS BioSource, SubSource;

IMPORTS Org-ref FROM NCBI-Organism;

--********************************************************************
--
-- BioSource gives the source of the biological material
--   for sequences
--
--********************************************************************

BioSource ::= SEQUENCE {
    genome INTEGER {         -- biological context
        unknown (0) ,
        genomic (1) ,
        chloroplast (2) ,
        chromoplast (3) ,
        kinetoplast (4) ,
        mitochondrion (5) ,
        plastid (6) ,
        macronuclear (7) ,
        extrachrom (8) ,
        plasmid (9) ,
        transposon (10) ,
        insertion-seq (11) ,
        cyanelle (12) ,
        proviral (13) ,
        virion (14) ,
        nucleomorph (15) ,
        apicoplast (16) ,
        leucoplast (17) ,
        proplastid (18) ,
        endogenous-virus (19) ,
        hydrogenosome (20) ,
        chromosome (21) ,
        chromatophore (22) ,
        plasmid-in-mitochondrion (23) ,
        plasmid-in-plastid (24)
      } DEFAULT unknown ,
    origin INTEGER {
      unknown (0) ,
      natural (1) ,                    -- normal biological entity
      natmut (2) ,                     -- naturally occurring mutant
      mut (3) ,                        -- artificially mutagenized
      artificial (4) ,                 -- artificially engineered
      synthetic (5) ,                  -- purely synthetic
      other (255)
    } DEFAULT unknown ,
    org Org-ref ,                      -- the organism; the only mandatory field
    subtype SEQUENCE OF SubSource OPTIONAL ,
    is-focus NULL OPTIONAL ,           -- to distinguish biological focus
    pcr-primers PCRReactionSet OPTIONAL }

-- PCR primer description: a set of reactions, each with optional sets of
-- forward and reverse primers; every primer has an optional sequence
-- and an optional name.
PCRReactionSet ::= SET OF PCRReaction

PCRReaction ::= SEQUENCE {
    forward PCRPrimerSet OPTIONAL ,
    reverse PCRPrimerSet OPTIONAL }

PCRPrimerSet ::= SET OF PCRPrimer

PCRPrimer ::= SEQUENCE {
    seq PCRPrimerSeq OPTIONAL ,
    name PCRPrimerName OPTIONAL }

PCRPrimerSeq ::= VisibleString

PCRPrimerName ::= VisibleString

-- a typed name/value qualifier attached to a BioSource
SubSource ::= SEQUENCE {
    subtype INTEGER {
        chromosome (1),
        map (2),
        clone (3),
        subclone (4),
        haplotype (5),
        genotype (6),
        sex (7),
        cell-line (8),
        cell-type (9),
        tissue-type (10),
        clone-lib (11),
        dev-stage (12),
        frequency (13),
        germline (14),
        rearranged (15),
        lab-host (16),
        pop-variant (17),
        tissue-lib (18),
        plasmid-name (19),
        transposon-name (20),
        insertion-seq-name (21),
        plastid-name (22),
        country (23),
        segment (24),
        endogenous-virus-name (25),
        transgenic (26),
        environmental-sample (27),
        isolation-source (28),
        lat-lon (29),                   -- +/- decimal degrees
        collection-date (30),           -- DD-MMM-YYYY format
        collected-by (31),              -- name of person who collected the sample
        identified-by (32),             -- name of person who identified the sample
        fwd-primer-seq (33),            -- sequence (possibly more than one; semicolon-separated)
        rev-primer-seq (34),            -- sequence (possibly more than one; semicolon-separated)
        fwd-primer-name (35),
        rev-primer-name (36),
        metagenomic (37),
        mating-type (38),
        linkage-group (39),
        haplogroup (40),
        whole-replicon (41),
        phenotype (42),
        altitude (43),
        other (255)
    },
    name VisibleString,
    attrib VisibleString OPTIONAL       -- attribution/source of this name
}

END

--**********************************************************************
--
--  NCBI Protein
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-Protein DEFINITIONS ::=
BEGIN

EXPORTS Prot-ref;

IMPORTS Dbtag FROM NCBI-General;

--*** Prot-ref ***********************************************
--*
--*  Reference to a protein name
--*

Prot-ref ::= SEQUENCE {
    name SET OF VisibleString OPTIONAL,         -- protein name
    desc VisibleString OPTIONAL,                -- description (instead of name)
    ec SET OF VisibleString OPTIONAL,           -- E.C. number(s)
    activity SET OF VisibleString OPTIONAL,     -- activities
    db SET OF Dbtag OPTIONAL,                   -- ids in other dbases
    processed ENUMERATED {                      -- processing status
        not-set (0),
        preprotein (1),
        mature (2),
        signal-peptide (3),
        transit-peptide (4),
        propeptide (5)
    } DEFAULT not-set
}

END
--********************************************************************
--
--  Transcription Initiation Site Feature Data Block
--  James Ostell, 1991
--  Philip Bucher, David Ghosh
--  version 1.1
--
--
--
--********************************************************************

NCBI-TxInit DEFINITIONS ::=
BEGIN

EXPORTS Txinit;

IMPORTS Gene-ref FROM NCBI-Gene
        Prot-ref FROM NCBI-Protein
        Org-ref FROM NCBI-Organism;

Txinit ::= SEQUENCE {
    name VisibleString,                         -- descriptive name of initiation site
    syn SEQUENCE OF VisibleString OPTIONAL,     -- synonyms
    gene SEQUENCE OF Gene-ref OPTIONAL,         -- gene(s) transcribed
    protein SEQUENCE OF Prot-ref OPTIONAL,      -- protein(s) produced
    rna SEQUENCE OF VisibleString OPTIONAL,     -- rna(s) produced
    expression VisibleString OPTIONAL,          -- tissue/time of expression
    txsystem ENUMERATED {                       -- transcription apparatus used at this site
        unknown (0),
        pol1 (1),                               -- eukaryotic Pol I
        pol2 (2),                               -- eukaryotic Pol II
        pol3 (3),                               -- eukaryotic Pol III
        bacterial (4),
        viral (5),
        rna (6),                                -- RNA replicase
        organelle (7),
        other (255)
    },
    txdescr VisibleString OPTIONAL,             -- modifiers on txsystem
    txorg Org-ref OPTIONAL,                     -- organism supplying transcription apparatus
    mapping-precise BOOLEAN DEFAULT FALSE,      -- mapping precise or approx
    location-accurate BOOLEAN DEFAULT FALSE,    -- does Seq-loc reflect mapping
    inittype ENUMERATED {
        unknown (0),
        single (1),
        multiple (2),
        region (3)
    } OPTIONAL,
    evidence SET OF Tx-evidence OPTIONAL
}

Tx-evidence ::= SEQUENCE {
    exp-code ENUMERATED {
        unknown (0),
        rna-seq (1),        -- direct RNA sequencing
        rna-size (2),       -- RNA length measurement
        np-map (3),         -- nuclease protection mapping with homologous sequence ladder
        np-size (4),        -- nuclease protected fragment length measurement
        pe-seq (5),         -- dideoxy RNA sequencing
        cDNA-seq (6),       -- full-length cDNA sequencing
        pe-map (7),         -- primer extension mapping with homologous sequence ladder
        pe-size (8),        -- primer extension product length measurement
        pseudo-seq (9),     -- full-length processed pseudogene sequencing
        rev-pe-map (10),    -- see NOTE (1) below
        other (255)
    },
    expression-system ENUMERATED {
        unknown (0),
        physiological (1),
        in-vitro (2),
        oocyte (3),
        transfection (4),
        transgenic (5),
        other (255)
    } DEFAULT physiological,
    low-prec-data BOOLEAN DEFAULT FALSE,
    from-homolog BOOLEAN DEFAULT FALSE          -- experiment actually done on
                                                -- close homolog
}

    -- NOTE (1) length measurement of a reverse direction primer-extension
    --          product (blocked by RNA 5'end) by comparison with
    --          homologous sequence ladder (J. Mol. Biol. 199, 587)

END

--$Revision: 1.10 $
-- ----------------------------------------------------------------------------
--
--                            PUBLIC DOMAIN NOTICE
--                National Center for Biotechnology Information
--
-- This software/database is a "United States Government Work" under the terms
-- of the United States Copyright Act.  It was written as part of the author's
-- official duties as a United States Government employee and thus cannot be
-- copyrighted.
-- This software/database is freely available to the public for
-- use.  The National Library of Medicine and the U.S. Government have not
-- placed any restriction on its use or reproduction.
--
-- Although all reasonable efforts have been taken to ensure the accuracy and
-- reliability of the software and data, the NLM and the U.S. Government do not
-- and cannot warrant the performance or results that may be obtained by using
-- this software or data.  The NLM and the U.S. Government disclaim all
-- warranties, express or implied, including warranties of performance,
-- merchantability or fitness for any particular purpose.
--
-- Please cite the authors in any work or product based on this material.
--
-- ----------------------------------------------------------------------------
--
-- Authors: Mike DiCuccio, Eugene Vasilchenko
--
-- ASN.1 interface to table readers
--
-- ----------------------------------------------------------------------------

NCBI-SeqTable DEFINITIONS ::=

BEGIN

EXPORTS
    SeqTable-column-info, SeqTable-column, Seq-table;

IMPORTS
    Seq-id, Seq-loc, Seq-interval FROM NCBI-Seqloc;


SeqTable-column-info ::= SEQUENCE {
    -- user friendly column name, can be skipped
    title VisibleString OPTIONAL,

    -- identification of the column data in the objects described by the table
    field-id INTEGER {                  -- known column data types
        -- position types
        location (0),                   -- location as Seq-loc
        location-id (1),                -- location Seq-id
        location-gi (2),                -- gi
        location-from (3),              -- interval from
        location-to (4),                -- interval to
        location-strand (5),            -- location strand
        location-fuzz-from-lim (6),
        location-fuzz-to-lim (7),

        product (10),                   -- product as Seq-loc
        product-id (11),                -- product Seq-id
        product-gi (12),                -- product gi
        product-from (13),              -- product interval from
        product-to (14),                -- product interval to
        product-strand (15),            -- product strand
        product-fuzz-from-lim (16),
        product-fuzz-to-lim (17),

        -- main feature fields
        id-local (20),                  -- id.local.id
        xref-id-local (21),             -- xref.id.local.id
        partial (22),
        comment (23),
        title (24),
        ext (25),                       -- field-name must be "E.xxx", see below
        qual (26),                      -- field-name must be "Q.xxx", see below
        dbxref (27),                    -- field-name must be "D.xxx", see below

        -- various data fields
        data-imp-key (30),
        data-region (31),
        data-cdregion-frame (32),

        -- extra fields, see also special values for str below
        ext-type (40),
        qual-qual (41),
        qual-val (42),
        dbxref-db (43),
        dbxref-tag (44)
    } OPTIONAL,

    -- any column can be identified by ASN.1 text locator string
    -- with omitted object type.
    -- examples:
    --   "data.gene.locus" for Seq-feat.data.gene.locus
    --   "data.imp.key" for Seq-feat.data.imp.key
    --   "qual.qual"
    --    - Seq-feat.qual is SEQUENCE so several columns are allowed
    --      see also "Q.xxx" special value for shorter qual representation
    --   "ext.type.str"
    --   "ext.data.label.str"
    --   "ext.data.data.int"
    --      see also "E.xxx" special value for shorter ext representation
    -- special values start with capital letter:
    --   "E.xxx" - ext.data.label.str = xxx, ext.data.data = data
    --    - Seq-feat.ext.data is SEQUENCE so several columns are allowed
    --   "Q.xxx" - qual.qual = xxx, qual.val = data
    --    - Seq-feat.qual is SEQUENCE so several columns are allowed
    --   "D.xxx" - dbxref.id = xxx, dbxref.tag = data
    --    - Seq-feat.dbxref is SET so several columns are allowed
    field-name VisibleString OPTIONAL
}


CommonString-table ::= SEQUENCE {
    -- set of possible values
    strings SEQUENCE OF UTF8String,

    -- indexes of values in array 'strings' for each data row
    indexes SEQUENCE OF INTEGER
}


CommonBytes-table ::= SEQUENCE {
    -- set of possible values
    bytes SEQUENCE OF OCTET STRING,

    -- indexes of values in array 'bytes' for each data row
    indexes SEQUENCE OF INTEGER
}


Scaled-int-multi-data ::= SEQUENCE {
    -- output data[i] = data[i]*mul+add
    mul INTEGER,
    add INTEGER,
    data SeqTable-multi-data,

    -- min/max scaled value
    -- should be set if scaled values may not fit in 32-bit signed integer
    min INTEGER OPTIONAL,
    max INTEGER OPTIONAL
}


Scaled-real-multi-data ::= SEQUENCE {
    -- output data[i] = data[i]*mul+add
    mul REAL,
    add REAL,
    data SeqTable-multi-data
}


-- Class for serializing bm::bvector<>
-- see include/util/bitset/bm.h
-- Since bvector<> serialization doesn't keep size we have to add it explicitly
BVector-data ::= SEQUENCE {
    size INTEGER,
    data OCTET STRING
}


SeqTable-multi-data ::= CHOICE {
    -- a set of 4-byte integers, one per row
    int SEQUENCE OF INTEGER,

    -- a set of reals, one per row
    real SEQUENCE OF REAL,

    -- a set of strings, one per row
    string SEQUENCE OF UTF8String,

    -- a set of byte arrays, one per row
    bytes SEQUENCE OF OCTET STRING,

    -- a set of string with small set of possible values
    common-string CommonString-table,

    -- a set of byte arrays with small set of possible values
    common-bytes CommonBytes-table,

    -- a set of bits, one per row
    -- Most-significant bit in each octet comes first.
    bit OCTET STRING,

    -- a set of locations, one per row
    loc SEQUENCE OF Seq-loc,
    id SEQUENCE OF Seq-id,
    interval SEQUENCE OF Seq-interval,

    -- delta-encoded data (int/bit -> int)
    int-delta SeqTable-multi-data,

    -- scaled data (int/bit -> int)
    int-scaled Scaled-int-multi-data,

    -- scaled data (int/bit -> real)
    real-scaled Scaled-real-multi-data,

    -- a set of bit, represented as serialized bvector,
    -- see include/util/bitset/bm.h
    bit-bvector BVector-data,

    -- a set of signed 1-byte integers encoded as sequential octets
    int1 OCTET STRING,

    -- a set of signed 2-byte integers
    int2 SEQUENCE OF INTEGER,

    -- a set of signed 8-byte integers
    int8 SEQUENCE OF INTEGER
}


SeqTable-single-data ::= CHOICE {
    -- integer
    int INTEGER,

    -- real
    real REAL,

    -- string
    string UTF8String,

    -- byte array
    bytes OCTET STRING,

    -- bit
    bit BOOLEAN,

    -- location
    loc Seq-loc,
    id Seq-id,
    interval Seq-interval,

    int8 INTEGER
}


SeqTable-sparse-index ::= CHOICE {
    -- Indexes of rows with values
    indexes SEQUENCE OF INTEGER,

    -- Bitset of rows with values, set bit means the row has value.
    -- Most-significant bit in each octet comes first.
    bit-set OCTET STRING,

    -- Indexes of rows with values, delta-encoded
    indexes-delta SEQUENCE OF INTEGER,

    -- Bitset of rows with values, as serialized bvector<>,
    -- see include/util/bitset/bm.h
    bit-set-bvector BVector-data
}


SeqTable-column ::= SEQUENCE {
    -- column description or reference to previously defined info
    header SeqTable-column-info,        -- information about data

    -- row data
    data SeqTable-multi-data OPTIONAL,

    -- in case not all rows contain data this field will contain sparse info
    sparse SeqTable-sparse-index OPTIONAL,

    -- default value for sparse table, or if row data is too short
    default SeqTable-single-data OPTIONAL,

    -- single value for indexes not listed in sparse table
    sparse-other SeqTable-single-data OPTIONAL
}


Seq-table ::= SEQUENCE {
    -- type of features in this table, equal to Seq-feat.data variant index
    feat-type INTEGER,

    -- subtype of features in this table, defined in header SeqFeatData.hpp
    feat-subtype INTEGER OPTIONAL,

    -- number of rows
    num-rows INTEGER,

    -- data in columns
    columns SEQUENCE OF SeqTable-column
}


END
--$Revision: 6.4 $
--**********************************************************************
--
--  NCBI Sequence Alignment elements
--  by James Ostell, 1990
--
--**********************************************************************

NCBI-Seqalign DEFINITIONS ::=
BEGIN

EXPORTS Seq-align, Score, Score-set, Seq-align-set;

IMPORTS Seq-id, Seq-loc , Na-strand FROM NCBI-Seqloc
        User-object, Object-id FROM NCBI-General;

--*** Sequence Alignment ********************************
--*

Seq-align-set ::= SET OF Seq-align

Seq-align ::= SEQUENCE {
    type ENUMERATED {
        not-set (0),
        global (1),
        diags (2),                      -- unbroken, but not ordered, diagonals
        partial (3),                    -- mapping pieces together
        disc (4),                       -- discontinuous alignment
        other (255)
    },
    dim INTEGER OPTIONAL,               -- dimensionality
    score SET OF Score OPTIONAL,        -- for whole alignment
    segs CHOICE {                       -- alignment data
        dendiag SEQUENCE OF Dense-diag,
        denseg Dense-seg,
        std SEQUENCE OF Std-seg,
        packed Packed-seg,
        disc Seq-align-set,
        spliced Spliced-seg,
        sparse Sparse-seg
    },

    -- regions of sequence over which align
    --  was computed
    bounds SET OF Seq-loc OPTIONAL,

    -- alignment id
    id SEQUENCE OF Object-id OPTIONAL,

    --extra info
    ext SEQUENCE OF User-object OPTIONAL
}

Dense-diag ::= SEQUENCE {               -- for (multiway) diagonals
    dim INTEGER DEFAULT 2,              -- dimensionality
    ids SEQUENCE OF Seq-id,             -- sequences in order
    starts SEQUENCE OF INTEGER,         -- start OFFSETS in ids order
    len INTEGER,                        -- len of aligned segments
    strands SEQUENCE OF Na-strand OPTIONAL,
    scores SET OF Score OPTIONAL
}

    -- Dense-seg: the densest packing for sequence alignments only.
    --            a start of -1 indicates a gap for that sequence of
    --            length lens.
3337 -- 3338 -- id=100 AAGGCCTTTTAGAGATGATGATGATGATGA 3339 -- id=200 AAGGCCTTTTAG.......GATGATGATGA 3340 -- id=300 ....CCTTTTAGAGATGATGAT....ATGA 3341 -- 3342 -- dim = 3, numseg = 6, ids = { 100, 200, 300 } 3343 -- starts = { 0,0,-1, 4,4,0, 12,-1,8, 19,12,15, 22,15,-1, 26,19,18 } 3344 -- lens = { 4, 8, 7, 3, 4, 4 } 3345 -- 3346 3347Dense-seg ::= SEQUENCE { -- for (multiway) global or partial alignments 3348 dim INTEGER DEFAULT 2 , -- dimensionality 3349 numseg INTEGER , -- number of segments here 3350 ids SEQUENCE OF Seq-id , -- sequences in order 3351 starts SEQUENCE OF INTEGER , -- start OFFSETS in ids order within segs 3352 lens SEQUENCE OF INTEGER , -- lengths in ids order within segs 3353 strands SEQUENCE OF Na-strand OPTIONAL , 3354 scores SEQUENCE OF Score OPTIONAL } -- score for each seg 3355 3356Packed-seg ::= SEQUENCE { -- for (multiway) global or partial alignments 3357 dim INTEGER DEFAULT 2 , -- dimensionality 3358 numseg INTEGER , -- number of segments here 3359 ids SEQUENCE OF Seq-id , -- sequences in order 3360 starts SEQUENCE OF INTEGER , -- start OFFSETS in ids order for whole alignment 3361 present OCTET STRING , -- Boolean if each sequence present or absent in 3362 -- each segment 3363 lens SEQUENCE OF INTEGER , -- length of each segment 3364 strands SEQUENCE OF Na-strand OPTIONAL , 3365 scores SEQUENCE OF Score OPTIONAL } -- score for each segment 3366 3367Std-seg ::= SEQUENCE { 3368 dim INTEGER DEFAULT 2 , -- dimensionality 3369 ids SEQUENCE OF Seq-id OPTIONAL , 3370 loc SEQUENCE OF Seq-loc , 3371 scores SET OF Score OPTIONAL } 3372 3373 3374Spliced-seg ::= SEQUENCE { 3375 -- product is either protein or transcript (cDNA) 3376 product-id Seq-id OPTIONAL, 3377 genomic-id Seq-id OPTIONAL, 3378 3379 -- should be 'plus' or 'minus' 3380 product-strand Na-strand OPTIONAL , 3381 genomic-strand Na-strand OPTIONAL , 3382 3383 product-type ENUMERATED { 3384 transcript(0), 3385 protein(1) 3386 }, 3387 3388 -- set of segments involved 3389 -- each segment 
corresponds to one exon 3390 -- exons are always in biological order 3391 exons SEQUENCE OF Spliced-exon , 3392 3393 -- optional poly(A) tail 3394 poly-a INTEGER OPTIONAL, 3395 3396 -- length of the product, in bases/residues 3397 -- from this, a 3' unaligned length can be extracted; this also captures 3398 -- the case in which a protein aligns leaving a partial codon alignment 3399 -- at the 3' end 3400 product-length INTEGER OPTIONAL, 3401 3402 -- alignment descriptors / modifiers 3403 -- this provides us a set for extension 3404 modifiers SET OF Spliced-seg-modifier OPTIONAL 3405} 3406 3407Spliced-seg-modifier ::= CHOICE { 3408 -- protein aligns from the start and the first codon 3409 -- on both product and genomic is start codon 3410 start-codon-found BOOLEAN, 3411 3412 -- protein aligns to it's end and there is stop codon 3413 -- on the genomic right after the alignment 3414 stop-codon-found BOOLEAN 3415} 3416 3417 3418-- complete or partial exon 3419-- two consecutive Spliced-exons may belong to one exon 3420Spliced-exon ::= SEQUENCE { 3421 -- product-end >= product-start 3422 product-start Product-pos , 3423 product-end Product-pos , 3424 3425 -- genomic-end >= genomic-start 3426 genomic-start INTEGER , 3427 genomic-end INTEGER , 3428 3429 -- product is either protein or transcript (cDNA) 3430 product-id Seq-id OPTIONAL , 3431 genomic-id Seq-id OPTIONAL , 3432 3433 -- should be 'plus' or 'minus' 3434 product-strand Na-strand OPTIONAL , 3435 3436 -- genomic-strand represents the strand of translation 3437 genomic-strand Na-strand OPTIONAL , 3438 3439 -- basic seqments always are in biologic order 3440 parts SEQUENCE OF Spliced-exon-chunk OPTIONAL , 3441 3442 -- scores for this exon 3443 scores Score-set OPTIONAL , 3444 3445 -- splice sites 3446 acceptor-before-exon Splice-site OPTIONAL, 3447 donor-after-exon Splice-site OPTIONAL, 3448 3449 -- flag: is this exon complete or partial? 
3450 partial BOOLEAN OPTIONAL, 3451 3452 --extra info 3453 ext SEQUENCE OF User-object OPTIONAL 3454} 3455 3456 3457Product-pos ::= CHOICE { 3458 nucpos INTEGER, 3459 protpos Prot-pos 3460} 3461 3462 3463-- codon based position on protein (1/3 of aminoacid) 3464Prot-pos ::= SEQUENCE { 3465 -- standard protein position 3466 amin INTEGER , 3467 3468 -- 0, 1, 2, or 3 as for Cdregion 3469 -- 0 = not set 3470 -- 1, 2, 3 = actual frame 3471 frame INTEGER DEFAULT 0 3472} 3473 3474 3475-- Spliced-exon-chunk: piece of an exon 3476-- lengths are given in nucleotide bases (1/3 of aminoacid when product is a 3477-- protein) 3478Spliced-exon-chunk ::= CHOICE { 3479 -- both sequences represented, product and genomic sequences match 3480 match INTEGER , 3481 3482 -- both sequences represented, product and genomic sequences do not match 3483 mismatch INTEGER , 3484 3485 -- both sequences are represented, there is sufficient similarity 3486 -- between product and genomic sequences. Can be used to replace stretches 3487 -- of matches and mismatches, mostly for protein to genomic where 3488 -- definition of match or mismatch depends on translation table 3489 diag INTEGER , 3490 3491 -- insertion in product sequence (i.e. gap in the genomic sequence) 3492 product-ins INTEGER , 3493 3494 -- insertion in genomic sequence (i.e. 
gap in the product sequence) 3495 genomic-ins INTEGER 3496} 3497 3498 3499-- site involved in splice 3500Splice-site ::= SEQUENCE { 3501 -- typically two bases in the intronic region, always 3502 -- in IUPAC format 3503 bases VisibleString 3504} 3505 3506 3507-- ========================================================================== 3508-- 3509-- Sparse-seg follows the semantics of dense-seg and is more optimal for 3510-- representing sparse multiple alignments 3511-- 3512-- ========================================================================== 3513 3514 3515Sparse-seg ::= SEQUENCE { 3516 master-id Seq-id OPTIONAL, 3517 3518 -- pairwise alignments constituting this multiple alignment 3519 rows SET OF Sparse-align, 3520 3521 -- per-row scores 3522 row-scores SET OF Score OPTIONAL, 3523 3524 -- index of extra items 3525 ext SET OF Sparse-seg-ext OPTIONAL 3526} 3527 3528Sparse-align ::= SEQUENCE { 3529 first-id Seq-id, 3530 second-id Seq-id, 3531 3532 numseg INTEGER, --number of segments 3533 first-starts SEQUENCE OF INTEGER , --starts on the first sequence [numseg] 3534 second-starts SEQUENCE OF INTEGER , --starts on the second sequence [numseg] 3535 lens SEQUENCE OF INTEGER , --lengths of segments [numseg] 3536 second-strands SEQUENCE OF Na-strand OPTIONAL , 3537 3538 -- per-segment scores 3539 seg-scores SET OF Score OPTIONAL 3540} 3541 3542Sparse-seg-ext ::= SEQUENCE { 3543 --seg-ext SET OF { 3544 -- index INTEGER, 3545 -- data User-field 3546 -- } 3547 index INTEGER 3548} 3549 3550 3551 3552-- use of Score is discouraged for external ASN.1 specifications 3553Score ::= SEQUENCE { 3554 id Object-id OPTIONAL , 3555 value CHOICE { 3556 real REAL , 3557 int INTEGER 3558 } 3559} 3560 3561-- use of Score-set is encouraged for external ASN.1 specifications 3562Score-set ::= SET OF Score 3563 3564END 3565 3566--$Revision: 6.0 $ 3567--********************************************************************** 3568-- 3569-- NCBI Sequence Analysis Results (other than 
--  alignments)
--  by James Ostell, 1990
--
--**********************************************************************

NCBI-Seqres DEFINITIONS ::=
BEGIN

EXPORTS Seq-graph;

IMPORTS Seq-loc FROM NCBI-Seqloc;

--*** Sequence Graph ********************************
--*
--*   for values mapped by residue or range to sequence
--*

Seq-graph ::= SEQUENCE {
    title VisibleString OPTIONAL,
    comment VisibleString OPTIONAL,
    loc Seq-loc,                        -- region this applies to
    title-x VisibleString OPTIONAL,     -- title for x-axis
    title-y VisibleString OPTIONAL,
    comp INTEGER OPTIONAL,              -- compression (residues/value)
    a REAL OPTIONAL,                    -- for scaling values
    b REAL OPTIONAL,                    -- display = (a x value) + b
    numval INTEGER,                     -- number of values in graph
    graph CHOICE {
        real Real-graph,
        int Int-graph,
        byte Byte-graph
    }
}

Real-graph ::= SEQUENCE {
    max REAL,                           -- top of graph
    min REAL,                           -- bottom of graph
    axis REAL,                          -- value to draw axis on
    values SEQUENCE OF REAL
}

Int-graph ::= SEQUENCE {
    max INTEGER,
    min INTEGER,
    axis INTEGER,
    values SEQUENCE OF INTEGER
}

Byte-graph ::= SEQUENCE {               -- integer from 0-255
    max INTEGER,
    min INTEGER,
    axis INTEGER,
    values OCTET STRING
}

END

--$Revision: 6.1 $
--********************************************************************
--
--  Direct Submission of Sequence Data
--  James Ostell, 1991
--
--  This is a trial specification for direct submission of sequence
--    data worked out between NCBI and EMBL
--  Later revised to reflect work with GenBank and Integrated database
--
--  Version 3.0, 1994
--    This is the official NCBI sequence submission format now.
--
--********************************************************************

NCBI-Submit DEFINITIONS ::=
BEGIN

EXPORTS Seq-submit, Contact-info;

IMPORTS Cit-sub, Author FROM NCBI-Biblio
        Date, Object-id FROM NCBI-General
        Seq-annot FROM NCBI-Sequence
        Seq-id FROM NCBI-Seqloc
        Seq-entry FROM NCBI-Seqset;

Seq-submit ::= SEQUENCE {
    sub Submit-block,
    data CHOICE {
        entrys SET OF Seq-entry,        -- sequence(s)
        annots SET OF Seq-annot,        -- annotation(s)
        delete SET OF Seq-id            -- deletions of entries
    }
}

Submit-block ::= SEQUENCE {
    contact Contact-info,               -- who to contact
    cit Cit-sub,                        -- citation for this submission
    hup BOOLEAN DEFAULT FALSE,          -- hold until publish
    reldate Date OPTIONAL,              -- release by date
    subtype INTEGER {                   -- type of submission
        new (1),                        -- new data
        update (2),                     -- update by author
        revision (3),                   -- 3rd party (non-author) update
        other (255)
    } OPTIONAL,
    tool VisibleString OPTIONAL,        -- tool used to make submission
    user-tag VisibleString OPTIONAL,    -- user supplied id for this submission
    comment VisibleString OPTIONAL      -- user comments/advice to database
}

Contact-info ::= SEQUENCE {             -- who to contact to discuss the submission
    name VisibleString OPTIONAL,        -- OBSOLETE: will be removed
    address SEQUENCE OF VisibleString OPTIONAL,
    phone VisibleString OPTIONAL,
    fax VisibleString OPTIONAL,
    email VisibleString OPTIONAL,
    telex VisibleString OPTIONAL,
    owner-id Object-id OPTIONAL,        -- for owner accounts
    password OCTET STRING OPTIONAL,
    last-name VisibleString OPTIONAL,   -- structured to replace name above
    first-name VisibleString OPTIONAL,
    middle-initial VisibleString OPTIONAL,
    contact Author OPTIONAL             -- WARNING: this will replace the above
}

END

--$Revision: 1.15 $
--**********************************************************************
--
--  Definitions for Cn3D-specific data (rendering settings,
--    user annotations, etc.)
--
--  by Paul Thiessen
--
--  National Center for Biotechnology Information
--  National Institutes of Health
--  Bethesda, MD 20894 USA
--
-- asntool -m cn3d.asn -w 100 -o cn3d.h
-- asntool -B objcn3d -m cn3d.asn -G -w 100 -K cn3d.h -I mapcn3d.h \
--   -M ../mmdb1.asn,../mmdb2.asn,../mmdb3.asn
--**********************************************************************

NCBI-Cn3d DEFINITIONS ::=
-- Cn3D-specific information

BEGIN

EXPORTS Cn3d-style-dictionary, Cn3d-user-annotations;

IMPORTS Biostruc-id FROM MMDB
        Molecule-id, Residue-id FROM MMDB-Chemical-graph;


-- values of enumerations must match those in cn3d/style_manager.hpp!

Cn3d-backbone-type ::= ENUMERATED {     -- for different types of backbones
    off (1),
    trace (2),
    partial (3),
    complete (4)
}

Cn3d-drawing-style ::= ENUMERATED {     -- atom/bond/object rendering styles
    -- for atoms and bonds
    wire (1),
    tubes (2),
    ball-and-stick (3),
    space-fill (4),
    wire-worm (5),
    tube-worm (6),
    -- for 3d-objects
    with-arrows (7),
    without-arrows (8)
}

Cn3d-color-scheme ::= ENUMERATED {      -- available color schemes (not all
                                        -- necessarily applicable to all objects)
    element (1),
    object (2),
    molecule (3),
    domain (4),
    residue (20),
    secondary-structure (5),
    user-select (6),
    -- different alignment conservation coloring (currently only for proteins)
    aligned (7),
    identity (8),
    variety (9),
    weighted-variety (10),
    information-content (11),
    fit (12),
    block-fit (17),
    block-z-fit (18),
    block-row-fit (19),
    -- other schemes
    temperature (13),
    hydrophobicity (14),
    charge (15),
    rainbow (16)
}

-- RGB triplet, interpreted (after division by the scale-factor) as floating
-- point values which should range from [0..1]. The default scale-factor is
-- 255, so that one can conveniently set integer byte values [0..255] for
-- colors with the scale-factor already set appropriately to map to [0..1].
-- An alpha value is allowed, but is currently ignored by Cn3D.
Cn3d-color ::= SEQUENCE {
    scale-factor INTEGER DEFAULT 255,
    red INTEGER,
    green INTEGER,
    blue INTEGER,
    alpha INTEGER DEFAULT 255
}

Cn3d-backbone-style ::= SEQUENCE {      -- style blob for backbones only
    type Cn3d-backbone-type,
    style Cn3d-drawing-style,
    color-scheme Cn3d-color-scheme,
    user-color Cn3d-color
}

Cn3d-general-style ::= SEQUENCE {       -- style blob for other objects
    is-on BOOLEAN,
    style Cn3d-drawing-style,
    color-scheme Cn3d-color-scheme,
    user-color Cn3d-color
}

Cn3d-backbone-label-style ::= SEQUENCE {    -- style blob for backbone labels
    spacing INTEGER,                    -- zero means none
    type ENUMERATED {
        one-letter (1),
        three-letter (2)
    },
    number ENUMERATED {
        none (0),
        sequential (1),                 -- from 1, by residues present, to match sequence
        pdb (2)                         -- use number assigned by PDB
    },
    termini BOOLEAN,
    white BOOLEAN                       -- all white, or (if false) color of alpha carbon
}

-- rendering settings for Cn3D (mirrors StyleSettings class)
Cn3d-style-settings ::= SEQUENCE {
    name VisibleString OPTIONAL,                -- a name (for favorites)
    protein-backbone Cn3d-backbone-style,       -- backbone styles
    nucleotide-backbone Cn3d-backbone-style,
    protein-sidechains Cn3d-general-style,      -- styles for other stuff
    nucleotide-sidechains Cn3d-general-style,
    heterogens Cn3d-general-style,
    solvents Cn3d-general-style,
    connections Cn3d-general-style,
    helix-objects Cn3d-general-style,
    strand-objects Cn3d-general-style,
    virtual-disulfides-on BOOLEAN,              -- virtual disulfides
    virtual-disulfide-color Cn3d-color,
    hydrogens-on BOOLEAN,                       -- hydrogens
    background-color Cn3d-color,                -- background
    -- floating point parameters - scale-factor applies to all the following:
    scale-factor INTEGER,
    space-fill-proportion INTEGER,
    ball-radius INTEGER,
    stick-radius INTEGER,
    tube-radius INTEGER,
    tube-worm-radius INTEGER,
    helix-radius INTEGER,
    strand-width INTEGER,
    strand-thickness INTEGER,
    -- backbone labels (no labels if not present)
    protein-labels Cn3d-backbone-label-style OPTIONAL,
    nucleotide-labels Cn3d-backbone-label-style OPTIONAL,
    -- ion labels
    ion-labels BOOLEAN OPTIONAL
}

Cn3d-style-settings-set ::= SET OF Cn3d-style-settings

Cn3d-style-table-id ::= INTEGER

Cn3d-style-table-item ::= SEQUENCE {
    id Cn3d-style-table-id,
    style Cn3d-style-settings
}

-- the global settings, and a lookup table of styles for user annotations.
Cn3d-style-dictionary ::= SEQUENCE {
    global-style Cn3d-style-settings,
    style-table SEQUENCE OF Cn3d-style-table-item OPTIONAL
}

-- a range of residues in a chain, identified by MMDB residue-id
-- (e.g., numbered from 1)
Cn3d-residue-range ::= SEQUENCE {
    from Residue-id,
    to Residue-id
}

-- set of locations on a particular chain
Cn3d-molecule-location ::= SEQUENCE {
    molecule-id Molecule-id,            -- MMDB molecule id
    -- which residues; whole molecule implied if absent
    residues SEQUENCE OF Cn3d-residue-range OPTIONAL
}

-- set of locations on a particular structure object (e.g., a PDB/MMDB
-- structure), which may include multiple ranges of residues each on
-- multiple chains.
Cn3d-object-location ::= SEQUENCE {
    structure-id Biostruc-id,
    residues SEQUENCE OF Cn3d-molecule-location
}

-- information for an individual user annotation
Cn3d-user-annotation ::= SEQUENCE {
    name VisibleString,                         -- a (short) name for this annotation
    description VisibleString OPTIONAL,         -- an optional longer description
    style-id Cn3d-style-table-id,               -- how to draw this annotation
    residues SEQUENCE OF Cn3d-object-location,  -- which residues to cover
    is-on BOOLEAN                   -- whether this annotation is to be turned on in Cn3D
}

-- a GL-ordered transformation matrix
Cn3d-GL-matrix ::= SEQUENCE {
    m0 REAL,  m1 REAL,  m2 REAL,  m3 REAL,
    m4 REAL,  m5 REAL,  m6 REAL,  m7 REAL,
    m8 REAL,  m9 REAL,  m10 REAL, m11 REAL,
    m12 REAL, m13 REAL, m14 REAL, m15 REAL
}

-- a floating point 3d vector
Cn3d-vector ::= SEQUENCE {
    x REAL,
    y REAL,
    z REAL
}

-- parameters used to set up the camera in Cn3D
Cn3d-view-settings ::= SEQUENCE {
    camera-distance REAL,           -- camera on +Z axis this distance from origin
    camera-angle-rad REAL,          -- camera angle
    camera-look-at-X REAL,          -- X,Y of point in Z=0 plane camera points at
    camera-look-at-Y REAL,
    camera-clip-near REAL,          -- distance of clipping planes from camera
    camera-clip-far REAL,
    matrix Cn3d-GL-matrix,          -- transformation of objects in the scene
    rotation-center Cn3d-vector     -- center of rotation of whole scene
}

-- The list of annotations for a given CDD/mime. If residue regions overlap
-- between annotations that are turned on, the last annotation in this list
-- that contains these residues will be used as the display style for these
-- residues.
-- Also contains the current viewpoint, so that user's camera angle
-- can be stored and reproduced, for illustrations, on-line figures, etc.
3914Cn3d-user-annotations ::= SEQUENCE { 3915 annotations SEQUENCE OF Cn3d-user-annotation OPTIONAL, 3916 view Cn3d-view-settings OPTIONAL 3917} 3918 3919END 3920 3921--$Revision: 6.3 $ 3922--**************************************************************** 3923-- 3924-- NCBI Project Definition Module 3925-- by Jim Ostell and Jonathan Kans, 1998 3926-- 3927--**************************************************************** 3928 3929NCBI-Project DEFINITIONS ::= 3930BEGIN 3931 3932EXPORTS Project, Project-item; 3933 3934IMPORTS Date FROM NCBI-General 3935 PubMedId FROM NCBI-Biblio 3936 Seq-id, Seq-loc FROM NCBI-Seqloc 3937 Seq-annot, Pubdesc FROM NCBI-Sequence 3938 Seq-entry FROM NCBI-Seqset 3939 Pubmed-entry FROM NCBI-PubMed; 3940 3941Project ::= SEQUENCE { 3942 descr Project-descr OPTIONAL , 3943 data Project-item } 3944 3945Project-item ::= CHOICE { 3946 pmuid SET OF INTEGER , 3947 protuid SET OF INTEGER , 3948 nucuid SET OF INTEGER , 3949 sequid SET OF INTEGER , 3950 genomeuid SET OF INTEGER , 3951 structuid SET OF INTEGER , 3952 pmid SET OF PubMedId , 3953 protid SET OF Seq-id , 3954 nucid SET OF Seq-id , 3955 seqid SET OF Seq-id , 3956 genomeid SET OF Seq-id , 3957 structid NULL , 3958 pment SET OF Pubmed-entry , 3959 protent SET OF Seq-entry , 3960 nucent SET OF Seq-entry , 3961 seqent SET OF Seq-entry , 3962 genomeent SET OF Seq-entry , 3963 structent NULL , 3964 seqannot SET OF Seq-annot , 3965 loc SET OF Seq-loc , 3966 proj SET OF Project 3967} 3968 3969Project-descr ::= SEQUENCE { 3970 id SET OF Project-id , 3971 name VisibleString OPTIONAL , 3972 descr SET OF Projdesc OPTIONAL } 3973 3974Projdesc ::= CHOICE { 3975 pub Pubdesc , 3976 date Date , 3977 comment VisibleString , 3978 title VisibleString 3979} 3980 3981Project-id ::= VisibleString 3982 3983END 3984 3985 3986--$Revision: 6.0 $ 3987--********************************************************************* 3988-- 3989-- access.asn 3990-- 3991-- messages for data access 3992-- 
3993--********************************************************************* 3994 3995NCBI-Access DEFINITIONS ::= 3996BEGIN 3997 3998EXPORTS Link-set; 3999 4000 -- links between same class = neighbors 4001 -- links between other classes = links 4002 4003Link-set ::= SEQUENCE { 4004 num INTEGER , -- number of links to this doc type 4005 uids SEQUENCE OF INTEGER OPTIONAL , -- the links 4006 weights SEQUENCE OF INTEGER OPTIONAL } -- the weights 4007 4008 4009END 4010--$Revision: 6.0 $ 4011--********************************************************************** 4012-- 4013-- NCBI Sequence Feature Definition Module 4014-- by James Ostell, 1994 4015-- 4016--********************************************************************** 4017 4018NCBI-FeatDef DEFINITIONS ::= 4019BEGIN 4020 4021EXPORTS FeatDef, FeatDefSet, FeatDispGroup, FeatDispGroupSet; 4022 4023 4024FeatDef ::= SEQUENCE { 4025 typelabel VisibleString , -- short label for type eg "CDS" 4026 menulabel VisibleString , -- label for a menu eg "Coding Region" 4027 featdef-key INTEGER , -- unique for this feature definition 4028 seqfeat-key INTEGER , -- SeqFeat.data.choice from objfeat.h 4029 entrygroup INTEGER , -- Group for data entry 4030 displaygroup INTEGER , -- Group for data display 4031 molgroup FeatMolType -- Type of Molecule used for 4032} 4033 4034FeatMolType ::= ENUMERATED { 4035 aa (1), -- proteins 4036 na (2), -- nucleic acids 4037 both (3) } -- both 4038 4039FeatDefSet ::= SEQUENCE OF FeatDef -- collections of definitions 4040 4041FeatDispGroup ::= SEQUENCE { 4042 groupkey INTEGER , 4043 groupname VisibleString } 4044 4045FeatDispGroupSet ::= SEQUENCE OF FeatDispGroup 4046 4047FeatDefGroupSet ::= SEQUENCE { 4048 groups FeatDispGroupSet , 4049 defs FeatDefSet } 4050 4051END 4052 4053 4054--$Revision: 6.12 $ 4055--**************************************************************** 4056-- 4057-- NCBI MIME type (chemical/ncbi-asn1-ascii and chemical/ncbi-asn1-binary) 4058-- by Jonathan Epstein, February 1996
4059-- 4060--**************************************************************** 4061 4062NCBI-Mime DEFINITIONS ::= 4063BEGIN 4064 4065EXPORTS Ncbi-mime-asn1; 4066IMPORTS Biostruc, Biostruc-annot-set FROM MMDB 4067 Cdd FROM NCBI-Cdd 4068 Seq-entry FROM NCBI-Seqset 4069 Seq-annot FROM NCBI-Sequence 4070 Medline-entry FROM NCBI-Medline 4071 Cn3d-style-dictionary, Cn3d-user-annotations FROM NCBI-Cn3d; 4072 4073Ncbi-mime-asn1 ::= CHOICE { 4074 entrez Entrez-general, -- just a structure 4075 alignstruc Biostruc-align, -- structures & sequences & alignments 4076 alignseq Biostruc-align-seq, -- sequence alignment 4077 strucseq Biostruc-seq, -- structure & sequences 4078 strucseqs Biostruc-seqs, -- structure & sequences & alignments 4079 general Biostruc-seqs-aligns-cdd -- all-purpose "grab bag" 4080 -- others may be added here in the future 4081} 4082 4083-- generic bundle of sequence and alignment info 4084Bundle-seqs-aligns ::= SEQUENCE { 4085 sequences SET OF Seq-entry OPTIONAL, -- sequences 4086 seqaligns SET OF Seq-annot OPTIONAL, -- sequence alignments 4087 strucaligns Biostruc-annot-set OPTIONAL, -- structure alignments 4088 imports SET OF Seq-annot OPTIONAL, -- imports (updates in Cn3D) 4089 style-dictionary Cn3d-style-dictionary OPTIONAL, -- Cn3D stuff 4090 user-annotations Cn3d-user-annotations OPTIONAL 4091} 4092 4093Biostruc-seqs-aligns-cdd ::= SEQUENCE { 4094 seq-align-data CHOICE { 4095 bundle Bundle-seqs-aligns, -- either seqs + alignments 4096 cdd Cdd -- or CDD (which contains these) 4097 }, 4098 structures SET OF Biostruc OPTIONAL, -- structures 4099 structure-type ENUMERATED { -- type of structures to load if 4100 ncbi-backbone(2), -- not present; meanings and 4101 ncbi-all-atom(3), -- values are same as MMDB's 4102 pdb-model(4) -- Model-type 4103 } OPTIONAL 4104} 4105 4106Biostruc-align ::= SEQUENCE { 4107 master Biostruc, 4108 slaves SET OF Biostruc, 4109 alignments Biostruc-annot-set, -- structure alignments 4110 sequences SET OF Seq-entry, -- sequences 
4111 seqalign SET OF Seq-annot, 4112 style-dictionary Cn3d-style-dictionary OPTIONAL, 4113 user-annotations Cn3d-user-annotations OPTIONAL 4114} 4115 4116Biostruc-align-seq ::= SEQUENCE { -- display seq structure align only 4117 sequences SET OF Seq-entry, -- sequences 4118 seqalign SET OF Seq-annot, 4119 style-dictionary Cn3d-style-dictionary OPTIONAL, 4120 user-annotations Cn3d-user-annotations OPTIONAL 4121} 4122 4123Biostruc-seq ::= SEQUENCE { -- display structure seq added by yanli 4124 structure Biostruc, 4125 sequences SET OF Seq-entry, 4126 style-dictionary Cn3d-style-dictionary OPTIONAL, 4127 user-annotations Cn3d-user-annotations OPTIONAL 4128} 4129 4130Biostruc-seqs ::= SEQUENCE { -- display blast alignment along with neighbor's structure added by yanli 4131 structure Biostruc, 4132 sequences SET OF Seq-entry, -- sequences 4133 seqalign SET OF Seq-annot, 4134 style-dictionary Cn3d-style-dictionary OPTIONAL, 4135 user-annotations Cn3d-user-annotations OPTIONAL 4136} 4137 4138Entrez-style ::= ENUMERATED { 4139 docsum (1), 4140 genbank (2) , 4141 genpept (3) , 4142 fasta (4) , 4143 asn1 (5) , 4144 graphic (6) , 4145 alignment (7) , 4146 globalview (8) , 4147 report (9) , 4148 medlars (10) , 4149 embl (11) , 4150 pdb (12) , 4151 kinemage (13) } 4152 4153Entrez-general ::= SEQUENCE { 4154 title VisibleString OPTIONAL, 4155 data CHOICE { 4156 ml Medline-entry , 4157 prot Seq-entry , 4158 nuc Seq-entry , 4159 genome Seq-entry , 4160 structure Biostruc , 4161 strucAnnot Biostruc-annot-set } , 4162 style Entrez-style , 4163 location VisibleString OPTIONAL } 4164END 4165--$Revision: 6.0 $ 4166--******************************************************************** 4167-- 4168-- Print Templates 4169-- James Ostell, 1993 4170-- 4171-- 4172--******************************************************************** 4173 4174NCBI-ObjPrt DEFINITIONS ::= 4175BEGIN 4176 4177EXPORTS PrintTemplate, PrintTemplateSet; 4178 4179PrintTemplate ::= SEQUENCE { 4180 name TemplateName , -- 
name for this template 4181 labelfrom VisibleString OPTIONAL, -- ASN.1 path to get label from 4182 format PrintFormat } 4183 4184TemplateName ::= VisibleString 4185 4186PrintTemplateSet ::= SEQUENCE OF PrintTemplate 4187 4188PrintFormat ::= SEQUENCE { 4189 asn1 VisibleString , -- ASN.1 partial path for this 4190 label VisibleString OPTIONAL , -- printable label 4191 prefix VisibleString OPTIONAL, 4192 suffix VisibleString OPTIONAL, 4193 form PrintForm } 4194 4195PrintForm ::= CHOICE { -- Forms for various ASN.1 components 4196 block PrintFormBlock, 4197 boolean PrintFormBoolean, 4198 enum PrintFormEnum, 4199 text PrintFormText, 4200 use-template TemplateName, 4201 user UserFormat , 4202 null NULL } -- rarely used 4203 4204UserFormat ::= SEQUENCE { 4205 printfunc VisibleString , 4206 defaultfunc VisibleString OPTIONAL } 4207 4208PrintFormBlock ::= SEQUENCE { -- for SEQUENCE, SET 4209 separator VisibleString OPTIONAL , 4210 components SEQUENCE OF PrintFormat } 4211 4212PrintFormBoolean ::= SEQUENCE { 4213 true VisibleString OPTIONAL , 4214 false VisibleString OPTIONAL } 4215 4216PrintFormEnum ::= SEQUENCE { 4217 values SEQUENCE OF VisibleString OPTIONAL } 4218 4219PrintFormText ::= SEQUENCE { 4220 textfunc VisibleString OPTIONAL } 4221 4222END 4223 4224--$Revision: 6.11 $ 4225--********************************************************* 4226-- 4227-- ASN.1 and XML for the components of a GenBank format sequence 4228-- J.Ostell 2002 4229-- Updated 25 May 2010 4230-- 4231--********************************************************* 4232 4233NCBI-GBSeq DEFINITIONS ::= 4234BEGIN 4235 4236--******** 4237-- GBSeq represents the elements in a GenBank style report 4238-- of a sequence with some small additions to structure and support 4239-- for protein (GenPept) versions of GenBank format as seen in 4240-- Entrez. 
While this represents the simplification, reduction of 4241-- detail, and flattening to a single sequence perspective of GenBank 4242-- format (compared with the full ASN.1 or XML from which GenBank and 4243-- this format is derived at NCBI), it is presented in ASN.1 or XML for 4244-- automated parsing and processing. It is hoped that this compromise 4245-- will be useful for those bulk processing at the GenBank format level 4246-- of detail today. Since it is a compromise, a number of pragmatic 4247-- decisions have been made. 4248-- 4249-- In pursuit of simplicity and familiarity a number of 4250-- fields do not have full substructure defined here where there is 4251-- already a standard GenBank format string. For example: 4252-- 4253-- Date DD-Mon-YYYY 4254-- Authors LastName, Initials (with periods) 4255-- Journal JournalName Volume (issue), page-range (year) 4256-- FeatureLocations as per GenBank feature table, but FeatureIntervals 4257-- may also be provided as a convenience 4258-- FeatureQualifiers as per GenBank feature table 4259-- Primary has a string that represents a table to construct 4260-- a third party (TPA) sequence. 4261-- other-seqids can have strings with the "vertical bar format" sequence 4262-- identifiers used in BLAST for example, when they are non-genbank types. 4263-- Currently in GenBank format you only see GI, but there are others, like 4264-- patents, submitter clone names, etc which will appear here, as they 4265-- always have in the ASN.1 format, and full XML format. 4266-- source-db is a formatted text block for peptides in GenPept format that 4267-- carries information from the source protein database. 4268-- 4269-- There are also a number of elements that could have been 4270-- more exactly specified, but in the interest of simplicity 4271-- have been simply left as options. For example..
4272-- 4273-- accession and accession.version will always appear in a GenBank record 4274-- they are optional because this format can also be used for non-GenBank 4275-- sequences, and in that case will have only "other-seqids". 4276-- 4277-- sequences will normally all have "sequence" filled in. But contig records 4278-- will have a "join" statement in the "contig" slot, and no "sequence". 4279-- We also may consider a retrieval option with no sequence of any kind 4280-- and no feature table to quickly check minimal values. 4281-- 4282-- a reference may have an author list, or be from a consortium, or both. 4283-- 4284-- some fields, such as taxonomy, do appear as separate elements in GenBank 4285-- format but without a specific linetype (in GenBank format this comes 4286-- under ORGANISM). Another example is the separation of primary accession 4287-- from the list of secondary accessions. In GenBank format primary 4288-- accession is just the first one on the list that includes all secondaries 4289-- after it. 4290-- 4291-- create-date deserves special comment. The date you see on the right hand 4292-- side of the LOCUS line in GenBank format is actually the last date 4293-- the record was modified (or the update-date). The date the record was 4294-- first submitted to GenBank appears in the first submission citation in 4295-- the reference section. Internally in the databases and ASN.1 NCBI keeps 4296-- the first date the record was released into the sequence database at 4297-- NCBI as create-date. For records from EMBL, which supports create-date, 4298-- it is the date provided by EMBL. For DDBJ records, which do not supply 4299-- a create-date (same as GenBank format) the create-date is the first date 4300-- NCBI saw the record from DDBJ. For older GenBank records, before NCBI 4301-- took responsibility for GenBank, it is just the first date NCBI saw the 4302-- record.
Create-date can be very useful, so we expose it here, but users 4303-- must understand it is only an approximation and comes from many sources, 4304-- and with many exceptions and caveats. It does NOT tell you the first 4305-- date the public might have seen this record and thus is NOT an accurate 4306-- measure for legal issues of precedence. 4307-- 4308--******** 4309 4310GBSet ::= SEQUENCE OF GBSeq 4311 4312GBSeq ::= SEQUENCE { 4313 locus VisibleString OPTIONAL , 4314 length INTEGER , 4315 strandedness VisibleString OPTIONAL , 4316 moltype VisibleString , 4317 topology VisibleString OPTIONAL , 4318 division VisibleString OPTIONAL , 4319 update-date VisibleString OPTIONAL , 4320 create-date VisibleString OPTIONAL , 4321 update-release VisibleString OPTIONAL , 4322 create-release VisibleString OPTIONAL , 4323 definition VisibleString OPTIONAL , 4324 primary-accession VisibleString OPTIONAL , 4325 entry-version VisibleString OPTIONAL , 4326 accession-version VisibleString OPTIONAL , 4327 other-seqids SEQUENCE OF GBSeqid OPTIONAL , 4328 secondary-accessions SEQUENCE OF GBSecondary-accn OPTIONAL, 4329 project VisibleString OPTIONAL , 4330 keywords SEQUENCE OF GBKeyword OPTIONAL , 4331 segment VisibleString OPTIONAL , 4332 source VisibleString OPTIONAL , 4333 organism VisibleString OPTIONAL , 4334 taxonomy VisibleString OPTIONAL , 4335 references SEQUENCE OF GBReference OPTIONAL , 4336 comment VisibleString OPTIONAL , 4337 comment-set SEQUENCE OF GBComment OPTIONAL , 4338 struc-comments SEQUENCE OF GBStrucComment OPTIONAL , 4339 primary VisibleString OPTIONAL , 4340 source-db VisibleString OPTIONAL , 4341 database-reference VisibleString OPTIONAL , 4342 feature-table SEQUENCE OF GBFeature OPTIONAL , 4343 feature-set SEQUENCE OF GBFeatureSet OPTIONAL , 4344 sequence VisibleString OPTIONAL , -- Optional for contig, wgs, etc. 
4345 contig VisibleString OPTIONAL , 4346 alt-seq SEQUENCE OF GBAltSeqData OPTIONAL , 4347 xrefs SEQUENCE OF GBXref OPTIONAL 4348} 4349 4350GBSeqid ::= VisibleString 4351 4352GBSecondary-accn ::= VisibleString 4353 4354GBKeyword ::= VisibleString 4355 4356GBReference ::= SEQUENCE { 4357 reference VisibleString , 4358 position VisibleString OPTIONAL , 4359 authors SEQUENCE OF GBAuthor OPTIONAL , 4360 consortium VisibleString OPTIONAL , 4361 title VisibleString OPTIONAL , 4362 journal VisibleString , 4363 xref SEQUENCE OF GBXref OPTIONAL , 4364 pubmed INTEGER OPTIONAL , 4365 remark VisibleString OPTIONAL 4366} 4367 4368GBAuthor ::= VisibleString 4369 4370GBXref ::= SEQUENCE { 4371 dbname VisibleString , 4372 id VisibleString 4373} 4374 4375GBComment ::= SEQUENCE { 4376 type VisibleString OPTIONAL , 4377 paragraphs SEQUENCE OF GBCommentParagraph 4378} 4379 4380GBCommentParagraph ::= VisibleString 4381 4382GBStrucComment ::= SEQUENCE { 4383 name VisibleString OPTIONAL , 4384 items SEQUENCE OF GBStrucCommentItem 4385} 4386 4387GBStrucCommentItem ::= SEQUENCE { 4388 tag VisibleString OPTIONAL , 4389 value VisibleString OPTIONAL , 4390 url VisibleString OPTIONAL 4391} 4392 4393GBFeatureSet ::= SEQUENCE { 4394 annot-source VisibleString OPTIONAL , 4395 features SEQUENCE OF GBFeature 4396} 4397 4398GBFeature ::= SEQUENCE { 4399 key VisibleString , 4400 location VisibleString , 4401 intervals SEQUENCE OF GBInterval OPTIONAL , 4402 operator VisibleString OPTIONAL , 4403 partial5 BOOLEAN OPTIONAL , 4404 partial3 BOOLEAN OPTIONAL , 4405 quals SEQUENCE OF GBQualifier OPTIONAL , 4406 xrefs SEQUENCE OF GBXref OPTIONAL 4407} 4408 4409GBInterval ::= SEQUENCE { 4410 from INTEGER OPTIONAL , 4411 to INTEGER OPTIONAL , 4412 point INTEGER OPTIONAL , 4413 iscomp BOOLEAN OPTIONAL , 4414 interbp BOOLEAN OPTIONAL , 4415 accession VisibleString 4416} 4417 4418GBQualifier ::= SEQUENCE { 4419 name VisibleString , 4420 value VisibleString OPTIONAL 4421} 4422 4423GBAltSeqData ::= SEQUENCE { 4424 
name VisibleString , -- e.g., contig, wgs, scaffold, cage, genome 4425 items SEQUENCE OF GBAltSeqItem OPTIONAL 4426} 4427 4428GBAltSeqItem ::= SEQUENCE { 4429 interval GBInterval OPTIONAL , 4430 isgap BOOLEAN OPTIONAL , 4431 gap-length INTEGER OPTIONAL , 4432 gap-type VisibleString OPTIONAL , 4433 gap-linkage VisibleString OPTIONAL , 4434 gap-comment VisibleString OPTIONAL , 4435 first-accn VisibleString OPTIONAL , 4436 last-accn VisibleString OPTIONAL , 4437 value VisibleString OPTIONAL 4438} 4439 4440END 4441 4442--$Revision: 1.9 $ 4443--************************************************************************ 4444-- 4445-- ASN.1 and XML for the components of a GenBank/EMBL/DDBJ sequence record 4446-- The International Nucleotide Sequence Database (INSD) collaboration 4447-- Version 1.6, 25 May 2010 4448-- 4449--************************************************************************ 4450 4451INSD-INSDSeq DEFINITIONS ::= 4452BEGIN 4453 4454-- INSDSeq provides the elements of a sequence as presented in the 4455-- GenBank/EMBL/DDBJ-style flatfile formats, with a small amount of 4456-- additional structure. 4457-- Although this single perspective of the three flatfile formats 4458-- provides a useful simplification, it hides to some extent the 4459-- details of the actual data underlying those formats. Nevertheless, 4460-- the XML version of INSD-Seq is being provided with 4461-- the hopes that it will prove useful to those who bulk-process 4462-- sequence data at the flatfile-format level of detail. 
Further 4463-- documentation regarding the content and conventions of those formats 4464-- can be found at: 4465-- 4466-- URLs for the DDBJ, EMBL, and GenBank Feature Table Document: 4467-- http://www.ddbj.nig.ac.jp/FT/full_index.html 4468-- http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html 4469-- http://www.ncbi.nlm.nih.gov/projects/collab/FT/index.html 4470-- 4471-- URLs for DDBJ, EMBL, and GenBank Release Notes : 4472-- ftp://ftp.ddbj.nig.ac.jp/database/ddbj/ddbjrel.txt 4473-- http://www.ebi.ac.uk/embl/Documentation/Release_notes/current/relnotes.html 4474-- ftp://ftp.ncbi.nih.gov/genbank/gbrel.txt 4475-- 4476-- Because INSDSeq is a compromise, a number of pragmatic decisions have 4477-- been made: 4478-- 4479-- In pursuit of simplicity and familiarity a number of fields do not 4480-- have full substructure defined here where there is already a 4481-- standard flatfile format string. For example: 4482-- 4483-- Dates: DD-MON-YYYY (eg 10-JUN-2003) 4484-- 4485-- Author: LastName, Initials (eg Smith, J.N.) 4486-- or Lastname Initials (eg Smith J.N.) 4487-- 4488-- Journal: JournalName Volume (issue), page-range (year) 4489-- or JournalName Volume(issue):page-range(year) 4490-- eg Appl. Environ. Microbiol. 61 (4), 1646-1648 (1995) 4491-- Appl. Environ. Microbiol. 61(4):1646-1648(1995). 4492-- 4493-- FeatureLocations are represented as in the flatfile feature table, 4494-- but FeatureIntervals may also be provided as a convenience 4495-- 4496-- FeatureQualifiers are represented as in the flatfile feature table. 4497-- 4498-- Primary has a string that represents a table to construct 4499-- a third party (TPA) sequence. 4500-- 4501-- other-seqids can have strings with the "vertical bar format" sequence 4502-- identifiers used in BLAST for example, when they are non-INSD types.
4503-- 4504-- Currently in flatfile format you only see Accession numbers, but there 4505-- are others, like patents, submitter clone names, etc which will 4506-- appear here 4507-- 4508-- There are also a number of elements that could have been more exactly 4509-- specified, but in the interest of simplicity have been simply left as 4510-- optional. For example: 4511-- 4512-- All publicly accessible sequence records in INSDSeq format will 4513-- include accession and accession.version. However, these elements are 4514-- optional in INSDSeq so that this format can also be used 4515-- for non-public sequence data, prior to the assignment of accessions and 4516-- version numbers. In such cases, records will have only "other-seqids". 4517-- 4518-- sequences will normally all have "sequence" filled in. But contig records 4519-- will have a "join" statement in the "contig" slot, and no "sequence". 4520-- We also may consider a retrieval option with no sequence of any kind 4521-- and no feature table to quickly check minimal values. 4522-- 4523-- Four (optional) elements are specific to records represented via the EMBL 4524-- sequence database: INSDSeq_update-release, INSDSeq_create-release, 4525-- INSDSeq_entry-version, and INSDSeq_database-reference. 4526-- 4527-- One (optional) element is specific to records originating at the GenBank 4528-- and DDBJ sequence databases: INSDSeq_segment.
4529-- 4530--******** 4531 4532INSDSet ::= SEQUENCE OF INSDSeq 4533 4534INSDSeq ::= SEQUENCE { 4535 locus VisibleString OPTIONAL , 4536 length INTEGER , 4537 strandedness VisibleString OPTIONAL , 4538 moltype VisibleString , 4539 topology VisibleString OPTIONAL , 4540 division VisibleString OPTIONAL , 4541 update-date VisibleString OPTIONAL , 4542 create-date VisibleString OPTIONAL , 4543 update-release VisibleString OPTIONAL , 4544 create-release VisibleString OPTIONAL , 4545 definition VisibleString OPTIONAL , 4546 primary-accession VisibleString OPTIONAL , 4547 entry-version VisibleString OPTIONAL , 4548 accession-version VisibleString OPTIONAL , 4549 other-seqids SEQUENCE OF INSDSeqid OPTIONAL , 4550 secondary-accessions SEQUENCE OF INSDSecondary-accn OPTIONAL, 4551 4552-- INSDSeq_project has been deprecated in favor of INSDSeq_xrefs . 4553-- This element may be removed from a future version of this DTD. 4554 4555 project VisibleString OPTIONAL , 4556 4557 keywords SEQUENCE OF INSDKeyword OPTIONAL , 4558 segment VisibleString OPTIONAL , 4559 source VisibleString OPTIONAL , 4560 organism VisibleString OPTIONAL , 4561 taxonomy VisibleString OPTIONAL , 4562 references SEQUENCE OF INSDReference OPTIONAL , 4563 comment VisibleString OPTIONAL , 4564 comment-set SEQUENCE OF INSDComment OPTIONAL , 4565 struc-comments SEQUENCE OF INSDStrucComment OPTIONAL , 4566 primary VisibleString OPTIONAL , 4567 source-db VisibleString OPTIONAL , 4568 database-reference VisibleString OPTIONAL , 4569 feature-table SEQUENCE OF INSDFeature OPTIONAL , 4570 feature-set SEQUENCE OF INSDFeatureSet OPTIONAL , 4571 sequence VisibleString OPTIONAL , -- Optional for contig, wgs, etc. 4572 contig VisibleString OPTIONAL , 4573 alt-seq SEQUENCE OF INSDAltSeqData OPTIONAL , 4574 4575-- INSDSeq_xrefs provides cross-references from a sequence record 4576-- to other database resources.
These cross-references are at the 4577-- level of the entire record, rather than at the level of a specific 4578-- feature. These cross-references can include: BioProject, BioSample, 4579-- Sequence Read Archive, etc. 4580 4581 xrefs SEQUENCE OF INSDXref OPTIONAL 4582} 4583 4584INSDSeqid ::= VisibleString 4585 4586INSDSecondary-accn ::= VisibleString 4587 4588INSDKeyword ::= VisibleString 4589 4590-- INSDReference_position contains a string value indicating the 4591-- basepair span(s) to which a reference applies. The allowable 4592-- formats are: 4593-- 4594-- X..Y : Where X and Y are integers separated by two periods, 4595-- X >= 1 , Y <= sequence length, and X <= Y 4596-- 4597-- Multiple basepair spans can exist, separated by a 4598-- semi-colon and a space. For example : 10..20; 100..500 4599-- 4600-- sites : The string literal 'sites', indicating that a reference 4601-- provides sequence annotation information, but the specific 4602-- basepair spans are either not captured, or were too numerous 4603-- to record. 4604-- 4605-- The 'sites' literal string is singly occurring, and 4606-- cannot be used in conjunction with any X..Y basepair spans. 4607-- 4608-- 'sites' is a convention utilized by GenBank, and might 4609-- not be presented in XML provided by EMBL and DDBJ. 4610-- 4611-- References that lack an INSDReference_position element are not 4612-- attributed to any particular region of the sequence. 4613 4614INSDReference ::= SEQUENCE { 4615 reference VisibleString , 4616 position VisibleString OPTIONAL , 4617 authors SEQUENCE OF INSDAuthor OPTIONAL , 4618 consortium VisibleString OPTIONAL , 4619 title VisibleString OPTIONAL , 4620 journal VisibleString , 4621 xref SEQUENCE OF INSDXref OPTIONAL , 4622 pubmed INTEGER OPTIONAL , 4623 remark VisibleString OPTIONAL 4624} 4625 4626INSDAuthor ::= VisibleString 4627 4628-- INSDXref provides a method for referring to records in 4629-- other databases.
INSDXref_dbname is a string value that 4630-- provides the name of the database, and INSDXref_id 4631-- is a string value that provides the record's identifier 4632-- in that database. 4633 4634INSDXref ::= SEQUENCE { 4635 dbname VisibleString , 4636 id VisibleString 4637} 4638 4639INSDComment ::= SEQUENCE { 4640 type VisibleString OPTIONAL , 4641 paragraphs SEQUENCE OF INSDCommentParagraph 4642} 4643 4644INSDCommentParagraph ::= VisibleString 4645 4646INSDStrucComment ::= SEQUENCE { 4647 name VisibleString OPTIONAL , 4648 items SEQUENCE OF INSDStrucCommentItem 4649} 4650 4651INSDStrucCommentItem ::= SEQUENCE { 4652 tag VisibleString OPTIONAL , 4653 value VisibleString OPTIONAL , 4654 url VisibleString OPTIONAL 4655} 4656 4657-- INSDFeature_operator contains a string value describing 4658-- the relationship among a set of INSDInterval within 4659-- INSDFeature_intervals. The allowable formats are: 4660-- 4661-- join : The string literal 'join' indicates that the 4662-- INSDInterval intervals are biologically joined 4663-- together into a contiguous molecule. 4664-- 4665-- order : The string literal 'order' indicates that the 4666-- INSDInterval intervals are in the presented 4667-- order, but they are not necessarily contiguous. 4668-- 4669-- Either 'join' or 'order' is required if INSDFeature_intervals 4670-- is comprised of more than one INSDInterval .
4671 4672INSDFeatureSet ::= SEQUENCE { 4673 annot-source VisibleString OPTIONAL , 4674 features SEQUENCE OF INSDFeature 4675} 4676 4677INSDFeature ::= SEQUENCE { 4678 key VisibleString , 4679 location VisibleString , 4680 intervals SEQUENCE OF INSDInterval OPTIONAL , 4681 operator VisibleString OPTIONAL , 4682 partial5 BOOLEAN OPTIONAL , 4683 partial3 BOOLEAN OPTIONAL , 4684 quals SEQUENCE OF INSDQualifier OPTIONAL , 4685 xrefs SEQUENCE OF INSDXref OPTIONAL 4686} 4687 4688-- INSDInterval_iscomp is a boolean indicating whether 4689-- an INSDInterval_from / INSDInterval_to location 4690-- represents a location on the complement strand. 4691-- When INSDInterval_iscomp is TRUE, it essentially 4692-- confirms that a 'from' value which is greater than 4693-- a 'to' value is intentional, because the location 4694-- is on the opposite strand of the presented sequence. 4695 4696-- INSDInterval_interbp is a boolean indicating whether 4697-- a feature (such as a restriction site) is located 4698-- between two adjacent basepairs. When INSDInterval_interbp 4699-- is TRUE, the 'from' and 'to' values will differ by 4700-- exactly one base for linear molecules. For circular 4701-- molecules, if the inter-basepair position falls between 4702-- the last and the first base, then 'from' will be the 4703-- final base (equal to the length of the sequence), and 4704-- 'to' will have a value of 1. 4705 4706INSDInterval ::= SEQUENCE { 4707 from INTEGER OPTIONAL , 4708 to INTEGER OPTIONAL , 4709 point INTEGER OPTIONAL , 4710 iscomp BOOLEAN OPTIONAL , 4711 interbp BOOLEAN OPTIONAL , 4712 accession VisibleString 4713} 4714 4715INSDQualifier ::= SEQUENCE { 4716 name VisibleString , 4717 value VisibleString OPTIONAL 4718} 4719 4720-- INSDAltSeqData provides for sequence representations other than 4721-- literal basepair abbreviations (INSDSeq_sequence), such as the 4722-- CONTIG/CO linetype of the GenBank and EMBL flatfile formats. 
4723-- It also accommodates the specification of accession-number ranges, 4724-- which are presented on a WGS master record (for the contigs and 4725-- scaffolds of a WGS project). 4726 4727INSDAltSeqData ::= SEQUENCE { 4728 name VisibleString , -- e.g., contig, wgs, scaffold, cage, genome 4729 items SEQUENCE OF INSDAltSeqItem OPTIONAL 4730} 4731 4732INSDAltSeqItem ::= SEQUENCE { 4733 interval INSDInterval OPTIONAL , 4734 isgap BOOLEAN OPTIONAL , 4735 gap-length INTEGER OPTIONAL , 4736 gap-type VisibleString OPTIONAL , 4737 gap-linkage VisibleString OPTIONAL , 4738 gap-comment VisibleString OPTIONAL , 4739 first-accn VisibleString OPTIONAL , 4740 last-accn VisibleString OPTIONAL , 4741 value VisibleString OPTIONAL 4742} 4743 4744END 4745 4746--$Revision: 6.1 $ 4747--********************************************************************** 4748-- 4749-- ASN.1 for a tiny Bioseq in XML 4750-- basically a structured FASTA file with a few extras 4751-- in this case we drop all modularity of components 4752-- All ids are Optional - simpler structure, less checking 4753-- Components of organism are hard coded - can't easily add or change 4754-- sequence is just string whether DNA or protein 4755-- by James Ostell, 2000 4756-- 4757--********************************************************************** 4758 4759NCBI-TSeq DEFINITIONS ::= 4760BEGIN 4761 4762TSeq ::= SEQUENCE { 4763 seqtype ENUMERATED { 4764 nucleotide (1), 4765 protein (2) }, 4766 gi INTEGER OPTIONAL, 4767 accver VisibleString OPTIONAL, 4768 sid VisibleString OPTIONAL, 4769 local VisibleString OPTIONAL, 4770 taxid INTEGER OPTIONAL, 4771 orgname VisibleString OPTIONAL, 4772 defline VisibleString, 4773 length INTEGER, 4774 sequence VisibleString } 4775 4776TSeqSet ::= SEQUENCE OF TSeq -- a bunch of them 4777 4778END 4779 4780--$Id: scoremat.asn,v 1.14 2011/12/21 15:29:33 kazimird Exp $ 4781-- =========================================================================== 4782-- 4783-- PUBLIC DOMAIN NOTICE 4784-- 
National Center for Biotechnology Information 4785-- 4786-- This software/database is a "United States Government Work" under the 4787-- terms of the United States Copyright Act. It was written as part of 4788-- the author's official duties as a United States Government employee and 4789-- thus cannot be copyrighted. This software/database is freely available 4790-- to the public for use. The National Library of Medicine and the U.S. 4791-- Government have not placed any restriction on its use or reproduction. 4792-- 4793-- Although all reasonable efforts have been taken to ensure the accuracy 4794-- and reliability of the software and data, the NLM and the U.S. 4795-- Government do not and cannot warrant the performance or results that 4796-- may be obtained by using this software or data. The NLM and the U.S. 4797-- Government disclaim all warranties, express or implied, including 4798-- warranties of performance, merchantability or fitness for any particular 4799-- purpose. 4800-- 4801-- Please cite the author in any work or product based on this material. 
--
-- ===========================================================================
--
-- Author:  Christiam Camacho
--
-- File Description:
--      ASN.1 definitions for scoring matrix
--
-- ===========================================================================

NCBI-ScoreMat DEFINITIONS ::= BEGIN

EXPORTS
    Pssm, PssmIntermediateData, PssmFinalData,
    PssmParameters, PssmWithParameters;

IMPORTS
    Object-id  FROM NCBI-General
    Seq-entry  FROM NCBI-Seqset;

-- a rudimentary block/core-model, to be used with block-based alignment
-- routines and threading

-- A typed property attached to a CoreBlock; the value is carried in
-- intvalue and/or textvalue (both OPTIONAL).
BlockProperty ::= SEQUENCE {
    type INTEGER {
        unassigned  (0),
        threshold   (1),    -- score threshold for heuristics
        minscore    (2),    -- observed minimum score in CD
        maxscore    (3),    -- observed maximum score in CD
        meanscore   (4),    -- observed mean score in CD
        variance    (5),    -- observed score variance
        name        (10),   -- just name the block
        is-optional (20),   -- block may not have to be used
        other       (255) },
    intvalue   INTEGER        OPTIONAL,
    textvalue  VisibleString  OPTIONAL
}

CoreBlock ::= SEQUENCE {
    start     INTEGER,                          -- begin of block on query
    stop      INTEGER,                          -- end of block on query
    minstart  INTEGER OPTIONAL,                 -- optional N-terminal extension
    maxstop   INTEGER OPTIONAL,                 -- optional C-terminal extension
    property  SEQUENCE OF BlockProperty OPTIONAL
}

LoopConstraint ::= SEQUENCE {
    minlength  INTEGER DEFAULT 0,       -- minimum length of unaligned region
    maxlength  INTEGER DEFAULT 100000   -- maximum length of unaligned region
}

CoreDef ::= SEQUENCE {
    nblocks          INTEGER,                       -- number of core elements/blocks
    blocks           SEQUENCE OF CoreBlock,         -- nblocks locations
    loops            SEQUENCE OF LoopConstraint,    -- (nblocks+1) constraints

    isDiscontinuous  BOOLEAN OPTIONAL,              -- is it a discontinuous domain

    insertions       SEQUENCE OF INTEGER OPTIONAL   -- positions of long insertions
}

Site-annot ::= SEQUENCE {
    -- location of the annotation: start and stop position in the PSSM
    startPosition  INTEGER,
    stopPosition   INTEGER,

    -- holds description or names, that can be used for labels in
    -- visualization
    description    VisibleString OPTIONAL,

    -- type of the annotated feature, similarly to Align-annot in NCBI-Cdd
    type           INTEGER OPTIONAL,

    -- additional names for the annotation
    aliases        SEQUENCE OF VisibleString OPTIONAL,

    -- motif to validate mapping of sites
    motif          VisibleString OPTIONAL,

    -- 0 for validation
    -- 1 for motif in seqloc
    -- 2 for multiple motifs in seqloc
    motifuse       INTEGER OPTIONAL
}

Site-annot-set ::= SEQUENCE OF Site-annot

-- ===========================================================================
-- PSI-BLAST, formatrpsdb, RPS-BLAST workflow:
-- ===========================================
--
-- Two possible inputs to PSI-BLAST and formatrpsdb:
-- 1) PssmWithParams where pssm field contains intermediate PSSM data (matrix
--    of frequency ratios)
-- 2) PssmWithParams where pssm field contains final PSSM data (matrix of
--    scores and statistical parameters) - such as written by cddumper
--
-- In case 1, PSI-BLAST's PSSM engine is invoked to create the PSSM and perform
-- the PSI-BLAST search or build the PSSM to then build the RPS-BLAST database.
-- In case 2, PSI-BLAST's PSSM engine is not invoked and the matrix of scores
-- and statistical parameters are used to perform the search in PSI-BLAST and
-- the same data and the data in PssmWithParams::params::rpsdbparams is used to
-- build the PSSM and ultimately the RPS-BLAST database
--
--
--                 reads  ++++++++++++++  writes
-- PssmWithParams  ====>  + PSI-BLAST  +  =====>  PssmWithParams
--                        ++++++++++++++              |     ^
--        ^                                           |     |
--        |                                           |     |
--        +===========================================+     |
--                                                    |     |
--        +===========================================+     |
--        |                                                 |
--  reads |                                                 |
--        v                                                 |
-- +++++++++++++++ writes +++++++++++++++++++++++           |
-- | formatrpsdb | =====> | RPS-BLAST databases |           |
-- +++++++++++++++        +++++++++++++++++++++++           |
--                                   ^                      |
--                                   |                      |
--                                   | reads                |
--                             +++++++++++++                |
--                             | RPS-BLAST |                |
--                             +++++++++++++                |
--                                                          |
--         reads   ++++++++++++ writes                      |
-- Cdd  ======>    | cddumper | =============================+
--                 ++++++++++++
--
-- ===========================================================================

-- Contains the PSSM's scores and its associated statistical parameters.
-- Dimensions and order in which scores are stored must be the same as that
-- specified in Pssm::numRows, Pssm::numColumns, and Pssm::byRow
PssmFinalData ::= SEQUENCE {

    -- PSSM's scores
    scores          SEQUENCE OF INTEGER,

    -- Karlin & Altschul parameter produced during the PSSM's calculation
    lambda          REAL,

    -- Karlin & Altschul parameter produced during the PSSM's calculation
    kappa           REAL,

    -- Karlin & Altschul parameter produced during the PSSM's calculation
    h               REAL,

    -- scaling factor used to obtain more precision when building the PSSM.
    -- (i.e.: scores are scaled by this value). By default, PSI-BLAST's PSSM
    -- engine generates PSSMs which are not scaled-up, however, if PSI-BLAST is
    -- given a PSSM which contains a scaled-up PSSM (indicated by having a
    -- scalingFactor greater than 1), then it will scale down the PSSM to
    -- perform the initial stages of the search with it.
    -- N.B.: When building RPS-BLAST databases, if formatrpsdb is provided
    -- scaled-up PSSMs, it will ensure that all PSSMs used to build the
    -- RPS-BLAST database are scaled by the same factor (otherwise, RPS-BLAST
    -- will silently produce incorrect results).
    scalingFactor   INTEGER DEFAULT 1,

    -- Karlin & Altschul parameter produced during the PSSM's calculation
    lambdaUngapped  REAL OPTIONAL,

    -- Karlin & Altschul parameter produced during the PSSM's calculation
    kappaUngapped   REAL OPTIONAL,

    -- Karlin & Altschul parameter produced during the PSSM's calculation
    hUngapped       REAL OPTIONAL
}

-- Contains the PSSM's intermediate data used to create the PSSM's scores
-- and statistical parameters. Dimensions and order in which scores are
-- stored must be the same as that specified in Pssm::numRows,
-- Pssm::numColumns, and Pssm::byRow
PssmIntermediateData ::= SEQUENCE {

    -- observed residue frequencies (or counts) per position of the PSSM
    -- (prior to application of pseudocounts)
    resFreqsPerPos          SEQUENCE OF INTEGER OPTIONAL,

    -- Weighted observed residue frequencies per position of the PSSM.
    -- (N.B.: each position's weights should add up to 1.0).
    -- This field corresponds to f_i (f sub i) in equation 2 of
    -- Nucleic Acids Res. 2001 Jul 15;29(14):2994-3005.
    -- NOTE: this is needed for diagnostics information only (i.e.:
    -- -out_ascii_pssm option in psiblast)
    weightedResFreqsPerPos  SEQUENCE OF REAL OPTIONAL,

    -- PSSM's frequency ratios
    freqRatios              SEQUENCE OF REAL,

    -- Information content per position of the PSSM
    -- NOTE: this is needed for diagnostics information only (i.e.:
    -- -out_ascii_pssm option in psiblast)
    informationContent      SEQUENCE OF REAL OPTIONAL,

    -- Relative weight for columns of the PSSM without gaps to pseudocounts
    -- NOTE: this is needed for diagnostics information only (i.e.:
    -- -out_ascii_pssm option in psiblast)
    gaplessColumnWeights    SEQUENCE OF REAL OPTIONAL,

    -- Used in sequence weights computation
    -- NOTE: this is needed for diagnostics information only (i.e.:
    -- -out_ascii_pssm option in psiblast)
    sigma                   SEQUENCE OF REAL OPTIONAL,

    -- Length of the aligned regions per position of the query sequence
    -- NOTE: this is needed for diagnostics information only (i.e.:
    -- -out_ascii_pssm option in psiblast)
    intervalSizes           SEQUENCE OF INTEGER OPTIONAL,

    -- Number of matching sequences per position of the PSSM (including the
    -- query)
    -- NOTE: this is needed for diagnostics information only (i.e.:
    -- -out_ascii_pssm option in psiblast)
    numMatchingSeqs         SEQUENCE OF INTEGER OPTIONAL,

    -- Number of independent observations per position of the PSSM
    -- NOTE: this is needed for building CDD database for DELTA-BLAST
    numIndeptObsr           SEQUENCE OF REAL OPTIONAL
}

-- Position-specific scoring matrix
--
-- Column indices on the PSSM refer to the positions corresponding to the
-- query/master sequence, i.e. the number of columns (N) is the same
-- as the length of the query/master sequence.
-- Row indices refer to individual amino acid types, i.e. the number of
-- rows (M) is the same as the number of different residues in the
-- alphabet we use. Consequently, row labels are amino acid identifiers.
--
-- PSSMs are stored as linear arrays of integers. By default, we store
-- them column-by-column, M values for the first column followed by M
-- values for the second column, and so on. In order to provide
-- flexibility for external applications, the boolean field "byRow" is
-- provided to specify the storage order.
Pssm ::= SEQUENCE {

    -- Is this a protein or nucleotide scoring matrix?
    isProtein         BOOLEAN DEFAULT TRUE,

    -- PSSM identifier
    identifier        Object-id OPTIONAL,

    -- The dimensions of the matrix are returned so the client can
    -- verify that all data was received.

    numRows           INTEGER,    -- number of rows
    numColumns        INTEGER,    -- number of columns

    -- row-labels is given to note the order of residue types so that it can
    -- be cross-checked between applications.
    -- If this field is not given, the matrix values are presented in
    -- order of the alphabet ncbistdaa is used for protein, ncbi4na for nucl.
    -- for proteins the values returned correspond to
    -- (-,-), (-,A), (-,B), (-,C) ... (A,-), (A,A), (A,B), (A,C) ...
    rowLabels         SEQUENCE OF VisibleString OPTIONAL,

    -- are matrices stored row by row?
    byRow             BOOLEAN DEFAULT FALSE,

    -- PSSM representative sequence (master)
    query             Seq-entry OPTIONAL,

    -- both intermediateData and finalData can be provided, but at least one of
    -- them must be provided.
    -- N.B.: by default PSI-BLAST will return the PSSM in its PssmIntermediateData
    -- representation.

    -- Intermediate data for the PSSM
    intermediateData  PssmIntermediateData OPTIONAL,

    -- Final representation for the PSSM
    finalData         PssmFinalData OPTIONAL
}

-- This structure is used to create the RPS-BLAST database auxiliary file
-- (*.aux) and it contains parameters set at creation time of the PSSM.
-- Also, the matrixName field is used by formatrpsdb to build a PSSM from
-- a Pssm structure which only contains PssmIntermediateData.
FormatRpsDbParameters ::= SEQUENCE {

    -- name of the underlying score matrix whose frequency ratios were
    -- used in PSSM construction (e.g.: BLOSUM62)
    matrixName  VisibleString,

    -- gap opening penalty corresponding to the matrix above
    gapOpen     INTEGER OPTIONAL,

    -- gap extension penalty corresponding to the matrix above
    gapExtend   INTEGER OPTIONAL

}

-- Populated by PSSM engine of PSI-BLAST, original source for these values
-- are the PSI-BLAST options specified using the BLAST options API
PssmParameters ::= SEQUENCE {

    -- pseudocount constant used for PSSM. This field corresponds to beta in
    -- equation 2 of Nucleic Acids Res. 2001 Jul 15;29(14):2994-3005.
    pseudocount     INTEGER OPTIONAL,

    -- data needed by formatrpsdb to create RPS-BLAST databases. matrixName is
    -- populated by PSI-BLAST
    rpsdbparams     FormatRpsDbParameters OPTIONAL,

    -- alignment constraints needed by sequence-structure threader
    -- and other global or local block-alignment algorithms
    constraints     CoreDef OPTIONAL,

    -- bit score threshold for specific conserved domain hits
    bitScoreThresh  REAL OPTIONAL,

    -- conserved functional sites with annotations
    annotatedSites  Site-annot-set OPTIONAL
}

-- Envelope containing PSSM and the parameters used to create it.
-- Provided for use in PSI-BLAST, formatrpsdb, and for the structure group.
PssmWithParameters ::= SEQUENCE {

    -- This field is applicable to PSI-BLAST and formatrpsdb.
    -- When both the intermediate and final PSSM data are provided in this
    -- field, the final data (matrix of scores and associated statistical
    -- parameters) takes precedence and that data is used for further
    -- processing. The rationale for this is that the PSSM's scores and
    -- statistical parameters might have been calculated by other applications
    -- and it might not be possible to recreate it by using PSI-BLAST's PSSM
    -- engine.
    pssm    Pssm,

    -- This field's rpsdbparams is used to specify the values of options
    -- for processing by formatrpsdb. If these are not set, the command
    -- line defaults of formatrpsdb are applied. This field is used
    -- by PSI-BLAST to verify that the underlying scoring matrix used to build
    -- the PSSM is the same as the one being specified through the BLAST
    -- Options API. If this field is omitted, no verification will be
    -- performed, so be careful to keep track of what matrix was used to build
    -- the PSSM or else the results produced by PSI-BLAST will be unreliable.
    params  PssmParameters OPTIONAL
}

END
--$Revision: 1.167 $
--**********************************************************************
--
--  NCBI ASN.1 macro editing language specifications
--
--  by Colleen Bollin, 2007
--
--**********************************************************************

NCBI-Macro DEFINITIONS ::=
BEGIN

EXPORTS AECR-action, Parse-action, Macro-action-list, Suspect-rule-set;

-- simple constraints --

-- Position at which String-constraint.match-text is compared
-- (see String-constraint.match-location).
String-location ::= ENUMERATED {
    contains (1),
    equals   (2),
    starts   (3),
    ends     (4),
    inlist   (5) }

Word-substitution ::= SEQUENCE {
    word            VisibleString         OPTIONAL,
    synonyms        SET OF VisibleString  OPTIONAL,
    case-sensitive  BOOLEAN DEFAULT FALSE,
    whole-word      BOOLEAN DEFAULT FALSE }

Word-substitution-set ::= SET OF Word-substitution

-- Text-matching rule.  Every flag defaults to FALSE and match-location
-- defaults to "contains".
String-constraint ::= SEQUENCE {
    match-text         VisibleString          OPTIONAL,
    match-location     String-location        DEFAULT contains,
    case-sensitive     BOOLEAN                DEFAULT FALSE,
    ignore-space       BOOLEAN                DEFAULT FALSE,
    ignore-punct       BOOLEAN                DEFAULT FALSE,
    ignore-words       Word-substitution-set  OPTIONAL,
    whole-word         BOOLEAN                DEFAULT FALSE,
    not-present        BOOLEAN                DEFAULT FALSE,
    is-all-caps        BOOLEAN                DEFAULT FALSE,
    is-all-lower       BOOLEAN                DEFAULT FALSE,
    is-all-punct       BOOLEAN                DEFAULT FALSE,
    ignore-weasel      BOOLEAN                DEFAULT FALSE,
    is-first-cap       BOOLEAN                DEFAULT FALSE,
    is-first-each-cap  BOOLEAN                DEFAULT FALSE }

String-constraint-set ::= SET OF String-constraint

Strand-constraint ::= ENUMERATED {
    any   (0),
    plus  (1),
    minus (2) }

Seqtype-constraint ::= ENUMERATED {
    any  (0),
    nuc  (1),
    prot (2) }

Partial-constraint ::= ENUMERATED {
    either   (0),
    partial  (1),
    complete (2) }

Location-type-constraint ::= ENUMERATED {
    any             (0),
    single-interval (1),
    joined          (2),
    ordered         (3) }

Location-pos-constraint ::= CHOICE {
    dist-from-end      INTEGER,
    max-dist-from-end  INTEGER,
    min-dist-from-end  INTEGER }

-- Combined location constraint; each enumerated member defaults to its
-- least restrictive value (any / either).
Location-constraint ::= SEQUENCE {
    strand         Strand-constraint         DEFAULT any,
    seq-type       Seqtype-constraint        DEFAULT any,
    partial5       Partial-constraint        DEFAULT either,
    partial3       Partial-constraint        DEFAULT either,
    location-type  Location-type-constraint  DEFAULT any,
    end5           Location-pos-constraint   OPTIONAL,
    end3           Location-pos-constraint   OPTIONAL }

Object-type-constraint ::= ENUMERATED {
    any        (0),
    feature    (1),
    descriptor (2) }

-- feature values --

Macro-feature-type ::= ENUMERATED {
    any                (0),
    gene               (1),
    org                (2),
    cds                (3),
    prot               (4),
    preRNA             (5),
    mRNA               (6),
    tRNA               (7),
    rRNA               (8),
    snRNA              (9),
    scRNA              (10),
    otherRNA           (11),
    pub                (12),
    seq                (13),
    imp                (14),
    allele             (15),
    attenuator         (16),
    c-region           (17),
    caat-signal        (18),
    imp-CDS            (19),
    conflict           (20),
    d-loop             (21),
    d-segment          (22),
    enhancer           (23),
    exon               (24),
    gC-signal          (25),
    iDNA               (26),
    intron             (27),
    j-segment          (28),
    ltr                (29),
    mat-peptide        (30),
    misc-binding       (31),
    misc-difference    (32),
    misc-feature       (33),
    misc-recomb        (34),
    misc-RNA           (35),
    misc-signal        (36),
    misc-structure     (37),
    modified-base      (38),
    mutation           (39),
    n-region           (40),
    old-sequence       (41),
    polyA-signal       (42),
    polyA-site         (43),
    precursor-RNA      (44),
    prim-transcript    (45),
    primer-bind        (46),
    promoter           (47),
    protein-bind       (48),
    rbs                (49),
    repeat-region      (50),
    rep-origin         (51),
    s-region           (52),
    sig-peptide        (53),
    source             (54),
    stem-loop          (55),
    sts                (56),
    tata-signal        (57),
    terminator         (58),
    transit-peptide    (59),
    unsure             (60),
    v-region           (61),
    v-segment          (62),
    variation          (63),
    virion             (64),
    n3clip             (65),
    n3UTR              (66),
    n5clip             (67),
    n5UTR              (68),
    n10-signal         (69),
    n35-signal         (70),
    site-ref           (71),
    region             (72),
    comment            (73),
    bond               (74),
    site               (75),
    rsite              (76),
    user               (77),
    txinit             (78),
    num                (79),
    psec-str           (80),
    non-std-residue    (81),
    het                (82),
    biosrc             (83),
    preprotein         (84),
    mat-peptide-aa     (85),
    sig-peptide-aa     (86),
    transit-peptide-aa (87),
    snoRNA             (88),
    gap                (89),
    operon             (90),
    oriT               (91),
    ncRNA              (92),
    tmRNA              (93),
    mobile-element     (94),
    regulatory         (95) }

Feat-qual-legal ::= ENUMERATED {
    allele                   (1),
    activity                 (2),
    anticodon                (3),
    bound-moiety             (4),
    chromosome               (5),
    citation                 (6),
    codon                    (7),
    codon-start              (8),
    codons-recognized        (9),
    compare                  (10),
    cons-splice              (11),
    db-xref                  (12),
    description              (13),
    direction                (14),
    ec-number                (15),
    environmental-sample     (16),
    evidence                 (17),
    exception                (18),
    experiment               (19),
    focus                    (20),
    frequency                (21),
    function                 (22),
    gene                     (23),
    gene-description         (24),
    inference                (25),
    label                    (26),
    locus-tag                (27),
    map                      (28),
    mobile-element           (29),
    mod-base                 (30),
    mol-type                 (31),
    ncRNA-class              (32),
    note                     (33),
    number                   (34),
    old-locus-tag            (35),
    operon                   (36),
    organism                 (37),
    organelle                (38),
    partial                  (39),
    phenotype                (40),
    plasmid                  (41),
    product                  (42),
    protein-id               (43),
    pseudo                   (44),
    rearranged               (45),
    replace                  (46),
    rpt-family               (47),
    rpt-type                 (48),
    rpt-unit                 (49),
    rpt-unit-seq             (50),
    rpt-unit-range           (51),
    segment                  (52),
    sequenced-mol            (53),
    standard-name            (54),
    synonym                  (55),
    transcript-id            (56),
    transgenic               (57),
    translation              (58),
    transl-except            (59),
    transl-table             (60),
    usedin                   (61),
    mobile-element-type      (62),
    mobile-element-name      (63),
    gene-comment             (64),
    satellite                (65),
    satellite-type           (66),
    satellite-name           (67),
    location                 (68),
    tag-peptide              (69),
    mobile-element-type-type (70),
    name                     (71),
    pcr-conditions           (72),
    regulatory-class         (73) }

Feat-qual-legal-val ::= SEQUENCE {
    qual  Feat-qual-legal,
    val   VisibleString }

Feat-qual-legal-val-choice ::= CHOICE {
    qual  Feat-qual-legal-val }

Feat-qual-legal-set ::= SET OF Feat-qual-legal-val-choice

-- A qualifier is named either by its enumerated value or, for names outside
-- that list, by a string constraint.
Feat-qual-choice ::= CHOICE {
    legal-qual    Feat-qual-legal,
    illegal-qual  String-constraint }

Feature-field ::= SEQUENCE {
    type   Macro-feature-type,
    field  Feat-qual-choice }

Feature-field-legal ::= SEQUENCE {
    type   Macro-feature-type,
    field  Feat-qual-legal }

Feature-field-pair ::= SEQUENCE {
    type        Macro-feature-type,
    field-from  Feat-qual-choice,
    field-to    Feat-qual-choice }

-- Only the ncRNA alternative carries a value (a VisibleString); all other
-- alternatives are NULL.
Rna-feat-type ::= CHOICE {
    any      NULL,
    preRNA   NULL,
    mRNA     NULL,
    tRNA     NULL,
    rRNA     NULL,
    ncRNA    VisibleString,
    tmRNA    NULL,
    miscRNA  NULL }

Rna-field ::= ENUMERATED {
    product           (1),
    comment           (2),
    codons-recognized (3),
    ncrna-class       (4),
    anticodon         (5),
    transcript-id     (6),
    gene-locus        (7),
    gene-description  (8),
    gene-maploc       (9),
    gene-locus-tag    (10),
    gene-synonym      (11),
    gene-comment      (12),
    tag-peptide       (13) }

Rna-qual ::= SEQUENCE {
    type   Rna-feat-type,
    field  Rna-field }

Rna-qual-pair ::= SEQUENCE {
    type        Rna-feat-type,
    field-from  Rna-field,
    field-to    Rna-field }

Source-qual ::= ENUMERATED {
    acronym                   (1),
    anamorph                  (2),
    authority                 (3),
    bio-material              (4),
    biotype                   (5),
    biovar                    (6),
    breed                     (7),
    cell-line                 (8),
    cell-type                 (9),
    chemovar                  (10),
    chromosome                (11),
    clone                     (12),
    clone-lib                 (13),
    collected-by              (14),
    collection-date           (15),
    common                    (16),
    common-name               (17),
    country                   (18),
    cultivar                  (19),
    culture-collection        (20),
    dev-stage                 (21),
    division                  (22),
    dosage                    (23),
    ecotype                   (24),
    endogenous-virus-name     (25),
    environmental-sample      (26),
    forma                     (27),
    forma-specialis           (28),
    frequency                 (29),
    fwd-primer-name           (30),
    fwd-primer-seq            (31),
    gb-acronym                (32),
    gb-anamorph               (33),
    gb-synonym                (34),
    genotype                  (35),
    germline                  (36),
    group                     (37),
    haplotype                 (38),
    identified-by             (39),
    insertion-seq-name        (40),
    isolate                   (41),
    isolation-source          (42),
    lab-host                  (43),
    lat-lon                   (44),
    lineage                   (45),
    map                       (46),
    metagenome-source         (47),
    metagenomic               (48),
    old-lineage               (49),
    old-name                  (50),
    orgmod-note               (51),
    nat-host                  (52),
    pathovar                  (53),
    plasmid-name              (54),
    plastid-name              (55),
    pop-variant               (56),
    rearranged                (57),
    rev-primer-name           (58),
    rev-primer-seq            (59),
    segment                   (60),
    serogroup                 (61),
    serotype                  (62),
    serovar                   (63),
    sex                       (64),
    specimen-voucher          (65),
    strain                    (66),
    subclone                  (67),
    subgroup                  (68),
    subsource-note            (69),
    sub-species               (70),
    substrain                 (71),
    subtype                   (72),
    synonym                   (73),
    taxname                   (74),
    teleomorph                (75),
    tissue-lib                (76),
    tissue-type               (77),
    transgenic                (78),
    transposon-name           (79),
    type                      (80),
    variety                   (81),
    specimen-voucher-INST     (82),
    specimen-voucher-COLL     (83),
    specimen-voucher-SpecID   (84),
    culture-collection-INST   (85),
    culture-collection-COLL   (86),
    culture-collection-SpecID (87),
    bio-material-INST         (88),
    bio-material-COLL         (89),
    bio-material-SpecID       (90),
    all-notes                 (91),
    mating-type               (92),
    linkage-group             (93),
    haplogroup                (94),
    all-quals                 (95),
    dbxref                    (96),
    taxid                     (97),
    all-primers               (98),
    altitude                  (99),
    type-material             (100)
}

Source-qual-pair ::= SEQUENCE {
    field-from  Source-qual,
    field-to    Source-qual }

Source-location ::= ENUMERATED {
    unknown          (0),
    genomic          (1),
    chloroplast      (2),
    chromoplast      (3),
    kinetoplast      (4),
    mitochondrion    (5),
    plastid          (6),
    macronuclear     (7),
    extrachrom       (8),
    plasmid          (9),
    transposon       (10),
    insertion-seq    (11),
    cyanelle         (12),
    proviral         (13),
    virion           (14),
    nucleomorph      (15),
    apicoplast       (16),
    leucoplast       (17),
    proplastid       (18),
    endogenous-virus (19),
    hydrogenosome    (20),
    chromosome       (21),
    chromatophore    (22) }

Source-origin ::= ENUMERATED {
    unknown    (0),
    natural    (1),
    natmut     (2),
    mut        (3),
    artificial (4),
    synthetic  (5),
    other      (255) }

-- Names a source qualifier without a value
-- (compare Source-qual-val-choice, which carries one).
Source-qual-choice ::= CHOICE {
    textqual  Source-qual,
    location  Source-location,
    origin    Source-origin,
    gcode     INTEGER,
    mgcode    INTEGER }

Source-qual-text-val ::= SEQUENCE {
    srcqual  Source-qual,
    val      VisibleString }

Source-qual-val-choice ::= CHOICE {
    textqual  Source-qual-text-val,
    location  Source-location,
    origin    Source-origin,
    gcode     INTEGER,
    mgcode    INTEGER }

Source-qual-val-set ::= SET OF Source-qual-val-choice

CDSGeneProt-field ::= ENUMERATED {
    cds-comment             (1),
    gene-locus              (2),
    gene-description        (3),
    gene-comment            (4),
    gene-allele             (5),
    gene-maploc             (6),
    gene-locus-tag          (7),
    gene-synonym            (8),
    gene-old-locus-tag      (9),
    mrna-product            (10),
    mrna-comment            (11),
    prot-name               (12),
    prot-description        (13),
    prot-ec-number          (14),
    prot-activity           (15),
    prot-comment            (16),
    mat-peptide-name        (17),
    mat-peptide-description (18),
    mat-peptide-ec-number   (19),
    mat-peptide-activity    (20),
    mat-peptide-comment     (21),
    cds-inference           (22),
    gene-inference          (23),
    codon-start             (24) }

CDSGeneProt-field-pair ::= SEQUENCE {
    field-from  CDSGeneProt-field,
    field-to    CDSGeneProt-field }

Molecule-type ::= ENUMERATED {
    unknown                (0),
    genomic                (1),
    precursor-RNA          (2),
    mRNA                   (3),
    rRNA                   (4),
    tRNA                   (5),
    genomic-mRNA           (6),
    cRNA                   (7),
    transcribed-RNA        (8),
    ncRNA                  (9),
    transfer-messenger-RNA (10),
    macro-other            (11) }

Technique-type ::= ENUMERATED {
    unknown            (0),
    standard           (1),
    est                (2),
    sts                (3),
    survey             (4),
    genetic-map        (5),
    physical-map       (6),
    derived            (7),
    concept-trans      (8),
    seq-pept           (9),
    both               (10),
    seq-pept-overlap   (11),
    seq-pept-homol     (12),
    concept-trans-a    (13),
    htgs-1             (14),
    htgs-2             (15),
    htgs-3             (16),
    fli-cDNA           (17),
    htgs-0             (18),
    htc                (19),
    wgs                (20),
    barcode            (21),
    composite-wgs-htgs (22),
    tsa                (23),
    targeted           (24),
    other              (25) }

-- NOTE: "other" used to be numbered (6), the same value as has-left (6).
-- The items of an ENUMERATED type must have distinct values, otherwise the
-- two names cannot be told apart in a value-based (binary) encoding.
-- "other" now takes the next free value, following the sequential
-- numbering used by Topology-type and Strand-type in this module.  The
-- identifier names are unchanged, so text-encoded data is unaffected.
Completedness-type ::= ENUMERATED {
    unknown   (0),
    complete  (1),
    partial   (2),
    no-left   (3),
    no-right  (4),
    no-ends   (5),
    has-left  (6),
    has-right (7),
    other     (8) }

Molecule-class-type ::= ENUMERATED {
    unknown    (0),
    dna        (1),
    rna        (2),
    protein    (3),
    nucleotide (4),
    other      (5) }

Topology-type ::= ENUMERATED {
    unknown  (0),
    linear   (1),
    circular (2),
    tandem   (3),
    other    (4) }

Strand-type ::= ENUMERATED {
    unknown   (0),
    single    (1),
    double    (2),
    mixed     (3),
    mixed-rev (4),
    other     (5) }

-- Selects one molecule-information attribute together with its value.
Molinfo-field ::= CHOICE {
    molecule       Molecule-type,
    technique      Technique-type,
    completedness  Completedness-type,
    mol-class      Molecule-class-type,
    topology       Topology-type,
    strand         Strand-type }

Molinfo-molecule-pair ::= SEQUENCE {
    from  Molecule-type,
    to    Molecule-type }

Molinfo-technique-pair ::= SEQUENCE {
    from  Technique-type,
    to    Technique-type }

Molinfo-completedness-pair ::= SEQUENCE {
    from  Completedness-type,
    to    Completedness-type }

Molinfo-mol-class-pair ::= SEQUENCE {
    from  Molecule-class-type,
    to    Molecule-class-type }

Molinfo-topology-pair ::= SEQUENCE {
    from  Topology-type,
    to    Topology-type }

Molinfo-strand-pair ::= SEQUENCE {
    from  Strand-type,
    to    Strand-type }

-- from/to pair for each alternative of Molinfo-field.
Molinfo-field-pair ::= CHOICE {
    molecule       Molinfo-molecule-pair,
    technique      Molinfo-technique-pair,
    completedness  Molinfo-completedness-pair,
    mol-class      Molinfo-mol-class-pair,
    topology       Molinfo-topology-pair,
    strand         Molinfo-strand-pair }

Molinfo-field-list ::= SET OF Molinfo-field

Molinfo-field-constraint ::= SEQUENCE {
    field   Molinfo-field,
    is-not  BOOLEAN DEFAULT FALSE }

-- publication fields --

Publication-field ::= ENUMERATED {
    cit              (1),
    authors          (2),
    journal          (3),
    volume           (4),
    issue            (5),
    pages            (6),
    date             (7),
    serial-number    (8),
    title            (9),
    affiliation      (10),
    affil-div        (11),
    affil-city       (12),
    affil-sub        (13),
    affil-country    (14),
    affil-street     (15),
    affil-email      (16),
    affil-fax        (17),
    affil-phone      (18),
    affil-zipcode    (19),
    authors-initials (20),
    pmid             (21),
    pub-class        (22)
}

-- structured comment fields --

Structured-comment-field ::= CHOICE {
    database    NULL,
    named       VisibleString,
    field-name  NULL
}

Structured-comment-field-pair ::= SEQUENCE {
    from  Structured-comment-field,
    to    Structured-comment-field
}

-- misc fields --
-- these would not appear in pairs --
Misc-field ::= ENUMERATED {
    genome-project-id  (1),
    comment-descriptor (2),
    defline            (3),
    keyword            (4)
}

-- dblink fields --
-- "sequence-read-archve" (sic): the misspelt identifier is part of the
-- published schema and is kept so existing data and code still match.
DBLink-field-type ::= ENUMERATED {
    trace-assembly       (1),
    bio-sample           (2),
    probe-db             (3),
    sequence-read-archve (4),
    bio-project          (5),
    assembly             (6) }

DBLink-field-pair ::= SEQUENCE {
    from  DBLink-field-type,
    to    DBLink-field-type
}

-- complex constraints --

Pub-type ::= ENUMERATED {
    any             (0),
    published       (1),
    unpublished     (2),
    in-press        (3),
    submitter-block (4) }

Pub-field-constraint ::= SEQUENCE {
    field       Publication-field,
    constraint  String-constraint }

Pub-field-special-constraint-type ::= CHOICE {
    is-present      NULL,
    is-not-present  NULL,
    is-all-caps     NULL,
    is-all-lower    NULL,
    is-all-punct    NULL }

Pub-field-special-constraint ::= SEQUENCE {
    field       Publication-field,
    constraint  Pub-field-special-constraint-type }

Publication-constraint ::= SEQUENCE {
    type           Pub-type,
    field          Pub-field-constraint          OPTIONAL,
    special-field  Pub-field-special-constraint  OPTIONAL }

Source-constraint ::= SEQUENCE {
    field1           Source-qual-choice      OPTIONAL,
    field2           Source-qual-choice      OPTIONAL,
    constraint       String-constraint       OPTIONAL,
    type-constraint  Object-type-constraint  OPTIONAL }

CDSGeneProt-feature-type-constraint ::= ENUMERATED {
    gene        (1),
    mRNA        (2),
    cds         (3),
    prot        (4),
    exon        (5),
    mat-peptide (6) }

CDSGeneProt-pseudo-constraint ::= SEQUENCE {
    feature    CDSGeneProt-feature-type-constraint,
    is-pseudo  BOOLEAN DEFAULT TRUE }

CDSGeneProt-constraint-field ::= CHOICE {
    field  CDSGeneProt-field }
CDSGeneProt-qual-constraint ::= SEQUENCE {
    field1      CDSGeneProt-constraint-field  OPTIONAL,
    field2      CDSGeneProt-constraint-field  OPTIONAL,
    constraint  String-constraint             OPTIONAL }

Field-constraint ::= SEQUENCE {
    field              Field-type,
    string-constraint  String-constraint }

Sequence-constraint-rnamol ::= ENUMERATED {
    any                    (0),
    genomic                (1),
    precursor-RNA          (2),
    mRNA                   (3),
    rRNA                   (4),
    tRNA                   (5),
    genomic-mRNA           (6),
    cRNA                   (7),
    transcribed-RNA        (8),
    ncRNA                  (9),
    transfer-messenger-RNA (10) }

-- Only the rna alternative carries a value (the RNA molecule kind);
-- the others are NULL.
Sequence-constraint-mol-type-constraint ::= CHOICE {
    any         NULL,
    nucleotide  NULL,
    dna         NULL,
    rna         Sequence-constraint-rnamol,
    protein     NULL }

Quantity-constraint ::= CHOICE {
    equals        INTEGER,
    greater-than  INTEGER,
    less-than     INTEGER }

Feature-strandedness-constraint ::= ENUMERATED {
    any                (0),
    minus-only         (1),
    plus-only          (2),
    at-least-one-minus (3),
    at-least-one-plus  (4),
    no-minus           (5),
    no-plus            (6) }

-- feature is the only mandatory member; strandedness defaults to "any".
Sequence-constraint ::= SEQUENCE {
    seqtype            Sequence-constraint-mol-type-constraint  OPTIONAL,
    id                 String-constraint                        OPTIONAL,
    feature            Macro-feature-type,
    num-type-features  Quantity-constraint                      OPTIONAL,
    num-features       Quantity-constraint                      OPTIONAL,
    length             Quantity-constraint                      OPTIONAL,
    strandedness       Feature-strandedness-constraint          DEFAULT any }

Match-type-constraint ::= ENUMERATED {
    dont-care (0),
    yes       (1),
    no        (2) }

Translation-constraint ::= SEQUENCE {
    actual-strings  String-constraint-set,
    transl-strings  String-constraint-set,
    internal-stops  Match-type-constraint  DEFAULT dont-care,
    num-mismatches  Quantity-constraint    OPTIONAL }

-- Union of all constraint kinds defined in this module.
Constraint-choice ::= CHOICE {
    string              String-constraint,
    location            Location-constraint,
    field               Field-constraint,
    source              Source-constraint,
    cdsgeneprot-qual    CDSGeneProt-qual-constraint,
    cdsgeneprot-pseudo  CDSGeneProt-pseudo-constraint,
    sequence            Sequence-constraint,
    pub                 Publication-constraint,
    molinfo             Molinfo-field-constraint,
    field-missing       Field-type,
    translation         Translation-constraint }

Constraint-choice-set ::= SET OF Constraint-choice

Text-marker ::= CHOICE {
    free-text  VisibleString,
    digits     NULL,
    letters    NULL }

Text-portion ::= SEQUENCE {
    left-marker     Text-marker  OPTIONAL,
    include-left    BOOLEAN,
    right-marker    Text-marker  OPTIONAL,
    include-right   BOOLEAN,
    inside          BOOLEAN,
    case-sensitive  BOOLEAN      DEFAULT FALSE,
    whole-word      BOOLEAN      DEFAULT FALSE }

Field-edit-location ::= ENUMERATED {
    anywhere  (0),
    beginning (1),
    end       (2) }

-- Find/replace specification; repl-txt is OPTIONAL and location defaults
-- to "anywhere".
Field-edit ::= SEQUENCE {
    find-txt          VisibleString,
    repl-txt          VisibleString        OPTIONAL,
    location          Field-edit-location  DEFAULT anywhere,
    case-insensitive  BOOLEAN              DEFAULT FALSE }

-- Selects a single field from any of the field families defined above.
Field-type ::= CHOICE {
    source-qual          Source-qual-choice,
    feature-field        Feature-field,
    rna-field            Rna-qual,
    cds-gene-prot        CDSGeneProt-field,
    molinfo-field        Molinfo-field,
    pub                  Publication-field,
    struc-comment-field  Structured-comment-field,
    misc                 Misc-field,
    dblink               DBLink-field-type }

-- from/to counterpart of Field-type (it has no pub or misc alternative).
Field-pair-type ::= CHOICE {
    source-qual          Source-qual-pair,
    feature-field        Feature-field-pair,
    rna-field            Rna-qual-pair,
    cds-gene-prot        CDSGeneProt-field-pair,
    molinfo-field        Molinfo-field-pair,
    struc-comment-field  Structured-comment-field-pair,
    dblink               DBLink-field-pair }

ExistingTextOption ::= ENUMERATED {
    replace-old  (1) ,
    append-semi  (2) ,
    append-space (3) ,
    append-colon (4) ,
    append-comma (5) ,
    append-none  (6) ,
    prefix-semi  (7) ,
    prefix-space (8) ,
    prefix-colon (9) ,
    prefix-comma (10) ,
    prefix-none  (11)
, 6027 leave-old (12) , 6028 add-qual (13) } 6029 6030Apply-action ::= SEQUENCE { 6031 field Field-type , 6032 value VisibleString , 6033 existing-text ExistingTextOption } 6034 6035Edit-action ::= SEQUENCE { 6036 edit Field-edit , 6037 field Field-type } 6038 6039Cap-change ::= ENUMERATED { 6040 none (0) , 6041 tolower (1) , 6042 toupper (2) , 6043 firstcap (3) , 6044 firstcaprestnochange (4) , 6045 firstlower-restnochange (5) , 6046 cap-word-space (6) , 6047 cap-word-space-punc (7) 6048 } 6049 6050Text-transform ::= CHOICE { 6051 edit Field-edit , 6052 caps Cap-change , 6053 remove Text-portion } 6054 6055Text-transform-set ::= SET OF Text-transform 6056 6057Convert-action ::= SEQUENCE { 6058 fields Field-pair-type , 6059 strip-name BOOLEAN DEFAULT FALSE , 6060 keep-original BOOLEAN DEFAULT FALSE , 6061 capitalization Cap-change DEFAULT none , 6062 existing-text ExistingTextOption } 6063 6064Copy-action ::= SEQUENCE { 6065 fields Field-pair-type , 6066 existing-text ExistingTextOption } 6067 6068Swap-action ::= SEQUENCE { 6069 fields Field-pair-type } 6070 6071AECRParse-action ::= SEQUENCE { 6072 portion Text-portion , 6073 fields Field-pair-type , 6074 remove-from-parsed BOOLEAN DEFAULT FALSE , 6075 remove-left BOOLEAN DEFAULT FALSE , 6076 remove-right BOOLEAN DEFAULT FALSE , 6077 transform Text-transform-set OPTIONAL , 6078 existing-text ExistingTextOption } 6079 6080Remove-action ::= SEQUENCE { 6081 field Field-type } 6082 6083Remove-outside-action ::= SEQUENCE { 6084 portion Text-portion , 6085 field Field-type , 6086 remove-if-not-found BOOLEAN DEFAULT FALSE } 6087 6088Action-choice ::= CHOICE { 6089 apply Apply-action , 6090 edit Edit-action , 6091 convert Convert-action , 6092 copy Copy-action , 6093 swap Swap-action , 6094 remove Remove-action , 6095 parse AECRParse-action , 6096 remove-outside Remove-outside-action } 6097 6098AECR-action ::= SEQUENCE { 6099 action Action-choice , 6100 also-change-mrna BOOLEAN DEFAULT FALSE , 6101 constraint 
Constraint-choice-set OPTIONAL } 6102 6103Parse-src-org-choice ::= CHOICE { 6104 source-qual Source-qual , 6105 taxname-after-binomial NULL } 6106 6107Parse-src-org ::= SEQUENCE { 6108 field Parse-src-org-choice , 6109 type Object-type-constraint DEFAULT any } 6110 6111-- For Parse-src-general-id tag, specify the db of the id from which you 6112-- want to retrieve the tag. If empty or null, any db will do. 6113Parse-src-general-id ::= CHOICE { 6114 whole-text NULL , 6115 db NULL , 6116 tag VisibleString } 6117 6118Parse-src ::= CHOICE { 6119 defline NULL , 6120 flatfile NULL , 6121 local-id NULL , 6122 org Parse-src-org , 6123 comment NULL , 6124 bankit-comment NULL , 6125 structured-comment VisibleString , 6126 file-id NULL , 6127 general-id Parse-src-general-id } 6128 6129Parse-dst-org ::= SEQUENCE { 6130 field Source-qual-choice , 6131 type Object-type-constraint DEFAULT any } 6132 6133Parse-dest ::= CHOICE { 6134 defline NULL , 6135 org Parse-dst-org , 6136 featqual Feature-field-legal , 6137 comment-descriptor NULL , 6138 dbxref VisibleString } 6139 6140Parse-action ::= SEQUENCE { 6141 portion Text-portion , 6142 src Parse-src , 6143 dest Parse-dest , 6144 capitalization Cap-change DEFAULT none , 6145 remove-from-parsed BOOLEAN DEFAULT FALSE , 6146 transform Text-transform-set OPTIONAL , 6147 existing-text ExistingTextOption } 6148 6149Location-interval ::= SEQUENCE { 6150 from INTEGER , 6151 to INTEGER } 6152 6153Location-choice ::= CHOICE { 6154 interval Location-interval , 6155 whole-sequence NULL , 6156 point INTEGER } 6157 6158Sequence-list ::= SET OF VisibleString 6159Sequence-list-choice ::= CHOICE { 6160 list Sequence-list , 6161 all NULL } 6162 6163Apply-feature-action ::= SEQUENCE { 6164 type Macro-feature-type , 6165 partial5 BOOLEAN DEFAULT FALSE , 6166 partial3 BOOLEAN DEFAULT FALSE , 6167 plus-strand BOOLEAN DEFAULT TRUE , 6168 location Location-choice , 6169 seq-list Sequence-list-choice , 6170 add-redundant BOOLEAN DEFAULT TRUE , 6171 add-mrna 
BOOLEAN DEFAULT FALSE , 6172 apply-to-parts BOOLEAN DEFAULT FALSE , 6173 only-seg-num INTEGER DEFAULT -1 , 6174 fields Feat-qual-legal-set OPTIONAL, 6175 src-fields Source-qual-val-set OPTIONAL } 6176 6177Remove-feature-action ::= SEQUENCE { 6178 type Macro-feature-type , 6179 constraint Constraint-choice-set OPTIONAL } 6180 6181-- for convert features -- 6182Convert-from-CDS-options ::= SEQUENCE { 6183 remove-mRNA BOOLEAN , 6184 remove-gene BOOLEAN , 6185 remove-transcript-id BOOLEAN } 6186 6187Convert-feature-src-options ::= CHOICE { 6188 cds Convert-from-CDS-options } 6189 6190Bond-type ::= ENUMERATED { 6191 disulfide (1) , 6192 thioester (2) , 6193 crosslink (3) , 6194 thioether (4) , 6195 other (5) } 6196 6197Site-type ::= ENUMERATED { 6198 active (1) , 6199 binding (2) , 6200 cleavage (3) , 6201 inhibit (4) , 6202 modified (5) , 6203 glycosylation (6) , 6204 myristoylation (7) , 6205 mutagenized (8) , 6206 metal-binding (9) , 6207 phosphorylation (10) , 6208 acetylation (11) , 6209 amidation (12) , 6210 methylation (13) , 6211 hydroxylation (14) , 6212 sulfatation (15) , 6213 oxidative-deamination (16) , 6214 pyrrolidone-carboxylic-acid (17) , 6215 gamma-carboxyglutamic-acid (18) , 6216 blocked (19) , 6217 lipid-binding (20) , 6218 np-binding (21) , 6219 dna-binding (22) , 6220 signal-peptide (23) , 6221 transit-peptide (24) , 6222 transmembrane-region (25) , 6223 nitrosylation (26) , 6224 other (27) } 6225 6226-- other choice is to create protein sequences, skipping bad -- 6227Region-type ::= SEQUENCE { 6228 create-nucleotide BOOLEAN } 6229 6230Convert-feature-dst-options ::= CHOICE { 6231 bond Bond-type , 6232 site Site-type , 6233 region Region-type , 6234 ncrna-class VisibleString , 6235 remove-original BOOLEAN } 6236 6237Convert-feature-action ::= SEQUENCE { 6238 type-from Macro-feature-type , 6239 type-to Macro-feature-type , 6240 src-options Convert-feature-src-options OPTIONAL , 6241 dst-options Convert-feature-dst-options OPTIONAL , 6242 
leave-original BOOLEAN , 6243 src-feat-constraint Constraint-choice-set OPTIONAL } 6244 6245Feature-location-strand-from ::= ENUMERATED { 6246 any (0) , 6247 plus (1) , 6248 minus (2) , 6249 unknown (3) , 6250 both (4) } 6251 6252Feature-location-strand-to ::= ENUMERATED { 6253 plus (1) , 6254 minus (2) , 6255 unknown (3) , 6256 both (4) , 6257 reverse (5) } 6258 6259Edit-location-strand ::= SEQUENCE { 6260 strand-from Feature-location-strand-from , 6261 strand-to Feature-location-strand-to } 6262 6263Partial-5-set-constraint ::= ENUMERATED { 6264 all (0) , 6265 at-end (1) , 6266 bad-start (2) , 6267 frame-not-one (3) } 6268 6269Partial-5-set-action ::= SEQUENCE { 6270 constraint Partial-5-set-constraint , 6271 extend BOOLEAN } 6272 6273Partial-5-clear-constraint ::= ENUMERATED { 6274 all (0) , 6275 not-at-end (1) , 6276 good-start (2) } 6277 6278Partial-3-set-constraint ::= ENUMERATED { 6279 all (0) , 6280 at-end (1) , 6281 bad-end (2) } 6282 6283Partial-3-set-action ::= SEQUENCE { 6284 constraint Partial-3-set-constraint , 6285 extend BOOLEAN } 6286 6287Partial-3-clear-constraint ::= ENUMERATED { 6288 all (0) , 6289 not-at-end (1) , 6290 good-end (2) } 6291 6292Partial-both-set-constraint ::= ENUMERATED { 6293 all (0) , 6294 at-end (1) } 6295 6296Partial-both-set-action ::= SEQUENCE { 6297 constraint Partial-both-set-constraint , 6298 extend BOOLEAN } 6299 6300Partial-both-clear-constraint ::= ENUMERATED { 6301 all (0) , 6302 not-at-end (1) } 6303 6304Convert-location-type ::= ENUMERATED { 6305 join (1) , 6306 order (2) , 6307 merge (3) } 6308 6309Extend-to-feature ::= SEQUENCE { 6310 type Macro-feature-type , 6311 include-feat BOOLEAN , 6312 distance Quantity-constraint OPTIONAL } 6313 6314Location-edit-type ::= CHOICE { 6315 strand Edit-location-strand , 6316 set-5-partial Partial-5-set-action , 6317 clear-5-partial Partial-5-clear-constraint , 6318 set-3-partial Partial-3-set-action , 6319 clear-3-partial Partial-3-clear-constraint , 6320 set-both-partial 
Partial-both-set-action , 6321 clear-both-partial Partial-both-clear-constraint , 6322 convert Convert-location-type , 6323 extend-5 NULL , 6324 extend-3 NULL , 6325 extend-5-to-feat Extend-to-feature , 6326 extend-3-to-feat Extend-to-feature } 6327 6328Edit-feature-location-action ::= SEQUENCE { 6329 type Macro-feature-type , 6330 action Location-edit-type , 6331 retranslate-cds BOOLEAN OPTIONAL , 6332 also-edit-gene BOOLEAN OPTIONAL , 6333 constraint Constraint-choice-set OPTIONAL } 6334 6335Molinfo-block ::= SEQUENCE { 6336 to-list Molinfo-field-list , 6337 from-list Molinfo-field-list OPTIONAL , 6338 constraint Constraint-choice-set OPTIONAL } 6339 6340Descriptor-type ::= ENUMERATED { 6341 all (0) , 6342 title (1) , 6343 source (2) , 6344 publication (3) , 6345 comment (4) , 6346 genbank (5) , 6347 user (6) , 6348 create-date (7) , 6349 update-date (8) , 6350 mol-info (9) , 6351 structured-comment (10) , 6352 genome-project-id (11) } 6353 6354Remove-descriptor-action ::= SEQUENCE { 6355 type Descriptor-type , 6356 constraint Constraint-choice-set OPTIONAL } 6357 6358Autodef-list-type ::= ENUMERATED { 6359 feature-list (1) , 6360 complete-sequence (2) , 6361 complete-genome (3) , 6362 sequence (4) } 6363 6364Autodef-misc-feat-parse-rule ::= ENUMERATED { 6365 use-comment-before-first-semicolon (1) , 6366 look-for-noncoding-products (2) } 6367 6368Autodef-action ::= SEQUENCE { 6369 modifiers SET OF Source-qual OPTIONAL , 6370 clause-list-type Autodef-list-type , 6371 misc-feat-parse-rule Autodef-misc-feat-parse-rule DEFAULT look-for-noncoding-products } 6372 6373Fix-pub-caps-action ::= SEQUENCE { 6374 title BOOLEAN OPTIONAL , 6375 authors BOOLEAN OPTIONAL , 6376 affiliation BOOLEAN OPTIONAL , 6377 affil-country BOOLEAN OPTIONAL , 6378 punct-only BOOLEAN DEFAULT FALSE , 6379 constraint Constraint-choice-set OPTIONAL } 6380 6381Sort-order ::= ENUMERATED { 6382 short-to-long (1) , 6383 long-to-short (2) , 6384 alphabetical (3) } 6385 6386Sort-fields-action ::= 
SEQUENCE { 6387 field Field-type , 6388 order Sort-order , 6389 constraint Constraint-choice-set OPTIONAL } 6390 6391Fix-author-caps ::= SEQUENCE { 6392 last-name-only BOOLEAN } 6393 6394Fix-caps-action ::= CHOICE { 6395 pub Fix-pub-caps-action , 6396 src-country NULL , 6397 mouse-strain NULL , 6398 src-qual Source-qual , 6399 author Fix-author-caps } 6400 6401Fix-format-action ::= CHOICE { 6402 collection-date NULL , 6403 lat-lon NULL , 6404 primers NULL , 6405 protein-name NULL } 6406 6407Remove-duplicate-feature-action ::= SEQUENCE { 6408 type Macro-feature-type , 6409 ignore-partials BOOLEAN , 6410 case-sensitive BOOLEAN , 6411 remove-proteins BOOLEAN , 6412 rd-constraint Constraint-choice-set OPTIONAL } 6413 6414Gene-xref-suppression-type ::= ENUMERATED { 6415 any (0) , 6416 suppressing (1) , 6417 non-suppressing (2) } 6418 6419Gene-xref-necessary-type ::= ENUMERATED { 6420 any (0) , 6421 necessary (1) , 6422 unnecessary (2) } 6423 6424Gene-xref-type ::= SEQUENCE { 6425 feature Macro-feature-type , 6426 suppression Gene-xref-suppression-type , 6427 necessary Gene-xref-necessary-type } 6428 6429Xref-type ::= CHOICE { 6430 gene Gene-xref-type } 6431 6432Remove-xrefs-action ::= SEQUENCE { 6433 xref-type Xref-type , 6434 constraint Constraint-choice-set OPTIONAL } 6435 6436Make-gene-xref-action ::= SEQUENCE { 6437 feature Macro-feature-type , 6438 constraint Constraint-choice-set OPTIONAL } 6439 6440Author-fix-type ::= ENUMERATED { 6441 truncate-middle-initials (1) , 6442 strip-suffix (2) , 6443 move-middle-to-first (3) } 6444 6445Author-fix-action ::= SEQUENCE { 6446 fix-type Author-fix-type , 6447 constraint Constraint-choice-set OPTIONAL } 6448 6449Update-sequences-action ::= SEQUENCE { 6450 filename VisibleString , 6451 add-cit-subs BOOLEAN DEFAULT FALSE } 6452 6453Create-TSA-ids-src ::= CHOICE { 6454 local-id NULL , 6455 defline Text-portion 6456} 6457 6458Create-TSA-ids-action ::= SEQUENCE { 6459 src Create-TSA-ids-src , 6460 suffix VisibleString OPTIONAL , 
6461 id-text-portion Text-portion OPTIONAL } 6462 6463Autofix-action ::= SEQUENCE { 6464 test-name VisibleString } 6465 6466Fix-sets-action ::= CHOICE { 6467 remove-single-item-set NULL , 6468 renormalize-nuc-prot-sets NULL , 6469 fix-pop-to-phy NULL 6470} 6471 6472Table-match-type ::= CHOICE { 6473 feature-id NULL , 6474 gene-locus-tag NULL , 6475 protein-id NULL, 6476 dbxref NULL , 6477 nuc-id NULL , 6478 src-qual Source-qual-choice , 6479 protein-name NULL , 6480 bioproject NULL , 6481 any NULL 6482} 6483 6484Table-match ::= SEQUENCE { 6485 match-type Table-match-type , 6486 match-location String-location DEFAULT equals 6487} 6488 6489Apply-table-extra-data ::= CHOICE { 6490 table NULL } 6491 6492Apply-table-action ::= SEQUENCE { 6493 filename VisibleString , 6494 match-type Table-match , 6495 in-memory-table Apply-table-extra-data OPTIONAL , 6496 also-change-mrna BOOLEAN DEFAULT FALSE , 6497 skip-blanks BOOLEAN DEFAULT TRUE 6498} 6499 6500Add-file-action ::= SEQUENCE { 6501 filename VisibleString , 6502 in-memory-table Apply-table-extra-data OPTIONAL 6503} 6504 6505Add-descriptor-list-action ::= SEQUENCE { 6506 descriptor-list Add-file-action , 6507 constraint Constraint-choice-set OPTIONAL 6508} 6509 6510Remove-sequences-action ::= SEQUENCE { 6511 constraint Constraint-choice-set 6512} 6513 6514Update-replaced-ec-numbers-action ::= SEQUENCE { 6515 delete-improper-format BOOLEAN , 6516 delete-unrecognized BOOLEAN , 6517 delete-multiple-replacement BOOLEAN 6518} 6519 6520Retranslate-cds-action ::= SEQUENCE { 6521 obey-stop-codon BOOLEAN 6522} 6523 6524Truncated-ends-partial-type ::= ENUMERATED { 6525 always (1) , 6526 unless-pseudo (2) , 6527 never (3) } 6528 6529Adjust-features-for-gaps-action ::= SEQUENCE { 6530 type Macro-feature-type , 6531 adjust-for-unknown-length-gaps BOOLEAN , 6532 adjust-for-known-length-gaps BOOLEAN , 6533 make-truncated-ends-partial Truncated-ends-partial-type , 6534 trim-ends-in-gaps BOOLEAN , 6535 split-for-internal-gaps BOOLEAN , 
6536 even-when-gaps-are-in-introns BOOLEAN 6537} 6538 6539Macro-action-choice ::= CHOICE { 6540 aecr AECR-action , 6541 parse Parse-action , 6542 add-feature Apply-feature-action , 6543 remove-feature Remove-feature-action , 6544 convert-feature Convert-feature-action , 6545 edit-location Edit-feature-location-action , 6546 remove-descriptor Remove-descriptor-action , 6547 autodef Autodef-action , 6548 removesets NULL , 6549 trim-junk-from-primer-seq NULL , 6550 trim-stop-from-complete-cds NULL , 6551 fix-usa-and-states NULL , 6552 synchronize-cds-partials NULL , 6553 adjust-for-consensus-splice NULL , 6554 fix-pub-caps Fix-pub-caps-action , 6555 remove-seg-gaps NULL , 6556 sort-fields Sort-fields-action , 6557 apply-molinfo-block Molinfo-block , 6558 fix-caps Fix-caps-action , 6559 fix-format Fix-format-action , 6560 fix-spell NULL , 6561 remove-duplicate-features Remove-duplicate-feature-action , 6562 remove-lineage-notes NULL , 6563 remove-xrefs Remove-xrefs-action , 6564 make-gene-xrefs Make-gene-xref-action , 6565 make-bold-xrefs NULL , 6566 fix-author Author-fix-action , 6567 update-sequences Update-sequences-action , 6568 add-trans-splicing NULL , 6569 remove-invalid-ecnumbers NULL , 6570 create-tsa-ids Create-TSA-ids-action , 6571 perform-autofix Autofix-action , 6572 fix-sets Fix-sets-action , 6573 apply-table Apply-table-action , 6574 remove-sequences Remove-sequences-action , 6575 propagate-sequence-technology NULL , 6576 add-file-descriptors Add-descriptor-list-action , 6577 propagate-missing-old-name NULL , 6578 autoapply-structured-comments NULL , 6579 reorder-structured-comments NULL , 6580 remove-duplicate-structured-comments NULL , 6581 lookup-taxonomy NULL , 6582 lookup-pubs NULL , 6583 trim-terminal-ns NULL , 6584 update-replaced-ecnumbers Update-replaced-ec-numbers-action , 6585 instantiate-protein-titles NULL , 6586 retranslate-cds Retranslate-cds-action , 6587 add-selenocysteine-except NULL , 6588 join-short-trnas NULL , 6589 
adjust-features-for-gaps Adjust-features-for-gaps-action } 6590 6591Macro-action-list ::= SET OF Macro-action-choice 6592 6593Search-func ::= CHOICE { 6594 string-constraint String-constraint , 6595 contains-plural NULL , 6596 n-or-more-brackets-or-parentheses INTEGER , 6597 three-numbers NULL , 6598 underscore NULL , 6599 prefix-and-numbers VisibleString , 6600 all-caps NULL , 6601 unbalanced-paren NULL , 6602 too-long INTEGER , 6603 has-term VisibleString } 6604 6605Simple-replace ::= SEQUENCE { 6606 replace VisibleString OPTIONAL, 6607 whole-string BOOLEAN DEFAULT FALSE , 6608 weasel-to-putative BOOLEAN DEFAULT FALSE } 6609 6610Replace-func ::= CHOICE { 6611 simple-replace Simple-replace , 6612 haem-replace VisibleString } 6613 6614Replace-rule ::= SEQUENCE { 6615 replace-func Replace-func , 6616 move-to-note BOOLEAN DEFAULT FALSE } 6617 6618Fix-type ::= ENUMERATED { 6619 none (0) , 6620 typo (1) , 6621 putative-typo (2) , 6622 quickfix (3) , 6623 no-organelle-for-prokaryote (4), 6624 might-be-nonfunctional (5), 6625 database (6), 6626 remove-organism-name (7), 6627 inappropriate-symbol (8), 6628 evolutionary-relationship (9), 6629 use-protein (10), 6630 hypothetical (11), 6631 british (12), 6632 description (13), 6633 gene (14) } 6634 6635Suspect-rule ::= SEQUENCE { 6636 find Search-func , 6637 except Search-func OPTIONAL , 6638 feat-constraint Constraint-choice-set OPTIONAL , 6639 rule-type Fix-type DEFAULT none , 6640 replace Replace-rule OPTIONAL , 6641 description VisibleString OPTIONAL , 6642 fatal BOOLEAN DEFAULT FALSE } 6643 6644Suspect-rule-set ::= SET OF Suspect-rule 6645 6646 6647END 6648