--$Revision: 587101 $
--**********************************************************************
--
--  NCBI Sequence elements
--  by James Ostell, 1990
--  Version 3.0 - June 1994
--
--**********************************************************************

NCBI-Sequence DEFINITIONS ::=
BEGIN

EXPORTS Annotdesc, Annot-descr, Bioseq, GIBB-mol, Heterogen, MolInfo,
        Numbering, Pubdesc, Seq-annot, Seq-data, Seqdesc, Seq-descr, Seq-ext,
        Seq-hist, Seq-inst, Seq-literal, Delta-ext, Seq-gap;

IMPORTS Date, Int-fuzz, Dbtag, Object-id, User-object FROM NCBI-General
        Seq-align FROM NCBI-Seqalign
        Seq-feat, ModelEvidenceSupport FROM NCBI-Seqfeat
        Seq-graph FROM NCBI-Seqres
        Pub-equiv FROM NCBI-Pub
        Org-ref FROM NCBI-Organism
        BioSource FROM NCBI-BioSource
        Seq-id, Seq-loc FROM NCBI-Seqloc
        GB-block FROM GenBank-General
        PIR-block FROM PIR-General
        EMBL-block FROM EMBL-General
        SP-block FROM SP-General
        PRF-block FROM PRF-General
        PDB-block FROM PDB-General
        Seq-table FROM NCBI-SeqTable;

--*** Sequence ********************************
--*

Bioseq ::= SEQUENCE {
    id SET OF Seq-id ,            -- equivalent identifiers
    descr Seq-descr OPTIONAL ,    -- descriptors
    inst Seq-inst ,               -- the sequence data
    annot SET OF Seq-annot OPTIONAL }   -- annotations attached to this sequence

--*** Descriptors *****************************
--*

Seq-descr ::= SET OF Seqdesc

Seqdesc ::= CHOICE {
    mol-type GIBB-mol ,          -- type of molecule
    modif SET OF GIBB-mod ,      -- modifiers
    method GIBB-method ,         -- sequencing method
    name VisibleString ,         -- a name for this sequence
    title VisibleString ,        -- a title for this sequence
    org Org-ref ,                -- if all from one organism
    comment VisibleString ,      -- a more extensive comment
    num Numbering ,              -- a numbering system
    maploc Dbtag ,               -- map location of this sequence
    pir PIR-block ,              -- PIR specific info
    genbank GB-block ,           -- GenBank specific info
    pub Pubdesc ,                -- a reference to the publication
    region VisibleString ,       -- overall region (globin locus)
    user User-object ,           -- user defined object
    sp SP-block ,                -- SWISSPROT specific info
    dbxref Dbtag ,               -- xref to other databases
    embl EMBL-block ,            -- EMBL specific information
    create-date Date ,           -- date entry first created/released
    update-date Date ,           -- date of last update
    prf PRF-block ,              -- PRF specific information
    pdb PDB-block ,              -- PDB specific information
    het Heterogen ,              -- cofactor, etc associated but not bound
    source BioSource ,           -- source of materials, includes Org-ref
    molinfo MolInfo ,            -- info on the molecule and techniques
    modelev ModelEvidenceSupport -- model evidence for XM records
}

--******* NOTE:
--*       mol-type, modif, method, and org are consolidated and expanded
--*       in Org-ref, BioSource, and MolInfo in this specification. They
--*       will be removed in later specifications. Do not use them in
--*       the future. Instead expect the new structures.
--*
--***************************

--********************************************************************
--
-- MolInfo gives information on the
-- classification of the type and quality of the sequence
--
-- WARNING: this will replace GIBB-mol, GIBB-mod, GIBB-method
--
--********************************************************************

MolInfo ::= SEQUENCE {
    biomol INTEGER {
        unknown (0) ,
        genomic (1) ,
        pre-RNA (2) ,              -- precursor RNA of any sort really
        mRNA (3) ,
        rRNA (4) ,
        tRNA (5) ,
        snRNA (6) ,
        scRNA (7) ,
        peptide (8) ,
        other-genetic (9) ,        -- other genetic material
        genomic-mRNA (10) ,        -- reported a mix of genomic and cdna sequence
        cRNA (11) ,                -- viral RNA genome copy intermediate
        snoRNA (12) ,              -- small nucleolar RNA
        transcribed-RNA (13) ,     -- transcribed RNA other than existing classes
        ncRNA (14) ,
        tmRNA (15) ,
        other (255) } DEFAULT unknown ,
    tech INTEGER {
        unknown (0) ,
        standard (1) ,             -- standard sequencing
        est (2) ,                  -- Expressed Sequence Tag
        sts (3) ,                  -- Sequence Tagged Site
        survey (4) ,               -- one-pass genomic sequence
        genemap (5) ,              -- from genetic mapping techniques
        physmap (6) ,              -- from physical mapping techniques
        derived (7) ,              -- derived from other data, not a primary entity
        concept-trans (8) ,        -- conceptual translation
        seq-pept (9) ,             -- peptide was sequenced
        both (10) ,                -- concept transl. w/ partial pept. seq.
        seq-pept-overlap (11) ,    -- sequenced peptide, ordered by overlap
        seq-pept-homol (12) ,      -- sequenced peptide, ordered by homology
        concept-trans-a (13) ,     -- conceptual transl. supplied by author
        htgs-1 (14) ,              -- unordered High Throughput sequence contig
        htgs-2 (15) ,              -- ordered High Throughput sequence contig
        htgs-3 (16) ,              -- finished High Throughput sequence
        fli-cdna (17) ,            -- full length insert cDNA
        htgs-0 (18) ,              -- single genomic reads for coordination
        htc (19) ,                 -- high throughput cDNA
        wgs (20) ,                 -- whole genome shotgun sequencing
        barcode (21) ,             -- barcode of life project
        composite-wgs-htgs (22) ,  -- composite of WGS and HTGS
        tsa (23) ,                 -- transcriptome shotgun assembly
        targeted (24) ,            -- targeted locus sets/studies
        other (255) }              -- use MolInfo.techexp
               DEFAULT unknown ,
    techexp VisibleString OPTIONAL ,   -- explanation if tech not enough
    --
    -- Completeness is not indicated in most records.  For genomes, assume
    -- the sequences are incomplete unless specifically marked as complete.
    -- For mRNAs, assume the ends are not known exactly unless marked as
    -- having the left or right end.
    --
    completeness INTEGER {
        unknown (0) ,
        complete (1) ,             -- complete biological entity
        partial (2) ,              -- partial but no details given
        no-left (3) ,              -- missing 5' or NH3 end
        no-right (4) ,             -- missing 3' or COOH end
        no-ends (5) ,              -- missing both ends
        has-left (6) ,             -- 5' or NH3 end present
        has-right (7) ,            -- 3' or COOH end present
        other (255) } DEFAULT unknown ,
    gbmoltype VisibleString OPTIONAL }   -- identifies particular ncRNA


GIBB-mol ::= ENUMERATED {          -- type of molecule represented
    unknown (0) ,
    genomic (1) ,
    pre-mRNA (2) ,                 -- precursor RNA of any sort really
    mRNA (3) ,
    rRNA (4) ,
    tRNA (5) ,
    snRNA (6) ,
    scRNA (7) ,
    peptide (8) ,
    other-genetic (9) ,            -- other genetic material
    genomic-mRNA (10) ,            -- reported a mix of genomic and cdna sequence
    other (255) }

GIBB-mod ::= ENUMERATED {          -- GenInfo Backbone modifiers
    dna (0) ,
    rna (1) ,
    extrachrom (2) ,
    plasmid (3) ,
    mitochondrial (4) ,
    chloroplast (5) ,
    kinetoplast (6) ,
    cyanelle (7) ,
    synthetic (8) ,
    recombinant (9) ,
    partial (10) ,
    complete (11) ,
    mutagen (12) ,                 -- subject of mutagenesis ?
    natmut (13) ,                  -- natural mutant ?
    transposon (14) ,
    insertion-seq (15) ,
    no-left (16) ,                 -- missing left end (5' for na, NH2 for aa)
    no-right (17) ,                -- missing right end (3' or COOH)
    macronuclear (18) ,
    proviral (19) ,
    est (20) ,                     -- expressed sequence tag
    sts (21) ,                     -- sequence tagged site
    survey (22) ,                  -- one pass survey sequence
    chromoplast (23) ,
    genemap (24) ,                 -- is a genetic map
    restmap (25) ,                 -- is an ordered restriction map
    physmap (26) ,                 -- is a physical map (not ordered restriction map)
    other (255) }

GIBB-method ::= ENUMERATED {       -- sequencing methods
    concept-trans (1) ,            -- conceptual translation
    seq-pept (2) ,                 -- peptide was sequenced
    both (3) ,                     -- concept transl. w/ partial pept. seq.
    seq-pept-overlap (4) ,         -- sequenced peptide, ordered by overlap
    seq-pept-homol (5) ,           -- sequenced peptide, ordered by homology
    concept-trans-a (6) ,          -- conceptual transl. supplied by author
    other (255) }

Numbering ::= CHOICE {             -- any display numbering system
    cont Num-cont ,                -- continuous numbering
    enum Num-enum ,                -- enumerated names for residues
    ref Num-ref ,                  -- by reference to another sequence
    real Num-real }                -- supports mapping to a float system

Num-cont ::= SEQUENCE {            -- continuous display numbering system
    refnum INTEGER DEFAULT 1,          -- number assigned to first residue
    has-zero BOOLEAN DEFAULT FALSE ,   -- 0 used?
    ascending BOOLEAN DEFAULT TRUE }   -- ascending numbers?

Num-enum ::= SEQUENCE {            -- any tags to residues
    num INTEGER ,                        -- number of tags to follow
    names SEQUENCE OF VisibleString }    -- the tags

Num-ref ::= SEQUENCE {             -- by reference to other sequences
    type ENUMERATED {              -- type of reference
        not-set (0) ,
        sources (1) ,              -- by segmented or const seq sources
        aligns (2) } ,             -- by alignments given below
    aligns Seq-align OPTIONAL }

Num-real ::= SEQUENCE {            -- mapping to floating point system
    a REAL ,                       -- from an integer system used by Bioseq
    b REAL ,                       -- position = (a * int_position) + b
    units VisibleString OPTIONAL }

Pubdesc ::= SEQUENCE {             -- how sequence presented in pub
    pub Pub-equiv ,                -- the citation(s)
    name VisibleString OPTIONAL ,  -- name used in paper
    fig VisibleString OPTIONAL ,   -- figure in paper
    num Numbering OPTIONAL ,       -- numbering from paper
    numexc BOOLEAN OPTIONAL ,      -- numbering problem with paper
    poly-a BOOLEAN OPTIONAL ,      -- poly A tail indicated in figure?
    maploc VisibleString OPTIONAL ,    -- map location reported in paper
    seq-raw StringStore OPTIONAL ,     -- original sequence from paper
    align-group INTEGER OPTIONAL ,     -- this seq aligned with others in paper
    comment VisibleString OPTIONAL,    -- any comment on this pub in context
    reftype INTEGER {              -- type of reference in a GenBank record
        seq (0) ,                  -- refers to sequence
        sites (1) ,                -- refers to unspecified features
        feats (2) ,                -- refers to specified features
        no-target (3) }            -- nothing specified (EMBL)
        DEFAULT seq }

Heterogen ::= VisibleString   -- cofactor, prosthetic group, inhibitor, etc

--*** Instances of sequences *******************************
--*

Seq-inst ::= SEQUENCE {            -- the sequence data itself
    repr ENUMERATED {              -- representation class
        not-set (0) ,              -- empty
        virtual (1) ,              -- no seq data
        raw (2) ,                  -- continuous sequence
        seg (3) ,                  -- segmented sequence
        const (4) ,                -- constructed sequence
        ref (5) ,                  -- reference to another sequence
        consen (6) ,               -- consensus sequence or pattern
        map (7) ,                  -- ordered map of any kind
        delta (8) ,                -- sequence made by changes (delta) to others
        other (255) } ,
    mol ENUMERATED {               -- molecule class in living organism
        not-set (0) ,              --   > cdna = rna
        dna (1) ,
        rna (2) ,
        aa (3) ,
        na (4) ,                   -- just a nucleic acid
        other (255) } ,
    length INTEGER OPTIONAL ,      -- length of sequence in residues
    fuzz Int-fuzz OPTIONAL ,       -- length uncertainty
    topology ENUMERATED {          -- topology of molecule
        not-set (0) ,
        linear (1) ,
        circular (2) ,
        tandem (3) ,               -- some part of tandem repeat
        other (255) } DEFAULT linear ,
    strand ENUMERATED {            -- strandedness in living organism
        not-set (0) ,
        ss (1) ,                   -- single strand
        ds (2) ,                   -- double strand
        mixed (3) ,
        other (255) } OPTIONAL ,   -- default ds for DNA, ss for RNA, pept
    seq-data Seq-data OPTIONAL ,   -- the sequence
    ext Seq-ext OPTIONAL ,         -- extensions for special types
    hist Seq-hist OPTIONAL }       -- sequence history

--*** Sequence Extensions **********************************
--*  for representing more complex types
--*  const type uses Seq-hist.assembly

Seq-ext ::= CHOICE {
    seg Seg-ext ,        -- segmented sequences
    ref Ref-ext ,        -- hot link to another sequence (a view)
    map Map-ext ,        -- ordered map of markers
    delta Delta-ext }    -- list of Delta-seq pieces

Seg-ext ::= SEQUENCE OF Seq-loc

Ref-ext ::= Seq-loc

Map-ext ::= SEQUENCE OF Seq-feat

Delta-ext ::= SEQUENCE OF Delta-seq

Delta-seq ::= CHOICE {
    loc Seq-loc ,            -- point to a sequence
    literal Seq-literal }    -- a piece of sequence

Seq-literal ::= SEQUENCE {
    length INTEGER ,               -- must give a length in residues
    fuzz Int-fuzz OPTIONAL ,       -- could be unsure
    seq-data Seq-data OPTIONAL }   -- may have the data

--*** Sequence History Record ***********************************
--** assembly = records how seq was assembled from others
--** replaces = records sequences made obsolete by this one
--** replaced-by = this seq is made obsolete by another(s)

Seq-hist ::= SEQUENCE {
    assembly SET OF Seq-align OPTIONAL ,   -- how was this assembled?
    replaces Seq-hist-rec OPTIONAL ,       -- seq makes these seqs obsolete
    replaced-by Seq-hist-rec OPTIONAL ,    -- these seqs make this one obsolete
    deleted CHOICE {                       -- given either as a flag or as a Date
        bool BOOLEAN ,
        date Date } OPTIONAL }

Seq-hist-rec ::= SEQUENCE {        -- record used by Seq-hist replaces / replaced-by
    date Date OPTIONAL ,
    ids SET OF Seq-id }

--*** Various internal sequence representations ************
--*      all are controlled, fixed length forms

Seq-data ::= CHOICE {              -- sequence representations
    iupacna IUPACna ,              -- IUPAC 1 letter nuc acid code
    iupacaa IUPACaa ,              -- IUPAC 1 letter amino acid code
    ncbi2na NCBI2na ,              -- 2 bit nucleic acid code
    ncbi4na NCBI4na ,              -- 4 bit nucleic acid code
    ncbi8na NCBI8na ,              -- 8 bit extended nucleic acid code
    ncbipna NCBIpna ,              -- nucleic acid probabilities
    ncbi8aa NCBI8aa ,              -- 8 bit extended amino acid codes
    ncbieaa NCBIeaa ,              -- extended ASCII 1 letter aa codes
    ncbipaa NCBIpaa ,              -- amino acid probabilities
    ncbistdaa NCBIstdaa,           -- consecutive codes for std aas
    gap Seq-gap                    -- gap types
}

Seq-gap ::= SEQUENCE {
    type INTEGER {
        unknown(0),
        fragment(1),               -- Deprecated. Used only for AGP 1.1
        clone(2),                  -- Deprecated. Used only for AGP 1.1
        short-arm(3),
        heterochromatin(4),
        centromere(5),
        telomere(6),
        repeat(7),
        contig(8),
        scaffold(9),
        contamination(10),
        other(255)
    },
    linkage INTEGER {
        unlinked(0),
        linked(1),
        other(255)
    } OPTIONAL,
    linkage-evidence SET OF Linkage-evidence OPTIONAL
}

Linkage-evidence ::= SEQUENCE {    -- element type of Seq-gap.linkage-evidence
    type INTEGER {
        paired-ends(0),
        align-genus(1),
        align-xgenus(2),
        align-trnscpt(3),
        within-clone(4),
        clone-contig(5),
        map(6),
        strobe(7),
        unspecified(8),
        pcr(9),
        proximity-ligation(10),
        other(255)
    }
}

IUPACna ::= StringStore       -- IUPAC 1 letter codes, no spaces
IUPACaa ::= StringStore       -- IUPAC 1 letter codes, no spaces
NCBI2na ::= OCTET STRING      -- 00=A, 01=C, 10=G, 11=T
NCBI4na ::= OCTET STRING      -- 1 bit each for agct
                              -- 0001=A, 0010=C, 0100=G, 1000=T/U
                              -- 0101=Purine, 1010=Pyrimidine, etc
NCBI8na ::= OCTET STRING      -- for modified nucleic acids
NCBIpna ::= OCTET STRING      -- 5 octets/base, prob for a,c,g,t,n
                              -- probabilities are coded 0-255 = 0.0-1.0
NCBI8aa ::= OCTET STRING      -- for modified amino acids
NCBIeaa ::= StringStore       -- ASCII extended 1 letter aa codes
                              -- IUPAC codes + U=selenocysteine
NCBIpaa ::= OCTET STRING      -- 25 octets/aa, prob for IUPAC aas in order:
                              -- A-Y,B,Z,X,(ter),anything
                              -- probabilities are coded 0-255 = 0.0-1.0
NCBIstdaa ::= OCTET STRING    -- codes 0-25, 1 per byte

--*** Sequence Annotation *************************************
--*

-- This is a replica of Textseq-id
-- This is specific for annotations, and exists to maintain a semantic
-- difference between IDs assigned to annotations and IDs assigned to
-- sequences
Textannot-id ::= SEQUENCE {
    name      VisibleString OPTIONAL ,
    accession VisibleString OPTIONAL ,
    release   VisibleString OPTIONAL ,
    version   INTEGER       OPTIONAL
}

Annot-id ::= CHOICE {
    local Object-id ,
    ncbi INTEGER ,
    general Dbtag,
    other Textannot-id
}

Annot-descr ::= SET OF Annotdesc

Annotdesc ::= CHOICE {
    name VisibleString ,         -- a short name for this collection
    title VisibleString ,        -- a title for this collection
    comment VisibleString ,      -- a more extensive comment
    pub Pubdesc ,                -- a reference to the publication
    user User-object ,           -- user defined object
    create-date Date ,           -- date entry first created/released
    update-date Date ,           -- date of last update
    src Seq-id ,                 -- source sequence from which annot came
    align Align-def,             -- definition of the SeqAligns
    region Seq-loc }             -- all contents cover this region

Align-def ::= SEQUENCE {
    align-type INTEGER {         -- class of align Seq-annot
        ref (1) ,                -- set of alignments to the same sequence
        alt (2) ,                -- set of alternate alignments of the same seqs
        blocks (3) ,             -- set of aligned blocks in the same seqs
        other (255) } ,
    ids SET OF Seq-id OPTIONAL } -- used for the one ref seqid for now

Seq-annot ::= SEQUENCE {
    id SET OF Annot-id OPTIONAL ,
    db INTEGER {                 -- source of annotation
        genbank (1) ,
        embl (2) ,
        ddbj (3) ,
        pir  (4) ,
        sp   (5) ,
        bbone (6) ,
        pdb   (7) ,
        other (255) } OPTIONAL ,
    name VisibleString OPTIONAL ,   -- source if "other" above
    desc Annot-descr OPTIONAL ,     -- used only for stand alone Seq-annots
    data CHOICE {
        ftable SET OF Seq-feat ,
        align SET OF Seq-align ,
        graph SET OF Seq-graph ,
        ids SET OF Seq-id ,         -- used for communication between tools
        locs SET OF Seq-loc ,       -- used for communication between tools
        seq-table Seq-table } }     -- features in table form

END