--$Revision: 460847 $
--**********************************************************************
--
--  NCBI Variation container
--  by Variation Working Group, 2011
--
--  The Variation type describes a sequence change at location(s),
--  or a hierarchical combination thereof.
--
--  Related location-centric type is SeqFeatData.Variation-ref
--
--**********************************************************************

NCBI-VariationPackage DEFINITIONS ::=
BEGIN

EXPORTS Variation, VariantPlacement;

IMPORTS Int-fuzz, Dbtag, User-object, Object-id FROM NCBI-General
        Population-data, Phenotype, Variation-inst, VariantProperties FROM NCBI-Variation
        Seq-loc FROM NCBI-Seqloc
        SubSource FROM NCBI-BioSource
        Seq-literal, Bioseq FROM NCBI-Sequence
        Pub-set FROM NCBI-Pub;


-- A problem report attached to a Variation or to a VariantPlacement
-- (both carry an "exceptions" field of this type).
-- The numeric code is optional; the human-readable message is mandatory.
VariationException ::= SEQUENCE
{
    code INTEGER {
        hgvs-parsing (1),                    --invalid hgvs expression
        hgvs-exon-boundary (2),              --anchor position in an intronic HGVS expression is not at an exon boundary

        inconsistent-consequence (3),        --consequence protein variation attached to precursor variation's consequence
                                             --could not be derived from it.

        inconsistent-asserted-allele (4),    --asserted allele is inconsistent with the reference

        no-mapping (5),                      --could not remap
        partial-mapping (6),                 --mapped location is shorter than the query
        split-mapping (7),                   --a source interval maps to multiple non-abutting intervals.
        mismatches-in-mapping (8),           --the source sequence differs from sequence at mapped loc
        inconsistent-asserted-moltype (9),   --asserted mol-type is inconsistent with seq-id (e.g. NM_12345.6:g.)
        bioseq-state (10),
        ambiguous-sequence (11),
        ref-same-as-variant (12),            --reference sequence at the location is same as variant sequence in the variation
        seqfetch-too-long (13),              --can't fetch sequence because location is longer than specified threshold
        seqfetch-intronic (14),              --can't fetch sequence for an intronic (anchor+offset)-based location
        seqfetch-invalid (15),               --can't fetch sequence because location is invalid (e.g. extends past the end)
        no-mapping-from-newer-version (16),  --have mapping from older version of a sequence, but not from newer
        source-location-overhang (17),       --The source location overhangs the alignment by at least 5kb (VAR-1307)
        hgvs-exon-boundary-induced (18)      --Similar to (2), except induced by 5'/3'-terminal or an exon extension (VAR-1309)
    } OPTIONAL,

    message VisibleString
}

-- One concrete placement of a variant on a sequence location, together
-- with its molecule type and optional placement-specific annotations.
VariantPlacement ::= SEQUENCE
{
    -- actual concrete placement we are considering
    loc Seq-loc,

    -- molecule type of the placement; each value corresponds to an
    -- HGVS coordinate prefix
    mol INTEGER {
        unknown(0),
        genomic(1),        --"g." coordinates in HGVS
        cdna(2),           --"c." coordinates in HGVS
        rna(3),            --"n." coordinates in HGVS
        protein(4),        --"p." coordinates in HGVS
        mitochondrion(5)   --"mt." coordinates in HGVS
    },

    -- location flags
    placement-method INTEGER {
        projected(1),
        asserted(2),
        aligned(3)
    } OPTIONAL,

    -- location refinements, describing offsets into introns from product coordinates.
    -- Biological semantics: start-offset/stop-offset apply to bio-start/bio-stop respectively.
    -- positive = downstream; negative = upstream.
    start-offset INTEGER OPTIONAL,
    start-offset-fuzz Int-fuzz OPTIONAL,
    stop-offset INTEGER OPTIONAL,
    stop-offset-fuzz Int-fuzz OPTIONAL,

    -- 0-based position of bio-start relative to containing codon
    frame INTEGER OPTIONAL,

    -- for situations in which a raw location isn't sufficient
    seq Seq-literal OPTIONAL,

    -- reference to the assembly (GenColl ID) for this location
    assembly Dbtag OPTIONAL,

    hgvs-name VisibleString OPTIONAL,

    -- the reference location for this variant
    -- NOTE(review): the remark above does not obviously describe a
    -- VisibleString field named "comment"; confirm the intended meaning.
    comment VisibleString OPTIONAL,

    exceptions SET OF VariationException OPTIONAL,

    dbxrefs SET OF Dbtag OPTIONAL,     --e.g. rs#, that are placement-specific

    ext SET OF User-object OPTIONAL,   --for process-specific placement tags/labels

    gene-location INTEGER OPTIONAL,    --Same semantics as VariantProperties.gene-location, except placement-specific

    id Object-id OPTIONAL,
    parent-id Object-id OPTIONAL,      --id of the placement from which this one was derived

    -- NOTE(review): declared SEQUENCE OF here but SET OF in Variation.so-terms;
    -- left unchanged so that the encoding stays the same.
    so-terms SEQUENCE OF INTEGER OPTIONAL   --Sequence Ontology terms for this placement
}

-- The method(s) by which a variation was obtained, plus an optional
-- reference location used for sequence-based validation.
VariationMethod ::= SEQUENCE
{
    -- sequencing / acquisition method
    method SET OF INTEGER {
        unknown (0),
        bac-acgh (1),
        computational (2),
        curated (3),
        digital-array (4),
        expression-array (5),
        fish (6),
        flanking-sequence (7),
        maph (8),
        mcd-analysis (9),
        mlpa (10),
        oea-assembly (11),
        oligo-acgh (12),
        paired-end (13),
        pcr (14),
        qpcr (15),
        read-depth (16),
        roma (17),
        rt-pcr (18),
        sage (19),
        sequence-alignment (20),
        sequencing (21),
        snp-array (22),
        snp-genoytyping (23),   -- sic: misspelled identifier kept as is, renaming it would change the value notation
        southern (24),
        western (25),
        optical-mapping (26),

        other (255)
    },

    -- if sequence-based validation methods are used,
    -- what reference sequence location validated the presence of this?
    reference-location Seq-loc OPTIONAL
}


-- A sequence change at one or more locations, or a hierarchical
-- combination of such changes (see data.set and consequence.variation,
-- both of which nest Variation recursively).
Variation ::= SEQUENCE
{
    -- ids (i.e., SNP rsid / ssid, dbVar nsv/nssv)
    -- expected values include 'dbSNP|rs12334', 'dbSNP|ss12345', 'dbVar|nsv1'
    --
    -- we relate three kinds of IDs here:
    --  - our current object's id
    --  - the id of this object's parent, if it exists
    --  - the sample ID that this item originates from
    id Dbtag OPTIONAL,
    parent-id Dbtag OPTIONAL,
    sample-id SET OF Object-id OPTIONAL,
    other-ids SET OF Dbtag OPTIONAL,

    -- names and synonyms
    -- some variants have well-known canonical names and possible accepted
    -- synonyms
    name VisibleString OPTIONAL,
    synonyms SET OF VisibleString OPTIONAL,

    -- tag for comment and descriptions
    description VisibleString OPTIONAL,

    -- where this beast is seen
    -- note that this is a collection of locations, and there are no
    -- restrictions on its contents.
    placements SEQUENCE OF VariantPlacement OPTIONAL,

    -- phenotype
    phenotype SET OF Phenotype OPTIONAL,

    -- sequencing / acquisition method
    method VariationMethod OPTIONAL,

    -- Note about SNP representation and pertinent fields: allele-frequency,
    -- population, quality-codes:
    -- The case of multiple alleles for a SNP would be described by
    -- parent-feature of type Variation-set.diff-alleles, where the child
    -- features of type Variation-inst, all at the same location, would
    -- describe individual alleles.

    -- population data
    population-data SET OF Population-data OPTIONAL,

    -- variant properties bit fields
    variant-prop VariantProperties OPTIONAL,

    -- publication support; same type as in seq-feat
    pub Pub-set OPTIONAL,

    -- reference to an external clinical-test record
    clinical-test Dbtag OPTIONAL,

    data CHOICE {
        unknown NULL,
        note VisibleString,   --free-form
        uniparental-disomy NULL,

        -- actual sequence-edit at feat.location
        instance Variation-inst,

        -- Set of related Variations.
        -- Location of the set equals the union of member locations
        set SEQUENCE {
            type INTEGER {
                unknown (0),
                compound (1),     -- complex change at the same location on the
                                  -- same molecule
                products (2),     -- different products arising from the same
                                  -- variation in a precursor, e.g. r.[13g>a,
                                  -- 13_88del]
                haplotype (3),    -- changes on the same allele, e.g
                                  -- r.[13g>a;15u>c]
                genotype (4),     -- changes on different alleles in the same
                                  -- genotype, e.g. g.[476C>T]+[476C>T]
                mosaic (5),       -- different genotypes in the same individual
                individual (6),   -- same organism; allele relationship unknown,
                                  -- e.g. g.[476C>T(+)183G>C]
                population (7),   -- population
                alleles (8),      -- set represents a set of observed alleles
                package (9),      -- set represents a package of observations at
                                  -- a given location, generally containing
                                  -- asserted + reference
                chimeric (10),    -- e.g. c.[1C>T//2G>T]
                other (255)
            },
            variations SET OF Variation,
            name VisibleString OPTIONAL
        },

        -- variant is a complex and undescribed change at the location
        -- This type of variant is known to occur in dbVar submissions
        complex NULL,

        seq Bioseq   -- Sequence as it exists post-alteration
    },

    consequence SET OF CHOICE {
        unknown NULL,
        splicing NULL,        --some effect on splicing
        note VisibleString,   --freeform

        -- Describe resulting variation in the product, e.g. missense,
        -- nonsense, silent, neutral, etc in a protein, that arises from
        -- THIS variation.
        variation Variation,

        loss-of-heterozygosity SEQUENCE {
            -- In germline comparison, it will be reference genome assembly
            -- (default) or reference/normal population. In somatic mutation,
            -- it will be a name of the normal tissue.
            reference VisibleString OPTIONAL,

            -- Name of the testing subject type or the testing tissue.
            test VisibleString OPTIONAL
        }
    } OPTIONAL,

    -- Frameshift-related info. Applies only to protein-level variations.
    -- see http://www.hgvs.org/mutnomen/recs-prot.html
    frameshift SEQUENCE {
        phase INTEGER OPTIONAL,
        x-length INTEGER OPTIONAL
    } OPTIONAL,

    -- Additional undescribed extensions
    ext SET OF User-object OPTIONAL,

    somatic-origin SET OF SEQUENCE {
        -- description of the somatic origin itself
        source SubSource OPTIONAL,
        -- condition related to this origin's type
        condition SEQUENCE {
            description VisibleString OPTIONAL,
            -- reference to BioTerm / other descriptive database
            object-id SET OF Dbtag OPTIONAL
        } OPTIONAL
    } OPTIONAL,

    exceptions SET OF VariationException OPTIONAL,

    -- NOTE(review): declared SET OF here but SEQUENCE OF in
    -- VariantPlacement.so-terms; left unchanged so that the encoding
    -- stays the same.
    so-terms SET OF INTEGER OPTIONAL
}


END