--$Revision: 370718 $
--**********************************************************************
--
--  NCBI Sequence Alignment elements
--  by James Ostell, 1990
--
--**********************************************************************

NCBI-Seqalign DEFINITIONS ::=
BEGIN

EXPORTS Seq-align, Score, Score-set, Seq-align-set;

IMPORTS Seq-id, Seq-loc , Na-strand FROM NCBI-Seqloc
        User-object, Object-id FROM NCBI-General;

--*** Sequence Alignment ********************************
--*

Seq-align-set ::= SET OF Seq-align

Seq-align ::= SEQUENCE {
    type ENUMERATED {
        not-set (0) ,
        global (1) ,
        diags (2) ,                     -- unbroken, but not ordered, diagonals
        partial (3) ,                   -- mapping pieces together
        disc (4) ,                      -- discontinuous alignment
        other (255) } ,
    dim INTEGER OPTIONAL ,              -- dimensionality
    score SET OF Score OPTIONAL ,       -- for whole alignment
    segs CHOICE {                       -- alignment data
        dendiag SEQUENCE OF Dense-diag ,
        denseg Dense-seg ,
        std SEQUENCE OF Std-seg ,
        packed Packed-seg ,
        disc Seq-align-set,
        spliced Spliced-seg,
        sparse Sparse-seg
    } ,

    -- regions of sequence over which align
    -- was computed
    bounds SET OF Seq-loc OPTIONAL,

    -- alignment id
    id SEQUENCE OF Object-id OPTIONAL,

    --extra info
    ext SEQUENCE OF User-object OPTIONAL
}

Dense-diag ::= SEQUENCE {               -- for (multiway) diagonals
    dim INTEGER DEFAULT 2 ,             -- dimensionality
    ids SEQUENCE OF Seq-id ,            -- sequences in order
    starts SEQUENCE OF INTEGER ,        -- start OFFSETS in ids order
    len INTEGER ,                       -- len of aligned segments
    strands SEQUENCE OF Na-strand OPTIONAL ,
    scores SET OF Score OPTIONAL }

    -- Dense-seg: the densest packing for sequence alignments only.
    --            a start of -1 indicates a gap for that sequence of
    --            length lens.
    --
    -- id=100  AAGGCCTTTTAGAGATGATGATGATGATGA
    -- id=200  AAGGCCTTTTAG.......GATGATGATGA
    -- id=300  ....CCTTTTAGAGATGATGAT....ATGA
    --
    -- dim = 3, numseg = 6, ids = { 100, 200, 300 }
    -- starts = { 0,0,-1, 4,4,0, 12,-1,8, 19,12,15, 22,15,-1, 26,19,18 }
    -- lens = { 4, 8, 7, 3, 4, 4 }
    --

Dense-seg ::= SEQUENCE {                -- for (multiway) global or partial alignments
    dim INTEGER DEFAULT 2 ,             -- dimensionality
    numseg INTEGER ,                    -- number of segments here
    ids SEQUENCE OF Seq-id ,            -- sequences in order
    starts SEQUENCE OF INTEGER ,        -- start OFFSETS in ids order within segs
    lens SEQUENCE OF INTEGER ,          -- length of each segment (one entry per
                                        -- segment, as in the example above)
    strands SEQUENCE OF Na-strand OPTIONAL ,
    scores SEQUENCE OF Score OPTIONAL } -- score for each seg

Packed-seg ::= SEQUENCE {               -- for (multiway) global or partial alignments
    dim INTEGER DEFAULT 2 ,             -- dimensionality
    numseg INTEGER ,                    -- number of segments here
    ids SEQUENCE OF Seq-id ,            -- sequences in order
    starts SEQUENCE OF INTEGER ,        -- start OFFSETS in ids order for whole alignment
    present OCTET STRING ,              -- Boolean if each sequence present or absent in
                                        -- each segment
    lens SEQUENCE OF INTEGER ,          -- length of each segment
    strands SEQUENCE OF Na-strand OPTIONAL ,
    scores SEQUENCE OF Score OPTIONAL } -- score for each segment

Std-seg ::= SEQUENCE {
    dim INTEGER DEFAULT 2 ,             -- dimensionality
    ids SEQUENCE OF Seq-id OPTIONAL ,
    loc SEQUENCE OF Seq-loc ,
    scores SET OF Score OPTIONAL }


Spliced-seg ::= SEQUENCE {
    -- product is either protein or transcript (cDNA)
    product-id Seq-id OPTIONAL,
    genomic-id Seq-id OPTIONAL,

    -- should be 'plus' or 'minus'
    product-strand Na-strand OPTIONAL ,
    genomic-strand Na-strand OPTIONAL ,

    product-type ENUMERATED {
        transcript(0),
        protein(1)
    },

    -- set of segments involved
    -- each segment corresponds to one exon
    -- exons are always in biological order
    exons SEQUENCE OF Spliced-exon ,

    -- start of poly(A) tail on the transcript
    -- For sense transcripts:
    --   aligned product positions < poly-a <= product-length
    --   poly-a == product-length indicates inferred poly(A) tail at transcript's end
    -- For antisense transcripts:
    --   -1 <= poly-a < aligned product positions
    --   poly-a == -1 indicates inferred poly(A) tail at transcript's start
    poly-a INTEGER OPTIONAL,

    -- length of the product, in bases/residues
    -- from this (or from poly-a if present), a 3' unaligned length can be extracted
    product-length INTEGER OPTIONAL,

    -- alignment descriptors / modifiers
    -- this provides us a set for extension
    modifiers SET OF Spliced-seg-modifier OPTIONAL
}

Spliced-seg-modifier ::= CHOICE {
    -- protein aligns from the start and the first codon
    -- on both product and genomic is start codon
    start-codon-found BOOLEAN,

    -- protein aligns to its end and there is stop codon
    -- on the genomic right after the alignment
    stop-codon-found BOOLEAN
}


-- complete or partial exon
-- two consecutive Spliced-exons may belong to one exon
Spliced-exon ::= SEQUENCE {
    -- product-end >= product-start
    product-start Product-pos ,
    product-end Product-pos ,

    -- genomic-end >= genomic-start
    genomic-start INTEGER ,
    genomic-end INTEGER ,

    -- product is either protein or transcript (cDNA)
    product-id Seq-id OPTIONAL ,
    genomic-id Seq-id OPTIONAL ,

    -- should be 'plus' or 'minus'
    product-strand Na-strand OPTIONAL ,

    -- genomic-strand represents the strand of translation
    genomic-strand Na-strand OPTIONAL ,

    -- basic segments are always in biological order
    parts SEQUENCE OF Spliced-exon-chunk OPTIONAL ,

    -- scores for this exon
    scores Score-set OPTIONAL ,

    -- splice sites
    acceptor-before-exon Splice-site OPTIONAL,
    donor-after-exon Splice-site OPTIONAL,

    -- flag: is this exon complete or partial?
    partial BOOLEAN OPTIONAL,

    --extra info
    ext SEQUENCE OF User-object OPTIONAL
}


Product-pos ::= CHOICE {
    nucpos INTEGER,
    protpos Prot-pos
}


-- position on protein (1/3 of amino-acid resolution)
Prot-pos ::= SEQUENCE {
    -- amino-acid position (0-based)
    amin INTEGER ,

    -- position within codon (1-based)
    -- 0 = not set (meaning 1)
    frame INTEGER DEFAULT 0
}


-- Spliced-exon-chunk: piece of an exon
-- lengths are given in nucleotide bases (1/3 of aminoacid when product is a
-- protein)
Spliced-exon-chunk ::= CHOICE {
    -- both sequences represented, product and genomic sequences match
    match INTEGER ,

    -- both sequences represented, product and genomic sequences do not match
    mismatch INTEGER ,

    -- both sequences are represented, there is sufficient similarity
    -- between product and genomic sequences. Can be used to replace stretches
    -- of matches and mismatches, mostly for protein to genomic where
    -- definition of match or mismatch depends on translation table
    diag INTEGER ,

    -- insertion in product sequence (i.e. gap in the genomic sequence)
    product-ins INTEGER ,

    -- insertion in genomic sequence (i.e. gap in the product sequence)
    genomic-ins INTEGER
}


-- site involved in splice
Splice-site ::= SEQUENCE {
    -- typically two bases in the intronic region, always
    -- in IUPAC format
    bases VisibleString
}


-- ==========================================================================
--
-- Sparse-seg follows the semantics of dense-seg and is more optimal for
-- representing sparse multiple alignments
--
-- ==========================================================================


Sparse-seg ::= SEQUENCE {
    master-id Seq-id OPTIONAL,

    -- pairwise alignments constituting this multiple alignment
    rows SET OF Sparse-align,

    -- per-row scores
    row-scores SET OF Score OPTIONAL,

    -- index of extra items
    ext SET OF Sparse-seg-ext OPTIONAL
}

Sparse-align ::= SEQUENCE {
    first-id Seq-id,
    second-id Seq-id,

    numseg INTEGER,                         --number of segments
    first-starts SEQUENCE OF INTEGER ,      --starts on the first sequence [numseg]
    second-starts SEQUENCE OF INTEGER ,     --starts on the second sequence [numseg]
    lens SEQUENCE OF INTEGER ,              --lengths of segments [numseg]
    second-strands SEQUENCE OF Na-strand OPTIONAL ,

    -- per-segment scores
    seg-scores SET OF Score OPTIONAL
}

Sparse-seg-ext ::= SEQUENCE {
    --seg-ext SET OF {
    --    index INTEGER,
    --    data User-field
    -- }
    index INTEGER
}



-- use of Score is discouraged for external ASN.1 specifications
Score ::= SEQUENCE {
    id Object-id OPTIONAL ,
    value CHOICE {
        real REAL ,
        int INTEGER
    }
}

-- use of Score-set is encouraged for external ASN.1 specifications
Score-set ::= SET OF Score

END