/*===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
*/

/*==========================================================================
 * Sequence schema
 */
version 1;

include 'vdb/vdb.vschema';
include 'insdc/insdc.vschema';


/*--------------------------------------------------------------------------
 * rand_4na_2na
 *  converts 4na to 2na
 *
 *  substitutes a random base for ambiguities
 *  from the bases allowed in the 4na.
 *
 *       A | C | G | T
 *    =================
 *    N    |   |   |     # any base may be substituted
 *    A  * |   |   |     # always A
 *    C    | * |   |     # always C
 *    M  * | * |   |     # A or C
 *    G    |   | * |     # always G
 *    R  * |   | * |     # A or G
 *    S    | * | * |     # C or G
 *    V  * | * | * |     # A, C or G
 *    T    |   |   | *   # always T
 *    W  * |   |   | *   # A or T
 *    Y    | * |   | *   # C or T
 *    H  * | * |   | *   # A, C or T
 *    K    |   | * | *   # G or T
 *    D  * |   | * | *   # A, G or T
 *    B    | * | * | *   # C, G or T
 *    N  * | * | * | *   # any base may be substituted
 *
 *  "rd_bin" [ DATA ] - unpacked 4na input
 *
 *  returns unpacked 2na
 */
extern function
    INSDC:2na:bin INSDC:SEQ:rand_4na_2na #1 ( INSDC:4na:bin rd_bin );


/*--------------------------------------------------------------------------
 * sequence
 *  basic sequence table
 *
 * history:
 *  1.0.1 - introduced text-mode QUALITY columns
 */
table INSDC:tbl:sequence #1.0.1
{
    /* READ
     *  native or converted DNA sequence
     */

    // default is IUPAC character representation
    extern default column INSDC:dna:text READ
    {
        read = out_dna_text;
        validate = < INSDC:dna:text > compare ( in_dna_text, out_dna_text );
    }

    // 4na representation - unpacked and packed
    extern column INSDC:4na:bin READ = out_4na_bin;
    extern column INSDC:4na:packed READ = out_4na_packed;

    // x2na representation - 2na with ambiguity
    extern column INSDC:x2na:bin READ = out_x2na_bin;

    // 2na representation - 2na with no ambiguity - unpacked and packed
    extern column INSDC:2na:bin READ = out_2na_bin;
    extern column INSDC:2na:packed READ = out_2na_packed;


    /* CSREAD
     *  native or converted color-space sequence
     */

    // default is ASCII character representation
    extern default column INSDC:color:text CSREAD
    {
        read = out_color_text;
        validate = < INSDC:color:text > compare ( in_color_text, out_color_text );
    }

    // x2cs representation - 2cs with ambiguity
    extern column INSDC:x2cs:bin CSREAD = out_x2cs_bin;

    // 2cs representation - 2cs with no ambiguity - unpacked and packed
    extern column INSDC:2cs:bin CSREAD = out_2cs_bin;
    extern column INSDC:2cs:packed CSREAD = out_2cs_packed;

    /* CS_NATIVE
     *  is color-space the native sequence space
     */
    readonly column bool CS_NATIVE = cs_native;

    /* CS_KEY
     *  leading call given in base-space
     */
    extern column INSDC:dna:text CS_KEY
    {
        read = out_cs_key;
        validate = < INSDC:dna:text > compare ( in_cs_key, out_cs_key );
    }

    /* COLOR_MATRIX
     *  matrix used for color-space conversions
     */
    extern column U8 COLOR_MATRIX = out_color_matrix;


    /* QUALITY
     *  base or color call qualities
     */

    // PHRED is default
    extern default column INSDC:quality:phred QUALITY
    {
        read = out_qual_phred;

        // validate against the same production that is read back,
        // as every other read/validate column in this table does
        validate = < INSDC:quality:phred > compare ( in_qual_phred, out_qual_phred );
    }

    // textual encodings
    //  each uses its own text production when one is supplied,
    //  otherwise it is derived from out_qual_phred by adding the
    //  encoding offset ( 33 or 64 ) to every value
    extern column INSDC:quality:text:phred_33 QUALITY
        = out_qual_text_phred_33
        | ( INSDC:quality:text:phred_33 ) < B8 > sum < 33 > ( out_qual_phred )
        ;
    extern column INSDC:quality:text:phred_64 QUALITY
        = out_qual_text_phred_64
        | ( INSDC:quality:text:phred_64 ) < B8 > sum < 64 > ( out_qual_phred )
        ;


    /* SIGNAL
     *  signal and intensity information is unspecified
     */

    // row length of out_signal, with a constant 0 as the alternative rule
    INSDC:coord:len signal_len
        = ( INSDC:coord:len ) row_len ( out_signal )
        | < INSDC:coord:len > echo < 0 > ()
        ;


    /* INSDC:tbl:sequence virtual productions
     *  cs_native
     *  in_cs_key
     *  out_cs_key
     *  out_signal
     *  in_dna_text
     *  out_2cs_bin
     *  out_2na_bin
     *  out_4na_bin
     *  out_dna_text
     *  out_x2cs_bin
     *  out_x2na_bin
     *  in_color_text
     *  in_qual_phred
     *  out_2cs_packed
     *  out_2na_packed
     *  out_4na_packed
     *  out_color_text
     *  out_qual_phred
     *  out_color_matrix
     *  out_qual_text_phred_33
     *  out_qual_text_phred_64
     */
};


/*--------------------------------------------------------------------------
 * protein
 *  basic protein sequence table
 */
table INSDC:tbl:protein #1
{
    /* PROTEIN
     *  native or converted protein sequence
     */

    // default is IUPAC character representation
    extern default column INSDC:protein:text PROTEIN
    {
        read = out_protein_text;
        validate = < INSDC:protein:text > compare ( in_protein_text, out_protein_text );
    }

    // aa representation
    extern column INSDC:aa:bin PROTEIN = out_aa_bin;


    /* INSDC:tbl:protein productions
     *  out_aa_bin
     *  in_protein_text
     *  out_protein_text
     */
};