1/*===========================================================================
2*
3*                            PUBLIC DOMAIN NOTICE
4*               National Center for Biotechnology Information
5*
6*  This software/database is a "United States Government Work" under the
7*  terms of the United States Copyright Act.  It was written as part of
8*  the author's official duties as a United States Government employee and
9*  thus cannot be copyrighted.  This software/database is freely available
10*  to the public for use. The National Library of Medicine and the U.S.
11*  Government have not placed any restriction on its use or reproduction.
12*
13*  Although all reasonable efforts have been taken to ensure the accuracy
14*  and reliability of the software and data, the NLM and the U.S.
15*  Government do not and cannot warrant the performance or results that
16*  may be obtained by using this software or data. The NLM and the U.S.
17*  Government disclaim all warranties, express or implied, including
18*  warranties of performance, merchantability or fitness for any particular
19*  purpose.
20*
21*  Please cite the author in any work or product based on this material.
22*
23* ===========================================================================
24*
25*/
26
27/*==========================================================================
28 * Sequence schema
29 */
30version 1;
31
32include 'vdb/vdb.vschema';
33include 'insdc/insdc.vschema';
34
35
/*--------------------------------------------------------------------------
 * rand_4na_2na
 *  4na -> 2na conversion
 *
 *  takes unpacked 4na ( INSDC:4na:bin ) and yields unpacked 2na
 *  ( INSDC:2na:bin ). wherever the 4na value is ambiguous, one base is
 *  chosen at random from among the bases that the 4na value allows.
 *
 *  the table marks with '*' the bases allowed for each 4na symbol:
 *
 *       A | C | G | T
 *    =================
 *    N    |   |   |     # any base may be substituted
 *    A  * |   |   |     # always A
 *    C    | * |   |     # always C
 *    M  * | * |   |     # A or C
 *    G    |   | * |     # always G
 *    R  * |   | * |     # A or G
 *    S    | * | * |     # C or G
 *    V  * | * | * |     # A, C or G
 *    T    |   |   | *   # always T
 *    W  * |   |   | *   # A or T
 *    Y    | * |   | *   # C or T
 *    H  * | * |   | *   # A, C or T
 *    K    |   | * | *   # G or T
 *    D  * |   | * | *   # A, G or T
 *    B    | * | * | *   # C, G or T
 *    N  * | * | * | *   # any base may be substituted
 */
extern function INSDC:2na:bin
    INSDC:SEQ:rand_4na_2na #1 (
        INSDC:4na:bin rd_bin
    );
64
65
/*--------------------------------------------------------------------------
 * sequence
 *  basic sequence table
 *
 *  declares the base-space ( READ ) and color-space ( CSREAD ) sequence
 *  columns in their several representations, together with the CS_NATIVE,
 *  CS_KEY, COLOR_MATRIX and QUALITY columns.
 *
 *  the productions that are referenced here but not defined by this table
 *  are listed in the comment at the end of the table body.
 *
 * history:
 *  1.0.1 - introduced text-mode QUALITY columns
 */
table INSDC:tbl:sequence #1.0.1
{
    /* READ
     *  native or converted DNA sequence
     */

    // default is IUPAC character representation
    // validation compares the input text against the text that is read back
    extern default column INSDC:dna:text READ
    {
        read = out_dna_text;
        validate = < INSDC:dna:text > compare ( in_dna_text, out_dna_text );
    }

    // 4na representation - unpacked and packed
    extern column INSDC:4na:bin READ = out_4na_bin;
    extern column INSDC:4na:packed READ = out_4na_packed;

    // x2na representation - 2na with ambiguity
    extern column INSDC:x2na:bin READ = out_x2na_bin;

    // 2na representation - 2na with no ambiguity - unpacked and packed
    extern column INSDC:2na:bin READ = out_2na_bin;
    extern column INSDC:2na:packed READ = out_2na_packed;


    /* CSREAD
     *  native or converted color-space sequence
     */

    // default is ASCII character representation
    // validation compares the input text against the text that is read back
    extern default column INSDC:color:text CSREAD
    {
        read = out_color_text;
        validate = < INSDC:color:text > compare ( in_color_text, out_color_text );
    }

    // x2cs representation - 2cs with ambiguity
    extern column INSDC:x2cs:bin CSREAD = out_x2cs_bin;

    // 2cs representation - 2cs with no ambiguity - unpacked and packed
    extern column INSDC:2cs:bin CSREAD = out_2cs_bin;
    extern column INSDC:2cs:packed CSREAD = out_2cs_packed;

    /* CS_NATIVE
     *  is color-space the native sequence space
     */
    readonly column bool CS_NATIVE = cs_native;

    /* CS_KEY
     *  leading call given in base-space
     */
    extern column INSDC:dna:text CS_KEY
    {
        read = out_cs_key;
        validate = < INSDC:dna:text > compare ( in_cs_key, out_cs_key );
    }

    /* COLOR_MATRIX
     *  matrix used for color-space conversions
     */
    extern column U8 COLOR_MATRIX = out_color_matrix;


    /* QUALITY
     *  base or color call qualities
     */

    // PHRED is default
    // as with READ, CSREAD and CS_KEY, the input is validated against the
    // same "out_" production that serves the read rule
    extern default column INSDC:quality:phred QUALITY
    {
        read = out_qual_phred;
        validate = < INSDC:quality:phred > compare ( in_qual_phred, out_qual_phred );
    }

    // textual encodings
    // each has two alternatives: a dedicated text production, or the
    // PHRED values with the encoding offset ( 33 or 64 ) added
    extern column INSDC:quality:text:phred_33 QUALITY
        = out_qual_text_phred_33
        | ( INSDC:quality:text:phred_33 ) < B8 > sum < 33 > ( out_qual_phred )
        ;
    extern column INSDC:quality:text:phred_64 QUALITY
        = out_qual_text_phred_64
        | ( INSDC:quality:text:phred_64 ) < B8 > sum < 64 > ( out_qual_phred )
        ;


    /* SIGNAL
     *  signal and intensity information is unspecified
     */

    // row length of out_signal, with a constant 0 as the alternative
    INSDC:coord:len signal_len
        = ( INSDC:coord:len ) row_len ( out_signal )
        | < INSDC:coord:len > echo < 0 > ()
        ;


    /* INSDC:tbl:sequence virtual productions
     *  cs_native
     *  in_cs_key
     *  out_cs_key
     *  out_signal
     *  in_dna_text
     *  out_2cs_bin
     *  out_2na_bin
     *  out_4na_bin
     *  out_dna_text
     *  out_x2cs_bin
     *  out_x2na_bin
     *  in_color_text
     *  in_qual_phred
     *  out_2cs_packed
     *  out_2na_packed
     *  out_4na_packed
     *  out_color_text
     *  out_qual_phred
     *  out_color_matrix
     *  out_qual_text_phred_33
     *  out_qual_text_phred_64
     */
};
188
189
/*--------------------------------------------------------------------------
 * protein
 *  basic protein sequence table
 *
 *  offers one PROTEIN column in two representations:
 *  INSDC:protein:text ( the default ) and INSDC:aa:bin
 */
table INSDC:tbl:protein #1
{
    /* PROTEIN
     *  native or converted protein sequence
     */

    // text form - IUPAC characters - is the default representation
    // validation compares the input text against the text that is read back
    extern default column INSDC:protein:text PROTEIN
    {
        read = out_protein_text;
        validate
            = < INSDC:protein:text >
              compare ( in_protein_text, out_protein_text );
    }

    // binary form - aa representation
    extern column INSDC:aa:bin PROTEIN
        = out_aa_bin;


    /* INSDC:tbl:protein virtual productions
     *  out_aa_bin
     *  in_protein_text
     *  out_protein_text
     */
};
217