1 #include <ncbi_pch.hpp>
2 #include <objtools/readers/hgvs/hgvs_nucleic_acid_parser.hpp>
3 #include <boost/spirit/include/phoenix.hpp>
4 #include <objtools/readers/hgvs/semantic_actions.hpp>
5 
6 
7 using boost::spirit::qi::_1;
8 using boost::spirit::qi::_2;
9 using boost::spirit::qi::_3;
10 using boost::spirit::qi::_val;
11 using boost::phoenix::bind;
12 
13 
14 BEGIN_NCBI_SCOPE
15 USING_SCOPE(objects);
16 
17 
SHgvsNucleicAcidGrammar(const SHgvsLexer & tok)18 SHgvsNucleicAcidGrammar::SHgvsNucleicAcidGrammar(const SHgvsLexer& tok) :
19     SHgvsNucleicAcidGrammar::base_type(simple_dna_variation)
20 
21 {
22     simple_dna_variation = fuzzy_simple_variation | confirmed_simple_variation;
23 
24     fuzzy_simple_variation = ("(" >> confirmed_simple_variation >> ")") ACTION1(AssignFuzzyLocalVariation);
25 
26     confirmed_simple_variation = sub |
27                                  dup |
28                                  delins |
29                                  del |
30                                  ins |
31                                  inv |
32                                  conv |
33                                  ssr |
34                                  identity;
35 
36     sub = (nucleotide_site >> nucleotide_seq >> ">" >> nucleotide_seq) ACTION3(AssignNtSub);
37 
38     dup = (nucleotide_location >> tok.dup >> nucleotide_seq) ACTION2(AssignNtDup) | // Need to change this to allow remote sequences
39           (nucleotide_location >> tok.dup) ACTION1(AssignNtDup);
40 
41     delins = (nucleotide_location >> tok.del >> nucleotide_seq >> tok.ins >> nucleotide_seq) ACTION3(AssignNtDelins) |
42              (nucleotide_location >> tok.delins >> nucleotide_seq) ACTION2(AssignNtDelins);
43 
44     del = (nucleotide_location >> tok.del >> nucleotide_seq) ACTION2(AssignNtDeletion) |
45           (nucleotide_location >> tok.del) ACTION1(AssignNtDeletion);
46 
47     ins = (nucleotide_site_interval >> tok.ins >> nucleotide_seq) ACTION2(AssignNtInsertion); // Need to change this to allow remote sequences
48 
49     conv = (nucleotide_site_interval >> tok.con >> remote_nucleotide_interval) ACTION2(AssignNtConversion);
50 
51     inv = (nucleotide_site_interval >> tok.inv >> nucleotide_seq) ACTION2(AssignNtInv) |
52           (nucleotide_site_interval >> tok.inv >> tok.pos_int) ACTION2(AssignNtInvSize) | // Assign interval with size
53           (nucleotide_site_interval >> tok.inv) ACTION1(AssignNtInv);
54 
55 
56     ssr = (nucleotide_location >> "[" >> count >> "]") ACTION2(AssignNtSSR) |
57           (nucleotide_location >> nucleotide_seq >> "[" >> count >> "]") ACTION3(AssignNtSSR) |
58           (nucleotide_location >> count_range ) ACTION2(AssignNtSSR) |
59           (nucleotide_location >> nucleotide_seq >> count_range) ACTION3(AssignNtSSR) |
60           (nucleotide_location >> fuzzy_count) ACTION2(AssignNtSSR) |
61           (nucleotide_location >> nucleotide_seq >> fuzzy_count) ACTION3(AssignNtSSR);
62 
63     identity = (nucleotide_site >> nucleotide >> tok.nochange) ACTION2(AssignNtIdentity) |
64                (nucleotide_site >> tok.nochange) ACTION1(AssignNtIdentity);
65 
66     remote_nucleotide_interval = (tok.identifier >> tok.na_tag >> nucleotide_site_interval) ACTION3(AssignNtRemoteLocation);
67 
68     nucleotide_location = nucleotide_site_interval |
69                           nucleotide_site;
70 
71     nucleotide_site_interval = (nucleotide_site >> "_" >> nucleotide_site) ACTION2(AssignNtInterval);
72 
73     nucleotide_site =  nucleotide_site_range VALASSIGN |
74                        nucleotide_single_site ACTION1(AssignNtSite) |
75                        nucleotide_uncertain_site ACTION1(AssignNtSite);
76 
77     nucleotide_site_range = ("(" >> nucleotide_single_site >> "_" >> nucleotide_single_site >> ")")
78                             ACTION2(AssignNtSiteRange);
79 
80     nucleotide_uncertain_site = ("(" >> nucleotide_single_site >> ")") ACTION1(AssignFuzzyNtSite);
81 
82     nucleotide_single_site = pretranslation_site  |
83                              posttranslation_site |
84                              intron_site |
85                              simple_site;
86 
87     pretranslation_site = "-" >> intron_site ACTION1(Assign5primeUTRSite) |
88                           "-" >> simple_site ACTION1(Assign5primeUTRSite);
89 
90     posttranslation_site = tok.stop >> intron_site ACTION1(Assign3primeUTRSite) |
91                            tok.stop >> simple_site ACTION1(Assign3primeUTRSite);
92 
93     intron_site = (tok.pos_int >> intron_offset) ACTION2(AssignIntronSite);
94 
95     intron_offset = "+" >> offset_length [_val = "+" + _1] |
96                     "-" >> offset_length [_val = "-" + _1];
97 
98     offset_length = tok.pos_int | tok.fuzzy_pos_int | tok.unknown_val;
99 
100     simple_site = tok.pos_int ACTION1(AssignSimpleNtSite) |
101                   tok.fuzzy_pos_int ACTION1(AssignFuzzySimpleNtSite) |
102                   tok.unknown_val ACTION1(AssignSimpleNtSite);
103 
104     nucleotide_seq = +nucleotide [_val += _1];
105 
106     nucleotide = tok.acgu | tok.ACGT;
107 
108     count = val_or_unknown ACTION1(AssignCount) |
109             count_range VALASSIGN;
110 
111     fuzzy_count = tok.fuzzy_pos_int ACTION1(AssignFuzzyCount);
112 
113     count_range = ("(" >> val_or_unknown >> "_" >> val_or_unknown >> ")") ACTION2(AssignCountRange);
114 
115     val_or_unknown = nn_int | tok.unknown_val;
116 
117     nn_int = tok.zero [_val = "0"] |
118              tok.pos_int VALASSIGN;
119 }
120 
121 END_NCBI_SCOPE
122