1 //  Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
2 //  All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 //       notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 //       copyright notice, this list of conditions and the following
12 //       disclaimer in the documentation and/or other materials provided
13 //       with the distribution.
14 //     * Neither the name of Novartis Institutes for BioMedical Research Inc.
15 //       nor the names of its contributors may be used to endorse or promote
16 //       products derived from this software without specific prior written
17 //       permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 //
31 
32 #include "Filters.h"
33 #include "FilterCatalog.h"
34 
35 namespace RDKit {
36 
37 /////////////////////////////////////////////////////////////////////////////////////////
38 // BRENK data
39 // # Reference: Brenk R et al. Lessons Learnt from Assembling Screening
40 // Libraries for Drug Discovery for Neglected Diseases. ChemMedChem 3 (2008)
41 // 435-444. doi:10.1002/cmdc.200700139.
42 // # Scope: unwanted functionality due to potential tox reasons or unfavourable
43 // pharmacokinetic properties
44 //
45 
// Filter table for the "Brenk" set. Every initializer supplies four values in
// this order: an entry name, a substructure pattern string (written in what
// looks like SMARTS syntax), the integer 0, and a string that is empty for
// every entry of this table. The meaning of the last two values is fixed by
// FilterData_t, which is declared outside this section (presumably in
// Filters.h -- confirm).
// NOTE(review): "Azo_group" carries the pattern N#N while "diazo_group"
// carries [N!R]=[N!R]; relative to common nomenclature the two names look
// swapped -- confirm against the cited publication before changing anything.
46 const FilterData_t BRENK[] = {
47     {">_2_ester_groups", "C(=O)O[C,H1].C(=O)O[C,H1].C(=O)O[C,H1]", 0, ""},
48     {"2-halo_pyridine", "n1c([F,Cl,Br,I])cccc1", 0, ""},
49     {"acid_halide", "C(=O)[Cl,Br,I,F]", 0, ""},
50     {"acyclic_C=C-O", "C=[C!r]O", 0, ""},
51     {"acyl_cyanide", "N#CC(=O)", 0, ""},
52     {"acyl_hydrazine", "C(=O)N[NH2]", 0, ""},
53     {"aldehyde", "[CH1](=O)", 0, ""},
54     {"Aliphatic_long_chain", "[R0;D2][R0;D2][R0;D2][R0;D2]", 0, ""},
55     {"alkyl_halide", "[CX4][Cl,Br,I]", 0, ""},
56     {"amidotetrazole", "c1nnnn1C=O", 0, ""},
57     {"aniline", "c1cc([NH2])ccc1", 0, ""},
58     {"azepane", "[CH2R2]1N[CH2R2][CH2R2][CH2R2][CH2R2][CH2R2]1", 0, ""},
59     {"Azido_group", "N=[N+]=[N-]", 0, ""},
60     {"Azo_group", "N#N", 0, ""},
61     {"azocane", "[CH2R2]1N[CH2R2][CH2R2][CH2R2][CH2R2][CH2R2][CH2R2]1", 0, ""},
62     {"benzidine",
63      "[cR2]1[cR2][cR2]([Nv3X3,Nv4X4])[cR2][cR2][cR2]1[cR2]2[cR2][cR2][cR2](["
64      "Nv3X3,Nv4X4])[cR2][cR2]2",
65      0, ""},
66     {"beta-keto/anhydride", "[C,c](=O)[CX4,CR0X3,O][C,c](=O)", 0, ""},
67     {"biotin_analogue", "C12C(NC(N1)=O)CSC2", 0, ""},
68     {"Carbo_cation/anion", "[C+,c+,C-,c-]", 0, ""},
69     {"catechol", "c1c([OH])c([OH,NH2,NH])ccc1", 0, ""},
70     {"charged_oxygen_or_sulfur_atoms", "[O+,o+,S+,s+]", 0, ""},
71     {"chinone_1", "C1(=[O,N])C=CC(=[O,N])C=C1", 0, ""},
72     {"chinone_2", "C1(=[O,N])C(=[O,N])C=CC=C1", 0, ""},
73     {"conjugated_nitrile_group", "C=[C!r]C#N", 0, ""},
74     {"crown_ether", "[OR2,NR2]@[CR2]@[CR2]@[OR2,NR2]@[CR2]@[CR2]@[OR2,NR2]", 0,
75      ""},
76     {"cumarine", "c1ccc2c(c1)ccc(=O)o2", 0, ""},
77     {"cyanamide", "N[CH2]C#N", 0, ""},
78     {"cyanate_/aminonitrile_/thiocyanate", "[N,O,S]C#N", 0, ""},
79     {"cyanohydrins", "N#CC[OH]", 0, ""},
80     {"cycloheptane_1", "[CR2]1[CR2][CR2][CR2][CR2][CR2][CR2]1", 0, ""},
81     {"cycloheptane_2", "[CR2]1[CR2][CR2]cc[CR2][CR2]1", 0, ""},
82     {"cyclooctane_1", "[CR2]1[CR2][CR2][CR2][CR2][CR2][CR2][CR2]1", 0, ""},
83     {"cyclooctane_2", "[CR2]1[CR2][CR2]cc[CR2][CR2][CR2]1", 0, ""},
84     {"diaminobenzene_1",
85      "[cR2]1[cR2]c([N+0X3R0,nX3R0])c([N+0X3R0,nX3R0])[cR2][cR2]1", 0, ""},
86     {"diaminobenzene_2",
87      "[cR2]1[cR2]c([N+0X3R0,nX3R0])[cR2]c([N+0X3R0,nX3R0])[cR2]1", 0, ""},
88     {"diaminobenzene_3",
89      "[cR2]1[cR2]c([N+0X3R0,nX3R0])[cR2][cR2]c1([N+0X3R0,nX3R0])", 0, ""},
90     {"diazo_group", "[N!R]=[N!R]", 0, ""},
91     {"diketo_group", "[C,c](=O)[C,c](=O)", 0, ""},
92     {"disulphide", "SS", 0, ""},
93     {"enamine", "[CX2R0][NX3R0]", 0, ""},
94     {"ester_of_HOBT", "C(=O)Onnn", 0, ""},
95     {"four_member_lactones", "C1(=O)OCC1", 0, ""},
96     {"halogenated_ring_1", "c1cc([Cl,Br,I,F])cc([Cl,Br,I,F])c1[Cl,Br,I,F]", 0,
97      ""},
98     {"halogenated_ring_2", "c1ccc([Cl,Br,I,F])c([Cl,Br,I,F])c1[Cl,Br,I,F]", 0,
99      ""},
100     {"heavy_metal", "[Hg,Fe,As,Sb,Zn,Se,se,Te,B,Si]", 0, ""},
101     {"het-C-het_not_in_ring",
102      "[NX3R0,NX4R0,OR0,SX2R0][CX4][NX3R0,NX4R0,OR0,SX2R0]", 0, ""},
103     {"hydantoin", "C1NC(=O)NC(=O)1", 0, ""},
104     {"hydrazine", "N[NH2]", 0, ""},
105     {"hydroquinone", "[OH]c1ccc([OH,NH2,NH])cc1", 0, ""},
106     {"hydroxamic_acid", "C(=O)N[OH]", 0, ""},
107     {"imine_1", "C=[N!R]", 0, ""},
108     {"imine_2", "N=[CR0][N,n,O,S]", 0, ""},
109     {"iodine", "I", 0, ""},
110     {"isocyanate", "N=C=O", 0, ""},
111     {"isolated_alkene",
112      "[$([CH2]),$([CH][CX4]),$(C([CX4])[CX4])]=[$([CH2]),$([CH][CX4]),$(C([CX4]"
113      ")[CX4])]",
114      0, ""},
115     {"ketene", "C=C=O", 0, ""},
116     {"methylidene-1,3-dithiole", "S1C=CSC1=S", 0, ""},
117     {"Michael_acceptor_1", "C=!@CC=[O,S]", 0, ""},
118     {"Michael_acceptor_2", "[$([CH]),$(CC)]#CC(=O)[C,c]", 0, ""},
119     {"Michael_acceptor_3", "[$([CH]),$(CC)]#CS(=O)(=O)[C,c]", 0, ""},
120     {"Michael_acceptor_4", "C=C(C=O)C=O", 0, ""},
121     {"Michael_acceptor_5", "[$([CH]),$(CC)]#CC(=O)O[C,c]", 0, ""},
122     {"N_oxide", "[NX2,nX3][OX1]", 0, ""},
123     {"N-acyl-2-amino-5-mercapto-1,3,4-_thiadiazole", "s1c(S)nnc1NC=O", 0, ""},
124     {"N-C-halo", "NC[F,Cl,Br,I]", 0, ""},
125     {"N-halo", "[NX3,NX4][F,Cl,Br,I]", 0, ""},
126     {"N-hydroxyl_pyridine", "n[OH]", 0, ""},
127     {"nitro_group", "[N+](=O)[O-]", 0, ""},
128     {"N-nitroso", "[#7]-N=O", 0, ""},
129     {"oxime_1", "[C,c]=N[OH]", 0, ""},
130     {"oxime_2", "[C,c]=NOC=O", 0, ""},
131     {"Oxygen-nitrogen_single_bond", "[OR0,NR0][OR0,NR0]", 0, ""},
132     {"Perfluorinated_chain", "[CX4](F)(F)[CX4](F)F", 0, ""},
133     {"peroxide", "OO", 0, ""},
134     {"phenol_ester", "c1ccccc1OC(=O)[#6]", 0, ""},
135     {"phenyl_carbonate", "c1ccccc1OC(=O)O", 0, ""},
136     {"phosphor", "P", 0, ""},
137     {"phthalimide", "[cR,CR]~C(=O)NC(=O)~[cR,CR]", 0, ""},
138     {"Polycyclic_aromatic_hydrocarbon_1", "a1aa2a3a(a1)A=AA=A3=AA=A2", 0, ""},
139     {"Polycyclic_aromatic_hydrocarbon_2", "a21aa3a(aa1aaaa2)aaaa3", 0, ""},
140     {"Polycyclic_aromatic_hydrocarbon_3", "a31a(a2a(aa1)aaaa2)aaaa3", 0, ""},
141     {"polyene", "[CR0]=[CR0][CR0]=[CR0]", 0, ""},
142     {"quaternary_nitrogen_1",
143      "[s,S,c,C,n,N,o,O]~[nX3+,NX3+](~[s,S,c,C,n,N])~[s,S,c,C,n,N]", 0, ""},
144     {"quaternary_nitrogen_2",
145      "[s,S,c,C,n,N,o,O]~[n+,N+](~[s,S,c,C,n,N,o,O])(~[s,S,c,C,n,N,o,O])~[s,S,c,"
146      "C,n,N,o,O]",
147      0, ""},
148     {"quaternary_nitrogen_3", "[*]=[N+]=[*]", 0, ""},
149     {"saponine_derivative", "O1CCCCC1OC2CCC3CCCCC3C2", 0, ""},
150     {"silicon_halogen", "[Si][F,Cl,Br,I]", 0, ""},
151     {"stilbene", "c1ccccc1C=Cc2ccccc2", 0, ""},
152     {"sulfinic_acid", "[SX3](=O)[O-,OH]", 0, ""},
153     {"Sulfonic_acid_1", "[C,c]S(=O)(=O)O[C,c]", 0, ""},
154     {"Sulfonic_acid_2", "S(=O)(=O)[O-,OH]", 0, ""},
155     {"sulfonyl_cyanide", "S(=O)(=O)C#N", 0, ""},
156     {"sulfur_oxygen_single_bond", "[SX2]O", 0, ""},
157     {"sulphate", "OS(=O)(=O)[O-]", 0, ""},
158     {"sulphur_nitrogen_single_bond", "[SX2H0][N]", 0, ""},
159     {"Thiobenzothiazole_1", "c12ccccc1(SC(S)=N2)", 0, ""},
160     {"thiobenzothiazole_2", "c12ccccc1(SC(=S)N2)", 0, ""},
161     {"Thiocarbonyl_group", "[C,c]=S", 0, ""},
162     {"thioester", "SC=O", 0, ""},
163     {"thiol_1", "[S-]", 0, ""},
164     {"thiol_2", "[SH]", 0, ""},
165     {"Three-membered_heterocycle", "*1[O,S,N]*1", 0, ""},
166     {"triflate", "OS(=O)(=O)C(F)(F)F", 0, ""},
167     {"triphenyl_methyl-silyl", "[SiR0,CR0](c1ccccc1)(c2ccccc2)(c3ccccc3)", 0,
168      ""},
169     {"triple_bond", "C#C", 0, ""}};
// Number of entries in BRENK, computed from the array's own size so it follows
// the table automatically when entries are added or removed.
170 const unsigned int NUM_BRENK =
171     static_cast<unsigned int>(sizeof(BRENK) / sizeof(FilterData_t));
172 
// String pairs describing the Brenk set: its set name ("FilterSet"), the
// literature citation ("Reference") and the intended use ("Scope"). Each
// initializer is a label followed by its text; FilterProperty_t itself is
// declared outside this section.
173 const FilterProperty_t BRENK_PROPS[] = {
174     {"FilterSet", "Brenk"},
175     {"Reference",
176      "Brenk R et al. Lessons Learnt from Assembling Screening Libraries for "
177      "Drug Discovery for Neglected Diseases. ChemMedChem 3 (2008) 435-444. "
178      "doi:10.1002/cmdc.200700139."},
179     {"Scope",
180      "unwanted functionality due to potential tox reasons or unfavourable "
181      "pharmacokinetic properties"}};
// Number of entries in BRENK_PROPS, computed from the array's own size.
182 const unsigned int NUM_BRENK_PROPS =
183     static_cast<unsigned int>(sizeof(BRENK_PROPS) / sizeof(FilterProperty_t));
184 
185 /////////////////////////////////////////////////////////////////////////////////////////
186 // NIH data
187 // # Scope: annotate compounds with problematic functional groups
188 // # Reference: Doveston R, et al. A Unified Lead-oriented Synthesis of over
189 // Fifty Molecular Scaffolds. Org Biomol Chem 13 (2014) 859-865.
190 // doi:10.1039/C4OB02287D.
191 // # Reference: Jadhav A, et al. Quantitative Analyses of Aggregation,
192 // Autofluorescence, and Reactivity Artifacts in a Screen for Inhibitors of a
193 // Thiol Protease. J Med Chem 53 (2009) 37-51. doi:10.1021/jm901070c.
194 //
195 
// Filter table for the "NIH" set. Every initializer supplies four values in
// this order: an entry name, a substructure pattern string (written in what
// looks like SMARTS syntax), the integer 0, and a trailing string. The
// trailing string is "Edited" for alpha_halo_amine, contains_metal,
// gte_8_CF2_or_CH2 and trisub_bis_act_olefin, and empty for all other entries.
// The meaning of the last two values is fixed by FilterData_t, which is
// declared outside this section (presumably in Filters.h -- confirm).
// Long patterns are split across adjacent string literals, which the compiler
// concatenates into a single string.
// NOTE(review): "polyhalo_phenol_e" has exactly the same pattern as
// "polyhalo_phenol_c", and "phenolate_bis_EWG" has exactly the same pattern as
// "bis_activated_aryl_ester" -- confirm whether these duplicates are intended.
196 const FilterData_t NIH[] = {
197     {"2halo_pyrazine_3EWG",
198      "[#7;R1]1[#6]([F,Cl,Br,I])[#6]([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)("
199      "=O)),$([N+](=O)[O-]),$(C=O)])[#7][#6][#6]1",
200      0, ""},
201     {"2halo_pyrazine_5EWG",
202      "[#7;R1]1[#6]([F,Cl,Br,I])[#6;!$(c-N)][#7][#6]([$(S(=O)(=O)),$(C(F)(F)(F))"
203      ",$(C#N),$(N(=O)(=O)),$([N+](=O)[O-]),$(C=O)])[#6;!$(c-N)]1",
204      0, ""},
205     {"2halo_pyridazine_3EWG",
206      "[#7;R1]1[#6]([F,Cl,Br,I])[#6]([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)("
207      "=O)),$([N+](=O)[O-]),$(C=O)])[#6][#6][#7]1",
208      0, ""},
209     {"2halo_pyridazine_5EWG",
210      "[#7;R1]1[#6]([F,Cl,Br,I])[#6][#6][#6]([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),"
211      "$(N(=O)(=O)),$([N+](=O)[O-]),$(C=O)])[#7]1",
212      0, ""},
213     {"2halo_pyridine_3EWG",
214      "[#7;R1]1[#6;!$(c=O)]([F,Cl,Br,I])[#6]([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),"
215      "$(N(=O)(=O)),$([N+](=O)[O-]),$(C=O)])[#6;!$(c-N)][#6][#6;!$(c-N)]1",
216      0, ""},
217     {"2halo_pyridine_5EWG",
218      "[#7;R1]1[#6;!$(c=O)]([F,Cl,Br,I])[#6][#6;!$(c-N)][#6]([$(S(=O)(=O)),$(C("
219      "F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](=O)[O-]),$(C=O)])[#6;!$(c=O);!$(c-N)"
220      "]1",
221      0, ""},
222     {"2halo_pyrimidine_5EWG",
223      "[#7;R1]1[#6]([F,Cl,Br,I])[#7][#6][#6]([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),"
224      "$(N(=O)(=O)),$([N+](=O)[O-]),$(C=O)])[#6]1",
225      0, ""},
226     {"3halo_pyridazine_2EWG",
227      "[#7;R1]1[#6]([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](=O)["
228      "O-]),$(C=O)])[#6]([F,Cl,Br,I])[#6][#6][#7]1",
229      0, ""},
230     {"3halo_pyridazine_4EWG",
231      "[#7;R1]1[#6][#6]([F,Cl,Br,I])[#6]([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N("
232      "=O)(=O)),$([N+](=O)[O-]),$(C=O)])[#6][#7]1",
233      0, ""},
234     {"4_pyridone_3_5_EWG",
235      "[#7,#8,#16]1~[#6;H]~[#6]([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),"
236      "$([N+](=O)[O-]),$(C=O)])~[#6](=O)~[#6]([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N)"
237      ",$(N(=O)(=O)),$([N+](=O)[O-]),$(C=O)])~[#6;H]1",
238      0, ""},
239     {"4halo_pyridine_3EWG",
240      "[#7;R1]1[#6;!$(c=O);!$(c-N)][#6]([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(="
241      "O)(=O)),$([N+](=O)[O-]),$(C=O)])[#6]([F,Cl,Br,I])[#6][#6;!$(c=O);!$(c-N)]"
242      "1",
243      0, ""},
244     {"4halo_pyrimidine_2_6EWG",
245      "[#7]1[#6]([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](=O)[O-])"
246      ",$(C=O)])[#7;R1][#6]([F,Cl,Br,I])[#6][#6]1([$(S(=O)(=O)),$(C(F)(F)(F)),$("
247      "C#N),$(N(=O)(=O)),$([N+](=O)[O-]),$(C=O)])",
248      0, ""},
249     {"4halo_pyrimidine_5EWG",
250      "[#7]1[#6][#7;R1][#6]([F,Cl,Br,I])[#6]([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),"
251      "$(N(=O)(=O)),$([N+](=O)[O-]),$(C=O)])[#6]1",
252      0, ""},
253     {"CH2_S#O_3_ring", "[CH2]1[O,S]C1", 0, ""},
254     {"HOBT_ester", "O=C(-[!N])O[$(nnn),$([#7]-[#7]=[#7])]", 0, ""},
255     {"NO_phosphonate", "P(=O)ON", 0, ""},
256     {"acrylate", "[CH2]=[C;!$(C-N);!$(C-O)]C(=O)", 0, ""},
257     {"activated_4mem_ring",
258      "[#6]1~[$(C(=O)),$(S(=O))]~[O,S,N]~[$(C(=O)),$(S(=O))]1", 0, ""},
259     {"activated_S#O_3_ring", "C1~[O,S]~[C,N,O,S]1[a,N,O,S]", 0, ""},
260     {"activated_acetylene",
261      "[$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](=O)[O-]),$(C(=O))]"
262      "C#[C;!$(C-N);!$(C-n)]",
263      0, ""},
264     {"activated_diazo",
265      "[N;!R]([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](=O)[O-]),$("
266      "C(=O))])=[N;!R]([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](="
267      "O)[O-]),$(C(=O))])",
268      0, ""},
269     {"activated_vinyl_ester",
270      "O=COC=[$(C(S(=O)(=O))),$(C(C(F)(F)(F))),$(C(C#N)),$(C(N(=O)(=O))),$(C([N+"
271      "](=O)[O-])),$(C(C(=O)));!$(C(N))]",
272      0, ""},
273     {"activated_vinyl_sulfonate",
274      "O(-S(=O)(=O))C=[$(C(S(=O)(=O))),$(C(C(F)(F)(F))),$(C(C#N)),$(C(N(=O)(=O))"
275      "),$(C([N+](=O)[O-])),$(C(C(=O)));!$(C(N))]",
276      0, ""},
277     {"acyclic_imide", "[C,c][C;!R](=O)[N;!R][C;!R](=O)[C,c]", 0, ""},
278     {"acyl_123_triazole", "[#7;R1]1~[#7;R1]~[#7;R1](-C(=O))~[#6]~[#6]1", 0, ""},
279     {"acyl_134_triazole", "[#7]1~[#7]~[#6]~[#7](-C(=O)[!N])~[#6]1", 0, ""},
280     {"acyl_activated_NO", "O=C(-[!N])O[$([#7;+]),$(N(C=[O,S,N])(C=[O,S,N]))]",
281      0, ""},
282     {"acyl_cyanide", "C(=O)-C#N", 0, ""},
283     {"acyl_imidazole",
284      "[C;!$(C-N)](=O)[#7]1[#6;H1,$([#6]([*;!R]))][#7][#6;H1,$([#6]([*;!R]))][#"
285      "6;H1,$([#6]([*;!R]))]1",
286      0, ""},
287     {"acyl_pyrazole",
288      "[C;!$(C-N)](=O)[#7]1[#7][#6;H1,$([#6]([*;!R]))][#6;H1,$([#6]([*;!R]))][#"
289      "6;H1,$([#6]([*;!R]))]1",
290      0, ""},
291     {"aldehyde", "[C,c][C;H1](=O)", 0, ""},
292     {"alpha_dicarbonyl", "C(=O)!@C(=O)", 0, ""},
293     {"alpha_halo_EWG",
294      "[$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](=O)[O-])]-[CH,CH2]-[Cl,Br,I,$("
295      "O(S(=O)(=O)))]",
296      0, ""},
297     {"alpha_halo_amine",
298      "[F,Cl,Br,I,$(O(S(=O)(=O)))]-[CH,CH2;!$(C(F)(F))]-[N,n]", 0, "Edited"},
299     {"alpha_halo_carbonyl", "C(=O)([CH,CH2][Cl,Br,I,$(O(S(=O)(=O)))])", 0, ""},
300     {"alpha_halo_heteroatom",
301      "[N,n,O,S;!$(S(=O)(=O))]-[CH,CH2;!$(C(F)(F))][F,Cl,Br,I,$(O(S(=O)(=O)))]",
302      0, ""},
303     {"alpha_halo_heteroatom_tert",
304      "[N,n,O,S;!$(S(=O)(=O))]-C([Cl,Br,I,$(O(S(=O)(=O)))])(C)(C)", 0, ""},
305     {"anhydride",
306      "[$(C(=O)),$(C(=S))]-[O,S]-[$(C(=O)),$(C(=S)),$(C(=[N;!R])),$(C(=N(-[C;X4]"
307      ")))]",
308      0, ""},
309     {"aryl_phosphonate", "P(=O)-[O;!R]-a", 0, ""},
310     {"aryl_thiocarbonyl", "a-[S;X2;!R]-[C;!R](=O)", 0, ""},
311     {"azide", "[$(N#[N+]-[N-]),$([N-]=[N+]=N)]", 0, ""},
312     {"aziridine_diazirine", "[C,N]1~[C,N]~N~1", 0, ""},
313     {"azo_amino", "[N]=[N;!R]-[N]", 0, ""},
314     {"azo_aryl", "c[N;!R;!+]=[N;!R;!+]-c", 0, ""},
315     {"azo_filter1", "[N;!R]=[N;!R]-[N]=[*]", 0, ""},
316     {"azo_filter2",
317      "[N;!$(N-S(=O)(=O));!$(N-C=O)]-[N;!r3;!$(N-S(=O)(=O));!$(N-C=O)]-[N;!$(N-"
318      "S(=O)(=O));!$(N-C=O)]",
319      0, ""},
320     {"azo_filter3", "[N;!R]-[N;!R]-[N;!R]", 0, ""},
321     {"azo_filter4", "a-N=N-[N;H2]", 0, ""},
322     {"bad_boron", "[B-,BH2,BH3,$(B(F)(F))]", 0, ""},
323     {"bad_cations", "[C+,F+,Cl+,Br+,I+,Se+]", 0, ""},
324     {"benzidine_like", "c([N;!+])1ccc(c2ccc([N;!+])cc2)cc1", 0, ""},
325     {"beta_lactone", "[#6,#15,#16]1(=O)~[#6]~[#6]~[#8,#16]1", 0, ""},
326     {"betalactam", "C1(=O)~[#6]~[#6]N1", 0, ""},
327     {"betalactam_EWG",
328      "C1(=O)~[#6]~[#6]N1([$(S(=O)(=O)[C,c,O&D2]),$(C(F)(F)(F)),$(C#N),$(N(=O)(="
329      "O)),$([N+](=O)[O-]),$(C(=O)[C,c,O&D2])])",
330      0, ""},
331     {"bis_activated_aryl_ester",
332      "O=[C,S]Oc1aaa([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](=O)["
333      "O-]),$(C(=O)O),$(C(=O)N)])aa([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)(="
334      "O)),$([N+](=O)[O-]),$(C(=O)O),$(C(=O)N)])1",
335      0, ""},
336     {"bis_keto_olefin",
337      "CC(=O)[$([C&H1]),$(C-F),$(C-Cl),$(C-Br),$(C-I)]=[$([C&H1]),$(C-F),$(C-Cl)"
338      ",$(C-Br),$(C-I)]C(=O)C",
339      0, ""},
340     {"boron_warhead", "[C,c]~[#5]", 0, ""},
341     {"branched_polycyclic_aromatic", "a1(a2aa(a3aaaaa3)aa(a4aaaaa4)a2)aaaaa1",
342      0, ""},
343     {"carbodiimide_iso#thio#cyanate", "N=C=[N,O,S]", 0, ""},
344     {"carbonyl_halide", "O=C[F,Cl,Br,I]", 0, ""},
345     {"contains_metal",
346      "[$([Ru]),$([#45]),$([Se]),$([se]),$([Pd]),$([#21]),$([Bi]),$([Sb]),$([Ag]"
347      "),$([Ti]),$([Al]),$([Cd]),$([V]),$([In]),$([#24]),$([#50]),$([Mn]),$([La]"
348      "),$([Fe]),$([Er]),$([Tm]),$([Yb]),$([Lu]),$([Hf]),$([Ta]),$([W]),$([Re]),"
349      "$([#27]),$([#76]),$([Ni]),$([Ir]),$([Cu]),$([Zn]),$([Ga]),$([Ge]),$([As])"
350      ",$([Y]),$([Zr]),$([Nb]),$([Ce]),$([#59]),$([Nd]),$([Sm]),$([Eu]),$([Gd]),"
351      "$([Tb]),$([Dy]),$([#67]),$([Pt]),$([Au]),$([Hg]),$([Tl]),$([Pb]),$([Ac]),"
352      "$([Th]),$([Pa]),$([Mo]),$([U]),$([Tc]),$([Te]),$([#84]),$([At])]",
353      0, "Edited"},
354     {"crown_ether",
355      "[$([O,S,#7;R1;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,"
356      "r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,r15,r16,r17,"
357      "r18][O,S,#7;R1;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,"
358      "r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,r15,r16,r17,"
359      "r18][O,S,#7;R1;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18]),$([O,S,#7;R1;r9,"
360      "r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,r15,"
361      "r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,"
362      "r10,r11,r12,r13,r14,r15,r16,r17,r18][O,S,#7;R1;r9,r10,r11,r12,r13,r14,"
363      "r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;"
364      "r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,"
365      "r15,r16,r17,r18][O,S,#7;R1;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18]),$([O,"
366      "S,#7;R1;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,"
367      "r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18]["
368      "O,S,#7;R1;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,"
369      "r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18]["
370      "CH,CH2;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][O,S,#7;R1;r9,r10,r11,r12,"
371      "r13,r14,r15,r16,r17,r18])]",
372      0, ""},
373     {"cyano_phosphonate", "P(O[A,a])(O[A,a])(=O)C#N", 0, ""},
374     {"cyanohydrin", "[C;X4](-[OH,NH1,NH2,SH])(-C#N)", 0, ""},
375     {"diamino_sulfide", "[N,n]~[S;!R;D2]~[N,n]", 0, ""},
376     {"diazo_carbonyl", "[$(N=N=C~C=O),$(N#N-C~C=O)]", 0, ""},
377     {"diazonium", "a[N+]#N", 0, ""},
378     {"dicarbonyl_sulfonamide",
379      "[$(N(-C(=O))(-C(=O))(-S(=O))),$(n([#6](=O))([#6](=O))([#16](=O)))]", 0,
380      ""},
381     {"disulfide_acyclic", "[S;!R;X2]-[S;!R;X2]", 0, ""},
382     {"disulfonyliminoquinone", "S(=O)(=O)N=C1C=CC(=NS(=O)(=O))C=C1", 0, ""},
383     {"double_trouble_warhead", "NC(C[S;D1])C([N;H1]([O;D1]))=O", 0, ""},
384     {"flavanoid", "O=C2CC(a3aaaaa3)Oa1aaaaa12", 0, ""},
385     {"four_nitriles", "C#N.C#N.C#N.C#N", 0, ""},
386     {"gte_10_carbon_sb_chain",
387      "[C;!R]-[C;!R]-[C;!R]-[C;!R]-[C;!R]-[C;!R]-[C;!R]-[C;!R]-[C;!R]-[C;!R]", 0,
388      ""},
389     {"gte_2_N_quats", "[N,n;H0;+;!$(N~O);!$(n~O)].[N,n;H0;+;!$(N~O);!$(n~O)]",
390      0, ""},
391     {"gte_2_free_phos", "P([O;D1])=O.P([O;D1])=O", 0, ""},
392     {"gte_2_sulfonic_acid", "[C,c]S(=O)(=O)[O;D1].[C,c]S(=O)(=O)[O;D1]", 0, ""},
393     {"gte_3_COOH", "C(=O)[O;D1].C(=O)[O;D1].C(=O)[O;D1]", 0, ""},
394     {"gte_3_iodine", "[#53].[#53].[#53]", 0, ""},
395     {"gte_4_basic_N",
396      "[N;!$(N(=[N,O,S,C]));!$(N(S(=O)(=O)));!$(N(C(F)(F)(F)));!$(N(C#N));!$(N("
397      "C(=O)));!$(N(C(=S)));!$(N(C(=N)));!$(N(#C));!$(N-c)].[N;!$(N(=[N,O,S,C]))"
398      ";!$(N(S(=O)(=O)));!$(N(C(F)(F)(F)));!$(N(C#N));!$(N(C(=O)));!$(N(C(=S)));"
399      "!$(N(C(=N)));!$(N(#C));!$(N-c)].[N;!$(N(=[N,O,S,C]));!$(N(S(=O)(=O)));!$("
400      "N(C(F)(F)(F)));!$(N(C#N));!$(N(C(=O)));!$(N(C(=S)));!$(N(C(=N)));!$(N(#C)"
401      ");!$(N-c)].[N;!$(N(=[N,O,S,C]));!$(N(S(=O)(=O)));!$(N(C(F)(F)(F)));!$(N("
402      "C#N));!$(N(C(=O)));!$(N(C(=S)));!$(N(C(=N)));!$(N(#C));!$(N-c)]",
403      0, ""},
404     {"gte_4_nitro",
405      "[$([N+](=O)[O-]),$(N(=O)=O)].[$([N+](=O)[O-]),$(N(=O)=O)].[$([N+](=O)[O-]"
406      "),$(N(=O)=O)].[$([N+](=O)[O-]),$(N(=O)=O)]",
407      0, ""},
408     {"gte_5_phenolic_OH", "a[O;D1].a[O;D1].a[O;D1].a[O;D1].a[O;D1]", 0, ""},
409     {"gte_7_aliphatic_OH",
410      "C[O;D1].C[O;D1].C[O;D1].C[O;D1].C[O;D1].C[O;D1].C[O;D1]", 0, ""},
411     {"gte_7_total_hal",
412      "[Cl,Br,I].[Cl,Br,I].[Cl,Br,I].[Cl,Br,I].[Cl,Br,I].[Cl,Br,I].[Cl,Br,I]", 0,
413      ""},
414     {"gte_8_CF2_or_CH2",
415      "[CH2,$(C(F)(F));R0][CH2,$(C(F)(F));R0][CH2,$(C(F)(F));R0][CH2,$(C(F)(F));"
416      "R0][CH2,$(C(F)(F));R0][CH2,$(C(F)(F));R0][CH2,$(C(F)(F));R0][CH2,$(C(F)("
417      "F));R0]",
418      0, "Edited"},
419     {"halo_5heterocycle_bis_EWG",
420      "[#7,#8,#16]1[#6]([$(S(=O)(=O)),$([F,Cl]),$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)"
421      "),$([N+](=O)[O-]),$(C(=O))])[#6]([$(S(=O)(=O)),$([F,Cl]),$(C(F)(F)(F)),$("
422      "C#N),$(N(=O)(=O)),$([N+](=O)[O-]),$(C(=O))])[#7][#6]1([Cl,Br,I])",
423      0, ""},
424     {"halo_acrylate",
425      "[$([C;H2]),$([C&H1;$(C-F)]),$([C&H1;$(C-Cl)]),$([C&H1;$(C-Br)]),$([C&H1;$"
426      "(C-I)]),$(C(F)F),$(C(Cl)Cl),$(C(Br)Br),$(C(I)I),$(C(F)Cl),$(C(F)Br),$(C("
427      "F)I),$(C(Cl)Br),$(C(Br)I)](=[$([C&H1;$(C(-C(=O)))]),$(C(F)(C(=O))),$(C("
428      "Cl)(C(=O))),$(C(Br)(C(=O))),$(C(I)(C(=O))),$(C(C)(C(=O))),$(C(c)(C(=O)))]"
429      ")",
430      0, ""},
431     {"halo_imino", "C(=[#7])([Cl,Br,I,$(O(S(=O)(=O)))])", 0, ""},
432     {"halo_olefin_bis_EWG",
433      "C([Cl,Br,I,$(O(S(=O)(=O)))])=C([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)"
434      "(=O)),$([N+](=O)[O-]),$(C=O)])([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)"
435      "(=O)),$([N+](=O)[O-]),$(C=O)])",
436      0, ""},
437     {"halo_phenolic_carbonyl",
438      "C(=O)Oc1c([Cl,F])[cH1,$(c[F,Cl])]c([F,Cl])[cH1,$(c[F,Cl])]c1([F,Cl])", 0,
439      ""},
440     {"halo_phenolic_sulfonyl",
441      "S(=O)Oc1c([Cl,F])[cH1,$(c[F,Cl])]c([F,Cl])[cH1,$(c[F,Cl])]c1([F,Cl])", 0,
442      ""},
443     {"halogen_heteroatom", "[!C;!c;!H][F,Cl,Br,I]", 0, ""},
444     {"hetero_silyl", "[Si]~[!#6]", 0, ""},
445     {"hydrazine",
446      "[N;X3;!$(N-S(=O)(=O));!$(N-C(F)(F)(F));!$(N-C#N);!$(N-C(=O));!$(N-C(=S));"
447      "!$(N-C(=N))]-[N;X3;!$(N-S(=O)(=O));!$(N-C(F)(F)(F));!$(N-C#N);!$(N-C(=O))"
448      ";!$(N-C(=S));!$(N-C(=N))]",
449      0, ""},
450     {"hydrazothiourea", "[N;!R]=NC(=S)N", 0, ""},
451     {"hydroxamate_warhead", "C([N;H1]([O;D1]))=O", 0, ""},
452     {"hyperval_sulfur", "[$([#16&D3]),$([#16&D4])]=,:[#6]", 0, ""},
453     {"isonitrile", "[N+]#[C-]", 0, ""},
454     {"keto_def_heterocycle",
455      "[$(c([C;!R;!$(C-[N,O,S]);!$(C-[H])](=O))1naaaa1),$(c([C;!R;!$(C-[N,O,S]);"
456      "!$(C-[H])](=O))1naa[n,s,o]1)]",
457      0, ""},
458     {"linear_polycyclic_aromatic_I",
459      "[$(a12aaaaa1aa3a(aa(aaaa4)a4a3)a2),$(a12aaaaa1aa3a(aaa4a3aaaa4)a2),$("
460      "a12aaaaa1a(aa5)a3a(aaa4a3a5aaa4)a2)]",
461      0, ""},
462     {"linear_polycyclic_aromatic_II",
463      "[$(a12aaaa4a1a3a(aaaa3aa4)aa2),$(a12aaaaa1a3a(aaa4a3aaaa4)aa2),$(a1(a("
464      "aaaa4)a4a3a2aaaa3)a2aaaa1)]",
465      0, ""},
466     {"maleimide_etc",
467      "[$([C;H1]),$(C(-[F,Cl,Br,I]))]1=[$([C;H1]),$(C(-[F,Cl,Br,I]))]C(=O)[N,O,"
468      "S]C(=O)1",
469      0, ""},
470     {"meldrums_acid_deriv", "O=C1OC(C)(C)OC(C1)=O", 0, ""},
471     {"monofluoroacetate", "[C;H2](F)C(=O)[O,N,S]", 0, ""},
472     {"nitrone", "[C;!R]=[N+][O;D1]", 0, ""},
473     {"nitrosamine", "N-[N;X2](=O)", 0, ""},
474     {"non_ring_CH2O_acetal", "[O,N,S;!$(S~O)]!@[CH2]!@[O,S,N;!$(S~O)]", 0, ""},
475     {"non_ring_acetal", "[O,N,S;!$(S~O)]!@[C;H1;X4]!@[O,N,S;!$(S~O)]", 0, ""},
476     {"non_ring_ketal", "[O,N,S;!$(S~O)]!@[C;H0;X4](!@[O,N,S;!$(S~O)])(C)", 0,
477      ""},
478     {"ortho_hydroiminoquinone", "c1c([N;D1])c([N;D1])c[cH1][cH1]1", 0, ""},
479     {"ortho_hydroquinone", "a1c([O,S;D1])c([O,S;D1])a[cH1][cH1]1", 0, ""},
480     {"ortho_nitrophenyl_carbonyl",
481      "[#6]1(-O-[C;!R](=[O,N;!R]))[#6]([$(N(=O)(=O)),$([N+](=O)[O-])])[#6][#6][#"
482      "6][#6]1",
483      0, ""},
484     {"ortho_quinone",
485      "[CH1,$(C(-[Cl,Br,I]))]1=CC(=[O,N,S;!R])C(=[O,N,S])C=[CH1,$(C(-[Cl,Br,I]))"
486      "]1",
487      0, ""},
488     {"oxaziridine", "C1~[O,S]~N1", 0, ""},
489     {"oxime", "[$(C=N[O;D1]);!$(C=[N+])][#6][#6]", 0, ""},
490     {"oxonium", "[o+,O+]", 0, ""},
491     {"para_hydroiminoquinone", "a1[cH1]c([N;D1])[cH1]ac([N;D1])1", 0, ""},
492     {"para_hydroquinone", "a1[cH1]c([O,S;D1])[cH1]ac([O,S;D1])1", 0, ""},
493     {"para_nitrophenyl_ester",
494      "[#6]1(-O(-[C;!R](-[!N])(=[O,N;!R])))[#6][#6][#6]([$(N(=O)(=O)),$([N+](=O)"
495      "[O-])])[#6][#6]1",
496      0, ""},
497     {"para_quinone",
498      "[CH1,$(C(-[Cl,Br,I]))]1=[CH1,$(C(-[Cl,Br,I]))]C(=[O,N,S])[CH1,$(C(-[Cl,"
499      "Br,I]))]=[CH1,$(C(-[Cl,Br,I]))]C1(=[O,N,S])",
500      0, ""},
501     {"paraquat_like",
502      "[#6]1[#6][#6]([#6]2[#6][#6][#7;+][#6][#6]2)[#6][#6][#7;+]1", 0, ""},
503     {"pentafluorophenylester", "C(=O)Oc1c(F)c(F)c(F)c(F)c1(F)", 0, ""},
504     {"perchloro_cp", "C1(Cl)(Cl)C(Cl)C(Cl)=C(Cl)C1(Cl)", 0, ""},
505     {"perhalo_dicarbonyl_phenyl",
506      "c1(C=O)c([Br,Cl,I])c([Br,Cl,I])c([Br,Cl,I])c([Br,Cl,I])c1(C=O)", 0, ""},
507     {"perhalo_phenyl",
508      "c1c([Br,Cl,I])c([Br,Cl,I])c([Br,Cl,I])c([Br,Cl,I])c1([Br,Cl,I])", 0, ""},
509     {"peroxide", "[#8]~[#8]", 0, ""},
510     {"phenolate_bis_EWG",
511      "O=[C,S]Oc1aaa([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](=O)["
512      "O-]),$(C(=O)O),$(C(=O)N)])aa([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)(="
513      "O)),$([N+](=O)[O-]),$(C(=O)O),$(C(=O)N)])1",
514      0, ""},
515     {"phos_serine_warhead", "NC(COP(O)(O)=O)C(O)=O", 0, ""},
516     {"phos_threonine_warhead", "NC(C(C)OP(O)(O)=O)C(O)=O", 0, ""},
517     {"phos_tyrosine_warhead", "NC(Cc1ccc(OP(O)(O)=O)cc1)C(O)=O", 0, ""},
518     {"phosphite", "[c,C]-[P;v3]", 0, ""},
519     {"phosphonium", "[#15;+]~[!O]", 0, ""},
520     {"phosphorane", "C=P", 0, ""},
521     {"phosphorous_nitrogen_bond", "[#15]~[N,n]", 0, ""},
522     {"phosphorus_phosphorus_bond", "P~P", 0, ""},
523     {"phosphorus_sulfur_bond", "P~S", 0, ""},
524     {"polyene", "C=[C;!R][C;!R]=[C;!R][C;!R]=[C;!R]", 0, ""},
525     {"polyhalo_phenol_a",
526      "c1c([O;D1])c(-[Cl,Br,I])c(-[Cl,Br,I])cc1.c1c([O;D1])c(-[Cl,Br,I])c(-[Cl,"
527      "Br,I])cc1",
528      0, ""},
529     {"polyhalo_phenol_b",
530      "c1c([O;D1])c(-[Cl,Br,I])cc(-[Cl,Br,I])c1.c1c([O;D1])c(-[Cl,Br,I])cc(-[Cl,"
531      "Br,I])c1",
532      0, ""},
533     {"polyhalo_phenol_c",
534      "c1c([O;D1])ccc(-[Cl,Br,I])c(-[Cl,Br,I])1.c1c([O;D1])ccc(-[Cl,Br,I])c(-["
535      "Cl,Br,I])1",
536      0, ""},
537     {"polyhalo_phenol_d",
538      "c(-[Cl,Br,I])1c([O;D1])c(-[Cl,Br,I])ccc1.c(-[Cl,Br,I])1c([O;D1])c(-[Cl,"
539      "Br,I])ccc1",
540      0, ""},
541     {"polyhalo_phenol_e",
542      "c1c([O;D1])ccc(-[Cl,Br,I])c(-[Cl,Br,I])1.c1c([O;D1])ccc(-[Cl,Br,I])c(-["
543      "Cl,Br,I])1",
544      0, ""},
545     {"polysulfide", "[S;D2]-[S;D2]-[S;D2]", 0, ""},
546     {"porphyrin", "[#6;r16,r17,r18]~[#6]1~[#6]~[#6]~[#6](~[#6])~[#7]1", 0, ""},
547     {"primary_halide_sulfate",
548      "[CH2][Cl,Br,I,$(O(S(=O)(=O)[!$(N);!$([O&D1])]))]", 0, ""},
549     {"quat_N_N", "[N,n;R;+]!@[N,n]", 0, ""},
550     {"quat_N_acyl", "[N,n;+]!@C(=O)", 0, ""},
551     {"quinone_methide",
552      "[#6;!$([#6](-[N,O,S]))]1=[#6;!$([#6](-[N,O,S]))][#6](=[#6])[#6;!$([#6](-["
553      "N,O,S]))]=[#6;!$([#6](-[N,O,S]))][#6]1(=[O,N,S])",
554      0, ""},
555     {"rhodanine", "C(=C)1SC(=S)NC(=O)1", 0, ""},
556     {"secondary_halide_sulfate",
557      "[CH;!$(C=C)][Cl,Br,I,$(O(S(=O)(=O)[!$(N);!$([O&D1])]))]", 0, ""},
558     {"sulf_D2_nitrogen",
559      "[S;D2](-[N;!$(N(=C));!$(N(-S(=O)(=O)));!$(N(-C(=O)))])", 0, ""},
560     {"sulf_D2_oxygen_D2", "[S;D2][O;D2]", 0, ""},
561     {"sulf_D3_nitrogen", "[S;D3](-N)(-[c,C])(-[c,C])", 0, ""},
562     {"sulfite_sulfate_ester", "[C,c]OS(=O)O[C,c]", 0, ""},
563     {"sulfonium", "[S+;X3;$(S-C);!$(S-[O;D1])]", 0, ""},
564     {"sulfonyl_anhydride", "[$(C(=O)),$(S(=O)(=O))][O,S](S(=O)(=O))", 0, ""},
565     {"sulfonyl_halide", "S(=O)(=O)[F,Cl,Br,I]", 0, ""},
566     {"sulfonyl_heteroatom", "[!#6;!#1;!#11;!#19]O(S(=O)(=O)(-[C,c]))", 0, ""},
567     {"sulphonyl_cyanide", "S(=O)(=O)C#N", 0, ""},
568     {"tertiary_halide_sulfate",
569      "[C;X4](-[Cl,Br,I,$(O(S(=O)(=O)[!$(N);!$([O&D1])]))])(-[c,C])(-[c,C])(-[c,"
570      "C])",
571      0, ""},
572     {"thio_hydroxamate", "[S;D2]([$(N(=C)),$(N(-S(=O)(=O))),$(N(-C(=O)))])", 0,
573      ""},
574     {"thio_xanthate", "[S;!R]-[C;!R](=[S;!R])(-[S;!R])", 0, ""},
575     {"thiocarbonate", "SC(=O)[O,S]", 0, ""},
576     {"thioester", "[S;!R;H0]C(=[S,O;!R])([!O;!S;!N])", 0, ""},
577     {"thiol_warhead", "NC(C[S;D1])C(O)=O", 0, ""},
578     {"thiopyrylium", "c1[S,s;+]cccc1", 0, ""},
579     {"thiosulfoxide", "[C,c][S;X3](~O)-S", 0, ""},
580     {"triamide",
581      "[$(N(-C(=O))(-C(=O))(-C(=O))),$(n([#6](=O))([#6](=O))([#6](=O)))]", 0,
582      ""},
583     {"triaryl_phosphine_oxide", "P(=O)(a)(a)(a)", 0, ""},
584     {"trichloromethyl_ketone",
585      "[$(C(=O));!$(C-N);!$(C-O);!$(C-S)]C(Cl)(Cl)(Cl)", 0, ""},
586     {"triflate", "OS(=O)(=O)(C(F)(F)(F))", 0, ""},
587     {"trifluoroacetate_ester", "C(F)(F)(F)C(=O)O", 0, ""},
588     {"trifluoroacetate_thioester", "C(F)(F)(F)C(=O)S", 0, ""},
589     {"trifluoromethyl_ketone", "[$(C(=O));!$(C-N);!$(C-O);!$(C-S)]C(F)(F)(F)",
590      0, ""},
591     {"trihalovinyl_heteroatom",
592      "C(-[Cl,Br,I])(-[Cl,Br,I])=C(-[Cl,Br,I])(-[N,O,S])", 0, ""},
593     {"trinitro_aromatic",
594      "[$(a1aaa([$(N(=O)(=O)),$([N+](=O)[O-])])a([$(N(=O)(=O)),$([N+](=O)[O-])])"
595      "a1([$(N(=O)(=O)),$([N+](=O)[O-])])),$(a1aa([$(N(=O)(=O)),$([N+](=O)[O-])]"
596      ")a([$(N(=O)(=O)),$([N+](=O)[O-])])aa1([$(N(=O)(=O)),$([N+](=O)[O-])])),$("
597      "a1a([$(N(=O)(=O)),$([N+](=O)[O-])])aa([$(N(=O)(=O)),$([N+](=O)[O-])])aa1("
598      "[$(N(=O)(=O)),$([N+](=O)[O-])]))]",
599      0, ""},
600     {"trinitromethane_derivative",
601      "C([$([N+](=O)[O-]),$(N(=O)=O)])([$([N+](=O)[O-]),$(N(=O)=O)])([$([N+](=O)"
602      "[O-]),$(N(=O)=O)])",
603      0, ""},
604     {"tris_activated_aryl_ester",
605      "[$(O=[C,S]Oc1a([$(S(=O)(=O)),F,$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](="
606      "O)[O-]),$(C(=O)O),$(C(=O)N)])a([$(S(=O)(=O)),F,$(C(F)(F)(F)),$(C#N),$(N(="
607      "O)(=O)),$([N+](=O)[O-]),$(C(=O)O),$(C(=O)N)])a([$(S(=O)(=O)),F,$(C(F)(F)("
608      "F)),$(C#N),$(N(=O)(=O)),$([N+](=O)[O-]),$(C(=O)O),$(C(=O)N)])aa1),$(O=[C,"
609      "S]Oc1a([$(S(=O)(=O)),F,$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](=O)[O-]),"
610      "$(C(=O)O),$(C(=O)N)])a([$(S(=O)(=O)),F,$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),"
611      "$([N+](=O)[O-]),$(C(=O)O),$(C(=O)N)])aaa([$(S(=O)(=O)),F,$(C(F)(F)(F)),$("
612      "C#N),$(N(=O)(=O)),$([N+](=O)[O-]),$(C(=O)O),$(C(=O)N)])1),$(O=[C,S]Oc1a(["
613      "$(S(=O)(=O)),F,$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](=O)[O-]),$(C(=O)"
614      "O),$(C(=O)N)])aa([$(S(=O)(=O)),F,$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+]"
615      "(=O)[O-]),$(C(=O)O),$(C(=O)N)])a([$(S(=O)(=O)),F,$(C(F)(F)(F)),$(C#N),$("
616      "N(=O)(=O)),$([N+](=O)[O-]),$(C(=O)O),$(C(=O)N)])a1),$(O=[C,S]Oc1a([$(S(="
617      "O)(=O)),F,$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](=O)[O-]),$(C(=O)O),$("
618      "C(=O)N)])aa([$(S(=O)(=O)),F,$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](=O)["
619      "O-]),$(C(=O)O),$(C(=O)N)])aa([$(S(=O)(=O)),F,$(C(F)(F)(F)),$(C#N),$(N(=O)"
620      "(=O)),$([N+](=O)[O-]),$(C(=O)O),$(C(=O)N)])1)]",
621      0, ""},
622     {"trisub_bis_act_olefin",
623      "[CH;!R;!$(C-N)]=C([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+]("
624      "=O)[O-]),$(C(=O))])([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+"
625      "](=O)[O-]),$(C(=O))])",
626      0, "Edited"},
627     {"vinyl_carbonyl_EWG",
628      "[C;!R]([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](=O)[O-]),$("
629      "C=O)])([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](=O)[O-]),$("
630      "C=O)])=[C;!R]([C;!R](=O))([!$([#8]);!$([#7])])",
631      0, ""}};
// Number of entries in NIH, computed from the array's own size so it follows
// the table automatically when entries are added or removed.
632 const unsigned int NUM_NIH =
633     static_cast<unsigned int>(sizeof(NIH) / sizeof(FilterData_t));
634 
635 const FilterProperty_t NIH_PROPS[] = {
636     {"FilterSet", "NIH"},
637     {"Scope", "annotate compounds with problematic functional groups"},
638     {"Reference",
639      "Doveston R, et al. A Unified Lead-oriented Synthesis of over Fifty "
640      "Molecular Scaffolds. Org Biomol Chem 13 (2014) 859D65. "
641      "doi:10.1039/C4OB02287D."},
642     {"Reference",
643      "Jadhav A, et al. Quantitative Analyses of Aggregation, Autofluorescence, "
644      "and Reactivity Artifacts in a Screen for Inhibitors of a Thiol Protease. "
645      "J Med Chem 53 (2009) 37D51. doi:10.1021/jm901070c."}};
646 const unsigned int NUM_NIH_PROPS =
647     static_cast<unsigned int>(sizeof(NIH_PROPS) / sizeof(FilterProperty_t));
648 
649 /////////////////////////////////////////////////////////////////////////////////////////
650 // PAINS_A data
// # Reference: Baell JB, Holloway GA. New Substructure Filters for Removal of
// Pan Assay Interference Compounds (PAINS) from Screening Libraries and for
// Their Exclusion in Bioassays. J Med Chem 53 (2010) 2719-40.
// doi:10.1021/jm901137j.
655 // # Scope: PAINS filters (family A)
656 //
657 #include "pains_a.in"
658 
659 
660 const unsigned int NUM_PAINS_A =
661     static_cast<unsigned int>(sizeof(PAINS_A) / sizeof(FilterData_t));
662 
663 const FilterProperty_t PAINS_A_PROPS[] = {
664     {"FilterSet", "PAINS_A"},
665     {"Reference",
666      "Baell JB, Holloway GA. New Substructure Filters for Removal of Pan Assay "
667      "Interference Compounds (PAINS) from Screening Libraries and for Their "
668      "Exclusion in Bioassays. J Med Chem 53 (2010) 2719D40. "
669      "doi:10.1021/jm901137j."},
670     {"Scope", "PAINS filters (family A)"}};
671 const unsigned int NUM_PAINS_A_PROPS =
672     static_cast<unsigned int>(sizeof(PAINS_A_PROPS) / sizeof(FilterProperty_t));
673 
674 /////////////////////////////////////////////////////////////////////////////////////////
675 // PAINS_B data
// # Reference: Baell JB, Holloway GA. New Substructure Filters for Removal of
// Pan Assay Interference Compounds (PAINS) from Screening Libraries and for
// Their Exclusion in Bioassays. J Med Chem 53 (2010) 2719-40.
// doi:10.1021/jm901137j.
680 // # Scope: PAINS filters (family B)
681 // # sulfonamide_B(41) c:1:c:c(:c:c:c:1-[#8]-[#1])-[#7](-[#1])-[#16](=[#8])=[#8]
682 // 0
683 // # sulfonamide_B(41) [N;H1](c1ccc([O;H1])cc1)S(=O)=O 0
684 // # imidazole_A(19)
685 // n:1:c(:n(:c(:c:1-c:2:c:c:c:c:c:2)-c:3:c:c:c:c:c:3)-[#1])-[#6]:,=[!#1] 0
686 //
687 #include "pains_b.in"
688 
689 
690 const unsigned int NUM_PAINS_B =
691     static_cast<unsigned int>(sizeof(PAINS_B) / sizeof(FilterData_t));
692 
693 const FilterProperty_t PAINS_B_PROPS[] = {
694     {"FilterSet", "PAINS_B"},
695     {"Reference",
696      "Baell JB, Holloway GA. New Substructure Filters for Removal of Pan Assay "
697      "Interference Compounds (PAINS) from Screening Libraries and for Their "
698      "Exclusion in Bioassays. J Med Chem 53 (2010) 2719D40. "
699      "doi:10.1021/jm901137j."},
700     {"Scope", "PAINS filters (family B)"},
701 };
702 const unsigned int NUM_PAINS_B_PROPS =
703     static_cast<unsigned int>(sizeof(PAINS_B_PROPS) / sizeof(FilterProperty_t));
704 
705 /////////////////////////////////////////////////////////////////////////////////////////
706 // PAINS_C data
// # Reference: Baell JB, Holloway GA. New Substructure Filters for Removal of
// Pan Assay Interference Compounds (PAINS) from Screening Libraries and for
// Their Exclusion in Bioassays. J Med Chem 53 (2010) 2719-40.
// doi:10.1021/jm901137j.
711 // # Scope: PAINS filters (family C)
712 //
713 #include "pains_c.in"
714 
715 
716 const unsigned int NUM_PAINS_C =
717     static_cast<unsigned int>(sizeof(PAINS_C) / sizeof(FilterData_t));
718 
719 const FilterProperty_t PAINS_C_PROPS[] = {
720     {"FilterSet", "PAINS_C"},
721     {"Reference",
722      "Baell JB, Holloway GA. New Substructure Filters for Removal of Pan Assay "
723      "Interference Compounds (PAINS) from Screening Libraries and for Their "
724      "Exclusion in Bioassays. J Med Chem 53 (2010) 2719D40. "
725      "doi:10.1021/jm901137j."},
726     {"Scope", "PAINS filters (family C)"}};
727 const unsigned int NUM_PAINS_C_PROPS =
728     static_cast<unsigned int>(sizeof(PAINS_C_PROPS) / sizeof(FilterProperty_t));
729 
730 /////////////////////////////////////////////////////////////////////////////////////////
731 // ZINC data
732 // # Reference: http://blaster.docking.org/filtering/
733 // # Scope: drug-likeness and unwanted functional group filters
734 //
735 
// Built-in ZINC filter table. Each row is
//   {name, pattern, unsigned count, trailing string}
// where the pattern is a SMARTS-style substructure query and the trailing
// string is empty for every row here.
// NOTE(review): the unsigned value is presumably the number of matches
// tolerated before a molecule is flagged (0 = any match flags it) -- confirm
// against the FilterData_t declaration in Filters.h.
// Names and patterns are runtime data; do not "fix" spellings here without
// checking callers that look entries up by name.
const FilterData_t ZINC[] = {
    // Size limits: whole-molecule counts rather than functional groups.
    {"Non-Hydrogen_atoms", "[a,A]", 40, ""},
    {"carbons", "[#6]", 40, ""},
    {"N,O,S", "[#7,#8,#16]", 20, ""},
    // Functional-group patterns.
    {"Sulfonyl_halides", "S(=O)(=O)[Cl,Br]", 1, ""},
    {"Acid_halides", "[S,C](=[O,S])[F,Br,Cl,I]", 1, ""},
    {"Alkyl_halides", "[Br,Cl,I][CX4;CH,CH2]", 1, ""},
    {"Phosphenes", "cPc", 0, ""},
    {"Heptanes", "[CD1][CD2][CD2][CD2][CD2][CD2][CD2]", 0, ""},
    {"Perchlorates", "OCl(O)(O)(O)", 0, ""},
    {"Fluorines", "F", 7, ""},
    {"Cl,Br,I", "[Cl,Br,I]", 6, ""},
    {"Carbazides", "O=CN=[N+]=[N-]", 0, ""},
    {"Acid_anhydrides", "C(=O)OC(=O)", 0, ""},
    {"Peroxides", "OO", 0, ""},
    {"Iso(thio)cyanates", "N=C=[S,O]", 1, ""},
    {"Thiocyanates", "SC#N", 1, ""},
    {"Phosphoranes", "C=P", 0, ""},
    {"P/S_halides", "[P,S][Cl,Br,F,I]", 0, ""},
    {"Cyanohydrines", "N#CC[OH]", 0, ""},
    // NOTE(review): exact duplicate of the "Carbazides" row above (same name,
    // pattern and count). Left in place because removing it would change
    // NUM_ZINC and the number of entries reported -- confirm whether the
    // duplication is intentional.
    {"Carbazides", "O=CN=[N+]=[N-]", 0, ""},
    {"Sulfate_esters", "COS(=O)O[C,c]", 1, ""},
    {"Sulfonates", "COS(=O)(=O)[C,c]", 1, ""},
    {"Pentafluorophenyl_esters", "C(=O)Oc1c(F)c(F)c(F)c(F)c1(F)", 0, ""},
    {"Paranitrophenyl_esters", "C(=O)Oc1ccc(N(=O)=O)cc1", 0, ""},
    {"HOBt_esters", "C(=O)Onnn", 0, ""},
    {"Triflates", "OS(=O)(=O)C(F)(F)F", 0, ""},
    {"Lawesson's_reagents", "P(=S)(S)S", 0, ""},
    {"Phosphoramides", "NP(=O)(N)N", 0, ""},
    {"Aromatic_azides", "cN=[N+]=[N-]", 0, ""},
    {"Quaternary_C,Cl,I,P,S", "[C+,Cl+,I+,P+,S+]", 2, ""},
    {"Beta_carbonyl_quaternary_N", "C(=O)C[N+,n+]", 2, ""},
    {"Acylhydrazides", "[N;R0][N;R0]C(=O)", 2, ""},
    {"Chloramidines", "[Cl]C([C&R0])=N", 0, ""},
    {"Isonitriles", "[N+]#[C-]", 0, ""},
    {"Triacyloximes", "C(=O)N(C(=O))OC(=O)", 0, ""},
    {"Acyl_cyanides", "N#CC(=O)", 0, ""},
    {"Sulfonyl_cyanides", "S(=O)(=O)C#N", 0, ""},
    {"Cyanophosphonates", "P(OCC)(OCC)(=O)C#N", 0, ""},
    {"Azocyanamides", "[N;R0]=[N;R0]C#N", 0, ""},
    {"Azoalkanals", "[N;R0]=[N;R0]CC=O", 0, ""},
    {"(Thio)epoxides,aziridines", "C1[O,S,N]C1", 2, ""},
    {"Benzylic_quaternary_N", "cC[N+]", 2, ""},
    {"Thioesters", "C[O,S;R0][C;R0](=S)", 2, ""},
    {"Diand_Triphosphates", "P(=O)([OH])OP(=O)[OH]", 3, ""},
    {"Aminooxy(oxo)", "[#7]O[#6,#16]=O", 2, ""},
    {"nitros", "N(~[OD1])~[OD1]", 2, ""},
    {"Imines", "C=[N;R0]*", 2, ""},
    {"Acrylonitriles", "N#CC=C", 2, ""},
    {"Propenals", "C=CC(=O)[!#7;!#8]", 2, ""},
    {"Quaternary_N", "[ND4+]", 1, ""}};
787 const unsigned int NUM_ZINC =
788     static_cast<unsigned int>(sizeof(ZINC) / sizeof(FilterData_t));
789 
790 const FilterProperty_t ZINC_PROPS[] = {
791     {"FilterSet", "ZINC"},
792     {"Reference", "http://blaster.docking.org/filtering/"},
793     {"Scope", "drug-likeness and unwanted functional group filters"}};
794 const unsigned int NUM_ZINC_PROPS =
795     static_cast<unsigned int>(sizeof(ZINC_PROPS) / sizeof(FilterProperty_t));
796 
797 ////////////////////////////////////////////////////////////////////////
798 // API
GetNumEntries(FilterCatalogParams::FilterCatalogs catalog)799 unsigned int GetNumEntries(FilterCatalogParams::FilterCatalogs catalog) {
800   switch (catalog) {
801     case FilterCatalogParams::BRENK:
802       return NUM_BRENK;
803     case FilterCatalogParams::NIH:
804       return NUM_NIH;
805     case FilterCatalogParams::PAINS_A:
806       return NUM_PAINS_A;
807     case FilterCatalogParams::PAINS_B:
808       return NUM_PAINS_B;
809     case FilterCatalogParams::PAINS_C:
810       return NUM_PAINS_C;
811     case FilterCatalogParams::ZINC:
812       return NUM_ZINC;
813     default:
814       return 0;
815   }
816 }
817 
GetFilterData(FilterCatalogParams::FilterCatalogs catalog)818 const FilterData_t* GetFilterData(FilterCatalogParams::FilterCatalogs catalog) {
819   switch (catalog) {
820     case FilterCatalogParams::BRENK:
821       return BRENK;
822     case FilterCatalogParams::NIH:
823       return NIH;
824     case FilterCatalogParams::PAINS_A:
825       return PAINS_A;
826     case FilterCatalogParams::PAINS_B:
827       return PAINS_B;
828     case FilterCatalogParams::PAINS_C:
829       return PAINS_C;
830     case FilterCatalogParams::ZINC:
831       return ZINC;
832     default:
833       return nullptr;
834   }
835 }
836 
GetNumPropertyEntries(FilterCatalogParams::FilterCatalogs catalog)837 unsigned GetNumPropertyEntries(FilterCatalogParams::FilterCatalogs catalog) {
838   switch (catalog) {
839     case FilterCatalogParams::BRENK:
840       return NUM_BRENK_PROPS;
841     case FilterCatalogParams::NIH:
842       return NUM_NIH_PROPS;
843     case FilterCatalogParams::PAINS_A:
844       return NUM_PAINS_A_PROPS;
845     case FilterCatalogParams::PAINS_B:
846       return NUM_PAINS_B_PROPS;
847     case FilterCatalogParams::PAINS_C:
848       return NUM_PAINS_C_PROPS;
849     case FilterCatalogParams::ZINC:
850       return NUM_ZINC_PROPS;
851     default:
852       return 0;
853   }
854 }
855 
GetFilterProperties(FilterCatalogParams::FilterCatalogs catalog)856 const FilterProperty_t* GetFilterProperties(
857     FilterCatalogParams::FilterCatalogs catalog) {
858   switch (catalog) {
859     case FilterCatalogParams::BRENK:
860       return BRENK_PROPS;
861     case FilterCatalogParams::NIH:
862       return NIH_PROPS;
863     case FilterCatalogParams::PAINS_A:
864       return PAINS_A_PROPS;
865     case FilterCatalogParams::PAINS_B:
866       return PAINS_B_PROPS;
867     case FilterCatalogParams::PAINS_C:
868       return PAINS_C_PROPS;
869     case FilterCatalogParams::ZINC:
870       return ZINC_PROPS;
871     default:
872       return nullptr;
873   }
874 }
875 }
876