1 // Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following
12 // disclaimer in the documentation and/or other materials provided
13 // with the distribution.
14 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
15 // nor the names of its contributors may be used to endorse or promote
16 // products derived from this software without specific prior written
17 // permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 //
31
32 #include "Filters.h"
33 #include "FilterCatalog.h"
34
35 namespace RDKit {
36
37 /////////////////////////////////////////////////////////////////////////////////////////
38 // BRENK data
39 // # Reference: Brenk R et al. Lessons Learnt from Assembling Screening
40 // Libraries for Drug Discovery for Neglected Diseases. ChemMedChem 3 (2008)
41 // 435-444. doi:10.1002/cmdc.200700139.
42 // # Scope: unwanted functionality due to potential tox reasons or unfavourable
43 // pharmacokinetic properties
44 //
45
// BRENK filter table.
// Each row supplies four fields in order: an entry name, a substructure
// pattern string (SMARTS-style syntax, judging by the patterns themselves), an
// integer that is 0 for every row in this table, and a trailing string that is
// empty for every row in this table.
// NOTE(review): the meaning of the last two fields is fixed by FilterData_t,
// which is declared outside this file (presumably in Filters.h) - confirm
// there before relying on them.
// The row count is derived with sizeof right after the table, so rows can be
// added or removed without maintaining a separate counter.
46 const FilterData_t BRENK[] = {
47 {">_2_ester_groups", "C(=O)O[C,H1].C(=O)O[C,H1].C(=O)O[C,H1]", 0, ""},
48 {"2-halo_pyridine", "n1c([F,Cl,Br,I])cccc1", 0, ""},
49 {"acid_halide", "C(=O)[Cl,Br,I,F]", 0, ""},
50 {"acyclic_C=C-O", "C=[C!r]O", 0, ""},
51 {"acyl_cyanide", "N#CC(=O)", 0, ""},
52 {"acyl_hydrazine", "C(=O)N[NH2]", 0, ""},
53 {"aldehyde", "[CH1](=O)", 0, ""},
54 {"Aliphatic_long_chain", "[R0;D2][R0;D2][R0;D2][R0;D2]", 0, ""},
55 {"alkyl_halide", "[CX4][Cl,Br,I]", 0, ""},
56 {"amidotetrazole", "c1nnnn1C=O", 0, ""},
57 {"aniline", "c1cc([NH2])ccc1", 0, ""},
58 {"azepane", "[CH2R2]1N[CH2R2][CH2R2][CH2R2][CH2R2][CH2R2]1", 0, ""},
59 {"Azido_group", "N=[N+]=[N-]", 0, ""},
// NOTE(review): "Azo_group" carries a triple-bond pattern (N#N) while
// "diazo_group" further down carries a double-bond pattern ([N!R]=[N!R]); the
// two names look swapped relative to common nomenclature. Confirm against the
// Brenk reference before renaming - callers may match on these name strings.
60 {"Azo_group", "N#N", 0, ""},
61 {"azocane", "[CH2R2]1N[CH2R2][CH2R2][CH2R2][CH2R2][CH2R2][CH2R2]1", 0, ""},
62 {"benzidine",
63 "[cR2]1[cR2][cR2]([Nv3X3,Nv4X4])[cR2][cR2][cR2]1[cR2]2[cR2][cR2][cR2](["
64 "Nv3X3,Nv4X4])[cR2][cR2]2",
65 0, ""},
66 {"beta-keto/anhydride", "[C,c](=O)[CX4,CR0X3,O][C,c](=O)", 0, ""},
67 {"biotin_analogue", "C12C(NC(N1)=O)CSC2", 0, ""},
68 {"Carbo_cation/anion", "[C+,c+,C-,c-]", 0, ""},
69 {"catechol", "c1c([OH])c([OH,NH2,NH])ccc1", 0, ""},
70 {"charged_oxygen_or_sulfur_atoms", "[O+,o+,S+,s+]", 0, ""},
71 {"chinone_1", "C1(=[O,N])C=CC(=[O,N])C=C1", 0, ""},
72 {"chinone_2", "C1(=[O,N])C(=[O,N])C=CC=C1", 0, ""},
73 {"conjugated_nitrile_group", "C=[C!r]C#N", 0, ""},
74 {"crown_ether", "[OR2,NR2]@[CR2]@[CR2]@[OR2,NR2]@[CR2]@[CR2]@[OR2,NR2]", 0,
75 ""},
76 {"cumarine", "c1ccc2c(c1)ccc(=O)o2", 0, ""},
77 {"cyanamide", "N[CH2]C#N", 0, ""},
78 {"cyanate_/aminonitrile_/thiocyanate", "[N,O,S]C#N", 0, ""},
79 {"cyanohydrins", "N#CC[OH]", 0, ""},
80 {"cycloheptane_1", "[CR2]1[CR2][CR2][CR2][CR2][CR2][CR2]1", 0, ""},
81 {"cycloheptane_2", "[CR2]1[CR2][CR2]cc[CR2][CR2]1", 0, ""},
82 {"cyclooctane_1", "[CR2]1[CR2][CR2][CR2][CR2][CR2][CR2][CR2]1", 0, ""},
83 {"cyclooctane_2", "[CR2]1[CR2][CR2]cc[CR2][CR2][CR2]1", 0, ""},
84 {"diaminobenzene_1",
85 "[cR2]1[cR2]c([N+0X3R0,nX3R0])c([N+0X3R0,nX3R0])[cR2][cR2]1", 0, ""},
86 {"diaminobenzene_2",
87 "[cR2]1[cR2]c([N+0X3R0,nX3R0])[cR2]c([N+0X3R0,nX3R0])[cR2]1", 0, ""},
88 {"diaminobenzene_3",
89 "[cR2]1[cR2]c([N+0X3R0,nX3R0])[cR2][cR2]c1([N+0X3R0,nX3R0])", 0, ""},
90 {"diazo_group", "[N!R]=[N!R]", 0, ""},
91 {"diketo_group", "[C,c](=O)[C,c](=O)", 0, ""},
92 {"disulphide", "SS", 0, ""},
93 {"enamine", "[CX2R0][NX3R0]", 0, ""},
94 {"ester_of_HOBT", "C(=O)Onnn", 0, ""},
95 {"four_member_lactones", "C1(=O)OCC1", 0, ""},
96 {"halogenated_ring_1", "c1cc([Cl,Br,I,F])cc([Cl,Br,I,F])c1[Cl,Br,I,F]", 0,
97 ""},
98 {"halogenated_ring_2", "c1ccc([Cl,Br,I,F])c([Cl,Br,I,F])c1[Cl,Br,I,F]", 0,
99 ""},
100 {"heavy_metal", "[Hg,Fe,As,Sb,Zn,Se,se,Te,B,Si]", 0, ""},
101 {"het-C-het_not_in_ring",
102 "[NX3R0,NX4R0,OR0,SX2R0][CX4][NX3R0,NX4R0,OR0,SX2R0]", 0, ""},
103 {"hydantoin", "C1NC(=O)NC(=O)1", 0, ""},
104 {"hydrazine", "N[NH2]", 0, ""},
105 {"hydroquinone", "[OH]c1ccc([OH,NH2,NH])cc1", 0, ""},
106 {"hydroxamic_acid", "C(=O)N[OH]", 0, ""},
107 {"imine_1", "C=[N!R]", 0, ""},
108 {"imine_2", "N=[CR0][N,n,O,S]", 0, ""},
109 {"iodine", "I", 0, ""},
110 {"isocyanate", "N=C=O", 0, ""},
111 {"isolated_alkene",
112 "[$([CH2]),$([CH][CX4]),$(C([CX4])[CX4])]=[$([CH2]),$([CH][CX4]),$(C([CX4]"
113 ")[CX4])]",
114 0, ""},
115 {"ketene", "C=C=O", 0, ""},
116 {"methylidene-1,3-dithiole", "S1C=CSC1=S", 0, ""},
117 {"Michael_acceptor_1", "C=!@CC=[O,S]", 0, ""},
118 {"Michael_acceptor_2", "[$([CH]),$(CC)]#CC(=O)[C,c]", 0, ""},
119 {"Michael_acceptor_3", "[$([CH]),$(CC)]#CS(=O)(=O)[C,c]", 0, ""},
120 {"Michael_acceptor_4", "C=C(C=O)C=O", 0, ""},
121 {"Michael_acceptor_5", "[$([CH]),$(CC)]#CC(=O)O[C,c]", 0, ""},
122 {"N_oxide", "[NX2,nX3][OX1]", 0, ""},
123 {"N-acyl-2-amino-5-mercapto-1,3,4-_thiadiazole", "s1c(S)nnc1NC=O", 0, ""},
124 {"N-C-halo", "NC[F,Cl,Br,I]", 0, ""},
125 {"N-halo", "[NX3,NX4][F,Cl,Br,I]", 0, ""},
126 {"N-hydroxyl_pyridine", "n[OH]", 0, ""},
127 {"nitro_group", "[N+](=O)[O-]", 0, ""},
128 {"N-nitroso", "[#7]-N=O", 0, ""},
129 {"oxime_1", "[C,c]=N[OH]", 0, ""},
130 {"oxime_2", "[C,c]=NOC=O", 0, ""},
131 {"Oxygen-nitrogen_single_bond", "[OR0,NR0][OR0,NR0]", 0, ""},
132 {"Perfluorinated_chain", "[CX4](F)(F)[CX4](F)F", 0, ""},
133 {"peroxide", "OO", 0, ""},
134 {"phenol_ester", "c1ccccc1OC(=O)[#6]", 0, ""},
135 {"phenyl_carbonate", "c1ccccc1OC(=O)O", 0, ""},
136 {"phosphor", "P", 0, ""},
137 {"phthalimide", "[cR,CR]~C(=O)NC(=O)~[cR,CR]", 0, ""},
138 {"Polycyclic_aromatic_hydrocarbon_1", "a1aa2a3a(a1)A=AA=A3=AA=A2", 0, ""},
139 {"Polycyclic_aromatic_hydrocarbon_2", "a21aa3a(aa1aaaa2)aaaa3", 0, ""},
140 {"Polycyclic_aromatic_hydrocarbon_3", "a31a(a2a(aa1)aaaa2)aaaa3", 0, ""},
141 {"polyene", "[CR0]=[CR0][CR0]=[CR0]", 0, ""},
142 {"quaternary_nitrogen_1",
143 "[s,S,c,C,n,N,o,O]~[nX3+,NX3+](~[s,S,c,C,n,N])~[s,S,c,C,n,N]", 0, ""},
144 {"quaternary_nitrogen_2",
145 "[s,S,c,C,n,N,o,O]~[n+,N+](~[s,S,c,C,n,N,o,O])(~[s,S,c,C,n,N,o,O])~[s,S,c,"
146 "C,n,N,o,O]",
147 0, ""},
148 {"quaternary_nitrogen_3", "[*]=[N+]=[*]", 0, ""},
149 {"saponine_derivative", "O1CCCCC1OC2CCC3CCCCC3C2", 0, ""},
150 {"silicon_halogen", "[Si][F,Cl,Br,I]", 0, ""},
151 {"stilbene", "c1ccccc1C=Cc2ccccc2", 0, ""},
152 {"sulfinic_acid", "[SX3](=O)[O-,OH]", 0, ""},
153 {"Sulfonic_acid_1", "[C,c]S(=O)(=O)O[C,c]", 0, ""},
154 {"Sulfonic_acid_2", "S(=O)(=O)[O-,OH]", 0, ""},
155 {"sulfonyl_cyanide", "S(=O)(=O)C#N", 0, ""},
156 {"sulfur_oxygen_single_bond", "[SX2]O", 0, ""},
157 {"sulphate", "OS(=O)(=O)[O-]", 0, ""},
158 {"sulphur_nitrogen_single_bond", "[SX2H0][N]", 0, ""},
159 {"Thiobenzothiazole_1", "c12ccccc1(SC(S)=N2)", 0, ""},
160 {"thiobenzothiazole_2", "c12ccccc1(SC(=S)N2)", 0, ""},
161 {"Thiocarbonyl_group", "[C,c]=S", 0, ""},
162 {"thioester", "SC=O", 0, ""},
163 {"thiol_1", "[S-]", 0, ""},
164 {"thiol_2", "[SH]", 0, ""},
165 {"Three-membered_heterocycle", "*1[O,S,N]*1", 0, ""},
166 {"triflate", "OS(=O)(=O)C(F)(F)F", 0, ""},
167 {"triphenyl_methyl-silyl", "[SiR0,CR0](c1ccccc1)(c2ccccc2)(c3ccccc3)", 0,
168 ""},
169 {"triple_bond", "C#C", 0, ""}};
170 const unsigned int NUM_BRENK =
171 static_cast<unsigned int>(sizeof(BRENK) / sizeof(FilterData_t));
172
173 const FilterProperty_t BRENK_PROPS[] = {
174 {"FilterSet", "Brenk"},
175 {"Reference",
176 "Brenk R et al. Lessons Learnt from Assembling Screening Libraries for "
177 "Drug Discovery for Neglected Diseases. ChemMedChem 3 (2008) 435-444. "
178 "doi:10.1002/cmdc.200700139."},
179 {"Scope",
180 "unwanted functionality due to potential tox reasons or unfavourable "
181 "pharmacokinetic properties"}};
182 const unsigned int NUM_BRENK_PROPS =
183 static_cast<unsigned int>(sizeof(BRENK_PROPS) / sizeof(FilterProperty_t));
184
185 /////////////////////////////////////////////////////////////////////////////////////////
186 // NIH data
187 // # Scope: annotate compounds with problematic functional groups
188 // # Reference: Doveston R, et al. A Unified Lead-oriented Synthesis of over
189 // Fifty Molecular Scaffolds. Org Biomol Chem 13 (2014) 859-65.
190 // doi:10.1039/C4OB02287D.
191 // # Reference: Jadhav A, et al. Quantitative Analyses of Aggregation,
192 // Autofluorescence, and Reactivity Artifacts in a Screen for Inhibitors of a
193 // Thiol Protease. J Med Chem 53 (2009) 37-51. doi:10.1021/jm901070c.
194 //
195
// NIH filter table.
// Each row supplies four fields in order: an entry name, a substructure
// pattern string (SMARTS-style syntax, judging by the patterns themselves), an
// integer that is 0 for every row in this table, and a trailing string that is
// either empty or "Edited" (alpha_halo_amine, contains_metal,
// gte_8_CF2_or_CH2, trisub_bis_act_olefin).
// NOTE(review): the meaning of the last two fields is fixed by FilterData_t,
// which is declared outside this file (presumably in Filters.h) - confirm
// there before relying on them.
// Long patterns are split across adjacent string literals, which the compiler
// concatenates; the split points carry no meaning.
// The row count is derived with sizeof right after the table, so rows can be
// added or removed without maintaining a separate counter.
196 const FilterData_t NIH[] = {
197 {"2halo_pyrazine_3EWG",
198 "[#7;R1]1[#6]([F,Cl,Br,I])[#6]([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)("
199 "=O)),$([N+](=O)[O-]),$(C=O)])[#7][#6][#6]1",
200 0, ""},
201 {"2halo_pyrazine_5EWG",
202 "[#7;R1]1[#6]([F,Cl,Br,I])[#6;!$(c-N)][#7][#6]([$(S(=O)(=O)),$(C(F)(F)(F))"
203 ",$(C#N),$(N(=O)(=O)),$([N+](=O)[O-]),$(C=O)])[#6;!$(c-N)]1",
204 0, ""},
205 {"2halo_pyridazine_3EWG",
206 "[#7;R1]1[#6]([F,Cl,Br,I])[#6]([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)("
207 "=O)),$([N+](=O)[O-]),$(C=O)])[#6][#6][#7]1",
208 0, ""},
209 {"2halo_pyridazine_5EWG",
210 "[#7;R1]1[#6]([F,Cl,Br,I])[#6][#6][#6]([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),"
211 "$(N(=O)(=O)),$([N+](=O)[O-]),$(C=O)])[#7]1",
212 0, ""},
213 {"2halo_pyridine_3EWG",
214 "[#7;R1]1[#6;!$(c=O)]([F,Cl,Br,I])[#6]([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),"
215 "$(N(=O)(=O)),$([N+](=O)[O-]),$(C=O)])[#6;!$(c-N)][#6][#6;!$(c-N)]1",
216 0, ""},
217 {"2halo_pyridine_5EWG",
218 "[#7;R1]1[#6;!$(c=O)]([F,Cl,Br,I])[#6][#6;!$(c-N)][#6]([$(S(=O)(=O)),$(C("
219 "F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](=O)[O-]),$(C=O)])[#6;!$(c=O);!$(c-N)"
220 "]1",
221 0, ""},
222 {"2halo_pyrimidine_5EWG",
223 "[#7;R1]1[#6]([F,Cl,Br,I])[#7][#6][#6]([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),"
224 "$(N(=O)(=O)),$([N+](=O)[O-]),$(C=O)])[#6]1",
225 0, ""},
226 {"3halo_pyridazine_2EWG",
227 "[#7;R1]1[#6]([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](=O)["
228 "O-]),$(C=O)])[#6]([F,Cl,Br,I])[#6][#6][#7]1",
229 0, ""},
230 {"3halo_pyridazine_4EWG",
231 "[#7;R1]1[#6][#6]([F,Cl,Br,I])[#6]([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N("
232 "=O)(=O)),$([N+](=O)[O-]),$(C=O)])[#6][#7]1",
233 0, ""},
234 {"4_pyridone_3_5_EWG",
235 "[#7,#8,#16]1~[#6;H]~[#6]([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),"
236 "$([N+](=O)[O-]),$(C=O)])~[#6](=O)~[#6]([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N)"
237 ",$(N(=O)(=O)),$([N+](=O)[O-]),$(C=O)])~[#6;H]1",
238 0, ""},
239 {"4halo_pyridine_3EWG",
240 "[#7;R1]1[#6;!$(c=O);!$(c-N)][#6]([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(="
241 "O)(=O)),$([N+](=O)[O-]),$(C=O)])[#6]([F,Cl,Br,I])[#6][#6;!$(c=O);!$(c-N)]"
242 "1",
243 0, ""},
244 {"4halo_pyrimidine_2_6EWG",
245 "[#7]1[#6]([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](=O)[O-])"
246 ",$(C=O)])[#7;R1][#6]([F,Cl,Br,I])[#6][#6]1([$(S(=O)(=O)),$(C(F)(F)(F)),$("
247 "C#N),$(N(=O)(=O)),$([N+](=O)[O-]),$(C=O)])",
248 0, ""},
249 {"4halo_pyrimidine_5EWG",
250 "[#7]1[#6][#7;R1][#6]([F,Cl,Br,I])[#6]([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),"
251 "$(N(=O)(=O)),$([N+](=O)[O-]),$(C=O)])[#6]1",
252 0, ""},
253 {"CH2_S#O_3_ring", "[CH2]1[O,S]C1", 0, ""},
254 {"HOBT_ester", "O=C(-[!N])O[$(nnn),$([#7]-[#7]=[#7])]", 0, ""},
255 {"NO_phosphonate", "P(=O)ON", 0, ""},
256 {"acrylate", "[CH2]=[C;!$(C-N);!$(C-O)]C(=O)", 0, ""},
257 {"activated_4mem_ring",
258 "[#6]1~[$(C(=O)),$(S(=O))]~[O,S,N]~[$(C(=O)),$(S(=O))]1", 0, ""},
259 {"activated_S#O_3_ring", "C1~[O,S]~[C,N,O,S]1[a,N,O,S]", 0, ""},
260 {"activated_acetylene",
261 "[$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](=O)[O-]),$(C(=O))]"
262 "C#[C;!$(C-N);!$(C-n)]",
263 0, ""},
264 {"activated_diazo",
265 "[N;!R]([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](=O)[O-]),$("
266 "C(=O))])=[N;!R]([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](="
267 "O)[O-]),$(C(=O))])",
268 0, ""},
269 {"activated_vinyl_ester",
270 "O=COC=[$(C(S(=O)(=O))),$(C(C(F)(F)(F))),$(C(C#N)),$(C(N(=O)(=O))),$(C([N+"
271 "](=O)[O-])),$(C(C(=O)));!$(C(N))]",
272 0, ""},
273 {"activated_vinyl_sulfonate",
274 "O(-S(=O)(=O))C=[$(C(S(=O)(=O))),$(C(C(F)(F)(F))),$(C(C#N)),$(C(N(=O)(=O))"
275 "),$(C([N+](=O)[O-])),$(C(C(=O)));!$(C(N))]",
276 0, ""},
277 {"acyclic_imide", "[C,c][C;!R](=O)[N;!R][C;!R](=O)[C,c]", 0, ""},
278 {"acyl_123_triazole", "[#7;R1]1~[#7;R1]~[#7;R1](-C(=O))~[#6]~[#6]1", 0, ""},
279 {"acyl_134_triazole", "[#7]1~[#7]~[#6]~[#7](-C(=O)[!N])~[#6]1", 0, ""},
280 {"acyl_activated_NO", "O=C(-[!N])O[$([#7;+]),$(N(C=[O,S,N])(C=[O,S,N]))]",
281 0, ""},
282 {"acyl_cyanide", "C(=O)-C#N", 0, ""},
283 {"acyl_imidazole",
284 "[C;!$(C-N)](=O)[#7]1[#6;H1,$([#6]([*;!R]))][#7][#6;H1,$([#6]([*;!R]))][#"
285 "6;H1,$([#6]([*;!R]))]1",
286 0, ""},
287 {"acyl_pyrazole",
288 "[C;!$(C-N)](=O)[#7]1[#7][#6;H1,$([#6]([*;!R]))][#6;H1,$([#6]([*;!R]))][#"
289 "6;H1,$([#6]([*;!R]))]1",
290 0, ""},
291 {"aldehyde", "[C,c][C;H1](=O)", 0, ""},
292 {"alpha_dicarbonyl", "C(=O)!@C(=O)", 0, ""},
293 {"alpha_halo_EWG",
294 "[$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](=O)[O-])]-[CH,CH2]-[Cl,Br,I,$("
295 "O(S(=O)(=O)))]",
296 0, ""},
297 {"alpha_halo_amine",
298 "[F,Cl,Br,I,$(O(S(=O)(=O)))]-[CH,CH2;!$(C(F)(F))]-[N,n]", 0, "Edited"},
299 {"alpha_halo_carbonyl", "C(=O)([CH,CH2][Cl,Br,I,$(O(S(=O)(=O)))])", 0, ""},
300 {"alpha_halo_heteroatom",
301 "[N,n,O,S;!$(S(=O)(=O))]-[CH,CH2;!$(C(F)(F))][F,Cl,Br,I,$(O(S(=O)(=O)))]",
302 0, ""},
303 {"alpha_halo_heteroatom_tert",
304 "[N,n,O,S;!$(S(=O)(=O))]-C([Cl,Br,I,$(O(S(=O)(=O)))])(C)(C)", 0, ""},
305 {"anhydride",
306 "[$(C(=O)),$(C(=S))]-[O,S]-[$(C(=O)),$(C(=S)),$(C(=[N;!R])),$(C(=N(-[C;X4]"
307 ")))]",
308 0, ""},
309 {"aryl_phosphonate", "P(=O)-[O;!R]-a", 0, ""},
310 {"aryl_thiocarbonyl", "a-[S;X2;!R]-[C;!R](=O)", 0, ""},
311 {"azide", "[$(N#[N+]-[N-]),$([N-]=[N+]=N)]", 0, ""},
312 {"aziridine_diazirine", "[C,N]1~[C,N]~N~1", 0, ""},
313 {"azo_amino", "[N]=[N;!R]-[N]", 0, ""},
314 {"azo_aryl", "c[N;!R;!+]=[N;!R;!+]-c", 0, ""},
315 {"azo_filter1", "[N;!R]=[N;!R]-[N]=[*]", 0, ""},
316 {"azo_filter2",
317 "[N;!$(N-S(=O)(=O));!$(N-C=O)]-[N;!r3;!$(N-S(=O)(=O));!$(N-C=O)]-[N;!$(N-"
318 "S(=O)(=O));!$(N-C=O)]",
319 0, ""},
320 {"azo_filter3", "[N;!R]-[N;!R]-[N;!R]", 0, ""},
321 {"azo_filter4", "a-N=N-[N;H2]", 0, ""},
322 {"bad_boron", "[B-,BH2,BH3,$(B(F)(F))]", 0, ""},
323 {"bad_cations", "[C+,F+,Cl+,Br+,I+,Se+]", 0, ""},
324 {"benzidine_like", "c([N;!+])1ccc(c2ccc([N;!+])cc2)cc1", 0, ""},
325 {"beta_lactone", "[#6,#15,#16]1(=O)~[#6]~[#6]~[#8,#16]1", 0, ""},
326 {"betalactam", "C1(=O)~[#6]~[#6]N1", 0, ""},
327 {"betalactam_EWG",
328 "C1(=O)~[#6]~[#6]N1([$(S(=O)(=O)[C,c,O&D2]),$(C(F)(F)(F)),$(C#N),$(N(=O)(="
329 "O)),$([N+](=O)[O-]),$(C(=O)[C,c,O&D2])])",
330 0, ""},
// NOTE(review): the pattern of "bis_activated_aryl_ester" is byte-identical
// to the pattern of "phenolate_bis_EWG" further down; confirm against the
// upstream filter definitions whether one of them was meant to differ.
331 {"bis_activated_aryl_ester",
332 "O=[C,S]Oc1aaa([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](=O)["
333 "O-]),$(C(=O)O),$(C(=O)N)])aa([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)(="
334 "O)),$([N+](=O)[O-]),$(C(=O)O),$(C(=O)N)])1",
335 0, ""},
336 {"bis_keto_olefin",
337 "CC(=O)[$([C&H1]),$(C-F),$(C-Cl),$(C-Br),$(C-I)]=[$([C&H1]),$(C-F),$(C-Cl)"
338 ",$(C-Br),$(C-I)]C(=O)C",
339 0, ""},
340 {"boron_warhead", "[C,c]~[#5]", 0, ""},
341 {"branched_polycyclic_aromatic", "a1(a2aa(a3aaaaa3)aa(a4aaaaa4)a2)aaaaa1",
342 0, ""},
343 {"carbodiimide_iso#thio#cyanate", "N=C=[N,O,S]", 0, ""},
344 {"carbonyl_halide", "O=C[F,Cl,Br,I]", 0, ""},
345 {"contains_metal",
346 "[$([Ru]),$([#45]),$([Se]),$([se]),$([Pd]),$([#21]),$([Bi]),$([Sb]),$([Ag]"
347 "),$([Ti]),$([Al]),$([Cd]),$([V]),$([In]),$([#24]),$([#50]),$([Mn]),$([La]"
348 "),$([Fe]),$([Er]),$([Tm]),$([Yb]),$([Lu]),$([Hf]),$([Ta]),$([W]),$([Re]),"
349 "$([#27]),$([#76]),$([Ni]),$([Ir]),$([Cu]),$([Zn]),$([Ga]),$([Ge]),$([As])"
350 ",$([Y]),$([Zr]),$([Nb]),$([Ce]),$([#59]),$([Nd]),$([Sm]),$([Eu]),$([Gd]),"
351 "$([Tb]),$([Dy]),$([#67]),$([Pt]),$([Au]),$([Hg]),$([Tl]),$([Pb]),$([Ac]),"
352 "$([Th]),$([Pa]),$([Mo]),$([U]),$([Tc]),$([Te]),$([#84]),$([At])]",
353 0, "Edited"},
354 {"crown_ether",
355 "[$([O,S,#7;R1;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,"
356 "r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,r15,r16,r17,"
357 "r18][O,S,#7;R1;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,"
358 "r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,r15,r16,r17,"
359 "r18][O,S,#7;R1;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18]),$([O,S,#7;R1;r9,"
360 "r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,r15,"
361 "r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,"
362 "r10,r11,r12,r13,r14,r15,r16,r17,r18][O,S,#7;R1;r9,r10,r11,r12,r13,r14,"
363 "r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;"
364 "r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,"
365 "r15,r16,r17,r18][O,S,#7;R1;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18]),$([O,"
366 "S,#7;R1;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,"
367 "r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18]["
368 "O,S,#7;R1;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,"
369 "r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18]["
370 "CH,CH2;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][O,S,#7;R1;r9,r10,r11,r12,"
371 "r13,r14,r15,r16,r17,r18])]",
372 0, ""},
373 {"cyano_phosphonate", "P(O[A,a])(O[A,a])(=O)C#N", 0, ""},
374 {"cyanohydrin", "[C;X4](-[OH,NH1,NH2,SH])(-C#N)", 0, ""},
375 {"diamino_sulfide", "[N,n]~[S;!R;D2]~[N,n]", 0, ""},
376 {"diazo_carbonyl", "[$(N=N=C~C=O),$(N#N-C~C=O)]", 0, ""},
377 {"diazonium", "a[N+]#N", 0, ""},
378 {"dicarbonyl_sulfonamide",
379 "[$(N(-C(=O))(-C(=O))(-S(=O))),$(n([#6](=O))([#6](=O))([#16](=O)))]", 0,
380 ""},
381 {"disulfide_acyclic", "[S;!R;X2]-[S;!R;X2]", 0, ""},
382 {"disulfonyliminoquinone", "S(=O)(=O)N=C1C=CC(=NS(=O)(=O))C=C1", 0, ""},
383 {"double_trouble_warhead", "NC(C[S;D1])C([N;H1]([O;D1]))=O", 0, ""},
384 {"flavanoid", "O=C2CC(a3aaaaa3)Oa1aaaaa12", 0, ""},
385 {"four_nitriles", "C#N.C#N.C#N.C#N", 0, ""},
386 {"gte_10_carbon_sb_chain",
387 "[C;!R]-[C;!R]-[C;!R]-[C;!R]-[C;!R]-[C;!R]-[C;!R]-[C;!R]-[C;!R]-[C;!R]", 0,
388 ""},
389 {"gte_2_N_quats", "[N,n;H0;+;!$(N~O);!$(n~O)].[N,n;H0;+;!$(N~O);!$(n~O)]",
390 0, ""},
391 {"gte_2_free_phos", "P([O;D1])=O.P([O;D1])=O", 0, ""},
392 {"gte_2_sulfonic_acid", "[C,c]S(=O)(=O)[O;D1].[C,c]S(=O)(=O)[O;D1]", 0, ""},
393 {"gte_3_COOH", "C(=O)[O;D1].C(=O)[O;D1].C(=O)[O;D1]", 0, ""},
394 {"gte_3_iodine", "[#53].[#53].[#53]", 0, ""},
395 {"gte_4_basic_N",
396 "[N;!$(N(=[N,O,S,C]));!$(N(S(=O)(=O)));!$(N(C(F)(F)(F)));!$(N(C#N));!$(N("
397 "C(=O)));!$(N(C(=S)));!$(N(C(=N)));!$(N(#C));!$(N-c)].[N;!$(N(=[N,O,S,C]))"
398 ";!$(N(S(=O)(=O)));!$(N(C(F)(F)(F)));!$(N(C#N));!$(N(C(=O)));!$(N(C(=S)));"
399 "!$(N(C(=N)));!$(N(#C));!$(N-c)].[N;!$(N(=[N,O,S,C]));!$(N(S(=O)(=O)));!$("
400 "N(C(F)(F)(F)));!$(N(C#N));!$(N(C(=O)));!$(N(C(=S)));!$(N(C(=N)));!$(N(#C)"
401 ");!$(N-c)].[N;!$(N(=[N,O,S,C]));!$(N(S(=O)(=O)));!$(N(C(F)(F)(F)));!$(N("
402 "C#N));!$(N(C(=O)));!$(N(C(=S)));!$(N(C(=N)));!$(N(#C));!$(N-c)]",
403 0, ""},
404 {"gte_4_nitro",
405 "[$([N+](=O)[O-]),$(N(=O)=O)].[$([N+](=O)[O-]),$(N(=O)=O)].[$([N+](=O)[O-]"
406 "),$(N(=O)=O)].[$([N+](=O)[O-]),$(N(=O)=O)]",
407 0, ""},
408 {"gte_5_phenolic_OH", "a[O;D1].a[O;D1].a[O;D1].a[O;D1].a[O;D1]", 0, ""},
409 {"gte_7_aliphatic_OH",
410 "C[O;D1].C[O;D1].C[O;D1].C[O;D1].C[O;D1].C[O;D1].C[O;D1]", 0, ""},
411 {"gte_7_total_hal",
412 "[Cl,Br,I].[Cl,Br,I].[Cl,Br,I].[Cl,Br,I].[Cl,Br,I].[Cl,Br,I].[Cl,Br,I]", 0,
413 ""},
414 {"gte_8_CF2_or_CH2",
415 "[CH2,$(C(F)(F));R0][CH2,$(C(F)(F));R0][CH2,$(C(F)(F));R0][CH2,$(C(F)(F));"
416 "R0][CH2,$(C(F)(F));R0][CH2,$(C(F)(F));R0][CH2,$(C(F)(F));R0][CH2,$(C(F)("
417 "F));R0]",
418 0, "Edited"},
419 {"halo_5heterocycle_bis_EWG",
420 "[#7,#8,#16]1[#6]([$(S(=O)(=O)),$([F,Cl]),$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)"
421 "),$([N+](=O)[O-]),$(C(=O))])[#6]([$(S(=O)(=O)),$([F,Cl]),$(C(F)(F)(F)),$("
422 "C#N),$(N(=O)(=O)),$([N+](=O)[O-]),$(C(=O))])[#7][#6]1([Cl,Br,I])",
423 0, ""},
424 {"halo_acrylate",
425 "[$([C;H2]),$([C&H1;$(C-F)]),$([C&H1;$(C-Cl)]),$([C&H1;$(C-Br)]),$([C&H1;$"
426 "(C-I)]),$(C(F)F),$(C(Cl)Cl),$(C(Br)Br),$(C(I)I),$(C(F)Cl),$(C(F)Br),$(C("
427 "F)I),$(C(Cl)Br),$(C(Br)I)](=[$([C&H1;$(C(-C(=O)))]),$(C(F)(C(=O))),$(C("
428 "Cl)(C(=O))),$(C(Br)(C(=O))),$(C(I)(C(=O))),$(C(C)(C(=O))),$(C(c)(C(=O)))]"
429 ")",
430 0, ""},
431 {"halo_imino", "C(=[#7])([Cl,Br,I,$(O(S(=O)(=O)))])", 0, ""},
432 {"halo_olefin_bis_EWG",
433 "C([Cl,Br,I,$(O(S(=O)(=O)))])=C([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)"
434 "(=O)),$([N+](=O)[O-]),$(C=O)])([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)"
435 "(=O)),$([N+](=O)[O-]),$(C=O)])",
436 0, ""},
437 {"halo_phenolic_carbonyl",
438 "C(=O)Oc1c([Cl,F])[cH1,$(c[F,Cl])]c([F,Cl])[cH1,$(c[F,Cl])]c1([F,Cl])", 0,
439 ""},
440 {"halo_phenolic_sulfonyl",
441 "S(=O)Oc1c([Cl,F])[cH1,$(c[F,Cl])]c([F,Cl])[cH1,$(c[F,Cl])]c1([F,Cl])", 0,
442 ""},
443 {"halogen_heteroatom", "[!C;!c;!H][F,Cl,Br,I]", 0, ""},
444 {"hetero_silyl", "[Si]~[!#6]", 0, ""},
445 {"hydrazine",
446 "[N;X3;!$(N-S(=O)(=O));!$(N-C(F)(F)(F));!$(N-C#N);!$(N-C(=O));!$(N-C(=S));"
447 "!$(N-C(=N))]-[N;X3;!$(N-S(=O)(=O));!$(N-C(F)(F)(F));!$(N-C#N);!$(N-C(=O))"
448 ";!$(N-C(=S));!$(N-C(=N))]",
449 0, ""},
450 {"hydrazothiourea", "[N;!R]=NC(=S)N", 0, ""},
451 {"hydroxamate_warhead", "C([N;H1]([O;D1]))=O", 0, ""},
452 {"hyperval_sulfur", "[$([#16&D3]),$([#16&D4])]=,:[#6]", 0, ""},
453 {"isonitrile", "[N+]#[C-]", 0, ""},
454 {"keto_def_heterocycle",
455 "[$(c([C;!R;!$(C-[N,O,S]);!$(C-[H])](=O))1naaaa1),$(c([C;!R;!$(C-[N,O,S]);"
456 "!$(C-[H])](=O))1naa[n,s,o]1)]",
457 0, ""},
458 {"linear_polycyclic_aromatic_I",
459 "[$(a12aaaaa1aa3a(aa(aaaa4)a4a3)a2),$(a12aaaaa1aa3a(aaa4a3aaaa4)a2),$("
460 "a12aaaaa1a(aa5)a3a(aaa4a3a5aaa4)a2)]",
461 0, ""},
462 {"linear_polycyclic_aromatic_II",
463 "[$(a12aaaa4a1a3a(aaaa3aa4)aa2),$(a12aaaaa1a3a(aaa4a3aaaa4)aa2),$(a1(a("
464 "aaaa4)a4a3a2aaaa3)a2aaaa1)]",
465 0, ""},
466 {"maleimide_etc",
467 "[$([C;H1]),$(C(-[F,Cl,Br,I]))]1=[$([C;H1]),$(C(-[F,Cl,Br,I]))]C(=O)[N,O,"
468 "S]C(=O)1",
469 0, ""},
470 {"meldrums_acid_deriv", "O=C1OC(C)(C)OC(C1)=O", 0, ""},
471 {"monofluoroacetate", "[C;H2](F)C(=O)[O,N,S]", 0, ""},
472 {"nitrone", "[C;!R]=[N+][O;D1]", 0, ""},
473 {"nitrosamine", "N-[N;X2](=O)", 0, ""},
474 {"non_ring_CH2O_acetal", "[O,N,S;!$(S~O)]!@[CH2]!@[O,S,N;!$(S~O)]", 0, ""},
475 {"non_ring_acetal", "[O,N,S;!$(S~O)]!@[C;H1;X4]!@[O,N,S;!$(S~O)]", 0, ""},
476 {"non_ring_ketal", "[O,N,S;!$(S~O)]!@[C;H0;X4](!@[O,N,S;!$(S~O)])(C)", 0,
477 ""},
478 {"ortho_hydroiminoquinone", "c1c([N;D1])c([N;D1])c[cH1][cH1]1", 0, ""},
479 {"ortho_hydroquinone", "a1c([O,S;D1])c([O,S;D1])a[cH1][cH1]1", 0, ""},
480 {"ortho_nitrophenyl_carbonyl",
481 "[#6]1(-O-[C;!R](=[O,N;!R]))[#6]([$(N(=O)(=O)),$([N+](=O)[O-])])[#6][#6][#"
482 "6][#6]1",
483 0, ""},
484 {"ortho_quinone",
485 "[CH1,$(C(-[Cl,Br,I]))]1=CC(=[O,N,S;!R])C(=[O,N,S])C=[CH1,$(C(-[Cl,Br,I]))"
486 "]1",
487 0, ""},
488 {"oxaziridine", "C1~[O,S]~N1", 0, ""},
489 {"oxime", "[$(C=N[O;D1]);!$(C=[N+])][#6][#6]", 0, ""},
490 {"oxonium", "[o+,O+]", 0, ""},
491 {"para_hydroiminoquinone", "a1[cH1]c([N;D1])[cH1]ac([N;D1])1", 0, ""},
492 {"para_hydroquinone", "a1[cH1]c([O,S;D1])[cH1]ac([O,S;D1])1", 0, ""},
493 {"para_nitrophenyl_ester",
494 "[#6]1(-O(-[C;!R](-[!N])(=[O,N;!R])))[#6][#6][#6]([$(N(=O)(=O)),$([N+](=O)"
495 "[O-])])[#6][#6]1",
496 0, ""},
497 {"para_quinone",
498 "[CH1,$(C(-[Cl,Br,I]))]1=[CH1,$(C(-[Cl,Br,I]))]C(=[O,N,S])[CH1,$(C(-[Cl,"
499 "Br,I]))]=[CH1,$(C(-[Cl,Br,I]))]C1(=[O,N,S])",
500 0, ""},
501 {"paraquat_like",
502 "[#6]1[#6][#6]([#6]2[#6][#6][#7;+][#6][#6]2)[#6][#6][#7;+]1", 0, ""},
503 {"pentafluorophenylester", "C(=O)Oc1c(F)c(F)c(F)c(F)c1(F)", 0, ""},
504 {"perchloro_cp", "C1(Cl)(Cl)C(Cl)C(Cl)=C(Cl)C1(Cl)", 0, ""},
505 {"perhalo_dicarbonyl_phenyl",
506 "c1(C=O)c([Br,Cl,I])c([Br,Cl,I])c([Br,Cl,I])c([Br,Cl,I])c1(C=O)", 0, ""},
507 {"perhalo_phenyl",
508 "c1c([Br,Cl,I])c([Br,Cl,I])c([Br,Cl,I])c([Br,Cl,I])c1([Br,Cl,I])", 0, ""},
509 {"peroxide", "[#8]~[#8]", 0, ""},
510 {"phenolate_bis_EWG",
511 "O=[C,S]Oc1aaa([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](=O)["
512 "O-]),$(C(=O)O),$(C(=O)N)])aa([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)(="
513 "O)),$([N+](=O)[O-]),$(C(=O)O),$(C(=O)N)])1",
514 0, ""},
515 {"phos_serine_warhead", "NC(COP(O)(O)=O)C(O)=O", 0, ""},
516 {"phos_threonine_warhead", "NC(C(C)OP(O)(O)=O)C(O)=O", 0, ""},
517 {"phos_tyrosine_warhead", "NC(Cc1ccc(OP(O)(O)=O)cc1)C(O)=O", 0, ""},
518 {"phosphite", "[c,C]-[P;v3]", 0, ""},
519 {"phosphonium", "[#15;+]~[!O]", 0, ""},
520 {"phosphorane", "C=P", 0, ""},
521 {"phosphorous_nitrogen_bond", "[#15]~[N,n]", 0, ""},
522 {"phosphorus_phosphorus_bond", "P~P", 0, ""},
523 {"phosphorus_sulfur_bond", "P~S", 0, ""},
524 {"polyene", "C=[C;!R][C;!R]=[C;!R][C;!R]=[C;!R]", 0, ""},
525 {"polyhalo_phenol_a",
526 "c1c([O;D1])c(-[Cl,Br,I])c(-[Cl,Br,I])cc1.c1c([O;D1])c(-[Cl,Br,I])c(-[Cl,"
527 "Br,I])cc1",
528 0, ""},
529 {"polyhalo_phenol_b",
530 "c1c([O;D1])c(-[Cl,Br,I])cc(-[Cl,Br,I])c1.c1c([O;D1])c(-[Cl,Br,I])cc(-[Cl,"
531 "Br,I])c1",
532 0, ""},
// NOTE(review): "polyhalo_phenol_c" and "polyhalo_phenol_e" (two entries
// below) carry byte-identical patterns; confirm against the upstream filter
// definitions whether one of them was meant to differ.
533 {"polyhalo_phenol_c",
534 "c1c([O;D1])ccc(-[Cl,Br,I])c(-[Cl,Br,I])1.c1c([O;D1])ccc(-[Cl,Br,I])c(-["
535 "Cl,Br,I])1",
536 0, ""},
537 {"polyhalo_phenol_d",
538 "c(-[Cl,Br,I])1c([O;D1])c(-[Cl,Br,I])ccc1.c(-[Cl,Br,I])1c([O;D1])c(-[Cl,"
539 "Br,I])ccc1",
540 0, ""},
541 {"polyhalo_phenol_e",
542 "c1c([O;D1])ccc(-[Cl,Br,I])c(-[Cl,Br,I])1.c1c([O;D1])ccc(-[Cl,Br,I])c(-["
543 "Cl,Br,I])1",
544 0, ""},
545 {"polysulfide", "[S;D2]-[S;D2]-[S;D2]", 0, ""},
546 {"porphyrin", "[#6;r16,r17,r18]~[#6]1~[#6]~[#6]~[#6](~[#6])~[#7]1", 0, ""},
547 {"primary_halide_sulfate",
548 "[CH2][Cl,Br,I,$(O(S(=O)(=O)[!$(N);!$([O&D1])]))]", 0, ""},
549 {"quat_N_N", "[N,n;R;+]!@[N,n]", 0, ""},
550 {"quat_N_acyl", "[N,n;+]!@C(=O)", 0, ""},
551 {"quinone_methide",
552 "[#6;!$([#6](-[N,O,S]))]1=[#6;!$([#6](-[N,O,S]))][#6](=[#6])[#6;!$([#6](-["
553 "N,O,S]))]=[#6;!$([#6](-[N,O,S]))][#6]1(=[O,N,S])",
554 0, ""},
555 {"rhodanine", "C(=C)1SC(=S)NC(=O)1", 0, ""},
556 {"secondary_halide_sulfate",
557 "[CH;!$(C=C)][Cl,Br,I,$(O(S(=O)(=O)[!$(N);!$([O&D1])]))]", 0, ""},
558 {"sulf_D2_nitrogen",
559 "[S;D2](-[N;!$(N(=C));!$(N(-S(=O)(=O)));!$(N(-C(=O)))])", 0, ""},
560 {"sulf_D2_oxygen_D2", "[S;D2][O;D2]", 0, ""},
561 {"sulf_D3_nitrogen", "[S;D3](-N)(-[c,C])(-[c,C])", 0, ""},
562 {"sulfite_sulfate_ester", "[C,c]OS(=O)O[C,c]", 0, ""},
563 {"sulfonium", "[S+;X3;$(S-C);!$(S-[O;D1])]", 0, ""},
564 {"sulfonyl_anhydride", "[$(C(=O)),$(S(=O)(=O))][O,S](S(=O)(=O))", 0, ""},
565 {"sulfonyl_halide", "S(=O)(=O)[F,Cl,Br,I]", 0, ""},
566 {"sulfonyl_heteroatom", "[!#6;!#1;!#11;!#19]O(S(=O)(=O)(-[C,c]))", 0, ""},
567 {"sulphonyl_cyanide", "S(=O)(=O)C#N", 0, ""},
568 {"tertiary_halide_sulfate",
569 "[C;X4](-[Cl,Br,I,$(O(S(=O)(=O)[!$(N);!$([O&D1])]))])(-[c,C])(-[c,C])(-[c,"
570 "C])",
571 0, ""},
572 {"thio_hydroxamate", "[S;D2]([$(N(=C)),$(N(-S(=O)(=O))),$(N(-C(=O)))])", 0,
573 ""},
574 {"thio_xanthate", "[S;!R]-[C;!R](=[S;!R])(-[S;!R])", 0, ""},
575 {"thiocarbonate", "SC(=O)[O,S]", 0, ""},
576 {"thioester", "[S;!R;H0]C(=[S,O;!R])([!O;!S;!N])", 0, ""},
577 {"thiol_warhead", "NC(C[S;D1])C(O)=O", 0, ""},
578 {"thiopyrylium", "c1[S,s;+]cccc1", 0, ""},
579 {"thiosulfoxide", "[C,c][S;X3](~O)-S", 0, ""},
580 {"triamide",
581 "[$(N(-C(=O))(-C(=O))(-C(=O))),$(n([#6](=O))([#6](=O))([#6](=O)))]", 0,
582 ""},
583 {"triaryl_phosphine_oxide", "P(=O)(a)(a)(a)", 0, ""},
584 {"trichloromethyl_ketone",
585 "[$(C(=O));!$(C-N);!$(C-O);!$(C-S)]C(Cl)(Cl)(Cl)", 0, ""},
586 {"triflate", "OS(=O)(=O)(C(F)(F)(F))", 0, ""},
587 {"trifluoroacetate_ester", "C(F)(F)(F)C(=O)O", 0, ""},
588 {"trifluoroacetate_thioester", "C(F)(F)(F)C(=O)S", 0, ""},
589 {"trifluoromethyl_ketone", "[$(C(=O));!$(C-N);!$(C-O);!$(C-S)]C(F)(F)(F)",
590 0, ""},
591 {"trihalovinyl_heteroatom",
592 "C(-[Cl,Br,I])(-[Cl,Br,I])=C(-[Cl,Br,I])(-[N,O,S])", 0, ""},
593 {"trinitro_aromatic",
594 "[$(a1aaa([$(N(=O)(=O)),$([N+](=O)[O-])])a([$(N(=O)(=O)),$([N+](=O)[O-])])"
595 "a1([$(N(=O)(=O)),$([N+](=O)[O-])])),$(a1aa([$(N(=O)(=O)),$([N+](=O)[O-])]"
596 ")a([$(N(=O)(=O)),$([N+](=O)[O-])])aa1([$(N(=O)(=O)),$([N+](=O)[O-])])),$("
597 "a1a([$(N(=O)(=O)),$([N+](=O)[O-])])aa([$(N(=O)(=O)),$([N+](=O)[O-])])aa1("
598 "[$(N(=O)(=O)),$([N+](=O)[O-])]))]",
599 0, ""},
600 {"trinitromethane_derivative",
601 "C([$([N+](=O)[O-]),$(N(=O)=O)])([$([N+](=O)[O-]),$(N(=O)=O)])([$([N+](=O)"
602 "[O-]),$(N(=O)=O)])",
603 0, ""},
604 {"tris_activated_aryl_ester",
605 "[$(O=[C,S]Oc1a([$(S(=O)(=O)),F,$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](="
606 "O)[O-]),$(C(=O)O),$(C(=O)N)])a([$(S(=O)(=O)),F,$(C(F)(F)(F)),$(C#N),$(N(="
607 "O)(=O)),$([N+](=O)[O-]),$(C(=O)O),$(C(=O)N)])a([$(S(=O)(=O)),F,$(C(F)(F)("
608 "F)),$(C#N),$(N(=O)(=O)),$([N+](=O)[O-]),$(C(=O)O),$(C(=O)N)])aa1),$(O=[C,"
609 "S]Oc1a([$(S(=O)(=O)),F,$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](=O)[O-]),"
610 "$(C(=O)O),$(C(=O)N)])a([$(S(=O)(=O)),F,$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),"
611 "$([N+](=O)[O-]),$(C(=O)O),$(C(=O)N)])aaa([$(S(=O)(=O)),F,$(C(F)(F)(F)),$("
612 "C#N),$(N(=O)(=O)),$([N+](=O)[O-]),$(C(=O)O),$(C(=O)N)])1),$(O=[C,S]Oc1a(["
613 "$(S(=O)(=O)),F,$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](=O)[O-]),$(C(=O)"
614 "O),$(C(=O)N)])aa([$(S(=O)(=O)),F,$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+]"
615 "(=O)[O-]),$(C(=O)O),$(C(=O)N)])a([$(S(=O)(=O)),F,$(C(F)(F)(F)),$(C#N),$("
616 "N(=O)(=O)),$([N+](=O)[O-]),$(C(=O)O),$(C(=O)N)])a1),$(O=[C,S]Oc1a([$(S(="
617 "O)(=O)),F,$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](=O)[O-]),$(C(=O)O),$("
618 "C(=O)N)])aa([$(S(=O)(=O)),F,$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](=O)["
619 "O-]),$(C(=O)O),$(C(=O)N)])aa([$(S(=O)(=O)),F,$(C(F)(F)(F)),$(C#N),$(N(=O)"
620 "(=O)),$([N+](=O)[O-]),$(C(=O)O),$(C(=O)N)])1)]",
621 0, ""},
622 {"trisub_bis_act_olefin",
623 "[CH;!R;!$(C-N)]=C([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+]("
624 "=O)[O-]),$(C(=O))])([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+"
625 "](=O)[O-]),$(C(=O))])",
626 0, "Edited"},
627 {"vinyl_carbonyl_EWG",
628 "[C;!R]([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](=O)[O-]),$("
629 "C=O)])([$(S(=O)(=O)),$(C(F)(F)(F)),$(C#N),$(N(=O)(=O)),$([N+](=O)[O-]),$("
630 "C=O)])=[C;!R]([C;!R](=O))([!$([#8]);!$([#7])])",
631 0, ""}};
632 const unsigned int NUM_NIH =
633 static_cast<unsigned int>(sizeof(NIH) / sizeof(FilterData_t));
634
635 const FilterProperty_t NIH_PROPS[] = {
636 {"FilterSet", "NIH"},
637 {"Scope", "annotate compounds with problematic functional groups"},
638 {"Reference",
639 "Doveston R, et al. A Unified Lead-oriented Synthesis of over Fifty "
640 "Molecular Scaffolds. Org Biomol Chem 13 (2014) 859D65. "
641 "doi:10.1039/C4OB02287D."},
642 {"Reference",
643 "Jadhav A, et al. Quantitative Analyses of Aggregation, Autofluorescence, "
644 "and Reactivity Artifacts in a Screen for Inhibitors of a Thiol Protease. "
645 "J Med Chem 53 (2009) 37D51. doi:10.1021/jm901070c."}};
646 const unsigned int NUM_NIH_PROPS =
647 static_cast<unsigned int>(sizeof(NIH_PROPS) / sizeof(FilterProperty_t));
648
649 /////////////////////////////////////////////////////////////////////////////////////////
650 // PAINS_A data
651 // # Reference: Baell JB, Holloway GA. New Substructure Filters for Removal of
652 // Pan Assay Interference Compounds (PAINS) from Screening Libraries and for
// Their Exclusion in Bioassays. J Med Chem 53 (2010) 2719-40.
654 // doi:10.1021/jm901137j.
655 // # Scope: PAINS filters (family A)
656 //
657 #include "pains_a.in"
658
659
660 const unsigned int NUM_PAINS_A =
661 static_cast<unsigned int>(sizeof(PAINS_A) / sizeof(FilterData_t));
662
663 const FilterProperty_t PAINS_A_PROPS[] = {
664 {"FilterSet", "PAINS_A"},
665 {"Reference",
666 "Baell JB, Holloway GA. New Substructure Filters for Removal of Pan Assay "
667 "Interference Compounds (PAINS) from Screening Libraries and for Their "
668 "Exclusion in Bioassays. J Med Chem 53 (2010) 2719D40. "
669 "doi:10.1021/jm901137j."},
670 {"Scope", "PAINS filters (family A)"}};
671 const unsigned int NUM_PAINS_A_PROPS =
672 static_cast<unsigned int>(sizeof(PAINS_A_PROPS) / sizeof(FilterProperty_t));
673
674 /////////////////////////////////////////////////////////////////////////////////////////
675 // PAINS_B data
676 // # Reference: Baell JB, Holloway GA. New Substructure Filters for Removal of
677 // Pan Assay Interference Compounds (PAINS) from Screening Libraries and for
// Their Exclusion in Bioassays. J Med Chem 53 (2010) 2719-40.
679 // doi:10.1021/jm901137j.
680 // # Scope: PAINS filters (family B)
681 // # sulfonamide_B(41) c:1:c:c(:c:c:c:1-[#8]-[#1])-[#7](-[#1])-[#16](=[#8])=[#8]
682 // 0
683 // # sulfonamide_B(41) [N;H1](c1ccc([O;H1])cc1)S(=O)=O 0
684 // # imidazole_A(19)
685 // n:1:c(:n(:c(:c:1-c:2:c:c:c:c:c:2)-c:3:c:c:c:c:c:3)-[#1])-[#6]:,=[!#1] 0
686 //
687 #include "pains_b.in"
688
689
690 const unsigned int NUM_PAINS_B =
691 static_cast<unsigned int>(sizeof(PAINS_B) / sizeof(FilterData_t));
692
693 const FilterProperty_t PAINS_B_PROPS[] = {
694 {"FilterSet", "PAINS_B"},
695 {"Reference",
696 "Baell JB, Holloway GA. New Substructure Filters for Removal of Pan Assay "
697 "Interference Compounds (PAINS) from Screening Libraries and for Their "
698 "Exclusion in Bioassays. J Med Chem 53 (2010) 2719D40. "
699 "doi:10.1021/jm901137j."},
700 {"Scope", "PAINS filters (family B)"},
701 };
702 const unsigned int NUM_PAINS_B_PROPS =
703 static_cast<unsigned int>(sizeof(PAINS_B_PROPS) / sizeof(FilterProperty_t));
704
705 /////////////////////////////////////////////////////////////////////////////////////////
706 // PAINS_C data
707 // # Reference: Baell JB, Holloway GA. New Substructure Filters for Removal of
708 // Pan Assay Interference Compounds (PAINS) from Screening Libraries and for
// Their Exclusion in Bioassays. J Med Chem 53 (2010) 2719-40.
710 // doi:10.1021/jm901137j.
711 // # Scope: PAINS filters (family C)
712 //
713 #include "pains_c.in"
714
715
716 const unsigned int NUM_PAINS_C =
717 static_cast<unsigned int>(sizeof(PAINS_C) / sizeof(FilterData_t));
718
719 const FilterProperty_t PAINS_C_PROPS[] = {
720 {"FilterSet", "PAINS_C"},
721 {"Reference",
722 "Baell JB, Holloway GA. New Substructure Filters for Removal of Pan Assay "
723 "Interference Compounds (PAINS) from Screening Libraries and for Their "
724 "Exclusion in Bioassays. J Med Chem 53 (2010) 2719D40. "
725 "doi:10.1021/jm901137j."},
726 {"Scope", "PAINS filters (family C)"}};
727 const unsigned int NUM_PAINS_C_PROPS =
728 static_cast<unsigned int>(sizeof(PAINS_C_PROPS) / sizeof(FilterProperty_t));
729
730 /////////////////////////////////////////////////////////////////////////////////////////
731 // ZINC data
732 // # Reference: http://blaster.docking.org/filtering/
733 // # Scope: drug-likeness and unwanted functional group filters
734 //
735
736 const FilterData_t ZINC[] = {
737 {"Non-Hydrogen_atoms", "[a,A]", 40, ""},
738 {"carbons", "[#6]", 40, ""},
739 {"N,O,S", "[#7,#8,#16]", 20, ""},
740 {"Sulfonyl_halides", "S(=O)(=O)[Cl,Br]", 1, ""},
741 {"Acid_halides", "[S,C](=[O,S])[F,Br,Cl,I]", 1, ""},
742 {"Alkyl_halides", "[Br,Cl,I][CX4;CH,CH2]", 1, ""},
743 {"Phosphenes", "cPc", 0, ""},
744 {"Heptanes", "[CD1][CD2][CD2][CD2][CD2][CD2][CD2]", 0, ""},
745 {"Perchlorates", "OCl(O)(O)(O)", 0, ""},
746 {"Fluorines", "F", 7, ""},
747 {"Cl,Br,I", "[Cl,Br,I]", 6, ""},
748 {"Carbazides", "O=CN=[N+]=[N-]", 0, ""},
749 {"Acid_anhydrides", "C(=O)OC(=O)", 0, ""},
750 {"Peroxides", "OO", 0, ""},
751 {"Iso(thio)cyanates", "N=C=[S,O]", 1, ""},
752 {"Thiocyanates", "SC#N", 1, ""},
753 {"Phosphoranes", "C=P", 0, ""},
754 {"P/S_halides", "[P,S][Cl,Br,F,I]", 0, ""},
755 {"Cyanohydrines", "N#CC[OH]", 0, ""},
756 {"Carbazides", "O=CN=[N+]=[N-]", 0, ""},
757 {"Sulfate_esters", "COS(=O)O[C,c]", 1, ""},
758 {"Sulfonates", "COS(=O)(=O)[C,c]", 1, ""},
759 {"Pentafluorophenyl_esters", "C(=O)Oc1c(F)c(F)c(F)c(F)c1(F)", 0, ""},
760 {"Paranitrophenyl_esters", "C(=O)Oc1ccc(N(=O)=O)cc1", 0, ""},
761 {"HOBt_esters", "C(=O)Onnn", 0, ""},
762 {"Triflates", "OS(=O)(=O)C(F)(F)F", 0, ""},
763 {"Lawesson's_reagents", "P(=S)(S)S", 0, ""},
764 {"Phosphoramides", "NP(=O)(N)N", 0, ""},
765 {"Aromatic_azides", "cN=[N+]=[N-]", 0, ""},
766 {"Quaternary_C,Cl,I,P,S", "[C+,Cl+,I+,P+,S+]", 2, ""},
767 {"Beta_carbonyl_quaternary_N", "C(=O)C[N+,n+]", 2, ""},
768 {"Acylhydrazides", "[N;R0][N;R0]C(=O)", 2, ""},
769 {"Chloramidines", "[Cl]C([C&R0])=N", 0, ""},
770 {"Isonitriles", "[N+]#[C-]", 0, ""},
771 {"Triacyloximes", "C(=O)N(C(=O))OC(=O)", 0, ""},
772 {"Acyl_cyanides", "N#CC(=O)", 0, ""},
773 {"Sulfonyl_cyanides", "S(=O)(=O)C#N", 0, ""},
774 {"Cyanophosphonates", "P(OCC)(OCC)(=O)C#N", 0, ""},
775 {"Azocyanamides", "[N;R0]=[N;R0]C#N", 0, ""},
776 {"Azoalkanals", "[N;R0]=[N;R0]CC=O", 0, ""},
777 {"(Thio)epoxides,aziridines", "C1[O,S,N]C1", 2, ""},
778 {"Benzylic_quaternary_N", "cC[N+]", 2, ""},
779 {"Thioesters", "C[O,S;R0][C;R0](=S)", 2, ""},
780 {"Diand_Triphosphates", "P(=O)([OH])OP(=O)[OH]", 3, ""},
781 {"Aminooxy(oxo)", "[#7]O[#6,#16]=O", 2, ""},
782 {"nitros", "N(~[OD1])~[OD1]", 2, ""},
783 {"Imines", "C=[N;R0]*", 2, ""},
784 {"Acrylonitriles", "N#CC=C", 2, ""},
785 {"Propenals", "C=CC(=O)[!#7;!#8]", 2, ""},
786 {"Quaternary_N", "[ND4+]", 1, ""}};
787 const unsigned int NUM_ZINC =
788 static_cast<unsigned int>(sizeof(ZINC) / sizeof(FilterData_t));
789
790 const FilterProperty_t ZINC_PROPS[] = {
791 {"FilterSet", "ZINC"},
792 {"Reference", "http://blaster.docking.org/filtering/"},
793 {"Scope", "drug-likeness and unwanted functional group filters"}};
794 const unsigned int NUM_ZINC_PROPS =
795 static_cast<unsigned int>(sizeof(ZINC_PROPS) / sizeof(FilterProperty_t));
796
797 ////////////////////////////////////////////////////////////////////////
798 // API
GetNumEntries(FilterCatalogParams::FilterCatalogs catalog)799 unsigned int GetNumEntries(FilterCatalogParams::FilterCatalogs catalog) {
800 switch (catalog) {
801 case FilterCatalogParams::BRENK:
802 return NUM_BRENK;
803 case FilterCatalogParams::NIH:
804 return NUM_NIH;
805 case FilterCatalogParams::PAINS_A:
806 return NUM_PAINS_A;
807 case FilterCatalogParams::PAINS_B:
808 return NUM_PAINS_B;
809 case FilterCatalogParams::PAINS_C:
810 return NUM_PAINS_C;
811 case FilterCatalogParams::ZINC:
812 return NUM_ZINC;
813 default:
814 return 0;
815 }
816 }
817
GetFilterData(FilterCatalogParams::FilterCatalogs catalog)818 const FilterData_t* GetFilterData(FilterCatalogParams::FilterCatalogs catalog) {
819 switch (catalog) {
820 case FilterCatalogParams::BRENK:
821 return BRENK;
822 case FilterCatalogParams::NIH:
823 return NIH;
824 case FilterCatalogParams::PAINS_A:
825 return PAINS_A;
826 case FilterCatalogParams::PAINS_B:
827 return PAINS_B;
828 case FilterCatalogParams::PAINS_C:
829 return PAINS_C;
830 case FilterCatalogParams::ZINC:
831 return ZINC;
832 default:
833 return nullptr;
834 }
835 }
836
GetNumPropertyEntries(FilterCatalogParams::FilterCatalogs catalog)837 unsigned GetNumPropertyEntries(FilterCatalogParams::FilterCatalogs catalog) {
838 switch (catalog) {
839 case FilterCatalogParams::BRENK:
840 return NUM_BRENK_PROPS;
841 case FilterCatalogParams::NIH:
842 return NUM_NIH_PROPS;
843 case FilterCatalogParams::PAINS_A:
844 return NUM_PAINS_A_PROPS;
845 case FilterCatalogParams::PAINS_B:
846 return NUM_PAINS_B_PROPS;
847 case FilterCatalogParams::PAINS_C:
848 return NUM_PAINS_C_PROPS;
849 case FilterCatalogParams::ZINC:
850 return NUM_ZINC_PROPS;
851 default:
852 return 0;
853 }
854 }
855
GetFilterProperties(FilterCatalogParams::FilterCatalogs catalog)856 const FilterProperty_t* GetFilterProperties(
857 FilterCatalogParams::FilterCatalogs catalog) {
858 switch (catalog) {
859 case FilterCatalogParams::BRENK:
860 return BRENK_PROPS;
861 case FilterCatalogParams::NIH:
862 return NIH_PROPS;
863 case FilterCatalogParams::PAINS_A:
864 return PAINS_A_PROPS;
865 case FilterCatalogParams::PAINS_B:
866 return PAINS_B_PROPS;
867 case FilterCatalogParams::PAINS_C:
868 return PAINS_C_PROPS;
869 case FilterCatalogParams::ZINC:
870 return ZINC_PROPS;
871 default:
872 return nullptr;
873 }
874 }
875 }
876