ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/OOPSE-4/forceFields/SMARTS_InteLigand_051110.txt
Revision: 2459
Committed: Mon Nov 21 14:59:34 2005 UTC (18 years, 7 months ago) by tim
Content type: text/plain
File size: 38907 byte(s)
Log Message:
adding openbabel data files

File Contents

# User Rev Content
1 tim 2459 #
2     # SMARTS Patterns for Functional Group Classification
3     #
4     # written by Christian Laggner
5     # Copyright Inte:Ligand Software-Entwicklungs und Consulting GmbH
6     # Released under the Lesser General Public License (LGPL license)
7     # http://www.gnu.org/copyleft/lesser.html
8     #####################################################################################################
9    
10     # General Stuff:
11     # These patterns were written in an attempt to represent the classification of organic compounds
12     # from the viewpoint of an organic chemist.
13     # They are often very restrictive. This may be generally a good thing, but it also takes some time
14     # for filtering/indexing large compound sets.
15     # For filtering undesired groups (in druglike compounds) one will want to have more general patterns
16     # (e.g. you don't want *any* halide of *any* acid, *neither* aldehyde *nor* formyl esters and amides, ...).
17     #
18    
19     # Part I: Carbon
20     # ==============
21    
22    
23     # I.1: Carbon-Carbon Bonds
24     # ------------------------
25    
26     # I.1.1 Alkanes:
27    
28     Primary_carbon: [CX4H3][#6]
29    
30     Secondary_carbon: [CX4H2]([#6])[#6]
31    
32     Tertiary_carbon: [CX4H1]([#6])([#6])[#6]
33    
34     Quartary_carbon: [CX4]([#6])([#6])([#6])[#6]
35    
36    
37     # I.1.2 C-C double and Triple Bonds
38    
39     Alkene: [CX3;$([H2]),$([H1][#6]),$(C([#6])[#6])]=[CX3;$([H2]),$([H1][#6]),$(C([#6])[#6])]
40     # sp2 C may be substituted only by C or H -
41     # does not hit ketenes and allenes, nor enamines, enols and the like
42    
43     Alkyne: [CX2]#[CX2]
44     # non-carbon substituents (e.g. alkynol ethers) are rather rare, thus no further discrimination
45    
46     Allene: [CX3]=[CX2]=[CX3]
47    
48    
49     # I.2: One Carbon-Hetero Bond
50     # ---------------------------
51    
52    
53     # I.2.1 Alkyl Halogenides
54    
55     Alkylchloride: [ClX1][CX4]
56     # will also hit chloromethylethers and the like, but no chloroalkenes, -alkynes or -aromats
57     # a more restrictive version can be obtained by modifying the Alcohol string.
58    
59     Alkylfluoride: [FX1][CX4]
60    
61     Alkylbromide: [BrX1][CX4]
62    
63     Alkyliodide: [IX1][CX4]
64    
65    
66     # I.2.2 Alcohols and Ethers
67    
68     Alcohol: [OX2H][CX4;!$(C([OX2H])[O,S,#7,#15])]
69     # nonspecific definition, no acetals, aminals, and the like
70    
71     Primary_alcohol: [OX2H][CX4H2;!$(C([OX2H])[O,S,#7,#15])]
72    
73     Secondary_alcohol: [OX2H][CX4H;!$(C([OX2H])[O,S,#7,#15])]
74    
75     Tertiary_alcohol: [OX2H][CX4D4;!$(C([OX2H])[O,S,#7,#15])]
76    
77     Dialkylether: [OX2]([CX4;!$(C([OX2])[O,S,#7,#15,F,Cl,Br,I])])[CX4;!$(C([OX2])[O,S,#7,#15])]
78     # no acetals and the like; no enolethers
79    
80     Dialkylthioether: [SX2]([CX4;!$(C([SX2])[O,S,#7,#15,F,Cl,Br,I])])[CX4;!$(C([SX2])[O,S,#7,#15])]
81     # no acetals and the like; no enolethers
82    
83     Alkylarylether: [OX2](c)[CX4;!$(C([OX2])[O,S,#7,#15,F,Cl,Br,I])]
84     # no acetals and the like; no enolethers
85    
86     Diarylether: [c][OX2][c]
87    
88     Alkylarylthioether: [SX2](c)[CX4;!$(C([SX2])[O,S,#7,#15,F,Cl,Br,I])]
89    
90     Diarylthioether: [c][SX2][c]
91    
92     Oxonium: [O+;!$([O]~[!#6]);!$([O]*~[#7,#8,#15,#16])]
93     # can't be aromatic, thus O and not #8
94    
95     # I.2.3 Amines
96    
97     Amine: [NX3,NX4+;!$([N]~[!#6]);!$([N]*~[#7,#8,#15,#16])]
98     # hits all amines (prim/sec/tert/quart), including ammonium salts, also enamines, but not amides, imides, aminals, ...
99    
100     # the following amines include also the protonated forms
101    
102     Primary_aliph_amine: [NX3H2,NX4H3+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])]
103    
104     Secondary_aliph_amine: [NX3H1,NX4H2+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])]
105    
106     Tertiary_aliph_amine: [NX3H0,NX4H1+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])]
107    
108     Quartary_aliph_ammonium: [NX4H0+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])]
109    
110     Primary_arom_amine: [NX3H2,NX4H3+]c
111    
112     Secondary_arom_amine: [NX3H1,NX4H2+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])]
113    
114     Tertiary_arom_amine: [NX3H0,NX4H1+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])]
115    
116     Quartary_arom_ammonium: [NX4H0+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])]
117    
118     Secondary_mixed_amine: [NX3H1,NX4H2+;$([N]([c])[C]);!$([N]*~[#7,#8,#15,#16])]
119    
120     Tertiary_mixed_amine: [NX3H0,NX4H1+;$([N]([c])([C])[#6]);!$([N]*~[#7,#8,#15,#16])]
121    
122     Quartary_mixed_ammonium: [NX4H0+;$([N]([c])([C])([#6])[#6]);!$([N]*~[#7,#8,#15,#16])]
123    
124     Ammonium: [N+;!$([N]~[!#6]);!$([N]*~[#7,#8,#15,#16])]
125     # only C and H substituents allowed. NX4+ or Nv4+ is not recognized by Daylight's
126     # depictmatch if less than four C are present...
127    
128    
129     # I.2.4 Others
130    
131     Alkylthiol: [SX2H][CX4;!$(C([SX2H])~[O,S,#7,#15])]
132    
133     Dialkylthioether: [SX2]([CX4;!$(C([SX2])[O,S,#7,#15,F,Cl,Br,I])])[CX4;!$(C([SX2])[O,S,#7,#15])]
134    
135     Alkylarylthioether: [SX2](c)[CX4;!$(C([SX2])[O,S,#7,#15])]
136    
137     Disulfide: [SX2D2][SX2D2]
138    
139     1,2-Aminoalcohol: [OX2H][CX4;!$(C([OX2H])[O,S,#7,#15,F,Cl,Br,I])][CX4;!$(C([N])[O,S,#7,#15])][NX3;!$(NC=[O,S,N])]
140     # does not hit alpha-amino acids, enaminoalcohols, 1,2-aminoacetals, o-aminophenols, etc.
141    
142     1,2-Diol: [OX2H][CX4;!$(C([OX2H])[O,S,#7,#15])][CX4;!$(C([OX2H])[O,S,#7,#15])][OX2H]
143     # does not hit alpha-hydroxy acids, enolalcohols, 1,2-hydroxyacetals, 1,2-diphenols, etc.
144    
145     1,1-Diol: [OX2H][CX4;!$(C([OX2H])([OX2H])[O,S,#7,#15])][OX2H]
146    
147     Hydroperoxide: [OX2H][OX2]
148     # does not necessarily have to be connected to a carbon atom, includes also hydrotrioxides
149    
150     Peroxo: [OX2D2][OX2D2]
151    
152     Organolithium_compounds: [LiX1][#6,#14]
153    
154     Organomagnesium_compounds: [MgX2][#6,#14]
155     # not restricted to Grignard compounds, also dialkyl Mg
156    
157     Organometallic_compounds: [!#1;!#5;!#6;!#7;!#8;!#9;!#14;!#15;!#16;!#17;!#33;!#34;!#35;!#52;!#53;!#85]~[#6;!-]
158     # very general, includes all metals covalently bound to carbon
159    
160    
161     # I.3: Two Carbon-Hetero Bonds (Carbonyl and Derivatives)
162     # ----------------------------
163    
164     # I.3.1 Double Bond to Hetero
165    
166     Aldehyde: [$([CX3H][#6]),$([CX3H2])]=[OX1]
167     # hits aldehydes including formaldehyde
168    
169     Ketone: [#6][CX3](=[OX1])[#6]
170     # does not include oxo-groups connected to a (hetero-) aromatic ring
171    
172     Thioaldehyde: [$([CX3H][#6]),$([CX3H2])]=[SX1]
173    
174     Thioketone: [#6][CX3](=[SX1])[#6]
175     # does not include thioxo-groups connected to a (hetero-) aromatic ring
176    
177     Imine: [NX2;$([N][#6]),$([NH]);!$([N][CX3]=[#7,#8,#15,#16])]=[CX3;$([CH2]),$([CH][#6]),$([C]([#6])[#6])]
178     # nitrogen is not part of an amide-like structure, nor of an aromatic ring, but can be part of an aminal or similar
179    
180     Immonium: [N+;!$([N][!#6]);!$([N][CX3]=[#7,#8,#15,#16])]
181    
182     Oxime: [NX2](=[CX3;$([CH2]),$([CH][#6]),$([C]([#6])[#6])])[OX2H]
183    
184     Oximether: [NX2](=[CX3;$([CH2]),$([CH][#6]),$([C]([#6])[#6])])[OX2][#6;!$(C=[#7,#8])]
185     # ether, not ester or amide; does not hit isoxazole
186    
187    
188     # I.3.2. Two Single Bonds to Hetero
189    
190     Acetal: [OX2]([#6;!$(C=[O,S,N])])[CX4;!$(C(O)(O)[!#6])][OX2][#6;!$(C=[O,S,N])]
191     # does not hit hydroxy-methylesters, ketenacetals, hemiacetals, orthoesters, etc.
192    
193     Hemiacetal: [OX2H][CX4;!$(C(O)(O)[!#6])][OX2][#6;!$(C=[O,S,N])]
194    
195     Aminal: [NX3v3;!$(NC=[#7,#8,#15,#16])]([#6])[CX4;!$(C(N)(N)[!#6])][NX3v3;!$(NC=[#7,#8,#15,#16])][#6]
196     # Ns are not part of an amide or similar. v3 is to exclude nitro and similar groups
197    
198     Hemiaminal: [NX3v3;!$(NC=[#7,#8,#15,#16])]([#6])[CX4;!$(C(N)(N)[!#6])][OX2H]
199    
200     Thioacetal: [SX2]([#6;!$(C=[O,S,N])])[CX4;!$(C(S)(S)[!#6])][SX2][#6;!$(C=[O,S,N])]
201    
202     Thiohemiacetal: [SX2]([#6;!$(C=[O,S,N])])[CX4;!$(C(S)(S)[!#6])][OX2H]
203    
204     Halogen_acetal_like: [NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][FX1,ClX1,BrX1,IX1]
205     # hits chloromethylenethers and other reactive alkylating agents
206    
207     Acetal_like: [NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][FX1,ClX1,BrX1,IX1,NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])]
208     # includes all of the above and other combinations (S-C-N, hydrates, ...), but still no aminomethylenesters and similar
209    
210     Halogenmethylen_ester_and_similar: [NX3v3,SX2,OX2;$(**=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][FX1,ClX1,BrX1,IX1]
211     # also reactive alkylating agents. Acid does not have to be carboxylic acid, also S- and P-based acids allowed
212    
213     NOS_methylen_ester_and_similar: [NX3v3,SX2,OX2;$(**=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])]
214     # Same as above, but N,O or S instead of halogen. Ester/amide allowed only on one side
215    
216     Hetero_methylen_ester_and_similar: [NX3v3,SX2,OX2;$(**=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][FX1,ClX1,BrX1,IX1,NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])]
217     # Combination of the last two patterns
218    
219     Cyanhydrine: [NX1]#[CX2][CX4;$([CH2]),$([CH]([CX2])[#6]),$(C([CX2])([#6])[#6])][OX2H]
220    
221    
222     # I.3.3 Single Bond to Hetero, C=C Double Bond (Enols and Similar)
223    
224     Chloroalkene: [ClX1][CX3]=[CX3]
225    
226     Fluoroalkene: [FX1][CX3]=[CX3]
227    
228     Bromoalkene: [BrX1][CX3]=[CX3]
229    
230     Iodoalkene: [IX1][CX3]=[CX3]
231    
232     Enol: [OX2H][CX3;$([H1]),$(C[#6])]=[CX3]
233     # no phenols
234    
235     Endiol: [OX2H][CX3;$([H1]),$(C[#6])]=[CX3;$([H1]),$(C[#6])][OX2H]
236     # no 1,2-diphenols, ketenacetals, ...
237    
238     Enolether: [OX2]([#6;!$(C=[N,O,S])])[CX3;$([H0][#6]),$([H1])]=[CX3]
239     # finds also endiol diethers, but not enolesters, no aromats
240    
241     Enolester: [OX2]([CX3]=[OX1])[#6X3;$([#6][#6]),$([H1])]=[#6X3;!$(C[OX2H])]
242    
243    
244     Enamine: [NX3;$([NH2][CX3]),$([NH1]([CX3])[#6]),$([N]([CX3])([#6])[#6]);!$([N]*=[#7,#8,#15,#16])][CX3;$([CH]),$([C][#6])]=[CX3]
245     # does not hit amines attached to aromatic rings, nor may the nitrogen be aromatic
246    
247     Thioenol: [SX2H][CX3;$([H1]),$(C[#6])]=[CX3]
248    
249     Thioenolether: [SX2]([#6;!$(C=[N,O,S])])[CX3;$(C[#6]),$([CH])]=[CX3]
250    
251    
252     # I.4: Three Carbon-Hetero Bonds (Carboxyl and Derivatives)
253     # ------------------------------
254    
255     Acylchloride: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[ClX1]
256    
257     Acylfluoride: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[FX1]
258    
259     Acylbromide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[BrX1]
260    
261     Acyliodide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[IX1]
262    
263     Acylhalide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[FX1,ClX1,BrX1,IX1]
264     # all of the above
265    
266    
267     # The following contains all simple carboxylic combinations of O, N, S, & Hal -
268     # - acids, esters, amides, ... as well as a few extra cases (anhydride, hydrazide...)
269     # Cyclic structures (including aromats) like lactones, lactames, ... got their own
270     # definitions. Structures where both heteroatoms are part of an aromatic ring
271     # (oxazoles, imidazoles, ...) were excluded.
272    
273     Carboxylic_acid: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[$([OX2H]),$([OX1-])]
274     # includes carboxylate anions
275    
276     Carboxylic_ester: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[OX2][#6;!$(C=[O,N,S])]
277     # does not hit anhydrides or lactones
278    
279     Lactone: [#6][#6X3R](=[OX1])[#8X2][#6;!$(C=[O,N,S])]
280     # may also be aromatic
281    
282     Carboxylic_anhydride: [CX3;$([H0][#6]),$([H1])](=[OX1])[#8X2][CX3;$([H0][#6]),$([H1])](=[OX1])
283     # anhydride formed by two carboxylic acids, no mixed anhydrides (e.g. between carboxylic acid and sulfuric acid); may be part of a ring, even aromatic
284    
285     Carboxylic_acid_derivative: [$([#6X3H0][#6]),$([#6X3H])](=[!#6])[!#6]
286     # includes most of the structures of I.4 and many more, also 1,3-heteroaromatics such as isoxazole
287    
288     Carbothioic_acid: [CX3;!R;$([C][#6]),$([CH]);$([C](=[OX1])[$([SX2H]),$([SX1-])]),$([C](=[SX1])[$([OX2H]),$([OX1-])])]
289     # hits both tautomeric forms, as well as anions
290    
291     Carbothioic_S_ester: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[SX2][#6;!$(C=[O,N,S])]
292    
293     Carbothioic_S_lactone: [#6][#6X3R](=[OX1])[#16X2][#6;!$(C=[O,N,S])]
294     # may also be aromatic
295    
296     Carbothioic_O_ester: [CX3;$([H0][#6]),$([H1])](=[SX1])[OX2][#6;!$(C=[O,N,S])]
297    
298     Carbothioic_O_lactone: [#6][#6X3R](=[SX1])[#8X2][#6;!$(C=[O,N,S])]
299    
300     Carbothioic_halide: [CX3;$([H0][#6]),$([H1])](=[SX1])[FX1,ClX1,BrX1,IX1]
301    
302     Carbodithioic_acid: [CX3;!R;$([C][#6]),$([CH]);$([C](=[SX1])[SX2H])]
303    
304     Carbodithioic_ester: [CX3;!R;$([C][#6]),$([CH]);$([C](=[SX1])[SX2][#6;!$(C=[O,N,S])])]
305    
306     Carbodithiolactone: [#6][#6X3R](=[SX1])[#16X2][#6;!$(C=[O,N,S])]
307    
308    
309     Amide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
310     # does not hit lactames
311    
312     Primary_amide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[NX3H2]
313    
314     Secondary_amide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3H1][#6;!$(C=[O,N,S])]
315    
316     Tertiary_amide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3H0]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])]
317    
318     Lactam: [#6R][#6X3R](=[OX1])[#7X3;$([H1][#6;!$(C=[O,N,S])]),$([H0]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
319     # cyclic amides, may also be aromatic
320    
321     Alkyl_imide: [#6X3;$([H0][#6]),$([H1])](=[OX1])[#7X3H0]([#6])[#6X3;$([H0][#6]),$([H1])](=[OX1])
322     # may be part of a ring, even aromatic. only C allowed at central N. May also be triacyl amide
323    
324     N_hetero_imide: [#6X3;$([H0][#6]),$([H1])](=[OX1])[#7X3H0]([!#6])[#6X3;$([H0][#6]),$([H1])](=[OX1])
325     # everything else than H or C at central N
326    
327     Imide_acidic: [#6X3;$([H0][#6]),$([H1])](=[OX1])[#7X3H1][#6X3;$([H0][#6]),$([H1])](=[OX1])
328     # can be deprotonated
329    
330     Thioamide: [$([CX3;!R][#6]),$([CX3H;!R])](=[SX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
331     # does not hit thiolactames
332    
333     Thiolactam: [#6R][#6X3R](=[SX1])[#7X3;$([H1][#6;!$(C=[O,N,S])]),$([H0]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
334     # cyclic thioamides, may also be aromatic
335    
336    
337     Oximester: [#6X3;$([H0][#6]),$([H1])](=[OX1])[#8X2][#7X2]=,:[#6X3;$([H0]([#6])[#6]),$([H1][#6]),$([H2])]
338     # may also be part of a ring / aromatic
339    
340     Amidine: [NX3;!$(NC=[O,S])][CX3;$([CH]),$([C][#6])]=[NX2;!$(NC=[O,S])]
341     # only basic amidines, not as part of aromatic ring (e.g. imidazole)
342    
343     Hydroxamic_acid: [CX3;$([H0][#6]),$([H1])](=[OX1])[#7X3;$([H1]),$([H0][#6;!$(C=[O,N,S])])][$([OX2H]),$([OX1-])]
344    
345     Hydroxamic_acid_ester: [CX3;$([H0][#6]),$([H1])](=[OX1])[#7X3;$([H1]),$([H0][#6;!$(C=[O,N,S])])][OX2][#6;!$(C=[O,N,S])]
346     # does not hit anhydrides of carboxylic acids with hydroxamic acids
347    
348    
349     Imidoacid: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[$([OX2H]),$([OX1-])]
350     # not cyclic
351    
352     Imidoacid_cyclic: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[$([OX2H]),$([OX1-])]
353     # the enamide-form of lactames. may be aromatic like 2-hydroxypyridine
354    
355     Imidoester: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[OX2][#6;!$(C=[O,N,S])]
356     # esters of the above structures. no anhydrides.
357    
358     Imidolactone: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[OX2][#6;!$(C=[O,N,S])]
359     # no oxazoles and similar
360    
361     Imidothioacid: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[$([SX2H]),$([SX1-])]
362     # not cyclic
363    
364     Imidothioacid_cyclic: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[$([SX2H]),$([SX1-])]
365     # the enamide-form of thiolactames. may be aromatic like 2-thiopyridine
366    
367     Imidothioester: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[SX2][#6;!$(C=[O,N,S])]
368     # thioesters of the above structures. no anhydrides.
369    
370     Imidothiolactone: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[SX2][#6;!$(C=[O,N,S])]
371     # no thioxazoles and similar
372    
373     Amidine: [#7X3v3;!$(N([#6X3]=[#7X2])C=[O,S])][CX3R0;$([H1]),$([H0][#6])]=[NX2v3;!$(N(=[#6X3][#7X3])C=[O,S])]
374     # only basic amidines, not substituted by carbonyl or thiocarbonyl, not as part of a ring
375    
376     Imidolactam: [#6][#6X3R;$([H0](=[NX2;!$(N(=[#6X3][#7X3])C=[O,S])])[#7X3;!$(N([#6X3]=[#7X2])C=[O,S])]),$([H0](-[NX3;!$(N([#6X3]=[#7X2])C=[O,S])])=,:[#7X2;!$(N(=[#6X3][#7X3])C=[O,S])])]
377     # one of the two C~N bonds is part of a ring (may be aromatic), but not both - thus no imidazole
378    
379     Imidoylhalide: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[FX1,ClX1,BrX1,IX1]
380     # not cyclic
381    
382     Imidoylhalide_cyclic: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[FX1,ClX1,BrX1,IX1]
383     # may also be aromatic
384    
385     # may be ring, aromatic, substituted with carbonyls, hetero, ...
386     # (everything else would get too complicated)
387    
388     Amidrazone: [$([$([#6X3][#6]),$([#6X3H])](=[#7X2v3])[#7X3v3][#7X3v3]),$([$([#6X3][#6]),$([#6X3H])]([#7X3v3])=[#7X2v3][#7X3v3])]
389     # hits both tautomers. as above, it may be ring, aromatic, substituted with carbonyls, hetero, ...
390    
391    
392     Alpha_aminoacid: [NX3,NX4+;!$([N]~[!#6]);!$([N]*~[#7,#8,#15,#16])][C][CX3](=[OX1])[OX2H,OX1-]
393     # N may be alkylated, but not part of an amide (as in peptides), ionic forms are included
394     # includes also non-natural aminoacids with double-bonded or two aliph./arom. substituents at alpha-C
395     # N may not be aromatic as in 1H-pyrrole-2-carboxylic acid
396    
397     Alpha_hydroxyacid: [OX2H][C][CX3](=[OX1])[OX2H,OX1-]
398    
399     Peptide_middle: [NX3;$([N][CX3](=[OX1])[C][NX3,NX4+])][C][CX3](=[OX1])[NX3;$([N][C][CX3](=[OX1])[NX3,OX2,OX1-])]
400     # finds peptidic structures which are neither C- nor N-terminal. Both neighbours must be amino-acids/peptides
401    
402     Peptide_C_term: [NX3;$([N][CX3](=[OX1])[C][NX3,NX4+])][C][CX3](=[OX1])[OX2H,OX1-]
403     # finds C-terminal amino acids
404    
405     Peptide_N_term: [NX3,NX4+;!$([N]~[!#6]);!$([N]*~[#7,#8,#15,#16])][C][CX3](=[OX1])[NX3;$([N][C][CX3](=[OX1])[NX3,OX2,OX1-])]
406     # finds N-terminal amino acids. As above, N may be substituted, but not part of an amide-bond.
407    
408    
409     Carboxylic_orthoester: [#6][OX2][CX4;$(C[#6]),$([CH])]([OX2][#6])[OX2][#6]
410     # hits also anhydride-like structures (e. g. HC(OMe)2-OC=O residues)
411    
412     Ketene: [CX3]=[CX2]=[OX1]
413    
414     Ketenacetal: [#7X3,#8X2,#16X2;$(*[#6,#14])][#6X3]([#7X3,#8X2,#16X2;$(*[#6,#14])])=[#6X3]
415     # includes aminals, silylacetals, ketenesters, etc. C=C DB is not aromatic, everything else may be
416    
417     Nitrile: [NX1]#[CX2]
418     # includes cyanhydrines
419    
420     Isonitrile: [CX1-]#[NX2+]
421    
422    
423     Vinylogous_carbonyl_or_carboxyl_derivative: [#6X3](=[OX1])[#6X3]=,:[#6X3][#7,#8,#16,F,Cl,Br,I]
424     # may be part of a ring, even aromatic
425    
426     Vinylogous_acid: [#6X3](=[OX1])[#6X3]=,:[#6X3][$([OX2H]),$([OX1-])]
427    
428     Vinylogous_ester: [#6X3](=[OX1])[#6X3]=,:[#6X3][#8X2][#6;!$(C=[O,N,S])]
429    
430     Vinylogous_amide: [#6X3](=[OX1])[#6X3]=,:[#6X3][#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
431    
432     Vinylogous_halide: [#6X3](=[OX1])[#6X3]=,:[#6X3][FX1,ClX1,BrX1,IX1]
433    
434    
435    
436     # I.5: Four Carbon-Hetero Bonds (Carbonic Acid and Derivatives)
437     # -----------------------------
438    
439     Carbonic_acid_dieester: [#6;!$(C=[O,N,S])][#8X2][#6X3](=[OX1])[#8X2][#6;!$(C=[O,N,S])]
440     # may be part of a ring, even aromatic
441    
442     Carbonic_acid_esterhalide: [#6;!$(C=[O,N,S])][OX2;!R][CX3](=[OX1])[FX1,ClX1,BrX1,IX1]
443    
444     Carbonic_acid_monoester: [#6;!$(C=[O,N,S])][OX2;!R][CX3](=[OX1])[$([OX2H]),$([OX1-])]
445     # unstable
446    
447     Carbonic_acid_derivatives: [!#6][#6X3](=[!#6])[!#6]
448    
449    
450     Thiocarbonic_acid_dieester: [#6;!$(C=[O,N,S])][#8X2][#6X3](=[SX1])[#8X2][#6;!$(C=[O,N,S])]
451     # may be part of a ring, even aromatic
452    
453     Thiocarbonic_acid_esterhalide: [#6;!$(C=[O,N,S])][OX2;!R][CX3](=[SX1])[FX1,ClX1,BrX1,IX1]
454    
455     Thiocarbonic_acid_monoester: [#6;!$(C=[O,N,S])][OX2;!R][CX3](=[SX1])[$([OX2H]),$([OX1-])]
456    
457    
458     Urea: [#7X3;!$([#7][!#6])][#6X3](=[OX1])[#7X3;!$([#7][!#6])]
459     # no check whether part of imide, biuret, etc. Aromatic structures are only hit if
460     # both N share no double bonds, like in the dioxo-form of uracil
461    
462     Thiourea: [#7X3;!$([#7][!#6])][#6X3](=[SX1])[#7X3;!$([#7][!#6])]
463    
464     Isourea: [#7X2;!$([#7][!#6])]=,:[#6X3]([#8X2&!$([#8][!#6]),OX1-])[#7X3;!$([#7][!#6])]
465     # O may be substituted. no check whether further amide-like bonds are present. Aromatic
466     # structures are only hit if single bonded N shares no additional double bond, like in
467     # the 1-hydroxy-3-oxo form of uracil
468    
469     Isothiourea: [#7X2;!$([#7][!#6])]=,:[#6X3]([#16X2&!$([#16][!#6]),SX1-])[#7X3;!$([#7][!#6])]
470    
471     Guanidine: [N;v3X3,v4X4+][CX3](=[N;v3X2,v4X3+])[N;v3X3,v4X4+]
472     # also hits guanidinium salts. v3 and v4 to avoid nitroamidines
473    
474     Carbaminic_acid: [NX3]C(=[OX1])[O;X2H,X1-]
475     # quite unstable, unlikely to be found. Also hits salts
476    
477     Urethan: [#7X3][#6](=[OX1])[#8X2][#6]
478     # also hits when part of a ring, no check whether the last C is part of carbonyl
479    
480     Biuret: [#7X3][#6](=[OX1])[#7X3][#6](=[OX1])[#7X3]
481    
482     Semicarbazide: [#7X3][#7X3][#6X3]([#7X3;!$([#7][#7])])=[OX1]
483    
484     Carbazide: [#7X3][#7X3][#6X3]([#7X3][#7X3])=[OX1]
485    
486     Semicarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3;!$([#7][#7])])=[OX1]
487    
488     Carbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3][#7X3])=[OX1]
489    
490     Thiosemicarbazide: [#7X3][#7X3][#6X3]([#7X3;!$([#7][#7])])=[SX1]
491    
492     Thiocarbazide: [#7X3][#7X3][#6X3]([#7X3][#7X3])=[SX1]
493    
494     Thiosemicarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3;!$([#7][#7])])=[SX1]
495    
496     Thiocarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3][#7X3])=[SX1]
497    
498    
499     Isocyanate: [NX2]=[CX2]=[OX1]
500    
501     Cyanate: [OX2][CX2]#[NX1]
502    
503     Isothiocyanate: [NX2]=[CX2]=[SX1]
504    
505     Thiocyanate: [SX2][CX2]#[NX1]
506    
507     Carbodiimide: [NX2]=[CX2]=[NX2]
508    
509     Orthocarbonic_derivatives: [CX4H0]([O,S,#7])([O,S,#7])([O,S,#7])[O,S,#7,F,Cl,Br,I]
510     # halogen allowed just once, to avoid mapping to -OCF3 and similar groups (much more
511     # stable as for example C(OCH3)4)
512    
513    
514     # I.6 Aromatics
515     # -------------
516    
517     # I know that this classification is not very logical, arylamines are found under I.2 ...
518    
519     Phenol: [OX2H][c]
520    
521     1,2-Diphenol: [OX2H][c][c][OX2H]
522    
523     Arylchloride: [Cl][c]
524    
525     Arylfluoride: [F][c]
526    
527     Arylbromide: [Br][c]
528    
529     Aryliodide: [I][c]
530    
531     Arylthiol: [SX2H][c]
532    
533     Iminoarene: [c]=[NX2;$([H1]),$([H0][#6;!$([C]=[N,S,O])])]
534     # N may be substituted with H or C, but not carbonyl or similar
535     # aromatic atom is always C, not S or P (these are not planar when substituted)
536    
537     Oxoarene: [c]=[OX1]
538    
539     Thioarene: [c]=[SX1]
540    
541     Hetero_N_basic_H: [nX3H1+0]
542     # as in pyrrole. uncharged to exclude pyridinium ions
543    
544     Hetero_N_basic_no_H: [nX3H0+0]
545     # as in N-methylpyrrole. uncharged to exclude pyridinium ions
546    
547     Hetero_N_nonbasic: [nX2,nX3+]
548     # as in pyridine, pyridinium
549    
550     Hetero_O: [o]
551    
552     Hetero_S: [sX2]
553     # X2 because Daylight's depictmatch falsely describes C1=CS(=O)C=C1 as aromatic
554     # (is not planar because of lonepair at S)
555    
556     Heteroaromatic: [a;!c]
557    
558    
559     # Part II: N, S, P, Si, B
560     # =======================
561    
562    
563     # II.1 Nitrogen
564     # -------------
565    
566     Nitrite: [NX2](=[OX1])[O;$([X2]),$([X1-])]
567     # hits nitrous acid, its anion, esters, and other O-substituted derivatives
568    
569     Thionitrite: [SX2][NX2]=[OX1]
570    
571     Nitrate: [$([NX3](=[OX1])(=[OX1])[O;$([X2]),$([X1-])]),$([NX3+]([OX1-])(=[OX1])[O;$([X2]),$([X1-])])]
572     # hits nitric acid, its anion, esters, and other O-substituted derivatives
573    
574     Nitro: [$([NX3](=O)=O),$([NX3+](=O)[O-])][!#8]
575     # hits nitro groups attached to C,N, ... but not nitrates
576    
577     Nitroso: [NX2](=[OX1])[!#7;!#8]
578     # no nitrites, no nitrosamines
579    
580     Azide: [NX1]~[NX2]~[NX2,NX1]
581     # hits both mesomeric forms, also anion
582    
583     Acylazide: [CX3](=[OX1])[NX2]~[NX2]~[NX1]
584    
585     Diazo: [$([#6]=[NX2+]=[NX1-]),$([#6-]-[NX2+]#[NX1])]
586    
587     Diazonium: [#6][NX2+]#[NX1]
588    
589     Nitrosamine: [#7;!$(N*=O)][NX2]=[OX1]
590    
591     Nitrosamide: [NX2](=[OX1])[$(N-*=O)]
592     # includes nitrososulfonamides
593    
594     N-Oxide: [$([#7+][OX1-]),$([#7v5]=[OX1]);!$([#7](~[O])~[O]);!$([#7]=[#7])]
595     # Hits both forms. Won't hit azoxy, nitro, nitroso, or nitrate.
596    
597    
598     Hydrazine: [NX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6]);!$(NC=[O,N,S])][NX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6]);!$(NC=[O,N,S])]
599     # no hydrazides
600    
601     Hydrazone: [NX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6]);!$(NC=[O,N,S])][NX2]=[#6]
602    
603     Hydroxylamine: [NX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6]);!$(NC=[O,N,S])][OX2;$([H1]),$(O[#6;!$(C=[N,O,S])])]
604     # no discrimination between O-, N-, and O,N-substitution
605    
606    
607     # II.2 Sulfur
608     # -----------
609    
610     Sulfon: [$([SX4](=[OX1])(=[OX1])([#6])[#6]),$([SX4+2]([OX1-])([OX1-])([#6])[#6])]
611     # can't be aromatic, thus S and not #16
612    
613     Sulfoxide: [$([SX3](=[OX1])([#6])[#6]),$([SX3+]([OX1-])([#6])[#6])]
614    
615     Sulfonium: [S+;!$([S]~[!#6]);!$([S]*~[#7,#8,#15,#16])]
616     # can't be aromatic, thus S and not #16
617    
618     Sulfuric_acid: [SX4](=[OX1])(=[OX1])([$([OX2H]),$([OX1-])])[$([OX2H]),$([OX1-])]
619     # includes anions
620    
621     Sulfuric_monoester: [SX4](=[OX1])(=[OX1])([$([OX2H]),$([OX1-])])[OX2][#6;!$(C=[O,N,S])]
622    
623     Sulfuric_diester: [SX4](=[OX1])(=[OX1])([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])]
624    
625     Sulfuric_monoamide: [SX4](=[OX1])(=[OX1])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[$([OX2H]),$([OX1-])]
626    
627     Sulfuric_diamide: [SX4](=[OX1])(=[OX1])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
628    
629     Sulfuric_esteramide: [SX4](=[OX1])(=[OX1])([#7X3][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])]
630    
631     Sulfuric_derivative: [SX4D4](=[!#6])(=[!#6])([!#6])[!#6]
632     # everything else (would not be a "true" derivative of sulfuric acid, if one of the substituents were less electronegative
633     # than sulfur, but this should be very very rare, anyway)
634    
635    
636    
637     #### sulfurous acid and derivatives missing!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
638    
639    
640    
641    
642     Sulfonic_acid: [SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[$([OX2H]),$([OX1-])]
643    
644     Sulfonamide: [SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
645    
646     Sulfonic_ester: [SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[OX2][#6;!$(C=[O,N,S])]
647    
648     Sulfonic_halide: [SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[FX1,ClX1,BrX1,IX1]
649    
650     Sulfonic_derivative: [SX4;$([H1]),$([H0][#6])](=[!#6])(=[!#6])[!#6]
651     # includes all of the above and many more
652     # for comparison: this is what "all sulfonic derivatives but not the ones above" would look like:
653     # [$([SX4;$([H1]),$([H0][#6])](=[!#6])(=[!#6;!O])[!#6]),$([SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[!$([FX1,ClX1,BrX1,IX1]);!$([#6]);!$([OX2H]);!$([OX1-]);!$([OX2][#6;!$(C=[O,N,S])]);!$([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])])]
654    
655    
656     Sulfinic_acid: [SX3;$([H1]),$([H0][#6])](=[OX1])[$([OX2H]),$([OX1-])]
657    
658     Sulfinic_amide: [SX3;$([H1]),$([H0][#6])](=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
659    
660     Sulfinic_ester: [SX3;$([H1]),$([H0][#6])](=[OX1])[OX2][#6;!$(C=[O,N,S])]
661    
662     Sulfinic_halide: [SX3;$([H1]),$([H0][#6])](=[OX1])[FX1,ClX1,BrX1,IX1]
663    
664     Sulfinic_derivative: [SX3;$([H1]),$([H0][#6])](=[!#6])[!#6]
665    
666     Sulfenic_acid: [SX2;$([H1]),$([H0][#6])][$([OX2H]),$([OX1-])]
667    
668     Sulfenic_amide: [SX2;$([H1]),$([H0][#6])][#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
669    
670     Sulfenic_ester: [SX2;$([H1]),$([H0][#6])][OX2][#6;!$(C=[O,N,S])]
671    
672     Sulfenic_halide: [SX2;$([H1]),$([H0][#6])][FX1,ClX1,BrX1,IX1]
673    
674     Sulfenic_derivative: [SX2;$([H1]),$([H0][#6])][!#6]
675    
676    
677     # II.3 Phosphorus
678     # ---------------
679    
680     Phosphine: [PX3;$([H3]),$([H2][#6]),$([H1]([#6])[#6]),$([H0]([#6])([#6])[#6])]
681     # similar to amine, but less restrictive: includes also amide- and aminal-analogues
682    
683     Phosphine_oxide: [PX4;$([H3]=[OX]),$([H2](=[OX])[#6]),$([H1](=[OX])([#6])[#6]),$([H0](=[OX])([#6])([#6])[#6])]
684    
685     Phosphonium: [P+;!$([P]~[!#6]);!$([P]*~[#7,#8,#15,#16])]
686     # similar to Ammonium
687    
688     Phosphorylen: [PX4;$([H3]=[CX3]),$([H2](=[CX3])[#6]),$([H1](=[CX3])([#6])[#6]),$([H0](=[CX3])([#6])([#6])[#6])]
689    
690    
691     # conventions for the following acids and derivatives:
692     # acids find protonated and deprotonated acids
693     # esters do not find mixed anhydrides ( ...P-O-C(=O))
694     # derivatives: substituents which go in place of the OH and =O are not H or C (may also be O,
695     # thus including acids and esters)
696    
697     Phosphonic_acid: [PX4;$([H1]),$([H0][#6])](=[OX1])([$([OX2H]),$([OX1-])])[$([OX2H]),$([OX1-])]
698     # includes anions
699    
700     Phosphonic_monoester: [PX4;$([H1]),$([H0][#6])](=[OX1])([$([OX2H]),$([OX1-])])[OX2][#6;!$(C=[O,N,S])]
701    
702     Phosphonic_diester: [PX4;$([H1]),$([H0][#6])](=[OX1])([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])]
703    
704     Phosphonic_monoamide: [PX4;$([H1]),$([H0][#6])](=[OX1])([$([OX2H]),$([OX1-])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
705    
706     Phosphonic_diamide: [PX4;$([H1]),$([H0][#6])](=[OX1])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
707    
708     Phosphonic_esteramide: [PX4;$([H1]),$([H0][#6])](=[OX1])([OX2][#6;!$(C=[O,N,S])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
709    
710     Phosphonic_acid_derivative: [PX4;$([H1]),$([H0][#6])](=[!#6])([!#6])[!#6]
711     # all of the above and much more
712    
713    
714     Phosphoric_acid: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([$([OX2H]),$([OX1-])])[$([OX2H]),$([OX1-])]
715     # includes anions
716    
717     Phosphoric_monoester: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([$([OX2H]),$([OX1-])])[OX2][#6;!$(C=[O,N,S])]
718    
719     Phosphoric_diester: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])]
720    
721     Phosphoric_triester: [PX4D4](=[OX1])([OX2][#6;!$(C=[O,N,S])])([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])]
722    
723     Phosphoric_monoamide: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([$([OX2H]),$([OX1-])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
724    
725     Phosphoric_diamide: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
726    
727     Phosphoric_triamide: [PX4D4](=[OX1])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
728    
729     Phosphoric_monoestermonoamide: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([OX2][#6;!$(C=[O,N,S])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
730    
731     Phosphoric_diestermonoamide: [PX4D4](=[OX1])([OX2][#6;!$(C=[O,N,S])])([OX2][#6;!$(C=[O,N,S])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
732    
733     Phosphoric_monoesterdiamide: [PX4D4](=[OX1])([OX2][#6;!$(C=[O,N,S])])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
734    
735     Phosphoric_acid_derivative: [PX4D4](=[!#6])([!#6])([!#6])[!#6]
736    
737    
738     Phosphinic_acid: [PX4;$([H2]),$([H1][#6]),$([H0]([#6])[#6])](=[OX1])[$([OX2H]),$([OX1-])]
739    
740     Phosphinic_ester: [PX4;$([H2]),$([H1][#6]),$([H0]([#6])[#6])](=[OX1])[OX2][#6;!$(C=[O,N,S])]
741    
742     Phosphinic_amide: [PX4;$([H2]),$([H1][#6]),$([H0]([#6])[#6])](=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
743    
744     Phosphinic_acid_derivative: [PX4;$([H2]),$([H1][#6]),$([H0]([#6])[#6])](=[!#6])[!#6]
745    
746    
747     Phosphonous_acid: [PX3;$([H1]),$([H0][#6])]([$([OX2H]),$([OX1-])])[$([OX2H]),$([OX1-])]
748    
749     Phosphonous_monoester: [PX3;$([H1]),$([H0][#6])]([$([OX2H]),$([OX1-])])[OX2][#6;!$(C=[O,N,S])]
750    
751     Phosphonous_diester: [PX3;$([H1]),$([H0][#6])]([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])]
752    
753     Phosphonous_monoamide: [PX3;$([H1]),$([H0][#6])]([$([OX2H]),$([OX1-])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
754    
755     Phosphonous_diamide: [PX3;$([H1]),$([H0][#6])]([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
756    
757     Phosphonous_esteramide: [PX3;$([H1]),$([H0][#6])]([OX2][#6;!$(C=[O,N,S])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
758    
759     Phosphonous_derivatives: [PX3;$([D2]),$([D3][#6])]([!#6])[!#6]
760    
761    
762     Phosphinous_acid: [PX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6])][$([OX2H]),$([OX1-])]
763    
764     Phosphinous_ester: [PX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6])][OX2][#6;!$(C=[O,N,S])]
765    
766     Phosphinous_amide: [PX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6])][#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
767    
768     Phosphinous_derivatives: [PX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6])][!#6]
769    
770    
771     # II.4 Silicon
772     # ------------
773    
774     Quart_silane: [SiX4]([#6])([#6])([#6])[#6]
775     # four C-substituents. non-reactive, non-toxic, in experimental phase for drug development
776    
777     Non-quart_silane: [SiX4;$([H1]([#6])([#6])[#6]),$([H2]([#6])[#6]),$([H3][#6]),$([H4])]
778     # has 1-4 hydride(s), reactive. Daylight's depictmatch does not add hydrogens automatically to
779     # the free positions at Si, thus the H counts had to be stated explicitly in the pattern
780    
781     Silylmonohalide: [SiX4]([FX1,ClX1,BrX1,IX1])([#6])([#6])[#6]
782     # reagents for inserting protection groups
783    
784     Het_trialkylsilane: [SiX4]([!#6])([#6])([#6])[#6]
785     # mostly acid-labile protection groups such as trimethylsilyl-ethers
786    
787     Dihet_dialkylsilane: [SiX4]([!#6])([!#6])([#6])[#6]
788    
789     Trihet_alkylsilane: [SiX4]([!#6])([!#6])([!#6])[#6]
790    
791     Silicic_acid_derivative: [SiX4]([!#6])([!#6])([!#6])[!#6]
792     # four substituents which are neither C nor H
793    
794    
795     # II.5 Boron
796     # ----------
797    
798     Trialkylborane: [BX3]([#6])([#6])[#6]
799     # also carbonyls allowed
800    
801     Boric_acid_derivatives: [BX3]([!#6])([!#6])[!#6]
802     # includes acids, esters, amides, ... H-substituent at B is very rare.
803    
804     Boronic_acid_derivative: [BX3]([#6])([!#6])[!#6]
805     # exactly one C-substituent at B, two non-C substituents; includes acids, esters, amides, ...
806    
807     Borohydride: [BH1,BH2,BH3,BH4]
808     # at least one H attached to B
809    
810     Quartary_boron: [BX4]
811     # mostly borates (negative charge), in complex with Lewis-base
812    
813    
814    
815     # Part III: Some Special Patterns
816     # ===============================
817    
818    
819     # III.1 Chains
820     # ------------
821    
822     # some simple chains
823    
824    
825    
826     # III.2 Rings
827     # -----------
828    
829     Aromatic: a
830    
831     Heterocyclic: [!#6;!R0]
832     # may be aromatic or not
833    
834     Epoxide: [OX2r3]1[#6r3][#6r3]1
835     # toxic/reactive. may be annelated to aromat, but must not be aromatic itself (oxirane-2,3-dione)
836    
837     NH_aziridine: [NX3H1r3]1[#6r3][#6r3]1
838     # toxic/reactive according to Maybridge's garbage filter
839    
840     Spiro: [D4R;$(*(@*)(@*)(@*)@*)]
841     # at least two different rings can be found which are sharing just one atom
842    
843     Annelated_rings: [R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])]@[R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])]
844     # two different rings sharing exactly two atoms
845    
846     Bridged_rings: [R;$(*(@*)(@*)@*);!$([D4R;$(*(@*)(@*)(@*)@*)]);!$([R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])]@[R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])])]
847     # part of two or more rings, not spiro, not annelated -> finds bridgehead atoms,
848     # but only if they are not annelated at the same time - otherwise impossible (?)
849     # to distinguish from non-bridgehead annelated atoms
850    
851     # some basic ring-patterns (just size, no other information):
852    
853    
854    
855    
856    
857     # III.3 Sugars and Nucleosides/Nucleotides, Steroids
858     # --------------------------------------------------
859    
860     # because of the large variety of sugar derivatives, different patterns can be applied.
861     # The choice of patterns and their combinations will depend on the contents of the database
862     # e.g. natural products, nucleoside analogues with modified sugars, ... as well as on the
863     # desired restriction
864    
865    
866     Sugar_pattern_1: [OX2;$([r5]1@C@C@C(O)@C1),$([r6]1@C@C@C(O)@C(O)@C1)]
867     # 5 or 6-membered ring containing one O and at least one (r5) or two (r6) oxygen-substituents.
868    
869     Sugar_pattern_2: [OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)]
870     # 5 or 6-membered ring containing one O and an acetal-like bond at position 2.
871    
872     Sugar_pattern_combi: [OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C(O)@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C(O)@C(O)@C1)]
873     # combination of the two above
874    
875     Sugar_pattern_2_reducing: [OX2;$([r5]1@C(!@[OX2H1])@C@C@C1),$([r6]1@C(!@[OX2H1])@C@C@C@C1)]
876     # 5 or 6-membered cyclic hemi-acetal
877    
878     Sugar_pattern_2_alpha: [OX2;$([r5]1@[C@@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@[C@@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)]
879     # like Sugar_pattern_2 (acetal-like bond at position 2), restricted to the @@ configuration at that carbon
880    
881     Sugar_pattern_2_beta: [OX2;$([r5]1@[C@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@[C@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)]
882     # like Sugar_pattern_2 (acetal-like bond at position 2), restricted to the @ configuration at that carbon
883    
884     Poly_sugar_1: ([OX2;$([r5]1@C@C@C(O)@C1),$([r6]1@C@C@C(O)@C(O)@C1)].[OX2;$([r5]1@C@C@C(O)@C1),$([r6]1@C@C@C(O)@C(O)@C1)])
885     # pattern1 occurs more than once (in same molecule, but moieties don't have to be adjacent!)
886    
887     Poly_sugar_2: ([OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)].[OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)])
888     # pattern2 occurs more than once (in same molecule, but moieties don't have to be adjacent!)
889    
890    
891     # III.4 Everything else...
892     # ------------------------
893    
894     Conjugated_double_bond: *=*[*]=,#,:[*]
895    
896     Conjugated_tripple_bond: *#*[*]=,#,:[*]
897    
898     Cis_double_bond: */[D2]=[D2]\*
899     # only one single-bonded substituent on each DB-atom. no aromatic rings.
900     # only found when character of DB is explicitly stated.
901    
902     Trans_double_bond: */[D2]=[D2]/*
903     # analog
904    
905     Mixed_anhydrides: [$(*=O),$([#16,#14,#5]),$([#7]([#6]=[OX1]))][#8X2][$(*=O),$([#16,#14,#5]),$([#7]([#6]=[OX1]))]
906     # should hit all combinations of two acids
907    
908     Halogen_on_hetero: [FX1,ClX1,BrX1,IX1][!#6]
909    
910     Halogen_multi_subst: [F,Cl,Br,I;!$([X1]);!$([X0-])]
911     # Halogen which is not mono-substituted nor an anion, e.g. chlorate.
912     # Most of these cases should be also filtered by Halogen_on_hetero.
913    
914     Trifluoromethyl: [FX1][CX4;!$([H0][Cl,Br,I]);!$([C]([F])([F])([F])[F])]([FX1])([FX1])
915     # C with three F attached, connected to anything which is not another halogen (CF4 is excluded)
916    
917     C_ONS_bond: [#6]~[#7,#8,#16]
918     # probably all drug-like molecules have at least one O, N, or S connected to a C -> nice filter
919    
920     Mixture: (*).(*)
921     # two or more separate parts, may also be salt
922     # Did not work with Openbabel 1.0 (no component-level grouping)
923    
924    
925     Charged: [!+0]
926    
927     Anion: [-1,-2,-3,-4,-5,-6,-7]
928    
929     Kation: [+1,+2,+3,+4,+5,+6,+7]
930    
931     Salt: ([-1,-2,-3,-4,-5,-6,-7]).([+1,+2,+3,+4,+5,+6,+7])
932     # two or more separate components with opposite charges
933    
934     Zwitterion: ([-1,-2,-3,-4,-5,-6,-7].[+1,+2,+3,+4,+5,+6,+7])
935     # both negative and positive charges somewhere within the same molecule.
936    
937     1,3-Tautomerizable: [$([#7X2,OX1,SX1]=*[!H0;!$([a;!n])]),$([#7X3,OX2,SX2;!H0]*=*),$([#7X3,OX2,SX2;!H0]*:n)]
938     # 1,3 migration of H allowed. Includes keto/enol and amide/enamide.
939     # Aromatic rings must stay aromatic - no keto form of phenol
940    
941     1,5-Tautomerizable: [$([#7X2,OX1,SX1]=,:**=,:*[!H0;!$([a;!n])]),$([#7X3,OX2,SX2;!H0]*=**=*),$([#7X3,OX2,SX2;!H0]*=,:**:n)]
942    
943     Rotatable_bond: [!$(*#*)&!D1]-!@[!$(*#*)&!D1]
944     # taken from http://www.daylight.com/support/contrib/smarts/content.html
945    
946     Michael_acceptor: [CX3]=[CX3][$([CX3]=[O,N,S]),$(C#[N]),$([S,P]=[OX1]),$([NX3]=O),$([NX3+](=O)[O-])]
947     # the classical case: C=C near carbonyl, nitrile, nitro, or similar
948     # Oxo-heteroaromats and similar are not included.
949    
950     Dicarbodiazene: [CX3](=[OX1])[NX2]=[NX2][CX3](=[OX1])
951     # Michael-like acceptor, see Mitsunobu reaction
952    
953     # H-Bond_donor:
954    
955     # H-Bond_acceptor:
956    
957     # Pos_ionizable:
958    
959     # Neg_ionizable:
960    
961     # Unlikely_ions:
962     # O+,N-,C+,C-, ...
963    
964     CH-acidic: [$([CX4;!$([H0]);!$(C[!#6;!$([P,S]=O);!$(N(~O)~O)])][$([CX3]=[O,N,S]),$(C#[N]),$([S,P]=[OX1]),$([NX3]=O),$([NX3+](=O)[O-]);!$(*[S,O,N;H1,H2]);!$([*+0][S,O;X1-])]),$([CX4;!$([H0])]1[CX3]=[CX3][CX3]=[CX3]1)]
965     # C-H alpha to carbonyl, nitro or similar, C is not double-bonded, only C, H, S,P=O and nitro substituents allowed.
966     # pentadiene is included. acids, their salts, prim./sec. amides, and imides are excluded.
967     # hits also CH-acidic_strong
968    
969     CH-acidic_strong: [CX4;!$([H0]);!$(C[!#6;!$([P,S]=O);!$(N(~O)~O)])]([$([CX3]=[O,N,S]),$(C#[N]),$([S,P]=[OX1]),$([NX3]=O),$([NX3+](=O)[O-]);!$(*[S,O,N;H1,H2]);!$([*+0][S,O;X1-])])[$([CX3]=[O,N,S]),$(C#[N]),$([S,P]=[OX1]),$([NX3]=O),$([NX3+](=O)[O-]);!$(*[S,O,N;H1,H2]);!$([*+0][S,O;X1-])]
970     # same as above (without pentadiene), but carbonyl or similar on two or three sides
971    
972     Chiral_center_specified: [$([*@](~*)(~*)(*)*),$([*@H](*)(*)*),$([*@](~*)(*)*),$([*@H](~*)~*)]
973     # Hits atoms with tetrahedral chirality, if chiral center is specified in the SMILES string
974     # depictmatch does not find oxonium, sulfonium, or sulfoxides!
975    
976     Chiral_center_unspecified: [$([*@?](~*)(~*)(*)*),$([*@?H](*)(*)*),$([*@?](~*)(*)*),$([*@?H](~*)~*)]
977     # Hits atoms with tetrahedral chirality, even if chiral center is not specified in the SMILES string