1 |
tim |
2440 |
/********************************************************************** |
2 |
|
|
typer.cpp - Open Babel atom typer. |
3 |
|
|
|
4 |
|
|
Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc. |
5 |
|
|
Some portions Copyright (C) 2001-2005 by Geoffrey R. Hutchison |
6 |
|
|
|
7 |
|
|
This file is part of the Open Babel project. |
8 |
|
|
For more information, see <http://openbabel.sourceforge.net/> |
9 |
|
|
|
10 |
|
|
This program is free software; you can redistribute it and/or modify |
11 |
|
|
it under the terms of the GNU General Public License as published by |
12 |
|
|
the Free Software Foundation version 2 of the License. |
13 |
|
|
|
14 |
|
|
This program is distributed in the hope that it will be useful, |
15 |
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 |
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 |
|
|
GNU General Public License for more details. |
18 |
|
|
***********************************************************************/ |
19 |
|
|
|
20 |
|
|
#include "mol.hpp" |
21 |
|
|
#include "typer.hpp" |
22 |
|
|
#include "atomtyp.hpp" |
23 |
|
|
#include "aromatic.hpp" |
24 |
|
|
|
25 |
|
|
#ifdef WIN32 |
26 |
|
|
#pragma warning (disable : 4786) |
27 |
|
|
#endif |
28 |
|
|
|
29 |
|
|
using namespace std; |
30 |
|
|
|
31 |
|
|
namespace OpenBabel |
32 |
|
|
{ |
33 |
|
|
|
34 |
|
|
OBAromaticTyper aromtyper; |
35 |
|
|
OBAtomTyper atomtyper; |
36 |
|
|
|
37 |
|
|
/*! \class OBAtomTyper |
38 |
|
|
\brief Assigns atom types, hybridization, implicit valence and formal charges |
39 |
|
|
|
40 |
|
|
The OBAtomTyper class is designed to read in a list of atom typing |
41 |
|
|
rules and apply them to molecules. The code that performs atom |
42 |
|
|
typing is not usually used directly as atom typing, hybridization |
43 |
|
|
assignment, implicit valence assignment and charge are all done |
44 |
|
|
automatically when their corresponding values are requested of |
45 |
|
|
atoms: |
46 |
|
|
\code |
47 |
|
|
atom->GetType(); |
48 |
|
|
atom->GetFormalCharge(); |
49 |
|
|
atom->GetHyb(); |
50 |
|
|
\endcode |
51 |
|
|
*/ |
52 |
|
|
OBAtomTyper::OBAtomTyper() |
53 |
|
|
{ |
54 |
|
|
_init = false; |
55 |
|
|
_dir = BABEL_DATADIR; |
56 |
|
|
_envvar = "BABEL_DATADIR"; |
57 |
|
|
_filename = "atomtyp.txt"; |
58 |
|
|
_subdir = "data"; |
59 |
|
|
_dataptr = AtomTypeData; |
60 |
|
|
} |
61 |
|
|
|
62 |
|
|
void OBAtomTyper::ParseLine(const char *buffer) |
63 |
|
|
{ |
64 |
|
|
vector<string> vs; |
65 |
|
|
OBSmartsPattern *sp; |
66 |
|
|
|
67 |
|
|
if (EQn(buffer,"INTHYB",6)) |
68 |
|
|
{ |
69 |
|
|
tokenize(vs,buffer); |
70 |
|
|
if (vs.empty() || vs.size() < 3) |
71 |
|
|
{ |
72 |
|
|
obErrorLog.ThrowError(__FUNCTION__, " Could not parse INTHYB line in atom type table from atomtyp.txt", obInfo); |
73 |
|
|
return; |
74 |
|
|
} |
75 |
|
|
|
76 |
|
|
sp = new OBSmartsPattern; |
77 |
|
|
if (sp->Init(vs[1])) |
78 |
|
|
_vinthyb.push_back(pair<OBSmartsPattern*,int> (sp,atoi((char*)vs[2].c_str()))); |
79 |
|
|
else |
80 |
|
|
{ |
81 |
|
|
delete sp; |
82 |
|
|
sp = NULL; |
83 |
|
|
obErrorLog.ThrowError(__FUNCTION__, " Could not parse INTHYB line in atom type table from atomtyp.txt", obInfo); |
84 |
|
|
return; |
85 |
|
|
} |
86 |
|
|
} |
87 |
|
|
else if (EQn(buffer,"IMPVAL",6)) |
88 |
|
|
{ |
89 |
|
|
tokenize(vs,buffer); |
90 |
|
|
if (vs.empty() || vs.size() < 3) |
91 |
|
|
{ |
92 |
|
|
obErrorLog.ThrowError(__FUNCTION__, " Could not parse IMPVAL line in atom type table from atomtyp.txt", obInfo); |
93 |
|
|
return; |
94 |
|
|
} |
95 |
|
|
|
96 |
|
|
sp = new OBSmartsPattern; |
97 |
|
|
if (sp->Init(vs[1])) |
98 |
|
|
_vimpval.push_back(pair<OBSmartsPattern*,int> (sp,atoi((char*)vs[2].c_str()))); |
99 |
|
|
else |
100 |
|
|
{ |
101 |
|
|
obErrorLog.ThrowError(__FUNCTION__, " Could not parse IMPVAL line in atom type table from atomtyp.txt", obInfo); |
102 |
|
|
delete sp; |
103 |
|
|
sp = NULL; |
104 |
|
|
return; |
105 |
|
|
} |
106 |
|
|
} |
107 |
|
|
else if (EQn(buffer,"EXTTYP",6)) |
108 |
|
|
{ |
109 |
|
|
tokenize(vs,buffer); |
110 |
|
|
if (vs.empty() || vs.size() < 3) |
111 |
|
|
{ |
112 |
|
|
obErrorLog.ThrowError(__FUNCTION__, " Could not parse EXTTYP line in atom type table from atomtyp.txt", obInfo); |
113 |
|
|
return; |
114 |
|
|
} |
115 |
|
|
sp = new OBSmartsPattern; |
116 |
|
|
if (sp->Init(vs[1])) |
117 |
|
|
_vexttyp.push_back(pair<OBSmartsPattern*,string> (sp,vs[2])); |
118 |
|
|
else |
119 |
|
|
{ |
120 |
|
|
delete sp; |
121 |
|
|
sp = NULL; |
122 |
|
|
obErrorLog.ThrowError(__FUNCTION__, " Could not parse EXTTYP line in atom type table from atomtyp.txt", obInfo); |
123 |
|
|
return; |
124 |
|
|
} |
125 |
|
|
} |
126 |
|
|
} |
127 |
|
|
|
128 |
|
|
OBAtomTyper::~OBAtomTyper() |
129 |
|
|
{ |
130 |
|
|
vector<pair<OBSmartsPattern*,int> >::iterator i; |
131 |
|
|
for (i = _vinthyb.begin();i != _vinthyb.end();i++) |
132 |
|
|
{ |
133 |
|
|
delete i->first; |
134 |
|
|
i->first = NULL; |
135 |
|
|
} |
136 |
|
|
for (i = _vimpval.begin();i != _vimpval.end();i++) |
137 |
|
|
{ |
138 |
|
|
delete i->first; |
139 |
|
|
i->first = NULL; |
140 |
|
|
} |
141 |
|
|
|
142 |
|
|
vector<pair<OBSmartsPattern*,string> >::iterator j; |
143 |
|
|
for (j = _vexttyp.begin();j != _vexttyp.end();j++) |
144 |
|
|
{ |
145 |
|
|
delete j->first; |
146 |
|
|
j->first = NULL; |
147 |
|
|
} |
148 |
|
|
|
149 |
|
|
} |
150 |
|
|
|
151 |
|
|
void OBAtomTyper::AssignTypes(OBMol &mol) |
152 |
|
|
{ |
153 |
|
|
if (!_init) |
154 |
|
|
Init(); |
155 |
|
|
|
156 |
|
|
obErrorLog.ThrowError(__FUNCTION__, |
157 |
|
|
"Ran OpenBabel::AssignTypes", obAuditMsg); |
158 |
|
|
|
159 |
|
|
mol.SetAtomTypesPerceived(); |
160 |
|
|
|
161 |
|
|
vector<vector<int> >::iterator j; |
162 |
|
|
vector<pair<OBSmartsPattern*,string> >::iterator i; |
163 |
|
|
|
164 |
|
|
for (i = _vexttyp.begin();i != _vexttyp.end();i++) |
165 |
|
|
if (i->first->Match(mol)) |
166 |
|
|
{ |
167 |
|
|
_mlist = i->first->GetMapList(); |
168 |
|
|
for (j = _mlist.begin();j != _mlist.end();j++) |
169 |
|
|
mol.GetAtom((*j)[0])->SetType(i->second); |
170 |
|
|
} |
171 |
|
|
} |
172 |
|
|
|
173 |
|
|
void OBAtomTyper::AssignHyb(OBMol &mol) |
174 |
|
|
{ |
175 |
|
|
if (!_init) |
176 |
|
|
Init(); |
177 |
|
|
|
178 |
|
|
aromtyper.AssignAromaticFlags(mol); |
179 |
|
|
|
180 |
|
|
mol.SetHybridizationPerceived(); |
181 |
|
|
obErrorLog.ThrowError(__FUNCTION__, |
182 |
|
|
"Ran OpenBabel::AssignHybridization", obAuditMsg); |
183 |
|
|
|
184 |
|
|
OBAtom *atom; |
185 |
|
|
vector<OBNodeBase*>::iterator k; |
186 |
|
|
for (atom = mol.BeginAtom(k);atom;atom = mol.NextAtom(k)) |
187 |
|
|
atom->SetHyb(0); |
188 |
|
|
|
189 |
|
|
vector<vector<int> >::iterator j; |
190 |
|
|
vector<pair<OBSmartsPattern*,int> >::iterator i; |
191 |
|
|
|
192 |
|
|
for (i = _vinthyb.begin();i != _vinthyb.end();i++) |
193 |
|
|
if (i->first->Match(mol)) |
194 |
|
|
{ |
195 |
|
|
_mlist = i->first->GetMapList(); |
196 |
|
|
for (j = _mlist.begin();j != _mlist.end();j++) |
197 |
|
|
mol.GetAtom((*j)[0])->SetHyb(i->second); |
198 |
|
|
} |
199 |
|
|
} |
200 |
|
|
|
201 |
|
|
void OBAtomTyper::AssignImplicitValence(OBMol &mol) |
202 |
|
|
{ |
203 |
|
|
// FF Make sure that valence has not been perceived |
204 |
|
|
if(mol.HasImplicitValencePerceived()) |
205 |
|
|
return; |
206 |
|
|
|
207 |
|
|
if (!_init) |
208 |
|
|
Init(); |
209 |
|
|
|
210 |
|
|
mol.SetImplicitValencePerceived(); |
211 |
|
|
obErrorLog.ThrowError(__FUNCTION__, |
212 |
|
|
"Ran OpenBabel::AssignImplicitValence", obAuditMsg); |
213 |
|
|
|
214 |
|
|
// FF Ensure that the aromatic typer will not be called |
215 |
|
|
int oldflags = mol.GetFlags(); // save the current state flags |
216 |
|
|
mol.SetAromaticPerceived(); // and set the aromatic perceived flag on |
217 |
|
|
|
218 |
|
|
OBAtom *atom; |
219 |
|
|
vector<OBNodeBase*>::iterator k; |
220 |
|
|
for (atom = mol.BeginAtom(k);atom;atom = mol.NextAtom(k)) |
221 |
|
|
atom->SetImplicitValence(atom->GetValence()); |
222 |
|
|
|
223 |
|
|
vector<vector<int> >::iterator j; |
224 |
|
|
vector<pair<OBSmartsPattern*,int> >::iterator i; |
225 |
|
|
|
226 |
|
|
for (i = _vimpval.begin();i != _vimpval.end();i++) |
227 |
|
|
if (i->first->Match(mol)) |
228 |
|
|
{ |
229 |
|
|
_mlist = i->first->GetMapList(); |
230 |
|
|
for (j = _mlist.begin();j != _mlist.end();j++) |
231 |
|
|
mol.GetAtom((*j)[0])->SetImplicitValence(i->second); |
232 |
|
|
} |
233 |
|
|
|
234 |
|
|
if (!mol.HasAromaticCorrected()) |
235 |
|
|
CorrectAromaticNitrogens(mol); |
236 |
|
|
|
237 |
|
|
for (atom = mol.BeginAtom(k);atom;atom = mol.NextAtom(k)) |
238 |
|
|
{ |
239 |
|
|
if (atom->GetImplicitValence() < atom->GetValence()) |
240 |
|
|
atom->SetImplicitValence(atom->GetValence()); |
241 |
|
|
} |
242 |
|
|
|
243 |
|
|
//FF moved to OBMol::AssignSpinMultiplicity(); |
244 |
|
|
//begin CM 18 Sept 2003 |
245 |
|
|
//if there are any explicit Hs on an atom, then they consitute all the Hs |
246 |
|
|
//Any discrepancy with the expected atom valency is because it is a radical of some sort |
247 |
|
|
//Also adjust the ImplicitValence for radical atoms |
248 |
|
|
//for (atom = mol.BeginAtom(k);atom;atom = mol.NextAtom(k)) |
249 |
|
|
// { |
250 |
|
|
// if (!atom->IsHydrogen() && atom->ExplicitHydrogenCount()!=0) |
251 |
|
|
// { |
252 |
|
|
// int diff=atom->GetImplicitValence() - (atom->GetHvyValence() + atom->ExplicitHydrogenCount()); |
253 |
|
|
// if (diff) |
254 |
|
|
// atom->SetSpinMultiplicity(diff+1);//radicals =2; all carbenes =3 |
255 |
|
|
// } |
256 |
|
|
|
257 |
|
|
// int mult=atom->GetSpinMultiplicity(); |
258 |
|
|
// if(mult) //radical or carbene |
259 |
|
|
// atom->DecrementImplicitValence(); |
260 |
|
|
// if(mult==1 || mult==3) //e.g.singlet or triplet carbene |
261 |
|
|
// atom->DecrementImplicitValence(); |
262 |
|
|
// } |
263 |
|
|
//end CM |
264 |
|
|
//FF end |
265 |
|
|
|
266 |
|
|
// FF Come back to the initial flags |
267 |
|
|
mol.SetFlags(oldflags); |
268 |
|
|
|
269 |
|
|
return; |
270 |
|
|
} |
271 |
|
|
|
272 |
|
|
|
273 |
|
|
void OBAtomTyper::CorrectAromaticNitrogens(OBMol &mol) |
274 |
|
|
{ |
275 |
|
|
if (!_init) |
276 |
|
|
Init(); |
277 |
|
|
|
278 |
|
|
if (mol.HasAromaticCorrected()) |
279 |
|
|
return; |
280 |
|
|
mol.SetAromaticCorrected(); |
281 |
|
|
|
282 |
|
|
return; |
283 |
|
|
|
284 |
|
|
// int j; |
285 |
|
|
// OBAtom *atom,*nbr,*a; |
286 |
|
|
// vector<OBNodeBase*>::iterator i,m; |
287 |
|
|
// vector<OBEdgeBase*>::iterator k; |
288 |
|
|
// OBBitVec curr,used,next; |
289 |
|
|
// vector<OBNodeBase*> v_N,v_OS; |
290 |
|
|
|
291 |
|
|
// vector<bool> _aromNH; |
292 |
|
|
// _aromNH.clear(); |
293 |
|
|
// _aromNH.resize(mol.NumAtoms()+1); |
294 |
|
|
|
295 |
|
|
// for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i)) |
296 |
|
|
// if (atom->IsAromatic() && !atom->IsCarbon() && !used[atom->GetIdx()]) |
297 |
|
|
// { |
298 |
|
|
// vector<OBNodeBase*> rsys; |
299 |
|
|
// rsys.push_back(atom); |
300 |
|
|
// curr |= atom->GetIdx(); |
301 |
|
|
// used |= atom->GetIdx(); |
302 |
|
|
|
303 |
|
|
// while (!curr.Empty()) |
304 |
|
|
// { |
305 |
|
|
// next.Clear(); |
306 |
|
|
// for (j = curr.NextBit(0);j != curr.EndBit();j = curr.NextBit(j)) |
307 |
|
|
// { |
308 |
|
|
// atom = mol.GetAtom(j); |
309 |
|
|
// for (nbr = atom->BeginNbrAtom(k);nbr;nbr = atom->NextNbrAtom(k)) |
310 |
|
|
// { |
311 |
|
|
// if (!(*k)->IsAromatic()) |
312 |
|
|
// continue; |
313 |
|
|
// if (used[nbr->GetIdx()]) |
314 |
|
|
// continue; |
315 |
|
|
|
316 |
|
|
// rsys.push_back(nbr); |
317 |
|
|
// next |= nbr->GetIdx(); |
318 |
|
|
// used |= nbr->GetIdx(); |
319 |
|
|
// } |
320 |
|
|
// } |
321 |
|
|
|
322 |
|
|
// curr = next; |
323 |
|
|
// } |
324 |
|
|
|
325 |
|
|
// //count number of electrons in the ring system |
326 |
|
|
// v_N.clear(); |
327 |
|
|
// v_OS.clear(); |
328 |
|
|
// int nelectrons = 0; |
329 |
|
|
|
330 |
|
|
// for (m = rsys.begin();m != rsys.end();m++) |
331 |
|
|
// { |
332 |
|
|
// a = (OBAtom *)*m; |
333 |
|
|
|
334 |
|
|
// int hasExoDoubleBond = false; |
335 |
|
|
// for (nbr = a->BeginNbrAtom(k);nbr;nbr = a->NextNbrAtom(k)) |
336 |
|
|
// if ((*k)->GetBO() == 2 && !(*k)->IsInRing()) |
337 |
|
|
// if (nbr->IsOxygen() || nbr->IsSulfur() || nbr->IsNitrogen()) |
338 |
|
|
// hasExoDoubleBond = true; |
339 |
|
|
|
340 |
|
|
// if (a->IsCarbon() && hasExoDoubleBond) |
341 |
|
|
// continue; |
342 |
|
|
|
343 |
|
|
// if (a->IsCarbon()) |
344 |
|
|
// nelectrons++; |
345 |
|
|
// else |
346 |
|
|
// if (a->IsOxygen() || a->IsSulfur()) |
347 |
|
|
// { |
348 |
|
|
// v_OS.push_back(a); |
349 |
|
|
// nelectrons += 2; |
350 |
|
|
// } |
351 |
|
|
// else |
352 |
|
|
// if (a->IsNitrogen()) |
353 |
|
|
// { |
354 |
|
|
// v_N.push_back(a); //store nitrogens |
355 |
|
|
// nelectrons++; |
356 |
|
|
// } |
357 |
|
|
// } |
358 |
|
|
|
359 |
|
|
// //calculate what the number of electrons should be for aromaticity |
360 |
|
|
// int naromatic = 2+4*((int)((double)(rsys.size()-2)*0.25+0.5)); |
361 |
|
|
|
362 |
|
|
// if (nelectrons > naromatic) //try to give one electron back to O or S |
363 |
|
|
// for (m = v_OS.begin();m != v_OS.end();m++) |
364 |
|
|
// { |
365 |
|
|
// if (naromatic == nelectrons) |
366 |
|
|
// break; |
367 |
|
|
// if ((*m)->GetValence() == 2 && (*m)->GetHvyValence() == 2) |
368 |
|
|
// { |
369 |
|
|
// nelectrons--; |
370 |
|
|
// ((OBAtom*)*m)->SetFormalCharge(1); |
371 |
|
|
// } |
372 |
|
|
// } |
373 |
|
|
|
374 |
|
|
// if (v_N.empty()) |
375 |
|
|
// continue; //no nitrogens found in ring |
376 |
|
|
|
377 |
|
|
// //check for protonated nitrogens |
378 |
|
|
// for (m = v_N.begin();m != v_N.end();m++) |
379 |
|
|
// { |
380 |
|
|
// if (naromatic == nelectrons) |
381 |
|
|
// break; |
382 |
|
|
// if (((OBAtom*)*m)->GetValence() == 3 && |
383 |
|
|
// ((OBAtom*)*m)->GetHvyValence() == 2) |
384 |
|
|
// { |
385 |
|
|
// nelectrons++; |
386 |
|
|
// _aromNH[((OBAtom*)*m)->GetIdx()] = true; |
387 |
|
|
// } |
388 |
|
|
// if (((OBAtom*)*m)->GetFormalCharge() == -1) |
389 |
|
|
// { |
390 |
|
|
// nelectrons++; |
391 |
|
|
// _aromNH[((OBAtom*)*m)->GetIdx()] = false; |
392 |
|
|
// } |
393 |
|
|
// } |
394 |
|
|
|
395 |
|
|
// //charge up tert nitrogens |
396 |
|
|
// for (m = v_N.begin();m != v_N.end();m++) |
397 |
|
|
// if (((OBAtom*)*m)->GetHvyValence() == 3 |
398 |
|
|
// && ((OBAtom*)*m)->BOSum() < 5) |
399 |
|
|
// ((OBAtom*)*m)->SetFormalCharge(1); |
400 |
|
|
|
401 |
|
|
// //try to uncharge nitrogens first |
402 |
|
|
// for (m = v_N.begin();m != v_N.end();m++) |
403 |
|
|
// { |
404 |
|
|
// if (((OBAtom*)*m)->BOSum() > 4) |
405 |
|
|
// continue; //skip n=O |
406 |
|
|
// if (naromatic == nelectrons) |
407 |
|
|
// break; |
408 |
|
|
// if (((OBAtom*)*m)->GetHvyValence() == 3) |
409 |
|
|
// { |
410 |
|
|
// nelectrons++; |
411 |
|
|
// ((OBAtom*)*m)->SetFormalCharge(0); |
412 |
|
|
// } |
413 |
|
|
// } |
414 |
|
|
|
415 |
|
|
// if (naromatic == nelectrons) |
416 |
|
|
// continue; |
417 |
|
|
|
418 |
|
|
// //try to protonate amides next |
419 |
|
|
// for (m = v_N.begin();m != v_N.end();m++) |
420 |
|
|
// { |
421 |
|
|
// if (naromatic == nelectrons) |
422 |
|
|
// break; |
423 |
|
|
// if (((OBAtom*)*m)->IsAmideNitrogen() && |
424 |
|
|
// ((OBAtom*)*m)->GetValence() == 2 && |
425 |
|
|
// ((OBAtom*)*m)->GetHvyValence() == 2 && |
426 |
|
|
// !_aromNH[((OBAtom*)*m)->GetIdx()]) |
427 |
|
|
// { |
428 |
|
|
// nelectrons++; |
429 |
|
|
// _aromNH[((OBAtom*)*m)->GetIdx()] = true; |
430 |
|
|
// } |
431 |
|
|
// } |
432 |
|
|
|
433 |
|
|
// if (naromatic == nelectrons) |
434 |
|
|
// continue; |
435 |
|
|
|
436 |
|
|
// //protonate amines in 5 membered rings first - try to match kekule |
437 |
|
|
// for (m = v_N.begin();m != v_N.end();m++) |
438 |
|
|
// { |
439 |
|
|
// if (naromatic == nelectrons) |
440 |
|
|
// break; |
441 |
|
|
// if (((OBAtom*)*m)->GetValence() == 2 && |
442 |
|
|
// !_aromNH[((OBAtom*)*m)->GetIdx()] && |
443 |
|
|
// ((OBAtom*)*m)->IsInRingSize(5) && |
444 |
|
|
// ((OBAtom*)*m)->BOSum() == 2) |
445 |
|
|
// { |
446 |
|
|
// nelectrons++; |
447 |
|
|
// _aromNH[((OBAtom*)*m)->GetIdx()] = true; |
448 |
|
|
// } |
449 |
|
|
// } |
450 |
|
|
|
451 |
|
|
// if (naromatic == nelectrons) |
452 |
|
|
// continue; |
453 |
|
|
|
454 |
|
|
// //protonate amines in 5 membered rings first - no kekule restriction |
455 |
|
|
// for (m = v_N.begin();m != v_N.end();m++) |
456 |
|
|
// { |
457 |
|
|
// if (naromatic == nelectrons) |
458 |
|
|
// break; |
459 |
|
|
// if ((*m)->GetValence() == 2 && !_aromNH[(*m)->GetIdx()] && |
460 |
|
|
// (*m)->IsInRingSize(5)) |
461 |
|
|
// { |
462 |
|
|
// nelectrons++; |
463 |
|
|
// _aromNH[(*m)->GetIdx()] = true; |
464 |
|
|
// } |
465 |
|
|
// } |
466 |
|
|
|
467 |
|
|
// if (naromatic == nelectrons) |
468 |
|
|
// continue; |
469 |
|
|
|
470 |
|
|
// //then others -- try to find an atom w/o a double bond first |
471 |
|
|
// for (m = v_N.begin();m != v_N.end();m++) |
472 |
|
|
// { |
473 |
|
|
// if (naromatic == nelectrons) |
474 |
|
|
// break; |
475 |
|
|
// if (((OBAtom*)*m)->GetHvyValence() == 2 && |
476 |
|
|
// !_aromNH[((OBAtom*)*m)->GetIdx()] && |
477 |
|
|
// !((OBAtom*)*m)->HasDoubleBond()) |
478 |
|
|
// { |
479 |
|
|
// nelectrons++; |
480 |
|
|
// _aromNH[(*m)->GetIdx()] = true; |
481 |
|
|
// } |
482 |
|
|
// } |
483 |
|
|
|
484 |
|
|
// for (m = v_N.begin();m != v_N.end();m++) |
485 |
|
|
// { |
486 |
|
|
// if (naromatic == nelectrons) |
487 |
|
|
// break; |
488 |
|
|
// if ((*m)->GetHvyValence() == 2 && !_aromNH[(*m)->GetIdx()]) |
489 |
|
|
// { |
490 |
|
|
// nelectrons++; |
491 |
|
|
// _aromNH[(*m)->GetIdx()] = true; |
492 |
|
|
// } |
493 |
|
|
// } |
494 |
|
|
// } |
495 |
|
|
|
496 |
|
|
// for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i)) |
497 |
|
|
// if (_aromNH[atom->GetIdx()] && atom->GetValence() == 2) |
498 |
|
|
// atom->SetImplicitValence(3); |
499 |
|
|
} |
500 |
|
|
|
501 |
|
|
/*! \class OBAromaticTyper |
502 |
|
|
\brief Assigns aromatic typing to atoms and bonds |
503 |
|
|
|
504 |
|
|
The OBAromaticTyper class is designed to read in a list of |
505 |
|
|
aromatic perception rules and apply them to molecules. The code |
506 |
|
|
that performs typing is not usually used directly -- it is usually |
507 |
|
|
done automatically when their corresponding values are requested of atoms |
508 |
|
|
or bonds. |
509 |
|
|
\code |
510 |
|
|
atom->IsAromatic(); |
511 |
|
|
bond->IsAromatic(); |
512 |
|
|
bond->IsDouble(); // needs to check aromaticity and define Kekule structures |
513 |
|
|
\endcode |
514 |
|
|
*/ |
515 |
|
|
OBAromaticTyper::OBAromaticTyper() |
516 |
|
|
{ |
517 |
|
|
_init = false; |
518 |
|
|
_dir = BABEL_DATADIR; |
519 |
|
|
_envvar = "BABEL_DATADIR"; |
520 |
|
|
_filename = "aromatic.txt"; |
521 |
|
|
_subdir = "data"; |
522 |
|
|
_dataptr = AromaticData; |
523 |
|
|
} |
524 |
|
|
|
525 |
|
|
void OBAromaticTyper::ParseLine(const char *buffer) |
526 |
|
|
{ |
527 |
|
|
OBSmartsPattern *sp; |
528 |
|
|
char temp_buffer[BUFF_SIZE]; |
529 |
|
|
|
530 |
|
|
if (buffer[0] == '#') |
531 |
|
|
return; |
532 |
|
|
vector<string> vs; |
533 |
|
|
tokenize(vs,buffer); |
534 |
|
|
if (!vs.empty() && vs.size() == 3) |
535 |
|
|
{ |
536 |
|
|
strcpy(temp_buffer,vs[0].c_str()); |
537 |
|
|
sp = new OBSmartsPattern(); |
538 |
|
|
if (sp->Init(temp_buffer)) |
539 |
|
|
{ |
540 |
|
|
_vsp.push_back(sp); |
541 |
|
|
_verange.push_back(pair<int,int> |
542 |
|
|
(atoi((char*)vs[1].c_str()), |
543 |
|
|
atoi((char*)vs[2].c_str()))); |
544 |
|
|
} |
545 |
|
|
else |
546 |
|
|
{ |
547 |
|
|
obErrorLog.ThrowError(__FUNCTION__, " Could not parse line in aromatic typer from aromatic.txt", obInfo); |
548 |
|
|
delete sp; |
549 |
|
|
sp = NULL; |
550 |
|
|
return; |
551 |
|
|
} |
552 |
|
|
} |
553 |
|
|
else |
554 |
|
|
obErrorLog.ThrowError(__FUNCTION__, " Could not parse line in aromatic typer from aromatic.txt", obInfo); |
555 |
|
|
} |
556 |
|
|
|
557 |
|
|
OBAromaticTyper::~OBAromaticTyper() |
558 |
|
|
{ |
559 |
|
|
vector<OBSmartsPattern*>::iterator i; |
560 |
|
|
for (i = _vsp.begin();i != _vsp.end();i++) |
561 |
|
|
{ |
562 |
|
|
delete *i; |
563 |
|
|
*i = NULL; |
564 |
|
|
} |
565 |
|
|
} |
566 |
|
|
|
567 |
|
|
void OBAromaticTyper::AssignAromaticFlags(OBMol &mol) |
568 |
|
|
{ |
569 |
|
|
if (!_init) |
570 |
|
|
Init(); |
571 |
|
|
|
572 |
|
|
if (mol.HasAromaticPerceived()) |
573 |
|
|
return; |
574 |
|
|
mol.SetAromaticPerceived(); |
575 |
|
|
obErrorLog.ThrowError(__FUNCTION__, |
576 |
|
|
"Ran OpenBabel::AssignAromaticFlags", obAuditMsg); |
577 |
|
|
|
578 |
|
|
_vpa.clear(); |
579 |
|
|
_vpa.resize(mol.NumAtoms()+1); |
580 |
|
|
_velec.clear(); |
581 |
|
|
_velec.resize(mol.NumAtoms()+1); |
582 |
|
|
_root.clear(); |
583 |
|
|
_root.resize(mol.NumAtoms()+1); |
584 |
|
|
|
585 |
|
|
OBBond *bond; |
586 |
|
|
OBAtom *atom; |
587 |
|
|
vector<OBNodeBase*>::iterator i; |
588 |
|
|
vector<OBEdgeBase*>::iterator j; |
589 |
|
|
|
590 |
|
|
//unset all aromatic flags |
591 |
|
|
for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i)) |
592 |
|
|
atom->UnsetAromatic(); |
593 |
|
|
for (bond = mol.BeginBond(j);bond;bond = mol.NextBond(j)) |
594 |
|
|
bond->UnsetAromatic(); |
595 |
|
|
|
596 |
|
|
int idx; |
597 |
|
|
vector<vector<int> >::iterator m; |
598 |
|
|
vector<OBSmartsPattern*>::iterator k; |
599 |
|
|
|
600 |
|
|
//mark atoms as potentially aromatic |
601 |
|
|
for (idx=0,k = _vsp.begin();k != _vsp.end();k++,idx++) |
602 |
|
|
if ((*k)->Match(mol)) |
603 |
|
|
{ |
604 |
|
|
_mlist = (*k)->GetMapList(); |
605 |
|
|
for (m = _mlist.begin();m != _mlist.end();m++) |
606 |
|
|
{ |
607 |
|
|
_vpa[(*m)[0]] = true; |
608 |
|
|
_velec[(*m)[0]] = _verange[idx]; |
609 |
|
|
} |
610 |
|
|
} |
611 |
|
|
|
612 |
|
|
//sanity check - exclude all 4 substituted atoms and sp centers |
613 |
|
|
for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i)) |
614 |
|
|
{ |
615 |
|
|
if (atom->GetImplicitValence() > 3) |
616 |
|
|
{ |
617 |
|
|
_vpa[atom->GetIdx()] = false; |
618 |
|
|
continue; |
619 |
|
|
} |
620 |
|
|
|
621 |
|
|
switch(atom->GetAtomicNum()) |
622 |
|
|
{ |
623 |
|
|
//phosphorus and sulfur may be initially typed as sp3 |
624 |
|
|
case 6: |
625 |
|
|
case 7: |
626 |
|
|
case 8: |
627 |
|
|
if (atom->GetHyb() != 2) |
628 |
|
|
_vpa[atom->GetIdx()] = false; |
629 |
|
|
break; |
630 |
|
|
} |
631 |
|
|
} |
632 |
|
|
|
633 |
|
|
//propagate potentially aromatic atoms |
634 |
|
|
for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i)) |
635 |
|
|
if (_vpa[atom->GetIdx()]) |
636 |
|
|
PropagatePotentialAromatic(atom); |
637 |
|
|
|
638 |
|
|
//select root atoms |
639 |
|
|
SelectRootAtoms(mol); |
640 |
|
|
|
641 |
|
|
ExcludeSmallRing(mol); //remove 3 membered rings from consideration |
642 |
|
|
|
643 |
|
|
//loop over root atoms and look for 5-6 membered aromatic rings |
644 |
|
|
_visit.clear(); |
645 |
|
|
_visit.resize(mol.NumAtoms()+1); |
646 |
|
|
for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i)) |
647 |
|
|
if (_root[atom->GetIdx()]) |
648 |
|
|
CheckAromaticity(atom,6); |
649 |
|
|
|
650 |
|
|
for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i)) |
651 |
|
|
if (_root[atom->GetIdx()]) |
652 |
|
|
CheckAromaticity(atom,20); |
653 |
|
|
|
654 |
|
|
//for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i)) |
655 |
|
|
// if (atom->IsAromatic()) |
656 |
|
|
// cerr << "aro = " <<atom->GetIdx() << endl; |
657 |
|
|
|
658 |
|
|
//for (bond = mol.BeginBond(j);bond;bond = mol.NextBond(j)) |
659 |
|
|
//if (bond->IsAromatic()) |
660 |
|
|
//cerr << bond->GetIdx() << ' ' << bond->IsAromatic() << endl; |
661 |
|
|
} |
662 |
|
|
|
663 |
|
|
bool OBAromaticTyper::TraverseCycle(OBAtom *root, OBAtom *atom, OBBond *prev, |
664 |
|
|
std::pair<int,int> &er,int depth) |
665 |
|
|
{ |
666 |
|
|
if (atom == root) |
667 |
|
|
{ |
668 |
|
|
int i; |
669 |
|
|
for (i = er.first;i <= er.second;i++) |
670 |
|
|
if (i%4 == 2 && i > 2) |
671 |
|
|
return(true); |
672 |
|
|
|
673 |
|
|
return(false); |
674 |
|
|
} |
675 |
|
|
|
676 |
|
|
if (!depth || !_vpa[atom->GetIdx()] || _visit[atom->GetIdx()]) |
677 |
|
|
return(false); |
678 |
|
|
|
679 |
|
|
bool result = false; |
680 |
|
|
|
681 |
|
|
depth--; |
682 |
|
|
er.first += _velec[atom->GetIdx()].first; |
683 |
|
|
er.second += _velec[atom->GetIdx()].second; |
684 |
|
|
|
685 |
|
|
_visit[atom->GetIdx()] = true; |
686 |
|
|
OBAtom *nbr; |
687 |
|
|
vector<OBEdgeBase*>::iterator i; |
688 |
|
|
for (nbr = atom->BeginNbrAtom(i);nbr;nbr = atom->NextNbrAtom(i)) |
689 |
|
|
if (*i != prev && (*i)->IsInRing() && _vpa[nbr->GetIdx()]) |
690 |
|
|
{ |
691 |
|
|
if (TraverseCycle(root,nbr,(OBBond*)(*i),er,depth)) |
692 |
|
|
{ |
693 |
|
|
result = true; |
694 |
|
|
((OBBond*) *i)->SetAromatic(); |
695 |
|
|
} |
696 |
|
|
} |
697 |
|
|
|
698 |
|
|
_visit[atom->GetIdx()] = false; |
699 |
|
|
if (result) |
700 |
|
|
atom->SetAromatic(); |
701 |
|
|
|
702 |
|
|
er.first -= _velec[atom->GetIdx()].first; |
703 |
|
|
er.second -= _velec[atom->GetIdx()].second; |
704 |
|
|
|
705 |
|
|
return(result); |
706 |
|
|
} |
707 |
|
|
|
708 |
|
|
void OBAromaticTyper::CheckAromaticity(OBAtom *atom,int depth) |
709 |
|
|
{ |
710 |
|
|
OBAtom *nbr; |
711 |
|
|
vector<OBEdgeBase*>::iterator i; |
712 |
|
|
|
713 |
|
|
pair<int,int> erange; |
714 |
|
|
for (nbr = atom->BeginNbrAtom(i);nbr;nbr = atom->NextNbrAtom(i)) |
715 |
|
|
if ((*i)->IsInRing()) // check all rings, regardless of assumed aromaticity |
716 |
|
|
{ |
717 |
|
|
erange = _velec[atom->GetIdx()]; |
718 |
|
|
|
719 |
|
|
if (TraverseCycle(atom,nbr,(OBBond *)*i,erange,depth-1)) |
720 |
|
|
{ |
721 |
|
|
atom->SetAromatic(); |
722 |
|
|
((OBBond*) *i)->SetAromatic(); |
723 |
|
|
} |
724 |
|
|
} |
725 |
|
|
} |
726 |
|
|
|
727 |
|
|
void OBAromaticTyper::PropagatePotentialAromatic(OBAtom *atom) |
728 |
|
|
{ |
729 |
|
|
int count = 0; |
730 |
|
|
OBAtom *nbr; |
731 |
|
|
vector<OBEdgeBase*>::iterator i; |
732 |
|
|
|
733 |
|
|
for (nbr = atom->BeginNbrAtom(i);nbr;nbr = atom->NextNbrAtom(i)) |
734 |
|
|
if ((*i)->IsInRing() && _vpa[nbr->GetIdx()]) |
735 |
|
|
count++; |
736 |
|
|
|
737 |
|
|
if (count < 2) |
738 |
|
|
{ |
739 |
|
|
_vpa[atom->GetIdx()] = false; |
740 |
|
|
if (count == 1) |
741 |
|
|
for (nbr = atom->BeginNbrAtom(i);nbr;nbr = atom->NextNbrAtom(i)) |
742 |
|
|
if ((*i)->IsInRing() && _vpa[nbr->GetIdx()]) |
743 |
|
|
PropagatePotentialAromatic(nbr); |
744 |
|
|
} |
745 |
|
|
} |
746 |
|
|
|
747 |
|
|
/** |
748 |
|
|
* Select the root atoms for traversing atoms in rings. |
749 |
|
|
* |
750 |
|
|
* Picking only the begin atom of a closure bond can cause |
751 |
|
|
* difficulties when the selected atom is an inner atom |
752 |
|
|
* with three neighbour ring atoms. Why ? Because this atom |
753 |
|
|
* can get trapped by the other atoms when determining aromaticity, |
754 |
|
|
* because a simple visited flag is used in the |
755 |
|
|
* OBAromaticTyper::TraverseCycle() method. |
756 |
|
|
* |
757 |
|
|
* Ported from JOELib, copyright Joerg Wegner, 2003 under the GPL version 2 |
758 |
|
|
* |
759 |
|
|
* @param mol the molecule |
760 |
|
|
* @param avoidInnerRingAtoms inner closure ring atoms with more than 2 neighbours will be avoided |
761 |
|
|
* |
762 |
|
|
*/ |
763 |
|
|
void OBAromaticTyper::SelectRootAtoms(OBMol &mol, bool avoidInnerRingAtoms) |
764 |
|
|
{ |
765 |
|
|
OBBond *bond; |
766 |
|
|
OBAtom *atom, *nbr, *nbr2; |
767 |
|
|
OBRing *ring; |
768 |
|
|
// vector<OBNodeBase*>::iterator i; |
769 |
|
|
vector<OBEdgeBase*>::iterator j, l, nbr2Iter; |
770 |
|
|
vector<OBRing*> sssRings = mol.GetSSSR(); |
771 |
|
|
vector<OBRing*>::iterator k; |
772 |
|
|
|
773 |
|
|
int rootAtom; |
774 |
|
|
int ringNbrs; |
775 |
|
|
int heavyNbrs; |
776 |
|
|
int newRoot = -1; |
777 |
|
|
vector<int> tmpRootAtoms; |
778 |
|
|
vector<int> tmp; |
779 |
|
|
|
780 |
|
|
for (bond = mol.BeginBond(j);bond;bond = mol.NextBond(j)) |
781 |
|
|
if (bond->IsClosure()) |
782 |
|
|
tmpRootAtoms.push_back(bond->GetBeginAtomIdx()); |
783 |
|
|
|
784 |
|
|
for (bond = mol.BeginBond(j);bond;bond = mol.NextBond(j)) |
785 |
|
|
if (bond->IsClosure()) |
786 |
|
|
{ |
787 |
|
|
// BASIC APPROACH |
788 |
|
|
// pick beginning atom at closure bond |
789 |
|
|
// this is really ready, isn't it ! ;-) |
790 |
|
|
rootAtom = bond->GetBeginAtomIdx(); |
791 |
|
|
_root[rootAtom] = true; |
792 |
|
|
|
793 |
|
|
// EXTENDED APPROACH |
794 |
|
|
if (avoidInnerRingAtoms) |
795 |
|
|
{ |
796 |
|
|
// count the number of neighbor ring atoms |
797 |
|
|
atom = mol.GetAtom(rootAtom); |
798 |
|
|
ringNbrs = heavyNbrs = 0; |
799 |
|
|
|
800 |
|
|
for (nbr = atom->BeginNbrAtom(l);nbr;nbr = atom->NextNbrAtom(l)) |
801 |
|
|
{ |
802 |
|
|
// we can get this from atom->GetHvyValence() |
803 |
|
|
// but we need to find neighbors in rings too |
804 |
|
|
// so let's save some time |
805 |
|
|
if (!nbr->IsHydrogen()) |
806 |
|
|
{ |
807 |
|
|
heavyNbrs++; |
808 |
|
|
if (nbr->IsInRing()) |
809 |
|
|
ringNbrs++; |
810 |
|
|
} |
811 |
|
|
|
812 |
|
|
// if this atom has more than 2 neighbor ring atoms |
813 |
|
|
// we could get trapped later when traversing cycles |
814 |
|
|
// which can cause aromaticity false detection |
815 |
|
|
newRoot = -1; |
816 |
|
|
|
817 |
|
|
if (ringNbrs > 2) |
818 |
|
|
{ |
819 |
|
|
// try to find another root atom |
820 |
|
|
for (k = sssRings.begin();k != sssRings.end();k++) |
821 |
|
|
{ |
822 |
|
|
ring = (*k); |
823 |
|
|
tmp = ring->_path; |
824 |
|
|
|
825 |
|
|
bool checkThisRing = false; |
826 |
|
|
int rootAtomNumber=0; |
827 |
|
|
int idx=0; |
828 |
|
|
// avoiding two root atoms in one ring ! |
829 |
|
|
for (unsigned int j = 0; j < tmpRootAtoms.size(); j++) |
830 |
|
|
{ |
831 |
|
|
idx= tmpRootAtoms[j]; |
832 |
|
|
if(ring->IsInRing(idx)) |
833 |
|
|
{ |
834 |
|
|
rootAtomNumber++; |
835 |
|
|
if(rootAtomNumber>=2) |
836 |
|
|
break; |
837 |
|
|
} |
838 |
|
|
} |
839 |
|
|
if(rootAtomNumber<2) |
840 |
|
|
{ |
841 |
|
|
for (unsigned int j = 0; j < tmp.size(); j++) |
842 |
|
|
{ |
843 |
|
|
// find critical ring |
844 |
|
|
if (tmp[j] == rootAtom) |
845 |
|
|
{ |
846 |
|
|
checkThisRing = true; |
847 |
|
|
} |
848 |
|
|
else |
849 |
|
|
{ |
850 |
|
|
// second root atom in this ring ? |
851 |
|
|
if (_root[tmp[j]] == true) |
852 |
|
|
{ |
853 |
|
|
// when there is a second root |
854 |
|
|
// atom this ring can not be |
855 |
|
|
// used for getting an other |
856 |
|
|
// root atom |
857 |
|
|
checkThisRing = false; |
858 |
|
|
|
859 |
|
|
break; |
860 |
|
|
} |
861 |
|
|
} |
862 |
|
|
} |
863 |
|
|
} |
864 |
|
|
|
865 |
|
|
// check ring for getting another |
866 |
|
|
// root atom to avoid aromaticity typer problems |
867 |
|
|
if (checkThisRing) |
868 |
|
|
{ |
869 |
|
|
// check if we can find another root atom |
870 |
|
|
for (unsigned int m = 0; m < tmp.size(); m++) |
871 |
|
|
{ |
872 |
|
|
ringNbrs = heavyNbrs = 0; |
873 |
|
|
for (nbr2 = (mol.GetAtom(tmp[m]))->BeginNbrAtom(nbr2Iter); |
874 |
|
|
nbr2;nbr2 = (mol.GetAtom(tmp[m]))->NextNbrAtom(nbr2Iter)) |
875 |
|
|
{ |
876 |
|
|
if (!nbr2->IsHydrogen()) |
877 |
|
|
{ |
878 |
|
|
heavyNbrs++; |
879 |
|
|
|
880 |
|
|
if (nbr2->IsInRing()) |
881 |
|
|
ringNbrs++; |
882 |
|
|
} |
883 |
|
|
} |
884 |
|
|
|
885 |
|
|
// if the number of neighboured heavy atoms is also |
886 |
|
|
// the number of neighboured ring atoms, the aromaticity |
887 |
|
|
// typer could be stuck in a local traversing trap |
888 |
|
|
if (ringNbrs <= 2 && ring->IsInRing((mol.GetAtom(tmp[m])->GetIdx()))) |
889 |
|
|
{ |
890 |
|
|
newRoot = tmp[m]; |
891 |
|
|
} |
892 |
|
|
} |
893 |
|
|
} |
894 |
|
|
} |
895 |
|
|
|
896 |
|
|
if ((newRoot != -1) && (rootAtom != newRoot)) |
897 |
|
|
{ |
898 |
|
|
// unset root atom |
899 |
|
|
_root[rootAtom] = false; |
900 |
|
|
|
901 |
|
|
// pick new root atom |
902 |
|
|
_root[newRoot] = true; |
903 |
|
|
} |
904 |
|
|
} // if (ringNbrs > 2) |
905 |
|
|
|
906 |
|
|
} // end for |
907 |
|
|
} // if (avoid) |
908 |
|
|
} // if (bond.IsClosure()) |
909 |
|
|
} |
910 |
|
|
|
911 |
|
|
void OBAromaticTyper::ExcludeSmallRing(OBMol &mol) |
912 |
|
|
{ |
913 |
|
|
OBAtom *atom,*nbr1,*nbr2; |
914 |
|
|
vector<OBNodeBase*>::iterator i; |
915 |
|
|
vector<OBEdgeBase*>::iterator j,k; |
916 |
|
|
|
917 |
|
|
for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i)) |
918 |
|
|
if (_root[atom->GetIdx()]) |
919 |
|
|
for (nbr1 = atom->BeginNbrAtom(j);nbr1;nbr1 = atom->NextNbrAtom(j)) |
920 |
|
|
if ((*j)->IsInRing() && _vpa[nbr1->GetIdx()]) |
921 |
|
|
for (nbr2 = nbr1->BeginNbrAtom(k);nbr2;nbr2 = nbr1->NextNbrAtom(k)) |
922 |
|
|
if (nbr2 != atom && (*k)->IsInRing() && _vpa[nbr2->GetIdx()]) |
923 |
|
|
if (atom->IsConnected(nbr2)) |
924 |
|
|
_root[atom->GetIdx()] = false; |
925 |
|
|
} |
926 |
|
|
|
927 |
|
|
} //namespace OpenBabel; |
928 |
|
|
|
929 |
|
|
//! \file typer.cpp |
930 |
|
|
//! \brief Open Babel atom and aromaticity typer. |