1 |
/********************************************************************** |
2 |
typer.cpp - Open Babel atom typer. |
3 |
|
4 |
Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc. |
5 |
Some portions Copyright (C) 2001-2005 by Geoffrey R. Hutchison |
6 |
|
7 |
This file is part of the Open Babel project. |
8 |
For more information, see <http://openbabel.sourceforge.net/> |
9 |
|
10 |
This program is free software; you can redistribute it and/or modify |
11 |
it under the terms of the GNU General Public License as published by |
12 |
the Free Software Foundation version 2 of the License. |
13 |
|
14 |
This program is distributed in the hope that it will be useful, |
15 |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 |
GNU General Public License for more details. |
18 |
***********************************************************************/ |
19 |
|
20 |
#include "mol.hpp" |
21 |
#include "typer.hpp" |
22 |
#include "atomtyp.hpp" |
23 |
#include "aromatic.hpp" |
24 |
|
25 |
#ifdef WIN32 |
26 |
#pragma warning (disable : 4786) |
27 |
#endif |
28 |
|
29 |
using namespace std; |
30 |
|
31 |
namespace OpenBabel |
32 |
{ |
33 |
|
34 |
OBAromaticTyper aromtyper; |
35 |
OBAtomTyper atomtyper; |
36 |
|
37 |
/*! \class OBAtomTyper |
38 |
\brief Assigns atom types, hybridization, implicit valence and formal charges |
39 |
|
40 |
The OBAtomTyper class is designed to read in a list of atom typing |
41 |
rules and apply them to molecules. The code that performs atom |
42 |
typing is not usually used directly as atom typing, hybridization |
43 |
assignment, implicit valence assignment and charge are all done |
44 |
automatically when their corresponding values are requested of |
45 |
atoms: |
46 |
\code |
47 |
atom->GetType(); |
48 |
atom->GetFormalCharge(); |
49 |
atom->GetHyb(); |
50 |
\endcode |
51 |
*/ |
52 |
OBAtomTyper::OBAtomTyper() |
53 |
{ |
54 |
_init = false; |
55 |
STR_DEFINE(_dir, FRC_PATH); |
56 |
_envvar = "FORCE_PARAM_PATH"; |
57 |
_filename = "atomtyp.txt"; |
58 |
_subdir = "data"; |
59 |
_dataptr = AtomTypeData; |
60 |
} |
61 |
|
62 |
void OBAtomTyper::ParseLine(const char *buffer) |
63 |
{ |
64 |
vector<string> vs; |
65 |
OBSmartsPattern *sp; |
66 |
|
67 |
if (EQn(buffer,"INTHYB",6)) |
68 |
{ |
69 |
tokenize(vs,buffer); |
70 |
if (vs.empty() || vs.size() < 3) |
71 |
{ |
72 |
obErrorLog.ThrowError(__func__, " Could not parse INTHYB line in atom type table from atomtyp.txt", obInfo); |
73 |
return; |
74 |
} |
75 |
|
76 |
sp = new OBSmartsPattern; |
77 |
if (sp->Init(vs[1])) |
78 |
_vinthyb.push_back(pair<OBSmartsPattern*,int> (sp,atoi((char*)vs[2].c_str()))); |
79 |
else |
80 |
{ |
81 |
delete sp; |
82 |
sp = NULL; |
83 |
obErrorLog.ThrowError(__func__, " Could not parse INTHYB line in atom type table from atomtyp.txt", obInfo); |
84 |
return; |
85 |
} |
86 |
} |
87 |
else if (EQn(buffer,"IMPVAL",6)) |
88 |
{ |
89 |
tokenize(vs,buffer); |
90 |
if (vs.empty() || vs.size() < 3) |
91 |
{ |
92 |
obErrorLog.ThrowError(__func__, " Could not parse IMPVAL line in atom type table from atomtyp.txt", obInfo); |
93 |
return; |
94 |
} |
95 |
|
96 |
sp = new OBSmartsPattern; |
97 |
if (sp->Init(vs[1])) |
98 |
_vimpval.push_back(pair<OBSmartsPattern*,int> (sp,atoi((char*)vs[2].c_str()))); |
99 |
else |
100 |
{ |
101 |
obErrorLog.ThrowError(__func__, " Could not parse IMPVAL line in atom type table from atomtyp.txt", obInfo); |
102 |
delete sp; |
103 |
sp = NULL; |
104 |
return; |
105 |
} |
106 |
} |
107 |
else if (EQn(buffer,"EXTTYP",6)) |
108 |
{ |
109 |
tokenize(vs,buffer); |
110 |
if (vs.empty() || vs.size() < 3) |
111 |
{ |
112 |
obErrorLog.ThrowError(__func__, " Could not parse EXTTYP line in atom type table from atomtyp.txt", obInfo); |
113 |
return; |
114 |
} |
115 |
sp = new OBSmartsPattern; |
116 |
if (sp->Init(vs[1])) |
117 |
_vexttyp.push_back(pair<OBSmartsPattern*,string> (sp,vs[2])); |
118 |
else |
119 |
{ |
120 |
delete sp; |
121 |
sp = NULL; |
122 |
obErrorLog.ThrowError(__func__, " Could not parse EXTTYP line in atom type table from atomtyp.txt", obInfo); |
123 |
return; |
124 |
} |
125 |
} |
126 |
} |
127 |
|
128 |
OBAtomTyper::~OBAtomTyper() |
129 |
{ |
130 |
vector<pair<OBSmartsPattern*,int> >::iterator i; |
131 |
for (i = _vinthyb.begin();i != _vinthyb.end();i++) |
132 |
{ |
133 |
delete i->first; |
134 |
i->first = NULL; |
135 |
} |
136 |
for (i = _vimpval.begin();i != _vimpval.end();i++) |
137 |
{ |
138 |
delete i->first; |
139 |
i->first = NULL; |
140 |
} |
141 |
|
142 |
vector<pair<OBSmartsPattern*,string> >::iterator j; |
143 |
for (j = _vexttyp.begin();j != _vexttyp.end();j++) |
144 |
{ |
145 |
delete j->first; |
146 |
j->first = NULL; |
147 |
} |
148 |
|
149 |
} |
150 |
|
151 |
void OBAtomTyper::AssignTypes(OBMol &mol) |
152 |
{ |
153 |
if (!_init) |
154 |
Init(); |
155 |
|
156 |
obErrorLog.ThrowError(__func__, |
157 |
"Ran OpenBabel::AssignTypes", obAuditMsg); |
158 |
|
159 |
mol.SetAtomTypesPerceived(); |
160 |
|
161 |
vector<vector<int> >::iterator j; |
162 |
vector<pair<OBSmartsPattern*,string> >::iterator i; |
163 |
|
164 |
for (i = _vexttyp.begin();i != _vexttyp.end();i++) |
165 |
if (i->first->Match(mol)) |
166 |
{ |
167 |
_mlist = i->first->GetMapList(); |
168 |
for (j = _mlist.begin();j != _mlist.end();j++) |
169 |
mol.GetAtom((*j)[0])->SetType(i->second); |
170 |
} |
171 |
} |
172 |
|
173 |
void OBAtomTyper::AssignHyb(OBMol &mol) |
174 |
{ |
175 |
if (!_init) |
176 |
Init(); |
177 |
|
178 |
aromtyper.AssignAromaticFlags(mol); |
179 |
|
180 |
mol.SetHybridizationPerceived(); |
181 |
obErrorLog.ThrowError(__func__, |
182 |
"Ran OpenBabel::AssignHybridization", obAuditMsg); |
183 |
|
184 |
OBAtom *atom; |
185 |
vector<OBNodeBase*>::iterator k; |
186 |
for (atom = mol.BeginAtom(k);atom;atom = mol.NextAtom(k)) |
187 |
atom->SetHyb(0); |
188 |
|
189 |
vector<vector<int> >::iterator j; |
190 |
vector<pair<OBSmartsPattern*,int> >::iterator i; |
191 |
|
192 |
for (i = _vinthyb.begin();i != _vinthyb.end();i++) |
193 |
if (i->first->Match(mol)) |
194 |
{ |
195 |
_mlist = i->first->GetMapList(); |
196 |
for (j = _mlist.begin();j != _mlist.end();j++) |
197 |
mol.GetAtom((*j)[0])->SetHyb(i->second); |
198 |
} |
199 |
} |
200 |
|
201 |
void OBAtomTyper::AssignImplicitValence(OBMol &mol) |
202 |
{ |
203 |
// FF Make sure that valence has not been perceived |
204 |
if(mol.HasImplicitValencePerceived()) |
205 |
return; |
206 |
|
207 |
if (!_init) |
208 |
Init(); |
209 |
|
210 |
mol.SetImplicitValencePerceived(); |
211 |
obErrorLog.ThrowError(__func__, |
212 |
"Ran OpenBabel::AssignImplicitValence", obAuditMsg); |
213 |
|
214 |
// FF Ensure that the aromatic typer will not be called |
215 |
int oldflags = mol.GetFlags(); // save the current state flags |
216 |
mol.SetAromaticPerceived(); // and set the aromatic perceived flag on |
217 |
|
218 |
OBAtom *atom; |
219 |
vector<OBNodeBase*>::iterator k; |
220 |
for (atom = mol.BeginAtom(k);atom;atom = mol.NextAtom(k)) |
221 |
atom->SetImplicitValence(atom->GetValence()); |
222 |
|
223 |
vector<vector<int> >::iterator j; |
224 |
vector<pair<OBSmartsPattern*,int> >::iterator i; |
225 |
|
226 |
for (i = _vimpval.begin();i != _vimpval.end();i++) |
227 |
if (i->first->Match(mol)) |
228 |
{ |
229 |
_mlist = i->first->GetMapList(); |
230 |
for (j = _mlist.begin();j != _mlist.end();j++) |
231 |
mol.GetAtom((*j)[0])->SetImplicitValence(i->second); |
232 |
} |
233 |
|
234 |
if (!mol.HasAromaticCorrected()) |
235 |
CorrectAromaticNitrogens(mol); |
236 |
|
237 |
for (atom = mol.BeginAtom(k);atom;atom = mol.NextAtom(k)) |
238 |
{ |
239 |
if (atom->GetImplicitValence() < atom->GetValence()) |
240 |
atom->SetImplicitValence(atom->GetValence()); |
241 |
} |
242 |
|
243 |
//FF moved to OBMol::AssignSpinMultiplicity(); |
244 |
//begin CM 18 Sept 2003 |
245 |
//if there are any explicit Hs on an atom, then they consitute all the Hs |
246 |
//Any discrepancy with the expected atom valency is because it is a radical of some sort |
247 |
//Also adjust the ImplicitValence for radical atoms |
248 |
//for (atom = mol.BeginAtom(k);atom;atom = mol.NextAtom(k)) |
249 |
// { |
250 |
// if (!atom->IsHydrogen() && atom->ExplicitHydrogenCount()!=0) |
251 |
// { |
252 |
// int diff=atom->GetImplicitValence() - (atom->GetHvyValence() + atom->ExplicitHydrogenCount()); |
253 |
// if (diff) |
254 |
// atom->SetSpinMultiplicity(diff+1);//radicals =2; all carbenes =3 |
255 |
// } |
256 |
|
257 |
// int mult=atom->GetSpinMultiplicity(); |
258 |
// if(mult) //radical or carbene |
259 |
// atom->DecrementImplicitValence(); |
260 |
// if(mult==1 || mult==3) //e.g.singlet or triplet carbene |
261 |
// atom->DecrementImplicitValence(); |
262 |
// } |
263 |
//end CM |
264 |
//FF end |
265 |
|
266 |
// FF Come back to the initial flags |
267 |
mol.SetFlags(oldflags); |
268 |
|
269 |
return; |
270 |
} |
271 |
|
272 |
|
273 |
void OBAtomTyper::CorrectAromaticNitrogens(OBMol &mol) |
274 |
{ |
275 |
if (!_init) |
276 |
Init(); |
277 |
|
278 |
if (mol.HasAromaticCorrected()) |
279 |
return; |
280 |
mol.SetAromaticCorrected(); |
281 |
|
282 |
return; |
283 |
|
284 |
// int j; |
285 |
// OBAtom *atom,*nbr,*a; |
286 |
// vector<OBNodeBase*>::iterator i,m; |
287 |
// vector<OBEdgeBase*>::iterator k; |
288 |
// OBBitVec curr,used,next; |
289 |
// vector<OBNodeBase*> v_N,v_OS; |
290 |
|
291 |
// vector<bool> _aromNH; |
292 |
// _aromNH.clear(); |
293 |
// _aromNH.resize(mol.NumAtoms()+1); |
294 |
|
295 |
// for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i)) |
296 |
// if (atom->IsAromatic() && !atom->IsCarbon() && !used[atom->GetIdx()]) |
297 |
// { |
298 |
// vector<OBNodeBase*> rsys; |
299 |
// rsys.push_back(atom); |
300 |
// curr |= atom->GetIdx(); |
301 |
// used |= atom->GetIdx(); |
302 |
|
303 |
// while (!curr.Empty()) |
304 |
// { |
305 |
// next.Clear(); |
306 |
// for (j = curr.NextBit(0);j != curr.EndBit();j = curr.NextBit(j)) |
307 |
// { |
308 |
// atom = mol.GetAtom(j); |
309 |
// for (nbr = atom->BeginNbrAtom(k);nbr;nbr = atom->NextNbrAtom(k)) |
310 |
// { |
311 |
// if (!(*k)->IsAromatic()) |
312 |
// continue; |
313 |
// if (used[nbr->GetIdx()]) |
314 |
// continue; |
315 |
|
316 |
// rsys.push_back(nbr); |
317 |
// next |= nbr->GetIdx(); |
318 |
// used |= nbr->GetIdx(); |
319 |
// } |
320 |
// } |
321 |
|
322 |
// curr = next; |
323 |
// } |
324 |
|
325 |
// //count number of electrons in the ring system |
326 |
// v_N.clear(); |
327 |
// v_OS.clear(); |
328 |
// int nelectrons = 0; |
329 |
|
330 |
// for (m = rsys.begin();m != rsys.end();m++) |
331 |
// { |
332 |
// a = (OBAtom *)*m; |
333 |
|
334 |
// int hasExoDoubleBond = false; |
335 |
// for (nbr = a->BeginNbrAtom(k);nbr;nbr = a->NextNbrAtom(k)) |
336 |
// if ((*k)->GetBO() == 2 && !(*k)->IsInRing()) |
337 |
// if (nbr->IsOxygen() || nbr->IsSulfur() || nbr->IsNitrogen()) |
338 |
// hasExoDoubleBond = true; |
339 |
|
340 |
// if (a->IsCarbon() && hasExoDoubleBond) |
341 |
// continue; |
342 |
|
343 |
// if (a->IsCarbon()) |
344 |
// nelectrons++; |
345 |
// else |
346 |
// if (a->IsOxygen() || a->IsSulfur()) |
347 |
// { |
348 |
// v_OS.push_back(a); |
349 |
// nelectrons += 2; |
350 |
// } |
351 |
// else |
352 |
// if (a->IsNitrogen()) |
353 |
// { |
354 |
// v_N.push_back(a); //store nitrogens |
355 |
// nelectrons++; |
356 |
// } |
357 |
// } |
358 |
|
359 |
// //calculate what the number of electrons should be for aromaticity |
360 |
// int naromatic = 2+4*((int)((double)(rsys.size()-2)*0.25+0.5)); |
361 |
|
362 |
// if (nelectrons > naromatic) //try to give one electron back to O or S |
363 |
// for (m = v_OS.begin();m != v_OS.end();m++) |
364 |
// { |
365 |
// if (naromatic == nelectrons) |
366 |
// break; |
367 |
// if ((*m)->GetValence() == 2 && (*m)->GetHvyValence() == 2) |
368 |
// { |
369 |
// nelectrons--; |
370 |
// ((OBAtom*)*m)->SetFormalCharge(1); |
371 |
// } |
372 |
// } |
373 |
|
374 |
// if (v_N.empty()) |
375 |
// continue; //no nitrogens found in ring |
376 |
|
377 |
// //check for protonated nitrogens |
378 |
// for (m = v_N.begin();m != v_N.end();m++) |
379 |
// { |
380 |
// if (naromatic == nelectrons) |
381 |
// break; |
382 |
// if (((OBAtom*)*m)->GetValence() == 3 && |
383 |
// ((OBAtom*)*m)->GetHvyValence() == 2) |
384 |
// { |
385 |
// nelectrons++; |
386 |
// _aromNH[((OBAtom*)*m)->GetIdx()] = true; |
387 |
// } |
388 |
// if (((OBAtom*)*m)->GetFormalCharge() == -1) |
389 |
// { |
390 |
// nelectrons++; |
391 |
// _aromNH[((OBAtom*)*m)->GetIdx()] = false; |
392 |
// } |
393 |
// } |
394 |
|
395 |
// //charge up tert nitrogens |
396 |
// for (m = v_N.begin();m != v_N.end();m++) |
397 |
// if (((OBAtom*)*m)->GetHvyValence() == 3 |
398 |
// && ((OBAtom*)*m)->BOSum() < 5) |
399 |
// ((OBAtom*)*m)->SetFormalCharge(1); |
400 |
|
401 |
// //try to uncharge nitrogens first |
402 |
// for (m = v_N.begin();m != v_N.end();m++) |
403 |
// { |
404 |
// if (((OBAtom*)*m)->BOSum() > 4) |
405 |
// continue; //skip n=O |
406 |
// if (naromatic == nelectrons) |
407 |
// break; |
408 |
// if (((OBAtom*)*m)->GetHvyValence() == 3) |
409 |
// { |
410 |
// nelectrons++; |
411 |
// ((OBAtom*)*m)->SetFormalCharge(0); |
412 |
// } |
413 |
// } |
414 |
|
415 |
// if (naromatic == nelectrons) |
416 |
// continue; |
417 |
|
418 |
// //try to protonate amides next |
419 |
// for (m = v_N.begin();m != v_N.end();m++) |
420 |
// { |
421 |
// if (naromatic == nelectrons) |
422 |
// break; |
423 |
// if (((OBAtom*)*m)->IsAmideNitrogen() && |
424 |
// ((OBAtom*)*m)->GetValence() == 2 && |
425 |
// ((OBAtom*)*m)->GetHvyValence() == 2 && |
426 |
// !_aromNH[((OBAtom*)*m)->GetIdx()]) |
427 |
// { |
428 |
// nelectrons++; |
429 |
// _aromNH[((OBAtom*)*m)->GetIdx()] = true; |
430 |
// } |
431 |
// } |
432 |
|
433 |
// if (naromatic == nelectrons) |
434 |
// continue; |
435 |
|
436 |
// //protonate amines in 5 membered rings first - try to match kekule |
437 |
// for (m = v_N.begin();m != v_N.end();m++) |
438 |
// { |
439 |
// if (naromatic == nelectrons) |
440 |
// break; |
441 |
// if (((OBAtom*)*m)->GetValence() == 2 && |
442 |
// !_aromNH[((OBAtom*)*m)->GetIdx()] && |
443 |
// ((OBAtom*)*m)->IsInRingSize(5) && |
444 |
// ((OBAtom*)*m)->BOSum() == 2) |
445 |
// { |
446 |
// nelectrons++; |
447 |
// _aromNH[((OBAtom*)*m)->GetIdx()] = true; |
448 |
// } |
449 |
// } |
450 |
|
451 |
// if (naromatic == nelectrons) |
452 |
// continue; |
453 |
|
454 |
// //protonate amines in 5 membered rings first - no kekule restriction |
455 |
// for (m = v_N.begin();m != v_N.end();m++) |
456 |
// { |
457 |
// if (naromatic == nelectrons) |
458 |
// break; |
459 |
// if ((*m)->GetValence() == 2 && !_aromNH[(*m)->GetIdx()] && |
460 |
// (*m)->IsInRingSize(5)) |
461 |
// { |
462 |
// nelectrons++; |
463 |
// _aromNH[(*m)->GetIdx()] = true; |
464 |
// } |
465 |
// } |
466 |
|
467 |
// if (naromatic == nelectrons) |
468 |
// continue; |
469 |
|
470 |
// //then others -- try to find an atom w/o a double bond first |
471 |
// for (m = v_N.begin();m != v_N.end();m++) |
472 |
// { |
473 |
// if (naromatic == nelectrons) |
474 |
// break; |
475 |
// if (((OBAtom*)*m)->GetHvyValence() == 2 && |
476 |
// !_aromNH[((OBAtom*)*m)->GetIdx()] && |
477 |
// !((OBAtom*)*m)->HasDoubleBond()) |
478 |
// { |
479 |
// nelectrons++; |
480 |
// _aromNH[(*m)->GetIdx()] = true; |
481 |
// } |
482 |
// } |
483 |
|
484 |
// for (m = v_N.begin();m != v_N.end();m++) |
485 |
// { |
486 |
// if (naromatic == nelectrons) |
487 |
// break; |
488 |
// if ((*m)->GetHvyValence() == 2 && !_aromNH[(*m)->GetIdx()]) |
489 |
// { |
490 |
// nelectrons++; |
491 |
// _aromNH[(*m)->GetIdx()] = true; |
492 |
// } |
493 |
// } |
494 |
// } |
495 |
|
496 |
// for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i)) |
497 |
// if (_aromNH[atom->GetIdx()] && atom->GetValence() == 2) |
498 |
// atom->SetImplicitValence(3); |
499 |
} |
500 |
|
501 |
/*! \class OBAromaticTyper |
502 |
\brief Assigns aromatic typing to atoms and bonds |
503 |
|
504 |
The OBAromaticTyper class is designed to read in a list of |
505 |
aromatic perception rules and apply them to molecules. The code |
506 |
that performs typing is not usually used directly -- it is usually |
507 |
done automatically when their corresponding values are requested of atoms |
508 |
or bonds. |
509 |
\code |
510 |
atom->IsAromatic(); |
511 |
bond->IsAromatic(); |
512 |
bond->IsDouble(); // needs to check aromaticity and define Kekule structures |
513 |
\endcode |
514 |
*/ |
515 |
OBAromaticTyper::OBAromaticTyper() |
516 |
{ |
517 |
_init = false; |
518 |
STR_DEFINE(_dir, FRC_PATH); |
519 |
_envvar = "FORCE_PARAM_PATH"; |
520 |
_filename = "aromatic.txt"; |
521 |
_subdir = "data"; |
522 |
_dataptr = AromaticData; |
523 |
} |
524 |
|
525 |
void OBAromaticTyper::ParseLine(const char *buffer) |
526 |
{ |
527 |
OBSmartsPattern *sp; |
528 |
char temp_buffer[BUFF_SIZE]; |
529 |
|
530 |
if (buffer[0] == '#') |
531 |
return; |
532 |
vector<string> vs; |
533 |
tokenize(vs,buffer); |
534 |
if (!vs.empty() && vs.size() == 3) |
535 |
{ |
536 |
strcpy(temp_buffer,vs[0].c_str()); |
537 |
sp = new OBSmartsPattern(); |
538 |
if (sp->Init(temp_buffer)) |
539 |
{ |
540 |
_vsp.push_back(sp); |
541 |
_verange.push_back(pair<int,int> |
542 |
(atoi((char*)vs[1].c_str()), |
543 |
atoi((char*)vs[2].c_str()))); |
544 |
} |
545 |
else |
546 |
{ |
547 |
obErrorLog.ThrowError(__func__, " Could not parse line in aromatic typer from aromatic.txt", obInfo); |
548 |
delete sp; |
549 |
sp = NULL; |
550 |
return; |
551 |
} |
552 |
} |
553 |
else |
554 |
obErrorLog.ThrowError(__func__, " Could not parse line in aromatic typer from aromatic.txt", obInfo); |
555 |
} |
556 |
|
557 |
OBAromaticTyper::~OBAromaticTyper() |
558 |
{ |
559 |
vector<OBSmartsPattern*>::iterator i; |
560 |
for (i = _vsp.begin();i != _vsp.end();i++) |
561 |
{ |
562 |
delete *i; |
563 |
*i = NULL; |
564 |
} |
565 |
} |
566 |
|
567 |
void OBAromaticTyper::AssignAromaticFlags(OBMol &mol) |
568 |
{ |
569 |
if (!_init) |
570 |
Init(); |
571 |
|
572 |
if (mol.HasAromaticPerceived()) |
573 |
return; |
574 |
mol.SetAromaticPerceived(); |
575 |
obErrorLog.ThrowError(__func__, |
576 |
"Ran OpenBabel::AssignAromaticFlags", obAuditMsg); |
577 |
|
578 |
_vpa.clear(); |
579 |
_vpa.resize(mol.NumAtoms()+1); |
580 |
_velec.clear(); |
581 |
_velec.resize(mol.NumAtoms()+1); |
582 |
_root.clear(); |
583 |
_root.resize(mol.NumAtoms()+1); |
584 |
|
585 |
OBBond *bond; |
586 |
OBAtom *atom; |
587 |
vector<OBNodeBase*>::iterator i; |
588 |
vector<OBEdgeBase*>::iterator j; |
589 |
|
590 |
//unset all aromatic flags |
591 |
for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i)) |
592 |
atom->UnsetAromatic(); |
593 |
for (bond = mol.BeginBond(j);bond;bond = mol.NextBond(j)) |
594 |
bond->UnsetAromatic(); |
595 |
|
596 |
int idx; |
597 |
vector<vector<int> >::iterator m; |
598 |
vector<OBSmartsPattern*>::iterator k; |
599 |
|
600 |
//mark atoms as potentially aromatic |
601 |
for (idx=0,k = _vsp.begin();k != _vsp.end();k++,idx++) |
602 |
if ((*k)->Match(mol)) |
603 |
{ |
604 |
_mlist = (*k)->GetMapList(); |
605 |
for (m = _mlist.begin();m != _mlist.end();m++) |
606 |
{ |
607 |
_vpa[(*m)[0]] = true; |
608 |
_velec[(*m)[0]] = _verange[idx]; |
609 |
} |
610 |
} |
611 |
|
612 |
//sanity check - exclude all 4 substituted atoms and sp centers |
613 |
for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i)) |
614 |
{ |
615 |
if (atom->GetImplicitValence() > 3) |
616 |
{ |
617 |
_vpa[atom->GetIdx()] = false; |
618 |
continue; |
619 |
} |
620 |
|
621 |
switch(atom->GetAtomicNum()) |
622 |
{ |
623 |
//phosphorus and sulfur may be initially typed as sp3 |
624 |
case 6: |
625 |
case 7: |
626 |
case 8: |
627 |
if (atom->GetHyb() != 2) |
628 |
_vpa[atom->GetIdx()] = false; |
629 |
break; |
630 |
} |
631 |
} |
632 |
|
633 |
//propagate potentially aromatic atoms |
634 |
for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i)) |
635 |
if (_vpa[atom->GetIdx()]) |
636 |
PropagatePotentialAromatic(atom); |
637 |
|
638 |
//select root atoms |
639 |
SelectRootAtoms(mol); |
640 |
|
641 |
ExcludeSmallRing(mol); //remove 3 membered rings from consideration |
642 |
|
643 |
//loop over root atoms and look for 5-6 membered aromatic rings |
644 |
_visit.clear(); |
645 |
_visit.resize(mol.NumAtoms()+1); |
646 |
for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i)) |
647 |
if (_root[atom->GetIdx()]) |
648 |
CheckAromaticity(atom,6); |
649 |
|
650 |
for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i)) |
651 |
if (_root[atom->GetIdx()]) |
652 |
CheckAromaticity(atom,20); |
653 |
|
654 |
//for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i)) |
655 |
// if (atom->IsAromatic()) |
656 |
// cerr << "aro = " <<atom->GetIdx() << endl; |
657 |
|
658 |
//for (bond = mol.BeginBond(j);bond;bond = mol.NextBond(j)) |
659 |
//if (bond->IsAromatic()) |
660 |
//cerr << bond->GetIdx() << ' ' << bond->IsAromatic() << endl; |
661 |
} |
662 |
|
663 |
bool OBAromaticTyper::TraverseCycle(OBAtom *root, OBAtom *atom, OBBond *prev, |
664 |
std::pair<int,int> &er,int depth) |
665 |
{ |
666 |
if (atom == root) |
667 |
{ |
668 |
int i; |
669 |
for (i = er.first;i <= er.second;i++) |
670 |
if (i%4 == 2 && i > 2) |
671 |
return(true); |
672 |
|
673 |
return(false); |
674 |
} |
675 |
|
676 |
if (!depth || !_vpa[atom->GetIdx()] || _visit[atom->GetIdx()]) |
677 |
return(false); |
678 |
|
679 |
bool result = false; |
680 |
|
681 |
depth--; |
682 |
er.first += _velec[atom->GetIdx()].first; |
683 |
er.second += _velec[atom->GetIdx()].second; |
684 |
|
685 |
_visit[atom->GetIdx()] = true; |
686 |
OBAtom *nbr; |
687 |
vector<OBEdgeBase*>::iterator i; |
688 |
for (nbr = atom->BeginNbrAtom(i);nbr;nbr = atom->NextNbrAtom(i)) |
689 |
if (*i != prev && (*i)->IsInRing() && _vpa[nbr->GetIdx()]) |
690 |
{ |
691 |
if (TraverseCycle(root,nbr,(OBBond*)(*i),er,depth)) |
692 |
{ |
693 |
result = true; |
694 |
((OBBond*) *i)->SetAromatic(); |
695 |
} |
696 |
} |
697 |
|
698 |
_visit[atom->GetIdx()] = false; |
699 |
if (result) |
700 |
atom->SetAromatic(); |
701 |
|
702 |
er.first -= _velec[atom->GetIdx()].first; |
703 |
er.second -= _velec[atom->GetIdx()].second; |
704 |
|
705 |
return(result); |
706 |
} |
707 |
|
708 |
void OBAromaticTyper::CheckAromaticity(OBAtom *atom,int depth) |
709 |
{ |
710 |
OBAtom *nbr; |
711 |
vector<OBEdgeBase*>::iterator i; |
712 |
|
713 |
pair<int,int> erange; |
714 |
for (nbr = atom->BeginNbrAtom(i);nbr;nbr = atom->NextNbrAtom(i)) |
715 |
if ((*i)->IsInRing()) // check all rings, regardless of assumed aromaticity |
716 |
{ |
717 |
erange = _velec[atom->GetIdx()]; |
718 |
|
719 |
if (TraverseCycle(atom,nbr,(OBBond *)*i,erange,depth-1)) |
720 |
{ |
721 |
atom->SetAromatic(); |
722 |
((OBBond*) *i)->SetAromatic(); |
723 |
} |
724 |
} |
725 |
} |
726 |
|
727 |
void OBAromaticTyper::PropagatePotentialAromatic(OBAtom *atom) |
728 |
{ |
729 |
int count = 0; |
730 |
OBAtom *nbr; |
731 |
vector<OBEdgeBase*>::iterator i; |
732 |
|
733 |
for (nbr = atom->BeginNbrAtom(i);nbr;nbr = atom->NextNbrAtom(i)) |
734 |
if ((*i)->IsInRing() && _vpa[nbr->GetIdx()]) |
735 |
count++; |
736 |
|
737 |
if (count < 2) |
738 |
{ |
739 |
_vpa[atom->GetIdx()] = false; |
740 |
if (count == 1) |
741 |
for (nbr = atom->BeginNbrAtom(i);nbr;nbr = atom->NextNbrAtom(i)) |
742 |
if ((*i)->IsInRing() && _vpa[nbr->GetIdx()]) |
743 |
PropagatePotentialAromatic(nbr); |
744 |
} |
745 |
} |
746 |
|
747 |
/** |
748 |
* Select the root atoms for traversing atoms in rings. |
749 |
* |
750 |
* Picking only the begin atom of a closure bond can cause |
751 |
* difficulties when the selected atom is an inner atom |
752 |
* with three neighbour ring atoms. Why ? Because this atom |
753 |
* can get trapped by the other atoms when determining aromaticity, |
754 |
* because a simple visited flag is used in the |
755 |
* OBAromaticTyper::TraverseCycle() method. |
756 |
* |
757 |
* Ported from JOELib, copyright Joerg Wegner, 2003 under the GPL version 2 |
758 |
* |
759 |
* @param mol the molecule |
760 |
* @param avoidInnerRingAtoms inner closure ring atoms with more than 2 neighbours will be avoided |
761 |
* |
762 |
*/ |
763 |
void OBAromaticTyper::SelectRootAtoms(OBMol &mol, bool avoidInnerRingAtoms) |
764 |
{ |
765 |
OBBond *bond; |
766 |
OBAtom *atom, *nbr, *nbr2; |
767 |
OBRing *ring; |
768 |
// vector<OBNodeBase*>::iterator i; |
769 |
vector<OBEdgeBase*>::iterator j, l, nbr2Iter; |
770 |
vector<OBRing*> sssRings = mol.GetSSSR(); |
771 |
vector<OBRing*>::iterator k; |
772 |
|
773 |
int rootAtom; |
774 |
int ringNbrs; |
775 |
int heavyNbrs; |
776 |
int newRoot = -1; |
777 |
vector<int> tmpRootAtoms; |
778 |
vector<int> tmp; |
779 |
|
780 |
for (bond = mol.BeginBond(j);bond;bond = mol.NextBond(j)) |
781 |
if (bond->IsClosure()) |
782 |
tmpRootAtoms.push_back(bond->GetBeginAtomIdx()); |
783 |
|
784 |
for (bond = mol.BeginBond(j);bond;bond = mol.NextBond(j)) |
785 |
if (bond->IsClosure()) |
786 |
{ |
787 |
// BASIC APPROACH |
788 |
// pick beginning atom at closure bond |
789 |
// this is really ready, isn't it ! ;-) |
790 |
rootAtom = bond->GetBeginAtomIdx(); |
791 |
_root[rootAtom] = true; |
792 |
|
793 |
// EXTENDED APPROACH |
794 |
if (avoidInnerRingAtoms) |
795 |
{ |
796 |
// count the number of neighbor ring atoms |
797 |
atom = mol.GetAtom(rootAtom); |
798 |
ringNbrs = heavyNbrs = 0; |
799 |
|
800 |
for (nbr = atom->BeginNbrAtom(l);nbr;nbr = atom->NextNbrAtom(l)) |
801 |
{ |
802 |
// we can get this from atom->GetHvyValence() |
803 |
// but we need to find neighbors in rings too |
804 |
// so let's save some time |
805 |
if (!nbr->IsHydrogen()) |
806 |
{ |
807 |
heavyNbrs++; |
808 |
if (nbr->IsInRing()) |
809 |
ringNbrs++; |
810 |
} |
811 |
|
812 |
// if this atom has more than 2 neighbor ring atoms |
813 |
// we could get trapped later when traversing cycles |
814 |
// which can cause aromaticity false detection |
815 |
newRoot = -1; |
816 |
|
817 |
if (ringNbrs > 2) |
818 |
{ |
819 |
// try to find another root atom |
820 |
for (k = sssRings.begin();k != sssRings.end();k++) |
821 |
{ |
822 |
ring = (*k); |
823 |
tmp = ring->_path; |
824 |
|
825 |
bool checkThisRing = false; |
826 |
int rootAtomNumber=0; |
827 |
int idx=0; |
828 |
// avoiding two root atoms in one ring ! |
829 |
for (unsigned int j = 0; j < tmpRootAtoms.size(); j++) |
830 |
{ |
831 |
idx= tmpRootAtoms[j]; |
832 |
if(ring->IsInRing(idx)) |
833 |
{ |
834 |
rootAtomNumber++; |
835 |
if(rootAtomNumber>=2) |
836 |
break; |
837 |
} |
838 |
} |
839 |
if(rootAtomNumber<2) |
840 |
{ |
841 |
for (unsigned int j = 0; j < tmp.size(); j++) |
842 |
{ |
843 |
// find critical ring |
844 |
if (tmp[j] == rootAtom) |
845 |
{ |
846 |
checkThisRing = true; |
847 |
} |
848 |
else |
849 |
{ |
850 |
// second root atom in this ring ? |
851 |
if (_root[tmp[j]] == true) |
852 |
{ |
853 |
// when there is a second root |
854 |
// atom this ring can not be |
855 |
// used for getting an other |
856 |
// root atom |
857 |
checkThisRing = false; |
858 |
|
859 |
break; |
860 |
} |
861 |
} |
862 |
} |
863 |
} |
864 |
|
865 |
// check ring for getting another |
866 |
// root atom to avoid aromaticity typer problems |
867 |
if (checkThisRing) |
868 |
{ |
869 |
// check if we can find another root atom |
870 |
for (unsigned int m = 0; m < tmp.size(); m++) |
871 |
{ |
872 |
ringNbrs = heavyNbrs = 0; |
873 |
for (nbr2 = (mol.GetAtom(tmp[m]))->BeginNbrAtom(nbr2Iter); |
874 |
nbr2;nbr2 = (mol.GetAtom(tmp[m]))->NextNbrAtom(nbr2Iter)) |
875 |
{ |
876 |
if (!nbr2->IsHydrogen()) |
877 |
{ |
878 |
heavyNbrs++; |
879 |
|
880 |
if (nbr2->IsInRing()) |
881 |
ringNbrs++; |
882 |
} |
883 |
} |
884 |
|
885 |
// if the number of neighboured heavy atoms is also |
886 |
// the number of neighboured ring atoms, the aromaticity |
887 |
// typer could be stuck in a local traversing trap |
888 |
if (ringNbrs <= 2 && ring->IsInRing((mol.GetAtom(tmp[m])->GetIdx()))) |
889 |
{ |
890 |
newRoot = tmp[m]; |
891 |
} |
892 |
} |
893 |
} |
894 |
} |
895 |
|
896 |
if ((newRoot != -1) && (rootAtom != newRoot)) |
897 |
{ |
898 |
// unset root atom |
899 |
_root[rootAtom] = false; |
900 |
|
901 |
// pick new root atom |
902 |
_root[newRoot] = true; |
903 |
} |
904 |
} // if (ringNbrs > 2) |
905 |
|
906 |
} // end for |
907 |
} // if (avoid) |
908 |
} // if (bond.IsClosure()) |
909 |
} |
910 |
|
911 |
void OBAromaticTyper::ExcludeSmallRing(OBMol &mol) |
912 |
{ |
913 |
OBAtom *atom,*nbr1,*nbr2; |
914 |
vector<OBNodeBase*>::iterator i; |
915 |
vector<OBEdgeBase*>::iterator j,k; |
916 |
|
917 |
for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i)) |
918 |
if (_root[atom->GetIdx()]) |
919 |
for (nbr1 = atom->BeginNbrAtom(j);nbr1;nbr1 = atom->NextNbrAtom(j)) |
920 |
if ((*j)->IsInRing() && _vpa[nbr1->GetIdx()]) |
921 |
for (nbr2 = nbr1->BeginNbrAtom(k);nbr2;nbr2 = nbr1->NextNbrAtom(k)) |
922 |
if (nbr2 != atom && (*k)->IsInRing() && _vpa[nbr2->GetIdx()]) |
923 |
if (atom->IsConnected(nbr2)) |
924 |
_root[atom->GetIdx()] = false; |
925 |
} |
926 |
|
927 |
} //namespace OpenBabel; |
928 |
|
929 |
//! \file typer.cpp |
930 |
//! \brief Open Babel atom and aromaticity typer. |