ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/OOPSE-4/src/openbabel/kekulize.cpp
Revision: 2440
Committed: Wed Nov 16 19:42:11 2005 UTC (18 years, 7 months ago) by tim
File size: 17817 byte(s)
Log Message:
adding openbabel

File Contents

# Content
1 /**********************************************************************
2 kekulize.cpp - Alternate algorithm to kekulize a molecule.
3
4 Copyright (C) 2004-2005 by Fabien Fontaine
5 Some portions Copyright (C) 2005 by Geoffrey R. Hutchison
6
7 This file is part of the Open Babel project.
8 For more information, see <http://openbabel.sourceforge.net/>
9
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation version 2 of the License.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18 ***********************************************************************/
19
20 #include "mol.hpp"
21 #include "oberror.hpp"
22
23 #ifdef HAVE_SSTREAM
24 #include <sstream>
25 #else
26 #include <strstream>
27 #endif
28
29 #define SINGLE 1
30 #define DOUBLE 2
31
32 using namespace std;
33
34 namespace OpenBabel {
35
36 ///////////////////////////////////////////////////////////////////////////////
37 //! \brief Kekulize aromatic rings without using implicit valence
38
39 //! This new perceive kekule bonds function has been especifically designed to
40 //! handle molecule files without explicit hydrogens such as pdb or xyz.
41 //! The function does not rely on GetImplicitValence function
42 //! The function looks for groups of aromatic cycle
43 //! For each group it tries to guess the number of electrons given by each atom
44 //! in order to satisfy the huckel (4n+2) rule
45 //! If the huckel rule cannot be satisfied the algorithm try with its best alternative guess
46 //! Then it recursively walk on the atoms of the cycle and assign single and double bonds
47 void OBMol::NewPerceiveKekuleBonds()
48 {
49
50 if (HasKekulePerceived()) return;
51 SetKekulePerceived();
52
53 OBAtom *atom;
54 int n, de, minde;
55 std::vector<OBAtom*> cycle;
56 OBBitVec avisit,cvisit;
57 avisit.Resize(NumAtoms()+1);
58 cvisit.Resize(NumAtoms()+1);
59 OBBond *bond;
60 std::vector<OBEdgeBase*>::iterator bi;
61 std::vector<int> electron;
62 int BO;
63 int sume, orden, bestorden, bestatom;
64 // Init the kekulized bonds
65 unsigned i;
66 for(i=0; i< NumBonds(); i++ ) {
67 bond = GetBond(i);
68 BO = bond->GetBO();
69 switch (BO)
70 {
71 case 1: bond->SetKSingle(); break;
72 case 2: bond->SetKDouble(); break;
73 case 3: bond->SetKTriple(); break;
74 }
75 }
76
77 // Find all the groups of aromatic cycle
78 for(i=1; i<= NumAtoms(); i++ ) {
79 atom = GetAtom(i);
80 if (atom->HasAromaticBond() && !cvisit[i]) { // is new aromatic atom of an aromatic cycle ?
81
82 avisit.Clear();
83 electron.clear();
84 cycle.clear();
85
86 avisit.SetBitOn(i);
87 expandcycle (atom, avisit);
88 //store the atoms of the cycle(s)
89 unsigned int j;
90 for(j=1; j<= NumAtoms(); j++) {
91 if ( avisit[j] ) {
92 atom = GetAtom(j);
93 cycle.push_back(atom);
94 }
95 }
96 // At the begining each atom give one electron to the cycle
97 for(j=0; j< cycle.size(); j++) {
98 electron.push_back(1);
99 }
100
101 // remove one electron if the atom make a double bond out of the cycle
102 sume =0;
103 for(j=0; j< cycle.size(); j++) {
104 atom = cycle[j];
105 for(bond = atom->BeginBond(bi); bond; bond = atom->NextBond(bi)) {
106 if ( bond->IsDouble() ) {
107 OBAtom *atom2 = bond->GetNbrAtom(atom);
108 int fcharge = atom->GetFormalCharge();
109 int fcharge2 = atom2->GetFormalCharge();
110 if(atom->IsNitrogen() && atom2->IsOxygen()
111 && fcharge == 0 && fcharge2 == 0) { //n=O to [n+][O-]
112 atom->SetFormalCharge(1);
113 atom2->SetFormalCharge(-1);
114 bond->SetKSingle();
115 bond->SetBO(1);
116 }
117 else {
118 electron[j] = 0;
119 }
120 }
121 }
122 // count the number of electrons
123 sume += electron[j];
124 }
125
126
127 // Save the electron state in case huckel rule is not satisfied
128 vector<int> previousElectron = electron;
129
130 // find the ideal number of electrons according to the huckel 4n+2 rule
131 minde=99;
132 for (i=1; 1; i++) {
133 n = 4 *i +2;
134 de = n - sume;
135 if ( de < minde )
136 minde=de;
137 else if ( minde < 0 )
138 minde=de;
139 else
140 break;
141 }
142
143 #ifdef HAVE_SSTREAM
144 stringstream errorMsg;
145 #else
146 strstream errorMsg;
147 #endif
148
149 //cout << "minde before:" << minde << endl;
150 // if huckel rule not satisfied some atoms must give more electrons
151 //cout << "minde " << minde << endl;
152 while ( minde != 0 ) {
153 bestorden=99;
154 for(j=0; j< cycle.size(); j++) {
155 if (electron[j] == 1) {
156 orden = getorden(cycle[j]);
157 if (orden < bestorden) {
158 bestorden = orden;
159 bestatom = j;
160 }
161 }
162 }
163 if (bestorden==99) { // no electron giving atom found
164 errorMsg << "Kekulize: Huckel rule not satisfied for molecule " << GetTitle() << endl;
165 obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obInfo);
166 break; // Huckel rule cannot be satisfied
167 } // try to kekulize anyway
168 else {
169 electron[bestatom] += 1;
170 minde--;
171 }
172 }
173
174 if (bestorden == 99) { // Huckel rule not satisfied, just try to get an even number of electron before kekulizing
175
176 electron = previousElectron; // restore electon's state
177
178 int odd = sume % 2;
179 //cout << "odd:" << odd << endl;
180 if(odd) { // odd number of electrons try to add an electron to the best possible atom
181 for(j=0; j< cycle.size(); j++) {
182 if (electron[j] == 1) {
183 orden = getorden(cycle[j]);
184 if (orden < bestorden) {
185 bestorden = orden;
186 bestatom = j;
187 }
188 }
189 }
190 if (bestorden==99) { // no electron giving atom found
191 errorMsg << "Kekulize: Cannot get an even number of electron for molecule " << GetTitle() << "\n";
192 obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obInfo);
193 break; // impossible to choose an atom to obtain an even number of electron
194 } // try to kekulize anyway
195 else {
196 electron[bestatom] += 1;
197 }
198 }
199 }
200
201 //cout << "minde after:" << minde <<endl;
202 //for(j=0; j < cycle.size(); j++) {
203 //OBAtom *cycleAtom = cycle[j];
204 //cout << "\t" << cycleAtom->GetIdx();
205 //}
206 //cout << endl;
207
208 //for(j=0; j < electron.size(); j++) {
209 //cout << "\t" << electron[j];
210 //}
211 //cout << endl;
212
213 // kekulize the cycle(s)
214 start_kekulize(cycle,electron);
215
216 // Set the kekulized cycle(s) as visited
217 for(j=1; j<= NumAtoms(); j++) {
218 if (avisit[j])
219 cvisit.SetBitOn(j);
220 }
221
222 }
223 }
224 // Double bond have been assigned, set the remaining aromatic bonds to single
225 //std::cout << "Set not assigned single bonds\n";
226 for(i=0;i <NumBonds(); i++) {
227 bond = GetBond(i);
228 //std::cout << "bond " << bond->GetBeginAtomIdx() << " " << bond->GetEndAtomIdx() << " ";
229 if (bond->GetBO()==5 ) {
230 bond->SetKSingle();
231 bond->SetBO(1);
232 //std::cout << "single\n";
233 }
234 //else
235 // std::cout << "double\n";
236 }
237
238 return;
239 }
240
241 ///////////////////////////////////////////////////////////////////////////////////////
242 //! \brief Start kekulizing one or a fused set of aromatic ring(s)
243
244 //! The initial electronic state indicates if an atoms must make a double bond or not
245 //! Kekulizing is attempted recursively for all the atoms bonded to the first atom
246 //! of the cycle.
247 void OBMol::start_kekulize( std::vector <OBAtom*> &cycle, std::vector<int> &electron) {
248
249 std::vector<int> initState;
250 std::vector<int> currentState;
251 std::vector<int> binitState;
252 std::vector<int> bcurrentState;
253 std::vector<bool> mark;
254 unsigned int Idx;
255 OBAtom *atom, *atom2;
256 OBBond *bond;
257
258 //init the atom arrays
259 unsigned i;
260 for(i=0;i <NumAtoms()+1; i++) {
261 initState.push_back(-1);
262 currentState.push_back(-1);
263 mark.push_back(false);
264 }
265
266 //init the bond arrays with single bonds
267 for(i=0;i <NumBonds(); i++) {
268 binitState.push_back(SINGLE);
269 bcurrentState.push_back(SINGLE);
270 }
271
272 //set the electron number
273 for(i=0; i< cycle.size(); i++) {
274 atom = cycle[i];
275 Idx = atom->GetIdx();
276 if ( electron[i] == 1)
277 initState[Idx] = 1; // make 1 double bond
278 else
279 initState[Idx] = 2; // make 2 single bonds
280
281 currentState[Idx] = initState[Idx];
282 }
283
284 std::vector<OBEdgeBase*>::iterator b;
285 OBAtom *nbr;
286
287 bool second_pass=false;
288 // for( i=1; i<= NumAtoms(); i++) {
289 // if(currentState[i] == 1) { // the atom can make a double bond
290 // atom = GetAtom(i);
291 // //find a neighbour that can make a double bond
292 // // and start kekulize
293 // for (nbr = atom->BeginNbrAtom(b);nbr;nbr = atom->NextNbrAtom(b)) {
294 // if(currentState[nbr->GetIdx()]==1){
295 // if(!expand_kekulize(atom,nbr,currentState,initState, bcurrentState,binitState, mark)) {
296 // second_pass=true;
297 // }
298
299 // }
300 // }
301 // }
302 // }
303 bool expand_successful;
304 atom = cycle[0];
305 for (nbr = atom->BeginNbrAtom(b);nbr;nbr = atom->NextNbrAtom(b)) {
306 if(initState[nbr->GetIdx()] == -1) //neighbor atom not in the cycle, try next one
307 continue;
308 //std::cout << "Expand kekulize\n";
309 expand_kekulize(atom,nbr,currentState,initState, bcurrentState,binitState, mark) ;
310 //Control that all the electron have been given to the cycle(s)
311 expand_successful = true;
312 for(unsigned i=0; i< cycle.size(); i++) {
313 atom2 = cycle[i];
314 Idx = atom2->GetIdx();
315 //cout << "\t" << currentState[Idx];
316 if (currentState[Idx] == 1)
317 expand_successful=false;
318 }
319 //cout << endl;
320 if (expand_successful)
321 break;
322 else {
323 unsigned i;
324 for(i=0;i <NumAtoms()+1; i++) {
325 currentState[i]=initState[i];
326 mark[i]=false;
327 }
328 for(i=0;i <NumBonds(); i++) {
329 bcurrentState[i]=binitState[i];
330 }
331 }
332 }
333 if (!expand_successful)
334 {
335 #ifdef HAVE_SSTREAM
336 stringstream errorMsg;
337 #else
338 strstream errorMsg;
339 #endif
340 errorMsg << "Kekulize Error for molecule " << GetTitle() << endl;
341 obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obInfo);
342 }
343
344 // Set the double bonds
345 // std::cout << "Set double bonds\n";
346 for(i=0;i <NumBonds(); i++) {
347 bond = GetBond(i);
348 // std::cout << "bond " << bond->GetBeginAtomIdx() << " " << bond->GetEndAtomIdx() << " ";
349 if (bond->GetBO()==5 && bcurrentState[i] == DOUBLE) {
350 bond->SetKDouble();
351 bond->SetBO(2);
352 //std::cout << "double\n";
353 }
354 //else
355 //std::cout << "single\n";
356 //else if (bond->IsAromatic() && bond->GetBO() != 2)
357 // bond->SetBO(1);
358 }
359
360 return;
361 }
362
363
364 /////////////////////////////////////////////////////////////////////////////////////////
365 //! \brief recursively assign single and double bonds according to the electronical state
366 //! of the atoms of the current bond
367 int OBMol::expand_kekulize(OBAtom *atom1, OBAtom *atom2, std::vector<int> &currentState, std::vector<int> &initState, std::vector<int> &bcurrentState, std::vector<int> &binitState, std::vector<bool> &mark)
368 {
369 int done;
370 int Idx1=atom1->GetIdx(), Idx2=atom2->GetIdx();
371 OBBond *bond;
372 std::vector<OBEdgeBase*>::iterator i;
373 OBAtom *nbr;
374 int natom;
375
376 mark[Idx1]= true;
377 bond = atom1->GetBond(atom2);
378 int bIdx = bond->GetIdx();
379
380 //cout << "assign bond state for atoms " << Idx1 << " and " << Idx2 << endl;
381 if (currentState[Idx1] == 1 && currentState[Idx2] == 1) {
382 currentState[Idx1]=0;
383 currentState[Idx2]=0;
384 // set bond to double
385 //std::cout << "bond " << Idx1 << " " << Idx2 << " double\n";
386 bcurrentState[bIdx]=DOUBLE;
387 }
388 else if (currentState[Idx1] == 0 && currentState[Idx2] == 1 ||
389 currentState[Idx1] == 2 && currentState[Idx2] == 1 ||
390 currentState[Idx1] == 2 && currentState[Idx2] == 2) {
391 //std::cout << "bond " << Idx1 << " " << Idx2 << " single\n";
392 // leave bond to single
393 }
394 else if (currentState[Idx1] == 1 && currentState[Idx2] == 0 ||
395 currentState[Idx1] == 1 && currentState[Idx2] == 2) {
396 mark[Idx1]=false;
397 //std::cout << "bond " << Idx1 << " " << Idx2 << " error\n";
398 return (0); // error
399 }
400 else if (currentState[Idx1] == 0 && currentState[Idx2] == 0
401 || currentState[Idx1] == 2 && currentState[Idx2] == 0) {
402 //std::cout << "bond " << Idx1 << " " << Idx2 << " done\n";
403 mark[Idx2]=true;
404 return (1); //done
405 }
406 else if (currentState[Idx1] == 0 && currentState[Idx2] == 2) {
407 currentState[Idx2]=0;
408 //std::cout << "bond " << Idx1 << " " << Idx2 << " leave single\n";
409 // leave bond to single
410 }
411 else {
412
413 #ifdef HAVE_SSTREAM
414 stringstream errorMsg;
415 #else
416 strstream errorMsg;
417 #endif
418
419 errorMsg << "unexpected state:" << "atom " << Idx1 << " " << currentState[Idx1]
420 << " atom " << Idx2 << " " << currentState[Idx2] << endl;
421 obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obDebug);
422 return(false);
423 }
424
425 //int c;
426 //for(c=1; c < currentState.size(); c++) {
427 //cout << c << "\t";
428 //}
429 //cout << endl;
430 //for(c=1; c < currentState.size(); c++) {
431 //cout << currentState[c] << "\t";
432 //}
433 //cout << endl;
434
435 vector<int> previousState = currentState; // Backup the atom
436 vector<int> bpreviousState = bcurrentState; // and the bond states before expanding again
437
438 bool return_false=false;
439 // for each neighbor of atom 2 not already kekulized
440 for (nbr = atom2->BeginNbrAtom(i);nbr;nbr = atom2->NextNbrAtom(i))
441 {
442 natom = nbr->GetIdx();
443 if(initState[natom] == -1) //neighbor atom not in the cycle, try next one
444 continue;
445 if ( !mark[natom] ) {
446 done = expand_kekulize(atom2, nbr, currentState, initState, bcurrentState, binitState, mark);
447 if ( !done ) // kekulize failed
448 return_false =true;
449 else
450 return_false =false;
451 }
452
453 }
454 if (return_false) { // no good solution found
455 //cout << "return_false:no good solution\n" << endl;
456 //cout << "reset state of " << Idx1 << " and " << Idx2 << " from " << currentState[Idx1]
457 //<< " " << currentState[Idx2] << " to ";
458
459 // retrieve the states that might have been changed during kekulize expansion
460 currentState = previousState;
461
462
463 bcurrentState = bpreviousState;
464
465
466 // reset the bond and the atom states
467 if (bcurrentState[bIdx] == DOUBLE)
468 currentState[Idx1]=initState[Idx1];
469
470 currentState[Idx2]=initState[Idx2];
471 bcurrentState[bIdx]=binitState[bIdx]; // should be always single
472 mark[Idx2]=false;
473
474 //cout << currentState[Idx1] << " " << currentState[Idx2] << endl;
475 return (false);
476 }
477 // atom 2 cannot make any bond, should not have 1 electron
478 if (currentState[Idx2] == 1) {
479 // currentState[Idx1]=initState[Idx1];
480 //cout << "return true but " << Idx2 << " state = 1\n";
481 mark[Idx1]=false;
482 return (false);
483 }
484 else {
485 // if we found a good solution, then the state of Idx2 may have shifted from 1 to 0 during the kekulization
486 // If it is the case, we should check if there is a remaining unmarked neighbor because it is possible
487 // that kekulizing from this neigbor failed just because Idx2 was equal to 1
488
489 if(previousState[Idx2] == 1) {
490 // Since now Idx2 is equal to 0 because it kekulized well the kekulizing of the failed neigbor could be successfull
491 // If there is still an unmarked neigbor try to kekulize it again
492 //mark[Idx2]=true;
493 return_false=false;
494 //cout << "retry kekulizing from " << Idx2 << endl;
495
496 for (nbr = atom2->BeginNbrAtom(i);nbr;nbr = atom2->NextNbrAtom(i))
497 {
498 natom = nbr->GetIdx();
499 if(initState[natom] == -1) //neighbor atom not in the cycle, try next one
500 continue;
501 if ( !mark[natom] ) {
502 //cout << "atom " << natom << " not marked, expand again" << endl;
503 done = expand_kekulize(atom2, nbr, currentState, initState, bcurrentState, binitState, mark);
504 if ( !done ) // kekulize failed
505 return_false =true;
506 else
507 return_false =false;
508 }
509
510 }
511
512 // if we cannot kekulize the remaining neigbor again then we have to return false
513 // we do not have to reset the states because the kekulize will fail anyway
514 if(return_false) {
515 //cout << "rekekulize failed" << endl;
516 return(false);
517 }
518 else {
519 //cout << "rekekulized successfull" << endl;
520 return (true);
521 }
522
523 }
524
525
526 //cout << "return_true: good solution" << endl;
527 return (true);
528 }
529 }
530
531 //! Give the priority to give two electrons instead of 1
532 int OBMol::getorden( OBAtom *atom)
533 {
534 if ( atom->IsSulfur() ) return 1;
535 if ( atom->IsOxygen() ) return 2;
536 if ( atom->GetAtomicNum() == 34 ) return 3;
537 if ( atom->IsNitrogen() && atom->GetFormalCharge() == 0 && atom->GetValence() == 3) return 5;
538 if ( atom->IsAmideNitrogen() ) return 4;
539 if ( atom->IsNitrogen() && atom->GetFormalCharge() == -1) return 6;
540 if ( atom->IsNitrogen() && atom->GetFormalCharge() == 0 && atom->IsInRingSize(5) ) return 7;
541 if ( atom->IsNitrogen() && atom->GetFormalCharge() == 0 ) return 8;
542 if ( atom->IsCarbon() && atom->GetFormalCharge() == -1) return 9;
543 //if ( atom->IsCarbon() ) return 9;
544
545 return (100); //no atom found
546 }
547
548 //! Recursively find the aromatic atoms with an aromatic bond to the current atom
549 void OBMol::expandcycle (OBAtom *atom, OBBitVec &avisit)
550 {
551 OBAtom *nbr;
552 // OBBond *bond;
553 std::vector<OBEdgeBase*>::iterator i;
554 int natom;
555 //for each neighbour atom test if it is in the aromatic ring
556 for (nbr = atom->BeginNbrAtom(i);nbr;nbr = atom->NextNbrAtom(i))
557 {
558 natom = nbr->GetIdx();
559 // if (!avisit[natom] && nbr->IsAromatic() && ((OBBond*) *i)->IsAromatic()) {
560 if (!avisit[natom] && ((OBBond*) *i)->GetBO()==5) {
561 avisit.SetBitOn(natom);
562 expandcycle(nbr, avisit);
563 }
564 }
565 }
566
567 } // end namespace OpenBabel
568
569 //! \file kekulize.cpp
570 //! \brief Alternate algorithm to kekulize a molecule (OBMol::NewPerceiveKekuleBonds()).