ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/OOPSE-4/src/openbabel/kekulize.cpp
Revision: 3057
Committed: Thu Oct 19 20:49:05 2006 UTC (17 years, 11 months ago) by gezelter
File size: 17953 byte(s)
Log Message:
updated OpenBabel to version 2.0.2

File Contents

# User Rev Content
1 tim 2440 /**********************************************************************
2     kekulize.cpp - Alternate algorithm to kekulize a molecule.
3    
4     Copyright (C) 2004-2005 by Fabien Fontaine
5     Some portions Copyright (C) 2005 by Geoffrey R. Hutchison
6    
7     This file is part of the Open Babel project.
8     For more information, see <http://openbabel.sourceforge.net/>
9    
10     This program is free software; you can redistribute it and/or modify
11     it under the terms of the GNU General Public License as published by
12     the Free Software Foundation version 2 of the License.
13    
14     This program is distributed in the hope that it will be useful,
15     but WITHOUT ANY WARRANTY; without even the implied warranty of
16     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17     GNU General Public License for more details.
18     ***********************************************************************/
19    
20     #include "mol.hpp"
21     #include "oberror.hpp"
22    
23     #ifdef HAVE_SSTREAM
24     #include <sstream>
25     #else
26     #include <strstream>
27     #endif
28    
29     #define SINGLE 1
30     #define DOUBLE 2
31    
32     using namespace std;
33    
34     namespace OpenBabel {
35    
36     ///////////////////////////////////////////////////////////////////////////////
37     //! \brief Kekulize aromatic rings without using implicit valence
38    
39     //! This new perceive kekule bonds function has been especifically designed to
40     //! handle molecule files without explicit hydrogens such as pdb or xyz.
41     //! The function does not rely on GetImplicitValence function
42     //! The function looks for groups of aromatic cycle
43     //! For each group it tries to guess the number of electrons given by each atom
44     //! in order to satisfy the huckel (4n+2) rule
45     //! If the huckel rule cannot be satisfied the algorithm try with its best alternative guess
46     //! Then it recursively walk on the atoms of the cycle and assign single and double bonds
47     void OBMol::NewPerceiveKekuleBonds()
48     {
49    
50     if (HasKekulePerceived()) return;
51     SetKekulePerceived();
52    
53     OBAtom *atom;
54     int n, de, minde;
55     std::vector<OBAtom*> cycle;
56     OBBitVec avisit,cvisit;
57     avisit.Resize(NumAtoms()+1);
58     cvisit.Resize(NumAtoms()+1);
59     OBBond *bond;
60     std::vector<OBEdgeBase*>::iterator bi;
61     std::vector<int> electron;
62 gezelter 3057 int sume, orden, bestatom;
63     int bestorden = 99;
64 tim 2440 // Init the kekulized bonds
65     unsigned i;
66 gezelter 3057 FOR_BONDS_OF_MOL(bond, *this)
67 tim 2440 {
68 gezelter 3057 switch (bond->GetBO())
69     {
70     case 1: bond->SetKSingle(); break;
71     case 2: bond->SetKDouble(); break;
72     case 3: bond->SetKTriple(); break;
73     }
74 tim 2440 }
75    
76     // Find all the groups of aromatic cycle
77     for(i=1; i<= NumAtoms(); i++ ) {
78     atom = GetAtom(i);
79     if (atom->HasAromaticBond() && !cvisit[i]) { // is new aromatic atom of an aromatic cycle ?
80    
81     avisit.Clear();
82     electron.clear();
83     cycle.clear();
84    
85     avisit.SetBitOn(i);
86     expandcycle (atom, avisit);
87     //store the atoms of the cycle(s)
88     unsigned int j;
89     for(j=1; j<= NumAtoms(); j++) {
90     if ( avisit[j] ) {
91     atom = GetAtom(j);
92     cycle.push_back(atom);
93     }
94     }
95     // At the begining each atom give one electron to the cycle
96     for(j=0; j< cycle.size(); j++) {
97     electron.push_back(1);
98     }
99    
100     // remove one electron if the atom make a double bond out of the cycle
101     sume =0;
102     for(j=0; j< cycle.size(); j++) {
103     atom = cycle[j];
104     for(bond = atom->BeginBond(bi); bond; bond = atom->NextBond(bi)) {
105     if ( bond->IsDouble() ) {
106     OBAtom *atom2 = bond->GetNbrAtom(atom);
107     int fcharge = atom->GetFormalCharge();
108     int fcharge2 = atom2->GetFormalCharge();
109     if(atom->IsNitrogen() && atom2->IsOxygen()
110     && fcharge == 0 && fcharge2 == 0) { //n=O to [n+][O-]
111     atom->SetFormalCharge(1);
112     atom2->SetFormalCharge(-1);
113     bond->SetKSingle();
114     bond->SetBO(1);
115     }
116     else {
117     electron[j] = 0;
118     }
119     }
120     }
121     // count the number of electrons
122     sume += electron[j];
123     }
124    
125    
126     // Save the electron state in case huckel rule is not satisfied
127     vector<int> previousElectron = electron;
128    
129     // find the ideal number of electrons according to the huckel 4n+2 rule
130     minde=99;
131     for (i=1; 1; i++) {
132     n = 4 *i +2;
133     de = n - sume;
134     if ( de < minde )
135     minde=de;
136     else if ( minde < 0 )
137     minde=de;
138     else
139     break;
140     }
141    
142     #ifdef HAVE_SSTREAM
143     stringstream errorMsg;
144     #else
145     strstream errorMsg;
146     #endif
147    
148     //cout << "minde before:" << minde << endl;
149     // if huckel rule not satisfied some atoms must give more electrons
150     //cout << "minde " << minde << endl;
151     while ( minde != 0 ) {
152     bestorden=99;
153     for(j=0; j< cycle.size(); j++) {
154     if (electron[j] == 1) {
155     orden = getorden(cycle[j]);
156     if (orden < bestorden) {
157     bestorden = orden;
158     bestatom = j;
159     }
160     }
161     }
162     if (bestorden==99) { // no electron giving atom found
163     errorMsg << "Kekulize: Huckel rule not satisfied for molecule " << GetTitle() << endl;
164 tim 2518 obErrorLog.ThrowError(__func__, errorMsg.str(), obInfo);
165 tim 2440 break; // Huckel rule cannot be satisfied
166     } // try to kekulize anyway
167     else {
168     electron[bestatom] += 1;
169     minde--;
170     }
171     }
172    
173     if (bestorden == 99) { // Huckel rule not satisfied, just try to get an even number of electron before kekulizing
174    
175     electron = previousElectron; // restore electon's state
176    
177     int odd = sume % 2;
178     //cout << "odd:" << odd << endl;
179     if(odd) { // odd number of electrons try to add an electron to the best possible atom
180     for(j=0; j< cycle.size(); j++) {
181     if (electron[j] == 1) {
182     orden = getorden(cycle[j]);
183     if (orden < bestorden) {
184     bestorden = orden;
185     bestatom = j;
186     }
187     }
188     }
189     if (bestorden==99) { // no electron giving atom found
190     errorMsg << "Kekulize: Cannot get an even number of electron for molecule " << GetTitle() << "\n";
191 tim 2518 obErrorLog.ThrowError(__func__, errorMsg.str(), obInfo);
192 tim 2440 break; // impossible to choose an atom to obtain an even number of electron
193     } // try to kekulize anyway
194     else {
195     electron[bestatom] += 1;
196     }
197     }
198     }
199    
200     //cout << "minde after:" << minde <<endl;
201     //for(j=0; j < cycle.size(); j++) {
202     //OBAtom *cycleAtom = cycle[j];
203     //cout << "\t" << cycleAtom->GetIdx();
204     //}
205     //cout << endl;
206    
207     //for(j=0; j < electron.size(); j++) {
208     //cout << "\t" << electron[j];
209     //}
210     //cout << endl;
211    
212     // kekulize the cycle(s)
213     start_kekulize(cycle,electron);
214    
215     // Set the kekulized cycle(s) as visited
216     for(j=1; j<= NumAtoms(); j++) {
217     if (avisit[j])
218     cvisit.SetBitOn(j);
219     }
220    
221     }
222     }
223     // Double bond have been assigned, set the remaining aromatic bonds to single
224     //std::cout << "Set not assigned single bonds\n";
225     for(i=0;i <NumBonds(); i++) {
226     bond = GetBond(i);
227     //std::cout << "bond " << bond->GetBeginAtomIdx() << " " << bond->GetEndAtomIdx() << " ";
228     if (bond->GetBO()==5 ) {
229     bond->SetKSingle();
230     bond->SetBO(1);
231     //std::cout << "single\n";
232     }
233     //else
234     // std::cout << "double\n";
235     }
236    
237     return;
238     }
239    
240     ///////////////////////////////////////////////////////////////////////////////////////
241     //! \brief Start kekulizing one or a fused set of aromatic ring(s)
242    
243     //! The initial electronic state indicates if an atoms must make a double bond or not
244     //! Kekulizing is attempted recursively for all the atoms bonded to the first atom
245     //! of the cycle.
246     void OBMol::start_kekulize( std::vector <OBAtom*> &cycle, std::vector<int> &electron) {
247    
248     std::vector<int> initState;
249     std::vector<int> currentState;
250     std::vector<int> binitState;
251     std::vector<int> bcurrentState;
252     std::vector<bool> mark;
253     unsigned int Idx;
254     OBAtom *atom, *atom2;
255     OBBond *bond;
256    
257     //init the atom arrays
258     unsigned i;
259     for(i=0;i <NumAtoms()+1; i++) {
260     initState.push_back(-1);
261     currentState.push_back(-1);
262     mark.push_back(false);
263     }
264    
265     //init the bond arrays with single bonds
266     for(i=0;i <NumBonds(); i++) {
267     binitState.push_back(SINGLE);
268     bcurrentState.push_back(SINGLE);
269     }
270    
271     //set the electron number
272     for(i=0; i< cycle.size(); i++) {
273     atom = cycle[i];
274     Idx = atom->GetIdx();
275     if ( electron[i] == 1)
276     initState[Idx] = 1; // make 1 double bond
277     else
278     initState[Idx] = 2; // make 2 single bonds
279    
280     currentState[Idx] = initState[Idx];
281     }
282    
283     std::vector<OBEdgeBase*>::iterator b;
284     OBAtom *nbr;
285    
286     bool second_pass=false;
287     // for( i=1; i<= NumAtoms(); i++) {
288     // if(currentState[i] == 1) { // the atom can make a double bond
289     // atom = GetAtom(i);
290     // //find a neighbour that can make a double bond
291     // // and start kekulize
292     // for (nbr = atom->BeginNbrAtom(b);nbr;nbr = atom->NextNbrAtom(b)) {
293     // if(currentState[nbr->GetIdx()]==1){
294     // if(!expand_kekulize(atom,nbr,currentState,initState, bcurrentState,binitState, mark)) {
295     // second_pass=true;
296     // }
297    
298     // }
299     // }
300     // }
301     // }
302     bool expand_successful;
303     atom = cycle[0];
304     for (nbr = atom->BeginNbrAtom(b);nbr;nbr = atom->NextNbrAtom(b)) {
305     if(initState[nbr->GetIdx()] == -1) //neighbor atom not in the cycle, try next one
306     continue;
307     //std::cout << "Expand kekulize\n";
308     expand_kekulize(atom,nbr,currentState,initState, bcurrentState,binitState, mark) ;
309     //Control that all the electron have been given to the cycle(s)
310     expand_successful = true;
311     for(unsigned i=0; i< cycle.size(); i++) {
312     atom2 = cycle[i];
313     Idx = atom2->GetIdx();
314     //cout << "\t" << currentState[Idx];
315     if (currentState[Idx] == 1)
316     expand_successful=false;
317     }
318     //cout << endl;
319     if (expand_successful)
320     break;
321     else {
322     unsigned i;
323     for(i=0;i <NumAtoms()+1; i++) {
324     currentState[i]=initState[i];
325     mark[i]=false;
326     }
327     for(i=0;i <NumBonds(); i++) {
328     bcurrentState[i]=binitState[i];
329     }
330     }
331     }
332     if (!expand_successful)
333     {
334     #ifdef HAVE_SSTREAM
335     stringstream errorMsg;
336     #else
337     strstream errorMsg;
338     #endif
339     errorMsg << "Kekulize Error for molecule " << GetTitle() << endl;
340 tim 2518 obErrorLog.ThrowError(__func__, errorMsg.str(), obInfo);
341 tim 2440 }
342    
343     // Set the double bonds
344     // std::cout << "Set double bonds\n";
345     for(i=0;i <NumBonds(); i++) {
346     bond = GetBond(i);
347     // std::cout << "bond " << bond->GetBeginAtomIdx() << " " << bond->GetEndAtomIdx() << " ";
348     if (bond->GetBO()==5 && bcurrentState[i] == DOUBLE) {
349 gezelter 3057 if ( (bond->GetBeginAtom())->IsSulfur()
350     && bond->GetEndAtom()->IsSulfur() ) {
351     // no double bonds between aromatic sulfur atoms -- PR#1504089
352     continue;
353     }
354    
355 tim 2440 bond->SetKDouble();
356     bond->SetBO(2);
357     //std::cout << "double\n";
358     }
359     //else
360     //std::cout << "single\n";
361     //else if (bond->IsAromatic() && bond->GetBO() != 2)
362     // bond->SetBO(1);
363     }
364    
365     return;
366     }
367    
368    
369     /////////////////////////////////////////////////////////////////////////////////////////
370     //! \brief recursively assign single and double bonds according to the electronical state
371     //! of the atoms of the current bond
372     int OBMol::expand_kekulize(OBAtom *atom1, OBAtom *atom2, std::vector<int> &currentState, std::vector<int> &initState, std::vector<int> &bcurrentState, std::vector<int> &binitState, std::vector<bool> &mark)
373     {
374     int done;
375     int Idx1=atom1->GetIdx(), Idx2=atom2->GetIdx();
376     OBBond *bond;
377     std::vector<OBEdgeBase*>::iterator i;
378     OBAtom *nbr;
379     int natom;
380    
381     mark[Idx1]= true;
382     bond = atom1->GetBond(atom2);
383     int bIdx = bond->GetIdx();
384    
385     //cout << "assign bond state for atoms " << Idx1 << " and " << Idx2 << endl;
386     if (currentState[Idx1] == 1 && currentState[Idx2] == 1) {
387     currentState[Idx1]=0;
388     currentState[Idx2]=0;
389     // set bond to double
390     //std::cout << "bond " << Idx1 << " " << Idx2 << " double\n";
391     bcurrentState[bIdx]=DOUBLE;
392     }
393     else if (currentState[Idx1] == 0 && currentState[Idx2] == 1 ||
394     currentState[Idx1] == 2 && currentState[Idx2] == 1 ||
395     currentState[Idx1] == 2 && currentState[Idx2] == 2) {
396     //std::cout << "bond " << Idx1 << " " << Idx2 << " single\n";
397     // leave bond to single
398     }
399     else if (currentState[Idx1] == 1 && currentState[Idx2] == 0 ||
400     currentState[Idx1] == 1 && currentState[Idx2] == 2) {
401     mark[Idx1]=false;
402     //std::cout << "bond " << Idx1 << " " << Idx2 << " error\n";
403     return (0); // error
404     }
405     else if (currentState[Idx1] == 0 && currentState[Idx2] == 0
406     || currentState[Idx1] == 2 && currentState[Idx2] == 0) {
407     //std::cout << "bond " << Idx1 << " " << Idx2 << " done\n";
408     mark[Idx2]=true;
409     return (1); //done
410     }
411     else if (currentState[Idx1] == 0 && currentState[Idx2] == 2) {
412     currentState[Idx2]=0;
413     //std::cout << "bond " << Idx1 << " " << Idx2 << " leave single\n";
414     // leave bond to single
415     }
416     else {
417    
418     #ifdef HAVE_SSTREAM
419     stringstream errorMsg;
420     #else
421     strstream errorMsg;
422     #endif
423    
424     errorMsg << "unexpected state:" << "atom " << Idx1 << " " << currentState[Idx1]
425     << " atom " << Idx2 << " " << currentState[Idx2] << endl;
426 tim 2518 obErrorLog.ThrowError(__func__, errorMsg.str(), obDebug);
427 tim 2440 return(false);
428     }
429    
430     //int c;
431     //for(c=1; c < currentState.size(); c++) {
432     //cout << c << "\t";
433     //}
434     //cout << endl;
435     //for(c=1; c < currentState.size(); c++) {
436     //cout << currentState[c] << "\t";
437     //}
438     //cout << endl;
439    
440     vector<int> previousState = currentState; // Backup the atom
441     vector<int> bpreviousState = bcurrentState; // and the bond states before expanding again
442    
443     bool return_false=false;
444     // for each neighbor of atom 2 not already kekulized
445     for (nbr = atom2->BeginNbrAtom(i);nbr;nbr = atom2->NextNbrAtom(i))
446     {
447     natom = nbr->GetIdx();
448     if(initState[natom] == -1) //neighbor atom not in the cycle, try next one
449     continue;
450     if ( !mark[natom] ) {
451     done = expand_kekulize(atom2, nbr, currentState, initState, bcurrentState, binitState, mark);
452     if ( !done ) // kekulize failed
453     return_false =true;
454     else
455     return_false =false;
456     }
457    
458     }
459     if (return_false) { // no good solution found
460     //cout << "return_false:no good solution\n" << endl;
461     //cout << "reset state of " << Idx1 << " and " << Idx2 << " from " << currentState[Idx1]
462     //<< " " << currentState[Idx2] << " to ";
463    
464     // retrieve the states that might have been changed during kekulize expansion
465     currentState = previousState;
466    
467    
468     bcurrentState = bpreviousState;
469    
470    
471     // reset the bond and the atom states
472     if (bcurrentState[bIdx] == DOUBLE)
473     currentState[Idx1]=initState[Idx1];
474    
475     currentState[Idx2]=initState[Idx2];
476     bcurrentState[bIdx]=binitState[bIdx]; // should be always single
477     mark[Idx2]=false;
478    
479     //cout << currentState[Idx1] << " " << currentState[Idx2] << endl;
480     return (false);
481     }
482     // atom 2 cannot make any bond, should not have 1 electron
483     if (currentState[Idx2] == 1) {
484     // currentState[Idx1]=initState[Idx1];
485     //cout << "return true but " << Idx2 << " state = 1\n";
486     mark[Idx1]=false;
487     return (false);
488     }
489     else {
490     // if we found a good solution, then the state of Idx2 may have shifted from 1 to 0 during the kekulization
491     // If it is the case, we should check if there is a remaining unmarked neighbor because it is possible
492     // that kekulizing from this neigbor failed just because Idx2 was equal to 1
493    
494     if(previousState[Idx2] == 1) {
495     // Since now Idx2 is equal to 0 because it kekulized well the kekulizing of the failed neigbor could be successfull
496     // If there is still an unmarked neigbor try to kekulize it again
497     //mark[Idx2]=true;
498     return_false=false;
499     //cout << "retry kekulizing from " << Idx2 << endl;
500    
501     for (nbr = atom2->BeginNbrAtom(i);nbr;nbr = atom2->NextNbrAtom(i))
502     {
503     natom = nbr->GetIdx();
504     if(initState[natom] == -1) //neighbor atom not in the cycle, try next one
505     continue;
506     if ( !mark[natom] ) {
507     //cout << "atom " << natom << " not marked, expand again" << endl;
508     done = expand_kekulize(atom2, nbr, currentState, initState, bcurrentState, binitState, mark);
509     if ( !done ) // kekulize failed
510     return_false =true;
511     else
512     return_false =false;
513     }
514    
515     }
516    
517     // if we cannot kekulize the remaining neigbor again then we have to return false
518     // we do not have to reset the states because the kekulize will fail anyway
519     if(return_false) {
520     //cout << "rekekulize failed" << endl;
521     return(false);
522     }
523     else {
524     //cout << "rekekulized successfull" << endl;
525     return (true);
526     }
527    
528     }
529    
530    
531     //cout << "return_true: good solution" << endl;
532     return (true);
533     }
534     }
535    
536     //! Give the priority to give two electrons instead of 1
537     int OBMol::getorden( OBAtom *atom)
538     {
539     if ( atom->IsSulfur() ) return 1;
540     if ( atom->IsOxygen() ) return 2;
541     if ( atom->GetAtomicNum() == 34 ) return 3;
542     if ( atom->IsNitrogen() && atom->GetFormalCharge() == 0 && atom->GetValence() == 3) return 5;
543     if ( atom->IsAmideNitrogen() ) return 4;
544     if ( atom->IsNitrogen() && atom->GetFormalCharge() == -1) return 6;
545     if ( atom->IsNitrogen() && atom->GetFormalCharge() == 0 && atom->IsInRingSize(5) ) return 7;
546     if ( atom->IsNitrogen() && atom->GetFormalCharge() == 0 ) return 8;
547     if ( atom->IsCarbon() && atom->GetFormalCharge() == -1) return 9;
548     //if ( atom->IsCarbon() ) return 9;
549    
550     return (100); //no atom found
551     }
552    
553     //! Recursively find the aromatic atoms with an aromatic bond to the current atom
554     void OBMol::expandcycle (OBAtom *atom, OBBitVec &avisit)
555     {
556     OBAtom *nbr;
557     // OBBond *bond;
558     std::vector<OBEdgeBase*>::iterator i;
559     int natom;
560     //for each neighbour atom test if it is in the aromatic ring
561     for (nbr = atom->BeginNbrAtom(i);nbr;nbr = atom->NextNbrAtom(i))
562     {
563     natom = nbr->GetIdx();
564     // if (!avisit[natom] && nbr->IsAromatic() && ((OBBond*) *i)->IsAromatic()) {
565     if (!avisit[natom] && ((OBBond*) *i)->GetBO()==5) {
566     avisit.SetBitOn(natom);
567     expandcycle(nbr, avisit);
568     }
569     }
570     }
571    
572     } // end namespace OpenBabel
573    
574     //! \file kekulize.cpp
575     //! \brief Alternate algorithm to kekulize a molecule (OBMol::NewPerceiveKekuleBonds()).