ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/OOPSE-4/src/openbabel/kekulize.cpp
Revision: 2440
Committed: Wed Nov 16 19:42:11 2005 UTC (18 years, 8 months ago) by tim
File size: 17817 byte(s)
Log Message:
adding openbabel

File Contents

# User Rev Content
1 tim 2440 /**********************************************************************
2     kekulize.cpp - Alternate algorithm to kekulize a molecule.
3    
4     Copyright (C) 2004-2005 by Fabien Fontaine
5     Some portions Copyright (C) 2005 by Geoffrey R. Hutchison
6    
7     This file is part of the Open Babel project.
8     For more information, see <http://openbabel.sourceforge.net/>
9    
10     This program is free software; you can redistribute it and/or modify
11     it under the terms of the GNU General Public License as published by
12     the Free Software Foundation version 2 of the License.
13    
14     This program is distributed in the hope that it will be useful,
15     but WITHOUT ANY WARRANTY; without even the implied warranty of
16     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17     GNU General Public License for more details.
18     ***********************************************************************/
19    
20     #include "mol.hpp"
21     #include "oberror.hpp"
22    
23     #ifdef HAVE_SSTREAM
24     #include <sstream>
25     #else
26     #include <strstream>
27     #endif
28    
29     #define SINGLE 1
30     #define DOUBLE 2
31    
32     using namespace std;
33    
34     namespace OpenBabel {
35    
36     ///////////////////////////////////////////////////////////////////////////////
37     //! \brief Kekulize aromatic rings without using implicit valence
38    
39     //! This new perceive kekule bonds function has been especifically designed to
40     //! handle molecule files without explicit hydrogens such as pdb or xyz.
41     //! The function does not rely on GetImplicitValence function
42     //! The function looks for groups of aromatic cycle
43     //! For each group it tries to guess the number of electrons given by each atom
44     //! in order to satisfy the huckel (4n+2) rule
45     //! If the huckel rule cannot be satisfied the algorithm try with its best alternative guess
46     //! Then it recursively walk on the atoms of the cycle and assign single and double bonds
47     void OBMol::NewPerceiveKekuleBonds()
48     {
49    
50     if (HasKekulePerceived()) return;
51     SetKekulePerceived();
52    
53     OBAtom *atom;
54     int n, de, minde;
55     std::vector<OBAtom*> cycle;
56     OBBitVec avisit,cvisit;
57     avisit.Resize(NumAtoms()+1);
58     cvisit.Resize(NumAtoms()+1);
59     OBBond *bond;
60     std::vector<OBEdgeBase*>::iterator bi;
61     std::vector<int> electron;
62     int BO;
63     int sume, orden, bestorden, bestatom;
64     // Init the kekulized bonds
65     unsigned i;
66     for(i=0; i< NumBonds(); i++ ) {
67     bond = GetBond(i);
68     BO = bond->GetBO();
69     switch (BO)
70     {
71     case 1: bond->SetKSingle(); break;
72     case 2: bond->SetKDouble(); break;
73     case 3: bond->SetKTriple(); break;
74     }
75     }
76    
77     // Find all the groups of aromatic cycle
78     for(i=1; i<= NumAtoms(); i++ ) {
79     atom = GetAtom(i);
80     if (atom->HasAromaticBond() && !cvisit[i]) { // is new aromatic atom of an aromatic cycle ?
81    
82     avisit.Clear();
83     electron.clear();
84     cycle.clear();
85    
86     avisit.SetBitOn(i);
87     expandcycle (atom, avisit);
88     //store the atoms of the cycle(s)
89     unsigned int j;
90     for(j=1; j<= NumAtoms(); j++) {
91     if ( avisit[j] ) {
92     atom = GetAtom(j);
93     cycle.push_back(atom);
94     }
95     }
96     // At the begining each atom give one electron to the cycle
97     for(j=0; j< cycle.size(); j++) {
98     electron.push_back(1);
99     }
100    
101     // remove one electron if the atom make a double bond out of the cycle
102     sume =0;
103     for(j=0; j< cycle.size(); j++) {
104     atom = cycle[j];
105     for(bond = atom->BeginBond(bi); bond; bond = atom->NextBond(bi)) {
106     if ( bond->IsDouble() ) {
107     OBAtom *atom2 = bond->GetNbrAtom(atom);
108     int fcharge = atom->GetFormalCharge();
109     int fcharge2 = atom2->GetFormalCharge();
110     if(atom->IsNitrogen() && atom2->IsOxygen()
111     && fcharge == 0 && fcharge2 == 0) { //n=O to [n+][O-]
112     atom->SetFormalCharge(1);
113     atom2->SetFormalCharge(-1);
114     bond->SetKSingle();
115     bond->SetBO(1);
116     }
117     else {
118     electron[j] = 0;
119     }
120     }
121     }
122     // count the number of electrons
123     sume += electron[j];
124     }
125    
126    
127     // Save the electron state in case huckel rule is not satisfied
128     vector<int> previousElectron = electron;
129    
130     // find the ideal number of electrons according to the huckel 4n+2 rule
131     minde=99;
132     for (i=1; 1; i++) {
133     n = 4 *i +2;
134     de = n - sume;
135     if ( de < minde )
136     minde=de;
137     else if ( minde < 0 )
138     minde=de;
139     else
140     break;
141     }
142    
143     #ifdef HAVE_SSTREAM
144     stringstream errorMsg;
145     #else
146     strstream errorMsg;
147     #endif
148    
149     //cout << "minde before:" << minde << endl;
150     // if huckel rule not satisfied some atoms must give more electrons
151     //cout << "minde " << minde << endl;
152     while ( minde != 0 ) {
153     bestorden=99;
154     for(j=0; j< cycle.size(); j++) {
155     if (electron[j] == 1) {
156     orden = getorden(cycle[j]);
157     if (orden < bestorden) {
158     bestorden = orden;
159     bestatom = j;
160     }
161     }
162     }
163     if (bestorden==99) { // no electron giving atom found
164     errorMsg << "Kekulize: Huckel rule not satisfied for molecule " << GetTitle() << endl;
165     obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obInfo);
166     break; // Huckel rule cannot be satisfied
167     } // try to kekulize anyway
168     else {
169     electron[bestatom] += 1;
170     minde--;
171     }
172     }
173    
174     if (bestorden == 99) { // Huckel rule not satisfied, just try to get an even number of electron before kekulizing
175    
176     electron = previousElectron; // restore electon's state
177    
178     int odd = sume % 2;
179     //cout << "odd:" << odd << endl;
180     if(odd) { // odd number of electrons try to add an electron to the best possible atom
181     for(j=0; j< cycle.size(); j++) {
182     if (electron[j] == 1) {
183     orden = getorden(cycle[j]);
184     if (orden < bestorden) {
185     bestorden = orden;
186     bestatom = j;
187     }
188     }
189     }
190     if (bestorden==99) { // no electron giving atom found
191     errorMsg << "Kekulize: Cannot get an even number of electron for molecule " << GetTitle() << "\n";
192     obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obInfo);
193     break; // impossible to choose an atom to obtain an even number of electron
194     } // try to kekulize anyway
195     else {
196     electron[bestatom] += 1;
197     }
198     }
199     }
200    
201     //cout << "minde after:" << minde <<endl;
202     //for(j=0; j < cycle.size(); j++) {
203     //OBAtom *cycleAtom = cycle[j];
204     //cout << "\t" << cycleAtom->GetIdx();
205     //}
206     //cout << endl;
207    
208     //for(j=0; j < electron.size(); j++) {
209     //cout << "\t" << electron[j];
210     //}
211     //cout << endl;
212    
213     // kekulize the cycle(s)
214     start_kekulize(cycle,electron);
215    
216     // Set the kekulized cycle(s) as visited
217     for(j=1; j<= NumAtoms(); j++) {
218     if (avisit[j])
219     cvisit.SetBitOn(j);
220     }
221    
222     }
223     }
224     // Double bond have been assigned, set the remaining aromatic bonds to single
225     //std::cout << "Set not assigned single bonds\n";
226     for(i=0;i <NumBonds(); i++) {
227     bond = GetBond(i);
228     //std::cout << "bond " << bond->GetBeginAtomIdx() << " " << bond->GetEndAtomIdx() << " ";
229     if (bond->GetBO()==5 ) {
230     bond->SetKSingle();
231     bond->SetBO(1);
232     //std::cout << "single\n";
233     }
234     //else
235     // std::cout << "double\n";
236     }
237    
238     return;
239     }
240    
241     ///////////////////////////////////////////////////////////////////////////////////////
242     //! \brief Start kekulizing one or a fused set of aromatic ring(s)
243    
244     //! The initial electronic state indicates if an atoms must make a double bond or not
245     //! Kekulizing is attempted recursively for all the atoms bonded to the first atom
246     //! of the cycle.
247     void OBMol::start_kekulize( std::vector <OBAtom*> &cycle, std::vector<int> &electron) {
248    
249     std::vector<int> initState;
250     std::vector<int> currentState;
251     std::vector<int> binitState;
252     std::vector<int> bcurrentState;
253     std::vector<bool> mark;
254     unsigned int Idx;
255     OBAtom *atom, *atom2;
256     OBBond *bond;
257    
258     //init the atom arrays
259     unsigned i;
260     for(i=0;i <NumAtoms()+1; i++) {
261     initState.push_back(-1);
262     currentState.push_back(-1);
263     mark.push_back(false);
264     }
265    
266     //init the bond arrays with single bonds
267     for(i=0;i <NumBonds(); i++) {
268     binitState.push_back(SINGLE);
269     bcurrentState.push_back(SINGLE);
270     }
271    
272     //set the electron number
273     for(i=0; i< cycle.size(); i++) {
274     atom = cycle[i];
275     Idx = atom->GetIdx();
276     if ( electron[i] == 1)
277     initState[Idx] = 1; // make 1 double bond
278     else
279     initState[Idx] = 2; // make 2 single bonds
280    
281     currentState[Idx] = initState[Idx];
282     }
283    
284     std::vector<OBEdgeBase*>::iterator b;
285     OBAtom *nbr;
286    
287     bool second_pass=false;
288     // for( i=1; i<= NumAtoms(); i++) {
289     // if(currentState[i] == 1) { // the atom can make a double bond
290     // atom = GetAtom(i);
291     // //find a neighbour that can make a double bond
292     // // and start kekulize
293     // for (nbr = atom->BeginNbrAtom(b);nbr;nbr = atom->NextNbrAtom(b)) {
294     // if(currentState[nbr->GetIdx()]==1){
295     // if(!expand_kekulize(atom,nbr,currentState,initState, bcurrentState,binitState, mark)) {
296     // second_pass=true;
297     // }
298    
299     // }
300     // }
301     // }
302     // }
303     bool expand_successful;
304     atom = cycle[0];
305     for (nbr = atom->BeginNbrAtom(b);nbr;nbr = atom->NextNbrAtom(b)) {
306     if(initState[nbr->GetIdx()] == -1) //neighbor atom not in the cycle, try next one
307     continue;
308     //std::cout << "Expand kekulize\n";
309     expand_kekulize(atom,nbr,currentState,initState, bcurrentState,binitState, mark) ;
310     //Control that all the electron have been given to the cycle(s)
311     expand_successful = true;
312     for(unsigned i=0; i< cycle.size(); i++) {
313     atom2 = cycle[i];
314     Idx = atom2->GetIdx();
315     //cout << "\t" << currentState[Idx];
316     if (currentState[Idx] == 1)
317     expand_successful=false;
318     }
319     //cout << endl;
320     if (expand_successful)
321     break;
322     else {
323     unsigned i;
324     for(i=0;i <NumAtoms()+1; i++) {
325     currentState[i]=initState[i];
326     mark[i]=false;
327     }
328     for(i=0;i <NumBonds(); i++) {
329     bcurrentState[i]=binitState[i];
330     }
331     }
332     }
333     if (!expand_successful)
334     {
335     #ifdef HAVE_SSTREAM
336     stringstream errorMsg;
337     #else
338     strstream errorMsg;
339     #endif
340     errorMsg << "Kekulize Error for molecule " << GetTitle() << endl;
341     obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obInfo);
342     }
343    
344     // Set the double bonds
345     // std::cout << "Set double bonds\n";
346     for(i=0;i <NumBonds(); i++) {
347     bond = GetBond(i);
348     // std::cout << "bond " << bond->GetBeginAtomIdx() << " " << bond->GetEndAtomIdx() << " ";
349     if (bond->GetBO()==5 && bcurrentState[i] == DOUBLE) {
350     bond->SetKDouble();
351     bond->SetBO(2);
352     //std::cout << "double\n";
353     }
354     //else
355     //std::cout << "single\n";
356     //else if (bond->IsAromatic() && bond->GetBO() != 2)
357     // bond->SetBO(1);
358     }
359    
360     return;
361     }
362    
363    
364     /////////////////////////////////////////////////////////////////////////////////////////
365     //! \brief recursively assign single and double bonds according to the electronical state
366     //! of the atoms of the current bond
367     int OBMol::expand_kekulize(OBAtom *atom1, OBAtom *atom2, std::vector<int> &currentState, std::vector<int> &initState, std::vector<int> &bcurrentState, std::vector<int> &binitState, std::vector<bool> &mark)
368     {
369     int done;
370     int Idx1=atom1->GetIdx(), Idx2=atom2->GetIdx();
371     OBBond *bond;
372     std::vector<OBEdgeBase*>::iterator i;
373     OBAtom *nbr;
374     int natom;
375    
376     mark[Idx1]= true;
377     bond = atom1->GetBond(atom2);
378     int bIdx = bond->GetIdx();
379    
380     //cout << "assign bond state for atoms " << Idx1 << " and " << Idx2 << endl;
381     if (currentState[Idx1] == 1 && currentState[Idx2] == 1) {
382     currentState[Idx1]=0;
383     currentState[Idx2]=0;
384     // set bond to double
385     //std::cout << "bond " << Idx1 << " " << Idx2 << " double\n";
386     bcurrentState[bIdx]=DOUBLE;
387     }
388     else if (currentState[Idx1] == 0 && currentState[Idx2] == 1 ||
389     currentState[Idx1] == 2 && currentState[Idx2] == 1 ||
390     currentState[Idx1] == 2 && currentState[Idx2] == 2) {
391     //std::cout << "bond " << Idx1 << " " << Idx2 << " single\n";
392     // leave bond to single
393     }
394     else if (currentState[Idx1] == 1 && currentState[Idx2] == 0 ||
395     currentState[Idx1] == 1 && currentState[Idx2] == 2) {
396     mark[Idx1]=false;
397     //std::cout << "bond " << Idx1 << " " << Idx2 << " error\n";
398     return (0); // error
399     }
400     else if (currentState[Idx1] == 0 && currentState[Idx2] == 0
401     || currentState[Idx1] == 2 && currentState[Idx2] == 0) {
402     //std::cout << "bond " << Idx1 << " " << Idx2 << " done\n";
403     mark[Idx2]=true;
404     return (1); //done
405     }
406     else if (currentState[Idx1] == 0 && currentState[Idx2] == 2) {
407     currentState[Idx2]=0;
408     //std::cout << "bond " << Idx1 << " " << Idx2 << " leave single\n";
409     // leave bond to single
410     }
411     else {
412    
413     #ifdef HAVE_SSTREAM
414     stringstream errorMsg;
415     #else
416     strstream errorMsg;
417     #endif
418    
419     errorMsg << "unexpected state:" << "atom " << Idx1 << " " << currentState[Idx1]
420     << " atom " << Idx2 << " " << currentState[Idx2] << endl;
421     obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obDebug);
422     return(false);
423     }
424    
425     //int c;
426     //for(c=1; c < currentState.size(); c++) {
427     //cout << c << "\t";
428     //}
429     //cout << endl;
430     //for(c=1; c < currentState.size(); c++) {
431     //cout << currentState[c] << "\t";
432     //}
433     //cout << endl;
434    
435     vector<int> previousState = currentState; // Backup the atom
436     vector<int> bpreviousState = bcurrentState; // and the bond states before expanding again
437    
438     bool return_false=false;
439     // for each neighbor of atom 2 not already kekulized
440     for (nbr = atom2->BeginNbrAtom(i);nbr;nbr = atom2->NextNbrAtom(i))
441     {
442     natom = nbr->GetIdx();
443     if(initState[natom] == -1) //neighbor atom not in the cycle, try next one
444     continue;
445     if ( !mark[natom] ) {
446     done = expand_kekulize(atom2, nbr, currentState, initState, bcurrentState, binitState, mark);
447     if ( !done ) // kekulize failed
448     return_false =true;
449     else
450     return_false =false;
451     }
452    
453     }
454     if (return_false) { // no good solution found
455     //cout << "return_false:no good solution\n" << endl;
456     //cout << "reset state of " << Idx1 << " and " << Idx2 << " from " << currentState[Idx1]
457     //<< " " << currentState[Idx2] << " to ";
458    
459     // retrieve the states that might have been changed during kekulize expansion
460     currentState = previousState;
461    
462    
463     bcurrentState = bpreviousState;
464    
465    
466     // reset the bond and the atom states
467     if (bcurrentState[bIdx] == DOUBLE)
468     currentState[Idx1]=initState[Idx1];
469    
470     currentState[Idx2]=initState[Idx2];
471     bcurrentState[bIdx]=binitState[bIdx]; // should be always single
472     mark[Idx2]=false;
473    
474     //cout << currentState[Idx1] << " " << currentState[Idx2] << endl;
475     return (false);
476     }
477     // atom 2 cannot make any bond, should not have 1 electron
478     if (currentState[Idx2] == 1) {
479     // currentState[Idx1]=initState[Idx1];
480     //cout << "return true but " << Idx2 << " state = 1\n";
481     mark[Idx1]=false;
482     return (false);
483     }
484     else {
485     // if we found a good solution, then the state of Idx2 may have shifted from 1 to 0 during the kekulization
486     // If it is the case, we should check if there is a remaining unmarked neighbor because it is possible
487     // that kekulizing from this neigbor failed just because Idx2 was equal to 1
488    
489     if(previousState[Idx2] == 1) {
490     // Since now Idx2 is equal to 0 because it kekulized well the kekulizing of the failed neigbor could be successfull
491     // If there is still an unmarked neigbor try to kekulize it again
492     //mark[Idx2]=true;
493     return_false=false;
494     //cout << "retry kekulizing from " << Idx2 << endl;
495    
496     for (nbr = atom2->BeginNbrAtom(i);nbr;nbr = atom2->NextNbrAtom(i))
497     {
498     natom = nbr->GetIdx();
499     if(initState[natom] == -1) //neighbor atom not in the cycle, try next one
500     continue;
501     if ( !mark[natom] ) {
502     //cout << "atom " << natom << " not marked, expand again" << endl;
503     done = expand_kekulize(atom2, nbr, currentState, initState, bcurrentState, binitState, mark);
504     if ( !done ) // kekulize failed
505     return_false =true;
506     else
507     return_false =false;
508     }
509    
510     }
511    
512     // if we cannot kekulize the remaining neigbor again then we have to return false
513     // we do not have to reset the states because the kekulize will fail anyway
514     if(return_false) {
515     //cout << "rekekulize failed" << endl;
516     return(false);
517     }
518     else {
519     //cout << "rekekulized successfull" << endl;
520     return (true);
521     }
522    
523     }
524    
525    
526     //cout << "return_true: good solution" << endl;
527     return (true);
528     }
529     }
530    
531     //! Give the priority to give two electrons instead of 1
532     int OBMol::getorden( OBAtom *atom)
533     {
534     if ( atom->IsSulfur() ) return 1;
535     if ( atom->IsOxygen() ) return 2;
536     if ( atom->GetAtomicNum() == 34 ) return 3;
537     if ( atom->IsNitrogen() && atom->GetFormalCharge() == 0 && atom->GetValence() == 3) return 5;
538     if ( atom->IsAmideNitrogen() ) return 4;
539     if ( atom->IsNitrogen() && atom->GetFormalCharge() == -1) return 6;
540     if ( atom->IsNitrogen() && atom->GetFormalCharge() == 0 && atom->IsInRingSize(5) ) return 7;
541     if ( atom->IsNitrogen() && atom->GetFormalCharge() == 0 ) return 8;
542     if ( atom->IsCarbon() && atom->GetFormalCharge() == -1) return 9;
543     //if ( atom->IsCarbon() ) return 9;
544    
545     return (100); //no atom found
546     }
547    
548     //! Recursively find the aromatic atoms with an aromatic bond to the current atom
549     void OBMol::expandcycle (OBAtom *atom, OBBitVec &avisit)
550     {
551     OBAtom *nbr;
552     // OBBond *bond;
553     std::vector<OBEdgeBase*>::iterator i;
554     int natom;
555     //for each neighbour atom test if it is in the aromatic ring
556     for (nbr = atom->BeginNbrAtom(i);nbr;nbr = atom->NextNbrAtom(i))
557     {
558     natom = nbr->GetIdx();
559     // if (!avisit[natom] && nbr->IsAromatic() && ((OBBond*) *i)->IsAromatic()) {
560     if (!avisit[natom] && ((OBBond*) *i)->GetBO()==5) {
561     avisit.SetBitOn(natom);
562     expandcycle(nbr, avisit);
563     }
564     }
565     }
566    
567     } // end namespace OpenBabel
568    
569     //! \file kekulize.cpp
570     //! \brief Alternate algorithm to kekulize a molecule (OBMol::NewPerceiveKekuleBonds()).