ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/OOPSE-4/src/openbabel/smilesformat.cpp
Revision: 2450
Committed: Thu Nov 17 20:38:45 2005 UTC (18 years, 8 months ago) by gezelter
File size: 68001 byte(s)
Log Message:
Unifying config.h stuff and making sure the OpenBabel codes can find
our default (and environment variable) Force Field directories.

File Contents

# User Rev Content
1 tim 2445 /**********************************************************************
2     Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc.
3     Some portions Copyright (C) 2001-2005 by Geoffrey R. Hutchison
4     Some portions Copyright (C) 2004 by Chris Morley
5    
6     This program is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation version 2 of the License.
9    
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13     GNU General Public License for more details.
14     ***********************************************************************/
15     //Contains SMIFormat and FIXFormat classes
16    
17     #include "smilesformat.hpp"
18    
19    
20     using namespace std;
21    
22     namespace OpenBabel
23     {
24     class OBSmiNode
25     {
26     OBAtom *_atom,*_parent;
27     std::vector<OBSmiNode*> _nextnode;
28     std::vector<OBBond*> _nextbond;
29     public:
30     OBSmiNode(OBAtom *atom);
31     ~OBSmiNode();
32     int Size()
33     {
34     return((_nextnode.empty())?0:_nextnode.size());
35     }
36     void SetParent(OBAtom *a)
37     {
38     _parent = a;
39     }
40     void SetNextNode(OBSmiNode*,OBBond*);
41     OBAtom *GetAtom()
42     {
43     return(_atom);
44     }
45     OBAtom *GetParent()
46     {
47     return(_parent);
48     }
49     OBAtom *GetNextAtom(int i)
50     {
51     return(_nextnode[i]->GetAtom());
52     }
53     OBBond *GetNextBond(int i)
54     {
55     return(_nextbond[i]);
56     }
57     OBSmiNode *GetNextNode(int i)
58     {
59     return(_nextnode[i]);
60     }
61     };
62    
63     class OBMol2Smi
64     {
65     std::vector<int> _atmorder;
66     std::vector<int> _storder;
67     std::vector<bool> _aromNH;
68     OBBitVec _uatoms,_ubonds;
69     std::vector<OBEdgeBase*> _vclose;
70     std::vector<std::pair<OBAtom*,std::pair<int,int> > > _vopen;
71     OBConversion* _pconv;
72     public:
73     OBMol2Smi()
74     {
75     _vclose.clear();
76     }
77     ~OBMol2Smi()
78     {}
79     int GetUnusedIndex();
80     void Init(OBConversion* pconv=NULL);
81     void CreateSmiString(OBMol&,char*);
82     void GetClosureAtoms(OBAtom*,std::vector<OBNodeBase*>&);
83     void FindClosureBonds(OBMol&);
84     void ToSmilesString(OBSmiNode *node,char *buffer);
85     void RemoveUsedClosures();
86     void AssignCisTrans(OBSmiNode*);
87     bool BuildTree(OBSmiNode*);
88     bool GetSmilesElement(OBSmiNode*,char*);
89     bool GetChiralStereo(OBSmiNode*,char*);
90     void CorrectAromaticAmineCharge(OBMol&);
91     std::vector<std::pair<int,OBBond*> > GetClosureDigits(OBAtom*);
92     std::vector<int> &GetOutputOrder()
93     {
94     return(_atmorder);
95     }
96     };
97    
98     bool WriteTheSmiles(OBMol & mol,char *out);
99    
100     /////////////////////////////////////////////////////////////////
101     class OBSmilesParser
102     {
103     int _bondflags;
104     int _order;
105     int _prev;
106     char *_ptr;
107     vector<int> _vprev;
108     vector<vector<int> > _rclose;
109     vector<vector<int> > _extbond;
110     vector<int> _path;
111     vector<bool> _avisit;
112     vector<bool> _bvisit;
113     char _buffer[BUFF_SIZE];
114     bool chiralWatch; // set when a chiral atom is read
115     map<OBAtom*,OBChiralData*> _mapcd; // map of ChiralAtoms and their data
116     public:
117    
118     OBSmilesParser() { }
119     ~OBSmilesParser() { }
120    
121     bool SmiToMol(OBMol&,string&);
122     bool ParseSmiles(OBMol&);
123     bool ParseSimple(OBMol&);
124     bool ParseComplex(OBMol&);
125     bool ParseRingBond(OBMol&);
126     bool ParseExternalBond(OBMol&);
127     bool CapExternalBonds(OBMol &mol);
128     void FindAromaticBonds(OBMol &mol,OBAtom*,int);
129     void FindAromaticBonds(OBMol&);
130     void FindOrphanAromaticAtoms(OBMol &mol); //CM 18 Sept 2003
131     };
132    
133     /////////////////////////////////////////////////////////////////
134     bool SMIFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv)
135     {
136     OBMol* pmol = dynamic_cast<OBMol*>(pOb);
137    
138     //Define some references so we can use the old parameter names
139     istream &ifs = *pConv->GetInStream();
140     OBMol &mol = *pmol;
141     const char* title = pConv->GetTitle();
142    
143     //Taken unchanged from ReadSmiles
144     char buffer[BUFF_SIZE];
145    
146     if (!ifs.getline(buffer,BUFF_SIZE))
147     return(false);
148     vector<string> vs;
149     tokenize(vs,buffer);
150    
151     // RWT 10/3/2000
152     //
153     // added the following to allow spaces in compound names (titles).
154     // Essentially everything after the first space on a SMILES file line
155     // is treated as the name.
156     // Also had to change the condition a few lines below from:
157     // if (vs.size() == 2) ... to
158     // if (vs.size() >= 2)
159    
160     if (vs.size() > 2)
161     {
162     for (unsigned int i=2;i<vs.size(); i++)
163     {
164     vs[1]=vs[1]+" "+vs[i];
165     }
166     }
167    
168     if (vs.empty())
169     return false;
170     mol.SetDimension(0);
171    
172     if (vs.size() >= 2)
173     mol.SetTitle(vs[1].c_str());
174    
175     OBSmilesParser sp;
176     return sp.SmiToMol(mol,vs[0]);
177     }
178    
179     //////////////////////////////////////////////////
180     bool SMIFormat::WriteMolecule(OBBase* pOb,OBConversion* pConv)
181     {
182     OBMol* pmol = dynamic_cast<OBMol*>(pOb);
183    
184     //Define some references so we can use the old parameter names
185     ostream &ofs = *pConv->GetOutStream();
186     OBMol &mol = *pmol;
187    
188     if(pConv->IsOption("t")) //Title only option
189     {
190     ofs << mol.GetTitle() <<endl;
191     return true;
192     }
193     char buffer[BUFF_SIZE];
194     *buffer='\0'; //empty buffer
195    
196     // This is a hack to prevent recursion problems.
197     // we still need to fix the underlying problem (mainly chiral centers) -GRH
198     if (mol.NumAtoms() > 1000)
199     {
200     #ifdef HAVE_SSTREAM
201     stringstream errorMsg;
202     #else
203     strstream errorMsg;
204     #endif
205     errorMsg << "SMILES Conversion failed: Molecule is too large to convert. Open Babel is currently limited to 1000 atoms." << endl;
206     errorMsg << " Molecule size: " << mol.NumAtoms() << " atoms " << endl;
207     obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning);
208     return(false);
209     }
210    
211     if(mol.NumAtoms()!=0)
212     {
213     OBMol2Smi m2s;
214     m2s.Init(pConv);
215     m2s.CorrectAromaticAmineCharge(mol);
216     m2s.CreateSmiString(mol,buffer);
217     }
218    
219     ofs << buffer ;
220     if(!pConv->IsOption("n"))
221     ofs << '\t' << mol.GetTitle();
222     ofs << endl;
223    
224     return true;
225     }
226    
227     //////////////////////////////////////////////
228    
229     //CM not needed extern OBAromaticTyper aromtyper;
230     //OBAtomTyper atomtyperx; //CM
231    
232     bool OBSmilesParser::SmiToMol(OBMol &mol,string &s)
233     {
234     strcpy(_buffer,s.c_str());
235    
236     _vprev.clear();
237     _rclose.clear();
238     _prev=0;
239     chiralWatch=false;
240    
241     if (!ParseSmiles(mol))
242     {
243     mol.Clear();
244     return(false);
245     }
246    
247     // mol.AddHydrogens(); // need to add implicit hydrogens
248    
249     return(true);
250     }
251    
252     bool OBSmilesParser::ParseSmiles(OBMol &mol)
253     {
254     mol.BeginModify();
255    
256     for (_ptr=_buffer;*_ptr;_ptr++)
257     {
258     if (isspace(*_ptr))
259     continue;
260     else if (isdigit(*_ptr) || *_ptr == '%') //ring open/close
261     {
262     ParseRingBond(mol);
263     continue;
264     }
265     else if(*_ptr == '&') //external bond
266     {
267     ParseExternalBond(mol);
268     continue;
269     }
270     else
271     switch(*_ptr)
272     {
273     case '.':
274     _prev=0;
275     break;
276     case '(':
277     _vprev.push_back(_prev);
278     break;
279     case ')':
280     if(_vprev.empty()) //CM
281     return false;
282     _prev = _vprev.back();
283     _vprev.pop_back();
284     break;
285     case '[':
286     if (!ParseComplex(mol))
287     {
288     mol.EndModify();
289     mol.Clear();
290     return(false);
291     }
292     break;
293     case '-':
294     _order = 1;
295     break;
296     case '=':
297     _order = 2;
298     break;
299     case '#':
300     _order = 3;
301     break;
302     case ':':
303     _order = 5;
304     break;
305     case '/':
306     _bondflags |= OB_TORDOWN_BOND;
307     break;
308     case '\\':
309     _bondflags |= OB_TORUP_BOND;
310     break;
311     default:
312     if (!ParseSimple(mol))
313     {
314     mol.EndModify();
315     mol.Clear();
316     return(false);
317     }
318     } // end switch
319     } // end for _ptr
320    
321     // place dummy atoms for each unfilled external bond
322     if(!_extbond.empty())
323     CapExternalBonds(mol);
324    
325     //set aromatic bond orders
326     mol.SetAromaticPerceived();
327     FindAromaticBonds(mol);
328     FindOrphanAromaticAtoms(mol);// CM 18 Sept 2003
329     mol.AssignSpinMultiplicity();
330     mol.UnsetAromaticPerceived();
331    
332     mol.EndModify();
333    
334     //NE add the OBChiralData stored inside the _mapcd to the atoms now after end
335     // modify so they don't get lost.
336     if(_mapcd.size()>0)
337     {
338     OBAtom* atom;
339     OBChiralData* cd;
340     map<OBAtom*,OBChiralData*>::iterator ChiralSearch;
341     for(ChiralSearch=_mapcd.begin();ChiralSearch!=_mapcd.end();ChiralSearch++)
342     {
343     atom=ChiralSearch->first;
344     cd=ChiralSearch->second;
345     atom->SetData(cd);
346     }
347     }
348    
349     return(true);
350     }
351    
352     // CM 18 Sept 2003
353     void OBSmilesParser::FindOrphanAromaticAtoms(OBMol &mol)
354     {
355     //Facilitates the use lower case shorthand for radical entry
356     //Atoms which are marked as aromatic but have no aromatic bonds
357     //are taken to be radical centres
358     OBAtom *atom;
359     vector<OBNodeBase*>::iterator j;
360    
361     for (atom = mol.BeginAtom(j);atom;atom = mol.NextAtom(j))
362     if(atom->IsAromatic())
363     {
364     if(atom->CountBondsOfOrder(5)<2) //bonds order 5 set in FindAromaticBonds()
365     //not proper aromatic atoms - could be conjugated chain or radical centre
366     atom->UnsetAromatic();
367     else
368     {
369     //recognized as aromatic, so are not radicals
370     atom->SetSpinMultiplicity(0);
371     }
372     }
373     }
374    
375     void OBSmilesParser::FindAromaticBonds(OBMol &mol)
376     {
377     _path.clear();
378     _avisit.clear();
379     _bvisit.clear();
380     _avisit.resize(mol.NumAtoms()+1);
381     _bvisit.resize(mol.NumBonds());
382     _path.resize(mol.NumAtoms()+1);
383    
384     OBBond *bond;
385     vector<OBEdgeBase*>::iterator i;
386     for (bond = mol.BeginBond(i);bond;bond = mol.NextBond(i))
387     if (!bond->GetBeginAtom()->IsAromatic() ||
388     !bond->GetEndAtom()->IsAromatic())
389     _bvisit[bond->GetIdx()] = true;
390    
391     OBAtom *atom;
392     vector<OBNodeBase*>::iterator j;
393    
394     for (atom = mol.BeginAtom(j);atom;atom = mol.NextAtom(j))
395     if(!_avisit[atom->GetIdx()] && atom->IsAromatic())
396     FindAromaticBonds(mol,atom,0);
397     }
398    
399     void OBSmilesParser::FindAromaticBonds(OBMol &mol,OBAtom *atom,int depth )
400     {
401     OBBond *bond;
402     vector<OBEdgeBase*>::iterator k;
403    
404     if (_avisit[atom->GetIdx()])
405     {
406     int j = depth-1;
407     bond=mol.GetBond(_path[j--]);
408     bond->SetBO(5);
409     while( j >= 0 )
410     {
411     bond=mol.GetBond(_path[j--]);
412     bond->SetBO(5);
413     if(bond->GetBeginAtom() == atom || bond->GetEndAtom() == atom)
414     break;
415     }
416     }
417     else
418     {
419     _avisit[atom->GetIdx()] = true;
420     for(bond = atom->BeginBond(k);bond;bond = atom->NextBond(k))
421     if( !_bvisit[bond->GetIdx()])
422     {
423     _path[depth] = bond->GetIdx();
424     _bvisit[bond->GetIdx()] = true;
425     FindAromaticBonds(mol,bond->GetNbrAtom(atom),depth+1);
426     }
427     }
428     }
429    
430    
431     bool OBSmilesParser::ParseSimple(OBMol &mol)
432     {
433     char symbol[3];
434     int element;
435     bool arom=false;
436     memset(symbol,'\0',sizeof(char)*3);
437    
438     if (isupper(*_ptr))
439     switch(*_ptr)
440     {
441     case 'C':
442     _ptr++;
443     if (*_ptr == 'l')
444     {
445     strcpy(symbol,"Cl");
446     element = 17;
447     }
448     else
449     {
450     symbol[0] = 'C';
451     element = 6;
452     _ptr--;
453     }
454     break;
455    
456     case 'N':
457     element = 7;
458     symbol[0] = 'N';
459     break;
460     case 'O':
461     element = 8;
462     symbol[0] = 'O';
463     break;
464     case 'S':
465     element = 16;
466     symbol[0] = 'S';
467     break;
468     case 'P':
469     element = 15;
470     symbol[0] = 'P';
471     break;
472     case 'F':
473     element = 9;
474     symbol[0] = 'F';
475     break;
476     case 'I':
477     element = 53;
478     symbol[0] = 'I';
479     break;
480    
481     case 'B':
482     _ptr++;
483     if (*_ptr == 'r')
484     {
485     element = 35;
486     strcpy(symbol,"Br");
487     }
488     else
489     {
490     element = 5;
491     symbol[0] = 'B';
492     _ptr--;
493     }
494     break;
495     default:
496     return(false);
497     }
498     else
499     {
500     arom = true;
501     switch(*_ptr)
502     {
503     case 'c':
504     element = 6;
505     symbol[0] = 'C';
506     break;
507     case 'n':
508     element = 7;
509     symbol[0] = 'N';
510     break;
511     case 'o':
512     element = 8;
513     symbol[0] = 'O';
514     break;
515     case 'p':
516     element = 15;
517     symbol[0] = 'P';
518     break;
519     case 's':
520     element = 16;
521     symbol[0] = 'S';
522     break;
523     case '*':
524     element = 0;
525     strcpy(symbol,"Du");
526     break;
527     default:
528     return(false);
529     }
530     }
531    
532     OBAtom *atom = mol.NewAtom();
533     atom->SetAtomicNum(element);
534     atom->SetType(symbol);
535     if (arom)
536     {
537     atom->SetAromatic();
538     atom->SetSpinMultiplicity(2); // CM 18 Sept 2003
539     }
540    
541     if (_prev) //need to add bond
542     {
543     /* CM 18 Sept 2003
544     An extension to the SMILES format has been added so that lower case c,n,o can
545     represent a radical centre: CcC is isopropyl radical;
546     and cccc... a carbon chain bonded by conjugated double bonds.
547     Fails sometimes when using c as both aromatic and as the extened form.
548     For benzyl radical C6H5CH2. c1ccccc1c is ok; c1cc(c)ccc1 fails.
549     Radical centres should not be involved in ring closure:
550     for cyclohexyl radical C1cCCCC1 is ok, c1CCCCC1 is not.
551    
552     Implementation
553     Atoms c,n,o, etc initially added as a radical centre
554     unless _prev is a radical centre when both are made a normal atoms
555     connected by a double bond.
556     Since they are still marked as aromatic, FindAromaticBonds() will
557     replace the bonds by aromatic bonds if they are in a ring.
558     FindOrphanAromand removes the aromatic tag from the atoms not found in this way
559     and removes stray radical centres in .
560    
561     To avoid difficulties in complex aromatics with 5 membered rings containing N and O,
562     the above scheme modified to prevent recognition of aromatic structures is not confused.
563     - the double bond is made single if it would exceed valence of atom (not all aromatics have conjugated bonds)
564     - the radical centre is removed on both atoms when forming a ring (in ParseRingBond())
565     and on the new atom if the valence of the prev atom is being exceeded.
566     Note that the bond orders made here are reassigned in aromatic structures in FindAromaticBonds()
567     */
568     if(arom)
569     {
570     OBAtom* prevatom = mol.GetAtom(_prev);
571    
572     //Calculate available valency on prevatom
573     //This is far more difficult than it should be!
574     //Data not always updated during molecule constuction.
575     int val=0;
576     if(prevatom->IsCarbon())
577     val=4;
578     else if(prevatom->IsNitrogen())
579     val=3;
580     else if(prevatom->IsPhosphorus())
581     val=3;
582     else if(prevatom->IsOxygen())
583     val=2;
584     else if(prevatom->IsSulfur())
585     val=2;
586    
587     /* int sumBO=0;
588     vector<OBEdgeBase*>::iterator itr;
589     OBBond* bond = prevatom->BeginBond(itr);
590     while(bond)
591     {
592     sumBO +=bond->GetBO();
593     bond=prevatom->NextBond(itr);
594     }
595     if(prevatom->BOSum() != sumBO)
596     cerr << "BOSum != sumBO" << endl;
597     */
598     int AvailableValence = val + prevatom->GetFormalCharge() - prevatom->BOSum();//sumBO;
599    
600     if (prevatom->GetSpinMultiplicity())
601     {
602     prevatom->SetSpinMultiplicity(0);
603     atom->SetSpinMultiplicity(0);
604    
605     //Make the new bond double unless not allowed by prevatom's valence
606     _order = AvailableValence>=2 ? 2 : 1 ;
607     }
608     else
609     if(AvailableValence<1) //Must be complex aromatic with O, N
610     atom->SetSpinMultiplicity(0); //radical centres not appropriate in complex aromatics
611     }
612     // CM end
613     mol.AddBond(_prev,mol.NumAtoms(),_order,_bondflags);
614    
615     //NE iterate through and see if atom is bonded to chiral atom
616     map<OBAtom*,OBChiralData*>::iterator ChiralSearch;
617     ChiralSearch=_mapcd.find(mol.GetAtom(_prev));
618     if (ChiralSearch!=_mapcd.end() && ChiralSearch->second != NULL)
619     {
620     (ChiralSearch->second)->AddAtomRef(mol.NumAtoms(), input);
621     // cout << "Line 650: Adding "<<mol.NumAtoms()<<" to "<<ChiralSearch->second<<endl;
622     }
623     }
624     //set values
625     _prev = mol.NumAtoms();
626     _order = 1;
627     _bondflags = 0;
628    
629     return(true);
630     }
631    
632     bool OBSmilesParser::ParseComplex(OBMol &mol)
633     {
634     char symbol[7];
635     int element=0;
636     int isotope=0;
637     int isoPtr=0;
638     bool arom=false;
639     memset(symbol,'\0',sizeof(char)*7);
640    
641     _ptr++;
642    
643     //grab isotope information
644     for (;*_ptr && isdigit(*_ptr);_ptr++)
645     {
646     symbol[isoPtr] = *_ptr;
647     isoPtr++;
648     }
649     isotope = atoi(symbol);
650    
651     //parse element data
652     if (isupper(*_ptr))
653     switch(*_ptr)
654     {
655     case 'C':
656     _ptr++;
657     switch(*_ptr)
658     {
659     case 'a':
660     element = 20;
661     strcpy(symbol,"Ca");
662     break;
663     case 'd':
664     element = 48;
665     strcpy(symbol,"Cd");
666     break;
667     case 'e':
668     element = 58;
669     strcpy(symbol,"Ce");
670     break;
671     case 'f':
672     element = 98;
673     strcpy(symbol,"Cf");
674     break;
675     case 'l':
676     element = 17;
677     strcpy(symbol,"Cl");
678     break;
679     case 'm':
680     element = 96;
681     strcpy(symbol,"Cm");
682     break;
683     case 'o':
684     element = 27;
685     strcpy(symbol,"Co");
686     break;
687     case 'r':
688     element = 24;
689     strcpy(symbol,"Cr");
690     break;
691     case 's':
692     element = 55;
693     strcpy(symbol,"Cs");
694     break;
695     case 'u':
696     element = 29;
697     strcpy(symbol,"Cu");
698     break;
699     default:
700     element = 6;
701     symbol[0] = 'C';
702     _ptr--;
703     }
704     break;
705    
706     case 'N':
707     _ptr++;
708     switch(*_ptr)
709     {
710     case 'a':
711     element = 11;
712     strcpy(symbol,"Na");
713     break;
714     case 'b':
715     element = 41;
716     strcpy(symbol,"Nb");
717     break;
718     case 'd':
719     element = 60;
720     strcpy(symbol,"Nd");
721     break;
722     case 'e':
723     element = 10;
724     strcpy(symbol,"Ne");
725     break;
726     case 'i':
727     element = 28;
728     strcpy(symbol,"Ni");
729     break;
730     case 'o':
731     element = 102;
732     strcpy(symbol,"No");
733     break;
734     case 'p':
735     element = 93;
736     strcpy(symbol,"Np");
737     break;
738     default:
739     element = 7;
740     symbol[0] = 'N';
741     _ptr--;
742     }
743     break;
744    
745     case('O'):
746     _ptr++;
747     if(*_ptr == 's')
748     {
749     element = 76;
750     strcpy(symbol,"Os");
751     }
752     else
753     {
754     element = 8;
755     symbol[0] = 'O';
756     _ptr--;
757     }
758     break;
759    
760     case 'P':
761     _ptr++;
762     switch(*_ptr)
763     {
764     case 'a':
765     element = 91;
766     strcpy(symbol,"Pa");
767     break;
768     case 'b':
769     element = 82;
770     strcpy(symbol,"Pb");
771     break;
772     case 'd':
773     element = 46;
774     strcpy(symbol,"Pd");
775     break;
776     case 'm':
777     element = 61;
778     strcpy(symbol,"Pm");
779     break;
780     case 'o':
781     element = 84;
782     strcpy(symbol,"Po");
783     break;
784     case 'r':
785     element = 59;
786     strcpy(symbol,"Pr");
787     break;
788     case 't':
789     element = 78;
790     strcpy(symbol,"Pt");
791     break;
792     case 'u':
793     element = 94;
794     strcpy(symbol,"Pu");
795     break;
796     default:
797     element = 15;
798     symbol[0] = 'P';
799     _ptr--;
800     }
801     break;
802    
803     case('S'):
804     _ptr++;
805     switch(*_ptr)
806     {
807     case 'b':
808     element = 51;
809     strcpy(symbol,"Sb");
810     break;
811     case 'c':
812     element = 21;
813     strcpy(symbol,"Sc");
814     break;
815     case 'e':
816     element = 34;
817     strcpy(symbol,"Se");
818     break;
819     case 'i':
820     element = 14;
821     strcpy(symbol,"Si");
822     break;
823     case 'm':
824     element = 62;
825     strcpy(symbol,"Sm");
826     break;
827     case 'n':
828     element = 50;
829     strcpy(symbol,"Sn");
830     break;
831     case 'r':
832     element = 38;
833     strcpy(symbol,"Sr");
834     break;
835     default:
836     element = 16;
837     symbol[0] = 'S';
838     _ptr--;
839     }
840     break;
841    
842     case 'B':
843     _ptr++;
844     switch(*_ptr)
845     {
846     case 'a':
847     element = 56;
848     strcpy(symbol,"Ba");
849     break;
850     case 'e':
851     element = 4;
852     strcpy(symbol,"Be");
853     break;
854     case 'i':
855     element = 83;
856     strcpy(symbol,"Bi");
857     break;
858     case 'k':
859     element = 97;
860     strcpy(symbol,"Bk");
861     break;
862     case 'r':
863     element = 35;
864     strcpy(symbol,"Br");
865     break;
866     default:
867     element = 5;
868     symbol[0] = 'B';
869     _ptr--;
870     }
871     break;
872    
873     case 'F':
874     _ptr++;
875     switch(*_ptr)
876     {
877     case 'e':
878     element = 26;
879     strcpy(symbol,"Fe");
880     break;
881     case 'm':
882     element = 100;
883     strcpy(symbol,"Fm");
884     break;
885     case 'r':
886     element = 87;
887     strcpy(symbol,"Fr");
888     break;
889     default:
890     element = 9;
891     symbol[0] = 'F';
892     _ptr--;
893     }
894     break;
895    
896     case 'I':
897     _ptr++;
898     switch(*_ptr)
899     {
900     case 'n':
901     element = 49;
902     strcpy(symbol,"In");
903     break;
904     case 'r':
905     element = 77;
906     strcpy(symbol,"Ir");
907     break;
908     default:
909     element = 53;
910     symbol[0] = 'I';
911     _ptr--;
912     }
913     break;
914    
915     case 'A':
916     _ptr++;
917     switch(*_ptr)
918     {
919     case 'c':
920     element = 89;
921     strcpy(symbol,"Ac");
922     break;
923     case 'g':
924     element = 47;
925     strcpy(symbol,"Ag");
926     break;
927     case 'l':
928     element = 13;
929     strcpy(symbol,"Al");
930     break;
931     case 'm':
932     element = 95;
933     strcpy(symbol,"Am");
934     break;
935     case 'r':
936     element = 18;
937     strcpy(symbol,"Ar");
938     break;
939     case 's':
940     element = 33;
941     strcpy(symbol,"As");
942     break;
943     case 't':
944     element = 85;
945     strcpy(symbol,"At");
946     break;
947     case 'u':
948     element = 79;
949     strcpy(symbol,"Au");
950     break;
951     default:
952     _ptr--;
953     return(false);
954     }
955     break;
956    
957     case 'D':
958     _ptr++;
959     if (*_ptr == 'y')
960     {
961     element = 66;
962     strcpy(symbol,"Dy");
963     }
964     else
965     {
966     _ptr--;
967     return(false);
968     }
969     break;
970    
971     case 'E':
972     _ptr++;
973     switch(*_ptr)
974     {
975     case 'r':
976     element = 68;
977     strcpy(symbol,"Er");
978     break;
979     case 's':
980     element = 99;
981     strcpy(symbol,"Es");
982     break;
983     case 'u':
984     element = 63;
985     strcpy(symbol,"Eu");
986     break;
987     default:
988     _ptr--;
989     return(false);
990     }
991     break;
992    
993     case 'G':
994     _ptr++;
995     switch (*_ptr)
996     {
997     case 'a':
998     element = 31;
999     strcpy(symbol,"Ga");
1000     break;
1001     case 'd':
1002     element = 64;
1003     strcpy(symbol,"Gd");
1004     break;
1005     case 'e':
1006     element = 32;
1007     strcpy(symbol,"Ge");
1008     break;
1009     default:
1010     _ptr--;
1011     return(false);
1012     }
1013     break;
1014    
1015     case 'H':
1016     _ptr++;
1017     switch (*_ptr)
1018     {
1019     case 'e':
1020     element = 2;
1021     strcpy(symbol,"He");
1022     break;
1023     case 'f':
1024     element = 72;
1025     strcpy(symbol,"Hf");
1026     break;
1027     case 'g':
1028     element = 80;
1029     strcpy(symbol,"Hg");
1030     break;
1031     case 'o':
1032     element = 67;
1033     strcpy(symbol,"Ho");
1034     break;
1035     default:
1036     element = 1;
1037     symbol[0] = 'H';
1038     _ptr--;
1039     }
1040     break;
1041    
1042     case 'K':
1043     _ptr++;
1044     if(*_ptr == 'r')
1045     {
1046     element = 36;
1047     strcpy(symbol,"Kr");
1048     }
1049     else
1050     {
1051     element = 19;
1052     symbol[0] = 'K';
1053     _ptr--;
1054     }
1055     break;
1056    
1057     case 'L':
1058     _ptr++;
1059     switch(*_ptr)
1060     {
1061     case 'a':
1062     element = 57;
1063     strcpy(symbol,"La");
1064     break;
1065     case 'i':
1066     element = 3;
1067     strcpy(symbol,"Li");
1068     break;
1069     case 'r':
1070     element = 103;
1071     strcpy(symbol,"Lr");
1072     break;
1073     case 'u':
1074     element = 71;
1075     strcpy(symbol,"Lu");
1076     break;
1077     default:
1078     _ptr--;
1079     return(false);
1080     }
1081     break;
1082    
1083     case 'M':
1084     _ptr++;
1085     switch(*_ptr)
1086     {
1087     case 'd':
1088     element = 101;
1089     strcpy(symbol,"Md");
1090     break;
1091     case 'g':
1092     element = 12;
1093     strcpy(symbol,"Mg");
1094     break;
1095     case 'n':
1096     element = 25;
1097     strcpy(symbol,"Mn");
1098     break;
1099     case 'o':
1100     element = 42;
1101     strcpy(symbol,"Mo");
1102     break;
1103     default:
1104     _ptr--;
1105     return(false);
1106     }
1107     break;
1108    
1109     case 'R':
1110     _ptr++;
1111     switch(*_ptr)
1112     {
1113     case 'a':
1114     element = 88;
1115     strcpy(symbol,"Ra");
1116     break;
1117     case 'b':
1118     element = 37;
1119     strcpy(symbol,"Rb");
1120     break;
1121     case 'e':
1122     element = 75;
1123     strcpy(symbol,"Re");
1124     break;
1125     case 'h':
1126     element = 45;
1127     strcpy(symbol,"Rh");
1128     break;
1129     case 'n':
1130     element = 86;
1131     strcpy(symbol,"Rn");
1132     break;
1133     case 'u':
1134     element = 44;
1135     strcpy(symbol,"Ru");
1136     break;
1137     default:
1138     _ptr--;
1139     return(false);
1140     }
1141     break;
1142    
1143     case 'T':
1144     _ptr++;
1145     switch(*_ptr)
1146     {
1147     case 'a':
1148     element = 73;
1149     strcpy(symbol,"Ta");
1150     break;
1151     case 'b':
1152     element = 65;
1153     strcpy(symbol,"Tb");
1154     break;
1155     case 'c':
1156     element = 43;
1157     strcpy(symbol,"Tc");
1158     break;
1159     case 'e':
1160     element = 52;
1161     strcpy(symbol,"Te");
1162     break;
1163     case 'h':
1164     element = 90;
1165     strcpy(symbol,"Th");
1166     break;
1167     case 'i':
1168     element = 22;
1169     strcpy(symbol,"Ti");
1170     break;
1171     case 'l':
1172     element = 81;
1173     strcpy(symbol,"Tl");
1174     break;
1175     case 'm':
1176     element = 69;
1177     strcpy(symbol,"Tm");
1178     break;
1179     default:
1180     _ptr--;
1181     return(false);
1182     }
1183     break;
1184    
1185     case('U'): element = 92;
1186     symbol[0] = 'U';
1187     break;
1188     case('V'): element = 23;
1189     symbol[0] = 'V';
1190     break;
1191     case('W'): element = 74;
1192     symbol[0] = 'W';
1193     break;
1194    
1195     case('X'):
1196     _ptr++;
1197     if (*_ptr == 'e')
1198     {
1199     element = 54;
1200     strcpy(symbol,"Xe");
1201     }
1202     else
1203     {
1204     _ptr--;
1205     return(false);
1206     }
1207     break;
1208    
1209     case('Y'):
1210     _ptr++;
1211     if (*_ptr == 'b')
1212     {
1213     element = 70;
1214     strcpy(symbol,"Yb");
1215     }
1216     else
1217     {
1218     element = 39;
1219     symbol[0] = 'Y';
1220     _ptr--;
1221     }
1222     break;
1223    
1224     case('Z'):
1225     _ptr++;
1226     switch(*_ptr)
1227     {
1228     case 'n':
1229     element = 30;
1230     strcpy(symbol,"Zn");
1231     break;
1232     case 'r':
1233     element = 40;
1234     strcpy(symbol,"Zr");
1235     break;
1236     default:
1237     _ptr--;
1238     return(false);
1239     }
1240     break;
1241     }
1242     else
1243     {
1244     arom = true;
1245     switch(*_ptr)
1246     {
1247     case 'c':
1248     element = 6;
1249     symbol[0] = 'C';
1250     break;
1251     case 'n':
1252     element = 7;
1253     symbol[0] = 'N';
1254     break;
1255     case 'o':
1256     element = 8;
1257     symbol[0] = 'O';
1258     break;
1259     case 'p':
1260     element = 15;
1261     symbol[0] = 'P';
1262     break;
1263     case 's':
1264     _ptr++;
1265     if (*_ptr == 'e')
1266     {
1267     element = 34;
1268     strcpy(symbol,"Se");
1269     }
1270     else
1271     {
1272     element = 16;
1273     symbol[0] = 'S';
1274     _ptr--;
1275     }
1276     break;
1277     case 'a':
1278     _ptr++;
1279     if (*_ptr == 's')
1280     {
1281     element = 33;
1282     strcpy(symbol,"As");
1283     }
1284     else
1285     return(false);
1286     break;
1287     default:
1288     return(false);
1289     }
1290     }
1291    
1292     //handle hydrogen count, stereochemistry, and charge
1293    
1294     OBAtom *atom = mol.NewAtom();
1295     int hcount = 0;
1296     int charge=0;
1297     int rad=0;
1298     char tmpc[2];
1299     tmpc[1] = '\0';
1300     for (_ptr++;*_ptr && *_ptr != ']';_ptr++)
1301     {
1302     switch(*_ptr)
1303     {
1304     case '@':
1305     _ptr++;
1306     chiralWatch=true;
1307     _mapcd[atom]= new OBChiralData;
1308     if (*_ptr == '@')
1309     atom->SetClockwiseStereo();
1310     else
1311     {
1312     atom->SetAntiClockwiseStereo();
1313     _ptr--;
1314     }
1315     break;
1316     case '-':
1317     _ptr++;
1318     if (isdigit(*_ptr))
1319     {
1320     tmpc[0] = *_ptr;
1321     charge = -atoi(tmpc);
1322     }
1323     else
1324     {
1325     charge--;
1326     _ptr--;
1327     }
1328     break;
1329     case '+':
1330     _ptr++;
1331     if (isdigit(*_ptr))
1332     {
1333     tmpc[0] = *_ptr;
1334     charge = atoi(tmpc);
1335     }
1336     else
1337     {
1338     charge++;
1339     _ptr--;
1340     }
1341     break;
1342    
1343     case 'H':
1344     _ptr++;
1345     if (isdigit(*_ptr))
1346     {
1347     tmpc[0] = *_ptr;
1348     hcount = atoi(tmpc);
1349     }
1350     else
1351     {
1352     hcount = 1;
1353     _ptr--;
1354     }
1355     break;
1356     case '.': //CM Feb05
1357     rad=2;
1358     if(*(++_ptr)=='.')
1359     rad=3;
1360     else
1361     _ptr--;
1362     break;
1363    
1364     default:
1365     return(false);
1366     }
1367     }
1368    
1369     if (charge)
1370     atom->SetFormalCharge(charge);
1371     if (rad)
1372     atom->SetSpinMultiplicity(rad);
1373     atom->SetAtomicNum(element);
1374     atom->SetIsotope(isotope);
1375     atom->SetType(symbol);
1376     if (arom)
1377     atom->SetAromatic();
1378    
1379     if (_prev) //need to add bond
1380     {
1381     mol.AddBond(_prev,mol.NumAtoms(),_order,_bondflags);
1382     if(chiralWatch) // if chiral atom need to add its previous into atom4ref
1383     {
1384     if (_mapcd[atom] == NULL)
1385     _mapcd[atom]= new OBChiralData;
1386    
1387     (_mapcd[atom])->AddAtomRef((unsigned int)_prev,input);
1388     // cout <<"line 1405: Added atom ref "<<_prev<<" to "<<_mapcd[atom]<<endl;
1389     }
1390     map<OBAtom*,OBChiralData*>::iterator ChiralSearch;
1391     ChiralSearch = _mapcd.find(mol.GetAtom(_prev));
1392     if (ChiralSearch!=_mapcd.end() && ChiralSearch->second != NULL)
1393     {
1394     (ChiralSearch->second)->AddAtomRef(mol.NumAtoms(), input);
1395     // cout <<"line 1431: Added atom ref "<<mol.NumAtoms()<<" to "<<ChiralSearch->second<<endl;
1396     }
1397     }
1398    
1399     //set values
1400     _prev = mol.NumAtoms();
1401     _order = 1;
1402     _bondflags = 0;
1403    
1404     //now add hydrogens
1405    
1406     for (int i = 0;i < hcount;i++)
1407     {
1408     atom = mol.NewAtom();
1409     atom->SetAtomicNum(1);
1410     atom->SetType("H");
1411     mol.AddBond(_prev,mol.NumAtoms(),1);
1412     if(chiralWatch)
1413     {
1414     if (_mapcd[mol.GetAtom(_prev)] != NULL)
1415     (_mapcd[mol.GetAtom(_prev)])->AddAtomRef(mol.NumAtoms(),input);
1416     // cout << "line 1434: Added atom ref "<<mol.NumAtoms()<<" to "<<_mapcd[mol.GetAtom(_prev)]<<endl;
1417    
1418     }
1419     }
1420     chiralWatch=false;
1421     return(true);
1422     }
1423    
1424     bool OBSmilesParser::CapExternalBonds(OBMol &mol)
1425     {
1426    
1427     if(_extbond.empty())
1428     return(true);
1429    
1430     OBAtom *atom;
1431     vector<vector<int> >::iterator bond;
1432    
1433     for(bond = _extbond.begin();bond != _extbond.end();bond++)
1434     {
1435     // create new dummy atom
1436     atom = mol.NewAtom();
1437     atom->SetAtomicNum(0);
1438     atom->SetType("*");
1439    
1440     // bond dummy atom to mol via external bond
1441     mol.AddBond((*bond)[1],atom->GetIdx(),(*bond)[2],(*bond)[3]);
1442     OBBond *refbond = atom->GetBond(mol.GetAtom((*bond)[1]));
1443    
1444     //record external bond information
1445     OBExternalBondData *xbd;
1446     if(mol.HasData(OBGenericDataType::ExternalBondData))
1447     xbd = (OBExternalBondData*)mol.GetData(OBGenericDataType::ExternalBondData);
1448     else
1449     {
1450     xbd = new OBExternalBondData;
1451     mol.SetData(xbd);
1452     }
1453     xbd->SetData(atom,refbond,(*bond)[0]);
1454    
1455     /* old code written by AGS -- mts
1456     {
1457     externalbonds = (vector<pair<int,pair<OBAtom *,OBBond *> > > *)mol.GetData("extBonds");
1458     }
1459     else
1460     {
1461     externalbonds = new vector<pair<int,pair<OBAtom *,OBBond *> > >;
1462     }
1463    
1464     //save data <external bond count, bond index>
1465     externalbonds->push_back(pair<int,pair<OBAtom *,OBBond *> > ((*bond)[0], pair<OBAtom *,OBBond *> (atom,mol.GetBond((*bond)[1],atom->GetIdx()))));
1466     mol.SetData("extBonds",externalbonds);
1467     */
1468    
1469     //this data gets cleaned up in mol.Clear.
1470     }
1471    
1472     return(true);
1473     }
1474    
1475     bool OBSmilesParser::ParseExternalBond(OBMol &mol)
1476     {
1477     int digit;
1478     char str[10];
1479    
1480     //*_ptr should == '&'
1481     _ptr++;
1482    
1483     switch (*_ptr) // check for bond order indicators CC&=1.C&1
1484     {
1485     case '-':
1486     _order = 1;
1487     _ptr++;
1488     break;
1489     case '=':
1490     _order = 2;
1491     _ptr++;
1492     break;
1493     case '#':
1494     _order = 3;
1495     _ptr++;
1496     break;
1497     case ';':
1498     _order = 5;
1499     _ptr++;
1500     break;
1501     case '/': //chiral, but _order still == 1
1502     _bondflags |= OB_TORDOWN_BOND;
1503     _ptr++;
1504     break;
1505     _ptr++;
1506     case '\\': // chiral, but _order still == 1
1507     _bondflags |= OB_TORUP_BOND;
1508     _ptr++;
1509     break;
1510     default: // no bond indicator just leave order = 1
1511     break;
1512     }
1513    
1514     if (*_ptr == '%') // external bond indicator > 10
1515     {
1516     _ptr++;
1517     str[0] = *_ptr;
1518     _ptr++;
1519     str[1] = *_ptr;
1520     str[2] = '\0';
1521     }
1522     else // simple single digit external bond indicator
1523     {
1524     str[0] = *_ptr;
1525     str[1] = '\0';
1526     }
1527     digit = atoi(str); // convert indicator to digit
1528    
1529     //check for dot disconnect closures
1530     vector<vector<int> >::iterator j;
1531     int bondFlags,bondOrder;
1532     for(j = _extbond.begin();j != _extbond.end();j++)
1533     {
1534     if((*j)[0] == digit)
1535     {
1536     bondFlags = (_bondflags > (*j)[3]) ? _bondflags : (*j)[3];
1537     bondOrder = (_order > (*j)[2]) ? _order : (*j)[2];
1538     mol.AddBond((*j)[1],_prev,bondOrder,bondFlags);
1539    
1540     // after adding a bond to atom "_prev"
1541     // search to see if atom is bonded to a chiral atom
1542     map<OBAtom*,OBChiralData*>::iterator ChiralSearch;
1543     ChiralSearch = _mapcd.find(mol.GetAtom(_prev));
1544     if (ChiralSearch!=_mapcd.end() && ChiralSearch->second != NULL)
1545     {
1546     (ChiralSearch->second)->AddAtomRef((*j)[1], input);
1547     // cout << "Added external "<<(*j)[1]<<" to "<<ChiralSearch->second<<endl;
1548     }
1549    
1550     _extbond.erase(j);
1551     _bondflags = 0;
1552     _order = 0;
1553     return(true);
1554     }
1555     }
1556    
1557     //since no closures save another ext bond
1558     vector<int> vtmp(4);
1559     vtmp[0] = digit;
1560     vtmp[1] = _prev;
1561     vtmp[2] = _order;
1562     vtmp[3] = _bondflags;
1563    
1564     _extbond.push_back(vtmp);
1565     _order = 1;
1566     _bondflags = 0;
1567    
1568     return(true);
1569    
1570     }
1571    
1572     bool OBSmilesParser::ParseRingBond(OBMol &mol)
1573     {
1574     int digit;
1575     char str[10];
1576    
1577     if (*_ptr == '%')
1578     {
1579     _ptr++;
1580     str[0] = *_ptr;
1581     _ptr++;
1582     str[1] = *_ptr;
1583     str[2] = '\0';
1584     }
1585     else
1586     {
1587     str[0] = *_ptr;
1588     str[1] = '\0';
1589     }
1590     digit = atoi(str);
1591    
1592     int bf,ord;
1593     vector<vector<int> >::iterator j;
1594     for (j = _rclose.begin();j != _rclose.end();j++)
1595     if ((*j)[0] == digit)
1596     {
1597     bf = (_bondflags > (*j)[3]) ? _bondflags : (*j)[3];
1598     ord = (_order > (*j)[2]) ? _order : (*j)[2];
1599     mol.AddBond((*j)[1],_prev,ord,bf,(*j)[4]);
1600    
1601     // after adding a bond to atom "_prev"
1602     // search to see if atom is bonded to a chiral atom
1603     // need to check both _prev and (*j)[1] as closure is direction independant
1604     map<OBAtom*,OBChiralData*>::iterator ChiralSearch,cs2;
1605     ChiralSearch = _mapcd.find(mol.GetAtom(_prev));
1606     cs2=_mapcd.find(mol.GetAtom((*j)[1]));
1607     if (ChiralSearch!=_mapcd.end() && ChiralSearch->second != NULL)
1608     {
1609     (ChiralSearch->second)->AddAtomRef((*j)[1], input);
1610     //cout << "Added ring closure "<<(*j)[1]<<" to "<<ChiralSearch->second<<endl;
1611     }
1612     if (cs2!=_mapcd.end() && cs2->second != NULL)
1613     {
1614     (cs2->second)->AddAtomRef(_prev,input);
1615     // cout <<"Added ring opening "<<_prev<<" to "<<cs2->second<<endl;
1616     }
1617    
1618     //CM ensure neither atoms in ring closure is a radical centre
1619     OBAtom* patom = mol.GetAtom(_prev);
1620     patom->SetSpinMultiplicity(0);
1621     patom = mol.GetAtom((*j)[1]);
1622     patom->SetSpinMultiplicity(0);
1623     //CM end
1624     _rclose.erase(j);
1625     _bondflags = 0;
1626     _order = 1;
1627     return(true);
1628     }
1629    
1630     vector<int> vtmp(5);
1631     vtmp[0] = digit;
1632     vtmp[1] = _prev;
1633     vtmp[2] = _order;
1634     vtmp[3] = _bondflags;
1635     vtmp[4] = mol.GetAtom(_prev)->GetValence(); //store position to insert closure bond
1636     for (j = _rclose.begin();j != _rclose.end();j++) //correct for multiple closure bonds to a single atom
1637     if ((*j)[1] == _prev)
1638     vtmp[4]++;
1639    
1640     _rclose.push_back(vtmp);
1641     _order = 1;
1642     _bondflags = 0;
1643    
1644     return(true);
1645     }
1646    
1647    
1648     void OBMol2Smi::CreateSmiString(OBMol &mol,char *buffer)
1649     {
1650     OBAtom *atom;
1651     OBSmiNode *root =NULL;
1652     buffer[0] = '\0';
1653     vector<OBNodeBase*>::iterator i;
1654    
1655     for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i))
1656     // if ((!atom->IsHydrogen() || atom->GetValence() == 0) && !_uatoms[atom->GetIdx()])
1657     if (!atom->IsHydrogen() && !_uatoms[atom->GetIdx()])
1658     if (!atom->IsChiral()) //don't use chiral atoms as root node
1659     {
1660     //clear out closures in case structure is dot disconnected
1661     _vclose.clear();
1662     _atmorder.clear();
1663     _storder.clear();
1664     _vopen.clear();
1665     //dot disconnected structure
1666     if (strlen(buffer) > 0)
1667     strcat(buffer,".");
1668     root = new OBSmiNode (atom);
1669     BuildTree(root);
1670     FindClosureBonds(mol);
1671     if (mol.Has2D())
1672     AssignCisTrans(root);
1673     ToSmilesString(root,buffer);
1674     delete root;
1675     }
1676    
1677     //If no starting node found e.g. [H][H] CM 21Mar05
1678     if(root==NULL)
1679     {
1680     root = new OBSmiNode(mol.GetFirstAtom());
1681     BuildTree(root);
1682     ToSmilesString(root,buffer);
1683     delete root;
1684     }
1685     }
1686    
1687     bool OBMol2Smi::BuildTree(OBSmiNode *node)
1688     {
1689     vector<OBEdgeBase*>::iterator i;
1690     OBAtom *nbr,*atom = node->GetAtom();
1691    
1692     _uatoms.SetBitOn(atom->GetIdx()); //mark the atom as visited
1693     _atmorder.push_back(atom->GetIdx()); //store the atom ordering
1694     _storder.push_back(atom->GetIdx()); //store the atom ordering for stereo
1695    
1696     for (nbr = atom->BeginNbrAtom(i);nbr;nbr = atom->NextNbrAtom(i))
1697     if ((!nbr->IsHydrogen()||nbr->GetIsotope()||atom->IsHydrogen()) //so D,T is explicit and H2 works!CM 21Mar05
1698     && !_uatoms[nbr->GetIdx()])
1699     {
1700     _ubonds.SetBitOn((*i)->GetIdx());
1701     OBSmiNode *next = new OBSmiNode (nbr);
1702     next->SetParent(atom);
1703     node->SetNextNode(next,(OBBond*)*i);
1704     BuildTree(next);
1705     }
1706    
1707     return(true);
1708     }
1709    
1710     void OBMol2Smi::ToSmilesString(OBSmiNode *node,char *buffer)
1711     {
1712     char tmpbuf[10];
1713     OBAtom *atom = node->GetAtom();
1714    
1715     //write the current atom to the string
1716     GetSmilesElement(node,tmpbuf);
1717     strcat(buffer,tmpbuf);
1718    
1719     //handle ring closures here
1720     vector<pair<int,OBBond*> > vc = GetClosureDigits(atom);
1721     if (!vc.empty())
1722     {
1723     vector<pair<int,OBBond*> >::iterator i;
1724     for (i = vc.begin();i != vc.end();i++)
1725     {
1726     if (i->second)
1727     {
1728     if (i->second->IsUp())
1729     {
1730     if ( (i->second->GetBeginAtom())->HasDoubleBond() ||
1731     (i->second->GetEndAtom())->HasDoubleBond() )
1732     strcat(buffer,"\\");
1733     }
1734     if (i->second->IsDown())
1735     {
1736     if ( (i->second->GetBeginAtom())->HasDoubleBond() ||
1737     (i->second->GetEndAtom())->HasDoubleBond() )
1738     strcat(buffer,"/");
1739     }
1740     #ifndef KEKULE
1741    
1742     if (i->second->GetBO() == 2 && !i->second->IsAromatic())
1743     strcat(buffer,"=");
1744     #else
1745    
1746     if (i->second->GetBO() == 2)
1747     strcat(buffer,"=");
1748     #endif
1749    
1750     if (i->second->GetBO() == 3)
1751     strcat(buffer,"#");
1752     }
1753    
1754     if (i->first > 9)
1755     strcat(buffer,"%");
1756     sprintf(tmpbuf,"%d",i->first);
1757     strcat(buffer,tmpbuf);
1758     }
1759     }
1760    
1761     //follow path to child atoms
1762     OBBond *bond;
1763     for (int i = 0;i < node->Size();i++)
1764     {
1765     bond = node->GetNextBond(i);
1766     if (i+1 < node->Size())
1767     strcat(buffer,"(");
1768     if (bond->IsUp())
1769     {
1770     if ( (bond->GetBeginAtom())->HasDoubleBond() ||
1771     (bond->GetEndAtom())->HasDoubleBond() )
1772     strcat(buffer,"\\");
1773     }
1774     if (bond->IsDown())
1775     {
1776     if ( (bond->GetBeginAtom())->HasDoubleBond() ||
1777     (bond->GetEndAtom())->HasDoubleBond() )
1778     strcat(buffer,"/");
1779     }
1780     #ifndef KEKULE
1781    
1782     if (bond->GetBO() == 2 && !bond->IsAromatic())
1783     strcat(buffer,"=");
1784     #else
1785    
1786     if (bond->GetBO() == 2)
1787     strcat(buffer,"=");
1788     #endif
1789    
1790     if (bond->GetBO() == 3)
1791     strcat(buffer,"#");
1792    
1793     ToSmilesString(node->GetNextNode(i),buffer);
1794     if (i+1 < node->Size())
1795     strcat(buffer,")");
1796     }
1797     }
1798    
1799     void OBMol2Smi::GetClosureAtoms(OBAtom *atom,vector<OBNodeBase*> &va)
1800     {
1801    
1802     //look through closure list for start atom
1803     vector<OBEdgeBase*>::iterator i;
1804     for (i = _vclose.begin();i != _vclose.end();i++)
1805     if (*i)
1806     {
1807     if (((OBBond*)*i)->GetBeginAtom() == atom)
1808     va.push_back(((OBBond*)*i)->GetEndAtom());
1809     if (((OBBond*)*i)->GetEndAtom() == atom)
1810     va.push_back(((OBBond*)*i)->GetBeginAtom());
1811     }
1812    
1813     OBAtom *nbr;
1814     vector<pair<OBAtom*,pair<int,int> > >::iterator j;
1815     for (j = _vopen.begin();j != _vopen.end();j++)
1816     for (nbr = atom->BeginNbrAtom(i);nbr;nbr = atom->NextNbrAtom(i))
1817     if (nbr == j->first)
1818     va.push_back(nbr);
1819     }
1820    
1821     vector<pair<int,OBBond*> > OBMol2Smi::GetClosureDigits(OBAtom *atom)
1822     {
1823     vector<pair<int,OBBond*> > vc;
1824     vc.clear();
1825    
1826     //look through closure list for start atom
1827     int idx,bo;
1828     OBBond *bond;
1829     vector<OBEdgeBase*>::iterator i;
1830     for (i = _vclose.begin();i != _vclose.end();i++)
1831     if ((bond=(OBBond*)*i))
1832     if (bond->GetBeginAtom() == atom || bond->GetEndAtom() == atom)
1833     {
1834     idx = GetUnusedIndex();
1835     vc.push_back(pair<int,OBBond*> (idx,bond));
1836     bo = (bond->IsAromatic())? 1 : bond->GetBO();
1837     _vopen.push_back(pair<OBAtom*,pair<int,int> >
1838     (bond->GetNbrAtom(atom),pair<int,int>(idx,bo)));
1839     *i = NULL;//remove bond from closure list
1840     }
1841    
1842     //try to complete closures
1843     if (!_vopen.empty())
1844     {
1845     vector<pair<OBAtom*,pair<int,int> > >::iterator j;
1846     for (j = _vopen.begin();j != _vopen.end();)
1847     if (j->first == atom)
1848     {
1849     vc.push_back(pair<int,OBBond*> (j->second.first,(OBBond*)NULL));
1850     _vopen.erase(j);
1851     j = _vopen.begin();
1852     }
1853     else
1854     j++;
1855     }
1856    
1857     return(vc);
1858     }
1859    
1860     void OBMol2Smi::FindClosureBonds(OBMol &mol)
1861     {
1862     //find closure bonds
1863     OBAtom *a1,*a2;
1864     OBBond *bond;
1865     vector<OBEdgeBase*>::iterator i;
1866     OBBitVec bv;
1867     bv.FromVecInt(_storder);
1868    
1869     for (bond = mol.BeginBond(i);bond;bond = mol.NextBond(i))
1870     if (!_ubonds[bond->GetIdx()] && bv[bond->GetBeginAtomIdx()])
1871     {
1872     a1 = bond->GetBeginAtom();
1873     a2 = bond->GetEndAtom();
1874     if (!a1->IsHydrogen() && !a2->IsHydrogen())
1875     _vclose.push_back(bond);
1876     }
1877    
1878     vector<OBEdgeBase*>::reverse_iterator j;
1879     vector<int>::iterator k;
1880    
1881     //modify _order to reflect ring closures
1882     for (j = _vclose.rbegin();j != _vclose.rend();j++)
1883     {
1884     bond = (OBBond*)*j;
1885     a1 = a2 = NULL;
1886    
1887     for (k = _storder.begin();k != _storder.end();k++)
1888     if (bond->GetBeginAtomIdx() == static_cast<unsigned int>(*k) ||
1889     bond->
1890     GetEndAtomIdx() == static_cast<unsigned int>(*k))
1891     if (!a1) a1 = mol.GetAtom(*k);
1892     else if (!a2)
1893     {
1894     a2 = mol.GetAtom(*k)
1895     ;
1896     _storder.erase(k);
1897     break;
1898     }
1899    
1900     for (k = _storder.begin()
1901     ;
1902     k != _storder.end();
1903     k++)
1904     if (a1->GetIdx()
1905     == static_cast<unsigned int>(*k))
1906     {
1907     k++;
1908     if (k != _storder.end())
1909     _storder.insert(k,a2->GetIdx());
1910     else
1911     _storder.push_back(a2->GetIdx());
1912     break;
1913     }
1914     }
1915     }
1916    
1917     int OBMol2Smi::GetUnusedIndex()
1918     {
1919     int idx=1;
1920    
1921     vector<pair<OBAtom*,pair<int,int> > >::iterator j;
1922     for (j = _vopen.begin();j != _vopen.end();)
1923     if (j->second.first == idx)
1924     {
1925     idx++; //increment idx and start over if digit is already used
1926     j = _vopen.begin();
1927     }
1928     else
1929     j++;
1930    
1931     return(idx);
1932     }
1933    
1934     void OBMol2Smi::CorrectAromaticAmineCharge(OBMol &mol)
1935     {
1936     OBAtom *atom;
1937     vector<OBNodeBase*>::iterator i;
1938    
1939     _aromNH.clear();
1940     _aromNH.resize(mol.NumAtoms()+1);
1941    
1942     for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i))
1943     if (atom->IsNitrogen() && atom->IsAromatic())
1944     if (atom->GetHvyValence() == 2)
1945     {
1946     if (atom->GetValence() == 3 || atom->GetImplicitValence() == 3)
1947     _aromNH[atom->GetIdx()] = true;
1948     }
1949     }
1950    
1951     void OBMol2Smi::AssignCisTrans(OBSmiNode *node)
1952     {
1953     //traverse the tree searching for acyclic olefins - if it
1954     //has at least one heavy atom attachment on each end assign stereochem
1955    
1956     OBBond *bond;
1957     for (int i = 0;i < node->Size();i++)
1958     {
1959     bond = node->GetNextBond(i);
1960     if (bond->GetBO() == 2 && !bond->IsInRing())
1961     {
1962     OBAtom *b = node->GetAtom();
1963     OBAtom *c = bond->GetNbrAtom(b);
1964    
1965     //skip allenes
1966     if (b->GetHyb() == 1 || c->GetHyb() == 1)
1967     continue;
1968    
1969     if (b->GetHvyValence() > 1 && c->GetHvyValence() > 1)
1970     {
1971     OBAtom *a,*d;
1972     vector<OBEdgeBase*>::iterator j,k;
1973    
1974     //look for bond with assigned stereo as in poly-ene
1975     for (a = b->BeginNbrAtom(j);a;a = b->NextNbrAtom(j))
1976     if (((OBBond*)*j)->IsUp() ||((OBBond*)*j)->IsDown())
1977     break;
1978    
1979     if (!a)
1980     for (a = b->BeginNbrAtom(j);a;a = b->NextNbrAtom(j))
1981     if (a != c && !a->IsHydrogen())
1982     break;
1983     for (d = c->BeginNbrAtom(k);d;d = c->NextNbrAtom(k))
1984     if (d != b && !d->IsHydrogen())
1985     break;
1986     // obAssert(a);
1987     // obAssert(d);
1988    
1989     if (((OBBond*)*j)->IsUp() || ((OBBond*)*j)->IsDown()) //stereo already assigned
1990     {
1991     if (fabs(CalcTorsionAngle(a->GetVector(),b->GetVector(),
1992     c->GetVector(),d->GetVector())) > 10.0)
1993     if (((OBBond*)*j)->IsUp())
1994     ((OBBond*)*k)->SetUp();
1995     else
1996     ((OBBond*)*k)->SetDown();
1997     else
1998     if (((OBBond*)*j)->IsUp())
1999     ((OBBond*)*k)->SetDown();
2000     else
2001     ((OBBond*)*k)->SetUp();
2002     }
2003     else //assign stereo to both ends
2004     {
2005     ((OBBond*)*j)->SetUp();
2006     if (fabs(CalcTorsionAngle(a->GetVector(),b->GetVector(),
2007     c->GetVector(),d->GetVector())) > 10.0)
2008     ((OBBond*)*k)->SetUp();
2009     else
2010     ((OBBond*)*k)->SetDown();
2011     }
2012     }
2013     }
2014     AssignCisTrans(node->GetNextNode(i));
2015     }
2016     }
2017    
2018     void OBMol2Smi::Init(OBConversion* pconv)
2019     {
2020     _vclose.clear();
2021     _atmorder.clear();
2022     _storder.clear();
2023     _aromNH.clear();
2024     _uatoms.Clear();
2025     _ubonds.Clear();
2026     _vopen.clear();
2027     _pconv = pconv;
2028     }
2029    
2030     bool OBMol2Smi::GetSmilesElement(OBSmiNode *node,char *element)
2031     {
2032     //***handle reference atom stuff here and return***
2033     char symbol[10];
2034     bool bracketElement = false;
2035     bool normalValence = true;
2036    
2037     OBAtom *atom = node->GetAtom();
2038    
2039     int bosum = atom->KBOSum();
2040     atom->BOSum(); //CM temp
2041     switch (atom->GetAtomicNum())
2042     {
2043     case 0:
2044     break;
2045     case 5: /*bracketElement = !(normalValence = (bosum == 3)); break;*/
2046     break;
2047     case 6:
2048     break;
2049     case 7:
2050     if (atom->IsAromatic() && atom->GetHvyValence() == 2 && atom->GetImplicitValence() == 3)
2051     {
2052 gezelter 2450 normalValence = false;
2053     bracketElement = !(normalValence);
2054 tim 2445 break;
2055     }
2056     else
2057     bracketElement = !(normalValence = (bosum == 3 || bosum == 5));
2058     break;
2059     case 8:
2060     break;
2061     case 9:
2062     break;
2063     case 15:
2064     break;
2065     case 16:
2066     bracketElement = !(normalValence = (bosum == 2 || bosum == 4 || bosum == 6));
2067     break;
2068     case 17:
2069     break;
2070     case 35:
2071     break;
2072     case 53:
2073     break;
2074    
2075     default:
2076     bracketElement = true;
2077     }
2078    
2079     if (atom->GetHvyValence() > 2 && atom->IsChiral())
2080     if (((OBMol*)atom->GetParent())->HasNonZeroCoords() || atom->HasChiralitySpecified())
2081     bracketElement = true;
2082    
2083     if (atom->GetFormalCharge() != 0) //bracket charged elements
2084     bracketElement = true;
2085    
2086     if(atom->GetIsotope()) //CM 19Mar05
2087     bracketElement = true;
2088    
2089     //CM begin 18 Sept 2003
2090    
2091     //This outputs form [CH3][CH3] rather than CC if -h option has been specified
2092     if (((OBMol*)atom->GetParent())->HasHydrogensAdded())
2093     bracketElement = true;
2094     else
2095     {
2096     if (atom->GetSpinMultiplicity())
2097     {
2098     //For radicals output bracket form anyway unless r option specified
2099     if(!(_pconv && _pconv->IsOption ("r")))
2100     bracketElement = true;
2101     }
2102     }
2103     //CM end
2104    
2105     if (!bracketElement)
2106     {
2107     if (!atom->GetAtomicNum())
2108     {
2109     bool external = false;
2110     vector<pair<int,pair<OBAtom *,OBBond *> > > *externalBonds =
2111     (vector<pair<int,pair<OBAtom *,OBBond *> > > *)((OBMol*)atom->GetParent())->GetData("extBonds");
2112     vector<pair<int,pair<OBAtom *,OBBond *> > >::iterator externalBond;
2113    
2114     if (externalBonds)
2115     for(externalBond = externalBonds->begin();externalBond != externalBonds->end();externalBond++)
2116     {
2117     if (externalBond->second.first == atom)
2118     {
2119     external = true;
2120     strcpy(symbol,"&");
2121     OBBond *bond = externalBond->second.second;
2122     if (bond->IsUp())
2123     {
2124     if ( (bond->GetBeginAtom())->HasDoubleBond() ||
2125     (bond->GetEndAtom())->HasDoubleBond() )
2126     strcat(symbol,"\\");
2127     }
2128     if (bond->IsDown())
2129     {
2130     if ( (bond->GetBeginAtom())->HasDoubleBond() ||
2131     (bond->GetEndAtom())->HasDoubleBond() )
2132     strcat(symbol,"/");
2133     }
2134     #ifndef KEKULE
2135    
2136     if (bond->GetBO() == 2 && !bond->IsAromatic())
2137     strcat(symbol,"=");
2138     if (bond->GetBO() == 2 && bond->IsAromatic())
2139     strcat(symbol,";");
2140     #else
2141    
2142     if (bond->GetBO() == 2)
2143     strcat(symbol,"=");
2144     #endif
2145    
2146     if (bond->GetBO() == 3)
2147     strcat(symbol,"#");
2148     sprintf(symbol,"%s%d",symbol,externalBond->first);
2149     break;
2150     }
2151     }
2152    
2153     if(!external)
2154     strcpy(symbol,"*");
2155     }
2156     else
2157     {
2158     strcpy(symbol,etab.GetSymbol(atom->GetAtomicNum()));
2159     #ifndef KEKULE
2160    
2161     if (atom->IsAromatic())
2162     symbol[0] = tolower(symbol[0]);
2163     #endif
2164    
2165     //Radical centres lc if r option set
2166     if(atom->GetSpinMultiplicity() && _pconv && _pconv->IsOption ("r"))
2167     symbol[0] = tolower(symbol[0]);
2168     }
2169     strcpy(element,symbol);
2170    
2171     return(true);
2172     }
2173    
2174     strcpy(element,"[");
2175     if(atom->GetIsotope()) //CM 19Mar05
2176     {
2177     char iso[4];
2178     sprintf(iso,"%d",atom->GetIsotope());
2179     strcat(element,iso);
2180     }
2181     if (!atom->GetAtomicNum())
2182     strcpy(symbol,"*");
2183     else
2184     {
2185     strcpy(symbol,etab.GetSymbol(atom->GetAtomicNum()));
2186     #ifndef KEKULE
2187    
2188     if (atom->IsAromatic())
2189     symbol[0] = tolower(symbol[0]);
2190     #endif
2191    
2192     }
2193     strcat(element,symbol);
2194    
2195     //if (atom->IsCarbon() && atom->GetHvyValence() > 2 && atom->IsChiral())
2196     if (atom->GetHvyValence() > 2 && atom->IsChiral())
2197     {
2198     char stereo[5];
2199     if (GetChiralStereo(node,stereo))
2200     strcat(element,stereo);
2201     }
2202    
2203     //add extra hydrogens
2204     // if (!normalValence && atom->ImplicitHydrogenCount())
2205     if (atom->ImplicitHydrogenCount() && !atom->IsHydrogen()) //CM 21Mar05
2206     {
2207     strcat(element,"H");
2208     if (atom->ImplicitHydrogenCount() > 1)
2209     {
2210     char tcount[10];
2211     sprintf(tcount,"%d",atom->ImplicitHydrogenCount());
2212     strcat(element,tcount);
2213     }
2214     }
2215    
2216     //cat charge on the end
2217     if (atom->GetFormalCharge() != 0)
2218     {
2219    
2220     /*
2221     if (atom->ImplicitHydrogenCount())
2222     {
2223     cerr << "imp = " << atom->GetAtomicNum() << ' ' << atom->GetImplicitValence() << endl;
2224     strcat(element,"H");
2225     if (atom->ImplicitHydrogenCount() > 1)
2226     {
2227     char tcount[10];
2228     sprintf(tcount,"%d",atom->ImplicitHydrogenCount());
2229     strcat(element,tcount);
2230     }
2231     }
2232     */
2233     if (atom->GetFormalCharge() > 0)
2234     strcat(element,"+");
2235     else
2236     strcat(element,"-");
2237    
2238     if (abs(atom->GetFormalCharge()) > 1)
2239     {
2240     char tcharge[10];
2241     sprintf(tcharge,"%d",abs(atom->GetFormalCharge()));
2242     strcat(element,tcharge);
2243     }
2244     }
2245    
2246     strcat(element,"]");
2247    
2248     return(true);
2249     }
2250    
2251     bool OBMol2Smi::GetChiralStereo(OBSmiNode *node,char *stereo)
2252     {
2253     bool is2D=false;
2254     double torsion;
2255     OBAtom *a,*b,*c,*d,hydrogen;
2256    
2257     b = node->GetAtom();
2258     OBMol *mol = (OBMol*)b->GetParent();
2259    
2260     if (!mol->HasNonZeroCoords()) //must have come in from smiles string
2261     {
2262     if (!b->HasChiralitySpecified())
2263     return(false);
2264     if (b->IsClockwise())
2265     strcpy(stereo,"@@");
2266     else if (b->IsAntiClockwise())
2267     strcpy(stereo,"@");
2268     else
2269     return(false);
2270     //if (b->GetHvyValence() == 3) strcat(stereo,"H");
2271     return(true);
2272     }
2273    
2274     //give peudo Z coords if mol is 2D
2275     if (!mol->Has3D())
2276     {
2277     vector3 v,vz(0.0,0.0,1.0);
2278     is2D = true;
2279     OBAtom *nbr;
2280     OBBond *bond;
2281     vector<OBEdgeBase*>::iterator i;
2282     for (bond = b->BeginBond(i);bond;bond = b->NextBond(i))
2283     {
2284     nbr = bond->GetEndAtom();
2285     if (nbr != b)
2286     {
2287     v = nbr->GetVector();
2288     if (bond->IsWedge())
2289     v += vz;
2290     else
2291     if (bond->IsHash())
2292     v -= vz;
2293    
2294     nbr->SetVector(v);
2295     }
2296     else
2297     {
2298     nbr = bond->GetBeginAtom();
2299     v = nbr->GetVector();
2300     if (bond->IsWedge())
2301     v -= vz;
2302     else
2303     if (bond->IsHash())
2304     v += vz;
2305    
2306     nbr->SetVector(v);
2307     }
2308     }
2309     }
2310    
2311     c = d = NULL;
2312     a = node->GetParent();
2313     // obAssert(a); //chiral atom can't be used as root node - must have parent
2314    
2315     if (b->GetHvyValence() == 3) //must have attached hydrogen
2316     {
2317     if (b->GetValence() == 4)//has explicit hydrogen
2318     {
2319     vector<OBEdgeBase*>::iterator i;
2320     for (c = b->BeginNbrAtom(i);c;c = b->NextNbrAtom(i))
2321     if (c->IsHydrogen())
2322     break;
2323     // obAssert(c);
2324     }
2325     else //implicit hydrogen
2326     {
2327     vector3 v;
2328     b->GetNewBondVector(v,1.0);
2329     hydrogen.SetVector(v);
2330     c = &hydrogen;
2331     }
2332     }
2333    
2334     //get connected atoms in order
2335     OBAtom *nbr;
2336     vector<int>::iterator j;
2337    
2338     //try to get neighbors that are closure atoms in the order they appear in the string
2339     vector<OBNodeBase*> va;
2340     GetClosureAtoms(b,va);
2341     if (!va.empty())
2342     {
2343     vector<OBNodeBase*>::iterator k;
2344     for (k = va.begin();k != va.end();k++)
2345     if (*k != a)
2346     {
2347     if (!c)
2348     c = (OBAtom*)*k;
2349     else if (!d)
2350     d = (OBAtom*)*k;
2351     }
2352     }
2353    
2354     for (j = _storder.begin();j != _storder.end();j++)
2355     {
2356     nbr = mol->GetAtom(*j);
2357     if (!b->IsConnected(nbr))
2358     continue;
2359     if (nbr == a || nbr == b || nbr == c)
2360     continue;
2361     if (!c)
2362     c = nbr;
2363     else if (!d)
2364     d = nbr;
2365     }
2366    
2367     torsion = CalcTorsionAngle(a->GetVector(),b->GetVector(),
2368     c->GetVector(),d->GetVector());
2369    
2370     strcpy(stereo,(torsion<0.0)?"@":"@@");
2371     //if (b->GetHvyValence() == 3) strcat(stereo,"H");
2372    
2373     //re-zero psuedo-coords
2374     if (is2D)
2375     {
2376     vector3 v;
2377     OBAtom *atom;
2378     vector<OBNodeBase*>::iterator k;
2379     for (atom = mol->BeginAtom(k);atom;atom = mol->NextAtom(k))
2380     {
2381     v = atom->GetVector();
2382     v.SetZ(0.0);
2383     atom->SetVector(v);
2384     }
2385     }
2386    
2387     return(true);
2388     }
2389     //********************************************************
2390     class FIXFormat : public OBFormat
2391     {
2392     public:
2393     //Register this format type ID
2394     FIXFormat()
2395     {
2396     OBConversion::RegisterFormat("fix",this);
2397     }
2398    
2399     virtual const char* Description() //required
2400     {
2401     return
2402     "SMILES FIX format\n \
2403     No comments yet\n \
2404     ";
2405     };
2406    
2407     virtual const char* SpecificationURL(){return
2408     "";}; //optional
2409    
2410     //Flags() can return be any the following combined by | or be omitted if none apply
2411     // NOTREADABLE READONEONLY NOTWRITABLE WRITEONEONLY
2412     virtual unsigned int Flags()
2413     {
2414     return NOTREADABLE;
2415     };
2416    
2417     ////////////////////////////////////////////////////
2418     /// The "API" interface functions
2419     virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv);
2420    
2421     ////////////////////////////////////////////////////
2422     /// The "Convert" interface functions
2423     virtual bool WriteChemObject(OBConversion* pConv)
2424     {
2425     //Retrieve the target OBMol
2426     OBBase* pOb = pConv->GetChemObject();
2427     OBMol* pmol = dynamic_cast<OBMol*> (pOb);
2428     bool ret=false;
2429     if(pmol)
2430     ret=WriteMolecule(pmol,pConv);
2431     delete pOb;
2432     return ret;
2433     };
2434     };
2435    
2436     //Make an instance of the format class
2437     FIXFormat theFIXFormat;
2438    
2439     /////////////////////////////////////////////////////////////////
2440    
2441     bool FIXFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv)
2442     {
2443     OBMol* pmol = dynamic_cast<OBMol*>(pOb);
2444     if(pmol==NULL)
2445     return false;
2446    
2447     //Define some references so we can use the old parameter names
2448     ostream &ofs = *pConv->GetOutStream();
2449     OBMol &mol = *pmol;
2450    
2451     char buffer[BUFF_SIZE];
2452     OBMol2Smi m2s;
2453    
2454     // This is a hack to prevent recursion problems.
2455     // we still need to fix the underlying problem -GRH
2456     if (mol.NumAtoms() > 1000)
2457     {
2458     #ifdef HAVE_SSTREAM
2459     stringstream errorMsg;
2460     #else
2461     strstream errorMsg;
2462     #endif
2463     errorMsg << "SMILES Conversion failed: Molecule is too large to convert. Open Babel is currently limited to 1000 atoms." << endl;
2464     errorMsg << " Molecule size: " << mol.NumAtoms() << " atoms " << endl;
2465     obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obInfo);
2466     return(false);
2467     }
2468    
2469     m2s.Init();
2470     //m2s.AssignCisTrans(mol);
2471     m2s.CorrectAromaticAmineCharge(mol);
2472     m2s.CreateSmiString(mol,buffer);
2473    
2474     OBAtom *atom;
2475     vector<int>::iterator i;
2476     vector<int> order = m2s.GetOutputOrder();
2477     ofs << buffer << endl;
2478    
2479     int j;
2480     for (j = 0;j < mol.NumConformers();j++)
2481     {
2482     mol.SetConformer(j);
2483     for (i = order.begin();i != order.end();i++)
2484     {
2485     atom = mol.GetAtom(*i);
2486     sprintf(buffer,"%9.3f %9.3f %9.3f",atom->GetX(),atom->GetY(),atom->GetZ());
2487     ofs << buffer<< endl;
2488     }
2489     }
2490     return(true);
2491     }
2492    
2493     OBSmiNode::OBSmiNode(OBAtom *atom)
2494     {
2495     _atom = atom;
2496     _parent = NULL;
2497     _nextnode.clear();
2498     _nextbond.clear();
2499     }
2500    
2501     void OBSmiNode::SetNextNode(OBSmiNode *node,OBBond *bond)
2502     {
2503     _nextnode.push_back(node);
2504     _nextbond.push_back(bond);
2505     }
2506    
2507     OBSmiNode::~OBSmiNode()
2508     {
2509     vector<OBSmiNode*>::iterator i;
2510     for (i = _nextnode.begin();i != _nextnode.end();i++)
2511     delete (*i);
2512     }
2513    
2514    
2515     bool WriteTheSmiles(OBMol & mol,char *out)
2516     {
2517     char buffer[2*BUFF_SIZE];
2518    
2519     OBMol2Smi m2s;
2520    
2521     m2s.Init();
2522     m2s.CorrectAromaticAmineCharge(mol);
2523     m2s.CreateSmiString(mol,buffer);
2524    
2525     strcpy(out,buffer);
2526     return(true);
2527    
2528     }
2529     }