ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/OOPSE-4/src/openbabel/obconversion.cpp
Revision: 2440
Committed: Wed Nov 16 19:42:11 2005 UTC (18 years, 8 months ago) by tim
File size: 31868 byte(s)
Log Message:
adding openbabel

File Contents

# User Rev Content
1 tim 2440 /**********************************************************************
2     obconversion.cpp - Declaration of OBFormat and OBConversion
3    
4     Copyright (C) 2004 by Chris Morley
5     Some portions Copyright (C) 2005 by Geoffrey Hutchison
6    
7     This file is part of the Open Babel project.
8     For more information, see <http://openbabel.sourceforge.net/>
9    
10     This program is free software; you can redistribute it and/or modify
11     it under the terms of the GNU General Public License as published by
12     the Free Software Foundation version 2 of the License.
13    
14     This program is distributed in the hope that it will be useful,
15     but WITHOUT ANY WARRANTY; without even the implied warranty of
16     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17     GNU General Public License for more details.
18     ***********************************************************************/
19     // Definition of OBConversion routines
20    
21     #ifdef _WIN32
22     #pragma warning (disable : 4786)
23    
24     //using 'this' in base class initializer
25     #pragma warning (disable : 4355)
26    
27     #ifdef GUI
28     #undef DATADIR
29     #include "stdafx.h" //(includes<windows.h>
30     #endif
31     #endif
32    
33     #include <iostream>
34     #include <fstream>
35     #include <sstream>
36     #include <string>
37     #include <map>
38     //#include <dlfcn.h>
39    
40     #include "obconversion.hpp"
41    
42     #ifdef HAVE_LIBZ
43     #include "zipstream.hpp"
44     #endif
45    
46     #if !HAVE_STRNCASECMP
47     extern "C" int strncasecmp(const char *s1, const char *s2, size_t n);
48     #endif
49    
50     #ifndef BUFF_SIZE
51     #define BUFF_SIZE 32768
52     #endif
53    
54     using namespace std;
55     namespace OpenBabel {
56    
57     const char* OBFormat::TargetClassDescription()
58     {
59     //Provides class of default format unless overridden
60     if(OBConversion::GetDefaultFormat())
61     return OBConversion::GetDefaultFormat()->TargetClassDescription();
62     else
63     return "";
64     };
65     const type_info& OBFormat::GetType()
66     {
67     //Provides info on class of default format unless overridden
68     if(OBConversion::GetDefaultFormat())
69     return OBConversion::GetDefaultFormat()->GetType();
70     else
71     return typeid(this); //rubbish return if DefaultFormat not set
72     };
73    
74     //***************************************************
75    
76     /** @class OBConversion
77     OBConversion maintains a list of the available formats,
78     provides information on them, and controls the conversion process.
79    
80     A conversion is carried out by the calling routine, usually in a
81     user interface or an application program, making an instance of
82     OBConversion. It is loaded with the in and out formats, any options
83     and (usually) the default streams for input and output. Then either
84     the Convert() function is called, which allows a single input file
85     to be converted, or the extended functionality of FullConvert()
86     is used. This allows multiple input and output files, allowing:
87     - aggregation - the contents of many input files converted
88     and sent to one output file;
89     - splitting - the molecules from one input file sent to
90     separate output files;
91     - batch conversion - each input file converted to an output file.
92    
93     These procedures constitute the "Convert" interface. OBConversion
94     and the user interface or application program do not need to be
95     aware of any other part of OpenBabel - mol.h is not \#included. This
96     allows any chemical object derived from OBBase to be converted;
97     the type of object is decided by the input format class.
98     However,currently, almost all the conversions are for molecules of
99     class OBMol.
100     ///
101     OBConversion can also be used with an "API" interface
102     called from programs which manipulate chemical objects. Input/output is
103     done with the Read() and Write() functions which work with any
104     chemical object, but need to have its type specified. (The
105     ReadMolecule() and WriteMolecule() functions of the format classes
106     can also be used directly.)
107    
108    
109     Example code using OBConversion
110    
111     <b>To read in a molecule, manipulate it and write it out.</b>
112    
113     Set up an istream and an ostream, to and from files or elsewhere.
114     (cin and cout are used in the example). Specify the file formats.
115    
116     @code
117     OBConversion conv(&cin,&cout);
118     if(conv.SetInAndOutFormats("SMI","MOL"))
119     {
120     OBMol mol;
121     if(conv.Read(&mol))
122     ...manipulate molecule
123    
124     conv.Write(&mol);
125     }
126     @endcode
127    
128     A two stage construction is used to allow error handling
129     if the format ID is not recognized. This is necessary now that the
130     formats are dynamic and errors are not caught at compile time.
131     OBConversion::Read() is a templated function so that objects derived
132     from OBBase can also be handled, in addition to OBMol, if the format
133     routines are written appropriately.
134    
135     <b>To make a molecule from a SMILES string.</b>
136     @code
137     std::string SmilesString;
138     OBMol mol;
139     stringstream ss(SmilesString);
140     OBConversion conv(&ss);
141     if(conv.SetInFormat("smi") && conv.Read(&mol))
142     ...
143     @endcode
144    
145     <b>To do a file conversion without manipulating the molecule.</b>
146    
147     @code
148     #include "obconversion.hpp" //mol.h is not needed
149     ...set up an istream is and an ostream os
150     OBConversion conv(&is,&os);
151     if(conv.SetInAndOutFormats("SMI","MOL"))
152     {
153     conv.SetOptions("h"); //Optional; (h adds explicit hydrogens)
154     conv.Convert();
155     }
156     @endcode
157    
158     <b>To add automatic format conversion to an existing program.</b>
159    
160     The existing program inputs from the file identified by the
161     const char* filename into the istream is. The file is assumed to have
162     a format ORIG, but other formats, identified by their file extensions,
163     can now be used.
164    
165     @code
166     ifstream ifs(filename); //Original code
167    
168     OBConversion conv;
169     OBFormat* inFormat = conv.FormatFromExt(filename);
170     OBFormat* outFormat = conv.GetFormat("ORIG");
171     istream* pIn = &ifs;
172     stringstream newstream;
173     if(inFormat && outFormat)
174     {
175     conv.SetInAndOutFormats(inFormat,outFormat);
176     conv.Convert(pIn,&newstream);
177     pIn=&newstream;
178     }
179     //else error; new features not available; fallback to original functionality
180    
181     ...Carry on with original code using pIn
182     @endcode
183    
184     In Windows a degree of independence from OpenBabel can be achieved using DLLs.
185     This code would be linked with obconv.lib.
186     At runtime the following DLLs would be in the executable directory:
187     obconv.dll, obdll.dll, one or more *.obf format files.
188     */
189    
190     int OBConversion::FormatFilesLoaded = 0;
191    
192     OBFormat* OBConversion::pDefaultFormat=NULL;
193    
194     OBConversion::OBConversion(istream* is, ostream* os) :
195     pInFormat(NULL),pOutFormat(NULL), Index(0), StartNumber(1),
196     EndNumber(0), Count(-1), m_IsLast(true), MoreFilesToCome(false),
197     OneObjectOnly(false), pOb1(NULL), pAuxConv(NULL)
198     {
199     pInStream=is;
200     pOutStream=os;
201     if (FormatFilesLoaded == 0)
202     FormatFilesLoaded = LoadFormatFiles();
203    
204     //These options take a parameter
205     RegisterOptionParam("f", NULL, 1,GENOPTIONS);
206     RegisterOptionParam("l", NULL, 1,GENOPTIONS);
207     }
208    
209     ///This static function returns a reference to the FormatsMap
210     ///which, because it is a static local variable is constructed only once.
211     ///This fiddle is to avoid the "static initialization order fiasco"
212     ///See Marshall Cline's C++ FAQ Lite document, www.parashift.com/c++-faq-lite/".
213     FMapType& OBConversion::FormatsMap()
214     {
215     static FMapType* fm = new FMapType;
216     return *fm;
217     }
218    
219     ///This static function returns a reference to the FormatsMIMEMap
220     ///which, because it is a static local variable is constructed only once.
221     ///This fiddle is to avoid the "static initialization order fiasco"
222     ///See Marshall Cline's C++ FAQ Lite document, www.parashift.com/c++-faq-lite/".
223     FMapType& OBConversion::FormatsMIMEMap()
224     {
225     static FMapType* fm = new FMapType;
226     return *fm;
227     }
228    
229     /////////////////////////////////////////////////
230     OBConversion::OBConversion(const OBConversion& o)
231     {
232     Index = o.Index;
233     Count = o.Count;
234     StartNumber = o.StartNumber;
235     EndNumber = o.EndNumber;
236     pInFormat = o.pInFormat;
237     pInStream = o.pInStream;
238     pOutFormat = o.pOutFormat;
239     pOutStream = o.pOutStream;
240     OptionsArray[0]= o.OptionsArray[0];
241     OptionsArray[1]= o.OptionsArray[1];
242     OptionsArray[2]= o.OptionsArray[2];
243     InFilename = o.InFilename;
244     rInpos = o.rInpos;
245     wInpos = o.wInpos;
246     rInlen = o.rInlen;
247     wInlen = o.wInlen;
248     m_IsLast = o.m_IsLast;
249     MoreFilesToCome= o.MoreFilesToCome;
250     OneObjectOnly = o.OneObjectOnly;
251     pOb1 = o.pOb1;
252     ReadyToInput = o.ReadyToInput;
253    
254     pAuxConv = NULL;
255     }
256     ////////////////////////////////////////////////
257    
258     OBConversion::~OBConversion()
259     {
260     if(pAuxConv!=this)
261     delete pAuxConv;
262     }
263     //////////////////////////////////////////////////////
264    
265     /// Class information on formats is collected by making an instance of the class
266     /// derived from OBFormat(only one is usually required). RegisterFormat() is called
267     /// from its constructor.
268     ///
269     /// If the compiled format is stored separately, like in a DLL or shared library,
270     /// the initialization code makes an instance of the imported OBFormat class.
271     int OBConversion::RegisterFormat(const char* ID, OBFormat* pFormat, const char* MIME)
272     {
273     FormatsMap()[ID] = pFormat;
274     if (MIME)
275     FormatsMIMEMap()[MIME] = pFormat;
276     if(pFormat->Flags() & DEFAULTFORMAT)
277     pDefaultFormat=pFormat;
278     return FormatsMap().size();
279     }
280    
281     //////////////////////////////////////////////////////
282     int OBConversion::LoadFormatFiles()
283     {
284     /*
285     int count=0;
286     // if(FormatFilesLoaded) return 0;
287     // FormatFilesLoaded=true; //so will load files only once
288     #ifdef USING_DYNAMIC_LIBS
289     //Depending on availablilty, look successively in
290     //FORMATFILE_DIR, executable directory,or current directory
291     string TargetDir;
292     #ifdef FORMATFILE_DIR
293     TargetDir="FORMATFILE_DIR";
294     #endif
295    
296     DLHandler::getConvDirectory(TargetDir);
297    
298     vector<string> files;
299     if(!DLHandler::findFiles(files,DLHandler::getFormatFilePattern(),TargetDir)) return 0;
300    
301     vector<string>::iterator itr;
302     for(itr=files.begin();itr!=files.end();itr++)
303     {
304     if(DLHandler::openLib(*itr))
305     count++;
306     else
307     cerr << *itr << " did not load properly" << endl;
308     }
309     #else
310     count = 1; //avoid calling this function several times
311     #endif //USING_DYNAMIC_LIBS
312     */
313     int count = 1;
314     return count;
315     }
316    
317     /**
318     *Returns the ID + the first line of the description in str
319     *and a pointer to the format in pFormat.
320     *If called with str==NULL the first format is returned;
321     *subsequent formats are returned by calling with str!=NULL and the previous value of itr
322     *returns false, and str and pFormat NULL, when there are no more formats.
323     *Use like:
324     *@code
325     * const char* str=NULL;
326     * Formatpos pos;
327     * while(OBConversion::GetNextFormat(pos,str,pFormat))
328     * {
329     * use str and pFormat
330     * }
331     *@endcode
332     */
333     bool OBConversion::GetNextFormat(Formatpos& itr, const char*& str,OBFormat*& pFormat)
334     {
335    
336     pFormat = NULL;
337     if(str==NULL)
338     itr = FormatsMap().begin();
339     else
340     itr++;
341     if(itr == FormatsMap().end())
342     {
343     str=NULL; pFormat=NULL;
344     return false;
345     }
346     static string s;
347     s =itr->first;
348     pFormat = itr->second;
349     if(pFormat)
350     {
351     string description(pFormat->Description());
352     s += " -- ";
353     s += description.substr(0,description.find('\n'));
354     }
355    
356     if(pFormat->Flags() & NOTWRITABLE) s+=" [Read-only]";
357     if(pFormat->Flags() & NOTREADABLE) s+=" [Write-only]";
358    
359     str = s.c_str();
360     return true;
361     }
362    
363     //////////////////////////////////////////////////////
364     /// Sets the formats from their ids, e g CML.
365     /// If inID is NULL, the input format is left unchanged. Similarly for outID
366     /// Returns true if both formats have been successfully set at sometime
367     bool OBConversion::SetInAndOutFormats(const char* inID, const char* outID)
368     {
369     return SetInFormat(inID) && SetOutFormat(outID);
370     }
371     //////////////////////////////////////////////////////
372    
373     bool OBConversion::SetInAndOutFormats(OBFormat* pIn, OBFormat* pOut)
374     {
375     return SetInFormat(pIn) && SetOutFormat(pOut);
376     }
377     //////////////////////////////////////////////////////
378     bool OBConversion::SetInFormat(OBFormat* pIn)
379     {
380     if(pIn==NULL)
381     return true;
382     pInFormat=pIn;
383     return !(pInFormat->Flags() & NOTREADABLE);
384     }
385     //////////////////////////////////////////////////////
386     bool OBConversion::SetOutFormat(OBFormat* pOut)
387     {
388     pOutFormat=pOut;
389     return !(pOutFormat->Flags() & NOTWRITABLE);
390     }
391     //////////////////////////////////////////////////////
392     bool OBConversion::SetInFormat(const char* inID)
393     {
394     if(inID)
395     pInFormat = FindFormat(inID);
396     return pInFormat && !(pInFormat->Flags() & NOTREADABLE);
397     }
398     //////////////////////////////////////////////////////
399    
400     bool OBConversion::SetOutFormat(const char* outID)
401     {
402     if(outID)
403     pOutFormat= FindFormat(outID);
404     return pOutFormat && !(pOutFormat->Flags() & NOTWRITABLE);
405     }
406    
407     //////////////////////////////////////////////////////
408     int OBConversion::Convert(istream* is, ostream* os)
409     {
410     if(is) pInStream=is;
411     if(os) pOutStream=os;
412     ostream* pOrigOutStream = pOutStream;
413    
414     #ifdef HAVE_LIBZ
415     zlib_stream::zip_istream zIn(*pInStream);
416     if(zIn.is_gzip())
417     pInStream = &zIn;
418    
419     zlib_stream::zip_ostream zOut(*pOutStream);
420     if(IsOption("z",GENOPTIONS))
421     {
422     // make sure to output the header
423     zOut.make_gzip();
424     pOutStream = &zOut;
425     }
426     #endif
427    
428     int count = Convert();
429     pOutStream = pOrigOutStream;
430     return count;
431    
432     }
433    
434     ////////////////////////////////////////////////////
435     /// Actions the "convert" interface.
436     /// Calls the OBFormat class's ReadMolecule() which
437     /// - makes a new chemical object of its chosen type (e.g. OBMol)
438     /// - reads an object from the input file
439     /// - subjects the chemical object to 'transformations' as specified by the Options
440     /// - calls AddChemObject to add it to a buffer. The previous object is first output
441     /// via the output Format's WriteMolecule(). During the output process calling
442     /// IsFirst() and GetIndex() (the number of objects including the current one already output.
443     /// allows more control, for instance writing \<cml\> and \</cml\> tags for multiple molecule outputs only.
444     ///
445     /// AddChemObject does not save the object passed to it if it is NULL (as a result of a DoTransformation())
446     /// or if the number of the object is outside the range defined by
447     /// StartNumber and EndNumber.This means the start and end counts apply to all chemical objects
448     /// found whether or not they are output.
449     ///
450     /// If ReadMolecule returns false the input conversion loop is exited.
451     ///
452     int OBConversion::Convert()
453     {
454     if(pInStream==NULL || pOutStream==NULL)
455     {
456     cerr << "input or output stream not set" << endl;
457     return 0;
458     }
459    
460     if(!pInFormat) return 0;
461     Count=0;//number objects processed
462    
463     if(!SetStartAndEnd())
464     return 0;
465    
466     ReadyToInput=true;
467     m_IsLast=false;
468     pOb1=NULL;
469     wInlen=0;
470    
471     //Input loop
472     while(ReadyToInput && pInStream->peek() != EOF && pInStream->good())
473     {
474     if(pInStream==&cin)
475     {
476     if(pInStream->peek()=='\n')
477     break;
478     }
479     else
480     rInpos = pInStream->tellg();
481    
482     bool ret=false;
483     try
484     {
485     ret = pInFormat->ReadChemObject(this);
486     }
487     catch(...)
488     {
489     if(!IsOption("e", GENOPTIONS) && !OneObjectOnly)
490     throw;
491     }
492    
493     if(!ret)
494     {
495     //error or termination request: terminate unless
496     // -e option requested and sucessfully can skip past current object
497     if(!IsOption("e", GENOPTIONS) || pInFormat->SkipObjects(0,this)!=1)
498     break;
499     }
500     if(OneObjectOnly)
501     break;
502     // Objects supplied to AddChemObject() which may output them after a delay
503     //ReadyToInput may be made false in AddChemObject()
504     // by WriteMolecule() returning false or by Count==EndNumber
505     }
506    
507     //Output last object
508     if(!MoreFilesToCome)
509     m_IsLast=true;
510    
511     if(pOutFormat)
512     if(!pOutFormat->WriteChemObject(this))
513     Index--;
514    
515     //Put AddChemObject() into non-queue mode
516     Count= -1;
517     EndNumber=StartNumber=0; pOb1=NULL;//leave tidy
518     MoreFilesToCome=false;
519     OneObjectOnly=false;
520    
521     return Index; //The number actually output
522     }
523     //////////////////////////////////////////////////////
524     bool OBConversion::SetStartAndEnd()
525     {
526     int TempStartNumber=0;
527     const char* p = IsOption("f",GENOPTIONS);
528     if(p)
529     {
530     StartNumber=atoi(p);
531     if(StartNumber>1)
532     {
533     TempStartNumber=StartNumber;
534     //Try to skip objects now
535     int ret = pInFormat->SkipObjects(StartNumber-1,this);
536     if(ret==-1) //error
537     return false;
538     if(ret==1) //success:objects skipped
539     {
540     Count = StartNumber-1;
541     StartNumber=0;
542     }
543     }
544     }
545    
546     p = IsOption("l",GENOPTIONS);
547     if(p)
548     {
549     EndNumber=atoi(p);
550     if(TempStartNumber && EndNumber<TempStartNumber)
551     EndNumber=TempStartNumber;
552     }
553    
554     return true;
555     }
556    
557     //////////////////////////////////////////////////////
558     /// Retrieves an object stored by AddChemObject() during output
559     OBBase* OBConversion::GetChemObject()
560     {
561     Index++;
562     return pOb1;
563     }
564    
565     //////////////////////////////////////////////////////
566     /// Called by ReadMolecule() to deliver an object it has read from an input stream.
567     /// Used in two modes:
568     /// - When Count is negative it is left negative and the routine is just a store
569     /// for an OBBase object. The negative value returned tells the calling
570     /// routine that no more objects are required.
571     /// - When count is >=0, probably set by Convert(), it acts as a queue of 2:
572     /// writing the currently stored value before accepting the supplied one. This delay
573     /// allows output routines to respond differently when the written object is the last.
574     /// Count is incremented with each call, even if pOb=NULL.
575     /// Objects are not added to the queue if the count is outside the range
576     /// StartNumber to EndNumber. There is no upper limit if EndNumber is zero.
577     /// The return value is the number of objects, including this one, which have been
578     /// input (but not necessarily output).
579     int OBConversion::AddChemObject(OBBase* pOb)
580     {
581     if(Count<0)
582     {
583     pOb1=pOb;
584     return Count;
585     }
586     Count++;
587     if(Count>=(int)StartNumber)//keeps reading objects but does nothing with them
588     {
589     if(Count==(int)EndNumber)
590     ReadyToInput=false; //stops any more objects being read
591    
592     rInlen = pInStream->tellg() - rInpos;
593    
594     if(pOb)
595     {
596     if(pOb1 && pOutFormat) //see if there is an object ready to be output
597     {
598     //Output object
599     if (!pOutFormat->WriteChemObject(this))
600     {
601     //faultly write, so finish
602     --Index;
603     ReadyToInput=false;
604     return Count;
605     }
606     }
607     pOb1=pOb;
608     wInpos = rInpos; //Save the position in the input file to be accessed when writing it
609     wInlen = rInlen;
610     }
611     }
612     return Count;
613     }
614     //////////////////////////////////////////////////////
615     int OBConversion::GetOutputIndex() const
616     {
617     //The number of objects actually written already from this instance of OBConversion
618     return Index;
619     }
620     void OBConversion::SetOutputIndex(int indx)
621     {
622     Index=indx;
623     }
624     //////////////////////////////////////////////////////
625     OBFormat* OBConversion::FindFormat(const char* ID)
626     {
627     //Case insensitive
628     if(FormatsMap().find(ID) == FormatsMap().end())
629     return NULL;
630     else
631     return FormatsMap()[ID];
632     }
633    
634     //////////////////////////////////////////////////
635     const char* OBConversion::GetTitle() const
636     {
637     return(InFilename.c_str());
638     }
639    
640     void OBConversion::SetMoreFilesToCome()
641     {
642     MoreFilesToCome=true;
643     }
644    
645     void OBConversion::SetOneObjectOnly()
646     {
647     OneObjectOnly=true;
648     m_IsLast=true;
649     }
650    
651     /////////////////////////////////////////////////////////
652     OBFormat* OBConversion::FormatFromExt(const char* filename)
653     {
654     string file = filename;
655     size_t extPos = file.rfind(".");
656    
657     if(extPos!=string::npos)
658     {
659     // only do this if we actually can read .gz files
660     #ifdef HAVE_LIBZ
661     if (file.substr(extPos,3) == ".gz")
662     {
663     file.erase(extPos);
664     extPos = file.rfind(".");
665     if (extPos!=string::npos)
666     return FindFormat( (file.substr(extPos + 1, file.size())).c_str() );
667     }
668     else
669     #endif
670     return FindFormat( (file.substr(extPos + 1, file.size())).c_str() );
671     }
672     return NULL; //if no extension
673     }
674    
675     OBFormat* OBConversion::FormatFromMIME(const char* MIME)
676     {
677     if(FormatsMIMEMap().find(MIME) == FormatsMIMEMap().end())
678     return NULL;
679     else
680     return FormatsMIMEMap()[MIME];
681     }
682    
683     bool OBConversion::Read(OBBase* pOb, std::istream* pin)
684     {
685     if(pin)
686     pInStream=pin;
687     if(!pInFormat) return false;
688    
689     #ifdef HAVE_LIBZ
690     zlib_stream::zip_istream zIn(*pInStream);
691     if(zIn.is_gzip())
692     pInStream = &zIn;
693     #endif
694    
695     return pInFormat->ReadMolecule(pOb, this);
696     }
697     //////////////////////////////////////////////////
698     /// Writes the object pOb but does not delete it afterwards.
699     /// The output stream is lastingly changed if pout is not NULL
700     /// Returns true if successful.
701     bool OBConversion::Write(OBBase* pOb, ostream* pos)
702     {
703     if(pos)
704     pOutStream=pos;
705     if(!pOutFormat) return false;
706    
707     ostream* pOrigOutStream = pOutStream;
708     #ifdef HAVE_LIBZ
709     zlib_stream::zip_ostream zOut(*pOutStream);
710     if(IsOption("z",GENOPTIONS))
711     {
712     // make sure to output the header
713     zOut.make_gzip();
714     pOutStream = &zOut;
715     }
716     #endif
717    
718     bool ret = pOutFormat->WriteMolecule(pOb,this);
719     pOutStream = pOrigOutStream;
720     return ret;
721     }
722    
723     //////////////////////////////////////////////////
724     /// Writes the object pOb but does not delete it afterwards.
725     /// The output stream not changed (since we cannot write to this string later)
726     /// Returns true if successful.
727     std::string OBConversion::WriteString(OBBase* pOb)
728     {
729     ostream *oldStream = pOutStream; // save old output
730     stringstream newStream;
731    
732     if(pOutFormat)
733     {
734     Write(pOb, &newStream);
735     }
736     pOutStream = oldStream;
737    
738     return newStream.str();
739     }
740    
741     //////////////////////////////////////////////////
742     /// Writes the object pOb but does not delete it afterwards.
743     /// The output stream is lastingly changed to point to the file
744     /// Returns true if successful.
745     bool OBConversion::WriteFile(OBBase* pOb, string filePath)
746     {
747     if(!pOutFormat) return false;
748    
749     ofstream ofs;
750     ios_base::openmode omode =
751     pOutFormat->Flags() & WRITEBINARY ? ios_base::out|ios_base::binary : ios_base::out;
752    
753     ofs.open(filePath.c_str(),omode);
754     if(!ofs)
755     {
756     cerr << "Cannot write to " << filePath <<endl;
757     return false;
758     }
759    
760     return Write(pOb, &ofs);
761     }
762    
763     ////////////////////////////////////////////
764     bool OBConversion::ReadString(OBBase* pOb, std::string input)
765     {
766     stringstream pin(input);
767     return Read(pOb,&pin);
768     }
769    
770    
771     ////////////////////////////////////////////
772     bool OBConversion::ReadFile(OBBase* pOb, std::string filePath)
773     {
774     if(!pInFormat) return false;
775    
776     ifstream ifs;
777     ios_base::openmode imode =
778     pOutFormat->Flags() & READBINARY ? ios_base::in|ios_base::binary : ios_base::in;
779    
780     ifs.open(filePath.c_str(),imode);
781     if(!ifs)
782     {
783     cerr << "Cannot read from " << filePath << endl;
784     return false;
785     }
786    
787     return Read(pOb,&ifs);
788     }
789    
790    
791     ////////////////////////////////////////////
/// Returns the help text for the general conversion options
/// (-f, -l, -t, -e and -z). The text is one string literal continued over
/// several source lines with trailing backslashes, so the whitespace at the
/// start of each continuation line is part of the returned string.
792     const char* OBConversion::Description()
793     {
794     return "Conversion options\n \
795     -f <#> Start import at molecule # specified\n \
796     -l <#> End import at molecule # specified\n \
797     -t All input files describe a single molecule\n \
798     -e Continue with next object after error, if possible\n \
799     -z Compress the output with gzip\n";
800     }
801    
802     ////////////////////////////////////////////
803     bool OBConversion::IsLast()
804     {
805     return m_IsLast;
806     }
807     ////////////////////////////////////////////
808     bool OBConversion::IsFirstInput()
809     {
810     return (Count==0);
811     }
812    
813     /////////////////////////////////////////////////
814     string OBConversion::BatchFileName(string& BaseName, string& InFile)
815     {
816     //Replaces * in BaseName by InFile without extension and path
817     string ofname(BaseName);
818     int pos = ofname.find('*');
819     if(pos>=0)
820     {
821     //Replace * by input filename
822     int posdot=(InFile).rfind('.');
823     if(posdot==-1) posdot=(InFile).size();
824     int posname=(InFile).find_last_of("\\/");
825     ofname.replace(pos,1, (InFile), posname+1, posdot-posname-1);
826     }
827     return ofname;
828     }
829    
830     ////////////////////////////////////////////////
831     string OBConversion::IncrementedFileName(string& BaseName, const int Count)
832     {
833     //Replaces * in BaseName by Count
834     string ofname(BaseName);
835     int pos = ofname.find('*');
836     if(pos>=0)
837     {
838     char num[33];
839     snprintf(num, 33, "%d", Count);
840     ofname.replace(pos,1, num);
841     }
842     return ofname;
843     }
844     ////////////////////////////////////////////////////
845    
846     /**
847     Makes input and output streams, and carries out normal,
848     batch, aggregation, and splitting conversion.
849    
850     Normal
851     Done if FileList contains a single file name and OutputFileName
852     does not contain a *.
853    
854     Aggregation
855     Done if FileList has more than one file name and OutputFileName does
856     not contain * . All the chemical objects are converted and sent
857     to the single output file.
858    
859     Splitting
860     Done if FileList contains a single file name and OutputFileName
861     contains a * . Each chemical object in the input file converted
862     and sent to a separate file whose name is OutputFileName with the
863     * replaced by 1, 2, 3, etc.
864     For example, if OutputFileName is NEW*.smi then the output files are
865     NEW1.smi, NEW2.smi, etc.
866    
867     Batch Conversion
868     Done if FileList has more than one file name and OutputFileName contains a * .
869     Each input file is converted to an output file whose name is
870     OutputFileName with the * replaced by the inputfile name without its
871     path and extension.
872     So if the input files were inpath/First.cml, inpath/Second.cml
873     and OutputFileName was NEW*.mol, the output files would be
874     NEWFirst.mol, NEWSecond.mol.
875    
876     If FileList is empty, the input stream that has already been set
877     (usually in the constructor) is used. If OutputFileName is empty,
878     the output stream already set is used.
879    
880     On exit, OutputFileList contains the names of the output files.
881    
882     Returns the number of Chemical objects converted.
883     */
884     int OBConversion::FullConvert(std::vector<std::string>& FileList, std::string& OutputFileName,
885     std::vector<std::string>& OutputFileList)
886     {
887    
888     istream* pInStream;
889     ostream* pOutStream=NULL;
890     ifstream is;
891     ofstream os;
892     bool HasMultipleOutputFiles=false;
893     int Count=0;
894     bool CommonInFormat = pInFormat ? true:false; //whether set in calling routine
895     ios_base::openmode omode =
896     pOutFormat->Flags() & WRITEBINARY ? ios_base::out|ios_base::binary : ios_base::out;
897     try
898     {
899     ofstream ofs;
900    
901     //OUTPUT
902     if(OutputFileName.empty())
903     pOutStream = NULL; //use existing stream
904     else
905     {
906     if(OutputFileName.find_first_of('*')!=string::npos) HasMultipleOutputFiles = true;
907     if(!HasMultipleOutputFiles)
908     {
909     os.open(OutputFileName.c_str(),omode);
910     if(!os)
911     {
912     cerr << "Cannot write to " << OutputFileName <<endl;
913     return 0;
914     }
915     OutputFileList.push_back(OutputFileName);
916     pOutStream=&os;
917     }
918     }
919    
920     if(IsOption("t",GENOPTIONS))
921     {
922     //Concatenate input file option (multiple files, single molecule)
923     if(HasMultipleOutputFiles)
924     {
925     cerr << "Cannot have multiple output files and also concatenate input files (-t option)" <<endl;
926     return 0;
927     }
928    
929     stringstream allinput;
930     vector<string>::iterator itr;
931     for(itr=FileList.begin();itr!=FileList.end();itr++)
932     {
933     ifstream ifs((*itr).c_str());
934     if(!ifs)
935     {
936     cerr << "Cannot open " << *itr <<endl;
937     continue;
938     }
939     allinput << ifs.rdbuf(); //Copy all file contents
940     ifs.close();
941     }
942     Count = Convert(&allinput,pOutStream);
943     return Count;
944     }
945    
946     //INPUT
947     if(FileList.empty())
948     pInStream = NULL;
949     else
950     {
951     if(FileList.size()>1)
952     {
953     //multiple input files
954     vector<string>::iterator itr, tempitr;
955     tempitr = FileList.end();
956     tempitr--;
957     for(itr=FileList.begin();itr!=FileList.end();itr++)
958     {
959     InFilename = *itr;
960     ifstream ifs;
961     if(!OpenAndSetFormat(CommonInFormat, &ifs))
962     continue;
963    
964     if(HasMultipleOutputFiles)
965     {
966     //Batch conversion
967     string batchfile = BatchFileName(OutputFileName,*itr);
968     if(ofs.is_open()) ofs.close();
969     ofs.open(batchfile.c_str(), omode);
970     if(!ofs)
971     {
972     cerr << "Cannot open " << batchfile << endl;
973     return Count;
974     }
975     OutputFileList.push_back(batchfile);
976     SetOutputIndex(0); //reset for new file
977     Count += Convert(&ifs,&ofs);
978     }
979     else
980     {
981     //Aggregation
982     if(itr!=tempitr) SetMoreFilesToCome();
983     Count = Convert(&ifs,pOutStream);
984     }
985     }
986     return Count;
987     }
988     else
989     {
990     //Single input file
991     InFilename = FileList[0];
992     if(!OpenAndSetFormat(CommonInFormat, &is))
993     return 0;
994     pInStream=&is;
995    
996     if(HasMultipleOutputFiles)
997     {
998     //Splitting
999     //Output is put in a temporary stream and written to a file
1000     //with an augmenting name only when it contains a valid object.
1001     int Indx=1;
1002     for(;;)
1003     {
1004     stringstream ss;
1005     SetOutputIndex(0); //reset for new file
1006     SetOneObjectOnly();
1007    
1008     int ThisFileCount = Convert(pInStream,&ss);
1009     if(ThisFileCount==0) break;
1010     Count+=ThisFileCount;
1011    
1012     if(ofs.is_open()) ofs.close();
1013     string incrfile = IncrementedFileName(OutputFileName,Indx++);
1014     ofs.open(incrfile.c_str(), omode);
1015     if(!ofs)
1016     {
1017     cerr << "Cannot write to " << incrfile << endl;
1018     return Count;
1019     }
1020     OutputFileList.push_back(incrfile);
1021     ofs << ss.rdbuf();
1022     ofs.close();
1023     ss.clear();
1024     }
1025     return Count;
1026     }
1027     }
1028     }
1029    
1030     //Single input and output files
1031     Count = Convert(pInStream,pOutStream);
1032     return Count;
1033     }
1034     catch(...)
1035     {
1036     cerr << "Conversion failed with an exception. Count=" << Count <<endl;
1037     return Count;
1038     }
1039     }
1040    
1041     bool OBConversion::OpenAndSetFormat(bool SetFormat, ifstream* is)
1042     {
1043     //Opens file using InFilename and sets pInFormat if requested
1044     if(!SetFormat)
1045     {
1046     pInFormat = FormatFromExt(InFilename.c_str());
1047     if(pInFormat==NULL)
1048     {
1049     string::size_type pos = InFilename.rfind('.');
1050     string ext;
1051     if(pos!=string::npos)
1052     ext = InFilename.substr(pos);
1053     cerr << "Cannot read input format \"" << ext << '\"'
1054     << " for file \"" << InFilename << "\"" << endl;
1055     return false;
1056     }
1057     }
1058    
1059     ios_base::openmode imode;
1060     #ifdef ALL_READS_BINARY //Makes unix files compatible with VC++6
1061     imode = ios_base::in|ios_base::binary;
1062     #else
1063     imode = pInFormat->Flags() & READBINARY ? ios_base::in|ios_base::binary : ios_base::in;
1064     #endif
1065    
1066     is->open(InFilename.c_str(), imode);
1067     if(!is->good())
1068     {
1069     cerr << "Cannot open " << InFilename <<endl;
1070     return false;
1071     }
1072    
1073     return true;
1074     }
1075    
1076     ///////////////////////////////////////////////
1077     void OBConversion::AddOption(const char* opt, Option_type opttyp, const char* txt)
1078     {
1079     //Also updates an option
1080     if(txt==NULL)
1081     OptionsArray[opttyp][opt]=string();
1082     else
1083     OptionsArray[opttyp][opt]=txt;
1084     }
1085    
1086     const char* OBConversion::IsOption(const char* opt, Option_type opttyp)
1087     {
1088     //Returns NULL if option not found or a pointer to the text if it is
1089     map<string,string>::iterator pos;
1090     pos = OptionsArray[opttyp].find(opt);
1091     if(pos==OptionsArray[opttyp].end())
1092     return NULL;
1093     return pos->second.c_str();
1094     }
1095    
1096     bool OBConversion::RemoveOption(const char* opt, Option_type opttyp)
1097     {
1098     return OptionsArray[opttyp].erase(opt)!=0;//true if was there
1099     }
1100    
1101     void OBConversion::SetOptions(const char* options, Option_type opttyp)
1102     {
1103     while(*options)
1104     {
1105     string ch(1, *options++);
1106     if(*options=='\"')
1107     {
1108     string txt = options+1;
1109     string::size_type pos = txt.find('\"');
1110     if(pos==string::npos)
1111     return; //options is illformed
1112     txt.erase(pos);
1113     OptionsArray[opttyp][ch]= txt;
1114     options += pos+2;
1115     }
1116     else
1117     OptionsArray[opttyp][ch] = string();
1118     }
1119     }
1120    
1121     typedef std::map<string,int> OPAMapType;
1122     OPAMapType& OBConversion::OptionParamArray(Option_type typ)
1123     {
1124     static OPAMapType* opa = new OPAMapType[3];
1125     return opa[typ];
1126     }
1127    
1128     void OBConversion::RegisterOptionParam(string name, OBFormat* pFormat,
1129     int numberParams, Option_type typ)
1130     {
1131     //Gives error message if the number of parameters conflicts with an existing registration
1132     map<string,int>::iterator pos;
1133     pos = OptionParamArray(typ).find(name);
1134     if(pos!=OptionParamArray(typ).end())
1135     {
1136     if(pos->second!=numberParams)
1137     {
1138     string description("API");
1139     if(pFormat)
1140     description=pFormat->Description();
1141     cerr << "The number of parameters needed by option \"" << name << "\" in "
1142     << description.substr(0,description.find('\n'))
1143     << " differs from an earlier registration." << endl;
1144     return;
1145     }
1146     }
1147     OptionParamArray(typ)[name] = numberParams;
1148     }
1149    
1150     int OBConversion::GetOptionParams(string name, Option_type typ)
1151     {
1152     //returns the number of parameters registered for the option, or 0 if not found
1153     map<string,int>::iterator pos;
1154     pos = OptionParamArray(typ).find(name);
1155     if(pos==OptionParamArray(typ).end())
1156     return 0;
1157     return pos->second;
1158     }
1159    
1160     }//namespace OpenBabel
1161    
1162     //! \file obconversion.cpp
1163     //! \brief Implementation of OBFormat and OBConversion classes.