ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/OOPSE-3.0/src/openbabel/obconversion.cpp
Revision: 2469
Committed: Fri Dec 2 15:38:03 2005 UTC (18 years, 8 months ago) by tim
File size: 27818 byte(s)
Log Message:
End of the Link --> List
Return of the Oject-Oriented
replace yacc/lex parser with antlr parser

File Contents

# User Rev Content
1 tim 2440 /**********************************************************************
2     obconversion.cpp - Implementation of OBFormat and OBConversion
3    
4     Copyright (C) 2004 by Chris Morley
5     Some portions Copyright (C) 2005 by Geoffrey Hutchison
6    
7     This file is part of the Open Babel project.
8     For more information, see <http://openbabel.sourceforge.net/>
9    
10     This program is free software; you can redistribute it and/or modify
11     it under the terms of the GNU General Public License as published by
12     the Free Software Foundation version 2 of the License.
13    
14     This program is distributed in the hope that it will be useful,
15     but WITHOUT ANY WARRANTY; without even the implied warranty of
16     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17     GNU General Public License for more details.
18     ***********************************************************************/
19     // Definition of OBConversion routines
20    
21     #ifdef _WIN32
22     #pragma warning (disable : 4786)
23    
24     //using 'this' in base class initializer
25     #pragma warning (disable : 4355)
26    
27     #ifdef GUI
28     #undef DATADIR
29     #include "stdafx.h" //(includes<windows.h>
30     #endif
31     #endif
32    
33     #include <iostream>
34     #include <fstream>
35     #include <sstream>
36     #include <string>
37     #include <map>
38     //#include <dlfcn.h>
39    
40     #include "obconversion.hpp"
41    
42     #ifdef HAVE_LIBZ
43     #include "zipstream.hpp"
44     #endif
45    
46     #if !HAVE_STRNCASECMP
47     extern "C" int strncasecmp(const char *s1, const char *s2, size_t n);
48     #endif
49    
50     #ifndef BUFF_SIZE
51     #define BUFF_SIZE 32768
52     #endif
53    
54     using namespace std;
55     namespace OpenBabel {
56    
57     const char* OBFormat::TargetClassDescription()
58     {
59     //Provides class of default format unless overridden
60     if(OBConversion::GetDefaultFormat())
61     return OBConversion::GetDefaultFormat()->TargetClassDescription();
62     else
63     return "";
64 gezelter 2454 }
65 tim 2440 const type_info& OBFormat::GetType()
66     {
67     //Provides info on class of default format unless overridden
68     if(OBConversion::GetDefaultFormat())
69     return OBConversion::GetDefaultFormat()->GetType();
70     else
71     return typeid(this); //rubbish return if DefaultFormat not set
72 gezelter 2454 }
73 tim 2440
74    
75     int OBConversion::FormatFilesLoaded = 0;
76    
77     OBFormat* OBConversion::pDefaultFormat=NULL;
78    
79     OBConversion::OBConversion(istream* is, ostream* os) :
80     pInFormat(NULL),pOutFormat(NULL), Index(0), StartNumber(1),
81     EndNumber(0), Count(-1), m_IsLast(true), MoreFilesToCome(false),
82     OneObjectOnly(false), pOb1(NULL), pAuxConv(NULL)
83     {
84     pInStream=is;
85     pOutStream=os;
86     if (FormatFilesLoaded == 0)
87     FormatFilesLoaded = LoadFormatFiles();
88    
89     //These options take a parameter
90     RegisterOptionParam("f", NULL, 1,GENOPTIONS);
91     RegisterOptionParam("l", NULL, 1,GENOPTIONS);
92     }
93    
94     ///This static function returns a reference to the FormatsMap
95     ///which, because it is a static local variable is constructed only once.
96     ///This fiddle is to avoid the "static initialization order fiasco"
97     ///See Marshall Cline's C++ FAQ Lite document, www.parashift.com/c++-faq-lite/".
98     FMapType& OBConversion::FormatsMap()
99     {
100     static FMapType* fm = new FMapType;
101     return *fm;
102     }
103    
104     ///This static function returns a reference to the FormatsMIMEMap
105     ///which, because it is a static local variable is constructed only once.
106     ///This fiddle is to avoid the "static initialization order fiasco"
107     ///See Marshall Cline's C++ FAQ Lite document, www.parashift.com/c++-faq-lite/".
108     FMapType& OBConversion::FormatsMIMEMap()
109     {
110     static FMapType* fm = new FMapType;
111     return *fm;
112     }
113    
114     /////////////////////////////////////////////////
115     OBConversion::OBConversion(const OBConversion& o)
116     {
117     Index = o.Index;
118     Count = o.Count;
119     StartNumber = o.StartNumber;
120     EndNumber = o.EndNumber;
121     pInFormat = o.pInFormat;
122     pInStream = o.pInStream;
123     pOutFormat = o.pOutFormat;
124     pOutStream = o.pOutStream;
125     OptionsArray[0]= o.OptionsArray[0];
126     OptionsArray[1]= o.OptionsArray[1];
127     OptionsArray[2]= o.OptionsArray[2];
128     InFilename = o.InFilename;
129     rInpos = o.rInpos;
130     wInpos = o.wInpos;
131     rInlen = o.rInlen;
132     wInlen = o.wInlen;
133     m_IsLast = o.m_IsLast;
134     MoreFilesToCome= o.MoreFilesToCome;
135     OneObjectOnly = o.OneObjectOnly;
136     pOb1 = o.pOb1;
137     ReadyToInput = o.ReadyToInput;
138    
139     pAuxConv = NULL;
140     }
141     ////////////////////////////////////////////////
142    
143     OBConversion::~OBConversion()
144     {
145     if(pAuxConv!=this)
146     delete pAuxConv;
147     }
148     //////////////////////////////////////////////////////
149    
150     /// Class information on formats is collected by making an instance of the class
151     /// derived from OBFormat(only one is usually required). RegisterFormat() is called
152     /// from its constructor.
153     ///
154     /// If the compiled format is stored separately, like in a DLL or shared library,
155     /// the initialization code makes an instance of the imported OBFormat class.
156     int OBConversion::RegisterFormat(const char* ID, OBFormat* pFormat, const char* MIME)
157     {
158     FormatsMap()[ID] = pFormat;
159     if (MIME)
160     FormatsMIMEMap()[MIME] = pFormat;
161     if(pFormat->Flags() & DEFAULTFORMAT)
162     pDefaultFormat=pFormat;
163     return FormatsMap().size();
164     }
165    
166     //////////////////////////////////////////////////////
167     int OBConversion::LoadFormatFiles()
168     {
169     /*
170     int count=0;
171     // if(FormatFilesLoaded) return 0;
172     // FormatFilesLoaded=true; //so will load files only once
173     #ifdef USING_DYNAMIC_LIBS
174     //Depending on availablilty, look successively in
175     //FORMATFILE_DIR, executable directory,or current directory
176     string TargetDir;
177     #ifdef FORMATFILE_DIR
178     TargetDir="FORMATFILE_DIR";
179     #endif
180    
181     DLHandler::getConvDirectory(TargetDir);
182    
183     vector<string> files;
184     if(!DLHandler::findFiles(files,DLHandler::getFormatFilePattern(),TargetDir)) return 0;
185    
186     vector<string>::iterator itr;
187     for(itr=files.begin();itr!=files.end();itr++)
188     {
189     if(DLHandler::openLib(*itr))
190     count++;
191     else
192     cerr << *itr << " did not load properly" << endl;
193     }
194     #else
195     count = 1; //avoid calling this function several times
196     #endif //USING_DYNAMIC_LIBS
197     */
198     int count = 1;
199     return count;
200     }
201    
202     /**
203     *Returns the ID + the first line of the description in str
204     *and a pointer to the format in pFormat.
205     *If called with str==NULL the first format is returned;
206     *subsequent formats are returned by calling with str!=NULL and the previous value of itr
207     *returns false, and str and pFormat NULL, when there are no more formats.
208     *Use like:
209     *@code
210     * const char* str=NULL;
211     * Formatpos pos;
212     * while(OBConversion::GetNextFormat(pos,str,pFormat))
213     * {
214     * use str and pFormat
215     * }
216     *@endcode
217     */
218     bool OBConversion::GetNextFormat(Formatpos& itr, const char*& str,OBFormat*& pFormat)
219     {
220    
221     pFormat = NULL;
222     if(str==NULL)
223     itr = FormatsMap().begin();
224     else
225     itr++;
226     if(itr == FormatsMap().end())
227     {
228     str=NULL; pFormat=NULL;
229     return false;
230     }
231     static string s;
232     s =itr->first;
233     pFormat = itr->second;
234     if(pFormat)
235     {
236     string description(pFormat->Description());
237     s += " -- ";
238     s += description.substr(0,description.find('\n'));
239     }
240    
241     if(pFormat->Flags() & NOTWRITABLE) s+=" [Read-only]";
242     if(pFormat->Flags() & NOTREADABLE) s+=" [Write-only]";
243    
244     str = s.c_str();
245     return true;
246     }
247    
248     //////////////////////////////////////////////////////
249     /// Sets the formats from their ids, e g CML.
250     /// If inID is NULL, the input format is left unchanged. Similarly for outID
251     /// Returns true if both formats have been successfully set at sometime
252     bool OBConversion::SetInAndOutFormats(const char* inID, const char* outID)
253     {
254     return SetInFormat(inID) && SetOutFormat(outID);
255     }
256     //////////////////////////////////////////////////////
257    
258     bool OBConversion::SetInAndOutFormats(OBFormat* pIn, OBFormat* pOut)
259     {
260     return SetInFormat(pIn) && SetOutFormat(pOut);
261     }
262     //////////////////////////////////////////////////////
263     bool OBConversion::SetInFormat(OBFormat* pIn)
264     {
265     if(pIn==NULL)
266     return true;
267     pInFormat=pIn;
268     return !(pInFormat->Flags() & NOTREADABLE);
269     }
270     //////////////////////////////////////////////////////
271     bool OBConversion::SetOutFormat(OBFormat* pOut)
272     {
273     pOutFormat=pOut;
274     return !(pOutFormat->Flags() & NOTWRITABLE);
275     }
276     //////////////////////////////////////////////////////
277     bool OBConversion::SetInFormat(const char* inID)
278     {
279     if(inID)
280     pInFormat = FindFormat(inID);
281     return pInFormat && !(pInFormat->Flags() & NOTREADABLE);
282     }
283     //////////////////////////////////////////////////////
284    
285     bool OBConversion::SetOutFormat(const char* outID)
286     {
287     if(outID)
288     pOutFormat= FindFormat(outID);
289     return pOutFormat && !(pOutFormat->Flags() & NOTWRITABLE);
290     }
291    
292     //////////////////////////////////////////////////////
293     int OBConversion::Convert(istream* is, ostream* os)
294     {
295     if(is) pInStream=is;
296     if(os) pOutStream=os;
297     ostream* pOrigOutStream = pOutStream;
298    
299     #ifdef HAVE_LIBZ
300     zlib_stream::zip_istream zIn(*pInStream);
301     if(zIn.is_gzip())
302     pInStream = &zIn;
303    
304     zlib_stream::zip_ostream zOut(*pOutStream);
305     if(IsOption("z",GENOPTIONS))
306     {
307     // make sure to output the header
308     zOut.make_gzip();
309     pOutStream = &zOut;
310     }
311     #endif
312    
313     int count = Convert();
314     pOutStream = pOrigOutStream;
315     return count;
316    
317     }
318    
319     ////////////////////////////////////////////////////
320     /// Actions the "convert" interface.
321     /// Calls the OBFormat class's ReadMolecule() which
322     /// - makes a new chemical object of its chosen type (e.g. OBMol)
323     /// - reads an object from the input file
324     /// - subjects the chemical object to 'transformations' as specified by the Options
325     /// - calls AddChemObject to add it to a buffer. The previous object is first output
326     /// via the output Format's WriteMolecule(). During the output process calling
327     /// IsFirst() and GetIndex() (the number of objects including the current one already output.
328     /// allows more control, for instance writing \<cml\> and \</cml\> tags for multiple molecule outputs only.
329     ///
330     /// AddChemObject does not save the object passed to it if it is NULL (as a result of a DoTransformation())
331     /// or if the number of the object is outside the range defined by
332     /// StartNumber and EndNumber.This means the start and end counts apply to all chemical objects
333     /// found whether or not they are output.
334     ///
335     /// If ReadMolecule returns false the input conversion loop is exited.
336     ///
337     int OBConversion::Convert()
338     {
339     if(pInStream==NULL || pOutStream==NULL)
340     {
341     cerr << "input or output stream not set" << endl;
342     return 0;
343     }
344    
345     if(!pInFormat) return 0;
346     Count=0;//number objects processed
347    
348     if(!SetStartAndEnd())
349     return 0;
350    
351     ReadyToInput=true;
352     m_IsLast=false;
353     pOb1=NULL;
354     wInlen=0;
355    
356     //Input loop
357     while(ReadyToInput && pInStream->peek() != EOF && pInStream->good())
358     {
359     if(pInStream==&cin)
360     {
361     if(pInStream->peek()=='\n')
362     break;
363     }
364     else
365     rInpos = pInStream->tellg();
366    
367     bool ret=false;
368     try
369     {
370     ret = pInFormat->ReadChemObject(this);
371     }
372     catch(...)
373     {
374     if(!IsOption("e", GENOPTIONS) && !OneObjectOnly)
375     throw;
376     }
377    
378     if(!ret)
379     {
380     //error or termination request: terminate unless
381     // -e option requested and sucessfully can skip past current object
382     if(!IsOption("e", GENOPTIONS) || pInFormat->SkipObjects(0,this)!=1)
383     break;
384     }
385     if(OneObjectOnly)
386     break;
387     // Objects supplied to AddChemObject() which may output them after a delay
388     //ReadyToInput may be made false in AddChemObject()
389     // by WriteMolecule() returning false or by Count==EndNumber
390     }
391    
392     //Output last object
393     if(!MoreFilesToCome)
394     m_IsLast=true;
395    
396     if(pOutFormat)
397     if(!pOutFormat->WriteChemObject(this))
398     Index--;
399    
400     //Put AddChemObject() into non-queue mode
401     Count= -1;
402     EndNumber=StartNumber=0; pOb1=NULL;//leave tidy
403     MoreFilesToCome=false;
404     OneObjectOnly=false;
405    
406     return Index; //The number actually output
407     }
408     //////////////////////////////////////////////////////
409     bool OBConversion::SetStartAndEnd()
410     {
411     int TempStartNumber=0;
412     const char* p = IsOption("f",GENOPTIONS);
413     if(p)
414     {
415     StartNumber=atoi(p);
416     if(StartNumber>1)
417     {
418     TempStartNumber=StartNumber;
419     //Try to skip objects now
420     int ret = pInFormat->SkipObjects(StartNumber-1,this);
421     if(ret==-1) //error
422     return false;
423     if(ret==1) //success:objects skipped
424     {
425     Count = StartNumber-1;
426     StartNumber=0;
427     }
428     }
429     }
430    
431     p = IsOption("l",GENOPTIONS);
432     if(p)
433     {
434     EndNumber=atoi(p);
435     if(TempStartNumber && EndNumber<TempStartNumber)
436     EndNumber=TempStartNumber;
437     }
438    
439     return true;
440     }
441    
442     //////////////////////////////////////////////////////
443     /// Retrieves an object stored by AddChemObject() during output
444     OBBase* OBConversion::GetChemObject()
445     {
446     Index++;
447     return pOb1;
448     }
449    
450     //////////////////////////////////////////////////////
451     /// Called by ReadMolecule() to deliver an object it has read from an input stream.
452     /// Used in two modes:
453     /// - When Count is negative it is left negative and the routine is just a store
454     /// for an OBBase object. The negative value returned tells the calling
455     /// routine that no more objects are required.
456     /// - When count is >=0, probably set by Convert(), it acts as a queue of 2:
457     /// writing the currently stored value before accepting the supplied one. This delay
458     /// allows output routines to respond differently when the written object is the last.
459     /// Count is incremented with each call, even if pOb=NULL.
460     /// Objects are not added to the queue if the count is outside the range
461     /// StartNumber to EndNumber. There is no upper limit if EndNumber is zero.
462     /// The return value is the number of objects, including this one, which have been
463     /// input (but not necessarily output).
464     int OBConversion::AddChemObject(OBBase* pOb)
465     {
466     if(Count<0)
467     {
468     pOb1=pOb;
469     return Count;
470     }
471     Count++;
472     if(Count>=(int)StartNumber)//keeps reading objects but does nothing with them
473     {
474     if(Count==(int)EndNumber)
475     ReadyToInput=false; //stops any more objects being read
476    
477     rInlen = pInStream->tellg() - rInpos;
478    
479     if(pOb)
480     {
481     if(pOb1 && pOutFormat) //see if there is an object ready to be output
482     {
483     //Output object
484     if (!pOutFormat->WriteChemObject(this))
485     {
486     //faultly write, so finish
487     --Index;
488     ReadyToInput=false;
489     return Count;
490     }
491     }
492     pOb1=pOb;
493     wInpos = rInpos; //Save the position in the input file to be accessed when writing it
494     wInlen = rInlen;
495     }
496     }
497     return Count;
498     }
499     //////////////////////////////////////////////////////
500     int OBConversion::GetOutputIndex() const
501     {
502     //The number of objects actually written already from this instance of OBConversion
503     return Index;
504     }
505     void OBConversion::SetOutputIndex(int indx)
506     {
507     Index=indx;
508     }
509     //////////////////////////////////////////////////////
510     OBFormat* OBConversion::FindFormat(const char* ID)
511     {
512     //Case insensitive
513     if(FormatsMap().find(ID) == FormatsMap().end())
514     return NULL;
515     else
516     return FormatsMap()[ID];
517     }
518    
519     //////////////////////////////////////////////////
520     const char* OBConversion::GetTitle() const
521     {
522     return(InFilename.c_str());
523     }
524    
525     void OBConversion::SetMoreFilesToCome()
526     {
527     MoreFilesToCome=true;
528     }
529    
530     void OBConversion::SetOneObjectOnly()
531     {
532     OneObjectOnly=true;
533     m_IsLast=true;
534     }
535    
536     /////////////////////////////////////////////////////////
537     OBFormat* OBConversion::FormatFromExt(const char* filename)
538     {
539     string file = filename;
540     size_t extPos = file.rfind(".");
541    
542     if(extPos!=string::npos)
543     {
544     // only do this if we actually can read .gz files
545     #ifdef HAVE_LIBZ
546     if (file.substr(extPos,3) == ".gz")
547     {
548     file.erase(extPos);
549     extPos = file.rfind(".");
550     if (extPos!=string::npos)
551     return FindFormat( (file.substr(extPos + 1, file.size())).c_str() );
552     }
553     else
554     #endif
555     return FindFormat( (file.substr(extPos + 1, file.size())).c_str() );
556     }
557     return NULL; //if no extension
558     }
559    
560     OBFormat* OBConversion::FormatFromMIME(const char* MIME)
561     {
562     if(FormatsMIMEMap().find(MIME) == FormatsMIMEMap().end())
563     return NULL;
564     else
565     return FormatsMIMEMap()[MIME];
566     }
567    
568     bool OBConversion::Read(OBBase* pOb, std::istream* pin)
569     {
570     if(pin)
571     pInStream=pin;
572     if(!pInFormat) return false;
573    
574     #ifdef HAVE_LIBZ
575     zlib_stream::zip_istream zIn(*pInStream);
576     if(zIn.is_gzip())
577     pInStream = &zIn;
578     #endif
579    
580     return pInFormat->ReadMolecule(pOb, this);
581     }
582     //////////////////////////////////////////////////
583     /// Writes the object pOb but does not delete it afterwards.
584     /// The output stream is lastingly changed if pout is not NULL
585     /// Returns true if successful.
586     bool OBConversion::Write(OBBase* pOb, ostream* pos)
587     {
588     if(pos)
589     pOutStream=pos;
590     if(!pOutFormat) return false;
591    
592     ostream* pOrigOutStream = pOutStream;
593     #ifdef HAVE_LIBZ
594     zlib_stream::zip_ostream zOut(*pOutStream);
595     if(IsOption("z",GENOPTIONS))
596     {
597     // make sure to output the header
598     zOut.make_gzip();
599     pOutStream = &zOut;
600     }
601     #endif
602    
603     bool ret = pOutFormat->WriteMolecule(pOb,this);
604     pOutStream = pOrigOutStream;
605     return ret;
606     }
607    
608     //////////////////////////////////////////////////
609     /// Writes the object pOb but does not delete it afterwards.
610     /// The output stream not changed (since we cannot write to this string later)
611     /// Returns true if successful.
612     std::string OBConversion::WriteString(OBBase* pOb)
613     {
614     ostream *oldStream = pOutStream; // save old output
615     stringstream newStream;
616    
617     if(pOutFormat)
618     {
619     Write(pOb, &newStream);
620     }
621     pOutStream = oldStream;
622    
623     return newStream.str();
624     }
625    
626     //////////////////////////////////////////////////
627     /// Writes the object pOb but does not delete it afterwards.
628     /// The output stream is lastingly changed to point to the file
629     /// Returns true if successful.
630     bool OBConversion::WriteFile(OBBase* pOb, string filePath)
631     {
632     if(!pOutFormat) return false;
633    
634     ofstream ofs;
635     ios_base::openmode omode =
636     pOutFormat->Flags() & WRITEBINARY ? ios_base::out|ios_base::binary : ios_base::out;
637    
638     ofs.open(filePath.c_str(),omode);
639     if(!ofs)
640     {
641     cerr << "Cannot write to " << filePath <<endl;
642     return false;
643     }
644    
645     return Write(pOb, &ofs);
646     }
647    
648     ////////////////////////////////////////////
649     bool OBConversion::ReadString(OBBase* pOb, std::string input)
650     {
651     stringstream pin(input);
652     return Read(pOb,&pin);
653     }
654    
655    
656     ////////////////////////////////////////////
657     bool OBConversion::ReadFile(OBBase* pOb, std::string filePath)
658     {
659     if(!pInFormat) return false;
660    
661     ifstream ifs;
662     ios_base::openmode imode =
663     pOutFormat->Flags() & READBINARY ? ios_base::in|ios_base::binary : ios_base::in;
664    
665     ifs.open(filePath.c_str(),imode);
666     if(!ifs)
667     {
668     cerr << "Cannot read from " << filePath << endl;
669     return false;
670     }
671    
672     return Read(pOb,&ifs);
673     }
674    
675    
676     ////////////////////////////////////////////
677     const char* OBConversion::Description()
678     {
679     return "Conversion options\n \
680     -f <#> Start import at molecule # specified\n \
681     -l <#> End import at molecule # specified\n \
682     -t All input files describe a single molecule\n \
683     -e Continue with next object after error, if possible\n \
684     -z Compress the output with gzip\n";
685     }
686    
687     ////////////////////////////////////////////
688     bool OBConversion::IsLast()
689     {
690     return m_IsLast;
691     }
692     ////////////////////////////////////////////
693     bool OBConversion::IsFirstInput()
694     {
695     return (Count==0);
696     }
697    
698     /////////////////////////////////////////////////
699     string OBConversion::BatchFileName(string& BaseName, string& InFile)
700     {
701     //Replaces * in BaseName by InFile without extension and path
702     string ofname(BaseName);
703     int pos = ofname.find('*');
704     if(pos>=0)
705     {
706     //Replace * by input filename
707     int posdot=(InFile).rfind('.');
708     if(posdot==-1) posdot=(InFile).size();
709     int posname=(InFile).find_last_of("\\/");
710     ofname.replace(pos,1, (InFile), posname+1, posdot-posname-1);
711     }
712     return ofname;
713     }
714    
715     ////////////////////////////////////////////////
716     string OBConversion::IncrementedFileName(string& BaseName, const int Count)
717     {
718     //Replaces * in BaseName by Count
719     string ofname(BaseName);
720     int pos = ofname.find('*');
721     if(pos>=0)
722     {
723     char num[33];
724     snprintf(num, 33, "%d", Count);
725     ofname.replace(pos,1, num);
726     }
727     return ofname;
728     }
729     ////////////////////////////////////////////////////
730    
731     /**
732     Makes input and output streams, and carries out normal,
733     batch, aggregation, and splitting conversion.
734    
735     Normal
736     Done if FileList contains a single file name and OutputFileName
737     does not contain a *.
738    
739     Aggregation
740     Done if FileList has more than one file name and OutputFileName does
741     not contain * . All the chemical objects are converted and sent
742     to the single output file.
743    
744     Splitting
745     Done if FileList contains a single file name and OutputFileName
746     contains a * . Each chemical object in the input file converted
747     and sent to a separate file whose name is OutputFileName with the
748     * replaced by 1, 2, 3, etc.
749     For example, if OutputFileName is NEW*.smi then the output files are
750     NEW1.smi, NEW2.smi, etc.
751    
752     Batch Conversion
753     Done if FileList has more than one file name and contains a * .
754     Each input file is converted to an output file whose name is
755     OutputFileName with the * replaced by the inputfile name without its
756     path and extension.
757     So if the input files were inpath/First.cml, inpath/Second.cml
758     and OutputFileName was NEW*.mol, the output files would be
759     NEWFirst.mol, NEWSecond.mol.
760    
761     If FileList is empty, the input stream that has already been set
762     (usually in the constructor) is used. If OutputFileName is empty,
763     the output stream already set is used.
764    
765     On exit, OutputFileList contains the names of the output files.
766    
767     Returns the number of Chemical objects converted.
768     */
769     int OBConversion::FullConvert(std::vector<std::string>& FileList, std::string& OutputFileName,
770     std::vector<std::string>& OutputFileList)
771     {
772    
773     istream* pInStream;
774     ostream* pOutStream=NULL;
775     ifstream is;
776     ofstream os;
777     bool HasMultipleOutputFiles=false;
778     int Count=0;
779     bool CommonInFormat = pInFormat ? true:false; //whether set in calling routine
780     ios_base::openmode omode =
781     pOutFormat->Flags() & WRITEBINARY ? ios_base::out|ios_base::binary : ios_base::out;
782     try
783     {
784     ofstream ofs;
785    
786     //OUTPUT
787     if(OutputFileName.empty())
788     pOutStream = NULL; //use existing stream
789     else
790     {
791     if(OutputFileName.find_first_of('*')!=string::npos) HasMultipleOutputFiles = true;
792     if(!HasMultipleOutputFiles)
793     {
794     os.open(OutputFileName.c_str(),omode);
795     if(!os)
796     {
797     cerr << "Cannot write to " << OutputFileName <<endl;
798     return 0;
799     }
800     OutputFileList.push_back(OutputFileName);
801     pOutStream=&os;
802     }
803     }
804    
805     if(IsOption("t",GENOPTIONS))
806     {
807     //Concatenate input file option (multiple files, single molecule)
808     if(HasMultipleOutputFiles)
809     {
810     cerr << "Cannot have multiple output files and also concatenate input files (-t option)" <<endl;
811     return 0;
812     }
813    
814     stringstream allinput;
815     vector<string>::iterator itr;
816     for(itr=FileList.begin();itr!=FileList.end();itr++)
817     {
818     ifstream ifs((*itr).c_str());
819     if(!ifs)
820     {
821     cerr << "Cannot open " << *itr <<endl;
822     continue;
823     }
824     allinput << ifs.rdbuf(); //Copy all file contents
825     ifs.close();
826     }
827     Count = Convert(&allinput,pOutStream);
828     return Count;
829     }
830    
831     //INPUT
832     if(FileList.empty())
833     pInStream = NULL;
834     else
835     {
836     if(FileList.size()>1)
837     {
838     //multiple input files
839     vector<string>::iterator itr, tempitr;
840     tempitr = FileList.end();
841     tempitr--;
842     for(itr=FileList.begin();itr!=FileList.end();itr++)
843     {
844     InFilename = *itr;
845     ifstream ifs;
846     if(!OpenAndSetFormat(CommonInFormat, &ifs))
847     continue;
848    
849     if(HasMultipleOutputFiles)
850     {
851     //Batch conversion
852     string batchfile = BatchFileName(OutputFileName,*itr);
853     if(ofs.is_open()) ofs.close();
854     ofs.open(batchfile.c_str(), omode);
855     if(!ofs)
856     {
857     cerr << "Cannot open " << batchfile << endl;
858     return Count;
859     }
860     OutputFileList.push_back(batchfile);
861     SetOutputIndex(0); //reset for new file
862     Count += Convert(&ifs,&ofs);
863     }
864     else
865     {
866     //Aggregation
867     if(itr!=tempitr) SetMoreFilesToCome();
868     Count = Convert(&ifs,pOutStream);
869     }
870     }
871     return Count;
872     }
873     else
874     {
875     //Single input file
876     InFilename = FileList[0];
877     if(!OpenAndSetFormat(CommonInFormat, &is))
878     return 0;
879     pInStream=&is;
880    
881     if(HasMultipleOutputFiles)
882     {
883     //Splitting
884     //Output is put in a temporary stream and written to a file
885     //with an augmenting name only when it contains a valid object.
886     int Indx=1;
887     for(;;)
888     {
889     stringstream ss;
890     SetOutputIndex(0); //reset for new file
891     SetOneObjectOnly();
892    
893     int ThisFileCount = Convert(pInStream,&ss);
894     if(ThisFileCount==0) break;
895     Count+=ThisFileCount;
896    
897     if(ofs.is_open()) ofs.close();
898     string incrfile = IncrementedFileName(OutputFileName,Indx++);
899     ofs.open(incrfile.c_str(), omode);
900     if(!ofs)
901     {
902     cerr << "Cannot write to " << incrfile << endl;
903     return Count;
904     }
905     OutputFileList.push_back(incrfile);
906     ofs << ss.rdbuf();
907     ofs.close();
908     ss.clear();
909     }
910     return Count;
911     }
912     }
913     }
914    
915     //Single input and output files
916     Count = Convert(pInStream,pOutStream);
917     return Count;
918     }
919     catch(...)
920     {
921     cerr << "Conversion failed with an exception. Count=" << Count <<endl;
922     return Count;
923     }
924     }
925    
926     bool OBConversion::OpenAndSetFormat(bool SetFormat, ifstream* is)
927     {
928     //Opens file using InFilename and sets pInFormat if requested
929     if(!SetFormat)
930     {
931     pInFormat = FormatFromExt(InFilename.c_str());
932     if(pInFormat==NULL)
933     {
934     string::size_type pos = InFilename.rfind('.');
935     string ext;
936     if(pos!=string::npos)
937     ext = InFilename.substr(pos);
938     cerr << "Cannot read input format \"" << ext << '\"'
939     << " for file \"" << InFilename << "\"" << endl;
940     return false;
941     }
942     }
943    
944     ios_base::openmode imode;
945     #ifdef ALL_READS_BINARY //Makes unix files compatible with VC++6
946     imode = ios_base::in|ios_base::binary;
947     #else
948     imode = pInFormat->Flags() & READBINARY ? ios_base::in|ios_base::binary : ios_base::in;
949     #endif
950    
951     is->open(InFilename.c_str(), imode);
952     if(!is->good())
953     {
954     cerr << "Cannot open " << InFilename <<endl;
955     return false;
956     }
957    
958     return true;
959     }
960    
961     ///////////////////////////////////////////////
962     void OBConversion::AddOption(const char* opt, Option_type opttyp, const char* txt)
963     {
964     //Also updates an option
965     if(txt==NULL)
966     OptionsArray[opttyp][opt]=string();
967     else
968     OptionsArray[opttyp][opt]=txt;
969     }
970    
971     const char* OBConversion::IsOption(const char* opt, Option_type opttyp)
972     {
973     //Returns NULL if option not found or a pointer to the text if it is
974     map<string,string>::iterator pos;
975     pos = OptionsArray[opttyp].find(opt);
976     if(pos==OptionsArray[opttyp].end())
977     return NULL;
978     return pos->second.c_str();
979     }
980    
981     bool OBConversion::RemoveOption(const char* opt, Option_type opttyp)
982     {
983     return OptionsArray[opttyp].erase(opt)!=0;//true if was there
984     }
985    
986     void OBConversion::SetOptions(const char* options, Option_type opttyp)
987     {
988     while(*options)
989     {
990     string ch(1, *options++);
991     if(*options=='\"')
992     {
993     string txt = options+1;
994     string::size_type pos = txt.find('\"');
995     if(pos==string::npos)
996     return; //options is illformed
997     txt.erase(pos);
998     OptionsArray[opttyp][ch]= txt;
999     options += pos+2;
1000     }
1001     else
1002     OptionsArray[opttyp][ch] = string();
1003     }
1004     }
1005    
1006     typedef std::map<string,int> OPAMapType;
1007     OPAMapType& OBConversion::OptionParamArray(Option_type typ)
1008     {
1009     static OPAMapType* opa = new OPAMapType[3];
1010     return opa[typ];
1011     }
1012    
1013     void OBConversion::RegisterOptionParam(string name, OBFormat* pFormat,
1014     int numberParams, Option_type typ)
1015     {
1016     //Gives error message if the number of parameters conflicts with an existing registration
1017     map<string,int>::iterator pos;
1018     pos = OptionParamArray(typ).find(name);
1019     if(pos!=OptionParamArray(typ).end())
1020     {
1021     if(pos->second!=numberParams)
1022     {
1023     string description("API");
1024     if(pFormat)
1025     description=pFormat->Description();
1026     cerr << "The number of parameters needed by option \"" << name << "\" in "
1027     << description.substr(0,description.find('\n'))
1028     << " differs from an earlier registration." << endl;
1029     return;
1030     }
1031     }
1032     OptionParamArray(typ)[name] = numberParams;
1033     }
1034    
1035     int OBConversion::GetOptionParams(string name, Option_type typ)
1036     {
1037     //returns the number of parameters registered for the option, or 0 if not found
1038     map<string,int>::iterator pos;
1039     pos = OptionParamArray(typ).find(name);
1040     if(pos==OptionParamArray(typ).end())
1041     return 0;
1042     return pos->second;
1043     }
1044    
1045     }//namespace OpenBabel
1046    
1047     //! \file obconversion.cpp
1048     //! \brief Implementation of OBFormat and OBConversion classes.