ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/OOPSE-4/src/openbabel/obconversion.cpp
Revision: 3057
Committed: Thu Oct 19 20:49:05 2006 UTC (17 years, 11 months ago) by gezelter
File size: 39394 byte(s)
Log Message:
updated OpenBabel to version 2.0.2

File Contents

# User Rev Content
1 tim 2440 /**********************************************************************
2     obconversion.cpp - Declaration of OBFormat and OBConversion
3    
4     Copyright (C) 2004 by Chris Morley
5     Some portions Copyright (C) 2005 by Geoffrey Hutchison
6    
7     This file is part of the Open Babel project.
8     For more information, see <http://openbabel.sourceforge.net/>
9    
10     This program is free software; you can redistribute it and/or modify
11     it under the terms of the GNU General Public License as published by
12     the Free Software Foundation version 2 of the License.
13    
14     This program is distributed in the hope that it will be useful,
15     but WITHOUT ANY WARRANTY; without even the implied warranty of
16     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17     GNU General Public License for more details.
18     ***********************************************************************/
19     // Definition of OBConversion routines
20    
21     #ifdef _WIN32
22 gezelter 3057 #pragma warning (disable : 4786)
23 tim 2440
24 gezelter 3057 //using 'this' in base class initializer
25     #pragma warning (disable : 4355)
26 tim 2440
27 gezelter 3057 #ifdef GUI
28     #undef DATADIR
29     #include "stdafx.hpp" //(includes<windows.h>
30     #endif
31 tim 2440 #endif
32    
33     #include <iostream>
34     #include <fstream>
35     #include <sstream>
36     #include <string>
37     #include <map>
38     //#include <dlfcn.h>
39    
40     #include "obconversion.hpp"
41    
42     #ifdef HAVE_LIBZ
43     #include "zipstream.hpp"
44     #endif
45    
46     #if !HAVE_STRNCASECMP
47     extern "C" int strncasecmp(const char *s1, const char *s2, size_t n);
48     #endif
49    
50     #ifndef BUFF_SIZE
51     #define BUFF_SIZE 32768
52     #endif
53    
54     using namespace std;
55     namespace OpenBabel {
56    
57 gezelter 3057 const char* OBFormat::TargetClassDescription()
58     {
59     //Provides class of default format unless overridden
60     if(OBConversion::GetDefaultFormat())
61     return OBConversion::GetDefaultFormat()->TargetClassDescription();
62     else
63     return "";
64     }
65     const type_info& OBFormat::GetType()
66     {
67     //Provides info on class of default format unless overridden
68     if(OBConversion::GetDefaultFormat())
69     return OBConversion::GetDefaultFormat()->GetType();
70     else
71     return typeid(this); //rubbish return if DefaultFormat not set
72     }
73 tim 2440
74 gezelter 3057 //***************************************************
75 tim 2440
76 gezelter 3057 /** @class OBConversion
77     OBConversion maintains a list of the available formats,
78     provides information on them, and controls the conversion process.
79    
80     A conversion is carried out by the calling routine, usually in a
81     user interface or an application program, making an instance of
82     OBConversion. It is loaded with the in and out formats, any options
83     and (usually) the default streams for input and output. Then either
84     the Convert() function is called, which allows a single input file
85     to be converted, or the extended functionality of FullConvert()
86     is used. This allows multiple input and output files, allowing:
87     - aggregation - the contents of many input files converted
88     and sent to one output file;
89     - splitting - the molecules from one input file sent to
90     separate output files;
91     - batch conversion - each input file converted to an output file.
92    
93     These procedures constitute the "Convert" interface. OBConversion
94     and the user interface or application program do not need to be
95     aware of any other part of OpenBabel - mol.h is not \#included. This
96     allows any chemical object derived from OBBase to be converted;
97     the type of object is decided by the input format class.
98     However, currently, almost all the conversions are for molecules of
99     class OBMol.
100     ///
101     OBConversion can also be used with an "API" interface
102     called from programs which manipulate chemical objects. Input/output is
103     done with the Read() and Write() functions which work with any
104     chemical object, but need to have its type specified. (The
105     ReadMolecule() and WriteMolecule() functions of the format classes
106     can also be used directly.)
107    
108    
109     Example code using OBConversion
110    
111     <b>To read in a molecule, manipulate it and write it out.</b>
112    
113     Set up an istream and an ostream, to and from files or elsewhere.
114     (cin and cout are used in the example). Specify the file formats.
115    
116     @code
117     OBConversion conv(&cin,&cout);
118     if(conv.SetInAndOutFormats("SMI","MOL"))
119     {
120     OBMol mol;
121     if(conv.Read(&mol))
122     ...manipulate molecule
123    
124     conv.Write(&mol);
125     }
126     @endcode
127    
128     A two stage construction is used to allow error handling
129     if the format ID is not recognized. This is necessary now that the
130     formats are dynamic and errors are not caught at compile time.
131     OBConversion::Read() is a templated function so that objects derived
132     from OBBase can also be handled, in addition to OBMol, if the format
133     routines are written appropriately.
134    
135     <b>To make a molecule from a SMILES string.</b>
136     @code
137     std::string SmilesString;
138     OBMol mol;
139     stringstream ss(SmilesString);
140     OBConversion conv(&ss);
141     if(conv.SetInFormat("smi") && conv.Read(&mol))
142     ...
143     @endcode
144    
145     <b>To do a file conversion without manipulating the molecule.</b>
146    
147     @code
148     #include "obconversion.hpp" //mol.h is not needed
149     ...set up an istream is and an ostream os
150     OBConversion conv(&is,&os);
151     if(conv.SetInAndOutFormats("SMI","MOL"))
152     {
153     conv.SetOptions("h"); //Optional; (h adds explicit hydrogens)
154     conv.Convert();
155     }
156     @endcode
157    
158     <b>To add automatic format conversion to an existing program.</b>
159    
160     The existing program inputs from the file identified by the
161     const char* filename into the istream is. The file is assumed to have
162     a format ORIG, but other formats, identified by their file extensions,
163     can now be used.
164    
165     @code
166     ifstream ifs(filename); //Original code
167    
168     OBConversion conv;
169     OBFormat* inFormat = conv.FormatFromExt(filename);
170     OBFormat* outFormat = conv.GetFormat("ORIG");
171     istream* pIn = &ifs;
172     stringstream newstream;
173     if(inFormat && outFormat)
174     {
175     conv.SetInAndOutFormats(inFormat,outFormat);
176     conv.Convert(pIn,&newstream);
177     pIn=&newstream;
178     }
179     //else error; new features not available; fallback to original functionality
180    
181     ...Carry on with original code using pIn
182     @endcode
183    
184     In Windows a degree of independence from OpenBabel can be achieved using DLLs.
185     This code would be linked with obconv.lib.
186     At runtime the following DLLs would be in the executable directory:
187     obconv.dll, obdll.dll, one or more *.obf format files.
188     */
189    
190 tim 2440 int OBConversion::FormatFilesLoaded = 0;
191    
192 gezelter 3057 OBFormat* OBConversion::pDefaultFormat=NULL;
193 tim 2440
194 gezelter 3057 OBConversion::OBConversion(istream* is, ostream* os) :
195     pInFormat(NULL),pOutFormat(NULL), Index(0), StartNumber(1),
196     EndNumber(0), Count(-1), m_IsLast(true), MoreFilesToCome(false),
197     OneObjectOnly(false), pOb1(NULL), pAuxConv(NULL)
198     {
199     pInStream=is;
200     pOutStream=os;
201     if (FormatFilesLoaded == 0)
202     FormatFilesLoaded = LoadFormatFiles();
203 tim 2440
204 gezelter 3057 //These options take a parameter
205     RegisterOptionParam("f", NULL, 1,GENOPTIONS);
206     RegisterOptionParam("l", NULL, 1,GENOPTIONS);
207     }
208 tim 2440
209 gezelter 3057 ///This static function returns a reference to the FormatsMap
210     ///which, because it is a static local variable is constructed only once.
211     ///This fiddle is to avoid the "static initialization order fiasco"
212     ///See Marshall Cline's C++ FAQ Lite document, www.parashift.com/c++-faq-lite/".
213     FMapType& OBConversion::FormatsMap()
214     {
215     static FMapType* fm = NULL;
216     if (!fm)
217     fm = new FMapType;
218     return *fm;
219     }
220 tim 2440
221 gezelter 3057 ///This static function returns a reference to the FormatsMIMEMap
222     ///which, because it is a static local variable is constructed only once.
223     ///This fiddle is to avoid the "static initialization order fiasco"
224     ///See Marshall Cline's C++ FAQ Lite document, www.parashift.com/c++-faq-lite/".
225     FMapType& OBConversion::FormatsMIMEMap()
226     {
227     static FMapType* fm = NULL;
228     if (!fm)
229     fm = new FMapType;
230     return *fm;
231     }
232 tim 2440
233 gezelter 3057 /////////////////////////////////////////////////
234     OBConversion::OBConversion(const OBConversion& o)
235     {
236     Index = o.Index;
237     Count = o.Count;
238     StartNumber = o.StartNumber;
239     EndNumber = o.EndNumber;
240     pInFormat = o.pInFormat;
241     pInStream = o.pInStream;
242     pOutFormat = o.pOutFormat;
243     pOutStream = o.pOutStream;
244     OptionsArray[0]= o.OptionsArray[0];
245     OptionsArray[1]= o.OptionsArray[1];
246     OptionsArray[2]= o.OptionsArray[2];
247     InFilename = o.InFilename;
248     rInpos = o.rInpos;
249     wInpos = o.wInpos;
250     rInlen = o.rInlen;
251     wInlen = o.wInlen;
252     m_IsLast = o.m_IsLast;
253     MoreFilesToCome= o.MoreFilesToCome;
254     OneObjectOnly = o.OneObjectOnly;
255     pOb1 = o.pOb1;
256     ReadyToInput = o.ReadyToInput;
257 tim 2440
258 gezelter 3057 pAuxConv = NULL;
259     }
260     ////////////////////////////////////////////////
261 tim 2440
262 gezelter 3057 OBConversion::~OBConversion()
263     {
264     if(pAuxConv!=this)
265     delete pAuxConv;
266     }
267     //////////////////////////////////////////////////////
268 tim 2440
269 gezelter 3057 /// Class information on formats is collected by making an instance of the class
270     /// derived from OBFormat(only one is usually required). RegisterFormat() is called
271     /// from its constructor.
272     ///
273     /// If the compiled format is stored separately, like in a DLL or shared library,
274     /// the initialization code makes an instance of the imported OBFormat class.
275     int OBConversion::RegisterFormat(const char* ID, OBFormat* pFormat, const char* MIME)
276     {
277     FormatsMap()[ID] = pFormat;
278     if (MIME)
279     FormatsMIMEMap()[MIME] = pFormat;
280     if(pFormat->Flags() & DEFAULTFORMAT)
281     pDefaultFormat=pFormat;
282     return FormatsMap().size();
283     }
284 tim 2440
285 gezelter 3057 //////////////////////////////////////////////////////
286     int OBConversion::LoadFormatFiles()
287     {
288     int count=0;
289     // if(FormatFilesLoaded) return 0;
290     // FormatFilesLoaded=true; //so will load files only once
291 tim 2440 #ifdef USING_DYNAMIC_LIBS
292 gezelter 3057 //Depending on availablilty, look successively in
293     //FORMATFILE_DIR, executable directory,or current directory
294     string TargetDir;
295     #ifdef FORMATFILE_DIR
296     TargetDir="FORMATFILE_DIR";
297     #endif
298 tim 2440
299 gezelter 3057 DLHandler::getConvDirectory(TargetDir);
300 tim 2440
301 gezelter 3057 vector<string> files;
302     if(!DLHandler::findFiles(files,DLHandler::getFormatFilePattern(),TargetDir)) return 0;
303 tim 2440
304 gezelter 3057 vector<string>::iterator itr;
305     for(itr=files.begin();itr!=files.end();itr++)
306     {
307     if(DLHandler::openLib(*itr))
308     count++;
309     else
310     cerr << *itr << " did not load properly" << endl;
311     }
312 tim 2440 #else
313 gezelter 3057 count = 1; //avoid calling this function several times
314 tim 2440 #endif //USING_DYNAMIC_LIBS
315 gezelter 3057 return count;
316     }
317 tim 2440
318 gezelter 3057 /**
319     *Returns the ID + the first line of the description in str
320     *and a pointer to the format in pFormat.
321     *If called with str==NULL the first format is returned;
322     *subsequent formats are returned by calling with str!=NULL and the previous value of itr
323     *returns false, and str and pFormat NULL, when there are no more formats.
324     *Use like:
325     *@code
326     * const char* str=NULL;
327     * Formatpos pos;
328     * OBConversion conv; // dummy to make sure static data is available
329     * while(OBConversion::GetNextFormat(pos,str,pFormat))
330     * {
331     * use str and pFormat
332     * }
333     *@endcode
334     *
335     * NOTE: Because of dynamic loading problems, it is usually necessary to
336     * declare a "dummy" OBConversion object to access this static method.
337     * (Not elegant, but will hopefully be fixed in the future.)
338     */
339     bool OBConversion::GetNextFormat(Formatpos& itr, const char*& str,OBFormat*& pFormat)
340     {
341 tim 2440
342 gezelter 3057 pFormat = NULL;
343     if(str==NULL)
344     itr = FormatsMap().begin();
345     else
346     itr++;
347     if(itr == FormatsMap().end())
348     {
349     str=NULL; pFormat=NULL;
350     return false;
351     }
352     static string s;
353     s =itr->first;
354     pFormat = itr->second;
355     if(pFormat)
356     {
357     string description(pFormat->Description());
358     s += " -- ";
359     s += description.substr(0,description.find('\n'));
360     }
361 tim 2440
362 gezelter 3057 if(pFormat->Flags() & NOTWRITABLE) s+=" [Read-only]";
363     if(pFormat->Flags() & NOTREADABLE) s+=" [Write-only]";
364 tim 2440
365 gezelter 3057 str = s.c_str();
366     return true;
367     }
368 tim 2440
369 gezelter 3057 //////////////////////////////////////////////////////
370     /// Sets the formats from their ids, e g CML.
371     /// If inID is NULL, the input format is left unchanged. Similarly for outID
372     /// Returns true if both formats have been successfully set at sometime
373     bool OBConversion::SetInAndOutFormats(const char* inID, const char* outID)
374     {
375     return SetInFormat(inID) && SetOutFormat(outID);
376     }
377     //////////////////////////////////////////////////////
378 tim 2440
379 gezelter 3057 bool OBConversion::SetInAndOutFormats(OBFormat* pIn, OBFormat* pOut)
380     {
381     return SetInFormat(pIn) && SetOutFormat(pOut);
382     }
383     //////////////////////////////////////////////////////
384     bool OBConversion::SetInFormat(OBFormat* pIn)
385     {
386     if(pIn==NULL)
387     return true;
388     pInFormat=pIn;
389     return !(pInFormat->Flags() & NOTREADABLE);
390     }
391     //////////////////////////////////////////////////////
392     bool OBConversion::SetOutFormat(OBFormat* pOut)
393     {
394     pOutFormat=pOut;
395     return !(pOutFormat->Flags() & NOTWRITABLE);
396     }
397     //////////////////////////////////////////////////////
398     bool OBConversion::SetInFormat(const char* inID)
399     {
400     if(inID)
401     pInFormat = FindFormat(inID);
402     return pInFormat && !(pInFormat->Flags() & NOTREADABLE);
403     }
404     //////////////////////////////////////////////////////
405 tim 2440
406 gezelter 3057 bool OBConversion::SetOutFormat(const char* outID)
407     {
408     if(outID)
409     pOutFormat= FindFormat(outID);
410     return pOutFormat && !(pOutFormat->Flags() & NOTWRITABLE);
411     }
412 tim 2440
413 gezelter 3057 //////////////////////////////////////////////////////
414     int OBConversion::Convert(istream* is, ostream* os)
415     {
416     if(is) pInStream=is;
417     if(os) pOutStream=os;
418     ostream* pOrigOutStream = pOutStream;
419 tim 2440
420     #ifdef HAVE_LIBZ
421 gezelter 3057 zlib_stream::zip_istream zIn(*pInStream);
422     if(zIn.is_gzip())
423     pInStream = &zIn;
424 tim 2440
425 gezelter 3057 zlib_stream::zip_ostream zOut(*pOutStream);
426     if(IsOption("z",GENOPTIONS))
427     {
428     // make sure to output the header
429     zOut.make_gzip();
430     pOutStream = &zOut;
431     }
432 tim 2440 #endif
433    
434 gezelter 3057 int count = Convert();
435     pOutStream = pOrigOutStream;
436     return count;
437 tim 2440
438 gezelter 3057 }
439 tim 2440
440 gezelter 3057 ////////////////////////////////////////////////////
441     /// Actions the "convert" interface.
442     /// Calls the OBFormat class's ReadMolecule() which
443     /// - makes a new chemical object of its chosen type (e.g. OBMol)
444     /// - reads an object from the input file
445     /// - subjects the chemical object to 'transformations' as specified by the Options
446     /// - calls AddChemObject to add it to a buffer. The previous object is first output
447     /// via the output Format's WriteMolecule(). During the output process calling
448     /// IsFirst() and GetIndex() (the number of objects including the current one already output.
449     /// allows more control, for instance writing \<cml\> and \</cml\> tags for multiple molecule outputs only.
450     ///
451     /// AddChemObject does not save the object passed to it if it is NULL (as a result of a DoTransformation())
452     /// or if the number of the object is outside the range defined by
453     /// StartNumber and EndNumber.This means the start and end counts apply to all chemical objects
454     /// found whether or not they are output.
455     ///
456     /// If ReadMolecule returns false the input conversion loop is exited.
457     ///
458     int OBConversion::Convert()
459     {
460     if(pInStream==NULL || pOutStream==NULL)
461     {
462     cerr << "input or output stream not set" << endl;
463     return 0;
464     }
465 tim 2440
466 gezelter 3057 if(!pInFormat) return 0;
467     Count=0;//number objects processed
468 tim 2440
469 gezelter 3057 if(!SetStartAndEnd())
470     return 0;
471 tim 2440
472 gezelter 3057 ReadyToInput=true;
473     m_IsLast=false;
474     pOb1=NULL;
475     wInlen=0;
476 tim 2440
477 gezelter 3057 //Input loop
478     while(ReadyToInput && pInStream->peek() != EOF && pInStream->good())
479     {
480     if(pInStream==&cin)
481     {
482     if(pInStream->peek()=='\n')
483     break;
484     }
485     else
486     rInpos = pInStream->tellg();
487 tim 2440
488 gezelter 3057 bool ret=false;
489     try
490     {
491     ret = pInFormat->ReadChemObject(this);
492     }
493     catch(...)
494     {
495     if(!IsOption("e", GENOPTIONS) && !OneObjectOnly)
496     throw;
497     }
498 tim 2440
499 gezelter 3057 if(!ret)
500     {
501     //error or termination request: terminate unless
502     // -e option requested and sucessfully can skip past current object
503     if(!IsOption("e", GENOPTIONS) || pInFormat->SkipObjects(0,this)!=1)
504     break;
505     }
506     if(OneObjectOnly)
507     break;
508     // Objects supplied to AddChemObject() which may output them after a delay
509     //ReadyToInput may be made false in AddChemObject()
510     // by WriteMolecule() returning false or by Count==EndNumber
511     }
512 tim 2440
513 gezelter 3057 //Output last object
514     //if(!MoreFilesToCome)
515     // m_IsLast=true;
516     m_IsLast= !MoreFilesToCome;
517 tim 2440
518 gezelter 3057 if(pOutFormat)
519     if(!pOutFormat->WriteChemObject(this))
520     Index--;
521 tim 2440
522 gezelter 3057 //Put AddChemObject() into non-queue mode
523     Count= -1;
524     EndNumber=StartNumber=0; pOb1=NULL;//leave tidy
525     MoreFilesToCome=false;
526     OneObjectOnly=false;
527 tim 2440
528 gezelter 3057 return Index; //The number actually output
529     }
530     //////////////////////////////////////////////////////
531     bool OBConversion::SetStartAndEnd()
532     {
533     int TempStartNumber=0;
534     const char* p = IsOption("f",GENOPTIONS);
535     if(p)
536     {
537     StartNumber=atoi(p);
538     if(StartNumber>1)
539     {
540     TempStartNumber=StartNumber;
541     //Try to skip objects now
542     int ret = pInFormat->SkipObjects(StartNumber-1,this);
543     if(ret==-1) //error
544     return false;
545     if(ret==1) //success:objects skipped
546     {
547     Count = StartNumber-1;
548     StartNumber=0;
549     }
550     }
551     }
552 tim 2440
553 gezelter 3057 p = IsOption("l",GENOPTIONS);
554     if(p)
555     {
556     EndNumber=atoi(p);
557     if(TempStartNumber && EndNumber<TempStartNumber)
558     EndNumber=TempStartNumber;
559     }
560 tim 2440
561 gezelter 3057 return true;
562     }
563 tim 2440
564 gezelter 3057 //////////////////////////////////////////////////////
565     /// Retrieves an object stored by AddChemObject() during output
566     OBBase* OBConversion::GetChemObject()
567     {
568     Index++;
569     return pOb1;
570     }
571 tim 2440
572 gezelter 3057 //////////////////////////////////////////////////////
573     /// Called by ReadMolecule() to deliver an object it has read from an input stream.
574     /// Used in two modes:
575     /// - When Count is negative it is left negative and the routine is just a store
576     /// for an OBBase object. The negative value returned tells the calling
577     /// routine that no more objects are required.
578     /// - When count is >=0, probably set by Convert(), it acts as a queue of 2:
579     /// writing the currently stored value before accepting the supplied one. This delay
580     /// allows output routines to respond differently when the written object is the last.
581     /// Count is incremented with each call, even if pOb=NULL.
582     /// Objects are not added to the queue if the count is outside the range
583     /// StartNumber to EndNumber. There is no upper limit if EndNumber is zero.
584     /// The return value is the number of objects, including this one, which have been
585     /// input (but not necessarily output).
586     int OBConversion::AddChemObject(OBBase* pOb)
587     {
588     if(Count<0)
589     {
590     pOb1=pOb;
591     return Count;
592     }
593     Count++;
594     if(Count>=(int)StartNumber)//keeps reading objects but does nothing with them
595     {
596     if(Count==(int)EndNumber)
597     ReadyToInput=false; //stops any more objects being read
598 tim 2440
599 gezelter 3057 rInlen = pInStream->tellg() - rInpos;
600 tim 2440
601 gezelter 3057 if(pOb)
602     {
603     if(pOb1 && pOutFormat) //see if there is an object ready to be output
604     {
605     //Output object
606     if (!pOutFormat->WriteChemObject(this))
607     {
608     //faultly write, so finish
609     --Index;
610     ReadyToInput=false;
611     return Count;
612     }
613     //Stop after writing with single object output files
614     if(pOutFormat->Flags() & WRITEONEONLY)
615     {
616     ReadyToInput = false;
617     pOb1 = NULL;
618 tim 2440
619 gezelter 3057 // if there are more molecules to output, send a warning
620     cerr << "WARNING: You are attempting to convert a file"
621     << " with multiple molecule entries into a format"
622     << " which can only store one molecule. The current"
623     << " output will only contain the first molecule.\n\n";
624 tim 2440
625 gezelter 3057 cerr << "To convert this input into multiple separate"
626     << " output files, with one molecule per file, try:\n"
627     << "babel [input] [ouptut] -m\n\n";
628 tim 2440
629 gezelter 3057 cerr << "To pick one particular molecule"
630     << " (e.g., molecule 4), try:\n"
631     << "babel -f 4 -l 4 [input] [output]" << endl;
632 tim 2440
633 gezelter 3057 return true;
634     }
635     }
636     pOb1=pOb;
637     wInpos = rInpos; //Save the position in the input file to be accessed when writing it
638     wInlen = rInlen;
639     }
640     }
641     return Count;
642     }
643     //////////////////////////////////////////////////////
644     int OBConversion::GetOutputIndex() const
645     {
646     //The number of objects actually written already from this instance of OBConversion
647     return Index;
648     }
649     void OBConversion::SetOutputIndex(int indx)
650     {
651     Index=indx;
652     }
653     //////////////////////////////////////////////////////
654     OBFormat* OBConversion::FindFormat(const char* ID)
655     {
656     //Case insensitive
657     if(FormatsMap().find(ID) == FormatsMap().end())
658     return NULL;
659     else
660     return FormatsMap()[ID];
661     }
662 tim 2440
663 gezelter 3057 //////////////////////////////////////////////////
664     const char* OBConversion::GetTitle() const
665     {
666     return(InFilename.c_str());
667     }
668    
669     void OBConversion::SetMoreFilesToCome()
670     {
671     MoreFilesToCome=true;
672     }
673    
674     void OBConversion::SetOneObjectOnly()
675     {
676     OneObjectOnly=true;
677     m_IsLast=true;
678     }
679    
680     /////////////////////////////////////////////////////////
681     OBFormat* OBConversion::FormatFromExt(const char* filename)
682     {
683     string file = filename;
684     size_t extPos = file.rfind(".");
685    
686     if(extPos!=string::npos)
687     {
688     // only do this if we actually can read .gz files
689 tim 2440 #ifdef HAVE_LIBZ
690 gezelter 3057 if (file.substr(extPos,3) == ".gz")
691     {
692     file.erase(extPos);
693     extPos = file.rfind(".");
694     if (extPos!=string::npos)
695     return FindFormat( (file.substr(extPos + 1, file.size())).c_str() );
696     }
697     else
698 tim 2440 #endif
699 gezelter 3057 return FindFormat( (file.substr(extPos + 1, file.size())).c_str() );
700     }
701     return NULL; //if no extension
702     }
703 tim 2440
704 gezelter 3057 OBFormat* OBConversion::FormatFromMIME(const char* MIME)
705     {
706     if(FormatsMIMEMap().find(MIME) == FormatsMIMEMap().end())
707     return NULL;
708     else
709     return FormatsMIMEMap()[MIME];
710     }
711 tim 2440
712 gezelter 3057 bool OBConversion::Read(OBBase* pOb, std::istream* pin)
713     {
714     if(pin)
715     pInStream=pin;
716     if(!pInFormat) return false;
717 tim 2440
718     #ifdef HAVE_LIBZ
719 gezelter 3057 zlib_stream::zip_istream zIn(*pInStream);
720     if(zIn.is_gzip())
721     pInStream = &zIn;
722 tim 2440 #endif
723    
724 gezelter 3057 return pInFormat->ReadMolecule(pOb, this);
725     }
726     //////////////////////////////////////////////////
727     /// Writes the object pOb but does not delete it afterwards.
728     /// The output stream is lastingly changed if pos is not NULL
729     /// Returns true if successful.
730     bool OBConversion::Write(OBBase* pOb, ostream* pos)
731     {
732     if(pos)
733     pOutStream=pos;
734     if(!pOutFormat) return false;
735 tim 2440
736 gezelter 3057 ostream* pOrigOutStream = pOutStream;
737 tim 2440 #ifdef HAVE_LIBZ
738 gezelter 3057 #ifndef _WIN32
739     zlib_stream::zip_ostream zOut(*pOutStream);
740     if(IsOption("z",GENOPTIONS))
741     {
742     // make sure to output the header
743     zOut.make_gzip();
744     pOutStream = &zOut;
745     }
746 tim 2440 #endif
747 gezelter 3057 #endif
748 tim 2440
749 gezelter 3057 bool ret = pOutFormat->WriteMolecule(pOb,this);
750     pOutStream = pOrigOutStream;
751     return ret;
752     }
753 tim 2440
754 gezelter 3057 //////////////////////////////////////////////////
755     /// Writes the object pOb but does not delete it afterwards.
756     /// The output stream not changed (since we cannot write to this string later)
757     /// Returns true if successful.
758     std::string OBConversion::WriteString(OBBase* pOb)
759     {
760     ostream *oldStream = pOutStream; // save old output
761     stringstream newStream;
762 tim 2440
763 gezelter 3057 if(pOutFormat)
764     {
765     Write(pOb, &newStream);
766     }
767     pOutStream = oldStream;
768 tim 2440
769 gezelter 3057 return newStream.str();
770     }
771 tim 2440
772 gezelter 3057 //////////////////////////////////////////////////
773     /// Writes the object pOb but does not delete it afterwards.
774     /// The output stream is lastingly changed to point to the file
775     /// Returns true if successful.
776     bool OBConversion::WriteFile(OBBase* pOb, string filePath)
777     {
778     if(!pOutFormat) return false;
779 tim 2440
780 gezelter 3057 ofstream *ofs = new ofstream;
781     ios_base::openmode omode =
782     pOutFormat->Flags() & WRITEBINARY ? ios_base::out|ios_base::binary : ios_base::out;
783 tim 2440
784 gezelter 3057 ofs->open(filePath.c_str(),omode);
785     if(!ofs || !ofs->good())
786     {
787     cerr << "Cannot write to " << filePath <<endl;
788     return false;
789     }
790 tim 2440
791 gezelter 3057 return Write(pOb, ofs);
792     }
793 tim 2440
794 gezelter 3057 ////////////////////////////////////////////
795     bool OBConversion::ReadString(OBBase* pOb, std::string input)
796     {
797     stringstream *pin = new stringstream(input);
798     return Read(pOb,pin);
799     }
800 tim 2440
801    
802 gezelter 3057 ////////////////////////////////////////////
803     bool OBConversion::ReadFile(OBBase* pOb, std::string filePath)
804     {
805     if(!pInFormat) return false;
806 tim 2440
807 gezelter 3057 ifstream *ifs = new ifstream;
808     ios_base::openmode imode =
809     pInFormat->Flags() & READBINARY ? ios_base::in|ios_base::binary : ios_base::in;
810 tim 2440
811 gezelter 3057 ifs->open(filePath.c_str(),imode);
812     if(!ifs || !ifs->good())
813     {
814     cerr << "Cannot read from " << filePath << endl;
815     return false;
816     }
817 tim 2440
818 gezelter 3057 return Read(pOb,ifs);
819     }
820 tim 2440
821    
822 gezelter 3057 ////////////////////////////////////////////
/// Returns the help text listing the general conversion options
/// (-f, -l, -t, -e and -z), one per line.
/// The text is a single string literal continued with backslash-newline,
/// so any whitespace at the start of a continuation line is part of the
/// returned string; keep that in mind when re-indenting.
823     const char* OBConversion::Description()
824     {
825     return "Conversion options\n \
826 tim 2440 -f <#> Start import at molecule # specified\n \
827     -l <#> End import at molecule # specified\n \
828     -t All input files describe a single molecule\n \
829     -e Continue with next object after error, if possible\n \
830     -z Compress the output with gzip\n";
831 gezelter 3057 }
832 tim 2440
833 gezelter 3057 ////////////////////////////////////////////
834     bool OBConversion::IsLast()
835     {
836     return m_IsLast;
837     }
838     ////////////////////////////////////////////
839     bool OBConversion::IsFirstInput()
840     {
841     return (Count==0);
842     }
843 tim 2440
844 gezelter 3057 /////////////////////////////////////////////////
845     string OBConversion::BatchFileName(string& BaseName, string& InFile)
846     {
847     //Replaces * in BaseName by InFile without extension and path
848     string ofname(BaseName);
849     string::size_type pos = ofname.find('*');
850     if(pos != string::npos)
851     {
852     //Replace * by input filename
853     string::size_type posdot= InFile.rfind('.');
854     if(posdot == string::npos)
855     posdot = InFile.size();
856     else {
857     #ifdef HAVE_LIBZ
858     if (InFile.substr(posdot,3) == ".gz")
859     {
860     InFile.erase(posdot);
861     posdot = InFile.rfind('.');
862     if (posdot == string::npos)
863     posdot = InFile.size();
864     }
865     #endif
866     }
867 tim 2440
868 gezelter 3057 int posname= InFile.find_last_of("\\/");
869     ofname.replace(pos,1, InFile, posname+1, posdot-posname-1);
870     }
871     return ofname;
872     }
873 tim 2440
874 gezelter 3057 ////////////////////////////////////////////////
875     string OBConversion::IncrementedFileName(string& BaseName, const int Count)
876     {
877     //Replaces * in BaseName by Count
878     string ofname(BaseName);
879     int pos = ofname.find('*');
880     if(pos>=0)
881     {
882     char num[33];
883     snprintf(num, 33, "%d", Count);
884     ofname.replace(pos,1, num);
885     }
886     return ofname;
887     }
888     ////////////////////////////////////////////////////
889 tim 2440
890 gezelter 3057 /**
891     Makes input and output streams, and carries out normal,
892     batch, aggregation, and splitting conversion.
893 tim 2440
894 gezelter 3057 Normal
895     Done if FileList contains a single file name and OutputFileName
896     does not contain a *.
897    
898     Aggregation
899     Done if FileList has more than one file name and OutputFileName does
900     not contain * . All the chemical objects are converted and sent
901     to the single output file.
902 tim 2440
903 gezelter 3057 Splitting
904     Done if FileList contains a single file name and OutputFileName
905     contains a * . Each chemical object in the input file is converted
906     and sent to a separate file whose name is OutputFileName with the
907     * replaced by 1, 2, 3, etc.
908     For example, if OutputFileName is NEW*.smi then the output files are
909     NEW1.smi, NEW2.smi, etc.
910 tim 2440
911 gezelter 3057 Batch Conversion
912     Done if FileList has more than one file name and contains a * .
913     Each input file is converted to an output file whose name is
914     OutputFileName with the * replaced by the inputfile name without its
915     path and extension.
916     So if the input files were inpath/First.cml, inpath/Second.cml
917     and OutputFileName was NEW*.mol, the output files would be
918     NEWFirst.mol, NEWSecond.mol.
919 tim 2440
920 gezelter 3057 If FileList is empty, the input stream that has already been set
921     (usually in the constructor) is used. If OutputFileName is empty,
922     the output stream already set is used.
923 tim 2440
924 gezelter 3057 On exit, OutputFileList contains the names of the output files.
925 tim 2440
926 gezelter 3057 Returns the number of Chemical objects converted.
927     */
928     int OBConversion::FullConvert(std::vector<std::string>& FileList, std::string& OutputFileName,
929     std::vector<std::string>& OutputFileList)
930     {
931     ostream* pOs=NULL;
932     istream* pIs=NULL;
933     ifstream is;
934     ofstream os;
935     bool HasMultipleOutputFiles=false;
936     int Count=0;
937     bool CommonInFormat = pInFormat ? true:false; //whether set in calling routine
938     ios_base::openmode omode =
939     pOutFormat->Flags() & WRITEBINARY ? ios_base::out|ios_base::binary : ios_base::out;
940     try
941     {
942     ofstream ofs;
943 tim 2440
944 gezelter 3057 //OUTPUT
945     if(OutputFileName.empty())
946     pOs = NULL; //use existing stream
947     else
948     {
949     if(OutputFileName.find_first_of('*')!=string::npos) HasMultipleOutputFiles = true;
950     if(!HasMultipleOutputFiles)
951     {
952     os.open(OutputFileName.c_str(),omode);
953     if(!os)
954     {
955     cerr << "Cannot write to " << OutputFileName <<endl;
956     return 0;
957     }
958     OutputFileList.push_back(OutputFileName);
959     pOs=&os;
960     }
961     }
962 tim 2440
963 gezelter 3057 if(IsOption("t",GENOPTIONS))
964     {
965     //Concatenate input file option (multiple files, single molecule)
966     if(HasMultipleOutputFiles)
967     {
968     cerr << "Cannot have multiple output files and also concatenate input files (-t option)" <<endl;
969     return 0;
970     }
971 tim 2440
972 gezelter 3057 stringstream allinput;
973     vector<string>::iterator itr;
974     for(itr=FileList.begin();itr!=FileList.end();itr++)
975     {
976     ifstream ifs((*itr).c_str());
977     if(!ifs)
978     {
979     cerr << "Cannot open " << *itr <<endl;
980     continue;
981     }
982     allinput << ifs.rdbuf(); //Copy all file contents
983     ifs.close();
984     }
985     Count = Convert(&allinput,pOs);
986     return Count;
987     }
988 tim 2440
989 gezelter 3057 //INPUT
990     if(FileList.empty())
991     pIs = NULL;
992     else
993     {
994     if(FileList.size()>1)
995     {
996     //multiple input files
997     vector<string>::iterator itr, tempitr;
998     tempitr = FileList.end();
999     tempitr--;
1000     for(itr=FileList.begin();itr!=FileList.end();itr++)
1001     {
1002     InFilename = *itr;
1003     ifstream ifs;
1004     if(!OpenAndSetFormat(CommonInFormat, &ifs))
1005     continue;
1006 tim 2440
1007 gezelter 3057 if(HasMultipleOutputFiles)
1008     {
1009     //Batch conversion
1010     string batchfile = BatchFileName(OutputFileName,*itr);
1011     if(ofs.is_open()) ofs.close();
1012     ofs.open(batchfile.c_str(), omode);
1013     if(!ofs)
1014     {
1015     cerr << "Cannot open " << batchfile << endl;
1016     return Count;
1017     }
1018     OutputFileList.push_back(batchfile);
1019     SetOutputIndex(0); //reset for new file
1020     Count += Convert(&ifs,&ofs);
1021     }
1022     else
1023     {
1024     //Aggregation
1025     if(itr!=tempitr) SetMoreFilesToCome();
1026     Count = Convert(&ifs,pOs);
1027     }
1028     }
1029     return Count;
1030     }
1031     else
1032     {
1033     //Single input file
1034     InFilename = FileList[0];
1035     if(!OpenAndSetFormat(CommonInFormat, &is))
1036     return 0;
1037     pIs=&is;
1038 tim 2440
1039 gezelter 3057 if(HasMultipleOutputFiles)
1040     {
1041     //Splitting
1042     //Output is put in a temporary stream and written to a file
1043     //with an augmenting name only when it contains a valid object.
1044     int Indx=1;
1045     SetInStream(&is);
1046     #ifdef HAVE_LIBZ
1047     zlib_stream::zip_istream zIn(is);
1048     #endif
1049     for(;;)
1050     {
1051     stringstream ss;
1052     SetOutStream(&ss);
1053     SetOutputIndex(0); //reset for new file
1054     SetOneObjectOnly();
1055 tim 2440
1056 gezelter 3057 #ifdef HAVE_LIBZ
1057     if(Indx==1 && zIn.is_gzip())
1058     SetInStream(&zIn);
1059     #endif
1060 tim 2440
1061 gezelter 3057 int ThisFileCount = Convert();
1062     if(ThisFileCount==0) break;
1063     Count+=ThisFileCount;
1064 tim 2440
1065 gezelter 3057 if(ofs.is_open()) ofs.close();
1066     string incrfile = IncrementedFileName(OutputFileName,Indx++);
1067     ofs.open(incrfile.c_str(), omode);
1068     if(!ofs)
1069     {
1070     cerr << "Cannot write to " << incrfile << endl;
1071     return Count;
1072     }
1073    
1074     OutputFileList.push_back(incrfile);
1075     #ifdef HAVE_LIBZ
1076     if(IsOption("z",GENOPTIONS))
1077     {
1078     zlib_stream::zip_ostream zOut(ofs);
1079     // make sure to output the header
1080     zOut.make_gzip();
1081     zOut << ss.rdbuf();
1082     }
1083     else
1084     #endif
1085     ofs << ss.rdbuf();
1086 tim 2440
1087 gezelter 3057 ofs.close();
1088     ss.clear();
1089     }
1090     return Count;
1091     }
1092     }
1093     }
1094 tim 2440
1095 gezelter 3057 //Single input and output files
1096     Count = Convert(pIs,pOs);
1097     return Count;
1098     }
1099     catch(...)
1100     {
1101     cerr << "Conversion failed with an exception. Count=" << Count <<endl;
1102     return Count;
1103     }
1104     }
1105    
1106     bool OBConversion::OpenAndSetFormat(bool SetFormat, ifstream* is)
1107     {
1108     //Opens file using InFilename and sets pInFormat if requested
1109     if(!SetFormat)
1110     {
1111     pInFormat = FormatFromExt(InFilename.c_str());
1112     if(pInFormat==NULL)
1113     {
1114     string::size_type pos = InFilename.rfind('.');
1115     string ext;
1116     if(pos!=string::npos)
1117     ext = InFilename.substr(pos);
1118     cerr << "Cannot read input format \"" << ext << '\"'
1119     << " for file \"" << InFilename << "\"" << endl;
1120     return false;
1121     }
1122     }
1123    
1124     ios_base::openmode imode;
1125 tim 2440 #ifdef ALL_READS_BINARY //Makes unix files compatible with VC++6
1126 gezelter 3057 imode = ios_base::in|ios_base::binary;
1127 tim 2440 #else
1128 gezelter 3057 imode = pInFormat->Flags() & READBINARY ? ios_base::in|ios_base::binary : ios_base::in;
1129 tim 2440 #endif
1130    
1131 gezelter 3057 is->open(InFilename.c_str(), imode);
1132     if(!is->good())
1133     {
1134     cerr << "Cannot open " << InFilename <<endl;
1135     return false;
1136     }
1137 tim 2440
1138 gezelter 3057 return true;
1139     }
1140 tim 2440
1141 gezelter 3057 ///////////////////////////////////////////////
1142     void OBConversion::AddOption(const char* opt, Option_type opttyp, const char* txt)
1143     {
1144     //Also updates an option
1145     if(txt==NULL)
1146     OptionsArray[opttyp][opt]=string();
1147     else
1148     OptionsArray[opttyp][opt]=txt;
1149     }
1150 tim 2440
1151 gezelter 3057 const char* OBConversion::IsOption(const char* opt, Option_type opttyp)
1152     {
1153     //Returns NULL if option not found or a pointer to the text if it is
1154     map<string,string>::iterator pos;
1155     pos = OptionsArray[opttyp].find(opt);
1156     if(pos==OptionsArray[opttyp].end())
1157     return NULL;
1158     return pos->second.c_str();
1159     }
1160 tim 2440
1161 gezelter 3057 bool OBConversion::RemoveOption(const char* opt, Option_type opttyp)
1162     {
1163     return OptionsArray[opttyp].erase(opt)!=0;//true if was there
1164     }
1165 tim 2440
1166 gezelter 3057 void OBConversion::SetOptions(const char* options, Option_type opttyp)
1167     {
1168     while(*options)
1169     {
1170     string ch(1, *options++);
1171     if(*options=='\"')
1172     {
1173     string txt = options+1;
1174     string::size_type pos = txt.find('\"');
1175     if(pos==string::npos)
1176     return; //options is illformed
1177     txt.erase(pos);
1178     OptionsArray[opttyp][ch]= txt;
1179     options += pos+2;
1180     }
1181     else
1182     OptionsArray[opttyp][ch] = string();
1183     }
1184     }
1185 tim 2440
1186 gezelter 3057 typedef std::map<string,int> OPAMapType;
1187     OPAMapType& OBConversion::OptionParamArray(Option_type typ)
1188     {
1189     static OPAMapType* opa = NULL;
1190     if (!opa)
1191     opa = new OPAMapType[3];
1192     return opa[typ];
1193     }
1194 tim 2440
1195 gezelter 3057 void OBConversion::RegisterOptionParam(string name, OBFormat* pFormat,
1196     int numberParams, Option_type typ)
1197     {
1198     //Gives error message if the number of parameters conflicts with an existing registration
1199     map<string,int>::iterator pos;
1200     pos = OptionParamArray(typ).find(name);
1201     if(pos!=OptionParamArray(typ).end())
1202     {
1203     if(pos->second!=numberParams)
1204     {
1205     string description("API");
1206     if(pFormat)
1207     description=pFormat->Description();
1208     cerr << "The number of parameters needed by option \"" << name << "\" in "
1209     << description.substr(0,description.find('\n'))
1210     << " differs from an earlier registration." << endl;
1211     return;
1212     }
1213     }
1214     OptionParamArray(typ)[name] = numberParams;
1215     }
1216 tim 2440
1217 gezelter 3057 int OBConversion::GetOptionParams(string name, Option_type typ)
1218     {
1219     //returns the number of parameters registered for the option, or 0 if not found
1220     map<string,int>::iterator pos;
1221     pos = OptionParamArray(typ).find(name);
1222     if(pos==OptionParamArray(typ).end())
1223     return 0;
1224     return pos->second;
1225     }
1226 tim 2440
1227     }//namespace OpenBabel
1228    
1229     //! \file obconversion.cpp
1230     //! \brief Implementation of OBFormat and OBConversion classes.