ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/OOPSE-4/src/openbabel/obconversion.cpp
Revision: 2440
Committed: Wed Nov 16 19:42:11 2005 UTC (18 years, 7 months ago) by tim
File size: 31868 byte(s)
Log Message:
adding openbabel

File Contents

# Content
1 /**********************************************************************
2 obconversion.cpp - Implementation of OBFormat and OBConversion
3
4 Copyright (C) 2004 by Chris Morley
5 Some portions Copyright (C) 2005 by Geoffrey Hutchison
6
7 This file is part of the Open Babel project.
8 For more information, see <http://openbabel.sourceforge.net/>
9
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation version 2 of the License.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18 ***********************************************************************/
19 // Definition of OBConversion routines
20
21 #ifdef _WIN32
22 #pragma warning (disable : 4786)
23
24 //using 'this' in base class initializer
25 #pragma warning (disable : 4355)
26
27 #ifdef GUI
28 #undef DATADIR
29 #include "stdafx.h" //(includes<windows.h>
30 #endif
31 #endif
32
33 #include <iostream>
34 #include <fstream>
35 #include <sstream>
36 #include <string>
37 #include <map>
38 //#include <dlfcn.h>
39
40 #include "obconversion.hpp"
41
42 #ifdef HAVE_LIBZ
43 #include "zipstream.hpp"
44 #endif
45
46 #if !HAVE_STRNCASECMP
47 extern "C" int strncasecmp(const char *s1, const char *s2, size_t n);
48 #endif
49
50 #ifndef BUFF_SIZE
51 #define BUFF_SIZE 32768
52 #endif
53
54 using namespace std;
55 namespace OpenBabel {
56
57 const char* OBFormat::TargetClassDescription()
58 {
59 //Provides class of default format unless overridden
60 if(OBConversion::GetDefaultFormat())
61 return OBConversion::GetDefaultFormat()->TargetClassDescription();
62 else
63 return "";
64 };
65 const type_info& OBFormat::GetType()
66 {
67 //Provides info on class of default format unless overridden
68 if(OBConversion::GetDefaultFormat())
69 return OBConversion::GetDefaultFormat()->GetType();
70 else
71 return typeid(this); //rubbish return if DefaultFormat not set
72 };
73
74 //***************************************************
75
76 /** @class OBConversion
77 OBConversion maintains a list of the available formats,
78 provides information on them, and controls the conversion process.
79
80 A conversion is carried out by the calling routine, usually in a
81 user interface or an application program, making an instance of
82 OBConversion. It is loaded with the in and out formats, any options
83 and (usually) the default streams for input and output. Then either
84 the Convert() function is called, which allows a single input file
85 to be converted, or the extended functionality of FullConvert()
86 is used. This allows multiple input and output files, allowing:
87 - aggregation - the contents of many input files converted
88 and sent to one output file;
89 - splitting - the molecules from one input file sent to
90 separate output files;
91 - batch conversion - each input file converted to an output file.
92
93 These procedures constitute the "Convert" interface. OBConversion
94 and the user interface or application program do not need to be
95 aware of any other part of OpenBabel - mol.h is not \#included. This
96 allows any chemical object derived from OBBase to be converted;
97 the type of object is decided by the input format class.
98 However,currently, almost all the conversions are for molecules of
99 class OBMol.
100 ///
101 OBConversion can also be used with an "API" interface
102 called from programs which manipulate chemical objects. Input/output is
103 done with the Read() and Write() functions which work with any
104 chemical object, but need to have its type specified. (The
105 ReadMolecule() and WriteMolecule() functions of the format classes
106 can also be used directly.)
107
108
109 Example code using OBConversion
110
111 <b>To read in a molecule, manipulate it and write it out.</b>
112
113 Set up an istream and an ostream, to and from files or elsewhere.
114 (cin and cout are used in the example). Specify the file formats.
115
116 @code
117 OBConversion conv(&cin,&cout);
118 if(conv.SetInAndOutFormats("SMI","MOL"))
119 {
120 OBMol mol;
121 if(conv.Read(&mol))
122 ...manipulate molecule
123
124 conv.Write(&mol);
125 }
126 @endcode
127
128 A two stage construction is used to allow error handling
129 if the format ID is not recognized. This is necessary now that the
130 formats are dynamic and errors are not caught at compile time.
131 OBConversion::Read() is a templated function so that objects derived
132 from OBBase can also be handled, in addition to OBMol, if the format
133 routines are written appropriately.
134
135 <b>To make a molecule from a SMILES string.</b>
136 @code
137 std::string SmilesString;
138 OBMol mol;
139 stringstream ss(SmilesString);
140 OBConversion conv(&ss);
141 if(conv.SetInFormat("smi") && conv.Read(&mol))
142 ...
143 @endcode
144
145 <b>To do a file conversion without manipulating the molecule.</b>
146
147 @code
148 #include "obconversion.hpp" //mol.h is not needed
149 ...set up an istream is and an ostream os
150 OBConversion conv(&is,&os);
151 if(conv.SetInAndOutFormats("SMI","MOL"))
152 {
153 conv.SetOptions("h"); //Optional; (h adds explicit hydrogens)
154 conv.Convert();
155 }
156 @endcode
157
158 <b>To add automatic format conversion to an existing program.</b>
159
160 The existing program inputs from the file identified by the
161 const char* filename into the istream is. The file is assumed to have
162 a format ORIG, but other formats, identified by their file extensions,
163 can now be used.
164
165 @code
166 ifstream ifs(filename); //Original code
167
168 OBConversion conv;
169 OBFormat* inFormat = conv.FormatFromExt(filename);
170 OBFormat* outFormat = conv.GetFormat("ORIG");
171 istream* pIn = &ifs;
172 stringstream newstream;
173 if(inFormat && outFormat)
174 {
175 conv.SetInAndOutFormats(inFormat,outFormat);
176 conv.Convert(pIn,&newstream);
177 pIn=&newstream;
178 }
179 //else error; new features not available; fallback to original functionality
180
181 ...Carry on with original code using pIn
182 @endcode
183
184 In Windows a degree of independence from OpenBabel can be achieved using DLLs.
185 This code would be linked with obconv.lib.
186 At runtime the following DLLs would be in the executable directory:
187 obconv.dll, obdll.dll, one or more *.obf format files.
188 */
189
// Value returned by LoadFormatFiles(); while it is 0 the OBConversion
// constructor calls LoadFormatFiles() again.
int OBConversion::FormatFilesLoaded = 0;

// Set by RegisterFormat() to the format whose Flags() include DEFAULTFORMAT;
// NULL until such a format has been registered.
OBFormat* OBConversion::pDefaultFormat=NULL;
193
194 OBConversion::OBConversion(istream* is, ostream* os) :
195 pInFormat(NULL),pOutFormat(NULL), Index(0), StartNumber(1),
196 EndNumber(0), Count(-1), m_IsLast(true), MoreFilesToCome(false),
197 OneObjectOnly(false), pOb1(NULL), pAuxConv(NULL)
198 {
199 pInStream=is;
200 pOutStream=os;
201 if (FormatFilesLoaded == 0)
202 FormatFilesLoaded = LoadFormatFiles();
203
204 //These options take a parameter
205 RegisterOptionParam("f", NULL, 1,GENOPTIONS);
206 RegisterOptionParam("l", NULL, 1,GENOPTIONS);
207 }
208
209 ///This static function returns a reference to the FormatsMap
210 ///which, because it is a static local variable is constructed only once.
211 ///This fiddle is to avoid the "static initialization order fiasco"
212 ///See Marshall Cline's C++ FAQ Lite document, www.parashift.com/c++-faq-lite/".
213 FMapType& OBConversion::FormatsMap()
214 {
215 static FMapType* fm = new FMapType;
216 return *fm;
217 }
218
219 ///This static function returns a reference to the FormatsMIMEMap
220 ///which, because it is a static local variable is constructed only once.
221 ///This fiddle is to avoid the "static initialization order fiasco"
222 ///See Marshall Cline's C++ FAQ Lite document, www.parashift.com/c++-faq-lite/".
223 FMapType& OBConversion::FormatsMIMEMap()
224 {
225 static FMapType* fm = new FMapType;
226 return *fm;
227 }
228
229 /////////////////////////////////////////////////
230 OBConversion::OBConversion(const OBConversion& o)
231 {
232 Index = o.Index;
233 Count = o.Count;
234 StartNumber = o.StartNumber;
235 EndNumber = o.EndNumber;
236 pInFormat = o.pInFormat;
237 pInStream = o.pInStream;
238 pOutFormat = o.pOutFormat;
239 pOutStream = o.pOutStream;
240 OptionsArray[0]= o.OptionsArray[0];
241 OptionsArray[1]= o.OptionsArray[1];
242 OptionsArray[2]= o.OptionsArray[2];
243 InFilename = o.InFilename;
244 rInpos = o.rInpos;
245 wInpos = o.wInpos;
246 rInlen = o.rInlen;
247 wInlen = o.wInlen;
248 m_IsLast = o.m_IsLast;
249 MoreFilesToCome= o.MoreFilesToCome;
250 OneObjectOnly = o.OneObjectOnly;
251 pOb1 = o.pOb1;
252 ReadyToInput = o.ReadyToInput;
253
254 pAuxConv = NULL;
255 }
256 ////////////////////////////////////////////////
257
258 OBConversion::~OBConversion()
259 {
260 if(pAuxConv!=this)
261 delete pAuxConv;
262 }
263 //////////////////////////////////////////////////////
264
265 /// Class information on formats is collected by making an instance of the class
266 /// derived from OBFormat(only one is usually required). RegisterFormat() is called
267 /// from its constructor.
268 ///
269 /// If the compiled format is stored separately, like in a DLL or shared library,
270 /// the initialization code makes an instance of the imported OBFormat class.
271 int OBConversion::RegisterFormat(const char* ID, OBFormat* pFormat, const char* MIME)
272 {
273 FormatsMap()[ID] = pFormat;
274 if (MIME)
275 FormatsMIMEMap()[MIME] = pFormat;
276 if(pFormat->Flags() & DEFAULTFORMAT)
277 pDefaultFormat=pFormat;
278 return FormatsMap().size();
279 }
280
281 //////////////////////////////////////////////////////
282 int OBConversion::LoadFormatFiles()
283 {
284 /*
285 int count=0;
286 // if(FormatFilesLoaded) return 0;
287 // FormatFilesLoaded=true; //so will load files only once
288 #ifdef USING_DYNAMIC_LIBS
289 //Depending on availablilty, look successively in
290 //FORMATFILE_DIR, executable directory,or current directory
291 string TargetDir;
292 #ifdef FORMATFILE_DIR
293 TargetDir="FORMATFILE_DIR";
294 #endif
295
296 DLHandler::getConvDirectory(TargetDir);
297
298 vector<string> files;
299 if(!DLHandler::findFiles(files,DLHandler::getFormatFilePattern(),TargetDir)) return 0;
300
301 vector<string>::iterator itr;
302 for(itr=files.begin();itr!=files.end();itr++)
303 {
304 if(DLHandler::openLib(*itr))
305 count++;
306 else
307 cerr << *itr << " did not load properly" << endl;
308 }
309 #else
310 count = 1; //avoid calling this function several times
311 #endif //USING_DYNAMIC_LIBS
312 */
313 int count = 1;
314 return count;
315 }
316
317 /**
318 *Returns the ID + the first line of the description in str
319 *and a pointer to the format in pFormat.
320 *If called with str==NULL the first format is returned;
321 *subsequent formats are returned by calling with str!=NULL and the previous value of itr
322 *returns false, and str and pFormat NULL, when there are no more formats.
323 *Use like:
324 *@code
325 * const char* str=NULL;
326 * Formatpos pos;
327 * while(OBConversion::GetNextFormat(pos,str,pFormat))
328 * {
329 * use str and pFormat
330 * }
331 *@endcode
332 */
333 bool OBConversion::GetNextFormat(Formatpos& itr, const char*& str,OBFormat*& pFormat)
334 {
335
336 pFormat = NULL;
337 if(str==NULL)
338 itr = FormatsMap().begin();
339 else
340 itr++;
341 if(itr == FormatsMap().end())
342 {
343 str=NULL; pFormat=NULL;
344 return false;
345 }
346 static string s;
347 s =itr->first;
348 pFormat = itr->second;
349 if(pFormat)
350 {
351 string description(pFormat->Description());
352 s += " -- ";
353 s += description.substr(0,description.find('\n'));
354 }
355
356 if(pFormat->Flags() & NOTWRITABLE) s+=" [Read-only]";
357 if(pFormat->Flags() & NOTREADABLE) s+=" [Write-only]";
358
359 str = s.c_str();
360 return true;
361 }
362
363 //////////////////////////////////////////////////////
364 /// Sets the formats from their ids, e g CML.
365 /// If inID is NULL, the input format is left unchanged. Similarly for outID
366 /// Returns true if both formats have been successfully set at sometime
367 bool OBConversion::SetInAndOutFormats(const char* inID, const char* outID)
368 {
369 return SetInFormat(inID) && SetOutFormat(outID);
370 }
371 //////////////////////////////////////////////////////
372
373 bool OBConversion::SetInAndOutFormats(OBFormat* pIn, OBFormat* pOut)
374 {
375 return SetInFormat(pIn) && SetOutFormat(pOut);
376 }
377 //////////////////////////////////////////////////////
378 bool OBConversion::SetInFormat(OBFormat* pIn)
379 {
380 if(pIn==NULL)
381 return true;
382 pInFormat=pIn;
383 return !(pInFormat->Flags() & NOTREADABLE);
384 }
385 //////////////////////////////////////////////////////
386 bool OBConversion::SetOutFormat(OBFormat* pOut)
387 {
388 pOutFormat=pOut;
389 return !(pOutFormat->Flags() & NOTWRITABLE);
390 }
391 //////////////////////////////////////////////////////
392 bool OBConversion::SetInFormat(const char* inID)
393 {
394 if(inID)
395 pInFormat = FindFormat(inID);
396 return pInFormat && !(pInFormat->Flags() & NOTREADABLE);
397 }
398 //////////////////////////////////////////////////////
399
400 bool OBConversion::SetOutFormat(const char* outID)
401 {
402 if(outID)
403 pOutFormat= FindFormat(outID);
404 return pOutFormat && !(pOutFormat->Flags() & NOTWRITABLE);
405 }
406
407 //////////////////////////////////////////////////////
408 int OBConversion::Convert(istream* is, ostream* os)
409 {
410 if(is) pInStream=is;
411 if(os) pOutStream=os;
412 ostream* pOrigOutStream = pOutStream;
413
414 #ifdef HAVE_LIBZ
415 zlib_stream::zip_istream zIn(*pInStream);
416 if(zIn.is_gzip())
417 pInStream = &zIn;
418
419 zlib_stream::zip_ostream zOut(*pOutStream);
420 if(IsOption("z",GENOPTIONS))
421 {
422 // make sure to output the header
423 zOut.make_gzip();
424 pOutStream = &zOut;
425 }
426 #endif
427
428 int count = Convert();
429 pOutStream = pOrigOutStream;
430 return count;
431
432 }
433
434 ////////////////////////////////////////////////////
435 /// Actions the "convert" interface.
436 /// Calls the OBFormat class's ReadMolecule() which
437 /// - makes a new chemical object of its chosen type (e.g. OBMol)
438 /// - reads an object from the input file
439 /// - subjects the chemical object to 'transformations' as specified by the Options
440 /// - calls AddChemObject to add it to a buffer. The previous object is first output
441 /// via the output Format's WriteMolecule(). During the output process calling
442 /// IsFirst() and GetIndex() (the number of objects including the current one already output.
443 /// allows more control, for instance writing \<cml\> and \</cml\> tags for multiple molecule outputs only.
444 ///
445 /// AddChemObject does not save the object passed to it if it is NULL (as a result of a DoTransformation())
446 /// or if the number of the object is outside the range defined by
447 /// StartNumber and EndNumber.This means the start and end counts apply to all chemical objects
448 /// found whether or not they are output.
449 ///
450 /// If ReadMolecule returns false the input conversion loop is exited.
451 ///
int OBConversion::Convert()
{
  // Runs the read/write loop on the stored streams and formats.
  // Returns Index, the number of objects actually output, or 0 when the
  // streams or the input format are missing or SetStartAndEnd() fails.
  if(pInStream==NULL || pOutStream==NULL)
  {
    cerr << "input or output stream not set" << endl;
    return 0;
  }

  if(!pInFormat) return 0;
  Count=0;//number objects processed; >=0 puts AddChemObject() into queue mode

  //Apply the -f and -l options (may skip objects in the input)
  if(!SetStartAndEnd())
    return 0;

  ReadyToInput=true;
  m_IsLast=false;
  pOb1=NULL;
  wInlen=0;

  //Input loop
  while(ReadyToInput && pInStream->peek() != EOF && pInStream->good())
  {
    if(pInStream==&cin)
    {
      //When reading from cin a line starting with a newline ends the input
      if(pInStream->peek()=='\n')
        break;
    }
    else
      rInpos = pInStream->tellg(); //start of this object; used by AddChemObject()

    bool ret=false;
    try
    {
      ret = pInFormat->ReadChemObject(this);
    }
    catch(...)
    {
      //Rethrow unless the -e option or one-object mode is active;
      //otherwise carry on with ret==false
      if(!IsOption("e", GENOPTIONS) && !OneObjectOnly)
        throw;
    }

    if(!ret)
    {
      //error or termination request: terminate unless
      // -e option requested and successfully can skip past current object
      if(!IsOption("e", GENOPTIONS) || pInFormat->SkipObjects(0,this)!=1)
        break;
    }
    if(OneObjectOnly)
      break;
    // Objects supplied to AddChemObject() which may output them after a delay
    //ReadyToInput may be made false in AddChemObject()
    // by WriteMolecule() returning false or by Count==EndNumber
  }

  //Output last object
  if(!MoreFilesToCome)
    m_IsLast=true;

  if(pOutFormat)
    if(!pOutFormat->WriteChemObject(this))
      Index--; //undo the increment made by GetChemObject() for a failed write

  //Put AddChemObject() into non-queue mode
  Count= -1;
  EndNumber=StartNumber=0; pOb1=NULL;//leave tidy
  MoreFilesToCome=false;
  OneObjectOnly=false;

  return Index; //The number actually output
}
523 //////////////////////////////////////////////////////
524 bool OBConversion::SetStartAndEnd()
525 {
526 int TempStartNumber=0;
527 const char* p = IsOption("f",GENOPTIONS);
528 if(p)
529 {
530 StartNumber=atoi(p);
531 if(StartNumber>1)
532 {
533 TempStartNumber=StartNumber;
534 //Try to skip objects now
535 int ret = pInFormat->SkipObjects(StartNumber-1,this);
536 if(ret==-1) //error
537 return false;
538 if(ret==1) //success:objects skipped
539 {
540 Count = StartNumber-1;
541 StartNumber=0;
542 }
543 }
544 }
545
546 p = IsOption("l",GENOPTIONS);
547 if(p)
548 {
549 EndNumber=atoi(p);
550 if(TempStartNumber && EndNumber<TempStartNumber)
551 EndNumber=TempStartNumber;
552 }
553
554 return true;
555 }
556
557 //////////////////////////////////////////////////////
558 /// Retrieves an object stored by AddChemObject() during output
559 OBBase* OBConversion::GetChemObject()
560 {
561 Index++;
562 return pOb1;
563 }
564
565 //////////////////////////////////////////////////////
566 /// Called by ReadMolecule() to deliver an object it has read from an input stream.
567 /// Used in two modes:
568 /// - When Count is negative it is left negative and the routine is just a store
569 /// for an OBBase object. The negative value returned tells the calling
570 /// routine that no more objects are required.
571 /// - When count is >=0, probably set by Convert(), it acts as a queue of 2:
572 /// writing the currently stored value before accepting the supplied one. This delay
573 /// allows output routines to respond differently when the written object is the last.
574 /// Count is incremented with each call, even if pOb=NULL.
575 /// Objects are not added to the queue if the count is outside the range
576 /// StartNumber to EndNumber. There is no upper limit if EndNumber is zero.
577 /// The return value is the number of objects, including this one, which have been
578 /// input (but not necessarily output).
579 int OBConversion::AddChemObject(OBBase* pOb)
580 {
581 if(Count<0)
582 {
583 pOb1=pOb;
584 return Count;
585 }
586 Count++;
587 if(Count>=(int)StartNumber)//keeps reading objects but does nothing with them
588 {
589 if(Count==(int)EndNumber)
590 ReadyToInput=false; //stops any more objects being read
591
592 rInlen = pInStream->tellg() - rInpos;
593
594 if(pOb)
595 {
596 if(pOb1 && pOutFormat) //see if there is an object ready to be output
597 {
598 //Output object
599 if (!pOutFormat->WriteChemObject(this))
600 {
601 //faultly write, so finish
602 --Index;
603 ReadyToInput=false;
604 return Count;
605 }
606 }
607 pOb1=pOb;
608 wInpos = rInpos; //Save the position in the input file to be accessed when writing it
609 wInlen = rInlen;
610 }
611 }
612 return Count;
613 }
614 //////////////////////////////////////////////////////
615 int OBConversion::GetOutputIndex() const
616 {
617 //The number of objects actually written already from this instance of OBConversion
618 return Index;
619 }
620 void OBConversion::SetOutputIndex(int indx)
621 {
622 Index=indx;
623 }
624 //////////////////////////////////////////////////////
625 OBFormat* OBConversion::FindFormat(const char* ID)
626 {
627 //Case insensitive
628 if(FormatsMap().find(ID) == FormatsMap().end())
629 return NULL;
630 else
631 return FormatsMap()[ID];
632 }
633
634 //////////////////////////////////////////////////
635 const char* OBConversion::GetTitle() const
636 {
637 return(InFilename.c_str());
638 }
639
640 void OBConversion::SetMoreFilesToCome()
641 {
642 MoreFilesToCome=true;
643 }
644
645 void OBConversion::SetOneObjectOnly()
646 {
647 OneObjectOnly=true;
648 m_IsLast=true;
649 }
650
651 /////////////////////////////////////////////////////////
652 OBFormat* OBConversion::FormatFromExt(const char* filename)
653 {
654 string file = filename;
655 size_t extPos = file.rfind(".");
656
657 if(extPos!=string::npos)
658 {
659 // only do this if we actually can read .gz files
660 #ifdef HAVE_LIBZ
661 if (file.substr(extPos,3) == ".gz")
662 {
663 file.erase(extPos);
664 extPos = file.rfind(".");
665 if (extPos!=string::npos)
666 return FindFormat( (file.substr(extPos + 1, file.size())).c_str() );
667 }
668 else
669 #endif
670 return FindFormat( (file.substr(extPos + 1, file.size())).c_str() );
671 }
672 return NULL; //if no extension
673 }
674
675 OBFormat* OBConversion::FormatFromMIME(const char* MIME)
676 {
677 if(FormatsMIMEMap().find(MIME) == FormatsMIMEMap().end())
678 return NULL;
679 else
680 return FormatsMIMEMap()[MIME];
681 }
682
683 bool OBConversion::Read(OBBase* pOb, std::istream* pin)
684 {
685 if(pin)
686 pInStream=pin;
687 if(!pInFormat) return false;
688
689 #ifdef HAVE_LIBZ
690 zlib_stream::zip_istream zIn(*pInStream);
691 if(zIn.is_gzip())
692 pInStream = &zIn;
693 #endif
694
695 return pInFormat->ReadMolecule(pOb, this);
696 }
697 //////////////////////////////////////////////////
698 /// Writes the object pOb but does not delete it afterwards.
699 /// The output stream is lastingly changed if pout is not NULL
700 /// Returns true if successful.
701 bool OBConversion::Write(OBBase* pOb, ostream* pos)
702 {
703 if(pos)
704 pOutStream=pos;
705 if(!pOutFormat) return false;
706
707 ostream* pOrigOutStream = pOutStream;
708 #ifdef HAVE_LIBZ
709 zlib_stream::zip_ostream zOut(*pOutStream);
710 if(IsOption("z",GENOPTIONS))
711 {
712 // make sure to output the header
713 zOut.make_gzip();
714 pOutStream = &zOut;
715 }
716 #endif
717
718 bool ret = pOutFormat->WriteMolecule(pOb,this);
719 pOutStream = pOrigOutStream;
720 return ret;
721 }
722
723 //////////////////////////////////////////////////
724 /// Writes the object pOb but does not delete it afterwards.
725 /// The output stream not changed (since we cannot write to this string later)
726 /// Returns true if successful.
727 std::string OBConversion::WriteString(OBBase* pOb)
728 {
729 ostream *oldStream = pOutStream; // save old output
730 stringstream newStream;
731
732 if(pOutFormat)
733 {
734 Write(pOb, &newStream);
735 }
736 pOutStream = oldStream;
737
738 return newStream.str();
739 }
740
741 //////////////////////////////////////////////////
742 /// Writes the object pOb but does not delete it afterwards.
743 /// The output stream is lastingly changed to point to the file
744 /// Returns true if successful.
745 bool OBConversion::WriteFile(OBBase* pOb, string filePath)
746 {
747 if(!pOutFormat) return false;
748
749 ofstream ofs;
750 ios_base::openmode omode =
751 pOutFormat->Flags() & WRITEBINARY ? ios_base::out|ios_base::binary : ios_base::out;
752
753 ofs.open(filePath.c_str(),omode);
754 if(!ofs)
755 {
756 cerr << "Cannot write to " << filePath <<endl;
757 return false;
758 }
759
760 return Write(pOb, &ofs);
761 }
762
763 ////////////////////////////////////////////
764 bool OBConversion::ReadString(OBBase* pOb, std::string input)
765 {
766 stringstream pin(input);
767 return Read(pOb,&pin);
768 }
769
770
771 ////////////////////////////////////////////
772 bool OBConversion::ReadFile(OBBase* pOb, std::string filePath)
773 {
774 if(!pInFormat) return false;
775
776 ifstream ifs;
777 ios_base::openmode imode =
778 pOutFormat->Flags() & READBINARY ? ios_base::in|ios_base::binary : ios_base::in;
779
780 ifs.open(filePath.c_str(),imode);
781 if(!ifs)
782 {
783 cerr << "Cannot read from " << filePath << endl;
784 return false;
785 }
786
787 return Read(pOb,&ifs);
788 }
789
790
791 ////////////////////////////////////////////
/// Returns the help text listing the general conversion options
/// (-f, -l, -t, -e, -z).
const char* OBConversion::Description()
{
  // The literal is continued with backslash-newline, so any whitespace at the
  // start of a continuation line becomes part of the returned text. The
  // continuation lines are therefore left exactly as found.
  return "Conversion options\n \
-f <#> Start import at molecule # specified\n \
-l <#> End import at molecule # specified\n \
-t All input files describe a single molecule\n \
-e Continue with next object after error, if possible\n \
-z Compress the output with gzip\n";
}
801
802 ////////////////////////////////////////////
803 bool OBConversion::IsLast()
804 {
805 return m_IsLast;
806 }
807 ////////////////////////////////////////////
808 bool OBConversion::IsFirstInput()
809 {
810 return (Count==0);
811 }
812
813 /////////////////////////////////////////////////
814 string OBConversion::BatchFileName(string& BaseName, string& InFile)
815 {
816 //Replaces * in BaseName by InFile without extension and path
817 string ofname(BaseName);
818 int pos = ofname.find('*');
819 if(pos>=0)
820 {
821 //Replace * by input filename
822 int posdot=(InFile).rfind('.');
823 if(posdot==-1) posdot=(InFile).size();
824 int posname=(InFile).find_last_of("\\/");
825 ofname.replace(pos,1, (InFile), posname+1, posdot-posname-1);
826 }
827 return ofname;
828 }
829
830 ////////////////////////////////////////////////
831 string OBConversion::IncrementedFileName(string& BaseName, const int Count)
832 {
833 //Replaces * in BaseName by Count
834 string ofname(BaseName);
835 int pos = ofname.find('*');
836 if(pos>=0)
837 {
838 char num[33];
839 snprintf(num, 33, "%d", Count);
840 ofname.replace(pos,1, num);
841 }
842 return ofname;
843 }
844 ////////////////////////////////////////////////////
845
846 /**
847 Makes input and output streams, and carries out normal,
848 batch, aggregation, and splitting conversion.
849
850 Normal
851 Done if FileList contains a single file name and OutputFileName
852 does not contain a *.
853
854 Aggregation
855 Done if FileList has more than one file name and OutputFileName does
856 not contain * . All the chemical objects are converted and sent
857 to the single output file.
858
859 Splitting
860 Done if FileList contains a single file name and OutputFileName
861 contains a * . Each chemical object in the input file converted
862 and sent to a separate file whose name is OutputFileName with the
863 * replaced by 1, 2, 3, etc.
864 For example, if OutputFileName is NEW*.smi then the output files are
865 NEW1.smi, NEW2.smi, etc.
866
867 Batch Conversion
868 Done if FileList has more than one file name and contains a * .
869 Each input file is converted to an output file whose name is
870 OutputFileName with the * replaced by the inputfile name without its
871 path and extension.
872 So if the input files were inpath/First.cml, inpath/Second.cml
873 and OutputFileName was NEW*.mol, the output files would be
874 NEWFirst.mol, NEWSecond.mol.
875
876 If FileList is empty, the input stream that has already been set
877 (usually in the constructor) is used. If OutputFileName is empty,
878 the output stream already set is used.
879
880 On exit, OutputFileList contains the names of the output files.
881
882 Returns the number of Chemical objects converted.
883 */
884 int OBConversion::FullConvert(std::vector<std::string>& FileList, std::string& OutputFileName,
885 std::vector<std::string>& OutputFileList)
886 {
887
888 istream* pInStream;
889 ostream* pOutStream=NULL;
890 ifstream is;
891 ofstream os;
892 bool HasMultipleOutputFiles=false;
893 int Count=0;
894 bool CommonInFormat = pInFormat ? true:false; //whether set in calling routine
895 ios_base::openmode omode =
896 pOutFormat->Flags() & WRITEBINARY ? ios_base::out|ios_base::binary : ios_base::out;
897 try
898 {
899 ofstream ofs;
900
901 //OUTPUT
902 if(OutputFileName.empty())
903 pOutStream = NULL; //use existing stream
904 else
905 {
906 if(OutputFileName.find_first_of('*')!=string::npos) HasMultipleOutputFiles = true;
907 if(!HasMultipleOutputFiles)
908 {
909 os.open(OutputFileName.c_str(),omode);
910 if(!os)
911 {
912 cerr << "Cannot write to " << OutputFileName <<endl;
913 return 0;
914 }
915 OutputFileList.push_back(OutputFileName);
916 pOutStream=&os;
917 }
918 }
919
920 if(IsOption("t",GENOPTIONS))
921 {
922 //Concatenate input file option (multiple files, single molecule)
923 if(HasMultipleOutputFiles)
924 {
925 cerr << "Cannot have multiple output files and also concatenate input files (-t option)" <<endl;
926 return 0;
927 }
928
929 stringstream allinput;
930 vector<string>::iterator itr;
931 for(itr=FileList.begin();itr!=FileList.end();itr++)
932 {
933 ifstream ifs((*itr).c_str());
934 if(!ifs)
935 {
936 cerr << "Cannot open " << *itr <<endl;
937 continue;
938 }
939 allinput << ifs.rdbuf(); //Copy all file contents
940 ifs.close();
941 }
942 Count = Convert(&allinput,pOutStream);
943 return Count;
944 }
945
946 //INPUT
947 if(FileList.empty())
948 pInStream = NULL;
949 else
950 {
951 if(FileList.size()>1)
952 {
953 //multiple input files
954 vector<string>::iterator itr, tempitr;
955 tempitr = FileList.end();
956 tempitr--;
957 for(itr=FileList.begin();itr!=FileList.end();itr++)
958 {
959 InFilename = *itr;
960 ifstream ifs;
961 if(!OpenAndSetFormat(CommonInFormat, &ifs))
962 continue;
963
964 if(HasMultipleOutputFiles)
965 {
966 //Batch conversion
967 string batchfile = BatchFileName(OutputFileName,*itr);
968 if(ofs.is_open()) ofs.close();
969 ofs.open(batchfile.c_str(), omode);
970 if(!ofs)
971 {
972 cerr << "Cannot open " << batchfile << endl;
973 return Count;
974 }
975 OutputFileList.push_back(batchfile);
976 SetOutputIndex(0); //reset for new file
977 Count += Convert(&ifs,&ofs);
978 }
979 else
980 {
981 //Aggregation
982 if(itr!=tempitr) SetMoreFilesToCome();
983 Count = Convert(&ifs,pOutStream);
984 }
985 }
986 return Count;
987 }
988 else
989 {
990 //Single input file
991 InFilename = FileList[0];
992 if(!OpenAndSetFormat(CommonInFormat, &is))
993 return 0;
994 pInStream=&is;
995
996 if(HasMultipleOutputFiles)
997 {
998 //Splitting
999 //Output is put in a temporary stream and written to a file
1000 //with an augmenting name only when it contains a valid object.
1001 int Indx=1;
1002 for(;;)
1003 {
1004 stringstream ss;
1005 SetOutputIndex(0); //reset for new file
1006 SetOneObjectOnly();
1007
1008 int ThisFileCount = Convert(pInStream,&ss);
1009 if(ThisFileCount==0) break;
1010 Count+=ThisFileCount;
1011
1012 if(ofs.is_open()) ofs.close();
1013 string incrfile = IncrementedFileName(OutputFileName,Indx++);
1014 ofs.open(incrfile.c_str(), omode);
1015 if(!ofs)
1016 {
1017 cerr << "Cannot write to " << incrfile << endl;
1018 return Count;
1019 }
1020 OutputFileList.push_back(incrfile);
1021 ofs << ss.rdbuf();
1022 ofs.close();
1023 ss.clear();
1024 }
1025 return Count;
1026 }
1027 }
1028 }
1029
1030 //Single input and output files
1031 Count = Convert(pInStream,pOutStream);
1032 return Count;
1033 }
1034 catch(...)
1035 {
1036 cerr << "Conversion failed with an exception. Count=" << Count <<endl;
1037 return Count;
1038 }
1039 }
1040
1041 bool OBConversion::OpenAndSetFormat(bool SetFormat, ifstream* is)
1042 {
1043 //Opens file using InFilename and sets pInFormat if requested
1044 if(!SetFormat)
1045 {
1046 pInFormat = FormatFromExt(InFilename.c_str());
1047 if(pInFormat==NULL)
1048 {
1049 string::size_type pos = InFilename.rfind('.');
1050 string ext;
1051 if(pos!=string::npos)
1052 ext = InFilename.substr(pos);
1053 cerr << "Cannot read input format \"" << ext << '\"'
1054 << " for file \"" << InFilename << "\"" << endl;
1055 return false;
1056 }
1057 }
1058
1059 ios_base::openmode imode;
1060 #ifdef ALL_READS_BINARY //Makes unix files compatible with VC++6
1061 imode = ios_base::in|ios_base::binary;
1062 #else
1063 imode = pInFormat->Flags() & READBINARY ? ios_base::in|ios_base::binary : ios_base::in;
1064 #endif
1065
1066 is->open(InFilename.c_str(), imode);
1067 if(!is->good())
1068 {
1069 cerr << "Cannot open " << InFilename <<endl;
1070 return false;
1071 }
1072
1073 return true;
1074 }
1075
1076 ///////////////////////////////////////////////
1077 void OBConversion::AddOption(const char* opt, Option_type opttyp, const char* txt)
1078 {
1079 //Also updates an option
1080 if(txt==NULL)
1081 OptionsArray[opttyp][opt]=string();
1082 else
1083 OptionsArray[opttyp][opt]=txt;
1084 }
1085
1086 const char* OBConversion::IsOption(const char* opt, Option_type opttyp)
1087 {
1088 //Returns NULL if option not found or a pointer to the text if it is
1089 map<string,string>::iterator pos;
1090 pos = OptionsArray[opttyp].find(opt);
1091 if(pos==OptionsArray[opttyp].end())
1092 return NULL;
1093 return pos->second.c_str();
1094 }
1095
1096 bool OBConversion::RemoveOption(const char* opt, Option_type opttyp)
1097 {
1098 return OptionsArray[opttyp].erase(opt)!=0;//true if was there
1099 }
1100
1101 void OBConversion::SetOptions(const char* options, Option_type opttyp)
1102 {
1103 while(*options)
1104 {
1105 string ch(1, *options++);
1106 if(*options=='\"')
1107 {
1108 string txt = options+1;
1109 string::size_type pos = txt.find('\"');
1110 if(pos==string::npos)
1111 return; //options is illformed
1112 txt.erase(pos);
1113 OptionsArray[opttyp][ch]= txt;
1114 options += pos+2;
1115 }
1116 else
1117 OptionsArray[opttyp][ch] = string();
1118 }
1119 }
1120
1121 typedef std::map<string,int> OPAMapType;
1122 OPAMapType& OBConversion::OptionParamArray(Option_type typ)
1123 {
1124 static OPAMapType* opa = new OPAMapType[3];
1125 return opa[typ];
1126 }
1127
1128 void OBConversion::RegisterOptionParam(string name, OBFormat* pFormat,
1129 int numberParams, Option_type typ)
1130 {
1131 //Gives error message if the number of parameters conflicts with an existing registration
1132 map<string,int>::iterator pos;
1133 pos = OptionParamArray(typ).find(name);
1134 if(pos!=OptionParamArray(typ).end())
1135 {
1136 if(pos->second!=numberParams)
1137 {
1138 string description("API");
1139 if(pFormat)
1140 description=pFormat->Description();
1141 cerr << "The number of parameters needed by option \"" << name << "\" in "
1142 << description.substr(0,description.find('\n'))
1143 << " differs from an earlier registration." << endl;
1144 return;
1145 }
1146 }
1147 OptionParamArray(typ)[name] = numberParams;
1148 }
1149
1150 int OBConversion::GetOptionParams(string name, Option_type typ)
1151 {
1152 //returns the number of parameters registered for the option, or 0 if not found
1153 map<string,int>::iterator pos;
1154 pos = OptionParamArray(typ).find(name);
1155 if(pos==OptionParamArray(typ).end())
1156 return 0;
1157 return pos->second;
1158 }
1159
1160 }//namespace OpenBabel
1161
1162 //! \file obconversion.cpp
1163 //! \brief Implementation of OBFormat and OBConversion classes.