ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/OOPSE-3.0/src/openbabel/obconversion.cpp
Revision: 2469
Committed: Fri Dec 2 15:38:03 2005 UTC (18 years, 7 months ago) by tim
File size: 27818 byte(s)
Log Message:
End of the Link --> List
Return of the Object-Oriented
replace yacc/lex parser with antlr parser

File Contents

# Content
1 /**********************************************************************
2 obconversion.cpp - Declaration of OBFormat and OBConversion
3
4 Copyright (C) 2004 by Chris Morley
5 Some portions Copyright (C) 2005 by Geoffrey Hutchison
6
7 This file is part of the Open Babel project.
8 For more information, see <http://openbabel.sourceforge.net/>
9
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation version 2 of the License.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18 ***********************************************************************/
19 // Definition of OBConversion routines
20
21 #ifdef _WIN32
22 #pragma warning (disable : 4786)
23
24 //using 'this' in base class initializer
25 #pragma warning (disable : 4355)
26
27 #ifdef GUI
28 #undef DATADIR
29 #include "stdafx.h" //(includes<windows.h>
30 #endif
31 #endif
32
33 #include <iostream>
34 #include <fstream>
35 #include <sstream>
36 #include <string>
37 #include <map>
38 //#include <dlfcn.h>
39
40 #include "obconversion.hpp"
41
42 #ifdef HAVE_LIBZ
43 #include "zipstream.hpp"
44 #endif
45
46 #if !HAVE_STRNCASECMP
47 extern "C" int strncasecmp(const char *s1, const char *s2, size_t n);
48 #endif
49
50 #ifndef BUFF_SIZE
51 #define BUFF_SIZE 32768
52 #endif
53
54 using namespace std;
55 namespace OpenBabel {
56
57 const char* OBFormat::TargetClassDescription()
58 {
59 //Provides class of default format unless overridden
60 if(OBConversion::GetDefaultFormat())
61 return OBConversion::GetDefaultFormat()->TargetClassDescription();
62 else
63 return "";
64 }
65 const type_info& OBFormat::GetType()
66 {
67 //Provides info on class of default format unless overridden
68 if(OBConversion::GetDefaultFormat())
69 return OBConversion::GetDefaultFormat()->GetType();
70 else
71 return typeid(this); //rubbish return if DefaultFormat not set
72 }
73
74
75 int OBConversion::FormatFilesLoaded = 0;
76
77 OBFormat* OBConversion::pDefaultFormat=NULL;
78
79 OBConversion::OBConversion(istream* is, ostream* os) :
80 pInFormat(NULL),pOutFormat(NULL), Index(0), StartNumber(1),
81 EndNumber(0), Count(-1), m_IsLast(true), MoreFilesToCome(false),
82 OneObjectOnly(false), pOb1(NULL), pAuxConv(NULL)
83 {
84 pInStream=is;
85 pOutStream=os;
86 if (FormatFilesLoaded == 0)
87 FormatFilesLoaded = LoadFormatFiles();
88
89 //These options take a parameter
90 RegisterOptionParam("f", NULL, 1,GENOPTIONS);
91 RegisterOptionParam("l", NULL, 1,GENOPTIONS);
92 }
93
94 ///This static function returns a reference to the FormatsMap
95 ///which, because it is a static local variable is constructed only once.
96 ///This fiddle is to avoid the "static initialization order fiasco"
97 ///See Marshall Cline's C++ FAQ Lite document, www.parashift.com/c++-faq-lite/".
98 FMapType& OBConversion::FormatsMap()
99 {
100 static FMapType* fm = new FMapType;
101 return *fm;
102 }
103
104 ///This static function returns a reference to the FormatsMIMEMap
105 ///which, because it is a static local variable is constructed only once.
106 ///This fiddle is to avoid the "static initialization order fiasco"
107 ///See Marshall Cline's C++ FAQ Lite document, www.parashift.com/c++-faq-lite/".
108 FMapType& OBConversion::FormatsMIMEMap()
109 {
110 static FMapType* fm = new FMapType;
111 return *fm;
112 }
113
114 /////////////////////////////////////////////////
115 OBConversion::OBConversion(const OBConversion& o)
116 {
117 Index = o.Index;
118 Count = o.Count;
119 StartNumber = o.StartNumber;
120 EndNumber = o.EndNumber;
121 pInFormat = o.pInFormat;
122 pInStream = o.pInStream;
123 pOutFormat = o.pOutFormat;
124 pOutStream = o.pOutStream;
125 OptionsArray[0]= o.OptionsArray[0];
126 OptionsArray[1]= o.OptionsArray[1];
127 OptionsArray[2]= o.OptionsArray[2];
128 InFilename = o.InFilename;
129 rInpos = o.rInpos;
130 wInpos = o.wInpos;
131 rInlen = o.rInlen;
132 wInlen = o.wInlen;
133 m_IsLast = o.m_IsLast;
134 MoreFilesToCome= o.MoreFilesToCome;
135 OneObjectOnly = o.OneObjectOnly;
136 pOb1 = o.pOb1;
137 ReadyToInput = o.ReadyToInput;
138
139 pAuxConv = NULL;
140 }
141 ////////////////////////////////////////////////
142
143 OBConversion::~OBConversion()
144 {
145 if(pAuxConv!=this)
146 delete pAuxConv;
147 }
148 //////////////////////////////////////////////////////
149
150 /// Class information on formats is collected by making an instance of the class
151 /// derived from OBFormat(only one is usually required). RegisterFormat() is called
152 /// from its constructor.
153 ///
154 /// If the compiled format is stored separately, like in a DLL or shared library,
155 /// the initialization code makes an instance of the imported OBFormat class.
156 int OBConversion::RegisterFormat(const char* ID, OBFormat* pFormat, const char* MIME)
157 {
158 FormatsMap()[ID] = pFormat;
159 if (MIME)
160 FormatsMIMEMap()[MIME] = pFormat;
161 if(pFormat->Flags() & DEFAULTFORMAT)
162 pDefaultFormat=pFormat;
163 return FormatsMap().size();
164 }
165
166 //////////////////////////////////////////////////////
167 int OBConversion::LoadFormatFiles()
168 {
169 /*
170 int count=0;
171 // if(FormatFilesLoaded) return 0;
172 // FormatFilesLoaded=true; //so will load files only once
173 #ifdef USING_DYNAMIC_LIBS
174 //Depending on availablilty, look successively in
175 //FORMATFILE_DIR, executable directory,or current directory
176 string TargetDir;
177 #ifdef FORMATFILE_DIR
178 TargetDir="FORMATFILE_DIR";
179 #endif
180
181 DLHandler::getConvDirectory(TargetDir);
182
183 vector<string> files;
184 if(!DLHandler::findFiles(files,DLHandler::getFormatFilePattern(),TargetDir)) return 0;
185
186 vector<string>::iterator itr;
187 for(itr=files.begin();itr!=files.end();itr++)
188 {
189 if(DLHandler::openLib(*itr))
190 count++;
191 else
192 cerr << *itr << " did not load properly" << endl;
193 }
194 #else
195 count = 1; //avoid calling this function several times
196 #endif //USING_DYNAMIC_LIBS
197 */
198 int count = 1;
199 return count;
200 }
201
202 /**
203 *Returns the ID + the first line of the description in str
204 *and a pointer to the format in pFormat.
205 *If called with str==NULL the first format is returned;
206 *subsequent formats are returned by calling with str!=NULL and the previous value of itr
207 *returns false, and str and pFormat NULL, when there are no more formats.
208 *Use like:
209 *@code
210 * const char* str=NULL;
211 * Formatpos pos;
212 * while(OBConversion::GetNextFormat(pos,str,pFormat))
213 * {
214 * use str and pFormat
215 * }
216 *@endcode
217 */
218 bool OBConversion::GetNextFormat(Formatpos& itr, const char*& str,OBFormat*& pFormat)
219 {
220
221 pFormat = NULL;
222 if(str==NULL)
223 itr = FormatsMap().begin();
224 else
225 itr++;
226 if(itr == FormatsMap().end())
227 {
228 str=NULL; pFormat=NULL;
229 return false;
230 }
231 static string s;
232 s =itr->first;
233 pFormat = itr->second;
234 if(pFormat)
235 {
236 string description(pFormat->Description());
237 s += " -- ";
238 s += description.substr(0,description.find('\n'));
239 }
240
241 if(pFormat->Flags() & NOTWRITABLE) s+=" [Read-only]";
242 if(pFormat->Flags() & NOTREADABLE) s+=" [Write-only]";
243
244 str = s.c_str();
245 return true;
246 }
247
248 //////////////////////////////////////////////////////
249 /// Sets the formats from their ids, e g CML.
250 /// If inID is NULL, the input format is left unchanged. Similarly for outID
251 /// Returns true if both formats have been successfully set at sometime
252 bool OBConversion::SetInAndOutFormats(const char* inID, const char* outID)
253 {
254 return SetInFormat(inID) && SetOutFormat(outID);
255 }
256 //////////////////////////////////////////////////////
257
258 bool OBConversion::SetInAndOutFormats(OBFormat* pIn, OBFormat* pOut)
259 {
260 return SetInFormat(pIn) && SetOutFormat(pOut);
261 }
262 //////////////////////////////////////////////////////
263 bool OBConversion::SetInFormat(OBFormat* pIn)
264 {
265 if(pIn==NULL)
266 return true;
267 pInFormat=pIn;
268 return !(pInFormat->Flags() & NOTREADABLE);
269 }
270 //////////////////////////////////////////////////////
271 bool OBConversion::SetOutFormat(OBFormat* pOut)
272 {
273 pOutFormat=pOut;
274 return !(pOutFormat->Flags() & NOTWRITABLE);
275 }
276 //////////////////////////////////////////////////////
277 bool OBConversion::SetInFormat(const char* inID)
278 {
279 if(inID)
280 pInFormat = FindFormat(inID);
281 return pInFormat && !(pInFormat->Flags() & NOTREADABLE);
282 }
283 //////////////////////////////////////////////////////
284
285 bool OBConversion::SetOutFormat(const char* outID)
286 {
287 if(outID)
288 pOutFormat= FindFormat(outID);
289 return pOutFormat && !(pOutFormat->Flags() & NOTWRITABLE);
290 }
291
292 //////////////////////////////////////////////////////
293 int OBConversion::Convert(istream* is, ostream* os)
294 {
295 if(is) pInStream=is;
296 if(os) pOutStream=os;
297 ostream* pOrigOutStream = pOutStream;
298
299 #ifdef HAVE_LIBZ
300 zlib_stream::zip_istream zIn(*pInStream);
301 if(zIn.is_gzip())
302 pInStream = &zIn;
303
304 zlib_stream::zip_ostream zOut(*pOutStream);
305 if(IsOption("z",GENOPTIONS))
306 {
307 // make sure to output the header
308 zOut.make_gzip();
309 pOutStream = &zOut;
310 }
311 #endif
312
313 int count = Convert();
314 pOutStream = pOrigOutStream;
315 return count;
316
317 }
318
319 ////////////////////////////////////////////////////
320 /// Actions the "convert" interface.
321 /// Calls the OBFormat class's ReadMolecule() which
322 /// - makes a new chemical object of its chosen type (e.g. OBMol)
323 /// - reads an object from the input file
324 /// - subjects the chemical object to 'transformations' as specified by the Options
325 /// - calls AddChemObject to add it to a buffer. The previous object is first output
326 /// via the output Format's WriteMolecule(). During the output process calling
327 /// IsFirst() and GetIndex() (the number of objects including the current one already output.
328 /// allows more control, for instance writing \<cml\> and \</cml\> tags for multiple molecule outputs only.
329 ///
330 /// AddChemObject does not save the object passed to it if it is NULL (as a result of a DoTransformation())
331 /// or if the number of the object is outside the range defined by
332 /// StartNumber and EndNumber.This means the start and end counts apply to all chemical objects
333 /// found whether or not they are output.
334 ///
335 /// If ReadMolecule returns false the input conversion loop is exited.
336 ///
337 int OBConversion::Convert()
338 {
339 if(pInStream==NULL || pOutStream==NULL)
340 {
341 cerr << "input or output stream not set" << endl;
342 return 0;
343 }
344
345 if(!pInFormat) return 0;
346 Count=0;//number objects processed
347
348 if(!SetStartAndEnd())
349 return 0;
350
351 ReadyToInput=true;
352 m_IsLast=false;
353 pOb1=NULL;
354 wInlen=0;
355
356 //Input loop
357 while(ReadyToInput && pInStream->peek() != EOF && pInStream->good())
358 {
359 if(pInStream==&cin)
360 {
361 if(pInStream->peek()=='\n')
362 break;
363 }
364 else
365 rInpos = pInStream->tellg();
366
367 bool ret=false;
368 try
369 {
370 ret = pInFormat->ReadChemObject(this);
371 }
372 catch(...)
373 {
374 if(!IsOption("e", GENOPTIONS) && !OneObjectOnly)
375 throw;
376 }
377
378 if(!ret)
379 {
380 //error or termination request: terminate unless
381 // -e option requested and sucessfully can skip past current object
382 if(!IsOption("e", GENOPTIONS) || pInFormat->SkipObjects(0,this)!=1)
383 break;
384 }
385 if(OneObjectOnly)
386 break;
387 // Objects supplied to AddChemObject() which may output them after a delay
388 //ReadyToInput may be made false in AddChemObject()
389 // by WriteMolecule() returning false or by Count==EndNumber
390 }
391
392 //Output last object
393 if(!MoreFilesToCome)
394 m_IsLast=true;
395
396 if(pOutFormat)
397 if(!pOutFormat->WriteChemObject(this))
398 Index--;
399
400 //Put AddChemObject() into non-queue mode
401 Count= -1;
402 EndNumber=StartNumber=0; pOb1=NULL;//leave tidy
403 MoreFilesToCome=false;
404 OneObjectOnly=false;
405
406 return Index; //The number actually output
407 }
408 //////////////////////////////////////////////////////
409 bool OBConversion::SetStartAndEnd()
410 {
411 int TempStartNumber=0;
412 const char* p = IsOption("f",GENOPTIONS);
413 if(p)
414 {
415 StartNumber=atoi(p);
416 if(StartNumber>1)
417 {
418 TempStartNumber=StartNumber;
419 //Try to skip objects now
420 int ret = pInFormat->SkipObjects(StartNumber-1,this);
421 if(ret==-1) //error
422 return false;
423 if(ret==1) //success:objects skipped
424 {
425 Count = StartNumber-1;
426 StartNumber=0;
427 }
428 }
429 }
430
431 p = IsOption("l",GENOPTIONS);
432 if(p)
433 {
434 EndNumber=atoi(p);
435 if(TempStartNumber && EndNumber<TempStartNumber)
436 EndNumber=TempStartNumber;
437 }
438
439 return true;
440 }
441
442 //////////////////////////////////////////////////////
443 /// Retrieves an object stored by AddChemObject() during output
444 OBBase* OBConversion::GetChemObject()
445 {
446 Index++;
447 return pOb1;
448 }
449
450 //////////////////////////////////////////////////////
451 /// Called by ReadMolecule() to deliver an object it has read from an input stream.
452 /// Used in two modes:
453 /// - When Count is negative it is left negative and the routine is just a store
454 /// for an OBBase object. The negative value returned tells the calling
455 /// routine that no more objects are required.
456 /// - When count is >=0, probably set by Convert(), it acts as a queue of 2:
457 /// writing the currently stored value before accepting the supplied one. This delay
458 /// allows output routines to respond differently when the written object is the last.
459 /// Count is incremented with each call, even if pOb=NULL.
460 /// Objects are not added to the queue if the count is outside the range
461 /// StartNumber to EndNumber. There is no upper limit if EndNumber is zero.
462 /// The return value is the number of objects, including this one, which have been
463 /// input (but not necessarily output).
464 int OBConversion::AddChemObject(OBBase* pOb)
465 {
466 if(Count<0)
467 {
468 pOb1=pOb;
469 return Count;
470 }
471 Count++;
472 if(Count>=(int)StartNumber)//keeps reading objects but does nothing with them
473 {
474 if(Count==(int)EndNumber)
475 ReadyToInput=false; //stops any more objects being read
476
477 rInlen = pInStream->tellg() - rInpos;
478
479 if(pOb)
480 {
481 if(pOb1 && pOutFormat) //see if there is an object ready to be output
482 {
483 //Output object
484 if (!pOutFormat->WriteChemObject(this))
485 {
486 //faultly write, so finish
487 --Index;
488 ReadyToInput=false;
489 return Count;
490 }
491 }
492 pOb1=pOb;
493 wInpos = rInpos; //Save the position in the input file to be accessed when writing it
494 wInlen = rInlen;
495 }
496 }
497 return Count;
498 }
499 //////////////////////////////////////////////////////
500 int OBConversion::GetOutputIndex() const
501 {
502 //The number of objects actually written already from this instance of OBConversion
503 return Index;
504 }
505 void OBConversion::SetOutputIndex(int indx)
506 {
507 Index=indx;
508 }
509 //////////////////////////////////////////////////////
510 OBFormat* OBConversion::FindFormat(const char* ID)
511 {
512 //Case insensitive
513 if(FormatsMap().find(ID) == FormatsMap().end())
514 return NULL;
515 else
516 return FormatsMap()[ID];
517 }
518
519 //////////////////////////////////////////////////
520 const char* OBConversion::GetTitle() const
521 {
522 return(InFilename.c_str());
523 }
524
525 void OBConversion::SetMoreFilesToCome()
526 {
527 MoreFilesToCome=true;
528 }
529
530 void OBConversion::SetOneObjectOnly()
531 {
532 OneObjectOnly=true;
533 m_IsLast=true;
534 }
535
536 /////////////////////////////////////////////////////////
537 OBFormat* OBConversion::FormatFromExt(const char* filename)
538 {
539 string file = filename;
540 size_t extPos = file.rfind(".");
541
542 if(extPos!=string::npos)
543 {
544 // only do this if we actually can read .gz files
545 #ifdef HAVE_LIBZ
546 if (file.substr(extPos,3) == ".gz")
547 {
548 file.erase(extPos);
549 extPos = file.rfind(".");
550 if (extPos!=string::npos)
551 return FindFormat( (file.substr(extPos + 1, file.size())).c_str() );
552 }
553 else
554 #endif
555 return FindFormat( (file.substr(extPos + 1, file.size())).c_str() );
556 }
557 return NULL; //if no extension
558 }
559
560 OBFormat* OBConversion::FormatFromMIME(const char* MIME)
561 {
562 if(FormatsMIMEMap().find(MIME) == FormatsMIMEMap().end())
563 return NULL;
564 else
565 return FormatsMIMEMap()[MIME];
566 }
567
568 bool OBConversion::Read(OBBase* pOb, std::istream* pin)
569 {
570 if(pin)
571 pInStream=pin;
572 if(!pInFormat) return false;
573
574 #ifdef HAVE_LIBZ
575 zlib_stream::zip_istream zIn(*pInStream);
576 if(zIn.is_gzip())
577 pInStream = &zIn;
578 #endif
579
580 return pInFormat->ReadMolecule(pOb, this);
581 }
582 //////////////////////////////////////////////////
583 /// Writes the object pOb but does not delete it afterwards.
584 /// The output stream is lastingly changed if pout is not NULL
585 /// Returns true if successful.
586 bool OBConversion::Write(OBBase* pOb, ostream* pos)
587 {
588 if(pos)
589 pOutStream=pos;
590 if(!pOutFormat) return false;
591
592 ostream* pOrigOutStream = pOutStream;
593 #ifdef HAVE_LIBZ
594 zlib_stream::zip_ostream zOut(*pOutStream);
595 if(IsOption("z",GENOPTIONS))
596 {
597 // make sure to output the header
598 zOut.make_gzip();
599 pOutStream = &zOut;
600 }
601 #endif
602
603 bool ret = pOutFormat->WriteMolecule(pOb,this);
604 pOutStream = pOrigOutStream;
605 return ret;
606 }
607
608 //////////////////////////////////////////////////
609 /// Writes the object pOb but does not delete it afterwards.
610 /// The output stream not changed (since we cannot write to this string later)
611 /// Returns true if successful.
612 std::string OBConversion::WriteString(OBBase* pOb)
613 {
614 ostream *oldStream = pOutStream; // save old output
615 stringstream newStream;
616
617 if(pOutFormat)
618 {
619 Write(pOb, &newStream);
620 }
621 pOutStream = oldStream;
622
623 return newStream.str();
624 }
625
626 //////////////////////////////////////////////////
627 /// Writes the object pOb but does not delete it afterwards.
628 /// The output stream is lastingly changed to point to the file
629 /// Returns true if successful.
630 bool OBConversion::WriteFile(OBBase* pOb, string filePath)
631 {
632 if(!pOutFormat) return false;
633
634 ofstream ofs;
635 ios_base::openmode omode =
636 pOutFormat->Flags() & WRITEBINARY ? ios_base::out|ios_base::binary : ios_base::out;
637
638 ofs.open(filePath.c_str(),omode);
639 if(!ofs)
640 {
641 cerr << "Cannot write to " << filePath <<endl;
642 return false;
643 }
644
645 return Write(pOb, &ofs);
646 }
647
648 ////////////////////////////////////////////
649 bool OBConversion::ReadString(OBBase* pOb, std::string input)
650 {
651 stringstream pin(input);
652 return Read(pOb,&pin);
653 }
654
655
656 ////////////////////////////////////////////
657 bool OBConversion::ReadFile(OBBase* pOb, std::string filePath)
658 {
659 if(!pInFormat) return false;
660
661 ifstream ifs;
662 ios_base::openmode imode =
663 pOutFormat->Flags() & READBINARY ? ios_base::in|ios_base::binary : ios_base::in;
664
665 ifs.open(filePath.c_str(),imode);
666 if(!ifs)
667 {
668 cerr << "Cannot read from " << filePath << endl;
669 return false;
670 }
671
672 return Read(pOb,&ifs);
673 }
674
675
676 ////////////////////////////////////////////
677 const char* OBConversion::Description()
678 {
679 return "Conversion options\n \
680 -f <#> Start import at molecule # specified\n \
681 -l <#> End import at molecule # specified\n \
682 -t All input files describe a single molecule\n \
683 -e Continue with next object after error, if possible\n \
684 -z Compress the output with gzip\n";
685 }
686
687 ////////////////////////////////////////////
688 bool OBConversion::IsLast()
689 {
690 return m_IsLast;
691 }
692 ////////////////////////////////////////////
693 bool OBConversion::IsFirstInput()
694 {
695 return (Count==0);
696 }
697
698 /////////////////////////////////////////////////
699 string OBConversion::BatchFileName(string& BaseName, string& InFile)
700 {
701 //Replaces * in BaseName by InFile without extension and path
702 string ofname(BaseName);
703 int pos = ofname.find('*');
704 if(pos>=0)
705 {
706 //Replace * by input filename
707 int posdot=(InFile).rfind('.');
708 if(posdot==-1) posdot=(InFile).size();
709 int posname=(InFile).find_last_of("\\/");
710 ofname.replace(pos,1, (InFile), posname+1, posdot-posname-1);
711 }
712 return ofname;
713 }
714
715 ////////////////////////////////////////////////
716 string OBConversion::IncrementedFileName(string& BaseName, const int Count)
717 {
718 //Replaces * in BaseName by Count
719 string ofname(BaseName);
720 int pos = ofname.find('*');
721 if(pos>=0)
722 {
723 char num[33];
724 snprintf(num, 33, "%d", Count);
725 ofname.replace(pos,1, num);
726 }
727 return ofname;
728 }
729 ////////////////////////////////////////////////////
730
731 /**
732 Makes input and output streams, and carries out normal,
733 batch, aggregation, and splitting conversion.
734
735 Normal
736 Done if FileList contains a single file name and OutputFileName
737 does not contain a *.
738
739 Aggregation
740 Done if FileList has more than one file name and OutputFileName does
741 not contain * . All the chemical objects are converted and sent
742 to the single output file.
743
744 Splitting
745 Done if FileList contains a single file name and OutputFileName
746 contains a * . Each chemical object in the input file converted
747 and sent to a separate file whose name is OutputFileName with the
748 * replaced by 1, 2, 3, etc.
749 For example, if OutputFileName is NEW*.smi then the output files are
750 NEW1.smi, NEW2.smi, etc.
751
752 Batch Conversion
753 Done if FileList has more than one file name and contains a * .
754 Each input file is converted to an output file whose name is
755 OutputFileName with the * replaced by the inputfile name without its
756 path and extension.
757 So if the input files were inpath/First.cml, inpath/Second.cml
758 and OutputFileName was NEW*.mol, the output files would be
759 NEWFirst.mol, NEWSecond.mol.
760
761 If FileList is empty, the input stream that has already been set
762 (usually in the constructor) is used. If OutputFileName is empty,
763 the output stream already set is used.
764
765 On exit, OutputFileList contains the names of the output files.
766
767 Returns the number of Chemical objects converted.
768 */
769 int OBConversion::FullConvert(std::vector<std::string>& FileList, std::string& OutputFileName,
770 std::vector<std::string>& OutputFileList)
771 {
772
773 istream* pInStream;
774 ostream* pOutStream=NULL;
775 ifstream is;
776 ofstream os;
777 bool HasMultipleOutputFiles=false;
778 int Count=0;
779 bool CommonInFormat = pInFormat ? true:false; //whether set in calling routine
780 ios_base::openmode omode =
781 pOutFormat->Flags() & WRITEBINARY ? ios_base::out|ios_base::binary : ios_base::out;
782 try
783 {
784 ofstream ofs;
785
786 //OUTPUT
787 if(OutputFileName.empty())
788 pOutStream = NULL; //use existing stream
789 else
790 {
791 if(OutputFileName.find_first_of('*')!=string::npos) HasMultipleOutputFiles = true;
792 if(!HasMultipleOutputFiles)
793 {
794 os.open(OutputFileName.c_str(),omode);
795 if(!os)
796 {
797 cerr << "Cannot write to " << OutputFileName <<endl;
798 return 0;
799 }
800 OutputFileList.push_back(OutputFileName);
801 pOutStream=&os;
802 }
803 }
804
805 if(IsOption("t",GENOPTIONS))
806 {
807 //Concatenate input file option (multiple files, single molecule)
808 if(HasMultipleOutputFiles)
809 {
810 cerr << "Cannot have multiple output files and also concatenate input files (-t option)" <<endl;
811 return 0;
812 }
813
814 stringstream allinput;
815 vector<string>::iterator itr;
816 for(itr=FileList.begin();itr!=FileList.end();itr++)
817 {
818 ifstream ifs((*itr).c_str());
819 if(!ifs)
820 {
821 cerr << "Cannot open " << *itr <<endl;
822 continue;
823 }
824 allinput << ifs.rdbuf(); //Copy all file contents
825 ifs.close();
826 }
827 Count = Convert(&allinput,pOutStream);
828 return Count;
829 }
830
831 //INPUT
832 if(FileList.empty())
833 pInStream = NULL;
834 else
835 {
836 if(FileList.size()>1)
837 {
838 //multiple input files
839 vector<string>::iterator itr, tempitr;
840 tempitr = FileList.end();
841 tempitr--;
842 for(itr=FileList.begin();itr!=FileList.end();itr++)
843 {
844 InFilename = *itr;
845 ifstream ifs;
846 if(!OpenAndSetFormat(CommonInFormat, &ifs))
847 continue;
848
849 if(HasMultipleOutputFiles)
850 {
851 //Batch conversion
852 string batchfile = BatchFileName(OutputFileName,*itr);
853 if(ofs.is_open()) ofs.close();
854 ofs.open(batchfile.c_str(), omode);
855 if(!ofs)
856 {
857 cerr << "Cannot open " << batchfile << endl;
858 return Count;
859 }
860 OutputFileList.push_back(batchfile);
861 SetOutputIndex(0); //reset for new file
862 Count += Convert(&ifs,&ofs);
863 }
864 else
865 {
866 //Aggregation
867 if(itr!=tempitr) SetMoreFilesToCome();
868 Count = Convert(&ifs,pOutStream);
869 }
870 }
871 return Count;
872 }
873 else
874 {
875 //Single input file
876 InFilename = FileList[0];
877 if(!OpenAndSetFormat(CommonInFormat, &is))
878 return 0;
879 pInStream=&is;
880
881 if(HasMultipleOutputFiles)
882 {
883 //Splitting
884 //Output is put in a temporary stream and written to a file
885 //with an augmenting name only when it contains a valid object.
886 int Indx=1;
887 for(;;)
888 {
889 stringstream ss;
890 SetOutputIndex(0); //reset for new file
891 SetOneObjectOnly();
892
893 int ThisFileCount = Convert(pInStream,&ss);
894 if(ThisFileCount==0) break;
895 Count+=ThisFileCount;
896
897 if(ofs.is_open()) ofs.close();
898 string incrfile = IncrementedFileName(OutputFileName,Indx++);
899 ofs.open(incrfile.c_str(), omode);
900 if(!ofs)
901 {
902 cerr << "Cannot write to " << incrfile << endl;
903 return Count;
904 }
905 OutputFileList.push_back(incrfile);
906 ofs << ss.rdbuf();
907 ofs.close();
908 ss.clear();
909 }
910 return Count;
911 }
912 }
913 }
914
915 //Single input and output files
916 Count = Convert(pInStream,pOutStream);
917 return Count;
918 }
919 catch(...)
920 {
921 cerr << "Conversion failed with an exception. Count=" << Count <<endl;
922 return Count;
923 }
924 }
925
926 bool OBConversion::OpenAndSetFormat(bool SetFormat, ifstream* is)
927 {
928 //Opens file using InFilename and sets pInFormat if requested
929 if(!SetFormat)
930 {
931 pInFormat = FormatFromExt(InFilename.c_str());
932 if(pInFormat==NULL)
933 {
934 string::size_type pos = InFilename.rfind('.');
935 string ext;
936 if(pos!=string::npos)
937 ext = InFilename.substr(pos);
938 cerr << "Cannot read input format \"" << ext << '\"'
939 << " for file \"" << InFilename << "\"" << endl;
940 return false;
941 }
942 }
943
944 ios_base::openmode imode;
945 #ifdef ALL_READS_BINARY //Makes unix files compatible with VC++6
946 imode = ios_base::in|ios_base::binary;
947 #else
948 imode = pInFormat->Flags() & READBINARY ? ios_base::in|ios_base::binary : ios_base::in;
949 #endif
950
951 is->open(InFilename.c_str(), imode);
952 if(!is->good())
953 {
954 cerr << "Cannot open " << InFilename <<endl;
955 return false;
956 }
957
958 return true;
959 }
960
961 ///////////////////////////////////////////////
962 void OBConversion::AddOption(const char* opt, Option_type opttyp, const char* txt)
963 {
964 //Also updates an option
965 if(txt==NULL)
966 OptionsArray[opttyp][opt]=string();
967 else
968 OptionsArray[opttyp][opt]=txt;
969 }
970
971 const char* OBConversion::IsOption(const char* opt, Option_type opttyp)
972 {
973 //Returns NULL if option not found or a pointer to the text if it is
974 map<string,string>::iterator pos;
975 pos = OptionsArray[opttyp].find(opt);
976 if(pos==OptionsArray[opttyp].end())
977 return NULL;
978 return pos->second.c_str();
979 }
980
981 bool OBConversion::RemoveOption(const char* opt, Option_type opttyp)
982 {
983 return OptionsArray[opttyp].erase(opt)!=0;//true if was there
984 }
985
986 void OBConversion::SetOptions(const char* options, Option_type opttyp)
987 {
988 while(*options)
989 {
990 string ch(1, *options++);
991 if(*options=='\"')
992 {
993 string txt = options+1;
994 string::size_type pos = txt.find('\"');
995 if(pos==string::npos)
996 return; //options is illformed
997 txt.erase(pos);
998 OptionsArray[opttyp][ch]= txt;
999 options += pos+2;
1000 }
1001 else
1002 OptionsArray[opttyp][ch] = string();
1003 }
1004 }
1005
1006 typedef std::map<string,int> OPAMapType;
1007 OPAMapType& OBConversion::OptionParamArray(Option_type typ)
1008 {
1009 static OPAMapType* opa = new OPAMapType[3];
1010 return opa[typ];
1011 }
1012
1013 void OBConversion::RegisterOptionParam(string name, OBFormat* pFormat,
1014 int numberParams, Option_type typ)
1015 {
1016 //Gives error message if the number of parameters conflicts with an existing registration
1017 map<string,int>::iterator pos;
1018 pos = OptionParamArray(typ).find(name);
1019 if(pos!=OptionParamArray(typ).end())
1020 {
1021 if(pos->second!=numberParams)
1022 {
1023 string description("API");
1024 if(pFormat)
1025 description=pFormat->Description();
1026 cerr << "The number of parameters needed by option \"" << name << "\" in "
1027 << description.substr(0,description.find('\n'))
1028 << " differs from an earlier registration." << endl;
1029 return;
1030 }
1031 }
1032 OptionParamArray(typ)[name] = numberParams;
1033 }
1034
1035 int OBConversion::GetOptionParams(string name, Option_type typ)
1036 {
1037 //returns the number of parameters registered for the option, or 0 if not found
1038 map<string,int>::iterator pos;
1039 pos = OptionParamArray(typ).find(name);
1040 if(pos==OptionParamArray(typ).end())
1041 return 0;
1042 return pos->second;
1043 }
1044
1045 }//namespace OpenBabel
1046
1047 //! \file obconversion.cpp
1048 //! \brief Implementation of OBFormat and OBConversion classes.