1 |
tim |
2440 |
/********************************************************************** |
2 |
|
|
obconversion.cpp - Declaration of OBFormat and OBConversion |
3 |
|
|
|
4 |
|
|
Copyright (C) 2004 by Chris Morley |
5 |
|
|
Some portions Copyright (C) 2005 by Geoffrey Hutchison |
6 |
|
|
|
7 |
|
|
This file is part of the Open Babel project. |
8 |
|
|
For more information, see <http://openbabel.sourceforge.net/> |
9 |
|
|
|
10 |
|
|
This program is free software; you can redistribute it and/or modify |
11 |
|
|
it under the terms of the GNU General Public License as published by |
12 |
|
|
the Free Software Foundation version 2 of the License. |
13 |
|
|
|
14 |
|
|
This program is distributed in the hope that it will be useful, |
15 |
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 |
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 |
|
|
GNU General Public License for more details. |
18 |
|
|
***********************************************************************/ |
19 |
|
|
// Definition of OBConversion routines |
20 |
|
|
|
21 |
|
|
#ifdef _WIN32 |
22 |
gezelter |
3057 |
#pragma warning (disable : 4786) |
23 |
tim |
2440 |
|
24 |
gezelter |
3057 |
//using 'this' in base class initializer |
25 |
|
|
#pragma warning (disable : 4355) |
26 |
tim |
2440 |
|
27 |
gezelter |
3057 |
#ifdef GUI |
28 |
|
|
#undef DATADIR |
29 |
|
|
#include "stdafx.hpp" //(includes<windows.h> |
30 |
|
|
#endif |
31 |
tim |
2440 |
#endif |
32 |
|
|
|
33 |
|
|
#include <iostream> |
34 |
|
|
#include <fstream> |
35 |
|
|
#include <sstream> |
36 |
|
|
#include <string> |
37 |
|
|
#include <map> |
38 |
|
|
//#include <dlfcn.h> |
39 |
|
|
|
40 |
|
|
#include "obconversion.hpp" |
41 |
|
|
|
42 |
|
|
#ifdef HAVE_LIBZ |
43 |
|
|
#include "zipstream.hpp" |
44 |
|
|
#endif |
45 |
|
|
|
46 |
|
|
#if !HAVE_STRNCASECMP |
47 |
|
|
extern "C" int strncasecmp(const char *s1, const char *s2, size_t n); |
48 |
|
|
#endif |
49 |
|
|
|
50 |
|
|
#ifndef BUFF_SIZE |
51 |
|
|
#define BUFF_SIZE 32768 |
52 |
|
|
#endif |
53 |
|
|
|
54 |
|
|
using namespace std; |
55 |
|
|
namespace OpenBabel { |
56 |
|
|
|
57 |
gezelter |
3057 |
const char* OBFormat::TargetClassDescription() |
58 |
|
|
{ |
59 |
|
|
//Provides class of default format unless overridden |
60 |
|
|
if(OBConversion::GetDefaultFormat()) |
61 |
|
|
return OBConversion::GetDefaultFormat()->TargetClassDescription(); |
62 |
|
|
else |
63 |
|
|
return ""; |
64 |
|
|
} |
65 |
|
|
/// Reports type information for the class of object handled by this format.
/// Unless a derived format overrides it, the answer is delegated to the
/// default format registered with OBConversion.
const type_info& OBFormat::GetType()
{
  OBFormat* const fallback = OBConversion::GetDefaultFormat();
  if(fallback)
    return fallback->GetType();

  // No default format is set, so there is nothing meaningful to report;
  // the type of the 'this' pointer is returned purely as a placeholder.
  return typeid(this);
}
73 |
tim |
2440 |
|
74 |
gezelter |
3057 |
//*************************************************** |
75 |
tim |
2440 |
|
76 |
gezelter |
3057 |
/** @class OBConversion |
77 |
|
|
OBConversion maintains a list of the available formats, |
78 |
|
|
provides information on them, and controls the conversion process. |
79 |
|
|
|
80 |
|
|
A conversion is carried out by the calling routine, usually in a |
81 |
|
|
user interface or an application program, making an instance of |
82 |
|
|
OBConversion. It is loaded with the in and out formats, any options |
83 |
|
|
and (usually) the default streams for input and output. Then either |
84 |
|
|
the Convert() function is called, which allows a single input file |
85 |
|
|
to be converted, or the extended functionality of FullConvert() |
86 |
|
|
is used. This allows multiple input and output files, allowing: |
87 |
|
|
- aggregation - the contents of many input files converted |
88 |
|
|
and sent to one output file; |
89 |
|
|
- splitting - the molecules from one input file sent to |
90 |
|
|
separate output files; |
91 |
|
|
- batch conversion - each input file converted to an output file. |
92 |
|
|
|
93 |
|
|
These procedures constitute the "Convert" interface. OBConversion |
94 |
|
|
and the user interface or application program do not need to be |
95 |
|
|
aware of any other part of OpenBabel - mol.h is not \#included. This |
96 |
|
|
allows any chemical object derived from OBBase to be converted; |
97 |
|
|
the type of object is decided by the input format class. |
98 |
|
|
However, currently, almost all the conversions are for molecules of |
99 |
|
|
class OBMol. |
100 |
|
|
/// |
101 |
|
|
OBConversion can also be used with an "API" interface |
102 |
|
|
called from programs which manipulate chemical objects. Input/output is |
103 |
|
|
done with the Read() and Write() functions which work with any |
104 |
|
|
chemical object, but need to have its type specified. (The |
105 |
|
|
ReadMolecule() and WriteMolecule() functions of the format classes |
106 |
|
|
can also be used directly.) |
107 |
|
|
|
108 |
|
|
|
109 |
|
|
Example code using OBConversion |
110 |
|
|
|
111 |
|
|
<b>To read in a molecule, manipulate it and write it out.</b> |
112 |
|
|
|
113 |
|
|
Set up an istream and an ostream, to and from files or elsewhere. |
114 |
|
|
(cin and cout are used in the example). Specify the file formats. |
115 |
|
|
|
116 |
|
|
@code |
117 |
|
|
OBConversion conv(&cin,&cout); |
118 |
|
|
if(conv.SetInAndOutFormats("SMI","MOL")) |
119 |
|
|
{ |
120 |
|
|
OBMol mol; |
121 |
|
|
if(conv.Read(&mol)) |
122 |
|
|
...manipulate molecule |
123 |
|
|
|
124 |
|
|
conv.Write(&mol); |
125 |
|
|
} |
126 |
|
|
@endcode |
127 |
|
|
|
128 |
|
|
A two stage construction is used to allow error handling |
129 |
|
|
if the format ID is not recognized. This is necessary now that the |
130 |
|
|
formats are dynamic and errors are not caught at compile time. |
131 |
|
|
OBConversion::Read() is a templated function so that objects derived |
132 |
|
|
from OBBase can also be handled, in addition to OBMol, if the format |
133 |
|
|
routines are written appropriately. |
134 |
|
|
|
135 |
|
|
<b>To make a molecule from a SMILES string.</b> |
136 |
|
|
@code |
137 |
|
|
std::string SmilesString; |
138 |
|
|
OBMol mol; |
139 |
|
|
stringstream ss(SmilesString); |
140 |
|
|
OBConversion conv(&ss); |
141 |
|
|
if(conv.SetInFormat("smi") && conv.Read(&mol)) |
142 |
|
|
... |
143 |
|
|
@endcode |
144 |
|
|
|
145 |
|
|
<b>To do a file conversion without manipulating the molecule.</b> |
146 |
|
|
|
147 |
|
|
@code |
148 |
|
|
#include "obconversion.hpp" //mol.h is not needed |
149 |
|
|
...set up an istream is and an ostream os |
150 |
|
|
OBConversion conv(&is,&os); |
151 |
|
|
if(conv.SetInAndOutFormats("SMI","MOL")) |
152 |
|
|
{ |
153 |
|
|
conv.SetOptions("h"); //Optional; (h adds explicit hydrogens) |
154 |
|
|
conv.Convert(); |
155 |
|
|
} |
156 |
|
|
@endcode |
157 |
|
|
|
158 |
|
|
<b>To add automatic format conversion to an existing program.</b> |
159 |
|
|
|
160 |
|
|
The existing program inputs from the file identified by the |
161 |
|
|
const char* filename into the istream is. The file is assumed to have |
162 |
|
|
a format ORIG, but other formats, identified by their file extensions, |
163 |
|
|
can now be used. |
164 |
|
|
|
165 |
|
|
@code |
166 |
|
|
ifstream ifs(filename); //Original code |
167 |
|
|
|
168 |
|
|
OBConversion conv; |
169 |
|
|
OBFormat* inFormat = conv.FormatFromExt(filename); |
170 |
|
|
OBFormat* outFormat = conv.GetFormat("ORIG"); |
171 |
|
|
istream* pIn = &ifs; |
172 |
|
|
stringstream newstream; |
173 |
|
|
if(inFormat && outFormat) |
174 |
|
|
{ |
175 |
|
|
conv.SetInAndOutFormats(inFormat,outFormat); |
176 |
|
|
conv.Convert(pIn,&newstream); |
177 |
|
|
pIn=&newstream; |
178 |
|
|
} |
179 |
|
|
//else error; new features not available; fallback to original functionality |
180 |
|
|
|
181 |
|
|
...Carry on with original code using pIn |
182 |
|
|
@endcode |
183 |
|
|
|
184 |
|
|
In Windows a degree of independence from OpenBabel can be achieved using DLLs. |
185 |
|
|
This code would be linked with obconv.lib. |
186 |
|
|
At runtime the following DLLs would be in the executable directory: |
187 |
|
|
obconv.dll, obdll.dll, one or more *.obf format files. |
188 |
|
|
*/ |
189 |
|
|
|
190 |
tim |
2440 |
int OBConversion::FormatFilesLoaded = 0; |
191 |
|
|
|
192 |
gezelter |
3057 |
OBFormat* OBConversion::pDefaultFormat=NULL; |
193 |
tim |
2440 |
|
194 |
gezelter |
3057 |
/// Constructs a conversion object bound to the given streams.
/// @param is  input stream; if NULL, Convert() reports an error unless a
///            stream is supplied later
/// @param os  output stream; same remark as for @p is
///
/// While FormatFilesLoaded is zero the format files are (re)loaded, and the
/// general options "f" and "l" are registered as taking one parameter.
OBConversion::OBConversion(istream* is, ostream* os) :
  pInFormat(NULL),pOutFormat(NULL), Index(0), StartNumber(1),
  EndNumber(0), Count(-1), m_IsLast(true), MoreFilesToCome(false),
  OneObjectOnly(false), pOb1(NULL), pAuxConv(NULL)
{
  pInStream=is;
  pOutStream=os;

  // These members are read by the copy constructor, so give them defined
  // values instead of leaving them indeterminate until Convert() runs.
  ReadyToInput=false;
  rInlen=0;
  wInlen=0;

  if (FormatFilesLoaded == 0)
    FormatFilesLoaded = LoadFormatFiles();

  //These options take a parameter
  RegisterOptionParam("f", NULL, 1,GENOPTIONS);
  RegisterOptionParam("l", NULL, 1,GENOPTIONS);
}
208 |
tim |
2440 |
|
209 |
gezelter |
3057 |
///This static function returns a reference to the FormatsMap |
210 |
|
|
///which, because it is a static local variable is constructed only once. |
211 |
|
|
///This fiddle is to avoid the "static initialization order fiasco" |
212 |
|
|
///See Marshall Cline's C++ FAQ Lite document, www.parashift.com/c++-faq-lite/". |
213 |
|
|
FMapType& OBConversion::FormatsMap() |
214 |
|
|
{ |
215 |
|
|
static FMapType* fm = NULL; |
216 |
|
|
if (!fm) |
217 |
|
|
fm = new FMapType; |
218 |
|
|
return *fm; |
219 |
|
|
} |
220 |
tim |
2440 |
|
221 |
gezelter |
3057 |
///This static function returns a reference to the FormatsMIMEMap |
222 |
|
|
///which, because it is a static local variable is constructed only once. |
223 |
|
|
///This fiddle is to avoid the "static initialization order fiasco" |
224 |
|
|
///See Marshall Cline's C++ FAQ Lite document, www.parashift.com/c++-faq-lite/". |
225 |
|
|
FMapType& OBConversion::FormatsMIMEMap() |
226 |
|
|
{ |
227 |
|
|
static FMapType* fm = NULL; |
228 |
|
|
if (!fm) |
229 |
|
|
fm = new FMapType; |
230 |
|
|
return *fm; |
231 |
|
|
} |
232 |
tim |
2440 |
|
233 |
gezelter |
3057 |
///////////////////////////////////////////////// |
234 |
|
|
/// Copy constructor.
/// Copies the formats, streams, options, counters and state flags of @p o.
/// Pointers are copied as-is (no deep copy). The auxiliary conversion is not
/// shared with the source: the destructor deletes pAuxConv, so the copy
/// starts with none.
OBConversion::OBConversion(const OBConversion& o)
{
  // Formats and streams
  pInFormat  = o.pInFormat;
  pOutFormat = o.pOutFormat;
  pInStream  = o.pInStream;
  pOutStream = o.pOutStream;
  InFilename = o.InFilename;

  // The three option sets
  for(int i=0; i<3; ++i)
    OptionsArray[i] = o.OptionsArray[i];

  // Counters and range
  Index       = o.Index;
  Count       = o.Count;
  StartNumber = o.StartNumber;
  EndNumber   = o.EndNumber;

  // Positions/lengths of the current and pending objects in the input
  rInpos = o.rInpos;
  rInlen = o.rInlen;
  wInpos = o.wInpos;
  wInlen = o.wInlen;

  // State flags and the pending object
  ReadyToInput    = o.ReadyToInput;
  m_IsLast        = o.m_IsLast;
  MoreFilesToCome = o.MoreFilesToCome;
  OneObjectOnly   = o.OneObjectOnly;
  pOb1            = o.pOb1;

  pAuxConv = NULL;
}
260 |
|
|
//////////////////////////////////////////////// |
261 |
tim |
2440 |
|
262 |
gezelter |
3057 |
/// Destroys the auxiliary conversion object, except when pAuxConv refers to
/// this very object (deleting it would then recurse into this destructor).
OBConversion::~OBConversion()
{
  if(pAuxConv==this)
    return;
  delete pAuxConv;
}
267 |
|
|
////////////////////////////////////////////////////// |
268 |
tim |
2440 |
|
269 |
gezelter |
3057 |
/// Class information on formats is collected by making an instance of the class |
270 |
|
|
/// derived from OBFormat(only one is usually required). RegisterFormat() is called |
271 |
|
|
/// from its constructor. |
272 |
|
|
/// |
273 |
|
|
/// If the compiled format is stored separately, like in a DLL or shared library, |
274 |
|
|
/// the initialization code makes an instance of the imported OBFormat class. |
275 |
|
|
int OBConversion::RegisterFormat(const char* ID, OBFormat* pFormat, const char* MIME) |
276 |
|
|
{ |
277 |
|
|
FormatsMap()[ID] = pFormat; |
278 |
|
|
if (MIME) |
279 |
|
|
FormatsMIMEMap()[MIME] = pFormat; |
280 |
|
|
if(pFormat->Flags() & DEFAULTFORMAT) |
281 |
|
|
pDefaultFormat=pFormat; |
282 |
|
|
return FormatsMap().size(); |
283 |
|
|
} |
284 |
tim |
2440 |
|
285 |
gezelter |
3057 |
////////////////////////////////////////////////////// |
286 |
|
|
/// Loads the dynamically linked format files, when the build uses them.
/// @return with USING_DYNAMIC_LIBS, the number of format libraries opened
///         successfully (0 if the file search fails); otherwise always 1 so
///         that the constructor does not call this function again.
int OBConversion::LoadFormatFiles()
{
  int loaded=0;
#ifdef USING_DYNAMIC_LIBS
  //Depending on availability, look successively in
  //FORMATFILE_DIR, executable directory, or current directory
  string TargetDir;
#ifdef FORMATFILE_DIR
  // NOTE(review): this stores the literal text "FORMATFILE_DIR", not the
  // value of the macro - confirm that this is what is intended.
  TargetDir="FORMATFILE_DIR";
#endif

  DLHandler::getConvDirectory(TargetDir);

  vector<string> files;
  if(!DLHandler::findFiles(files,DLHandler::getFormatFilePattern(),TargetDir))
    return 0;

  for(vector<string>::iterator itr=files.begin(); itr!=files.end(); ++itr)
  {
    if(!DLHandler::openLib(*itr))
    {
      cerr << *itr << " did not load properly" << endl;
      continue;
    }
    ++loaded;
  }
#else
  loaded = 1; //avoid calling this function several times
#endif //USING_DYNAMIC_LIBS
  return loaded;
}
317 |
tim |
2440 |
|
318 |
gezelter |
3057 |
/** |
319 |
|
|
*Returns the ID + the first line of the description in str |
320 |
|
|
*and a pointer to the format in pFormat. |
321 |
|
|
*If called with str==NULL the first format is returned; |
322 |
|
|
*subsequent formats are returned by calling with str!=NULL and the previous value of itr |
323 |
|
|
*returns false, and str and pFormat NULL, when there are no more formats. |
324 |
|
|
*Use like: |
325 |
|
|
*@code |
326 |
|
|
* const char* str=NULL; |
327 |
|
|
* Formatpos pos; |
328 |
|
|
* OBConversion conv; // dummy to make sure static data is available |
329 |
|
|
* while(OBConversion::GetNextFormat(pos,str,pFormat)) |
330 |
|
|
* { |
331 |
|
|
* use str and pFormat |
332 |
|
|
* } |
333 |
|
|
*@endcode |
334 |
|
|
* |
335 |
|
|
* NOTE: Because of dynamic loading problems, it is usually necessary to |
336 |
|
|
* declare a "dummy" OBConversion object to access this static method. |
337 |
|
|
* (Not elegant, but will hopefully be fixed in the future.) |
338 |
|
|
*/ |
339 |
|
|
bool OBConversion::GetNextFormat(Formatpos& itr, const char*& str,OBFormat*& pFormat) |
340 |
|
|
{ |
341 |
tim |
2440 |
|
342 |
gezelter |
3057 |
pFormat = NULL; |
343 |
|
|
if(str==NULL) |
344 |
|
|
itr = FormatsMap().begin(); |
345 |
|
|
else |
346 |
|
|
itr++; |
347 |
|
|
if(itr == FormatsMap().end()) |
348 |
|
|
{ |
349 |
|
|
str=NULL; pFormat=NULL; |
350 |
|
|
return false; |
351 |
|
|
} |
352 |
|
|
static string s; |
353 |
|
|
s =itr->first; |
354 |
|
|
pFormat = itr->second; |
355 |
|
|
if(pFormat) |
356 |
|
|
{ |
357 |
|
|
string description(pFormat->Description()); |
358 |
|
|
s += " -- "; |
359 |
|
|
s += description.substr(0,description.find('\n')); |
360 |
|
|
} |
361 |
tim |
2440 |
|
362 |
gezelter |
3057 |
if(pFormat->Flags() & NOTWRITABLE) s+=" [Read-only]"; |
363 |
|
|
if(pFormat->Flags() & NOTREADABLE) s+=" [Write-only]"; |
364 |
tim |
2440 |
|
365 |
gezelter |
3057 |
str = s.c_str(); |
366 |
|
|
return true; |
367 |
|
|
} |
368 |
tim |
2440 |
|
369 |
gezelter |
3057 |
////////////////////////////////////////////////////// |
370 |
|
|
/// Sets the formats from their ids, e g CML. |
371 |
|
|
/// If inID is NULL, the input format is left unchanged. Similarly for outID |
372 |
|
|
/// Returns true if both formats have been successfully set at sometime |
373 |
|
|
bool OBConversion::SetInAndOutFormats(const char* inID, const char* outID) |
374 |
|
|
{ |
375 |
|
|
return SetInFormat(inID) && SetOutFormat(outID); |
376 |
|
|
} |
377 |
|
|
////////////////////////////////////////////////////// |
378 |
tim |
2440 |
|
379 |
gezelter |
3057 |
/// Sets the input and output formats from format objects.
/// The output format is only attempted when SetInFormat(pIn) succeeds.
/// Returns true if both calls succeed.
bool OBConversion::SetInAndOutFormats(OBFormat* pIn, OBFormat* pOut)
{
  if(!SetInFormat(pIn))
    return false;
  return SetOutFormat(pOut);
}
383 |
|
|
////////////////////////////////////////////////////// |
384 |
|
|
/// Sets the input format from a format object.
/// A NULL pIn leaves the current input format untouched and returns true.
/// Otherwise returns true unless the format is flagged NOTREADABLE.
bool OBConversion::SetInFormat(OBFormat* pIn)
{
  if(pIn!=NULL)
  {
    pInFormat=pIn;
    return (pInFormat->Flags() & NOTREADABLE) == 0;
  }
  return true;
}
391 |
|
|
////////////////////////////////////////////////////// |
392 |
|
|
/// Sets the output format from a format object.
/// A NULL pOut leaves the current output format untouched and returns true,
/// mirroring SetInFormat(OBFormat*); previously NULL was dereferenced.
/// Otherwise returns true unless the format is flagged NOTWRITABLE.
bool OBConversion::SetOutFormat(OBFormat* pOut)
{
  if(pOut==NULL)
    return true;
  pOutFormat=pOut;
  return !(pOutFormat->Flags() & NOTWRITABLE);
}
397 |
|
|
////////////////////////////////////////////////////// |
398 |
|
|
bool OBConversion::SetInFormat(const char* inID) |
399 |
|
|
{ |
400 |
|
|
if(inID) |
401 |
|
|
pInFormat = FindFormat(inID); |
402 |
|
|
return pInFormat && !(pInFormat->Flags() & NOTREADABLE); |
403 |
|
|
} |
404 |
|
|
////////////////////////////////////////////////////// |
405 |
tim |
2440 |
|
406 |
gezelter |
3057 |
bool OBConversion::SetOutFormat(const char* outID) |
407 |
|
|
{ |
408 |
|
|
if(outID) |
409 |
|
|
pOutFormat= FindFormat(outID); |
410 |
|
|
return pOutFormat && !(pOutFormat->Flags() & NOTWRITABLE); |
411 |
|
|
} |
412 |
tim |
2440 |
|
413 |
gezelter |
3057 |
////////////////////////////////////////////////////// |
414 |
|
|
/// Converts from the given input stream to the given output stream.
/// @param is  new input stream; NULL keeps the current one
/// @param os  new output stream; NULL keeps the current one
/// @return the value returned by Convert()
///
/// When built with HAVE_LIBZ, gzip input is decompressed transparently and
/// the output is gzip-compressed if the general option "z" is set. The
/// temporary (de)compression streams live only for the duration of this
/// call, so both stream pointers are restored before returning or throwing.
int OBConversion::Convert(istream* is, ostream* os)
{
  if(is) pInStream=is;
  if(os) pOutStream=os;

  // Let Convert() report a missing stream instead of dereferencing NULL below.
  if(pInStream==NULL || pOutStream==NULL)
    return Convert();

  istream* pOrigInStream = pInStream;
  ostream* pOrigOutStream = pOutStream;

#ifdef HAVE_LIBZ
  zlib_stream::zip_istream zIn(*pInStream);
  if(zIn.is_gzip())
    pInStream = &zIn;

  zlib_stream::zip_ostream zOut(*pOutStream);
  if(IsOption("z",GENOPTIONS))
  {
    // make sure to output the header
    zOut.make_gzip();
    pOutStream = &zOut;
  }
#endif

  int count = 0;
  try
  {
    count = Convert();
  }
  catch(...)
  {
    // Never leave the members pointing at the locals above.
    pInStream = pOrigInStream;
    pOutStream = pOrigOutStream;
    throw;
  }

  pInStream = pOrigInStream;
  pOutStream = pOrigOutStream;
  return count;
}
439 |
tim |
2440 |
|
440 |
gezelter |
3057 |
//////////////////////////////////////////////////// |
441 |
|
|
/// Actions the "convert" interface. |
442 |
|
|
/// Calls the OBFormat class's ReadMolecule() which |
443 |
|
|
/// - makes a new chemical object of its chosen type (e.g. OBMol) |
444 |
|
|
/// - reads an object from the input file |
445 |
|
|
/// - subjects the chemical object to 'transformations' as specified by the Options |
446 |
|
|
/// - calls AddChemObject to add it to a buffer. The previous object is first output |
447 |
|
|
/// via the output Format's WriteMolecule(). During the output process calling |
448 |
|
|
/// IsFirst() and GetIndex() (the number of objects including the current one already output. |
449 |
|
|
/// allows more control, for instance writing \<cml\> and \</cml\> tags for multiple molecule outputs only. |
450 |
|
|
/// |
451 |
|
|
/// AddChemObject does not save the object passed to it if it is NULL (as a result of a DoTransformation()) |
452 |
|
|
/// or if the number of the object is outside the range defined by |
453 |
|
|
/// StartNumber and EndNumber.This means the start and end counts apply to all chemical objects |
454 |
|
|
/// found whether or not they are output. |
455 |
|
|
/// |
456 |
|
|
/// If ReadMolecule returns false the input conversion loop is exited. |
457 |
|
|
/// |
458 |
|
|
int OBConversion::Convert() |
459 |
|
|
{ |
460 |
|
|
if(pInStream==NULL || pOutStream==NULL) |
461 |
|
|
{ |
462 |
|
|
cerr << "input or output stream not set" << endl; |
463 |
|
|
return 0; |
464 |
|
|
} |
465 |
tim |
2440 |
|
466 |
gezelter |
3057 |
if(!pInFormat) return 0; |
467 |
|
|
Count=0;//number objects processed |
468 |
tim |
2440 |
|
469 |
gezelter |
3057 |
if(!SetStartAndEnd()) |
470 |
|
|
return 0; |
471 |
tim |
2440 |
|
472 |
gezelter |
3057 |
ReadyToInput=true; |
473 |
|
|
m_IsLast=false; |
474 |
|
|
pOb1=NULL; |
475 |
|
|
wInlen=0; |
476 |
tim |
2440 |
|
477 |
gezelter |
3057 |
//Input loop |
478 |
|
|
while(ReadyToInput && pInStream->peek() != EOF && pInStream->good()) |
479 |
|
|
{ |
480 |
|
|
if(pInStream==&cin) |
481 |
|
|
{ |
482 |
|
|
if(pInStream->peek()=='\n') |
483 |
|
|
break; |
484 |
|
|
} |
485 |
|
|
else |
486 |
|
|
rInpos = pInStream->tellg(); |
487 |
tim |
2440 |
|
488 |
gezelter |
3057 |
bool ret=false; |
489 |
|
|
try |
490 |
|
|
{ |
491 |
|
|
ret = pInFormat->ReadChemObject(this); |
492 |
|
|
} |
493 |
|
|
catch(...) |
494 |
|
|
{ |
495 |
|
|
if(!IsOption("e", GENOPTIONS) && !OneObjectOnly) |
496 |
|
|
throw; |
497 |
|
|
} |
498 |
tim |
2440 |
|
499 |
gezelter |
3057 |
if(!ret) |
500 |
|
|
{ |
501 |
|
|
//error or termination request: terminate unless |
502 |
|
|
// -e option requested and sucessfully can skip past current object |
503 |
|
|
if(!IsOption("e", GENOPTIONS) || pInFormat->SkipObjects(0,this)!=1) |
504 |
|
|
break; |
505 |
|
|
} |
506 |
|
|
if(OneObjectOnly) |
507 |
|
|
break; |
508 |
|
|
// Objects supplied to AddChemObject() which may output them after a delay |
509 |
|
|
//ReadyToInput may be made false in AddChemObject() |
510 |
|
|
// by WriteMolecule() returning false or by Count==EndNumber |
511 |
|
|
} |
512 |
tim |
2440 |
|
513 |
gezelter |
3057 |
//Output last object |
514 |
|
|
//if(!MoreFilesToCome) |
515 |
|
|
// m_IsLast=true; |
516 |
|
|
m_IsLast= !MoreFilesToCome; |
517 |
tim |
2440 |
|
518 |
gezelter |
3057 |
if(pOutFormat) |
519 |
|
|
if(!pOutFormat->WriteChemObject(this)) |
520 |
|
|
Index--; |
521 |
tim |
2440 |
|
522 |
gezelter |
3057 |
//Put AddChemObject() into non-queue mode |
523 |
|
|
Count= -1; |
524 |
|
|
EndNumber=StartNumber=0; pOb1=NULL;//leave tidy |
525 |
|
|
MoreFilesToCome=false; |
526 |
|
|
OneObjectOnly=false; |
527 |
tim |
2440 |
|
528 |
gezelter |
3057 |
return Index; //The number actually output |
529 |
|
|
} |
530 |
|
|
////////////////////////////////////////////////////// |
531 |
|
|
bool OBConversion::SetStartAndEnd() |
532 |
|
|
{ |
533 |
|
|
int TempStartNumber=0; |
534 |
|
|
const char* p = IsOption("f",GENOPTIONS); |
535 |
|
|
if(p) |
536 |
|
|
{ |
537 |
|
|
StartNumber=atoi(p); |
538 |
|
|
if(StartNumber>1) |
539 |
|
|
{ |
540 |
|
|
TempStartNumber=StartNumber; |
541 |
|
|
//Try to skip objects now |
542 |
|
|
int ret = pInFormat->SkipObjects(StartNumber-1,this); |
543 |
|
|
if(ret==-1) //error |
544 |
|
|
return false; |
545 |
|
|
if(ret==1) //success:objects skipped |
546 |
|
|
{ |
547 |
|
|
Count = StartNumber-1; |
548 |
|
|
StartNumber=0; |
549 |
|
|
} |
550 |
|
|
} |
551 |
|
|
} |
552 |
tim |
2440 |
|
553 |
gezelter |
3057 |
p = IsOption("l",GENOPTIONS); |
554 |
|
|
if(p) |
555 |
|
|
{ |
556 |
|
|
EndNumber=atoi(p); |
557 |
|
|
if(TempStartNumber && EndNumber<TempStartNumber) |
558 |
|
|
EndNumber=TempStartNumber; |
559 |
|
|
} |
560 |
tim |
2440 |
|
561 |
gezelter |
3057 |
return true; |
562 |
|
|
} |
563 |
tim |
2440 |
|
564 |
gezelter |
3057 |
////////////////////////////////////////////////////// |
565 |
|
|
/// Retrieves an object stored by AddChemObject() during output |
566 |
|
|
OBBase* OBConversion::GetChemObject() |
567 |
|
|
{ |
568 |
|
|
Index++; |
569 |
|
|
return pOb1; |
570 |
|
|
} |
571 |
tim |
2440 |
|
572 |
gezelter |
3057 |
////////////////////////////////////////////////////// |
573 |
|
|
/// Called by ReadMolecule() to deliver an object it has read from an input stream. |
574 |
|
|
/// Used in two modes: |
575 |
|
|
/// - When Count is negative it is left negative and the routine is just a store |
576 |
|
|
/// for an OBBase object. The negative value returned tells the calling |
577 |
|
|
/// routine that no more objects are required. |
578 |
|
|
/// - When count is >=0, probably set by Convert(), it acts as a queue of 2: |
579 |
|
|
/// writing the currently stored value before accepting the supplied one. This delay |
580 |
|
|
/// allows output routines to respond differently when the written object is the last. |
581 |
|
|
/// Count is incremented with each call, even if pOb=NULL. |
582 |
|
|
/// Objects are not added to the queue if the count is outside the range |
583 |
|
|
/// StartNumber to EndNumber. There is no upper limit if EndNumber is zero. |
584 |
|
|
/// The return value is the number of objects, including this one, which have been |
585 |
|
|
/// input (but not necessarily output). |
586 |
|
|
int OBConversion::AddChemObject(OBBase* pOb) |
587 |
|
|
{ |
588 |
|
|
if(Count<0) |
589 |
|
|
{ |
590 |
|
|
pOb1=pOb; |
591 |
|
|
return Count; |
592 |
|
|
} |
593 |
|
|
Count++; |
594 |
|
|
if(Count>=(int)StartNumber)//keeps reading objects but does nothing with them |
595 |
|
|
{ |
596 |
|
|
if(Count==(int)EndNumber) |
597 |
|
|
ReadyToInput=false; //stops any more objects being read |
598 |
tim |
2440 |
|
599 |
gezelter |
3057 |
rInlen = pInStream->tellg() - rInpos; |
600 |
tim |
2440 |
|
601 |
gezelter |
3057 |
if(pOb) |
602 |
|
|
{ |
603 |
|
|
if(pOb1 && pOutFormat) //see if there is an object ready to be output |
604 |
|
|
{ |
605 |
|
|
//Output object |
606 |
|
|
if (!pOutFormat->WriteChemObject(this)) |
607 |
|
|
{ |
608 |
|
|
//faultly write, so finish |
609 |
|
|
--Index; |
610 |
|
|
ReadyToInput=false; |
611 |
|
|
return Count; |
612 |
|
|
} |
613 |
|
|
//Stop after writing with single object output files |
614 |
|
|
if(pOutFormat->Flags() & WRITEONEONLY) |
615 |
|
|
{ |
616 |
|
|
ReadyToInput = false; |
617 |
|
|
pOb1 = NULL; |
618 |
tim |
2440 |
|
619 |
gezelter |
3057 |
// if there are more molecules to output, send a warning |
620 |
|
|
cerr << "WARNING: You are attempting to convert a file" |
621 |
|
|
<< " with multiple molecule entries into a format" |
622 |
|
|
<< " which can only store one molecule. The current" |
623 |
|
|
<< " output will only contain the first molecule.\n\n"; |
624 |
tim |
2440 |
|
625 |
gezelter |
3057 |
cerr << "To convert this input into multiple separate" |
626 |
|
|
<< " output files, with one molecule per file, try:\n" |
627 |
|
|
<< "babel [input] [ouptut] -m\n\n"; |
628 |
tim |
2440 |
|
629 |
gezelter |
3057 |
cerr << "To pick one particular molecule" |
630 |
|
|
<< " (e.g., molecule 4), try:\n" |
631 |
|
|
<< "babel -f 4 -l 4 [input] [output]" << endl; |
632 |
tim |
2440 |
|
633 |
gezelter |
3057 |
return true; |
634 |
|
|
} |
635 |
|
|
} |
636 |
|
|
pOb1=pOb; |
637 |
|
|
wInpos = rInpos; //Save the position in the input file to be accessed when writing it |
638 |
|
|
wInlen = rInlen; |
639 |
|
|
} |
640 |
|
|
} |
641 |
|
|
return Count; |
642 |
|
|
} |
643 |
|
|
////////////////////////////////////////////////////// |
644 |
|
|
int OBConversion::GetOutputIndex() const |
645 |
|
|
{ |
646 |
|
|
//The number of objects actually written already from this instance of OBConversion |
647 |
|
|
return Index; |
648 |
|
|
} |
649 |
|
|
void OBConversion::SetOutputIndex(int indx) |
650 |
|
|
{ |
651 |
|
|
Index=indx; |
652 |
|
|
} |
653 |
|
|
////////////////////////////////////////////////////// |
654 |
|
|
/// Looks up a registered format by its ID.
/// Matching is done by the comparator of FormatsMap(); the original comment
/// describes the lookup as case insensitive - presumably a property of that
/// comparator, to be confirmed against the FMapType declaration.
/// @return the format, or NULL if no format is registered under ID
OBFormat* OBConversion::FindFormat(const char* ID)
{
  FMapType& formats = FormatsMap();
  FMapType::iterator hit = formats.find(ID);
  if(hit != formats.end())
    return hit->second;
  return NULL;
}
662 |
tim |
2440 |
|
663 |
gezelter |
3057 |
////////////////////////////////////////////////// |
664 |
|
|
const char* OBConversion::GetTitle() const |
665 |
|
|
{ |
666 |
|
|
return(InFilename.c_str()); |
667 |
|
|
} |
668 |
|
|
|
669 |
|
|
void OBConversion::SetMoreFilesToCome() |
670 |
|
|
{ |
671 |
|
|
MoreFilesToCome=true; |
672 |
|
|
} |
673 |
|
|
|
674 |
|
|
void OBConversion::SetOneObjectOnly() |
675 |
|
|
{ |
676 |
|
|
OneObjectOnly=true; |
677 |
|
|
m_IsLast=true; |
678 |
|
|
} |
679 |
|
|
|
680 |
|
|
///////////////////////////////////////////////////////// |
681 |
|
|
/// Finds the format whose ID matches the extension of a file name.
/// When built with HAVE_LIBZ a trailing ".gz" is removed first, so that
/// "file.mol.gz" is looked up as "mol".
/// @param filename  file name or path; NULL yields NULL
/// @return the format, or NULL if there is no extension or no format is
///         registered under it
OBFormat* OBConversion::FormatFromExt(const char* filename)
{
  if(filename==NULL)
    return NULL;

  string file = filename;
  size_t extPos = file.rfind('.');
  if(extPos==string::npos)
    return NULL; //if no extension

#ifdef HAVE_LIBZ
  // only do this if we actually can read .gz files
  // The whole extension must be ".gz": a prefix test would also swallow
  // other extensions that merely start with "gz" (e.g. ".gzmat").
  if (file.compare(extPos, string::npos, ".gz") == 0)
  {
    file.erase(extPos);
    extPos = file.rfind('.');
    if (extPos==string::npos)
      return NULL; //nothing left in front of ".gz"
  }
#endif

  return FindFormat( file.substr(extPos + 1).c_str() );
}
703 |
tim |
2440 |
|
704 |
gezelter |
3057 |
/// Looks up a registered format by its MIME type.
/// @return the format, or NULL if none is registered under MIME
OBFormat* OBConversion::FormatFromMIME(const char* MIME)
{
  FMapType& byMime = FormatsMIMEMap();
  FMapType::iterator hit = byMime.find(MIME);
  if(hit != byMime.end())
    return hit->second;
  return NULL;
}
711 |
tim |
2440 |
|
712 |
gezelter |
3057 |
/// Reads one object from the input stream into pOb using the input format.
/// @param pOb  object to be filled by the format's ReadMolecule()
/// @param pin  new input stream; NULL keeps the current one. A non-NULL
///             stream stays the input stream after the call.
/// @return false if no input format or no input stream is set, otherwise the
///         result of ReadMolecule()
///
/// When built with HAVE_LIBZ, gzip input is decompressed through a temporary
/// stream which lives only for this call; the input stream pointer is
/// therefore restored before returning or throwing.
bool OBConversion::Read(OBBase* pOb, std::istream* pin)
{
  if(pin)
    pInStream=pin;
  if(!pInFormat || !pInStream)
    return false;

  std::istream* pOrigInStream = pInStream;

#ifdef HAVE_LIBZ
  zlib_stream::zip_istream zIn(*pInStream);
  if(zIn.is_gzip())
    pInStream = &zIn;
#endif

  bool ret = false;
  try
  {
    ret = pInFormat->ReadMolecule(pOb, this);
  }
  catch(...)
  {
    // Never leave pInStream pointing at the local decompression stream.
    pInStream = pOrigInStream;
    throw;
  }

  pInStream = pOrigInStream;
  return ret;
}
726 |
|
|
////////////////////////////////////////////////// |
727 |
|
|
/// Writes the object pOb but does not delete it afterwards. |
728 |
|
|
/// The output stream is lastingly changed if pos is not NULL |
729 |
|
|
/// Returns true if successful. |
730 |
|
|
bool OBConversion::Write(OBBase* pOb, ostream* pos) |
731 |
|
|
{ |
732 |
|
|
if(pos) |
733 |
|
|
pOutStream=pos; |
734 |
|
|
if(!pOutFormat) return false; |
735 |
tim |
2440 |
|
736 |
gezelter |
3057 |
ostream* pOrigOutStream = pOutStream; |
737 |
tim |
2440 |
#ifdef HAVE_LIBZ |
738 |
gezelter |
3057 |
#ifndef _WIN32 |
739 |
|
|
zlib_stream::zip_ostream zOut(*pOutStream); |
740 |
|
|
if(IsOption("z",GENOPTIONS)) |
741 |
|
|
{ |
742 |
|
|
// make sure to output the header |
743 |
|
|
zOut.make_gzip(); |
744 |
|
|
pOutStream = &zOut; |
745 |
|
|
} |
746 |
tim |
2440 |
#endif |
747 |
gezelter |
3057 |
#endif |
748 |
tim |
2440 |
|
749 |
gezelter |
3057 |
bool ret = pOutFormat->WriteMolecule(pOb,this); |
750 |
|
|
pOutStream = pOrigOutStream; |
751 |
|
|
return ret; |
752 |
|
|
} |
753 |
tim |
2440 |
|
754 |
gezelter |
3057 |
////////////////////////////////////////////////// |
755 |
|
|
/// Writes the object pOb but does not delete it afterwards. |
756 |
|
|
/// The output stream not changed (since we cannot write to this string later) |
757 |
|
|
/// Returns true if successful. |
758 |
|
|
std::string OBConversion::WriteString(OBBase* pOb) |
759 |
|
|
{ |
760 |
|
|
ostream *oldStream = pOutStream; // save old output |
761 |
|
|
stringstream newStream; |
762 |
tim |
2440 |
|
763 |
gezelter |
3057 |
if(pOutFormat) |
764 |
|
|
{ |
765 |
|
|
Write(pOb, &newStream); |
766 |
|
|
} |
767 |
|
|
pOutStream = oldStream; |
768 |
tim |
2440 |
|
769 |
gezelter |
3057 |
return newStream.str(); |
770 |
|
|
} |
771 |
tim |
2440 |
|
772 |
gezelter |
3057 |
////////////////////////////////////////////////// |
773 |
|
|
/// Writes the object pOb but does not delete it afterwards. |
774 |
|
|
/// The output stream is lastingly changed to point to the file |
775 |
|
|
/// Returns true if successful. |
776 |
|
|
bool OBConversion::WriteFile(OBBase* pOb, string filePath) |
777 |
|
|
{ |
778 |
|
|
if(!pOutFormat) return false; |
779 |
tim |
2440 |
|
780 |
gezelter |
3057 |
ofstream *ofs = new ofstream; |
781 |
|
|
ios_base::openmode omode = |
782 |
|
|
pOutFormat->Flags() & WRITEBINARY ? ios_base::out|ios_base::binary : ios_base::out; |
783 |
tim |
2440 |
|
784 |
gezelter |
3057 |
ofs->open(filePath.c_str(),omode); |
785 |
|
|
if(!ofs || !ofs->good()) |
786 |
|
|
{ |
787 |
|
|
cerr << "Cannot write to " << filePath <<endl; |
788 |
|
|
return false; |
789 |
|
|
} |
790 |
tim |
2440 |
|
791 |
gezelter |
3057 |
return Write(pOb, ofs); |
792 |
|
|
} |
793 |
tim |
2440 |
|
794 |
gezelter |
3057 |
//////////////////////////////////////////// |
795 |
|
|
/// Reads an object into pOb from the text in input.
/// The text is copied into a heap-allocated string stream which is passed to Read().
/// NOTE(review): the stream is not deleted here; presumably Read() keeps it as
/// the input stream - confirm where it is released.
/// Returns the result of Read().
bool OBConversion::ReadString(OBBase* pOb, std::string input)
{
  return Read(pOb, new stringstream(input));
}
800 |
tim |
2440 |
|
801 |
|
|
|
802 |
gezelter |
3057 |
//////////////////////////////////////////// |
803 |
|
|
/// Reads an object into pOb from the file filePath.
/// The file is opened in binary mode when the input format's flags contain READBINARY.
/// Returns false if no input format is set or the file cannot be opened,
/// otherwise the result of Read().
bool OBConversion::ReadFile(OBBase* pOb, std::string filePath)
{
  if(!pInFormat) return false;

  ios_base::openmode imode =
    pInFormat->Flags() & READBINARY ? ios_base::in|ios_base::binary : ios_base::in;

  // Heap allocated because the pointer is handed on to Read().
  // NOTE(review): nothing visible here releases the stream after a successful
  // open - confirm where ownership of the input stream is handled.
  ifstream *ifs = new ifstream;
  ifs->open(filePath.c_str(),imode);
  if(!ifs->good())
    {
      cerr << "Cannot read from " << filePath << endl;
      delete ifs; // never handed to Read(), so it has to be released here
      return false;
    }

  return Read(pOb,ifs);
}
820 |
tim |
2440 |
|
821 |
|
|
|
822 |
gezelter |
3057 |
//////////////////////////////////////////// |
823 |
|
|
/// Returns the help text that lists the general conversion options.
const char* OBConversion::Description()
{
  // Adjacent string literals are joined by the compiler into a single string
  return "Conversion options\n "
         "-f <#> Start import at molecule # specified\n "
         "-l <#> End import at molecule # specified\n "
         "-t All input files describe a single molecule\n "
         "-e Continue with next object after error, if possible\n "
         "-z Compress the output with gzip\n";
}
832 |
tim |
2440 |
|
833 |
gezelter |
3057 |
//////////////////////////////////////////// |
834 |
|
|
bool OBConversion::IsLast() |
835 |
|
|
{ |
836 |
|
|
return m_IsLast; |
837 |
|
|
} |
838 |
|
|
//////////////////////////////////////////// |
839 |
|
|
bool OBConversion::IsFirstInput() |
840 |
|
|
{ |
841 |
|
|
return (Count==0); |
842 |
|
|
} |
843 |
tim |
2440 |
|
844 |
gezelter |
3057 |
///////////////////////////////////////////////// |
845 |
|
|
/// Returns BaseName with its first * replaced by the name of InFile stripped
/// of its path and extension. BaseName is returned unchanged if it has no *.
/// When built with HAVE_LIBZ a trailing ".gz" is stripped together with the
/// extension in front of it. Neither argument is modified.
string OBConversion::BatchFileName(string& BaseName, string& InFile)
{
  string ofname(BaseName);
  const string::size_type pos = ofname.find('*');
  if(pos == string::npos)
    return ofname;

  // The file name proper starts after the last path separator (either kind)
  const string::size_type possep = InFile.find_last_of("\\/");
  const string::size_type namestart = (possep == string::npos) ? 0 : possep + 1;

  // The name runs up to the last dot, but a dot inside the directory
  // part is not an extension separator
  string::size_type nameend = InFile.size();
  const string::size_type posdot = InFile.rfind('.');
  if(posdot != string::npos && posdot >= namestart)
    {
      nameend = posdot;
#ifdef HAVE_LIBZ
      if(InFile.compare(posdot, 3, ".gz") == 0 && posdot > namestart)
        {
          // Compressed input: also drop the real extension in front of .gz
          const string::size_type posinner = InFile.rfind('.', posdot - 1);
          if(posinner != string::npos && posinner >= namestart)
            nameend = posinner;
        }
#endif
    }

  //Replace * by input filename
  ofname.replace(pos, 1, InFile, namestart, nameend - namestart);
  return ofname;
}
873 |
tim |
2440 |
|
874 |
gezelter |
3057 |
//////////////////////////////////////////////// |
875 |
|
|
/// Returns BaseName with its first * replaced by the decimal text of Count.
/// BaseName is returned unchanged if it contains no *.
string OBConversion::IncrementedFileName(string& BaseName, const int Count)
{
  string ofname(BaseName);
  // Compare against npos rather than squeezing the position into an int
  const string::size_type pos = ofname.find('*');
  if(pos != string::npos)
    {
      char num[33];
      snprintf(num, sizeof(num), "%d", Count);
      ofname.replace(pos,1, num);
    }
  return ofname;
}
888 |
|
|
//////////////////////////////////////////////////// |
889 |
tim |
2440 |
|
890 |
gezelter |
3057 |
/** |
891 |
|
|
Makes input and output streams, and carries out normal, |
892 |
|
|
batch, aggregation, and splitting conversion. |
893 |
tim |
2440 |
|
894 |
gezelter |
3057 |
Normal |
895 |
|
|
Done if FileList contains a single file name and OutputFileName |
896 |
|
|
does not contain a *. |
897 |
|
|
|
898 |
|
|
Aggregation |
899 |
|
|
Done if FileList has more than one file name and OutputFileName does |
900 |
|
|
not contain * . All the chemical objects are converted and sent |
901 |
|
|
to the single output file. |
902 |
tim |
2440 |
|
903 |
gezelter |
3057 |
Splitting |
904 |
|
|
Done if FileList contains a single file name and OutputFileName |
905 |
|
|
contains a * . Each chemical object in the input file is converted |
906 |
|
|
and sent to a separate file whose name is OutputFileName with the |
907 |
|
|
* replaced by 1, 2, 3, etc. |
908 |
|
|
For example, if OutputFileName is NEW*.smi then the output files are |
909 |
|
|
NEW1.smi, NEW2.smi, etc. |
910 |
tim |
2440 |
|
911 |
gezelter |
3057 |
Batch Conversion |
912 |
|
|
Done if FileList has more than one file name and contains a * . |
913 |
|
|
Each input file is converted to an output file whose name is |
914 |
|
|
OutputFileName with the * replaced by the inputfile name without its |
915 |
|
|
path and extension. |
916 |
|
|
So if the input files were inpath/First.cml, inpath/Second.cml |
917 |
|
|
and OutputFileName was NEW*.mol, the output files would be |
918 |
|
|
NEWFirst.mol, NEWSecond.mol. |
919 |
tim |
2440 |
|
920 |
gezelter |
3057 |
If FileList is empty, the input stream that has already been set |
921 |
|
|
(usually in the constructor) is used. If OutputFileName is empty, |
922 |
|
|
the output stream already set is used. |
923 |
tim |
2440 |
|
924 |
gezelter |
3057 |
On exit, OutputFileList contains the names of the output files. |
925 |
tim |
2440 |
|
926 |
gezelter |
3057 |
Returns the number of Chemical objects converted. |
927 |
|
|
*/ |
928 |
|
|
int OBConversion::FullConvert(std::vector<std::string>& FileList, std::string& OutputFileName, |
929 |
|
|
std::vector<std::string>& OutputFileList) |
930 |
|
|
{ |
931 |
|
|
ostream* pOs=NULL; |
932 |
|
|
istream* pIs=NULL; |
933 |
|
|
ifstream is; |
934 |
|
|
ofstream os; |
935 |
|
|
bool HasMultipleOutputFiles=false; |
936 |
|
|
int Count=0; |
937 |
|
|
bool CommonInFormat = pInFormat ? true:false; //whether set in calling routine |
938 |
|
|
ios_base::openmode omode = |
939 |
|
|
pOutFormat->Flags() & WRITEBINARY ? ios_base::out|ios_base::binary : ios_base::out; |
940 |
|
|
try |
941 |
|
|
{ |
942 |
|
|
ofstream ofs; |
943 |
tim |
2440 |
|
944 |
gezelter |
3057 |
//OUTPUT |
945 |
|
|
if(OutputFileName.empty()) |
946 |
|
|
pOs = NULL; //use existing stream |
947 |
|
|
else |
948 |
|
|
{ |
949 |
|
|
if(OutputFileName.find_first_of('*')!=string::npos) HasMultipleOutputFiles = true; |
950 |
|
|
if(!HasMultipleOutputFiles) |
951 |
|
|
{ |
952 |
|
|
os.open(OutputFileName.c_str(),omode); |
953 |
|
|
if(!os) |
954 |
|
|
{ |
955 |
|
|
cerr << "Cannot write to " << OutputFileName <<endl; |
956 |
|
|
return 0; |
957 |
|
|
} |
958 |
|
|
OutputFileList.push_back(OutputFileName); |
959 |
|
|
pOs=&os; |
960 |
|
|
} |
961 |
|
|
} |
962 |
tim |
2440 |
|
963 |
gezelter |
3057 |
if(IsOption("t",GENOPTIONS)) |
964 |
|
|
{ |
965 |
|
|
//Concatenate input file option (multiple files, single molecule) |
966 |
|
|
if(HasMultipleOutputFiles) |
967 |
|
|
{ |
968 |
|
|
cerr << "Cannot have multiple output files and also concatenate input files (-t option)" <<endl; |
969 |
|
|
return 0; |
970 |
|
|
} |
971 |
tim |
2440 |
|
972 |
gezelter |
3057 |
stringstream allinput; |
973 |
|
|
vector<string>::iterator itr; |
974 |
|
|
for(itr=FileList.begin();itr!=FileList.end();itr++) |
975 |
|
|
{ |
976 |
|
|
ifstream ifs((*itr).c_str()); |
977 |
|
|
if(!ifs) |
978 |
|
|
{ |
979 |
|
|
cerr << "Cannot open " << *itr <<endl; |
980 |
|
|
continue; |
981 |
|
|
} |
982 |
|
|
allinput << ifs.rdbuf(); //Copy all file contents |
983 |
|
|
ifs.close(); |
984 |
|
|
} |
985 |
|
|
Count = Convert(&allinput,pOs); |
986 |
|
|
return Count; |
987 |
|
|
} |
988 |
tim |
2440 |
|
989 |
gezelter |
3057 |
//INPUT |
990 |
|
|
if(FileList.empty()) |
991 |
|
|
pIs = NULL; |
992 |
|
|
else |
993 |
|
|
{ |
994 |
|
|
if(FileList.size()>1) |
995 |
|
|
{ |
996 |
|
|
//multiple input files |
997 |
|
|
vector<string>::iterator itr, tempitr; |
998 |
|
|
tempitr = FileList.end(); |
999 |
|
|
tempitr--; |
1000 |
|
|
for(itr=FileList.begin();itr!=FileList.end();itr++) |
1001 |
|
|
{ |
1002 |
|
|
InFilename = *itr; |
1003 |
|
|
ifstream ifs; |
1004 |
|
|
if(!OpenAndSetFormat(CommonInFormat, &ifs)) |
1005 |
|
|
continue; |
1006 |
tim |
2440 |
|
1007 |
gezelter |
3057 |
if(HasMultipleOutputFiles) |
1008 |
|
|
{ |
1009 |
|
|
//Batch conversion |
1010 |
|
|
string batchfile = BatchFileName(OutputFileName,*itr); |
1011 |
|
|
if(ofs.is_open()) ofs.close(); |
1012 |
|
|
ofs.open(batchfile.c_str(), omode); |
1013 |
|
|
if(!ofs) |
1014 |
|
|
{ |
1015 |
|
|
cerr << "Cannot open " << batchfile << endl; |
1016 |
|
|
return Count; |
1017 |
|
|
} |
1018 |
|
|
OutputFileList.push_back(batchfile); |
1019 |
|
|
SetOutputIndex(0); //reset for new file |
1020 |
|
|
Count += Convert(&ifs,&ofs); |
1021 |
|
|
} |
1022 |
|
|
else |
1023 |
|
|
{ |
1024 |
|
|
//Aggregation |
1025 |
|
|
if(itr!=tempitr) SetMoreFilesToCome(); |
1026 |
|
|
Count = Convert(&ifs,pOs); |
1027 |
|
|
} |
1028 |
|
|
} |
1029 |
|
|
return Count; |
1030 |
|
|
} |
1031 |
|
|
else |
1032 |
|
|
{ |
1033 |
|
|
//Single input file |
1034 |
|
|
InFilename = FileList[0]; |
1035 |
|
|
if(!OpenAndSetFormat(CommonInFormat, &is)) |
1036 |
|
|
return 0; |
1037 |
|
|
pIs=&is; |
1038 |
tim |
2440 |
|
1039 |
gezelter |
3057 |
if(HasMultipleOutputFiles) |
1040 |
|
|
{ |
1041 |
|
|
//Splitting |
1042 |
|
|
//Output is put in a temporary stream and written to a file |
1043 |
|
|
//with an augmenting name only when it contains a valid object. |
1044 |
|
|
int Indx=1; |
1045 |
|
|
SetInStream(&is); |
1046 |
|
|
#ifdef HAVE_LIBZ |
1047 |
|
|
zlib_stream::zip_istream zIn(is); |
1048 |
|
|
#endif |
1049 |
|
|
for(;;) |
1050 |
|
|
{ |
1051 |
|
|
stringstream ss; |
1052 |
|
|
SetOutStream(&ss); |
1053 |
|
|
SetOutputIndex(0); //reset for new file |
1054 |
|
|
SetOneObjectOnly(); |
1055 |
tim |
2440 |
|
1056 |
gezelter |
3057 |
#ifdef HAVE_LIBZ |
1057 |
|
|
if(Indx==1 && zIn.is_gzip()) |
1058 |
|
|
SetInStream(&zIn); |
1059 |
|
|
#endif |
1060 |
tim |
2440 |
|
1061 |
gezelter |
3057 |
int ThisFileCount = Convert(); |
1062 |
|
|
if(ThisFileCount==0) break; |
1063 |
|
|
Count+=ThisFileCount; |
1064 |
tim |
2440 |
|
1065 |
gezelter |
3057 |
if(ofs.is_open()) ofs.close(); |
1066 |
|
|
string incrfile = IncrementedFileName(OutputFileName,Indx++); |
1067 |
|
|
ofs.open(incrfile.c_str(), omode); |
1068 |
|
|
if(!ofs) |
1069 |
|
|
{ |
1070 |
|
|
cerr << "Cannot write to " << incrfile << endl; |
1071 |
|
|
return Count; |
1072 |
|
|
} |
1073 |
|
|
|
1074 |
|
|
OutputFileList.push_back(incrfile); |
1075 |
|
|
#ifdef HAVE_LIBZ |
1076 |
|
|
if(IsOption("z",GENOPTIONS)) |
1077 |
|
|
{ |
1078 |
|
|
zlib_stream::zip_ostream zOut(ofs); |
1079 |
|
|
// make sure to output the header |
1080 |
|
|
zOut.make_gzip(); |
1081 |
|
|
zOut << ss.rdbuf(); |
1082 |
|
|
} |
1083 |
|
|
else |
1084 |
|
|
#endif |
1085 |
|
|
ofs << ss.rdbuf(); |
1086 |
tim |
2440 |
|
1087 |
gezelter |
3057 |
ofs.close(); |
1088 |
|
|
ss.clear(); |
1089 |
|
|
} |
1090 |
|
|
return Count; |
1091 |
|
|
} |
1092 |
|
|
} |
1093 |
|
|
} |
1094 |
tim |
2440 |
|
1095 |
gezelter |
3057 |
//Single input and output files |
1096 |
|
|
Count = Convert(pIs,pOs); |
1097 |
|
|
return Count; |
1098 |
|
|
} |
1099 |
|
|
catch(...) |
1100 |
|
|
{ |
1101 |
|
|
cerr << "Conversion failed with an exception. Count=" << Count <<endl; |
1102 |
|
|
return Count; |
1103 |
|
|
} |
1104 |
|
|
} |
1105 |
|
|
|
1106 |
|
|
bool OBConversion::OpenAndSetFormat(bool SetFormat, ifstream* is) |
1107 |
|
|
{ |
1108 |
|
|
//Opens file using InFilename and sets pInFormat if requested |
1109 |
|
|
if(!SetFormat) |
1110 |
|
|
{ |
1111 |
|
|
pInFormat = FormatFromExt(InFilename.c_str()); |
1112 |
|
|
if(pInFormat==NULL) |
1113 |
|
|
{ |
1114 |
|
|
string::size_type pos = InFilename.rfind('.'); |
1115 |
|
|
string ext; |
1116 |
|
|
if(pos!=string::npos) |
1117 |
|
|
ext = InFilename.substr(pos); |
1118 |
|
|
cerr << "Cannot read input format \"" << ext << '\"' |
1119 |
|
|
<< " for file \"" << InFilename << "\"" << endl; |
1120 |
|
|
return false; |
1121 |
|
|
} |
1122 |
|
|
} |
1123 |
|
|
|
1124 |
|
|
ios_base::openmode imode; |
1125 |
tim |
2440 |
#ifdef ALL_READS_BINARY //Makes unix files compatible with VC++6 |
1126 |
gezelter |
3057 |
imode = ios_base::in|ios_base::binary; |
1127 |
tim |
2440 |
#else |
1128 |
gezelter |
3057 |
imode = pInFormat->Flags() & READBINARY ? ios_base::in|ios_base::binary : ios_base::in; |
1129 |
tim |
2440 |
#endif |
1130 |
|
|
|
1131 |
gezelter |
3057 |
is->open(InFilename.c_str(), imode); |
1132 |
|
|
if(!is->good()) |
1133 |
|
|
{ |
1134 |
|
|
cerr << "Cannot open " << InFilename <<endl; |
1135 |
|
|
return false; |
1136 |
|
|
} |
1137 |
tim |
2440 |
|
1138 |
gezelter |
3057 |
return true; |
1139 |
|
|
} |
1140 |
tim |
2440 |
|
1141 |
gezelter |
3057 |
/////////////////////////////////////////////// |
1142 |
|
|
void OBConversion::AddOption(const char* opt, Option_type opttyp, const char* txt) |
1143 |
|
|
{ |
1144 |
|
|
//Also updates an option |
1145 |
|
|
if(txt==NULL) |
1146 |
|
|
OptionsArray[opttyp][opt]=string(); |
1147 |
|
|
else |
1148 |
|
|
OptionsArray[opttyp][opt]=txt; |
1149 |
|
|
} |
1150 |
tim |
2440 |
|
1151 |
gezelter |
3057 |
const char* OBConversion::IsOption(const char* opt, Option_type opttyp) |
1152 |
|
|
{ |
1153 |
|
|
//Returns NULL if option not found or a pointer to the text if it is |
1154 |
|
|
map<string,string>::iterator pos; |
1155 |
|
|
pos = OptionsArray[opttyp].find(opt); |
1156 |
|
|
if(pos==OptionsArray[opttyp].end()) |
1157 |
|
|
return NULL; |
1158 |
|
|
return pos->second.c_str(); |
1159 |
|
|
} |
1160 |
tim |
2440 |
|
1161 |
gezelter |
3057 |
bool OBConversion::RemoveOption(const char* opt, Option_type opttyp) |
1162 |
|
|
{ |
1163 |
|
|
return OptionsArray[opttyp].erase(opt)!=0;//true if was there |
1164 |
|
|
} |
1165 |
tim |
2440 |
|
1166 |
gezelter |
3057 |
void OBConversion::SetOptions(const char* options, Option_type opttyp) |
1167 |
|
|
{ |
1168 |
|
|
while(*options) |
1169 |
|
|
{ |
1170 |
|
|
string ch(1, *options++); |
1171 |
|
|
if(*options=='\"') |
1172 |
|
|
{ |
1173 |
|
|
string txt = options+1; |
1174 |
|
|
string::size_type pos = txt.find('\"'); |
1175 |
|
|
if(pos==string::npos) |
1176 |
|
|
return; //options is illformed |
1177 |
|
|
txt.erase(pos); |
1178 |
|
|
OptionsArray[opttyp][ch]= txt; |
1179 |
|
|
options += pos+2; |
1180 |
|
|
} |
1181 |
|
|
else |
1182 |
|
|
OptionsArray[opttyp][ch] = string(); |
1183 |
|
|
} |
1184 |
|
|
} |
1185 |
tim |
2440 |
|
1186 |
gezelter |
3057 |
typedef std::map<string,int> OPAMapType; |
1187 |
|
|
OPAMapType& OBConversion::OptionParamArray(Option_type typ) |
1188 |
|
|
{ |
1189 |
|
|
static OPAMapType* opa = NULL; |
1190 |
|
|
if (!opa) |
1191 |
|
|
opa = new OPAMapType[3]; |
1192 |
|
|
return opa[typ]; |
1193 |
|
|
} |
1194 |
tim |
2440 |
|
1195 |
gezelter |
3057 |
/// Records that the option name of type typ takes numberParams parameters.
/// If the option is already registered with a different number, a message is
/// printed and the earlier registration is kept. The message names the first
/// line of pFormat's description, or "API" when pFormat is NULL.
void OBConversion::RegisterOptionParam(string name, OBFormat* pFormat,
                                       int numberParams, Option_type typ)
{
  map<string,int>& registry = OptionParamArray(typ);
  const map<string,int>::iterator found = registry.find(name);

  // New option, or a repeat that agrees with the earlier registration
  if(found==registry.end() || found->second==numberParams)
    {
      registry[name] = numberParams;
      return;
    }

  string description("API");
  if(pFormat)
    description=pFormat->Description();
  cerr << "The number of parameters needed by option \"" << name << "\" in "
       << description.substr(0,description.find('\n'))
       << " differs from an earlier registration." << endl;
}
1216 |
tim |
2440 |
|
1217 |
gezelter |
3057 |
/// Returns the number of parameters registered for the option name of type typ,
/// or 0 if the option has not been registered.
int OBConversion::GetOptionParams(string name, Option_type typ)
{
  map<string,int>& registry = OptionParamArray(typ);
  const map<string,int>::iterator found = registry.find(name);
  return (found==registry.end()) ? 0 : found->second;
}
1226 |
tim |
2440 |
|
1227 |
|
|
}//namespace OpenBabel |
1228 |
|
|
|
1229 |
|
|
//! \file obconversion.cpp |
1230 |
|
|
//! \brief Implementation of OBFormat and OBConversion classes. |