1 |
tim |
741 |
/********************************************************************** |
2 |
|
|
obconversion.cpp - Declaration of OBFormat and OBConversion |
3 |
|
|
|
4 |
|
|
Copyright (C) 2004 by Chris Morley |
5 |
|
|
Some portions Copyright (C) 2005 by Geoffrey Hutchison |
6 |
|
|
|
7 |
|
|
This file is part of the Open Babel project. |
8 |
|
|
For more information, see <http://openbabel.sourceforge.net/> |
9 |
|
|
|
10 |
|
|
This program is free software; you can redistribute it and/or modify |
11 |
|
|
it under the terms of the GNU General Public License as published by |
12 |
|
|
the Free Software Foundation version 2 of the License. |
13 |
|
|
|
14 |
|
|
This program is distributed in the hope that it will be useful, |
15 |
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 |
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 |
|
|
GNU General Public License for more details. |
18 |
|
|
***********************************************************************/ |
19 |
|
|
// Definition of OBConversion routines |
20 |
|
|
|
21 |
|
|
#ifdef _WIN32 |
22 |
|
|
#pragma warning (disable : 4786) |
23 |
|
|
|
24 |
|
|
//using 'this' in base class initializer |
25 |
|
|
#pragma warning (disable : 4355) |
26 |
|
|
|
27 |
|
|
#ifdef GUI |
28 |
|
|
#undef DATADIR |
29 |
|
|
#include "stdafx.h" //(includes<windows.h> |
30 |
|
|
#endif |
31 |
|
|
#endif |
32 |
|
|
|
33 |
|
|
#include <iostream> |
34 |
|
|
#include <fstream> |
35 |
|
|
#include <sstream> |
36 |
|
|
#include <string> |
37 |
|
|
#include <map> |
38 |
|
|
//#include <dlfcn.h> |
39 |
|
|
|
40 |
|
|
#include "obconversion.hpp" |
41 |
|
|
|
42 |
|
|
#ifdef HAVE_LIBZ |
43 |
|
|
#include "zipstream.hpp" |
44 |
|
|
#endif |
45 |
|
|
|
46 |
|
|
#if !HAVE_STRNCASECMP |
47 |
|
|
extern "C" int strncasecmp(const char *s1, const char *s2, size_t n); |
48 |
|
|
#endif |
49 |
|
|
|
50 |
|
|
#ifndef BUFF_SIZE |
51 |
|
|
#define BUFF_SIZE 32768 |
52 |
|
|
#endif |
53 |
|
|
|
54 |
|
|
using namespace std; |
55 |
|
|
namespace OpenBabel { |
56 |
|
|
|
57 |
|
|
const char* OBFormat::TargetClassDescription() |
58 |
|
|
{ |
59 |
|
|
//Provides class of default format unless overridden |
60 |
|
|
if(OBConversion::GetDefaultFormat()) |
61 |
|
|
return OBConversion::GetDefaultFormat()->TargetClassDescription(); |
62 |
|
|
else |
63 |
|
|
return ""; |
64 |
gezelter |
755 |
} |
65 |
tim |
741 |
/// Provides type information for the class of object this format handles.
/// Unless overridden, the answer comes from the default format held by
/// OBConversion. When there is no default format the result is only a
/// placeholder (the type of the 'this' pointer) and carries no meaning.
const type_info& OBFormat::GetType()
{
  if(!OBConversion::GetDefaultFormat())
    return typeid(this); //rubbish return if DefaultFormat not set

  return OBConversion::GetDefaultFormat()->GetType();
}
73 |
tim |
741 |
|
74 |
|
|
//*************************************************** |
75 |
|
|
|
76 |
|
|
/** @class OBConversion |
77 |
|
|
OBConversion maintains a list of the available formats, |
78 |
|
|
provides information on them, and controls the conversion process. |
79 |
|
|
|
80 |
|
|
A conversion is carried out by the calling routine, usually in a |
81 |
|
|
user interface or an application program, making an instance of |
82 |
|
|
OBConversion. It is loaded with the in and out formats, any options |
83 |
|
|
and (usually) the default streams for input and output. Then either |
84 |
|
|
the Convert() function is called, which allows a single input file |
85 |
|
|
to be converted, or the extended functionality of FullConvert() |
86 |
|
|
is used. This allows multiple input and output files, allowing: |
87 |
|
|
- aggregation - the contents of many input files converted |
88 |
|
|
and sent to one output file; |
89 |
|
|
- splitting - the molecules from one input file sent to |
90 |
|
|
separate output files; |
91 |
|
|
- batch conversion - each input file converted to an output file. |
92 |
|
|
|
93 |
|
|
These procedures constitute the "Convert" interface. OBConversion |
94 |
|
|
and the user interface or application program do not need to be |
95 |
|
|
aware of any other part of OpenBabel - mol.h is not \#included. This |
96 |
|
|
allows any chemical object derived from OBBase to be converted; |
97 |
|
|
the type of object is decided by the input format class. |
98 |
|
|
However, currently almost all the conversions are for molecules of
99 |
|
|
class OBMol. |
100 |
|
|
/// |
101 |
|
|
OBConversion can also be used with an "API" interface |
102 |
|
|
called from programs which manipulate chemical objects. Input/output is |
103 |
|
|
done with the Read() and Write() functions which work with any |
104 |
|
|
chemical object, but need to have its type specified. (The |
105 |
|
|
ReadMolecule() and WriteMolecule() functions of the format classes |
106 |
|
|
can also be used directly.) |
107 |
|
|
|
108 |
|
|
|
109 |
|
|
Example code using OBConversion |
110 |
|
|
|
111 |
|
|
<b>To read in a molecule, manipulate it and write it out.</b> |
112 |
|
|
|
113 |
|
|
Set up an istream and an ostream, to and from files or elsewhere. |
114 |
|
|
(cin and cout are used in the example). Specify the file formats. |
115 |
|
|
|
116 |
|
|
@code |
117 |
|
|
OBConversion conv(&cin,&cout); |
118 |
|
|
if(conv.SetInAndOutFormats("SMI","MOL")) |
119 |
|
|
{ |
120 |
|
|
OBMol mol; |
121 |
|
|
if(conv.Read(&mol)) |
122 |
|
|
...manipulate molecule |
123 |
|
|
|
124 |
|
|
conv.Write(&mol);
125 |
|
|
} |
126 |
|
|
@endcode |
127 |
|
|
|
128 |
|
|
A two stage construction is used to allow error handling |
129 |
|
|
if the format ID is not recognized. This is necessary now that the |
130 |
|
|
formats are dynamic and errors are not caught at compile time. |
131 |
|
|
OBConversion::Read() is a templated function so that objects derived |
132 |
|
|
from OBBase can also be handled, in addition to OBMol, if the format |
133 |
|
|
routines are written appropriately. |
134 |
|
|
|
135 |
|
|
<b>To make a molecule from a SMILES string.</b> |
136 |
|
|
@code |
137 |
|
|
std::string SmilesString; |
138 |
|
|
OBMol mol; |
139 |
|
|
stringstream ss(SmilesString);
140 |
|
|
OBConversion conv(&ss); |
141 |
|
|
if(conv.SetInFormat("smi") && conv.Read(&mol)) |
142 |
|
|
... |
143 |
|
|
@endcode |
144 |
|
|
|
145 |
|
|
<b>To do a file conversion without manipulating the molecule.</b> |
146 |
|
|
|
147 |
|
|
@code |
148 |
|
|
#include "obconversion.hpp" //mol.h is not needed
149 |
|
|
...set up an istream is and an ostream os |
150 |
|
|
OBConversion conv(&is,&os); |
151 |
|
|
if(conv.SetInAndOutFormats("SMI","MOL")) |
152 |
|
|
{ |
153 |
|
|
conv.SetOptions("h"); //Optional; (h adds explicit hydrogens)
154 |
|
|
conv.Convert(); |
155 |
|
|
} |
156 |
|
|
@endcode |
157 |
|
|
|
158 |
|
|
<b>To add automatic format conversion to an existing program.</b> |
159 |
|
|
|
160 |
|
|
The existing program inputs from the file identified by the |
161 |
|
|
const char* filename into the istream is. The file is assumed to have |
162 |
|
|
a format ORIG, but other formats, identified by their file extensions,
163 |
|
|
can now be used. |
164 |
|
|
|
165 |
|
|
@code |
166 |
|
|
ifstream ifs(filename); //Original code |
167 |
|
|
|
168 |
|
|
OBConversion conv; |
169 |
|
|
OBFormat* inFormat = conv.FormatFromExt(filename); |
170 |
|
|
OBFormat* outFormat = conv.GetFormat("ORIG"); |
171 |
|
|
istream* pIn = &ifs; |
172 |
|
|
stringstream newstream; |
173 |
|
|
if(inFormat && outFormat) |
174 |
|
|
{ |
175 |
|
|
conv.SetInAndOutFormats(inFormat,outFormat); |
176 |
|
|
conv.Convert(pIn,&newstream); |
177 |
|
|
pIn=&newstream; |
178 |
|
|
} |
179 |
|
|
//else error; new features not available; fallback to original functionality |
180 |
|
|
|
181 |
|
|
...Carry on with original code using pIn |
182 |
|
|
@endcode |
183 |
|
|
|
184 |
|
|
In Windows a degree of independence from OpenBabel can be achieved using DLLs. |
185 |
|
|
This code would be linked with obconv.lib. |
186 |
|
|
At runtime the following DLLs would be in the executable directory: |
187 |
|
|
obconv.dll, obdll.dll, one or more *.obf format files. |
188 |
|
|
*/ |
189 |
|
|
|
190 |
|
|
int OBConversion::FormatFilesLoaded = 0; |
191 |
|
|
|
192 |
|
|
OBFormat* OBConversion::pDefaultFormat=NULL; |
193 |
|
|
|
194 |
|
|
/// Constructs a conversion object that reads from \a is and writes to \a os.
/// Convert() reports an error if either stream is still NULL when it runs.
/// Format files are loaded here for as long as FormatFilesLoaded is zero,
/// and the general options "f" and "l" are registered as taking one parameter.
OBConversion::OBConversion(istream* is, ostream* os) :
  pInFormat(NULL),pOutFormat(NULL), Index(0), StartNumber(1),
  EndNumber(0), Count(-1), m_IsLast(true), MoreFilesToCome(false),
  OneObjectOnly(false), pOb1(NULL), pAuxConv(NULL)
{
  pInStream=is;
  pOutStream=os;

  //Give the remaining bookkeeping members a definite value. They are
  //otherwise first written in Convert()/AddChemObject(), yet the copy
  //constructor reads them, so copying an instance before its first
  //conversion would read indeterminate values.
  //(Assigned here rather than in the initializer list because the member
  //declaration order is not visible from this file.)
  ReadyToInput=false;
  rInlen=0;
  wInlen=0;

  if (FormatFilesLoaded == 0)
    FormatFilesLoaded = LoadFormatFiles();

  //These options take a parameter
  RegisterOptionParam("f", NULL, 1,GENOPTIONS);
  RegisterOptionParam("l", NULL, 1,GENOPTIONS);
}
208 |
|
|
|
209 |
|
|
///This static function returns a reference to the FormatsMap |
210 |
|
|
///which, because it is a static local variable is constructed only once. |
211 |
|
|
///This fiddle is to avoid the "static initialization order fiasco" |
212 |
|
|
///See Marshall Cline's C++ FAQ Lite document, www.parashift.com/c++-faq-lite/". |
213 |
|
|
FMapType& OBConversion::FormatsMap()
{
  //Created on the first call and never deleted, so the map is available
  //whenever this function is called, whatever the order in which other
  //static objects are initialised.
  static FMapType* pFormatsById = new FMapType;
  return *pFormatsById;
}
218 |
|
|
|
219 |
|
|
///This static function returns a reference to the FormatsMIMEMap |
220 |
|
|
///which, because it is a static local variable is constructed only once. |
221 |
|
|
///This fiddle is to avoid the "static initialization order fiasco" |
222 |
|
|
///See Marshall Cline's C++ FAQ Lite document, www.parashift.com/c++-faq-lite/". |
223 |
|
|
FMapType& OBConversion::FormatsMIMEMap()
{
  //Created on the first call and never deleted, so the map is available
  //whenever this function is called, whatever the order in which other
  //static objects are initialised.
  static FMapType* pFormatsByMime = new FMapType;
  return *pFormatsByMime;
}
228 |
|
|
|
229 |
|
|
///////////////////////////////////////////////// |
230 |
|
|
/// Copy constructor. Every member is copied from \a o except pAuxConv,
/// which starts as NULL: the destructor deletes pAuxConv, so two
/// instances must not hold the same auxiliary conversion.
OBConversion::OBConversion(const OBConversion& o)
{
  //Formats and streams
  pInFormat  = o.pInFormat;
  pOutFormat = o.pOutFormat;
  pInStream  = o.pInStream;
  pOutStream = o.pOutStream;
  InFilename = o.InFilename;

  //The three option maps
  for(int i=0; i<3; ++i)
    OptionsArray[i] = o.OptionsArray[i];

  //Counters and range limits
  Index       = o.Index;
  Count       = o.Count;
  StartNumber = o.StartNumber;
  EndNumber   = o.EndNumber;

  //Positions in the input stream
  rInpos = o.rInpos;
  wInpos = o.wInpos;
  rInlen = o.rInlen;
  wInlen = o.wInlen;

  //State flags and the stored object
  m_IsLast        = o.m_IsLast;
  MoreFilesToCome = o.MoreFilesToCome;
  OneObjectOnly   = o.OneObjectOnly;
  ReadyToInput    = o.ReadyToInput;
  pOb1            = o.pOb1;

  pAuxConv = NULL;
}
256 |
|
|
//////////////////////////////////////////////// |
257 |
|
|
|
258 |
|
|
/// Destructor. Deletes the auxiliary conversion, unless pAuxConv points
/// at this very object.
OBConversion::~OBConversion()
{
  if(pAuxConv==this)
    return; //must not delete ourselves from inside our own destructor

  delete pAuxConv;
}
263 |
|
|
////////////////////////////////////////////////////// |
264 |
|
|
|
265 |
|
|
/// Class information on formats is collected by making an instance of the class |
266 |
|
|
/// derived from OBFormat(only one is usually required). RegisterFormat() is called |
267 |
|
|
/// from its constructor. |
268 |
|
|
/// |
269 |
|
|
/// If the compiled format is stored separately, like in a DLL or shared library, |
270 |
|
|
/// the initialization code makes an instance of the imported OBFormat class. |
271 |
|
|
int OBConversion::RegisterFormat(const char* ID, OBFormat* pFormat, const char* MIME) |
272 |
|
|
{ |
273 |
|
|
FormatsMap()[ID] = pFormat; |
274 |
|
|
if (MIME) |
275 |
|
|
FormatsMIMEMap()[MIME] = pFormat; |
276 |
|
|
if(pFormat->Flags() & DEFAULTFORMAT) |
277 |
|
|
pDefaultFormat=pFormat; |
278 |
|
|
return FormatsMap().size(); |
279 |
|
|
} |
280 |
|
|
|
281 |
|
|
////////////////////////////////////////////////////// |
282 |
|
|
/// Dynamic loading of format files is disabled here: the earlier
/// implementation is kept below as a comment and the function always
/// returns 1. The constructor stores that value in FormatFilesLoaded,
/// so it does not call this function again.
int OBConversion::LoadFormatFiles()
{
  /* Disabled implementation, kept for reference:
  int count=0;
  // if(FormatFilesLoaded) return 0;
  // FormatFilesLoaded=true; //so will load files only once
  #ifdef USING_DYNAMIC_LIBS
  //Depending on availablilty, look successively in
  //FORMATFILE_DIR, executable directory,or current directory
  string TargetDir;
  #ifdef FORMATFILE_DIR
  TargetDir="FORMATFILE_DIR";
  #endif

  DLHandler::getConvDirectory(TargetDir);

  vector<string> files;
  if(!DLHandler::findFiles(files,DLHandler::getFormatFilePattern(),TargetDir)) return 0;

  vector<string>::iterator itr;
  for(itr=files.begin();itr!=files.end();itr++)
  {
    if(DLHandler::openLib(*itr))
      count++;
    else
      cerr << *itr << " did not load properly" << endl;
  }
  #else
  count = 1; //avoid calling this function several times
  #endif //USING_DYNAMIC_LIBS
  */
  const int loaded = 1;
  return loaded;
}
316 |
|
|
|
317 |
|
|
/** |
318 |
|
|
*Returns the ID + the first line of the description in str |
319 |
|
|
*and a pointer to the format in pFormat. |
320 |
|
|
*If called with str==NULL the first format is returned; |
321 |
|
|
*subsequent formats are returned by calling with str!=NULL and the previous value of itr |
322 |
|
|
*returns false, and str and pFormat NULL, when there are no more formats. |
323 |
|
|
*Use like: |
324 |
|
|
*@code |
325 |
|
|
* const char* str=NULL; |
326 |
|
|
* Formatpos pos; |
327 |
|
|
* while(OBConversion::GetNextFormat(pos,str,pFormat)) |
328 |
|
|
* { |
329 |
|
|
* use str and pFormat |
330 |
|
|
* } |
331 |
|
|
*@endcode |
332 |
|
|
*/ |
333 |
|
|
bool OBConversion::GetNextFormat(Formatpos& itr, const char*& str,OBFormat*& pFormat) |
334 |
|
|
{ |
335 |
|
|
|
336 |
|
|
pFormat = NULL; |
337 |
|
|
if(str==NULL) |
338 |
|
|
itr = FormatsMap().begin(); |
339 |
|
|
else |
340 |
|
|
itr++; |
341 |
|
|
if(itr == FormatsMap().end()) |
342 |
|
|
{ |
343 |
|
|
str=NULL; pFormat=NULL; |
344 |
|
|
return false; |
345 |
|
|
} |
346 |
|
|
static string s; |
347 |
|
|
s =itr->first; |
348 |
|
|
pFormat = itr->second; |
349 |
|
|
if(pFormat) |
350 |
|
|
{ |
351 |
|
|
string description(pFormat->Description()); |
352 |
|
|
s += " -- "; |
353 |
|
|
s += description.substr(0,description.find('\n')); |
354 |
|
|
} |
355 |
|
|
|
356 |
|
|
if(pFormat->Flags() & NOTWRITABLE) s+=" [Read-only]"; |
357 |
|
|
if(pFormat->Flags() & NOTREADABLE) s+=" [Write-only]"; |
358 |
|
|
|
359 |
|
|
str = s.c_str(); |
360 |
|
|
return true; |
361 |
|
|
} |
362 |
|
|
|
363 |
|
|
////////////////////////////////////////////////////// |
364 |
|
|
/// Sets the formats from their ids, e g CML. |
365 |
|
|
/// If inID is NULL, the input format is left unchanged. Similarly for outID |
366 |
|
|
/// Returns true if both formats have been successfully set at sometime |
367 |
|
|
bool OBConversion::SetInAndOutFormats(const char* inID, const char* outID) |
368 |
|
|
{ |
369 |
|
|
return SetInFormat(inID) && SetOutFormat(outID); |
370 |
|
|
} |
371 |
|
|
////////////////////////////////////////////////////// |
372 |
|
|
|
373 |
|
|
/// Sets both formats from format objects.
/// The output format is only attempted once the input format is accepted.
bool OBConversion::SetInAndOutFormats(OBFormat* pIn, OBFormat* pOut)
{
  if(!SetInFormat(pIn))
    return false;

  return SetOutFormat(pOut);
}
377 |
|
|
////////////////////////////////////////////////////// |
378 |
|
|
/// Sets the input format from a format object.
/// A NULL argument leaves the current input format unchanged and returns true.
/// Otherwise returns false if the format is flagged NOTREADABLE.
bool OBConversion::SetInFormat(OBFormat* pIn)
{
  if(pIn!=NULL)
  {
    pInFormat=pIn;
    return !(pInFormat->Flags() & NOTREADABLE);
  }
  return true;
}
385 |
|
|
////////////////////////////////////////////////////// |
386 |
|
|
/// Sets the output format from a format object.
/// Returns true if the format is non-NULL and is not flagged NOTWRITABLE.
/// A NULL argument is stored as given (clearing the output format) and
/// yields false; previously it was dereferenced.
bool OBConversion::SetOutFormat(OBFormat* pOut)
{
  pOutFormat=pOut;
  if(pOutFormat==NULL)
    return false;
  return !(pOutFormat->Flags() & NOTWRITABLE);
}
391 |
|
|
////////////////////////////////////////////////////// |
392 |
|
|
bool OBConversion::SetInFormat(const char* inID) |
393 |
|
|
{ |
394 |
|
|
if(inID) |
395 |
|
|
pInFormat = FindFormat(inID); |
396 |
|
|
return pInFormat && !(pInFormat->Flags() & NOTREADABLE); |
397 |
|
|
} |
398 |
|
|
////////////////////////////////////////////////////// |
399 |
|
|
|
400 |
|
|
bool OBConversion::SetOutFormat(const char* outID) |
401 |
|
|
{ |
402 |
|
|
if(outID) |
403 |
|
|
pOutFormat= FindFormat(outID); |
404 |
|
|
return pOutFormat && !(pOutFormat->Flags() & NOTWRITABLE); |
405 |
|
|
} |
406 |
|
|
|
407 |
|
|
////////////////////////////////////////////////////// |
408 |
|
|
/// Converts from \a is to \a os; a non-NULL argument replaces the
/// corresponding stored stream. With HAVE_LIBZ, gzipped input is
/// decompressed, and output is compressed when the general option "z" is set.
/// Returns the value of Convert().
int OBConversion::Convert(istream* is, ostream* os)
{
  if(is) pInStream=is;
  if(os) pOutStream=os;

  //Without both streams the wrappers below cannot be built (they
  //dereference the stream pointers); let Convert() report the error.
  if(pInStream==NULL || pOutStream==NULL)
    return Convert();

  istream* pOrigInStream = pInStream;
  ostream* pOrigOutStream = pOutStream;

#ifdef HAVE_LIBZ
  zlib_stream::zip_istream zIn(*pInStream);
  if(zIn.is_gzip())
    pInStream = &zIn;

  zlib_stream::zip_ostream zOut(*pOutStream);
  if(IsOption("z",GENOPTIONS))
  {
    // make sure to output the header
    zOut.make_gzip();
    pOutStream = &zOut;
  }
#endif

  int count = Convert();

  //zIn and zOut are locals: neither stored pointer may refer to them
  //once this function has returned.
  pInStream = pOrigInStream;
  pOutStream = pOrigOutStream;
  return count;
}
433 |
|
|
|
434 |
|
|
//////////////////////////////////////////////////// |
435 |
|
|
/// Actions the "convert" interface. |
436 |
|
|
/// Calls the OBFormat class's ReadMolecule() which |
437 |
|
|
/// - makes a new chemical object of its chosen type (e.g. OBMol) |
438 |
|
|
/// - reads an object from the input file |
439 |
|
|
/// - subjects the chemical object to 'transformations' as specified by the Options |
440 |
|
|
/// - calls AddChemObject to add it to a buffer. The previous object is first output |
441 |
|
|
/// via the output Format's WriteMolecule(). During the output process calling |
442 |
|
|
/// IsFirst() and GetIndex() (the number of objects, including the current one, already output)
443 |
|
|
/// allows more control, for instance writing \<cml\> and \</cml\> tags for multiple molecule outputs only. |
444 |
|
|
/// |
445 |
|
|
/// AddChemObject does not save the object passed to it if it is NULL (as a result of a DoTransformation()) |
446 |
|
|
/// or if the number of the object is outside the range defined by |
447 |
|
|
/// StartNumber and EndNumber. This means the start and end counts apply to all chemical objects
448 |
|
|
/// found whether or not they are output. |
449 |
|
|
/// |
450 |
|
|
/// If ReadMolecule returns false the input conversion loop is exited. |
451 |
|
|
/// |
452 |
|
|
int OBConversion::Convert() |
453 |
|
|
{ |
454 |
|
|
if(pInStream==NULL || pOutStream==NULL) |
455 |
|
|
{ |
456 |
|
|
cerr << "input or output stream not set" << endl; |
457 |
|
|
return 0; |
458 |
|
|
} |
459 |
|
|
|
460 |
|
|
if(!pInFormat) return 0; |
461 |
|
|
Count=0;//number objects processed |
462 |
|
|
|
463 |
|
|
if(!SetStartAndEnd()) |
464 |
|
|
return 0; |
465 |
|
|
|
466 |
|
|
ReadyToInput=true; |
467 |
|
|
m_IsLast=false; |
468 |
|
|
pOb1=NULL; |
469 |
|
|
wInlen=0; |
470 |
|
|
|
471 |
|
|
//Input loop |
472 |
|
|
while(ReadyToInput && pInStream->peek() != EOF && pInStream->good()) |
473 |
|
|
{ |
474 |
|
|
if(pInStream==&cin) |
475 |
|
|
{ |
476 |
|
|
if(pInStream->peek()=='\n') |
477 |
|
|
break; |
478 |
|
|
} |
479 |
|
|
else |
480 |
|
|
rInpos = pInStream->tellg(); |
481 |
|
|
|
482 |
|
|
bool ret=false; |
483 |
|
|
try |
484 |
|
|
{ |
485 |
|
|
ret = pInFormat->ReadChemObject(this); |
486 |
|
|
} |
487 |
|
|
catch(...) |
488 |
|
|
{ |
489 |
|
|
if(!IsOption("e", GENOPTIONS) && !OneObjectOnly) |
490 |
|
|
throw; |
491 |
|
|
} |
492 |
|
|
|
493 |
|
|
if(!ret) |
494 |
|
|
{ |
495 |
|
|
//error or termination request: terminate unless |
496 |
|
|
// -e option requested and sucessfully can skip past current object |
497 |
|
|
if(!IsOption("e", GENOPTIONS) || pInFormat->SkipObjects(0,this)!=1) |
498 |
|
|
break; |
499 |
|
|
} |
500 |
|
|
if(OneObjectOnly) |
501 |
|
|
break; |
502 |
|
|
// Objects supplied to AddChemObject() which may output them after a delay |
503 |
|
|
//ReadyToInput may be made false in AddChemObject() |
504 |
|
|
// by WriteMolecule() returning false or by Count==EndNumber |
505 |
|
|
} |
506 |
|
|
|
507 |
|
|
//Output last object |
508 |
|
|
if(!MoreFilesToCome) |
509 |
|
|
m_IsLast=true; |
510 |
|
|
|
511 |
|
|
if(pOutFormat) |
512 |
|
|
if(!pOutFormat->WriteChemObject(this)) |
513 |
|
|
Index--; |
514 |
|
|
|
515 |
|
|
//Put AddChemObject() into non-queue mode |
516 |
|
|
Count= -1; |
517 |
|
|
EndNumber=StartNumber=0; pOb1=NULL;//leave tidy |
518 |
|
|
MoreFilesToCome=false; |
519 |
|
|
OneObjectOnly=false; |
520 |
|
|
|
521 |
|
|
return Index; //The number actually output |
522 |
|
|
} |
523 |
|
|
////////////////////////////////////////////////////// |
524 |
|
|
bool OBConversion::SetStartAndEnd() |
525 |
|
|
{ |
526 |
|
|
int TempStartNumber=0; |
527 |
|
|
const char* p = IsOption("f",GENOPTIONS); |
528 |
|
|
if(p) |
529 |
|
|
{ |
530 |
|
|
StartNumber=atoi(p); |
531 |
|
|
if(StartNumber>1) |
532 |
|
|
{ |
533 |
|
|
TempStartNumber=StartNumber; |
534 |
|
|
//Try to skip objects now |
535 |
|
|
int ret = pInFormat->SkipObjects(StartNumber-1,this); |
536 |
|
|
if(ret==-1) //error |
537 |
|
|
return false; |
538 |
|
|
if(ret==1) //success:objects skipped |
539 |
|
|
{ |
540 |
|
|
Count = StartNumber-1; |
541 |
|
|
StartNumber=0; |
542 |
|
|
} |
543 |
|
|
} |
544 |
|
|
} |
545 |
|
|
|
546 |
|
|
p = IsOption("l",GENOPTIONS); |
547 |
|
|
if(p) |
548 |
|
|
{ |
549 |
|
|
EndNumber=atoi(p); |
550 |
|
|
if(TempStartNumber && EndNumber<TempStartNumber) |
551 |
|
|
EndNumber=TempStartNumber; |
552 |
|
|
} |
553 |
|
|
|
554 |
|
|
return true; |
555 |
|
|
} |
556 |
|
|
|
557 |
|
|
////////////////////////////////////////////////////// |
558 |
|
|
/// Retrieves an object stored by AddChemObject() during output |
559 |
|
|
OBBase* OBConversion::GetChemObject()
{
  //Each retrieval counts as one output object
  ++Index;
  return pOb1;
}
564 |
|
|
|
565 |
|
|
////////////////////////////////////////////////////// |
566 |
|
|
/// Called by ReadMolecule() to deliver an object it has read from an input stream. |
567 |
|
|
/// Used in two modes: |
568 |
|
|
/// - When Count is negative it is left negative and the routine is just a store |
569 |
|
|
/// for an OBBase object. The negative value returned tells the calling |
570 |
|
|
/// routine that no more objects are required. |
571 |
|
|
/// - When count is >=0, probably set by Convert(), it acts as a queue of 2: |
572 |
|
|
/// writing the currently stored value before accepting the supplied one. This delay |
573 |
|
|
/// allows output routines to respond differently when the written object is the last. |
574 |
|
|
/// Count is incremented with each call, even if pOb=NULL. |
575 |
|
|
/// Objects are not added to the queue if the count is outside the range |
576 |
|
|
/// StartNumber to EndNumber. There is no upper limit if EndNumber is zero. |
577 |
|
|
/// The return value is the number of objects, including this one, which have been |
578 |
|
|
/// input (but not necessarily output). |
579 |
|
|
int OBConversion::AddChemObject(OBBase* pOb)
{
  //Store-only mode: keep the object and leave Count negative
  if(Count<0)
  {
    pOb1=pOb;
    return Count;
  }

  //Queue mode
  ++Count;
  if(Count<(int)StartNumber)
    return Count; //keeps reading objects but does nothing with them

  if(Count==(int)EndNumber)
    ReadyToInput=false; //stops any more objects being read

  rInlen = pInStream->tellg() - rInpos;

  if(!pOb)
    return Count;

  //Output the object already waiting, if any, before storing the new one
  if(pOb1 && pOutFormat && !pOutFormat->WriteChemObject(this))
  {
    //faultly write, so finish
    --Index;
    ReadyToInput=false;
    return Count;
  }

  pOb1=pOb;
  wInpos = rInpos; //Save the position in the input file to be accessed when writing it
  wInlen = rInlen;
  return Count;
}
614 |
|
|
////////////////////////////////////////////////////// |
615 |
|
|
int OBConversion::GetOutputIndex() const |
616 |
|
|
{ |
617 |
|
|
//The number of objects actually written already from this instance of OBConversion |
618 |
|
|
return Index; |
619 |
|
|
} |
620 |
|
|
void OBConversion::SetOutputIndex(int indx) |
621 |
|
|
{ |
622 |
|
|
Index=indx; |
623 |
|
|
} |
624 |
|
|
////////////////////////////////////////////////////// |
625 |
|
|
/// Looks up a registered format by its ID.
/// Returns NULL if \a ID is NULL or no format is registered under it.
/// Matching follows the key ordering of FMapType; the original comment
/// here described it as case insensitive (the comparator is not visible
/// in this file).
OBFormat* OBConversion::FindFormat(const char* ID)
{
  if(ID==NULL)
    return NULL;

  //Single lookup instead of find() followed by operator[]
  FMapType& formats = FormatsMap();
  FMapType::iterator pos = formats.find(ID);
  if(pos == formats.end())
    return NULL;
  return pos->second;
}
633 |
|
|
|
634 |
|
|
////////////////////////////////////////////////// |
635 |
|
|
const char* OBConversion::GetTitle() const |
636 |
|
|
{ |
637 |
|
|
return(InFilename.c_str()); |
638 |
|
|
} |
639 |
|
|
|
640 |
|
|
void OBConversion::SetMoreFilesToCome() |
641 |
|
|
{ |
642 |
|
|
MoreFilesToCome=true; |
643 |
|
|
} |
644 |
|
|
|
645 |
|
|
void OBConversion::SetOneObjectOnly() |
646 |
|
|
{ |
647 |
|
|
OneObjectOnly=true; |
648 |
|
|
m_IsLast=true; |
649 |
|
|
} |
650 |
|
|
|
651 |
|
|
///////////////////////////////////////////////////////// |
652 |
|
|
/// Finds the format registered under the extension of \a filename (the
/// text after the last '.'). With HAVE_LIBZ, an extension beginning ".gz"
/// is removed first and the extension before it is used.
/// Returns NULL if there is no extension or no matching format.
OBFormat* OBConversion::FormatFromExt(const char* filename)
{
  string file = filename;
  size_t extPos = file.rfind(".");

  if(extPos==string::npos)
    return NULL; //if no extension

  // only do this if we actually can read .gz files
#ifdef HAVE_LIBZ
  if (file.substr(extPos,3) == ".gz")
  {
    file.erase(extPos);
    extPos = file.rfind(".");
    if (extPos==string::npos)
      return NULL; //nothing left in front of the .gz
  }
#endif

  return FindFormat( file.substr(extPos + 1).c_str() );
}
674 |
|
|
|
675 |
|
|
/// Looks up a registered format by its MIME type.
/// Returns NULL if \a MIME is NULL or no format is registered under it.
OBFormat* OBConversion::FormatFromMIME(const char* MIME)
{
  if(MIME==NULL)
    return NULL;

  //Single lookup instead of find() followed by operator[]
  FMapType& formats = FormatsMIMEMap();
  FMapType::iterator pos = formats.find(MIME);
  if(pos == formats.end())
    return NULL;
  return pos->second;
}
682 |
|
|
|
683 |
|
|
/// Reads one object into \a pOb using the input format.
/// If \a pin is not NULL it lastingly replaces the stored input stream.
/// With HAVE_LIBZ, gzipped input is decompressed for this call.
/// Returns false if no input format or no input stream is set; otherwise
/// the result of the format's ReadMolecule().
bool OBConversion::Read(OBBase* pOb, std::istream* pin)
{
  if(pin)
    pInStream=pin;
  if(!pInFormat || !pInStream) return false;

  istream* pOrigInStream = pInStream;

#ifdef HAVE_LIBZ
  zlib_stream::zip_istream zIn(*pInStream);
  if(zIn.is_gzip())
    pInStream = &zIn;
#endif

  bool ret = pInFormat->ReadMolecule(pOb, this);

  //zIn is a local: the stored pointer must not refer to it after return
  pInStream = pOrigInStream;
  return ret;
}
697 |
|
|
////////////////////////////////////////////////// |
698 |
|
|
/// Writes the object pOb but does not delete it afterwards. |
699 |
|
|
/// The output stream is lastingly changed if pout is not NULL |
700 |
|
|
/// Returns true if successful. |
701 |
|
|
bool OBConversion::Write(OBBase* pOb, ostream* pos)
{
  if(pos)
    pOutStream=pos;
  if(!pOutFormat) return false;

  //Remembered so that the temporary gzip wrapper below does not outlive this call
  ostream* const pSavedOutStream = pOutStream;

#ifdef HAVE_LIBZ
  zlib_stream::zip_ostream zOut(*pOutStream);
  if(IsOption("z",GENOPTIONS))
  {
    // make sure to output the header
    zOut.make_gzip();
    pOutStream = &zOut;
  }
#endif

  const bool written = pOutFormat->WriteMolecule(pOb,this);
  pOutStream = pSavedOutStream;
  return written;
}
722 |
|
|
|
723 |
|
|
////////////////////////////////////////////////// |
724 |
|
|
/// Writes the object pOb but does not delete it afterwards. |
725 |
|
|
/// The output stream not changed (since we cannot write to this string later) |
726 |
|
|
/// Returns true if successful. |
727 |
|
|
std::string OBConversion::WriteString(OBBase* pOb) |
728 |
|
|
{ |
729 |
|
|
ostream *oldStream = pOutStream; // save old output |
730 |
|
|
stringstream newStream; |
731 |
|
|
|
732 |
|
|
if(pOutFormat) |
733 |
|
|
{ |
734 |
|
|
Write(pOb, &newStream); |
735 |
|
|
} |
736 |
|
|
pOutStream = oldStream; |
737 |
|
|
|
738 |
|
|
return newStream.str(); |
739 |
|
|
} |
740 |
|
|
|
741 |
|
|
////////////////////////////////////////////////// |
742 |
|
|
/// Writes the object pOb but does not delete it afterwards. |
743 |
|
|
/// The output stream is lastingly changed to point to the file |
744 |
|
|
/// Returns true if successful. |
745 |
|
|
bool OBConversion::WriteFile(OBBase* pOb, string filePath) |
746 |
|
|
{ |
747 |
|
|
if(!pOutFormat) return false; |
748 |
|
|
|
749 |
|
|
ofstream ofs; |
750 |
|
|
ios_base::openmode omode = |
751 |
|
|
pOutFormat->Flags() & WRITEBINARY ? ios_base::out|ios_base::binary : ios_base::out; |
752 |
|
|
|
753 |
|
|
ofs.open(filePath.c_str(),omode); |
754 |
|
|
if(!ofs) |
755 |
|
|
{ |
756 |
|
|
cerr << "Cannot write to " << filePath <<endl; |
757 |
|
|
return false; |
758 |
|
|
} |
759 |
|
|
|
760 |
|
|
return Write(pOb, &ofs); |
761 |
|
|
} |
762 |
|
|
|
763 |
|
|
//////////////////////////////////////////// |
764 |
|
|
/// Reads one object into \a pOb from the text in \a input.
/// The stored input stream is restored before returning, because the
/// string stream used for reading is local to this function.
/// Returns the result of Read().
bool OBConversion::ReadString(OBBase* pOb, std::string input)
{
  stringstream pin(input);

  //Read() makes &pin the stored input stream; put the previous one back
  //so that no pointer to the destroyed stringstream is left behind.
  istream* pOrigInStream = pInStream;
  bool ret = Read(pOb,&pin);
  pInStream = pOrigInStream;
  return ret;
}
769 |
|
|
|
770 |
|
|
|
771 |
|
|
//////////////////////////////////////////// |
772 |
|
|
/// Reads one object into \a pOb from the file \a filePath.
/// The file is opened in binary mode if the input format is flagged READBINARY.
/// The stored input stream is restored before returning, because the file
/// stream is local to this function.
/// Returns false if no input format is set or the file cannot be opened;
/// otherwise the result of Read().
bool OBConversion::ReadFile(OBBase* pOb, std::string filePath)
{
  if(!pInFormat) return false;

  ifstream ifs;
  //The open mode depends on the INPUT format. (This used to consult
  //pOutFormat, which may be NULL and is the wrong format anyway.)
  ios_base::openmode imode =
    pInFormat->Flags() & READBINARY ? ios_base::in|ios_base::binary : ios_base::in;

  ifs.open(filePath.c_str(),imode);
  if(!ifs)
  {
    cerr << "Cannot read from " << filePath << endl;
    return false;
  }

  //Read() makes &ifs the stored input stream; put the previous one back
  //so that no pointer to the destroyed ifstream is left behind.
  istream* pOrigInStream = pInStream;
  bool ret = Read(pOb,&ifs);
  pInStream = pOrigInStream;
  return ret;
}
789 |
|
|
|
790 |
|
|
|
791 |
|
|
//////////////////////////////////////////// |
792 |
|
|
/// Returns the help text listing the general conversion options.
const char* OBConversion::Description()
{
  //One literal continued over several lines with backslash-newline; any
  //whitespace at the start of a continuation line would become part of
  //the text.
  return "Conversion options\n \
-f <#> Start import at molecule # specified\n \
-l <#> End import at molecule # specified\n \
-t All input files describe a single molecule\n \
-e Continue with next object after error, if possible\n \
-z Compress the output with gzip\n";
}
801 |
|
|
|
802 |
|
|
//////////////////////////////////////////// |
803 |
|
|
bool OBConversion::IsLast() |
804 |
|
|
{ |
805 |
|
|
return m_IsLast; |
806 |
|
|
} |
807 |
|
|
//////////////////////////////////////////// |
808 |
|
|
bool OBConversion::IsFirstInput() |
809 |
|
|
{ |
810 |
|
|
return (Count==0); |
811 |
|
|
} |
812 |
|
|
|
813 |
|
|
///////////////////////////////////////////////// |
814 |
|
|
/// Replaces the first * in BaseName by InFile without its path and extension.
/// BaseName is returned unchanged if it contains no *.
/// For example, BaseName "NEW*.mol" and InFile "inpath/First.cml" give "NEWFirst.mol".
string OBConversion::BatchFileName(string& BaseName, string& InFile)
{
  string ofname(BaseName);

  //Positions are kept as size_type and compared with npos, rather than
  //narrowed into int and compared with -1.
  const string::size_type starPos = ofname.find('*');
  if(starPos==string::npos)
    return ofname;

  //The bare name starts after the last path separator, if there is one
  const string::size_type sepPos = InFile.find_last_of("\\/");
  const string::size_type nameStart = (sepPos==string::npos) ? 0 : sepPos+1;

  //The extension starts at the last dot; a dot inside the path part does not count
  string::size_type nameEnd = InFile.rfind('.');
  if(nameEnd==string::npos || nameEnd<nameStart)
    nameEnd = InFile.size();

  ofname.replace(starPos, 1, InFile, nameStart, nameEnd-nameStart);
  return ofname;
}
829 |
|
|
|
830 |
|
|
//////////////////////////////////////////////// |
831 |
|
|
string OBConversion::IncrementedFileName(string& BaseName, const int Count)
{
  //Returns a copy of BaseName in which the first * is replaced by the
  //decimal representation of Count. If BaseName contains no *, the copy
  //is returned unchanged.
  string ofname(BaseName);
  const string::size_type starpos = ofname.find('*');
  if(starpos != string::npos)
  {
    char num[33]; //ample room for any int printed with %d
    snprintf(num, sizeof(num), "%d", Count);
    ofname.replace(starpos, 1, num);
  }
  return ofname;
}
844 |
|
|
//////////////////////////////////////////////////// |
845 |
|
|
|
846 |
|
|
/** |
847 |
|
|
Makes input and output streams, and carries out normal, |
848 |
|
|
batch, aggregation, and splitting conversion. |
849 |
|
|
|
850 |
|
|
Normal |
851 |
|
|
Done if FileList contains a single file name and OutputFileName |
852 |
|
|
does not contain a *. |
853 |
|
|
|
854 |
|
|
Aggregation |
855 |
|
|
Done if FileList has more than one file name and OutputFileName does |
856 |
|
|
not contain * . All the chemical objects are converted and sent |
857 |
|
|
to the single output file. |
858 |
|
|
|
859 |
|
|
Splitting |
860 |
|
|
Done if FileList contains a single file name and OutputFileName |
861 |
|
|
contains a * . Each chemical object in the input file converted |
862 |
|
|
and sent to a separate file whose name is OutputFileName with the |
863 |
|
|
* replaced by 1, 2, 3, etc. |
864 |
|
|
For example, if OutputFileName is NEW*.smi then the output files are |
865 |
|
|
NEW1.smi, NEW2.smi, etc. |
866 |
|
|
|
867 |
|
|
Batch Conversion |
868 |
|
|
Done if FileList has more than one file name and contains a * . |
869 |
|
|
Each input file is converted to an output file whose name is |
870 |
|
|
OutputFileName with the * replaced by the inputfile name without its |
871 |
|
|
path and extension. |
872 |
|
|
So if the input files were inpath/First.cml, inpath/Second.cml |
873 |
|
|
and OutputFileName was NEW*.mol, the output files would be |
874 |
|
|
NEWFirst.mol, NEWSecond.mol. |
875 |
|
|
|
876 |
|
|
If FileList is empty, the input stream that has already been set |
877 |
|
|
(usually in the constructor) is used. If OutputFileName is empty, |
878 |
|
|
the output stream already set is used. |
879 |
|
|
|
880 |
|
|
On exit, OutputFileList contains the names of the output files. |
881 |
|
|
|
882 |
|
|
Returns the number of Chemical objects converted. |
883 |
|
|
*/ |
884 |
|
|
int OBConversion::FullConvert(std::vector<std::string>& FileList, std::string& OutputFileName, |
885 |
|
|
std::vector<std::string>& OutputFileList) |
886 |
|
|
{ |
887 |
|
|
|
888 |
|
|
istream* pInStream; |
889 |
|
|
ostream* pOutStream=NULL; |
890 |
|
|
ifstream is; |
891 |
|
|
ofstream os; |
892 |
|
|
bool HasMultipleOutputFiles=false; |
893 |
|
|
int Count=0; |
894 |
|
|
bool CommonInFormat = pInFormat ? true:false; //whether set in calling routine |
895 |
|
|
ios_base::openmode omode = |
896 |
|
|
pOutFormat->Flags() & WRITEBINARY ? ios_base::out|ios_base::binary : ios_base::out; |
897 |
|
|
try |
898 |
|
|
{ |
899 |
|
|
ofstream ofs; |
900 |
|
|
|
901 |
|
|
//OUTPUT |
902 |
|
|
if(OutputFileName.empty()) |
903 |
|
|
pOutStream = NULL; //use existing stream |
904 |
|
|
else |
905 |
|
|
{ |
906 |
|
|
if(OutputFileName.find_first_of('*')!=string::npos) HasMultipleOutputFiles = true; |
907 |
|
|
if(!HasMultipleOutputFiles) |
908 |
|
|
{ |
909 |
|
|
os.open(OutputFileName.c_str(),omode); |
910 |
|
|
if(!os) |
911 |
|
|
{ |
912 |
|
|
cerr << "Cannot write to " << OutputFileName <<endl; |
913 |
|
|
return 0; |
914 |
|
|
} |
915 |
|
|
OutputFileList.push_back(OutputFileName); |
916 |
|
|
pOutStream=&os; |
917 |
|
|
} |
918 |
|
|
} |
919 |
|
|
|
920 |
|
|
if(IsOption("t",GENOPTIONS)) |
921 |
|
|
{ |
922 |
|
|
//Concatenate input file option (multiple files, single molecule) |
923 |
|
|
if(HasMultipleOutputFiles) |
924 |
|
|
{ |
925 |
|
|
cerr << "Cannot have multiple output files and also concatenate input files (-t option)" <<endl; |
926 |
|
|
return 0; |
927 |
|
|
} |
928 |
|
|
|
929 |
|
|
stringstream allinput; |
930 |
|
|
vector<string>::iterator itr; |
931 |
|
|
for(itr=FileList.begin();itr!=FileList.end();itr++) |
932 |
|
|
{ |
933 |
|
|
ifstream ifs((*itr).c_str()); |
934 |
|
|
if(!ifs) |
935 |
|
|
{ |
936 |
|
|
cerr << "Cannot open " << *itr <<endl; |
937 |
|
|
continue; |
938 |
|
|
} |
939 |
|
|
allinput << ifs.rdbuf(); //Copy all file contents |
940 |
|
|
ifs.close(); |
941 |
|
|
} |
942 |
|
|
Count = Convert(&allinput,pOutStream); |
943 |
|
|
return Count; |
944 |
|
|
} |
945 |
|
|
|
946 |
|
|
//INPUT |
947 |
|
|
if(FileList.empty()) |
948 |
|
|
pInStream = NULL; |
949 |
|
|
else |
950 |
|
|
{ |
951 |
|
|
if(FileList.size()>1) |
952 |
|
|
{ |
953 |
|
|
//multiple input files |
954 |
|
|
vector<string>::iterator itr, tempitr; |
955 |
|
|
tempitr = FileList.end(); |
956 |
|
|
tempitr--; |
957 |
|
|
for(itr=FileList.begin();itr!=FileList.end();itr++) |
958 |
|
|
{ |
959 |
|
|
InFilename = *itr; |
960 |
|
|
ifstream ifs; |
961 |
|
|
if(!OpenAndSetFormat(CommonInFormat, &ifs)) |
962 |
|
|
continue; |
963 |
|
|
|
964 |
|
|
if(HasMultipleOutputFiles) |
965 |
|
|
{ |
966 |
|
|
//Batch conversion |
967 |
|
|
string batchfile = BatchFileName(OutputFileName,*itr); |
968 |
|
|
if(ofs.is_open()) ofs.close(); |
969 |
|
|
ofs.open(batchfile.c_str(), omode); |
970 |
|
|
if(!ofs) |
971 |
|
|
{ |
972 |
|
|
cerr << "Cannot open " << batchfile << endl; |
973 |
|
|
return Count; |
974 |
|
|
} |
975 |
|
|
OutputFileList.push_back(batchfile); |
976 |
|
|
SetOutputIndex(0); //reset for new file |
977 |
|
|
Count += Convert(&ifs,&ofs); |
978 |
|
|
} |
979 |
|
|
else |
980 |
|
|
{ |
981 |
|
|
//Aggregation |
982 |
|
|
if(itr!=tempitr) SetMoreFilesToCome(); |
983 |
|
|
Count = Convert(&ifs,pOutStream); |
984 |
|
|
} |
985 |
|
|
} |
986 |
|
|
return Count; |
987 |
|
|
} |
988 |
|
|
else |
989 |
|
|
{ |
990 |
|
|
//Single input file |
991 |
|
|
InFilename = FileList[0]; |
992 |
|
|
if(!OpenAndSetFormat(CommonInFormat, &is)) |
993 |
|
|
return 0; |
994 |
|
|
pInStream=&is; |
995 |
|
|
|
996 |
|
|
if(HasMultipleOutputFiles) |
997 |
|
|
{ |
998 |
|
|
//Splitting |
999 |
|
|
//Output is put in a temporary stream and written to a file |
1000 |
|
|
//with an augmenting name only when it contains a valid object. |
1001 |
|
|
int Indx=1; |
1002 |
|
|
for(;;) |
1003 |
|
|
{ |
1004 |
|
|
stringstream ss; |
1005 |
|
|
SetOutputIndex(0); //reset for new file |
1006 |
|
|
SetOneObjectOnly(); |
1007 |
|
|
|
1008 |
|
|
int ThisFileCount = Convert(pInStream,&ss); |
1009 |
|
|
if(ThisFileCount==0) break; |
1010 |
|
|
Count+=ThisFileCount; |
1011 |
|
|
|
1012 |
|
|
if(ofs.is_open()) ofs.close(); |
1013 |
|
|
string incrfile = IncrementedFileName(OutputFileName,Indx++); |
1014 |
|
|
ofs.open(incrfile.c_str(), omode); |
1015 |
|
|
if(!ofs) |
1016 |
|
|
{ |
1017 |
|
|
cerr << "Cannot write to " << incrfile << endl; |
1018 |
|
|
return Count; |
1019 |
|
|
} |
1020 |
|
|
OutputFileList.push_back(incrfile); |
1021 |
|
|
ofs << ss.rdbuf(); |
1022 |
|
|
ofs.close(); |
1023 |
|
|
ss.clear(); |
1024 |
|
|
} |
1025 |
|
|
return Count; |
1026 |
|
|
} |
1027 |
|
|
} |
1028 |
|
|
} |
1029 |
|
|
|
1030 |
|
|
//Single input and output files |
1031 |
|
|
Count = Convert(pInStream,pOutStream); |
1032 |
|
|
return Count; |
1033 |
|
|
} |
1034 |
|
|
catch(...) |
1035 |
|
|
{ |
1036 |
|
|
cerr << "Conversion failed with an exception. Count=" << Count <<endl; |
1037 |
|
|
return Count; |
1038 |
|
|
} |
1039 |
|
|
} |
1040 |
|
|
|
1041 |
|
|
bool OBConversion::OpenAndSetFormat(bool SetFormat, ifstream* is) |
1042 |
|
|
{ |
1043 |
|
|
//Opens file using InFilename and sets pInFormat if requested |
1044 |
|
|
if(!SetFormat) |
1045 |
|
|
{ |
1046 |
|
|
pInFormat = FormatFromExt(InFilename.c_str()); |
1047 |
|
|
if(pInFormat==NULL) |
1048 |
|
|
{ |
1049 |
|
|
string::size_type pos = InFilename.rfind('.'); |
1050 |
|
|
string ext; |
1051 |
|
|
if(pos!=string::npos) |
1052 |
|
|
ext = InFilename.substr(pos); |
1053 |
|
|
cerr << "Cannot read input format \"" << ext << '\"' |
1054 |
|
|
<< " for file \"" << InFilename << "\"" << endl; |
1055 |
|
|
return false; |
1056 |
|
|
} |
1057 |
|
|
} |
1058 |
|
|
|
1059 |
|
|
ios_base::openmode imode; |
1060 |
|
|
#ifdef ALL_READS_BINARY //Makes unix files compatible with VC++6 |
1061 |
|
|
imode = ios_base::in|ios_base::binary; |
1062 |
|
|
#else |
1063 |
|
|
imode = pInFormat->Flags() & READBINARY ? ios_base::in|ios_base::binary : ios_base::in; |
1064 |
|
|
#endif |
1065 |
|
|
|
1066 |
|
|
is->open(InFilename.c_str(), imode); |
1067 |
|
|
if(!is->good()) |
1068 |
|
|
{ |
1069 |
|
|
cerr << "Cannot open " << InFilename <<endl; |
1070 |
|
|
return false; |
1071 |
|
|
} |
1072 |
|
|
|
1073 |
|
|
return true; |
1074 |
|
|
} |
1075 |
|
|
|
1076 |
|
|
/////////////////////////////////////////////// |
1077 |
|
|
void OBConversion::AddOption(const char* opt, Option_type opttyp, const char* txt) |
1078 |
|
|
{ |
1079 |
|
|
//Also updates an option |
1080 |
|
|
if(txt==NULL) |
1081 |
|
|
OptionsArray[opttyp][opt]=string(); |
1082 |
|
|
else |
1083 |
|
|
OptionsArray[opttyp][opt]=txt; |
1084 |
|
|
} |
1085 |
|
|
|
1086 |
|
|
const char* OBConversion::IsOption(const char* opt, Option_type opttyp) |
1087 |
|
|
{ |
1088 |
|
|
//Returns NULL if option not found or a pointer to the text if it is |
1089 |
|
|
map<string,string>::iterator pos; |
1090 |
|
|
pos = OptionsArray[opttyp].find(opt); |
1091 |
|
|
if(pos==OptionsArray[opttyp].end()) |
1092 |
|
|
return NULL; |
1093 |
|
|
return pos->second.c_str(); |
1094 |
|
|
} |
1095 |
|
|
|
1096 |
|
|
bool OBConversion::RemoveOption(const char* opt, Option_type opttyp) |
1097 |
|
|
{ |
1098 |
|
|
return OptionsArray[opttyp].erase(opt)!=0;//true if was there |
1099 |
|
|
} |
1100 |
|
|
|
1101 |
|
|
void OBConversion::SetOptions(const char* options, Option_type opttyp) |
1102 |
|
|
{ |
1103 |
|
|
while(*options) |
1104 |
|
|
{ |
1105 |
|
|
string ch(1, *options++); |
1106 |
|
|
if(*options=='\"') |
1107 |
|
|
{ |
1108 |
|
|
string txt = options+1; |
1109 |
|
|
string::size_type pos = txt.find('\"'); |
1110 |
|
|
if(pos==string::npos) |
1111 |
|
|
return; //options is illformed |
1112 |
|
|
txt.erase(pos); |
1113 |
|
|
OptionsArray[opttyp][ch]= txt; |
1114 |
|
|
options += pos+2; |
1115 |
|
|
} |
1116 |
|
|
else |
1117 |
|
|
OptionsArray[opttyp][ch] = string(); |
1118 |
|
|
} |
1119 |
|
|
} |
1120 |
|
|
|
1121 |
|
|
typedef std::map<string,int> OPAMapType; |
1122 |
|
|
OPAMapType& OBConversion::OptionParamArray(Option_type typ) |
1123 |
|
|
{ |
1124 |
|
|
static OPAMapType* opa = new OPAMapType[3]; |
1125 |
|
|
return opa[typ]; |
1126 |
|
|
} |
1127 |
|
|
|
1128 |
|
|
void OBConversion::RegisterOptionParam(string name, OBFormat* pFormat,
                                       int numberParams, Option_type typ)
{
  //Records that option name, of type typ, takes numberParams parameters.
  //If the option is already registered with a different number, an error
  //message is written to cerr and the earlier registration is kept.
  //pFormat is only used to identify the caller in that message; when it
  //is NULL the caller is reported as "API".
  OPAMapType& registry = OptionParamArray(typ);
  const OPAMapType::iterator found = registry.find(name);

  if(found != registry.end() && found->second != numberParams)
  {
    string description("API");
    if(pFormat)
      description = pFormat->Description();
    //Only the first line of the description is shown
    cerr << "The number of parameters needed by option \"" << name << "\" in "
         << description.substr(0,description.find('\n'))
         << " differs from an earlier registration." << endl;
    return;
  }

  registry[name] = numberParams;
}
1149 |
|
|
|
1150 |
|
|
int OBConversion::GetOptionParams(string name, Option_type typ)
{
  //Returns the number of parameters registered for option name of type
  //typ, or 0 if the option has not been registered.
  OPAMapType& registry = OptionParamArray(typ);
  const OPAMapType::iterator found = registry.find(name);
  return (found == registry.end()) ? 0 : found->second;
}
1159 |
|
|
|
1160 |
|
|
}//namespace OpenBabel |
1161 |
|
|
|
1162 |
|
|
//! \file obconversion.cpp |
1163 |
|
|
//! \brief Implementation of OBFormat and OBConversion classes. |