| 1 |
/********************************************************************** |
| 2 |
obconversion.cpp - Implementation of OBFormat and OBConversion |
| 3 |
|
| 4 |
Copyright (C) 2004 by Chris Morley |
| 5 |
Some portions Copyright (C) 2005 by Geoffrey Hutchison |
| 6 |
|
| 7 |
This file is part of the Open Babel project. |
| 8 |
For more information, see <http://openbabel.sourceforge.net/> |
| 9 |
|
| 10 |
This program is free software; you can redistribute it and/or modify |
| 11 |
it under the terms of the GNU General Public License as published by |
| 12 |
the Free Software Foundation version 2 of the License. |
| 13 |
|
| 14 |
This program is distributed in the hope that it will be useful, |
| 15 |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 16 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 17 |
GNU General Public License for more details. |
| 18 |
***********************************************************************/ |
| 19 |
// Definition of OBConversion routines |
| 20 |
|
| 21 |
#ifdef _WIN32 |
| 22 |
#pragma warning (disable : 4786) |
| 23 |
|
| 24 |
//using 'this' in base class initializer |
| 25 |
#pragma warning (disable : 4355) |
| 26 |
|
| 27 |
#ifdef GUI |
| 28 |
#undef DATADIR |
| 29 |
#include "stdafx.h" //(includes<windows.h> |
| 30 |
#endif |
| 31 |
#endif |
| 32 |
|
| 33 |
#include <iostream> |
| 34 |
#include <fstream> |
| 35 |
#include <sstream> |
| 36 |
#include <string> |
| 37 |
#include <map> |
| 38 |
//#include <dlfcn.h> |
| 39 |
|
| 40 |
#include "obconversion.hpp" |
| 41 |
|
| 42 |
#ifdef HAVE_LIBZ |
| 43 |
#include "zipstream.hpp" |
| 44 |
#endif |
| 45 |
|
| 46 |
#if !HAVE_STRNCASECMP |
| 47 |
extern "C" int strncasecmp(const char *s1, const char *s2, size_t n); |
| 48 |
#endif |
| 49 |
|
| 50 |
#ifndef BUFF_SIZE |
| 51 |
#define BUFF_SIZE 32768 |
| 52 |
#endif |
| 53 |
|
| 54 |
using namespace std; |
| 55 |
namespace OpenBabel { |
| 56 |
|
| 57 |
const char* OBFormat::TargetClassDescription() |
| 58 |
{ |
| 59 |
//Provides class of default format unless overridden |
| 60 |
if(OBConversion::GetDefaultFormat()) |
| 61 |
return OBConversion::GetDefaultFormat()->TargetClassDescription(); |
| 62 |
else |
| 63 |
return ""; |
| 64 |
} |
| 65 |
/// Provides type information on the class of object this format handles.
/// Unless a derived format overrides this function, the answer is delegated
/// to the default format held by OBConversion.
const type_info& OBFormat::GetType()
{
  OBFormat* pDefault = OBConversion::GetDefaultFormat();

  // Only delegate to a *different* object: if the default format is this
  // object and has not overridden GetType(), delegating would recurse forever.
  if(pDefault!=NULL && pDefault!=this)
    return pDefault->GetType();

  // Placeholder result (the type of the 'this' pointer) when no default
  // format is available to answer the question.
  return typeid(this);
}
| 73 |
|
| 74 |
|
| 75 |
int OBConversion::FormatFilesLoaded = 0; |
| 76 |
|
| 77 |
OBFormat* OBConversion::pDefaultFormat=NULL; |
| 78 |
|
| 79 |
/// Constructs a conversion object which reads from \p is and writes to \p os.
/// Either stream may be NULL and supplied later.
/// The first object constructed also triggers LoadFormatFiles().
OBConversion::OBConversion(istream* is, ostream* os) :
  pInFormat(NULL),pOutFormat(NULL), Index(0), StartNumber(1),
  EndNumber(0), Count(-1), m_IsLast(true), MoreFilesToCome(false),
  OneObjectOnly(false), pOb1(NULL), pAuxConv(NULL)
{
  pInStream=is;
  pOutStream=os;

  // Bookkeeping used during Convert(). Give it defined values now, because
  // the copy constructor reads these members even if Convert() never ran.
  ReadyToInput=false;
  rInpos=0;
  wInpos=0;
  rInlen=0;
  wInlen=0;

  if (FormatFilesLoaded == 0)
    FormatFilesLoaded = LoadFormatFiles();

  //These options take a parameter
  RegisterOptionParam("f", NULL, 1,GENOPTIONS);
  RegisterOptionParam("l", NULL, 1,GENOPTIONS);
}
| 93 |
|
| 94 |
///This static function returns a reference to the FormatsMap |
| 95 |
///which, because it is a static local variable is constructed only once. |
| 96 |
///This fiddle is to avoid the "static initialization order fiasco" |
| 97 |
///See Marshall Cline's C++ FAQ Lite document, www.parashift.com/c++-faq-lite/". |
| 98 |
FMapType& OBConversion::FormatsMap() |
| 99 |
{ |
| 100 |
static FMapType* fm = new FMapType; |
| 101 |
return *fm; |
| 102 |
} |
| 103 |
|
| 104 |
///This static function returns a reference to the FormatsMIMEMap |
| 105 |
///which, because it is a static local variable is constructed only once. |
| 106 |
///This fiddle is to avoid the "static initialization order fiasco" |
| 107 |
///See Marshall Cline's C++ FAQ Lite document, www.parashift.com/c++-faq-lite/". |
| 108 |
FMapType& OBConversion::FormatsMIMEMap() |
| 109 |
{ |
| 110 |
static FMapType* fm = new FMapType; |
| 111 |
return *fm; |
| 112 |
} |
| 113 |
|
| 114 |
///////////////////////////////////////////////// |
| 115 |
/// Copy constructor.
/// Formats, streams and the stored object are copied as pointers, so the copy
/// refers to the same objects as \p o. The auxiliary conversion is not
/// copied: the new object starts without one.
OBConversion::OBConversion(const OBConversion& o)
{
  //Formats and streams
  pInFormat  = o.pInFormat;
  pOutFormat = o.pOutFormat;
  pInStream  = o.pInStream;
  pOutStream = o.pOutStream;

  //Counters and range
  Index       = o.Index;
  Count       = o.Count;
  StartNumber = o.StartNumber;
  EndNumber   = o.EndNumber;

  //One option map per option type
  for(int i=0; i<3; ++i)
    OptionsArray[i] = o.OptionsArray[i];

  //Input file bookkeeping
  InFilename = o.InFilename;
  rInpos = o.rInpos;
  rInlen = o.rInlen;
  wInpos = o.wInpos;
  wInlen = o.wInlen;

  //Conversion state
  m_IsLast        = o.m_IsLast;
  MoreFilesToCome = o.MoreFilesToCome;
  OneObjectOnly   = o.OneObjectOnly;
  ReadyToInput    = o.ReadyToInput;
  pOb1            = o.pOb1;

  pAuxConv = NULL;
}
| 141 |
//////////////////////////////////////////////// |
| 142 |
|
| 143 |
/// Destructor. Deletes the auxiliary conversion, unless that pointer refers
/// to this object itself.
OBConversion::~OBConversion()
{
  if(pAuxConv==this)
    return;
  delete pAuxConv;
}
| 148 |
////////////////////////////////////////////////////// |
| 149 |
|
| 150 |
/// Class information on formats is collected by making an instance of the class |
| 151 |
/// derived from OBFormat(only one is usually required). RegisterFormat() is called |
| 152 |
/// from its constructor. |
| 153 |
/// |
| 154 |
/// If the compiled format is stored separately, like in a DLL or shared library, |
| 155 |
/// the initialization code makes an instance of the imported OBFormat class. |
| 156 |
int OBConversion::RegisterFormat(const char* ID, OBFormat* pFormat, const char* MIME) |
| 157 |
{ |
| 158 |
FormatsMap()[ID] = pFormat; |
| 159 |
if (MIME) |
| 160 |
FormatsMIMEMap()[MIME] = pFormat; |
| 161 |
if(pFormat->Flags() & DEFAULTFORMAT) |
| 162 |
pDefaultFormat=pFormat; |
| 163 |
return FormatsMap().size(); |
| 164 |
} |
| 165 |
|
| 166 |
////////////////////////////////////////////////////// |
| 167 |
/// Loads format files. Dynamic loading is currently disabled, so this is a
/// stub that always reports 1; a non-zero result also stops the constructor
/// from calling it again.
int OBConversion::LoadFormatFiles()
{
  // Disabled implementation, kept here in outline:
  //   #ifdef USING_DYNAMIC_LIBS
  //     look successively in FORMATFILE_DIR, the executable directory or the
  //     current directory (DLHandler::getConvDirectory), list the files that
  //     match DLHandler::getFormatFilePattern() with DLHandler::findFiles,
  //     call DLHandler::openLib on each one, counting the successes and
  //     writing "<file> did not load properly" to cerr for the failures
  //   #else
  //     count = 1; //avoid calling this function several times
  //   #endif
  return 1;
}
| 201 |
|
| 202 |
/** |
| 203 |
*Returns the ID + the first line of the description in str |
| 204 |
*and a pointer to the format in pFormat. |
| 205 |
*If called with str==NULL the first format is returned; |
| 206 |
*subsequent formats are returned by calling with str!=NULL and the previous value of itr |
| 207 |
*returns false, and str and pFormat NULL, when there are no more formats. |
| 208 |
*Use like: |
| 209 |
*@code |
| 210 |
* const char* str=NULL; |
| 211 |
* Formatpos pos; |
| 212 |
* while(OBConversion::GetNextFormat(pos,str,pFormat)) |
| 213 |
* { |
| 214 |
* use str and pFormat |
| 215 |
* } |
| 216 |
*@endcode |
| 217 |
*/ |
| 218 |
bool OBConversion::GetNextFormat(Formatpos& itr, const char*& str,OBFormat*& pFormat) |
| 219 |
{ |
| 220 |
|
| 221 |
pFormat = NULL; |
| 222 |
if(str==NULL) |
| 223 |
itr = FormatsMap().begin(); |
| 224 |
else |
| 225 |
itr++; |
| 226 |
if(itr == FormatsMap().end()) |
| 227 |
{ |
| 228 |
str=NULL; pFormat=NULL; |
| 229 |
return false; |
| 230 |
} |
| 231 |
static string s; |
| 232 |
s =itr->first; |
| 233 |
pFormat = itr->second; |
| 234 |
if(pFormat) |
| 235 |
{ |
| 236 |
string description(pFormat->Description()); |
| 237 |
s += " -- "; |
| 238 |
s += description.substr(0,description.find('\n')); |
| 239 |
} |
| 240 |
|
| 241 |
if(pFormat->Flags() & NOTWRITABLE) s+=" [Read-only]"; |
| 242 |
if(pFormat->Flags() & NOTREADABLE) s+=" [Write-only]"; |
| 243 |
|
| 244 |
str = s.c_str(); |
| 245 |
return true; |
| 246 |
} |
| 247 |
|
| 248 |
////////////////////////////////////////////////////// |
| 249 |
/// Sets the formats from their ids, e g CML. |
| 250 |
/// If inID is NULL, the input format is left unchanged. Similarly for outID |
| 251 |
/// Returns true if both formats have been successfully set at sometime |
| 252 |
bool OBConversion::SetInAndOutFormats(const char* inID, const char* outID) |
| 253 |
{ |
| 254 |
return SetInFormat(inID) && SetOutFormat(outID); |
| 255 |
} |
| 256 |
////////////////////////////////////////////////////// |
| 257 |
|
| 258 |
/// Sets the input and output formats from format pointers.
/// Returns true only if both SetInFormat() and SetOutFormat() succeed;
/// the output format is only attempted if the input format succeeded.
bool OBConversion::SetInAndOutFormats(OBFormat* pIn, OBFormat* pOut)
{
  if(!SetInFormat(pIn))
    return false;
  return SetOutFormat(pOut);
}
| 262 |
////////////////////////////////////////////////////// |
| 263 |
/// Sets the input format from a format pointer.
/// A NULL pointer leaves the current input format unchanged and returns true.
/// Otherwise returns false if the format is flagged NOTREADABLE.
bool OBConversion::SetInFormat(OBFormat* pIn)
{
  if(pIn!=NULL)
  {
    pInFormat = pIn;
    const bool notReadable = (pInFormat->Flags() & NOTREADABLE) != 0;
    return !notReadable;
  }
  return true;
}
| 270 |
////////////////////////////////////////////////////// |
| 271 |
/// Sets the output format from a format pointer.
/// Returns true if the format is not NULL and is not flagged NOTWRITABLE.
/// Passing NULL clears the output format and returns false.
bool OBConversion::SetOutFormat(OBFormat* pOut)
{
  pOutFormat=pOut;
  //Check the pointer before asking for its flags; a NULL argument used to be
  //dereferenced here.
  return pOutFormat!=NULL && !(pOutFormat->Flags() & NOTWRITABLE);
}
| 276 |
////////////////////////////////////////////////////// |
| 277 |
bool OBConversion::SetInFormat(const char* inID) |
| 278 |
{ |
| 279 |
if(inID) |
| 280 |
pInFormat = FindFormat(inID); |
| 281 |
return pInFormat && !(pInFormat->Flags() & NOTREADABLE); |
| 282 |
} |
| 283 |
////////////////////////////////////////////////////// |
| 284 |
|
| 285 |
bool OBConversion::SetOutFormat(const char* outID) |
| 286 |
{ |
| 287 |
if(outID) |
| 288 |
pOutFormat= FindFormat(outID); |
| 289 |
return pOutFormat && !(pOutFormat->Flags() & NOTWRITABLE); |
| 290 |
} |
| 291 |
|
| 292 |
////////////////////////////////////////////////////// |
| 293 |
/// Converts everything on the input stream to the output stream.
/// A non-NULL \p is or \p os lastingly replaces the corresponding stream of
/// this object. When built with HAVE_LIBZ, gzipped input is decompressed
/// transparently and the output is gzipped if the general option "z" is set.
/// Returns the number of objects output, as reported by Convert().
int OBConversion::Convert(istream* is, ostream* os)
{
  if(is) pInStream=is;
  if(os) pOutStream=os;

  //Convert() reports a missing stream and returns 0. Hand over to it now,
  //before the zip wrappers below dereference a NULL stream.
  if(pInStream==NULL || pOutStream==NULL)
    return Convert();

  istream* pOrigInStream  = pInStream;
  ostream* pOrigOutStream = pOutStream;

#ifdef HAVE_LIBZ
  zlib_stream::zip_istream zIn(*pInStream);
  if(zIn.is_gzip())
    pInStream = &zIn;

  zlib_stream::zip_ostream zOut(*pOutStream);
  if(IsOption("z",GENOPTIONS))
  {
    // make sure to output the header
    zOut.make_gzip();
    pOutStream = &zOut;
  }
#endif

  int count;
  try
  {
    count = Convert();
  }
  catch(...)
  {
    //The zip wrappers are locals: never leave the members pointing at them
    pInStream  = pOrigInStream;
    pOutStream = pOrigOutStream;
    throw;
  }

  //Both streams are put back; previously only the output stream was, leaving
  //pInStream pointing at the destroyed zIn after gzipped input.
  pInStream  = pOrigInStream;
  pOutStream = pOrigOutStream;
  return count;
}
| 318 |
|
| 319 |
//////////////////////////////////////////////////// |
| 320 |
/// Actions the "convert" interface. |
| 321 |
/// Calls the OBFormat class's ReadMolecule() which |
| 322 |
/// - makes a new chemical object of its chosen type (e.g. OBMol) |
| 323 |
/// - reads an object from the input file |
| 324 |
/// - subjects the chemical object to 'transformations' as specified by the Options |
| 325 |
/// - calls AddChemObject to add it to a buffer. The previous object is first output |
| 326 |
/// via the output Format's WriteMolecule(). During the output process calling |
| 327 |
/// IsFirst() and GetIndex() (the number of objects including the current one already output. |
| 328 |
/// allows more control, for instance writing \<cml\> and \</cml\> tags for multiple molecule outputs only. |
| 329 |
/// |
| 330 |
/// AddChemObject does not save the object passed to it if it is NULL (as a result of a DoTransformation()) |
| 331 |
/// or if the number of the object is outside the range defined by |
| 332 |
/// StartNumber and EndNumber.This means the start and end counts apply to all chemical objects |
| 333 |
/// found whether or not they are output. |
| 334 |
/// |
| 335 |
/// If ReadMolecule returns false the input conversion loop is exited. |
| 336 |
/// |
| 337 |
int OBConversion::Convert() |
| 338 |
{ |
| 339 |
if(pInStream==NULL || pOutStream==NULL) |
| 340 |
{ |
| 341 |
cerr << "input or output stream not set" << endl; |
| 342 |
return 0; |
| 343 |
} |
| 344 |
|
| 345 |
if(!pInFormat) return 0; |
| 346 |
Count=0;//number objects processed |
| 347 |
|
| 348 |
if(!SetStartAndEnd()) |
| 349 |
return 0; |
| 350 |
|
| 351 |
ReadyToInput=true; |
| 352 |
m_IsLast=false; |
| 353 |
pOb1=NULL; |
| 354 |
wInlen=0; |
| 355 |
|
| 356 |
//Input loop |
| 357 |
while(ReadyToInput && pInStream->peek() != EOF && pInStream->good()) |
| 358 |
{ |
| 359 |
if(pInStream==&cin) |
| 360 |
{ |
| 361 |
if(pInStream->peek()=='\n') |
| 362 |
break; |
| 363 |
} |
| 364 |
else |
| 365 |
rInpos = pInStream->tellg(); |
| 366 |
|
| 367 |
bool ret=false; |
| 368 |
try |
| 369 |
{ |
| 370 |
ret = pInFormat->ReadChemObject(this); |
| 371 |
} |
| 372 |
catch(...) |
| 373 |
{ |
| 374 |
if(!IsOption("e", GENOPTIONS) && !OneObjectOnly) |
| 375 |
throw; |
| 376 |
} |
| 377 |
|
| 378 |
if(!ret) |
| 379 |
{ |
| 380 |
//error or termination request: terminate unless |
| 381 |
// -e option requested and sucessfully can skip past current object |
| 382 |
if(!IsOption("e", GENOPTIONS) || pInFormat->SkipObjects(0,this)!=1) |
| 383 |
break; |
| 384 |
} |
| 385 |
if(OneObjectOnly) |
| 386 |
break; |
| 387 |
// Objects supplied to AddChemObject() which may output them after a delay |
| 388 |
//ReadyToInput may be made false in AddChemObject() |
| 389 |
// by WriteMolecule() returning false or by Count==EndNumber |
| 390 |
} |
| 391 |
|
| 392 |
//Output last object |
| 393 |
if(!MoreFilesToCome) |
| 394 |
m_IsLast=true; |
| 395 |
|
| 396 |
if(pOutFormat) |
| 397 |
if(!pOutFormat->WriteChemObject(this)) |
| 398 |
Index--; |
| 399 |
|
| 400 |
//Put AddChemObject() into non-queue mode |
| 401 |
Count= -1; |
| 402 |
EndNumber=StartNumber=0; pOb1=NULL;//leave tidy |
| 403 |
MoreFilesToCome=false; |
| 404 |
OneObjectOnly=false; |
| 405 |
|
| 406 |
return Index; //The number actually output |
| 407 |
} |
| 408 |
////////////////////////////////////////////////////// |
| 409 |
bool OBConversion::SetStartAndEnd() |
| 410 |
{ |
| 411 |
int TempStartNumber=0; |
| 412 |
const char* p = IsOption("f",GENOPTIONS); |
| 413 |
if(p) |
| 414 |
{ |
| 415 |
StartNumber=atoi(p); |
| 416 |
if(StartNumber>1) |
| 417 |
{ |
| 418 |
TempStartNumber=StartNumber; |
| 419 |
//Try to skip objects now |
| 420 |
int ret = pInFormat->SkipObjects(StartNumber-1,this); |
| 421 |
if(ret==-1) //error |
| 422 |
return false; |
| 423 |
if(ret==1) //success:objects skipped |
| 424 |
{ |
| 425 |
Count = StartNumber-1; |
| 426 |
StartNumber=0; |
| 427 |
} |
| 428 |
} |
| 429 |
} |
| 430 |
|
| 431 |
p = IsOption("l",GENOPTIONS); |
| 432 |
if(p) |
| 433 |
{ |
| 434 |
EndNumber=atoi(p); |
| 435 |
if(TempStartNumber && EndNumber<TempStartNumber) |
| 436 |
EndNumber=TempStartNumber; |
| 437 |
} |
| 438 |
|
| 439 |
return true; |
| 440 |
} |
| 441 |
|
| 442 |
////////////////////////////////////////////////////// |
| 443 |
/// Retrieves an object stored by AddChemObject() during output |
| 444 |
OBBase* OBConversion::GetChemObject() |
| 445 |
{ |
| 446 |
Index++; |
| 447 |
return pOb1; |
| 448 |
} |
| 449 |
|
| 450 |
////////////////////////////////////////////////////// |
| 451 |
/// Called by ReadMolecule() to deliver an object it has read from an input stream. |
| 452 |
/// Used in two modes: |
| 453 |
/// - When Count is negative it is left negative and the routine is just a store |
| 454 |
/// for an OBBase object. The negative value returned tells the calling |
| 455 |
/// routine that no more objects are required. |
| 456 |
/// - When count is >=0, probably set by Convert(), it acts as a queue of 2: |
| 457 |
/// writing the currently stored value before accepting the supplied one. This delay |
| 458 |
/// allows output routines to respond differently when the written object is the last. |
| 459 |
/// Count is incremented with each call, even if pOb=NULL. |
| 460 |
/// Objects are not added to the queue if the count is outside the range |
| 461 |
/// StartNumber to EndNumber. There is no upper limit if EndNumber is zero. |
| 462 |
/// The return value is the number of objects, including this one, which have been |
| 463 |
/// input (but not necessarily output). |
| 464 |
int OBConversion::AddChemObject(OBBase* pOb) |
| 465 |
{ |
| 466 |
if(Count<0) |
| 467 |
{ |
| 468 |
pOb1=pOb; |
| 469 |
return Count; |
| 470 |
} |
| 471 |
Count++; |
| 472 |
if(Count>=(int)StartNumber)//keeps reading objects but does nothing with them |
| 473 |
{ |
| 474 |
if(Count==(int)EndNumber) |
| 475 |
ReadyToInput=false; //stops any more objects being read |
| 476 |
|
| 477 |
rInlen = pInStream->tellg() - rInpos; |
| 478 |
|
| 479 |
if(pOb) |
| 480 |
{ |
| 481 |
if(pOb1 && pOutFormat) //see if there is an object ready to be output |
| 482 |
{ |
| 483 |
//Output object |
| 484 |
if (!pOutFormat->WriteChemObject(this)) |
| 485 |
{ |
| 486 |
//faultly write, so finish |
| 487 |
--Index; |
| 488 |
ReadyToInput=false; |
| 489 |
return Count; |
| 490 |
} |
| 491 |
} |
| 492 |
pOb1=pOb; |
| 493 |
wInpos = rInpos; //Save the position in the input file to be accessed when writing it |
| 494 |
wInlen = rInlen; |
| 495 |
} |
| 496 |
} |
| 497 |
return Count; |
| 498 |
} |
| 499 |
////////////////////////////////////////////////////// |
| 500 |
int OBConversion::GetOutputIndex() const |
| 501 |
{ |
| 502 |
//The number of objects actually written already from this instance of OBConversion |
| 503 |
return Index; |
| 504 |
} |
| 505 |
void OBConversion::SetOutputIndex(int indx) |
| 506 |
{ |
| 507 |
Index=indx; |
| 508 |
} |
| 509 |
////////////////////////////////////////////////////// |
| 510 |
/// Looks up a registered format by its ID.
/// Returns NULL if ID is NULL or no format is registered under it.
OBFormat* OBConversion::FindFormat(const char* ID)
{
  if(ID==NULL)
    return NULL;

  //One lookup only; the entry found is returned directly instead of being
  //searched for a second time through operator[].
  FMapType& formats = FormatsMap();
  FMapType::iterator pos = formats.find(ID);
  if(pos == formats.end())
    return NULL;
  return pos->second;
}
| 518 |
|
| 519 |
////////////////////////////////////////////////// |
| 520 |
const char* OBConversion::GetTitle() const |
| 521 |
{ |
| 522 |
return(InFilename.c_str()); |
| 523 |
} |
| 524 |
|
| 525 |
void OBConversion::SetMoreFilesToCome() |
| 526 |
{ |
| 527 |
MoreFilesToCome=true; |
| 528 |
} |
| 529 |
|
| 530 |
void OBConversion::SetOneObjectOnly() |
| 531 |
{ |
| 532 |
OneObjectOnly=true; |
| 533 |
m_IsLast=true; |
| 534 |
} |
| 535 |
|
| 536 |
///////////////////////////////////////////////////////// |
| 537 |
/// Finds the format registered under the extension of \p filename.
/// When built with HAVE_LIBZ an extension starting ".gz" is stripped first,
/// so that the extension in front of it selects the format.
/// Returns NULL if filename is NULL, has no extension, or the extension is
/// not a registered format ID.
OBFormat* OBConversion::FormatFromExt(const char* filename)
{
  //A std::string must not be built from a NULL pointer
  if(filename==NULL)
    return NULL;

  string file = filename;
  size_t extPos = file.rfind('.');
  if(extPos==string::npos)
    return NULL; //if no extension

  // only do this if we actually can read .gz files
#ifdef HAVE_LIBZ
  if(file.substr(extPos,3) == ".gz")
  {
    file.erase(extPos);
    extPos = file.rfind('.');
    if(extPos==string::npos)
      return NULL; //nothing in front of .gz
  }
#endif

  return FindFormat( file.substr(extPos + 1).c_str() );
}
| 559 |
|
| 560 |
/// Looks up a registered format by its MIME type.
/// Returns NULL if MIME is NULL or no format is registered under it.
OBFormat* OBConversion::FormatFromMIME(const char* MIME)
{
  if(MIME==NULL)
    return NULL;

  //One lookup only; the entry found is returned directly instead of being
  //searched for a second time through operator[].
  FMapType& mimeFormats = FormatsMIMEMap();
  FMapType::iterator pos = mimeFormats.find(MIME);
  if(pos == mimeFormats.end())
    return NULL;
  return pos->second;
}
| 567 |
|
| 568 |
/// Reads one object into pOb using the current input format.
/// The input stream is lastingly changed if pin is not NULL. When built with
/// HAVE_LIBZ, gzipped input is decompressed transparently for this call.
/// Returns false if no input format or no input stream is set; otherwise the
/// result of the format's ReadMolecule().
bool OBConversion::Read(OBBase* pOb, std::istream* pin)
{
  if(pin)
    pInStream=pin;
  if(!pInFormat) return false;
  if(!pInStream) return false; //nothing to read from (and nothing to wrap below)

  istream* pOrigInStream = pInStream;

#ifdef HAVE_LIBZ
  zlib_stream::zip_istream zIn(*pInStream);
  if(zIn.is_gzip())
    pInStream = &zIn;
#endif

  bool ret = pInFormat->ReadMolecule(pOb, this);

  //zIn is a local: do not leave pInStream pointing at it after returning
  pInStream = pOrigInStream;
  return ret;
}
| 582 |
////////////////////////////////////////////////// |
| 583 |
/// Writes the object pOb but does not delete it afterwards. |
| 584 |
/// The output stream is lastingly changed if pout is not NULL |
| 585 |
/// Returns true if successful. |
| 586 |
bool OBConversion::Write(OBBase* pOb, ostream* pos) |
| 587 |
{ |
| 588 |
if(pos) |
| 589 |
pOutStream=pos; |
| 590 |
if(!pOutFormat) return false; |
| 591 |
|
| 592 |
ostream* pOrigOutStream = pOutStream; |
| 593 |
#ifdef HAVE_LIBZ |
| 594 |
zlib_stream::zip_ostream zOut(*pOutStream); |
| 595 |
if(IsOption("z",GENOPTIONS)) |
| 596 |
{ |
| 597 |
// make sure to output the header |
| 598 |
zOut.make_gzip(); |
| 599 |
pOutStream = &zOut; |
| 600 |
} |
| 601 |
#endif |
| 602 |
|
| 603 |
bool ret = pOutFormat->WriteMolecule(pOb,this); |
| 604 |
pOutStream = pOrigOutStream; |
| 605 |
return ret; |
| 606 |
} |
| 607 |
|
| 608 |
////////////////////////////////////////////////// |
| 609 |
/// Writes the object pOb but does not delete it afterwards. |
| 610 |
/// The output stream not changed (since we cannot write to this string later) |
| 611 |
/// Returns true if successful. |
| 612 |
std::string OBConversion::WriteString(OBBase* pOb) |
| 613 |
{ |
| 614 |
ostream *oldStream = pOutStream; // save old output |
| 615 |
stringstream newStream; |
| 616 |
|
| 617 |
if(pOutFormat) |
| 618 |
{ |
| 619 |
Write(pOb, &newStream); |
| 620 |
} |
| 621 |
pOutStream = oldStream; |
| 622 |
|
| 623 |
return newStream.str(); |
| 624 |
} |
| 625 |
|
| 626 |
////////////////////////////////////////////////// |
| 627 |
/// Writes the object pOb but does not delete it afterwards. |
| 628 |
/// The output stream is lastingly changed to point to the file |
| 629 |
/// Returns true if successful. |
| 630 |
bool OBConversion::WriteFile(OBBase* pOb, string filePath) |
| 631 |
{ |
| 632 |
if(!pOutFormat) return false; |
| 633 |
|
| 634 |
ofstream ofs; |
| 635 |
ios_base::openmode omode = |
| 636 |
pOutFormat->Flags() & WRITEBINARY ? ios_base::out|ios_base::binary : ios_base::out; |
| 637 |
|
| 638 |
ofs.open(filePath.c_str(),omode); |
| 639 |
if(!ofs) |
| 640 |
{ |
| 641 |
cerr << "Cannot write to " << filePath <<endl; |
| 642 |
return false; |
| 643 |
} |
| 644 |
|
| 645 |
return Write(pOb, &ofs); |
| 646 |
} |
| 647 |
|
| 648 |
//////////////////////////////////////////// |
| 649 |
/// Reads one object into pOb from the text in input.
/// The string stream exists only for the duration of this call, so the
/// previous input stream is put back before returning.
/// Returns the result of Read().
bool OBConversion::ReadString(OBBase* pOb, std::string input)
{
  stringstream pin(input);

  //Read() makes pInStream point at pin, which is destroyed when this function
  //returns; restore the caller's stream so no dangling pointer is left.
  istream* pOrigInStream = pInStream;
  bool ret = Read(pOb,&pin);
  pInStream = pOrigInStream;
  return ret;
}
| 654 |
|
| 655 |
|
| 656 |
//////////////////////////////////////////// |
| 657 |
/// Reads one object into pOb from the file filePath.
/// The file is opened in binary mode if the input format is flagged
/// READBINARY. The file stream exists only for the duration of this call, so
/// the previous input stream is put back before returning.
/// Returns false if no input format is set or the file cannot be opened (in
/// which case a message is written to cerr); otherwise the result of Read().
bool OBConversion::ReadFile(OBBase* pOb, std::string filePath)
{
  if(!pInFormat) return false;

  ifstream ifs;
  //The open mode depends on the *input* format. This used to consult
  //pOutFormat, which is the wrong format and may not be set at all.
  ios_base::openmode imode =
    pInFormat->Flags() & READBINARY ? ios_base::in|ios_base::binary : ios_base::in;

  ifs.open(filePath.c_str(),imode);
  if(!ifs)
  {
    cerr << "Cannot read from " << filePath << endl;
    return false;
  }

  //Read() makes pInStream point at ifs, which is destroyed when this function
  //returns; restore the caller's stream so no dangling pointer is left.
  istream* pOrigInStream = pInStream;
  bool ret = Read(pOb,&ifs);
  pInStream = pOrigInStream;
  return ret;
}
| 674 |
|
| 675 |
|
| 676 |
//////////////////////////////////////////// |
| 677 |
/// Returns the help text for the general conversion options
/// (-f, -l, -t, -e and -z), one option per line.
const char* OBConversion::Description() |
| 678 |
{ |
| 679 |
return "Conversion options\n \ |
| 680 |
-f <#> Start import at molecule # specified\n \ |
| 681 |
-l <#> End import at molecule # specified\n \ |
| 682 |
-t All input files describe a single molecule\n \ |
| 683 |
-e Continue with next object after error, if possible\n \ |
| 684 |
-z Compress the output with gzip\n"; |
| 685 |
} |
| 686 |
|
| 687 |
//////////////////////////////////////////// |
| 688 |
bool OBConversion::IsLast() |
| 689 |
{ |
| 690 |
return m_IsLast; |
| 691 |
} |
| 692 |
//////////////////////////////////////////// |
| 693 |
bool OBConversion::IsFirstInput() |
| 694 |
{ |
| 695 |
return (Count==0); |
| 696 |
} |
| 697 |
|
| 698 |
///////////////////////////////////////////////// |
| 699 |
/// Replaces the first * in BaseName by InFile without its path and extension.
/// For example BaseName "NEW*.mol" and InFile "inpath/First.cml" give
/// "NEWFirst.mol". BaseName is returned unchanged if it contains no *.
string OBConversion::BatchFileName(string& BaseName, string& InFile)
{
  string ofname(BaseName);

  //Positions are kept as size_type and tested against npos, rather than being
  //narrowed to int and compared with -1.
  const string::size_type pos = ofname.find('*');
  if(pos==string::npos)
    return ofname;

  //The name starts after the last path separator, if there is one
  const string::size_type posname = InFile.find_last_of("\\/");
  const string::size_type start = (posname==string::npos) ? 0 : posname+1;

  //The name ends at the last dot, unless that dot belongs to a directory
  //(or there is no dot), in which case it runs to the end of InFile.
  string::size_type posdot = InFile.rfind('.');
  if(posdot==string::npos || posdot<start)
    posdot = InFile.size();

  ofname.replace(pos, 1, InFile, start, posdot-start);
  return ofname;
}
| 714 |
|
| 715 |
//////////////////////////////////////////////// |
| 716 |
/// Replaces the first * in BaseName by Count written as a decimal number.
/// For example BaseName "NEW*.smi" and Count 2 give "NEW2.smi".
/// BaseName is returned unchanged if it contains no *.
string OBConversion::IncrementedFileName(string& BaseName, const int Count)
{
  string ofname(BaseName);

  //The position is kept as size_type and tested against npos, rather than
  //being narrowed to int and compared with 0.
  const string::size_type pos = ofname.find('*');
  if(pos!=string::npos)
  {
    char num[33];
    snprintf(num, sizeof(num), "%d", Count);
    ofname.replace(pos,1, num);
  }
  return ofname;
}
| 729 |
//////////////////////////////////////////////////// |
| 730 |
|
| 731 |
/** |
| 732 |
Makes input and output streams, and carries out normal, |
| 733 |
batch, aggregation, and splitting conversion. |
| 734 |
|
| 735 |
Normal |
| 736 |
Done if FileList contains a single file name and OutputFileName |
| 737 |
does not contain a *. |
| 738 |
|
| 739 |
Aggregation |
| 740 |
Done if FileList has more than one file name and OutputFileName does |
| 741 |
not contain * . All the chemical objects are converted and sent |
| 742 |
to the single output file. |
| 743 |
|
| 744 |
Splitting |
| 745 |
Done if FileList contains a single file name and OutputFileName |
| 746 |
contains a * . Each chemical object in the input file converted |
| 747 |
and sent to a separate file whose name is OutputFileName with the |
| 748 |
* replaced by 1, 2, 3, etc. |
| 749 |
For example, if OutputFileName is NEW*.smi then the output files are |
| 750 |
NEW1.smi, NEW2.smi, etc. |
| 751 |
|
| 752 |
Batch Conversion |
| 753 |
Done if FileList has more than one file name and contains a * . |
| 754 |
Each input file is converted to an output file whose name is |
| 755 |
OutputFileName with the * replaced by the inputfile name without its |
| 756 |
path and extension. |
| 757 |
So if the input files were inpath/First.cml, inpath/Second.cml |
| 758 |
and OutputFileName was NEW*.mol, the output files would be |
| 759 |
NEWFirst.mol, NEWSecond.mol. |
| 760 |
|
| 761 |
If FileList is empty, the input stream that has already been set |
| 762 |
(usually in the constructor) is used. If OutputFileName is empty, |
| 763 |
the output stream already set is used. |
| 764 |
|
| 765 |
On exit, OutputFileList contains the names of the output files. |
| 766 |
|
| 767 |
Returns the number of Chemical objects converted. |
| 768 |
*/ |
| 769 |
int OBConversion::FullConvert(std::vector<std::string>& FileList, std::string& OutputFileName, |
| 770 |
std::vector<std::string>& OutputFileList) |
| 771 |
{ |
| 772 |
|
| 773 |
istream* pInStream; |
| 774 |
ostream* pOutStream=NULL; |
| 775 |
ifstream is; |
| 776 |
ofstream os; |
| 777 |
bool HasMultipleOutputFiles=false; |
| 778 |
int Count=0; |
| 779 |
bool CommonInFormat = pInFormat ? true:false; //whether set in calling routine |
| 780 |
ios_base::openmode omode = |
| 781 |
pOutFormat->Flags() & WRITEBINARY ? ios_base::out|ios_base::binary : ios_base::out; |
| 782 |
try |
| 783 |
{ |
| 784 |
ofstream ofs; |
| 785 |
|
| 786 |
//OUTPUT |
| 787 |
if(OutputFileName.empty()) |
| 788 |
pOutStream = NULL; //use existing stream |
| 789 |
else |
| 790 |
{ |
| 791 |
if(OutputFileName.find_first_of('*')!=string::npos) HasMultipleOutputFiles = true; |
| 792 |
if(!HasMultipleOutputFiles) |
| 793 |
{ |
| 794 |
os.open(OutputFileName.c_str(),omode); |
| 795 |
if(!os) |
| 796 |
{ |
| 797 |
cerr << "Cannot write to " << OutputFileName <<endl; |
| 798 |
return 0; |
| 799 |
} |
| 800 |
OutputFileList.push_back(OutputFileName); |
| 801 |
pOutStream=&os; |
| 802 |
} |
| 803 |
} |
| 804 |
|
| 805 |
if(IsOption("t",GENOPTIONS)) |
| 806 |
{ |
| 807 |
//Concatenate input file option (multiple files, single molecule) |
| 808 |
if(HasMultipleOutputFiles) |
| 809 |
{ |
| 810 |
cerr << "Cannot have multiple output files and also concatenate input files (-t option)" <<endl; |
| 811 |
return 0; |
| 812 |
} |
| 813 |
|
| 814 |
stringstream allinput; |
| 815 |
vector<string>::iterator itr; |
| 816 |
for(itr=FileList.begin();itr!=FileList.end();itr++) |
| 817 |
{ |
| 818 |
ifstream ifs((*itr).c_str()); |
| 819 |
if(!ifs) |
| 820 |
{ |
| 821 |
cerr << "Cannot open " << *itr <<endl; |
| 822 |
continue; |
| 823 |
} |
| 824 |
allinput << ifs.rdbuf(); //Copy all file contents |
| 825 |
ifs.close(); |
| 826 |
} |
| 827 |
Count = Convert(&allinput,pOutStream); |
| 828 |
return Count; |
| 829 |
} |
| 830 |
|
| 831 |
//INPUT |
| 832 |
if(FileList.empty()) |
| 833 |
pInStream = NULL; |
| 834 |
else |
| 835 |
{ |
| 836 |
if(FileList.size()>1) |
| 837 |
{ |
| 838 |
//multiple input files |
| 839 |
vector<string>::iterator itr, tempitr; |
| 840 |
tempitr = FileList.end(); |
| 841 |
tempitr--; |
| 842 |
for(itr=FileList.begin();itr!=FileList.end();itr++) |
| 843 |
{ |
| 844 |
InFilename = *itr; |
| 845 |
ifstream ifs; |
| 846 |
if(!OpenAndSetFormat(CommonInFormat, &ifs)) |
| 847 |
continue; |
| 848 |
|
| 849 |
if(HasMultipleOutputFiles) |
| 850 |
{ |
| 851 |
//Batch conversion |
| 852 |
string batchfile = BatchFileName(OutputFileName,*itr); |
| 853 |
if(ofs.is_open()) ofs.close(); |
| 854 |
ofs.open(batchfile.c_str(), omode); |
| 855 |
if(!ofs) |
| 856 |
{ |
| 857 |
cerr << "Cannot open " << batchfile << endl; |
| 858 |
return Count; |
| 859 |
} |
| 860 |
OutputFileList.push_back(batchfile); |
| 861 |
SetOutputIndex(0); //reset for new file |
| 862 |
Count += Convert(&ifs,&ofs); |
| 863 |
} |
| 864 |
else |
| 865 |
{ |
| 866 |
//Aggregation |
| 867 |
if(itr!=tempitr) SetMoreFilesToCome(); |
| 868 |
Count = Convert(&ifs,pOutStream); |
| 869 |
} |
| 870 |
} |
| 871 |
return Count; |
| 872 |
} |
| 873 |
else |
| 874 |
{ |
| 875 |
//Single input file |
| 876 |
InFilename = FileList[0]; |
| 877 |
if(!OpenAndSetFormat(CommonInFormat, &is)) |
| 878 |
return 0; |
| 879 |
pInStream=&is; |
| 880 |
|
| 881 |
if(HasMultipleOutputFiles) |
| 882 |
{ |
| 883 |
//Splitting |
| 884 |
//Output is put in a temporary stream and written to a file |
| 885 |
//with an augmenting name only when it contains a valid object. |
| 886 |
int Indx=1; |
| 887 |
for(;;) |
| 888 |
{ |
| 889 |
stringstream ss; |
| 890 |
SetOutputIndex(0); //reset for new file |
| 891 |
SetOneObjectOnly(); |
| 892 |
|
| 893 |
int ThisFileCount = Convert(pInStream,&ss); |
| 894 |
if(ThisFileCount==0) break; |
| 895 |
Count+=ThisFileCount; |
| 896 |
|
| 897 |
if(ofs.is_open()) ofs.close(); |
| 898 |
string incrfile = IncrementedFileName(OutputFileName,Indx++); |
| 899 |
ofs.open(incrfile.c_str(), omode); |
| 900 |
if(!ofs) |
| 901 |
{ |
| 902 |
cerr << "Cannot write to " << incrfile << endl; |
| 903 |
return Count; |
| 904 |
} |
| 905 |
OutputFileList.push_back(incrfile); |
| 906 |
ofs << ss.rdbuf(); |
| 907 |
ofs.close(); |
| 908 |
ss.clear(); |
| 909 |
} |
| 910 |
return Count; |
| 911 |
} |
| 912 |
} |
| 913 |
} |
| 914 |
|
| 915 |
//Single input and output files |
| 916 |
Count = Convert(pInStream,pOutStream); |
| 917 |
return Count; |
| 918 |
} |
| 919 |
catch(...) |
| 920 |
{ |
| 921 |
cerr << "Conversion failed with an exception. Count=" << Count <<endl; |
| 922 |
return Count; |
| 923 |
} |
| 924 |
} |
| 925 |
|
| 926 |
bool OBConversion::OpenAndSetFormat(bool SetFormat, ifstream* is) |
| 927 |
{ |
| 928 |
//Opens file using InFilename and sets pInFormat if requested |
| 929 |
if(!SetFormat) |
| 930 |
{ |
| 931 |
pInFormat = FormatFromExt(InFilename.c_str()); |
| 932 |
if(pInFormat==NULL) |
| 933 |
{ |
| 934 |
string::size_type pos = InFilename.rfind('.'); |
| 935 |
string ext; |
| 936 |
if(pos!=string::npos) |
| 937 |
ext = InFilename.substr(pos); |
| 938 |
cerr << "Cannot read input format \"" << ext << '\"' |
| 939 |
<< " for file \"" << InFilename << "\"" << endl; |
| 940 |
return false; |
| 941 |
} |
| 942 |
} |
| 943 |
|
| 944 |
ios_base::openmode imode; |
| 945 |
#ifdef ALL_READS_BINARY //Makes unix files compatible with VC++6 |
| 946 |
imode = ios_base::in|ios_base::binary; |
| 947 |
#else |
| 948 |
imode = pInFormat->Flags() & READBINARY ? ios_base::in|ios_base::binary : ios_base::in; |
| 949 |
#endif |
| 950 |
|
| 951 |
is->open(InFilename.c_str(), imode); |
| 952 |
if(!is->good()) |
| 953 |
{ |
| 954 |
cerr << "Cannot open " << InFilename <<endl; |
| 955 |
return false; |
| 956 |
} |
| 957 |
|
| 958 |
return true; |
| 959 |
} |
| 960 |
|
| 961 |
/////////////////////////////////////////////// |
| 962 |
void OBConversion::AddOption(const char* opt, Option_type opttyp, const char* txt) |
| 963 |
{ |
| 964 |
//Also updates an option |
| 965 |
if(txt==NULL) |
| 966 |
OptionsArray[opttyp][opt]=string(); |
| 967 |
else |
| 968 |
OptionsArray[opttyp][opt]=txt; |
| 969 |
} |
| 970 |
|
| 971 |
const char* OBConversion::IsOption(const char* opt, Option_type opttyp) |
| 972 |
{ |
| 973 |
//Returns NULL if option not found or a pointer to the text if it is |
| 974 |
map<string,string>::iterator pos; |
| 975 |
pos = OptionsArray[opttyp].find(opt); |
| 976 |
if(pos==OptionsArray[opttyp].end()) |
| 977 |
return NULL; |
| 978 |
return pos->second.c_str(); |
| 979 |
} |
| 980 |
|
| 981 |
bool OBConversion::RemoveOption(const char* opt, Option_type opttyp) |
| 982 |
{ |
| 983 |
return OptionsArray[opttyp].erase(opt)!=0;//true if was there |
| 984 |
} |
| 985 |
|
| 986 |
void OBConversion::SetOptions(const char* options, Option_type opttyp) |
| 987 |
{ |
| 988 |
while(*options) |
| 989 |
{ |
| 990 |
string ch(1, *options++); |
| 991 |
if(*options=='\"') |
| 992 |
{ |
| 993 |
string txt = options+1; |
| 994 |
string::size_type pos = txt.find('\"'); |
| 995 |
if(pos==string::npos) |
| 996 |
return; //options is illformed |
| 997 |
txt.erase(pos); |
| 998 |
OptionsArray[opttyp][ch]= txt; |
| 999 |
options += pos+2; |
| 1000 |
} |
| 1001 |
else |
| 1002 |
OptionsArray[opttyp][ch] = string(); |
| 1003 |
} |
| 1004 |
} |
| 1005 |
|
| 1006 |
typedef std::map<string,int> OPAMapType; |
| 1007 |
OPAMapType& OBConversion::OptionParamArray(Option_type typ) |
| 1008 |
{ |
| 1009 |
static OPAMapType* opa = new OPAMapType[3]; |
| 1010 |
return opa[typ]; |
| 1011 |
} |
| 1012 |
|
| 1013 |
void OBConversion::RegisterOptionParam(string name, OBFormat* pFormat,
                                       int numberParams, Option_type typ)
{
  //Records that option name of type typ takes numberParams parameters.
  //If the option is already registered with a different number, a
  //message is written to cerr and the earlier registration is kept.
  //pFormat is used only to name the source in that message; when it is
  //NULL the source is reported as "API".
  map<string,int>& registry = OptionParamArray(typ);
  const map<string,int>::iterator existing = registry.find(name);

  const bool conflicts = existing!=registry.end() && existing->second!=numberParams;
  if(conflicts)
    {
      const string description = pFormat ? string(pFormat->Description()) : string("API");
      //Only the first line of the description is shown
      const string firstLine = description.substr(0,description.find('\n'));
      cerr << "The number of parameters needed by option \"" << name << "\" in "
           << firstLine
           << " differs from an earlier registration." << endl;
      return;
    }

  registry[name] = numberParams;
}
| 1034 |
|
| 1035 |
int OBConversion::GetOptionParams(string name, Option_type typ)
{
  //Gives the number of parameters registered for option name of
  //type typ; an option that has not been registered gives 0.
  map<string,int>& registry = OptionParamArray(typ);
  const map<string,int>::iterator hit = registry.find(name);
  return hit==registry.end() ? 0 : hit->second;
}
| 1044 |
|
| 1045 |
}//namespace OpenBabel |
| 1046 |
|
| 1047 |
//! \file obconversion.cpp |
| 1048 |
//! \brief Implementation of OBFormat and OBConversion classes. |