| 1 | /********************************************************************** | 
| 2 | obconversion.h - Handle file conversions. Declaration of OBFormat, OBConversion | 
| 3 |  | 
| 4 | Copyright (C) 2004-2005 by Chris Morley | 
| 5 |  | 
| 6 | This file is part of the Open Babel project. | 
| 7 | For more information, see <http://openbabel.sourceforge.net/> | 
| 8 |  | 
| 9 | This program is free software; you can redistribute it and/or modify | 
| 10 | it under the terms of the GNU General Public License as published by | 
| 11 | the Free Software Foundation version 2 of the License. | 
| 12 |  | 
| 13 | This program is distributed in the hope that it will be useful, | 
| 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
| 16 | GNU General Public License for more details. | 
| 17 | ***********************************************************************/ | 
| 18 |  | 
| 19 | #ifndef OB_CONV_H | 
| 20 | #define OB_CONV_H | 
| 21 |  | 
| 22 | #include "config.h" | 
| 23 |  | 
| 24 | #if HAVE_IOSTREAM | 
| 25 | #include <iostream> | 
| 26 | #elif HAVE_IOSTREAM_H | 
| 27 | #include <iostream.h> | 
| 28 | #endif | 
| 29 | #if HAVE_FSTREAM | 
| 30 | #include <fstream> | 
| 31 | #elif HAVE_FSTREAM_H | 
| 32 | #include <fstream.h> | 
| 33 | #endif | 
| 34 |  | 
// String streams: prefer the standard <sstream>, otherwise fall back to the
// pre-standard <sstream.h>. Mirrors the <iostream>/<fstream> selection above.
// A bare "#elif" with no expression is ill-formed, so the fallback is now
// guarded by its own feature macro.
// NOTE(review): HAVE_SSTREAM_H is assumed to be supplied by config.h in the
// same way as HAVE_IOSTREAM_H / HAVE_FSTREAM_H - confirm against configure.
#if HAVE_SSTREAM
#include <sstream>
#elif HAVE_SSTREAM_H
#include <sstream.h>
#endif
| 40 |  | 
#include <functional>
#include <map>
#include <string>
#include <typeinfo>
#include <vector>
| 44 |  | 
| 45 | //#include "dlhandler.h" | 
| 46 |  | 
| 47 | // These macros are used in DLL builds. If they have not | 
| 48 | // been set in babelconfig.h, define them as nothing. | 
| 49 | #ifndef OBCONV | 
| 50 | #define OBCONV | 
| 51 | #endif | 
| 52 | #ifndef OBDLL | 
| 53 | #define OBDLL | 
| 54 | #endif | 
| 55 |  | 
| 56 | //using namespace std; | 
| 57 | namespace OpenBabel { | 
| 58 |  | 
| 59 |  | 
| 60 | class OBBase; | 
| 61 | class OBConversion; | 
| 62 | //************************************************* | 
| 63 |  | 
/// @brief Base class for file formats.

/// Two sets of Read and Write functions are specified for each format
/// to handle two different requirements.
/// The "Convert" interface is for use in file format conversion applications. The
/// user interface, a console, a GUI, or another program is kept unaware of the
/// details of the chemistry and does not need to \#include mol.h. It is then
/// necessary to manipulate only pointers to OBBase in OBConversion and the user
/// interface, with all the construction and deletion of OBMol etc. objects being
/// done in the Format classes or the OB core. The convention with "Convert"
/// interface functions is that chemical objects are made on the heap with new
/// in the ReadChemObject() functions and deleted in the WriteChemObject()
/// functions.
///
/// The "API" interface is for programmatic use of the OB routines in application
/// programs where mol.h is \#included. There is generally no creation or
/// destruction of objects in ReadMolecule() and WriteMolecule() and no restriction
/// on whether the pointers are to the heap or the stack.
///
| 83 | class OBCONV OBFormat | 
| 84 | { | 
| 85 | public: | 
| 86 | /// @brief The "API" interface Read function. | 
| 87 |  | 
| 88 | /// Reads a single object. | 
| 89 | /// Does not make a new object on the heap; | 
| 90 | /// can be used with a pointer to an chem object on the heap or the stack. | 
| 91 | virtual bool ReadMolecule(OBBase* pOb, OBConversion* pConv) | 
| 92 | { std::cerr << "Not a valid input format"; return false;} | 
| 93 |  | 
| 94 | /// @brief The "Convert" interface Read function. | 
| 95 |  | 
| 96 | /// Possibly reads multiple new objects on the heap and subjects them | 
| 97 | /// to its DoTransformations() function, which may delete them again. | 
| 98 | /// Sends result to pConv->AddChemObject() | 
| 99 | virtual bool ReadChemObject(OBConversion* pConv) | 
| 100 | { std::cerr << "Not a valid input format"; return false;} | 
| 101 |  | 
| 102 | /// @brief The "API" interface Write function. | 
| 103 |  | 
| 104 | /// Writes a single object | 
| 105 | /// Does not delete the object; | 
| 106 | /// can be used with a pointer to an chem object on the heap or the stack. | 
| 107 | /// Returns false on error. | 
| 108 | virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv) | 
| 109 | { std::cerr << "Not a valid output format"; return false;} | 
| 110 |  | 
| 111 | /// @brief The "Convert" interface Write function. | 
| 112 |  | 
| 113 | /// Writes a single object | 
| 114 | /// Deletes the object after writing | 
| 115 | /// Returns false on error | 
| 116 | virtual bool WriteChemObject(OBConversion* pConv) | 
| 117 | { std::cerr << "Not a valid output format"; return false;} | 
| 118 |  | 
| 119 | /// @brief Information on this format. Printed out in response to -Hxxx option where xxx id the id of the format. | 
| 120 |  | 
| 121 | /// Must be provided by each format class. | 
| 122 | /// Can include a list of command line Options. These may be used to construction | 
| 123 | /// check boxes, radio buttons etc for GUI interface. | 
| 124 | virtual const char* Description()=0; | 
| 125 |  | 
| 126 | /// @brief A decription of the chemical object converted by this format. | 
| 127 |  | 
| 128 | /// If not provided, the object type used by the default format is used (usually OBMol). | 
| 129 | virtual const char* TargetClassDescription(); | 
| 130 |  | 
| 131 | /// @brief Returns the type of chemical object used by the format. | 
| 132 |  | 
| 133 | /// Defaults to that used by the default format. Useful for checking | 
| 134 | /// that a format can handle a particular object. | 
| 135 | virtual const std::type_info& GetType(); | 
| 136 |  | 
| 137 | /// @brief Web address where the format is defined. | 
| 138 | virtual const char* SpecificationURL() { return ""; } | 
| 139 |  | 
| 140 | /// @brief Chemical MIME type associated with this file type (if any) | 
| 141 | virtual const char* GetMIMEType() { return ""; } | 
| 142 |  | 
| 143 | /// @brief Decribes the capabilities of the format (Read only etc.) | 
| 144 |  | 
| 145 | /// Currently, can be a bitwise OR of any of the following | 
| 146 | /// NOTREADABLE READONEONLY NOTWRITABLE WRITEONEONLY DEFAULTFORMAT | 
| 147 | /// READBINARY WRITEBINARY | 
| 148 | virtual unsigned int Flags() { return 0;}; | 
| 149 |  | 
| 150 | /// @brief Skip past first n objects in input stream (or current one with n=0) | 
| 151 |  | 
| 152 | /// Returns 1 on success, -1 on error and 0 if not implemented | 
| 153 | virtual int SkipObjects(int n, OBConversion* pConv) | 
| 154 | { | 
| 155 | return 0; //shows not implemented in the format class | 
| 156 | }; | 
| 157 |  | 
| 158 | /// @brief Returns a pointer to a new instance of the format, or NULL if fails. | 
| 159 |  | 
| 160 | /// Normally a single global instance is used but this may cause problems | 
| 161 | /// if there are member variables and the format is used in more than one place | 
| 162 | /// in the program. | 
| 163 | virtual OBFormat* MakeNewInstance() | 
| 164 | { | 
| 165 | return NULL; //shows not implemented in the format class | 
| 166 | } | 
| 167 |  | 
| 168 | /// @brief Format classes do not have a destructor | 
| 169 | virtual ~OBFormat(){}; | 
| 170 | }; | 
| 171 |  | 
| 172 | //************************************************* | 
/// @brief Case insensitive string comparison for FormatsMap key.
///
/// Ordering functor over C strings: p1 is ordered before p2 when
/// strcasecmp(p1,p2) is negative, so keys that differ only in letter case
/// compare as equivalent.
///
/// The nested argument/result typedefs used to be inherited from
/// std::binary_function, which was deprecated in C++11 and removed in C++17.
/// They are declared here directly so the functor exposes the same nested
/// types without depending on the removed base class.
struct CharPtrLess
{
  typedef const char* first_argument_type;
  typedef const char* second_argument_type;
  typedef bool        result_type;

  /// Both pointers must be non-NULL; they are handed straight to strcasecmp.
  bool operator()(const char* p1,const char* p2) const
  { return strcasecmp(p1,p2)<0; }
};
| 179 |  | 
/// Map from a C-string key to an OBFormat pointer, ordered with CharPtrLess
/// so that lookups ignore letter case. The key is the raw pointer, not a copy
/// of the characters. Used as the return type of OBConversion::FormatsMap()
/// and OBConversion::FormatsMIMEMap().
typedef std::map<const char*,OBFormat*,CharPtrLess > FMapType;
/// Iterator into an FMapType; taken by reference by OBConversion::GetNextFormat().
typedef FMapType::iterator Formatpos;
| 182 |  | 
| 183 | //************************************************* | 
| 184 |  | 
| 185 | /// Class to convert from one format to another. | 
| 186 | class OBCONV OBConversion | 
| 187 | { | 
| 188 | /// @nosubgrouping | 
| 189 | public: | 
| 190 | /// @name Construction | 
| 191 | //@{ | 
| 192 | OBConversion(std::istream* is=NULL, std::ostream* os=NULL); | 
| 193 | /// @brief Copy constructor | 
| 194 | OBConversion(const OBConversion& o); | 
| 195 | virtual     ~OBConversion(); | 
| 196 | //@} | 
| 197 | /// @name Collection of formats | 
| 198 | //@{ | 
| 199 | /// @brief Called once by each format class | 
| 200 | static int                              RegisterFormat(const char* ID, OBFormat* pFormat, const char* MIME = NULL); | 
| 201 | /// @brief Searches registered formats | 
| 202 | static OBFormat*        FindFormat(const char* ID); | 
| 203 | /// @brief Searches registered formats for an ID the same as the file extension | 
| 204 | static OBFormat*        FormatFromExt(const char* filename); | 
| 205 | /// @brief Searches registered formats for a MIME the same as the chemical MIME type passed | 
| 206 | static OBFormat*        FormatFromMIME(const char* MIME); | 
| 207 |  | 
| 208 | ///Repeatedly called to recover available Formats | 
| 209 | static bool             GetNextFormat(Formatpos& itr, const char*& str,OBFormat*& pFormat); | 
| 210 | //@} | 
| 211 |  | 
| 212 | /// @name Information | 
| 213 | //@{ | 
| 214 | static const char* Description(); //generic conversion options | 
| 215 | //@} | 
| 216 |  | 
| 217 | /// @name Parameter get and set | 
| 218 | //@{ | 
| 219 | std::istream* GetInStream() const {return pInStream;}; | 
| 220 | std::ostream* GetOutStream() const {return pOutStream;}; | 
| 221 | void          SetInStream(std::istream* pIn){pInStream=pIn;}; | 
| 222 | void          SetOutStream(std::ostream* pOut){pOutStream=pOut;}; | 
| 223 | bool          SetInAndOutFormats(const char* inID, const char* outID);///< Sets the formats from their ids, e g CML | 
| 224 | bool          SetInAndOutFormats(OBFormat* pIn, OBFormat* pOut); | 
| 225 | bool          SetInFormat(const char* inID); | 
| 226 | bool          SetInFormat(OBFormat* pIn); | 
| 227 | bool          SetOutFormat(const char* outID); | 
| 228 | bool          SetOutFormat(OBFormat* pOut); | 
| 229 |  | 
| 230 | OBFormat*   GetInFormat() const{return pInFormat;}; | 
| 231 | OBFormat*   GetOutFormat() const{return pOutFormat;}; | 
| 232 | std::string GetInFilename() const{return InFilename;}; | 
| 233 |  | 
| 234 | ///Get the position in the input stream of the object being read | 
| 235 | std::streampos GetInPos()const{return wInpos;}; | 
| 236 |  | 
| 237 | ///Get the length in the input stream of the object being read | 
| 238 | size_t GetInLen()const{return wInlen;}; | 
| 239 |  | 
| 240 | ///@brief Returns a default title which is the filename | 
| 241 | const char* GetTitle() const; | 
| 242 |  | 
| 243 | ///@brief Extension method: deleted in ~OBConversion() | 
| 244 | OBConversion* GetAuxConv() const {return pAuxConv;}; | 
| 245 | void          SetAuxConv(OBConversion* pConv) {pAuxConv=pConv;}; | 
| 246 | //@} | 
| 247 | /// @name Option handling | 
| 248 | //@{ | 
| 249 | ///@brief Three types of options set on the the command line by -a? , -x? , or -? | 
| 250 | enum Option_type { INOPTIONS, OUTOPTIONS, GENOPTIONS }; | 
| 251 |  | 
| 252 | ///@brief Determine whether an option is set. Returns NULL if option not and a pointer to the associated text if it is | 
| 253 | const char* IsOption(const char* opt,Option_type opttyp=OUTOPTIONS); | 
| 254 |  | 
| 255 | ///@brief Access the map with option name as key and any associated text as value | 
| 256 | const std::map<std::string,std::string>* GetOptions(Option_type opttyp) | 
| 257 | { return &OptionsArray[opttyp];}; | 
| 258 |  | 
| 259 | ///@brief Set an option of specified type, with optional text | 
| 260 | void AddOption(const char* opt, Option_type opttyp, const char* txt=NULL); | 
| 261 |  | 
| 262 | bool RemoveOption(const char* opt, Option_type optype); | 
| 263 |  | 
| 264 | ///@brief Set several single character options of specified type from string like ab"btext"c"ctext" | 
| 265 | void SetOptions(const char* options, Option_type opttyp); | 
| 266 |  | 
| 267 | ///@brief For example -h takes 0 parameters; -f takes 1. Call in a format constructor. | 
| 268 | static void RegisterOptionParam(std::string name, OBFormat* pFormat, | 
| 269 | int numberParams=0, Option_type typ=OUTOPTIONS); | 
| 270 |  | 
| 271 | ///@brief Returns the number of parameters registered for the option, or 0 if not found | 
| 272 | static int GetOptionParams(std::string name, Option_type typ); | 
| 273 | //@} | 
| 274 |  | 
| 275 | /// @name Conversion | 
| 276 | //@{ | 
| 277 | /// @brief Conversion for single input and output stream | 
| 278 | int         Convert(std::istream* is, std::ostream* os); | 
| 279 |  | 
| 280 | /// @brief Conversion with existing streams | 
| 281 | int         Convert(); | 
| 282 |  | 
| 283 | /// @brief Conversion with multiple input/output files: | 
| 284 | /// makes input and output streams, and carries out normal, batch, aggregation, and splitting conversion. | 
| 285 | int                                     FullConvert(std::vector<std::string>& FileList, | 
| 286 | std::string& OutputFileName, std::vector<std::string>& OutputFileList); | 
| 287 | //@} | 
| 288 |  | 
| 289 | /// @name Conversion loop control | 
| 290 | //@{ | 
| 291 | int                                     AddChemObject(OBBase* pOb);///< @brief Adds to internal array during input | 
| 292 | OBBase*                 GetChemObject(); ///< @brief Retrieve from internal array during output | 
| 293 | bool                            IsLast();///< @brief True if no more objects to be output | 
| 294 | bool                            IsFirstInput();///< @brief True if the first input object is being processed | 
| 295 | int         GetOutputIndex() const ;///< @brief Retrieves number of ChemObjects that have been actually output | 
| 296 | void                            SetOutputIndex(int indx);///< @brief Sets ouput index (maybe to control whether seen as first object) | 
| 297 | void                            SetMoreFilesToCome();///<@brief Used with multiple input files. Off by default. | 
| 298 | void                            SetOneObjectOnly();///<@brief Used with multiple input files. Off by default. | 
| 299 | //@} | 
| 300 | /// @name Convenience functions | 
| 301 | //@{ | 
| 302 | ///The default format is set in a single OBFormat class (generally it is OBMol) | 
| 303 | static OBFormat* GetDefaultFormat(){return pDefaultFormat;}; | 
| 304 |  | 
| 305 | /// @brief Outputs an object of a class derived from OBBase. | 
| 306 |  | 
| 307 | /// Part of "API" interface. | 
| 308 | /// The output stream can be specified and the change is retained in the OBConversion instance | 
| 309 | bool                            Write(OBBase* pOb, std::ostream* pout=NULL); | 
| 310 |  | 
| 311 | /// @brief Outputs an object of a class derived from OBBase as a string | 
| 312 |  | 
| 313 | /// Part of "API" interface. | 
| 314 | /// The output stream is temporarily changed to the string and then restored | 
| 315 | /// This method is primarily intended for scripting languages without "stream" classes | 
| 316 | std::string                     WriteString(OBBase* pOb); | 
| 317 |  | 
| 318 | /// @brief Outputs an object of a class derived from OBBase as a file (with the supplied path) | 
| 319 |  | 
| 320 | /// Part of "API" interface. | 
| 321 | /// The output stream is changed to the supplied file and the change is retained in the | 
| 322 | /// OBConversion instance. | 
| 323 | /// This method is primarily intended for scripting languages without "stream" classes | 
| 324 | bool                            WriteFile(OBBase* pOb, std::string filePath); | 
| 325 |  | 
| 326 | /// @brief Reads an object of a class derived from OBBase into pOb. | 
| 327 |  | 
| 328 | /// Part of "API" interface. | 
| 329 | /// The input stream can be specified and the change is retained in the OBConversion instance | 
| 330 | /// Returns false and pOb=NULL on error | 
| 331 | bool    Read(OBBase* pOb, std::istream* pin=NULL); | 
| 332 |  | 
| 333 | /// @brief Reads an object of a class derived from OBBase into pOb from the supplied string | 
| 334 |  | 
| 335 | /// Part of "API" interface. | 
| 336 | /// Returns false and pOb=NULL on error | 
| 337 | /// This method is primarily intended for scripting languages without "stream" classes | 
| 338 | bool  ReadString(OBBase* pOb, std::string input); | 
| 339 |  | 
| 340 | /// @brief Reads an object of a class derived from OBBase into pOb from the file specified | 
| 341 |  | 
| 342 | /// Part of "API" interface. | 
| 343 | /// The output stream is changed to the supplied file and the change is retained in the | 
| 344 | /// OBConversion instance. | 
| 345 | /// Returns false and pOb=NULL on error | 
| 346 | /// This method is primarily intended for scripting languages without "stream" classes | 
| 347 | bool  ReadFile(OBBase* pOb, std::string filePath); | 
| 348 |  | 
| 349 |  | 
| 350 | ///Replaces * in BaseName by InFile without extension and path | 
| 351 | static std::string BatchFileName(std::string& BaseName, std::string& InFile); | 
| 352 | ///Replaces * in BaseName by Count | 
| 353 | static std::string IncrementedFileName(std::string& BaseName, const int Count); | 
| 354 | //@} | 
| 355 |  | 
| 356 | protected: | 
| 357 | bool             SetStartAndEnd(); | 
| 358 | static FMapType& FormatsMap();///<contains ID and pointer to all OBFormat classes | 
| 359 | static FMapType& FormatsMIMEMap();///<contains MIME and pointer to all OBFormat classes | 
| 360 | typedef std::map<std::string,int> OPAMapType; | 
| 361 | static OPAMapType& OptionParamArray(Option_type typ); | 
| 362 | static int       LoadFormatFiles(); | 
| 363 | bool             OpenAndSetFormat(bool SetFormat, std::ifstream* is); | 
| 364 |  | 
| 365 | std::string       InFilename; | 
| 366 | std::istream*     pInStream; | 
| 367 | std::ostream*     pOutStream; | 
| 368 | static OBFormat*  pDefaultFormat; | 
| 369 | OBFormat*         pInFormat; | 
| 370 | OBFormat*         pOutFormat; | 
| 371 |  | 
| 372 | std::map<std::string,std::string> OptionsArray[3]; | 
| 373 |  | 
| 374 | int               Index; | 
| 375 | unsigned int      StartNumber; | 
| 376 | unsigned int      EndNumber; | 
| 377 | int               Count; | 
| 378 | bool              m_IsLast; | 
| 379 | bool              MoreFilesToCome; | 
| 380 | bool              OneObjectOnly; | 
| 381 | bool              ReadyToInput; | 
| 382 | static int FormatFilesLoaded; | 
| 383 | OBBase*           pOb1; | 
| 384 | std::streampos wInpos; ///<position in the input stream of the object being written | 
| 385 | std::streampos rInpos; ///<position in the input stream of the object being read | 
| 386 | size_t wInlen; ///<length in the input stream of the object being written | 
| 387 | size_t rInlen; ///<length in the input stream of the object being read | 
| 388 |  | 
| 389 | OBConversion* pAuxConv;///<Way to extend OBConversion | 
| 390 | }; | 
| 391 |  | 
/// Bit flags for the value returned by OBFormat::Flags(); they may be
/// combined with bitwise OR.
/// These are preprocessor macros, so the enclosing namespace does not scope
/// them.
/// NOTE(review): the per-flag meanings are not spelled out in this header and
/// are only suggested by the names (readable/writable, one object only,
/// binary stream, default format) - confirm against the code that tests them.
// Read-side flags (low nibble).
#define NOTREADABLE     0x01
#define READONEONLY     0x02
#define READBINARY      0x04
// Write-side flags (same pattern, shifted up by four bits).
#define NOTWRITABLE     0x10
#define WRITEONEONLY    0x20
#define WRITEBINARY     0x40
// Marks the default format.
#define DEFAULTFORMAT 0x4000
| 400 |  | 
| 401 | } //namespace OpenBabel | 
| 402 | #endif //OB_CONV_H | 
| 403 |  | 
| 404 | //! \file | 
| 405 | //! \brief Handle file conversions. Declaration of OBFormat, OBConversion. | 
| 406 |  | 
| 407 |  |