| 1 |
gezelter |
1210 |
/********************************************************************** |
| 2 |
|
|
obmolecformat.cpp - Implementation of subclass of OBFormat for conversion of OBMol. |
| 3 |
|
|
|
| 4 |
|
|
Copyright (C) 2005 Chris Morley |
| 5 |
|
|
|
| 6 |
|
|
This file is part of the Open Babel project. |
| 7 |
|
|
For more information, see <http://openbabel.sourceforge.net/> |
| 8 |
|
|
|
| 9 |
|
|
This program is free software; you can redistribute it and/or modify |
| 10 |
|
|
it under the terms of the GNU General Public License as published by |
| 11 |
|
|
the Free Software Foundation version 2 of the License. |
| 12 |
|
|
|
| 13 |
|
|
This program is distributed in the hope that it will be useful, |
| 14 |
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 |
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 16 |
|
|
GNU General Public License for more details. |
| 17 |
|
|
***********************************************************************/ |
| 18 |
|
|
#include <openbabel/babelconfig.h> |
| 19 |
|
|
#include <openbabel/obmolecformat.h> |
| 20 |
|
|
#include <openbabel/obiter.h> |
| 21 |
|
|
|
| 22 |
|
|
#ifdef _DEBUG |
| 23 |
|
|
#undef THIS_FILE |
| 24 |
|
|
static char THIS_FILE[]=__FILE__; |
| 25 |
|
|
#define new DEBUG_NEW |
| 26 |
|
|
#endif |
| 27 |
|
|
|
| 28 |
|
|
using namespace std; |
| 29 |
|
|
namespace OpenBabel |
| 30 |
|
|
{ |
| 31 |
|
|
|
| 32 |
|
|
std::map<std::string, OBMol*> OBMoleculeFormat::IMols; |
| 33 |
|
|
OBMol* OBMoleculeFormat::_jmol; |
| 34 |
|
|
std::vector<OBMol> OBMoleculeFormat::MolArray; |
| 35 |
|
|
bool OBMoleculeFormat::StoredMolsReady=false; |
| 36 |
|
|
|
| 37 |
|
|
bool OBMoleculeFormat::ReadChemObjectImpl(OBConversion* pConv, OBFormat* pFormat) |
| 38 |
|
|
{ |
| 39 |
|
|
std::istream &ifs = *pConv->GetInStream(); |
| 40 |
|
|
if (!ifs.good()) //Possible to omit? ifs.peek() == EOF || |
| 41 |
|
|
return false; |
| 42 |
|
|
|
| 43 |
|
|
OBMol* pmol = new OBMol; |
| 44 |
|
|
|
| 45 |
|
|
std::string auditMsg = "OpenBabel::Read molecule "; |
| 46 |
|
|
std::string description(pFormat->Description()); |
| 47 |
|
|
auditMsg += description.substr(0,description.find('\n')); |
| 48 |
|
|
obErrorLog.ThrowError(__FUNCTION__, |
| 49 |
|
|
auditMsg, |
| 50 |
|
|
obAuditMsg); |
| 51 |
|
|
|
| 52 |
|
|
if(pConv->IsOption("C",OBConversion::GENOPTIONS)) |
| 53 |
|
|
return DeferMolOutput(pmol, pConv, pFormat); |
| 54 |
|
|
|
| 55 |
|
|
bool ret; |
| 56 |
|
|
if(pConv->IsOption("separate",OBConversion::GENOPTIONS)) |
| 57 |
|
|
{ |
| 58 |
|
|
//On first call, separate molecule and put fragments in MolArray. |
| 59 |
|
|
//On subsequent calls, remove a fragment from MolArray and send it for writing |
| 60 |
|
|
//Done this way so that each fragment can be written to its own file (with -m option) |
| 61 |
|
|
if(!StoredMolsReady) |
| 62 |
|
|
{ |
| 63 |
|
|
ret = pFormat->ReadMolecule(pmol,pConv); |
| 64 |
|
|
if(ret && (pmol->NumAtoms() > 0 || (pFormat->Flags()&ZEROATOMSOK))) |
| 65 |
|
|
MolArray = pmol->Separate(); //use un-transformed molecule |
| 66 |
|
|
//Add an appropriate title to each fragment |
| 67 |
|
|
for(unsigned int i=0;i<MolArray.size();++i) |
| 68 |
|
|
{ |
| 69 |
|
|
stringstream ss; |
| 70 |
|
|
ss << pmol->GetTitle() << '#' << i+1; |
| 71 |
|
|
string title = ss.str(); |
| 72 |
|
|
MolArray[i].SetTitle(title); |
| 73 |
|
|
} |
| 74 |
|
|
reverse(MolArray.begin(),MolArray.end()); |
| 75 |
|
|
StoredMolsReady = true; |
| 76 |
|
|
} |
| 77 |
|
|
|
| 78 |
|
|
if(MolArray.empty()) //normal end of fragments |
| 79 |
|
|
ret =false; |
| 80 |
|
|
else |
| 81 |
|
|
{ |
| 82 |
|
|
// Copying is needed because the OBMol passed to AddChemObject will be deleted. |
| 83 |
|
|
// The OBMol in the vector is deleted here. |
| 84 |
|
|
OBMol* pMolCopy = new OBMol( MolArray.back()); |
| 85 |
|
|
MolArray.pop_back(); |
| 86 |
|
|
ret = pConv->AddChemObject( |
| 87 |
|
|
pMolCopy->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS))); |
| 88 |
|
|
} |
| 89 |
|
|
if(!ret) |
| 90 |
|
|
StoredMolsReady = false; |
| 91 |
|
|
|
| 92 |
|
|
delete pmol; |
| 93 |
|
|
return ret; |
| 94 |
|
|
} |
| 95 |
|
|
|
| 96 |
|
|
ret=pFormat->ReadMolecule(pmol,pConv); |
| 97 |
|
|
|
| 98 |
|
|
OBMol* ptmol = NULL; |
| 99 |
|
|
//Molecule is valid if it has some atoms |
| 100 |
|
|
//or the format allows zero-atom molecules and it has a title |
| 101 |
|
|
if(ret && (pmol->NumAtoms() > 0 || (pFormat->Flags()&ZEROATOMSOK && *pmol->GetTitle()))) |
| 102 |
|
|
{ |
| 103 |
|
|
ptmol = static_cast<OBMol*>(pmol->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS))); |
| 104 |
|
|
if(ptmol && (pConv->IsOption("j",OBConversion::GENOPTIONS) |
| 105 |
|
|
|| pConv->IsOption("join",OBConversion::GENOPTIONS))) |
| 106 |
|
|
{ |
| 107 |
|
|
//With j option, accumulate all mols in one stored in this class |
| 108 |
|
|
if(pConv->IsFirstInput()) |
| 109 |
|
|
_jmol = new OBMol; |
| 110 |
|
|
pConv->AddChemObject(_jmol); |
| 111 |
|
|
//will be discarded in WriteChemObjectImpl until the last input mol. This complication |
| 112 |
|
|
//is needed to allow joined molecules to be from different files. pOb1 in AddChem Object |
| 113 |
|
|
//is zeroed at the end of a file and _jmol is in danger of not being output. |
| 114 |
|
|
*_jmol += *ptmol; |
| 115 |
|
|
delete ptmol; |
| 116 |
|
|
return true; |
| 117 |
|
|
} |
| 118 |
|
|
} |
| 119 |
|
|
else |
| 120 |
|
|
delete pmol; |
| 121 |
|
|
|
| 122 |
|
|
// Normal operation - send molecule to be written |
| 123 |
|
|
ret = ret && pConv->AddChemObject(ptmol); //success of both writing and reading |
| 124 |
|
|
return ret; |
| 125 |
|
|
} |
| 126 |
|
|
|
| 127 |
|
|
bool OBMoleculeFormat::WriteChemObjectImpl(OBConversion* pConv, OBFormat* pFormat) |
| 128 |
|
|
{ |
| 129 |
|
|
if(pConv->IsOption("C",OBConversion::GENOPTIONS)) |
| 130 |
|
|
return OutputDeferredMols(pConv); |
| 131 |
|
|
if(pConv->IsOption("j",OBConversion::GENOPTIONS) |
| 132 |
|
|
|| pConv->IsOption("join",OBConversion::GENOPTIONS)) |
| 133 |
|
|
{ |
| 134 |
|
|
//arrives here at the end of a file |
| 135 |
|
|
if(!pConv->IsLast()) |
| 136 |
|
|
return true; |
| 137 |
|
|
bool ret=pFormat->WriteMolecule(_jmol,pConv); |
| 138 |
|
|
pConv->SetOutputIndex(1); |
| 139 |
|
|
delete _jmol; |
| 140 |
|
|
return ret; |
| 141 |
|
|
} |
| 142 |
|
|
|
| 143 |
|
|
|
| 144 |
|
|
//Retrieve the target OBMol |
| 145 |
|
|
OBBase* pOb = pConv->GetChemObject(); |
| 146 |
|
|
OBMol* pmol = dynamic_cast<OBMol*> (pOb); |
| 147 |
|
|
bool ret=false; |
| 148 |
|
|
if(pmol) |
| 149 |
|
|
{ |
| 150 |
|
|
if(pmol->NumAtoms()==0) |
| 151 |
|
|
{ |
| 152 |
|
|
std::string auditMsg = "OpenBabel::Molecule "; |
| 153 |
|
|
auditMsg += pmol->GetTitle(); |
| 154 |
|
|
auditMsg += " has 0 atoms"; |
| 155 |
|
|
obErrorLog.ThrowError(__FUNCTION__, |
| 156 |
|
|
auditMsg, |
| 157 |
|
|
obInfo); |
| 158 |
|
|
} |
| 159 |
|
|
ret=true; |
| 160 |
|
|
|
| 161 |
|
|
std::string auditMsg = "OpenBabel::Write molecule "; |
| 162 |
|
|
std::string description(pFormat->Description()); |
| 163 |
|
|
auditMsg += description.substr(0,description.find('\n')); |
| 164 |
|
|
obErrorLog.ThrowError(__FUNCTION__, |
| 165 |
|
|
auditMsg, |
| 166 |
|
|
obAuditMsg); |
| 167 |
|
|
|
| 168 |
|
|
ret=pFormat->WriteMolecule(pmol,pConv); |
| 169 |
|
|
} |
| 170 |
|
|
delete pOb; //move so that non-OBMol objects are deleted 9March2006 |
| 171 |
|
|
return ret; |
| 172 |
|
|
} |
| 173 |
|
|
|
| 174 |
|
|
/*! Instead of sending molecules for output via AddChemObject(), they are |
| 175 |
|
|
saved in here in OBMoleculeFormat or discarded. By default they are |
| 176 |
|
|
saved only if they are in the first input file. Parts of subsequent |
| 177 |
|
|
molecules, such as chemical structure, coordinates and OBGenericData |
| 178 |
|
|
can replace the parts in molecules with the same title that have already |
| 179 |
|
|
been stored, subject to a set of rules. After all input files have been |
| 180 |
|
|
read, the stored molecules (possibly now having augmented properties) are |
| 181 |
|
|
sent to the output format. |
| 182 |
|
|
|
| 183 |
|
|
Is a static function with <this> as parameter so that it can be called from other |
| 184 |
|
|
format classes like XMLMoleculeFormat which are not derived from OBMoleculeFormat. |
| 185 |
|
|
*/ |
| 186 |
|
|
bool OBMoleculeFormat::DeferMolOutput(OBMol* pmol, OBConversion* pConv, OBFormat* pF ) |
| 187 |
|
|
{ |
| 188 |
|
|
static bool IsFirstFile; |
| 189 |
|
|
bool OnlyMolsInFirstFile=true; |
| 190 |
|
|
|
| 191 |
|
|
if(pConv->IsFirstInput()) |
| 192 |
|
|
{ |
| 193 |
|
|
IsFirstFile=true; |
| 194 |
|
|
IMols.clear(); |
| 195 |
|
|
} |
| 196 |
|
|
else |
| 197 |
|
|
{ |
| 198 |
|
|
if((std::streamoff)pConv->GetInStream()->tellg()<=0) |
| 199 |
|
|
IsFirstFile=false;//File has changed |
| 200 |
|
|
} |
| 201 |
|
|
|
| 202 |
|
|
if (!pF->ReadMolecule(pmol,pConv)) |
| 203 |
|
|
{ |
| 204 |
|
|
delete pmol; |
| 205 |
|
|
return false; |
| 206 |
|
|
} |
| 207 |
|
|
const char* ptitle = pmol->GetTitle(); |
| 208 |
|
|
if(*ptitle==0) |
| 209 |
|
|
obErrorLog.ThrowError(__FUNCTION__, "Molecule with no title ignored", obWarning); |
| 210 |
|
|
else |
| 211 |
|
|
{ |
| 212 |
|
|
string title(ptitle); |
| 213 |
|
|
string::size_type pos = title.find_first_of("\t\r\n"); //some title have other data appended |
| 214 |
|
|
if(pos!=string::npos) |
| 215 |
|
|
title.erase(pos); |
| 216 |
|
|
|
| 217 |
|
|
map<std::string, OBMol*>::iterator itr; |
| 218 |
|
|
itr = IMols.find(title); |
| 219 |
|
|
if(itr!=IMols.end()) |
| 220 |
|
|
{ |
| 221 |
|
|
//Molecule with the same title has been input previously: update it |
| 222 |
|
|
OBMol* pNewMol = MakeCombinedMolecule(itr->second, pmol); |
| 223 |
|
|
if(pNewMol) |
| 224 |
|
|
{ |
| 225 |
|
|
delete itr->second; |
| 226 |
|
|
IMols[title] = pNewMol; |
| 227 |
|
|
} |
| 228 |
|
|
else |
| 229 |
|
|
{ |
| 230 |
|
|
//error: cleanup and return false |
| 231 |
|
|
delete pmol; |
| 232 |
|
|
return DeleteDeferredMols(); |
| 233 |
|
|
} |
| 234 |
|
|
} |
| 235 |
|
|
else |
| 236 |
|
|
{ |
| 237 |
|
|
//Molecule not already saved in IMols: save it if in first file |
| 238 |
|
|
if(!OnlyMolsInFirstFile || IsFirstFile) |
| 239 |
|
|
{ |
| 240 |
|
|
IMols[title] = pmol; |
| 241 |
|
|
return true; //don't delete pmol |
| 242 |
|
|
} |
| 243 |
|
|
} |
| 244 |
|
|
} |
| 245 |
|
|
delete pmol; |
| 246 |
|
|
return true; |
| 247 |
|
|
} |
| 248 |
|
|
|
| 249 |
|
|
/*! Makes a new OBMol on the heap by combining two molecules according to the rule below. |
| 250 |
|
|
If both have OBGenericData of the same type, or OBPairData with the |
| 251 |
|
|
same attribute, the version from pFirst is used. |
| 252 |
|
|
Returns a pointer to a new OBMol which will need deleting by the calling program |
| 253 |
|
|
(probably by being sent to an output format). |
| 254 |
|
|
If the molecules cannot be regarded as being the same structure a NULL |
| 255 |
|
|
pointer is returned and an error message logged. |
| 256 |
|
|
|
| 257 |
|
|
pFirst and pSecond and the objects they point to are not changed. (const |
| 258 |
|
|
modifiers difficult because class OBMol not designed appropriately) |
| 259 |
|
|
|
| 260 |
|
|
Combining molecules: rules for each of the three parts |
| 261 |
|
|
Title: |
| 262 |
|
|
Use the title of pFirst unless it has none, when use that of pSecond. |
| 263 |
|
|
Warning if neither molecule has a title. |
| 264 |
|
|
|
| 265 |
|
|
Structure |
| 266 |
|
|
- a structure with atoms replaces one with no atoms |
| 267 |
|
|
- a structure with bonds replaces one with no bonds, |
| 268 |
|
|
provided the formula is the same, else an error. |
| 269 |
|
|
- structures with atoms and bonds are compared by InChI; error if not the same. |
| 270 |
|
|
- a structure with 3D coordinates replaces one with 2D coordinates |
| 271 |
|
|
- a structure with 2D coordinates replace one with 0D coordinates |
| 272 |
|
|
|
| 273 |
|
|
OBGenericData |
| 274 |
|
|
OBPairData |
| 275 |
|
|
*/ |
| 276 |
|
|
OBMol* OBMoleculeFormat::MakeCombinedMolecule(OBMol* pFirst, OBMol* pSecond) |
| 277 |
|
|
{ |
| 278 |
|
|
string title("No title"); |
| 279 |
|
|
if(*pFirst->GetTitle()!=0) |
| 280 |
|
|
title = pFirst->GetTitle(); |
| 281 |
|
|
else |
| 282 |
|
|
{ |
| 283 |
|
|
if(*pSecond->GetTitle()!=0) |
| 284 |
|
|
title = pSecond->GetTitle(); |
| 285 |
|
|
else |
| 286 |
|
|
obErrorLog.ThrowError(__FUNCTION__,"Combined molecule has no title", obWarning); |
| 287 |
|
|
} |
| 288 |
|
|
|
| 289 |
|
|
bool swap=false; |
| 290 |
|
|
if(pFirst->NumAtoms()==0 && pSecond->NumAtoms()!=0) |
| 291 |
|
|
swap=true; |
| 292 |
|
|
else |
| 293 |
|
|
{ |
| 294 |
|
|
if(pFirst->GetSpacedFormula()!=pSecond->GetSpacedFormula()) |
| 295 |
|
|
{ |
| 296 |
|
|
obErrorLog.ThrowError(__FUNCTION__, |
| 297 |
|
|
"Molecules with name = " + title + " have different formula",obError); |
| 298 |
|
|
return NULL; |
| 299 |
|
|
} |
| 300 |
|
|
else |
| 301 |
|
|
{ |
| 302 |
|
|
if(pSecond->NumBonds()!=0 && pFirst->NumBonds()==0) |
| 303 |
|
|
swap=true; |
| 304 |
|
|
else |
| 305 |
|
|
{ |
| 306 |
|
|
//Compare by inchi; error if different NOT YET IMPLEMENTED |
| 307 |
|
|
//Use the one with the higher dimension |
| 308 |
|
|
if(pSecond->GetDimension() > pFirst->GetDimension()) |
| 309 |
|
|
swap=true; |
| 310 |
|
|
} |
| 311 |
|
|
} |
| 312 |
|
|
} |
| 313 |
|
|
|
| 314 |
|
|
OBMol* pNewMol = new OBMol; |
| 315 |
|
|
pNewMol->SetTitle(title); |
| 316 |
|
|
|
| 317 |
|
|
OBMol* pMain = swap ? pSecond : pFirst; |
| 318 |
|
|
OBMol* pOther = swap ? pFirst : pSecond; |
| 319 |
|
|
|
| 320 |
|
|
*pNewMol = *pMain; //Now copies all data |
| 321 |
|
|
|
| 322 |
|
|
//Copy some OBGenericData from the OBMol which did not provide the structure |
| 323 |
|
|
vector<OBGenericData*>::iterator igd; |
| 324 |
|
|
for(igd=pOther->BeginData();igd!=pOther->EndData();++igd) |
| 325 |
|
|
{ |
| 326 |
|
|
//copy only if not already data of the same type from molecule already copied to pNewMol |
| 327 |
|
|
unsigned datatype = (*igd)->GetDataType(); |
| 328 |
|
|
OBGenericData* pData = pNewMol->GetData(datatype); |
| 329 |
|
|
if(datatype==OBGenericDataType::PairData) |
| 330 |
|
|
{ |
| 331 |
|
|
if(pData->GetAttribute() == (*igd)->GetAttribute()) |
| 332 |
|
|
continue; |
| 333 |
|
|
} |
| 334 |
|
|
else if(pNewMol->GetData(datatype)!=NULL) |
| 335 |
|
|
continue; |
| 336 |
|
|
|
| 337 |
|
|
OBGenericData* pCopiedData = (*igd)->Clone(pNewMol); |
| 338 |
|
|
pNewMol->SetData(pCopiedData); |
| 339 |
|
|
} |
| 340 |
|
|
return pNewMol; |
| 341 |
|
|
} |
| 342 |
|
|
|
| 343 |
|
|
bool OBMoleculeFormat::OutputDeferredMols(OBConversion* pConv) |
| 344 |
|
|
{ |
| 345 |
|
|
std::map<std::string, OBMol*>::iterator itr, lastitr; |
| 346 |
|
|
bool ret=false; |
| 347 |
|
|
int i=1; |
| 348 |
|
|
lastitr = IMols.end(); |
| 349 |
|
|
--lastitr; |
| 350 |
|
|
pConv->SetOneObjectOnly(false); |
| 351 |
|
|
for(itr=IMols.begin();itr!=IMols.end();++itr,++i) |
| 352 |
|
|
{ |
| 353 |
|
|
if(!(itr->second)->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS))) |
| 354 |
|
|
continue; |
| 355 |
|
|
pConv->SetOutputIndex(i); |
| 356 |
|
|
if(itr==lastitr) |
| 357 |
|
|
pConv->SetOneObjectOnly(); //to set IsLast |
| 358 |
|
|
|
| 359 |
|
|
std::string auditMsg = "OpenBabel::Write molecule "; |
| 360 |
|
|
std::string description((pConv->GetOutFormat())->Description()); |
| 361 |
|
|
auditMsg += description.substr(0,description.find('\n')); |
| 362 |
|
|
obErrorLog.ThrowError(__FUNCTION__, auditMsg, obAuditMsg); |
| 363 |
|
|
|
| 364 |
|
|
ret = pConv->GetOutFormat()->WriteMolecule(itr->second, pConv); |
| 365 |
|
|
|
| 366 |
|
|
delete itr->second; //always delete OBMol object |
| 367 |
|
|
itr->second = NULL; // so can be deleted in DeleteDeferredMols() |
| 368 |
|
|
if (!ret) break; |
| 369 |
|
|
} |
| 370 |
|
|
DeleteDeferredMols();//cleans up in case there have been errors |
| 371 |
|
|
return ret; |
| 372 |
|
|
} |
| 373 |
|
|
|
| 374 |
|
|
bool OBMoleculeFormat::DeleteDeferredMols() |
| 375 |
|
|
{ |
| 376 |
|
|
//Empties IMols, deteting the OBMol objects whose pointers are stored there |
| 377 |
|
|
std::map<std::string, OBMol*>::iterator itr; |
| 378 |
|
|
for(itr=IMols.begin();itr!=IMols.end();++itr) |
| 379 |
|
|
{ |
| 380 |
|
|
delete itr->second; //usually NULL |
| 381 |
|
|
} |
| 382 |
|
|
IMols.clear(); |
| 383 |
|
|
return false; |
| 384 |
|
|
} |
| 385 |
|
|
|
| 386 |
|
|
////////////////////////////////////////////////////////////////// |
| 387 |
|
|
/** Attempts to read the index file datafilename.obindx successively |
| 388 |
|
|
from the following directories: |
| 389 |
|
|
- the current directory |
| 390 |
|
|
- that in the environment variable BABEL_DATADIR or in the macro BABEL_DATADIR |
| 391 |
|
|
if the environment variable is not set |
| 392 |
|
|
- in a subdirectory of the BABEL_DATADIR directory with the version of OpenBabel as its name |
| 393 |
|
|
An index of type NameIndexType is then constructed. NameIndexType is defined |
| 394 |
|
|
in obmolecformat.h and may be a std::tr1::unordered_map (a hash_map) or std::map. |
| 395 |
|
|
In any case it is searched by |
| 396 |
|
|
@code |
| 397 |
|
|
NameIndexType::iterator itr = index.find(molecule_name); |
| 398 |
|
|
if(itr!=index.end()) |
| 399 |
|
|
unsigned pos_in_datafile = itr->second; |
| 400 |
|
|
@endcode |
| 401 |
|
|
pos_in_datafile is used as a paramter in seekg() to read from the datafile |
| 402 |
|
|
|
| 403 |
|
|
If no index is found, it is constructed from the datafile by reading all of |
| 404 |
|
|
it using the format pInFormat, and written to the directory containing the datafile. |
| 405 |
|
|
This means that this function can be used without worrying whether there is an index. |
| 406 |
|
|
It will be slow to execute the first time, but subsequent uses get the speed benefit |
| 407 |
|
|
of indexed access to the datafile. |
| 408 |
|
|
|
| 409 |
|
|
The serialization and de-serialization of the NameIndexType is entirely in |
| 410 |
|
|
this routine and could possibly be improved. Currently re-hashing is done |
| 411 |
|
|
every time the index is read. |
| 412 |
|
|
**/ |
| 413 |
|
|
bool OBMoleculeFormat::ReadNameIndex(NameIndexType& index, |
| 414 |
|
|
const string& datafilename, OBFormat* pInFormat) |
| 415 |
|
|
{ |
| 416 |
|
|
struct headertype |
| 417 |
|
|
{ |
| 418 |
|
|
char filename[256]; |
| 419 |
|
|
unsigned size; |
| 420 |
|
|
} header; |
| 421 |
|
|
|
| 422 |
|
|
NameIndexType::iterator itr; |
| 423 |
|
|
|
| 424 |
|
|
ifstream indexstream; |
| 425 |
|
|
OpenDatafile(indexstream, datafilename + ".obindx"); |
| 426 |
|
|
if(!indexstream) |
| 427 |
|
|
{ |
| 428 |
|
|
//Need to prepare the index |
| 429 |
|
|
ifstream datastream; |
| 430 |
|
|
string datafilepath = OpenDatafile(datastream, datafilename); |
| 431 |
|
|
if(!datastream) |
| 432 |
|
|
{ |
| 433 |
|
|
obErrorLog.ThrowError(__FUNCTION__, |
| 434 |
|
|
datafilepath + " was not found or could not be opened", obError); |
| 435 |
|
|
return false; |
| 436 |
|
|
} |
| 437 |
|
|
|
| 438 |
|
|
OBConversion Conv(&datastream,NULL); |
| 439 |
|
|
Conv.SetInFormat(pInFormat); |
| 440 |
|
|
OBMol mol; |
| 441 |
|
|
streampos pos; |
| 442 |
|
|
while(Conv.Read(&mol)) |
| 443 |
|
|
{ |
| 444 |
|
|
string name = mol.GetTitle(); |
| 445 |
|
|
if(!name.empty()) |
| 446 |
|
|
index.insert(make_pair(name, pos)); |
| 447 |
|
|
mol.Clear(); |
| 448 |
|
|
pos = datastream.tellg(); |
| 449 |
|
|
} |
| 450 |
|
|
obErrorLog.ThrowError(__FUNCTION__, |
| 451 |
|
|
"Prepared an index for " + datafilepath, obAuditMsg); |
| 452 |
|
|
//Save index to file |
| 453 |
|
|
ofstream dofs((datafilepath + ".obindx").c_str(), ios_base::out|ios_base::binary); |
| 454 |
|
|
if(!dofs) return false; |
| 455 |
|
|
|
| 456 |
|
|
strncpy(header.filename,datafilename.c_str(), sizeof(header.filename)); |
| 457 |
|
|
header.filename[sizeof(header.filename) - 1] = '\0'; |
| 458 |
|
|
header.size = index.size(); |
| 459 |
|
|
dofs.write((const char*)&header, sizeof(headertype)); |
| 460 |
|
|
|
| 461 |
|
|
for(itr=index.begin();itr!=index.end();++itr) |
| 462 |
|
|
{ |
| 463 |
|
|
//#chars; chars; ofset(4bytes). |
| 464 |
|
|
const char n = itr->first.size(); |
| 465 |
|
|
dofs.put(n); |
| 466 |
|
|
dofs.write(itr->first.c_str(),n); |
| 467 |
|
|
dofs.write((const char*)&itr->second,sizeof(unsigned)); |
| 468 |
|
|
} |
| 469 |
|
|
} |
| 470 |
|
|
else |
| 471 |
|
|
{ |
| 472 |
|
|
//Read index data from file and put into hash_map |
| 473 |
|
|
indexstream.read((char*)&header,sizeof(headertype)); |
| 474 |
|
|
itr=index.begin(); // for hint |
| 475 |
|
|
for(unsigned int i=0;i<header.size;++i) |
| 476 |
|
|
{ |
| 477 |
|
|
char len; |
| 478 |
|
|
indexstream.get(len); |
| 479 |
|
|
string title(len, 0); |
| 480 |
|
|
unsigned pos; |
| 481 |
|
|
indexstream.read(&title[0],len); |
| 482 |
|
|
indexstream.read((char*)&pos,sizeof(unsigned)); |
| 483 |
|
|
index.insert(itr, make_pair(title,pos)); |
| 484 |
|
|
} |
| 485 |
|
|
} |
| 486 |
|
|
return true; |
| 487 |
|
|
} |
| 488 |
|
|
|
| 489 |
|
|
|
| 490 |
|
|
} //namespace OpenBabel |