| 1 | tim | 741 | /********************************************************************** | 
| 2 |  |  | data.cpp - Global data and resource file parsers. | 
| 3 |  |  |  | 
| 4 |  |  | Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc. | 
| 5 |  |  | Some portions Copyright (C) 2001-2005 by Geoffrey R. Hutchison | 
| 6 |  |  |  | 
| 7 |  |  | This file is part of the Open Babel project. | 
| 8 |  |  | For more information, see <http://openbabel.sourceforge.net/> | 
| 9 |  |  |  | 
| 10 |  |  | This program is free software; you can redistribute it and/or modify | 
| 11 |  |  | it under the terms of the GNU General Public License as published by | 
| 12 |  |  | the Free Software Foundation version 2 of the License. | 
| 13 |  |  |  | 
| 14 |  |  | This program is distributed in the hope that it will be useful, | 
| 15 |  |  | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 16 |  |  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
| 17 |  |  | GNU General Public License for more details. | 
| 18 |  |  | ***********************************************************************/ | 
| 19 |  |  |  | 
| 20 |  |  | #ifdef WIN32 | 
| 21 |  |  | #pragma warning (disable : 4786) | 
| 22 |  |  | #endif | 
| 23 |  |  |  | 
| 24 |  |  | #include "babelconfig.hpp" | 
| 25 |  |  | #include "data.hpp" | 
| 26 |  |  | #include "mol.hpp" | 
| 27 |  |  |  | 
| 28 |  |  | // data headers | 
| 29 |  |  | #include "element.hpp" | 
| 30 |  |  | #include "types.hpp" | 
| 31 |  |  | #include "isotope.hpp" | 
| 32 |  |  | #include "resdata.hpp" | 
| 33 |  |  |  | 
| 34 |  |  |  | 
| 35 |  |  | #if !HAVE_STRNCASECMP | 
| 36 |  |  | extern "C" int strncasecmp(const char *s1, const char *s2, size_t n); | 
| 37 |  |  | #endif | 
| 38 |  |  |  | 
| 39 |  |  | using namespace std; | 
| 40 |  |  |  | 
| 41 |  |  | namespace OpenBabel | 
| 42 |  |  | { | 
| 43 |  |  |  | 
// Global table instances defined in this translation unit. Each one is
// configured by its constructor (below) with the data file it loads from.
OBElementTable   etab;    // periodic table data ("element.txt")
OBTypeTable      ttab;    // atom type translation table ("types.txt")
OBIsotopeTable   isotab;  // isotope exact masses ("isotope.txt")
OBResidueData    resdat;  // residue bond / atom type templates ("resdata.txt")
| 48 |  |  |  | 
| 49 |  |  | /** \class OBElementTable | 
| 50 |  |  | \brief Periodic Table of the Elements | 
| 51 |  |  |  | 
| 52 |  |  | Translating element data is a common task given that many file | 
| 53 |  |  | formats give either element symbol or atomic number information, but | 
| 54 |  |  | not both. The OBElementTable class facilitates conversion between | 
| 55 |  |  | textual and numeric element information. An instance of the | 
| 56 |  |  | OBElementTable class (etab) is declared as external in data.cpp. Source | 
| 57 |  |  | files that include the header file mol.h automatically have an extern | 
| 58 |  |  | definition to etab. The following code sample demonstrates the use | 
| 59 |  |  | of the OBElementTable class: | 
| 60 |  |  | \code | 
| 61 |  |  | cout << "The symbol for element 6 is " << etab.GetSymbol(6) << endl; | 
| 62 |  |  | cout << "The atomic number for Sulfur is " << etab.GetAtomicNum("S") << endl; | 
| 63 |  |  | cout << "The van der Waal radius for Nitrogen is " << etab.GetVdwRad(7); | 
| 64 |  |  | \endcode | 
| 65 |  |  |  | 
| 66 |  |  | Stored information in the OBElementTable includes elemental: | 
| 67 |  |  | - symbols | 
| 68 |  |  | - covalent radii | 
| 69 |  |  | - van der Waal radii | 
| 70 |  |  | - expected maximum bonding valence | 
| 71 |  |  | - molar mass (by IUPAC recommended atomic masses) | 
| 72 |  |  | - electronegativity | 
| 73 |  |  | - ionization potential | 
| 74 |  |  | - electron affinity | 
| 75 |  |  | - RGB colors for visualization programs | 
| 76 |  |  | - names (by IUPAC recommendation) | 
| 77 |  |  | */ | 
| 78 |  |  |  | 
| 79 |  |  | OBElementTable::OBElementTable() | 
| 80 |  |  | { | 
| 81 |  |  | _init = false; | 
| 82 |  |  | _dir = BABEL_DATADIR; | 
| 83 |  |  | _envvar = "BABEL_DATADIR"; | 
| 84 |  |  | _filename = "element.txt"; | 
| 85 |  |  | _subdir = "data"; | 
| 86 |  |  | _dataptr = ElementData; | 
| 87 |  |  | } | 
| 88 |  |  |  | 
| 89 |  |  | OBElementTable::~OBElementTable() | 
| 90 |  |  | { | 
| 91 |  |  | vector<OBElement*>::iterator i; | 
| 92 |  |  | for (i = _element.begin();i != _element.end();i++) | 
| 93 |  |  | delete *i; | 
| 94 |  |  | } | 
| 95 |  |  |  | 
| 96 |  |  | void OBElementTable::ParseLine(const char *buffer) | 
| 97 |  |  | { | 
| 98 |  |  | int num,maxbonds; | 
| 99 |  |  | char symbol[5]; | 
| 100 |  |  | char name[BUFF_SIZE]; | 
| 101 |  |  | double Rcov,Rvdw,mass, elNeg, ionize, elAffin; | 
| 102 |  |  | double red, green, blue; | 
| 103 |  |  |  | 
| 104 |  |  | if (buffer[0] != '#') // skip comment line (at the top) | 
| 105 |  |  | { | 
| 106 |  |  | sscanf(buffer,"%d %s %lf %*f %lf %d %lf %lf %lf %lf %lf %lf %lf %s", | 
| 107 |  |  | &num, | 
| 108 |  |  | symbol, | 
| 109 |  |  | &Rcov, | 
| 110 |  |  | &Rvdw, | 
| 111 |  |  | &maxbonds, | 
| 112 |  |  | &mass, | 
| 113 |  |  | &elNeg, | 
| 114 |  |  | &ionize, | 
| 115 |  |  | &elAffin, | 
| 116 |  |  | &red, | 
| 117 |  |  | &green, | 
| 118 |  |  | &blue, | 
| 119 |  |  | name); | 
| 120 |  |  |  | 
| 121 |  |  | OBElement *ele = new OBElement(num,symbol,Rcov,Rvdw,maxbonds,mass,elNeg, | 
| 122 |  |  | ionize, elAffin, red, green, blue, name); | 
| 123 |  |  | _element.push_back(ele); | 
| 124 |  |  | } | 
| 125 |  |  | } | 
| 126 |  |  |  | 
| 127 |  |  | unsigned int OBElementTable::GetNumberOfElements() | 
| 128 |  |  | { | 
| 129 |  |  | if (!_init) | 
| 130 |  |  | Init(); | 
| 131 |  |  |  | 
| 132 |  |  | return _element.size(); | 
| 133 |  |  | } | 
| 134 |  |  |  | 
| 135 |  |  | char *OBElementTable::GetSymbol(int atomicnum) | 
| 136 |  |  | { | 
| 137 |  |  | if (!_init) | 
| 138 |  |  | Init(); | 
| 139 |  |  |  | 
| 140 |  |  | if (atomicnum < 0 || atomicnum > static_cast<int>(_element.size())) | 
| 141 |  |  | return("\0"); | 
| 142 |  |  |  | 
| 143 |  |  | return(_element[atomicnum]->GetSymbol()); | 
| 144 |  |  | } | 
| 145 |  |  |  | 
| 146 |  |  | int OBElementTable::GetMaxBonds(int atomicnum) | 
| 147 |  |  | { | 
| 148 |  |  | if (!_init) | 
| 149 |  |  | Init(); | 
| 150 |  |  |  | 
| 151 |  |  | if (atomicnum < 0 || atomicnum > static_cast<int>(_element.size())) | 
| 152 |  |  | return(0); | 
| 153 |  |  |  | 
| 154 |  |  | return(_element[atomicnum]->GetMaxBonds()); | 
| 155 |  |  | } | 
| 156 |  |  |  | 
| 157 |  |  | double OBElementTable::GetElectroNeg(int atomicnum) | 
| 158 |  |  | { | 
| 159 |  |  | if (!_init) | 
| 160 |  |  | Init(); | 
| 161 |  |  |  | 
| 162 |  |  | if (atomicnum < 0 || atomicnum > static_cast<int>(_element.size())) | 
| 163 |  |  | return(0.0); | 
| 164 |  |  |  | 
| 165 |  |  | return(_element[atomicnum]->GetElectroNeg()); | 
| 166 |  |  | } | 
| 167 |  |  |  | 
| 168 |  |  | double OBElementTable::GetIonization(int atomicnum) | 
| 169 |  |  | { | 
| 170 |  |  | if (!_init) | 
| 171 |  |  | Init(); | 
| 172 |  |  |  | 
| 173 |  |  | if (atomicnum < 0 || atomicnum > static_cast<int>(_element.size())) | 
| 174 |  |  | return(0.0); | 
| 175 |  |  |  | 
| 176 |  |  | return(_element[atomicnum]->GetIonization()); | 
| 177 |  |  | } | 
| 178 |  |  |  | 
| 179 |  |  |  | 
| 180 |  |  | double OBElementTable::GetElectronAffinity(int atomicnum) | 
| 181 |  |  | { | 
| 182 |  |  | if (!_init) | 
| 183 |  |  | Init(); | 
| 184 |  |  |  | 
| 185 |  |  | if (atomicnum < 0 || atomicnum > static_cast<int>(_element.size())) | 
| 186 |  |  | return(0.0); | 
| 187 |  |  |  | 
| 188 |  |  | return(_element[atomicnum]->GetElectronAffinity()); | 
| 189 |  |  | } | 
| 190 |  |  |  | 
| 191 |  |  | vector<double> OBElementTable::GetRGB(int atomicnum) | 
| 192 |  |  | { | 
| 193 |  |  | if (!_init) | 
| 194 |  |  | Init(); | 
| 195 |  |  |  | 
| 196 |  |  | vector <double> colors; | 
| 197 |  |  | colors.reserve(3); | 
| 198 |  |  |  | 
| 199 |  |  | if (atomicnum < 0 || atomicnum > static_cast<int>(_element.size())) | 
| 200 |  |  | { | 
| 201 |  |  | colors.push_back(0.0f); | 
| 202 |  |  | colors.push_back(0.0f); | 
| 203 |  |  | colors.push_back(0.0f); | 
| 204 |  |  | return(colors); | 
| 205 |  |  | } | 
| 206 |  |  |  | 
| 207 |  |  | colors.push_back(_element[atomicnum]->GetRed()); | 
| 208 |  |  | colors.push_back(_element[atomicnum]->GetGreen()); | 
| 209 |  |  | colors.push_back(_element[atomicnum]->GetBlue()); | 
| 210 |  |  |  | 
| 211 |  |  | return (colors); | 
| 212 |  |  | } | 
| 213 |  |  |  | 
| 214 |  |  | string OBElementTable::GetName(int atomicnum) | 
| 215 |  |  | { | 
| 216 |  |  | if (!_init) | 
| 217 |  |  | Init(); | 
| 218 |  |  |  | 
| 219 |  |  | if (atomicnum < 0 || atomicnum > static_cast<int>(_element.size())) | 
| 220 |  |  | return("Unknown"); | 
| 221 |  |  |  | 
| 222 |  |  | return(_element[atomicnum]->GetName()); | 
| 223 |  |  | } | 
| 224 |  |  |  | 
| 225 |  |  | double OBElementTable::GetVdwRad(int atomicnum) | 
| 226 |  |  | { | 
| 227 |  |  | if (!_init) | 
| 228 |  |  | Init(); | 
| 229 |  |  |  | 
| 230 |  |  | if (atomicnum < 0 || atomicnum > static_cast<int>(_element.size())) | 
| 231 |  |  | return(0.0); | 
| 232 |  |  |  | 
| 233 |  |  | return(_element[atomicnum]->GetVdwRad()); | 
| 234 |  |  | } | 
| 235 |  |  |  | 
| 236 |  |  | double OBElementTable::CorrectedBondRad(int atomicnum, int hyb) | 
| 237 |  |  | { | 
| 238 |  |  | double rad; | 
| 239 |  |  | if (!_init) | 
| 240 |  |  | Init(); | 
| 241 |  |  |  | 
| 242 |  |  | if (atomicnum < 0 || atomicnum > static_cast<int>(_element.size())) | 
| 243 |  |  | return(1.0); | 
| 244 |  |  |  | 
| 245 |  |  | rad = _element[atomicnum]->GetCovalentRad(); | 
| 246 |  |  |  | 
| 247 |  |  | if (hyb == 2) | 
| 248 |  |  | rad *= 0.95; | 
| 249 |  |  | else if (hyb == 1) | 
| 250 |  |  | rad *= 0.90; | 
| 251 |  |  |  | 
| 252 |  |  | return(rad); | 
| 253 |  |  | } | 
| 254 |  |  |  | 
| 255 |  |  | double OBElementTable::CorrectedVdwRad(int atomicnum, int hyb) | 
| 256 |  |  | { | 
| 257 |  |  | double rad; | 
| 258 |  |  | if (!_init) | 
| 259 |  |  | Init(); | 
| 260 |  |  |  | 
| 261 |  |  | if (atomicnum < 0 || atomicnum > static_cast<int>(_element.size())) | 
| 262 |  |  | return(1.95); | 
| 263 |  |  |  | 
| 264 |  |  | rad = _element[atomicnum]->GetVdwRad(); | 
| 265 |  |  |  | 
| 266 |  |  | if (hyb == 2) | 
| 267 |  |  | rad *= 0.95; | 
| 268 |  |  | else if (hyb == 1) | 
| 269 |  |  | rad *= 0.90; | 
| 270 |  |  |  | 
| 271 |  |  | return(rad); | 
| 272 |  |  | } | 
| 273 |  |  |  | 
| 274 |  |  | double OBElementTable::GetCovalentRad(int atomicnum) | 
| 275 |  |  | { | 
| 276 |  |  | if (!_init) | 
| 277 |  |  | Init(); | 
| 278 |  |  |  | 
| 279 |  |  | if (atomicnum < 0 || atomicnum > static_cast<int>(_element.size())) | 
| 280 |  |  | return(0.0); | 
| 281 |  |  |  | 
| 282 |  |  | return(_element[atomicnum]->GetCovalentRad()); | 
| 283 |  |  | } | 
| 284 |  |  |  | 
| 285 |  |  | double OBElementTable::GetMass(int atomicnum) | 
| 286 |  |  | { | 
| 287 |  |  | if (!_init) | 
| 288 |  |  | Init(); | 
| 289 |  |  |  | 
| 290 |  |  | if (atomicnum < 0 || atomicnum > static_cast<int>(_element.size())) | 
| 291 |  |  | return(0.0); | 
| 292 |  |  |  | 
| 293 |  |  | return(_element[atomicnum]->GetMass()); | 
| 294 |  |  | } | 
| 295 |  |  |  | 
| 296 |  |  | int OBElementTable::GetAtomicNum(const char *sym) | 
| 297 |  |  | { | 
| 298 |  |  | int temp; | 
| 299 |  |  | return GetAtomicNum(sym, temp); | 
| 300 |  |  | } | 
| 301 |  |  |  | 
| 302 |  |  | int OBElementTable::GetAtomicNum(const char *sym, int &iso) | 
| 303 |  |  | { | 
| 304 |  |  | if (!_init) | 
| 305 |  |  | Init(); | 
| 306 |  |  |  | 
| 307 |  |  | vector<OBElement*>::iterator i; | 
| 308 |  |  | for (i = _element.begin();i != _element.end();i++) | 
| 309 |  |  | if (!strncasecmp(sym,(*i)->GetSymbol(),2)) | 
| 310 |  |  | return((*i)->GetAtomicNum()); | 
| 311 |  |  | if (strcasecmp(sym, "D") == 0) | 
| 312 |  |  | { | 
| 313 |  |  | iso = 2; | 
| 314 |  |  | return(1); | 
| 315 |  |  | } | 
| 316 |  |  | else if (strcasecmp(sym, "T") == 0) | 
| 317 |  |  | { | 
| 318 |  |  | iso = 3; | 
| 319 |  |  | return(1); | 
| 320 |  |  | } | 
| 321 |  |  | else | 
| 322 |  |  | iso = 0; | 
| 323 |  |  | return(0); | 
| 324 |  |  | } | 
| 325 |  |  |  | 
| 326 |  |  | /** \class OBIsotopeTable | 
| 327 |  |  | \brief Table of atomic isotope masses | 
| 328 |  |  |  | 
| 329 |  |  | */ | 
| 330 |  |  |  | 
| 331 |  |  | OBIsotopeTable::OBIsotopeTable() | 
| 332 |  |  | { | 
| 333 |  |  | _init = false; | 
| 334 |  |  | _dir = BABEL_DATADIR; | 
| 335 |  |  | _envvar = "BABEL_DATADIR"; | 
| 336 |  |  | _filename = "isotope.txt"; | 
| 337 |  |  | _subdir = "data"; | 
| 338 |  |  | _dataptr = IsotopeData; | 
| 339 |  |  | } | 
| 340 |  |  |  | 
| 341 |  |  | void OBIsotopeTable::ParseLine(const char *buffer) | 
| 342 |  |  | { | 
| 343 |  |  | unsigned int atomicNum; | 
| 344 |  |  | unsigned int i; | 
| 345 |  |  | vector<string> vs; | 
| 346 |  |  |  | 
| 347 |  |  | pair <unsigned int, double> entry; | 
| 348 |  |  | vector <pair <unsigned int, double> > row; | 
| 349 |  |  |  | 
| 350 |  |  | if (buffer[0] != '#') // skip comment line (at the top) | 
| 351 |  |  | { | 
| 352 |  |  | tokenize(vs,buffer); | 
| 353 |  |  | if (vs.size() > 3) // atomic number, 0, most abundant mass (...) | 
| 354 |  |  | { | 
| 355 |  |  | atomicNum = atoi(vs[0].c_str()); | 
| 356 |  |  | for (i = 1; i < vs.size() - 1; i += 2) // make sure i+1 still exists | 
| 357 |  |  | { | 
| 358 |  |  | entry.first = atoi(vs[i].c_str()); // isotope | 
| 359 |  |  | entry.second = atof(vs[i + 1].c_str()); // exact mass | 
| 360 |  |  | row.push_back(entry); | 
| 361 |  |  | } | 
| 362 |  |  | _isotopes.push_back(row); | 
| 363 |  |  | } | 
| 364 |  |  | else | 
| 365 |  |  | obErrorLog.ThrowError(__FUNCTION__, " Could not parse line in isotope table isotope.txt", obInfo); | 
| 366 |  |  | } | 
| 367 |  |  | } | 
| 368 |  |  |  | 
| 369 |  |  | double  OBIsotopeTable::GetExactMass(const unsigned int ele, | 
| 370 |  |  | const unsigned int isotope) | 
| 371 |  |  | { | 
| 372 |  |  | if (!_init) | 
| 373 |  |  | Init(); | 
| 374 |  |  |  | 
| 375 |  |  | if (ele > _isotopes.size()) | 
| 376 |  |  | return 0.0; | 
| 377 |  |  |  | 
| 378 |  |  | unsigned int iso; | 
| 379 |  |  | for (iso = 0; iso < _isotopes[ele].size(); iso++) | 
| 380 |  |  | if (isotope == _isotopes[ele][iso].first) | 
| 381 |  |  | return _isotopes[ele][iso].second; | 
| 382 |  |  |  | 
| 383 |  |  | return 0.0; | 
| 384 |  |  | } | 
| 385 |  |  |  | 
| 386 |  |  | /** \class OBTypeTable | 
| 387 |  |  | \brief Atom Type Translation Table | 
| 388 |  |  |  | 
| 389 |  |  | Molecular file formats frequently store information about atoms in an | 
| 390 |  |  | atom type field. Some formats store only the element for each atom, | 
| 391 |  |  | while others include hybridization and local environments, such as the | 
| 392 |  |  | Sybyl mol2 atom type field. The OBTypeTable class acts as a translation | 
| 393 |  |  | table to convert atom types between a number of different molecular | 
| 394 |  |  | file formats. The constructor for OBTypeTable automatically reads the | 
| 395 |  |  | text file types.txt. Just as OBElementTable, an instance of | 
| 396 |  |  | OBTypeTable (ttab) is declared external in data.cpp and is referenced as | 
| 397 |  |  | extern OBTypeTable ttab in mol.h.  The following code demonstrates how | 
| 398 |  |  | to use the OBTypeTable class to translate the internal representation | 
| 399 |  |  | of atom types in an OBMol Internal to Sybyl Mol2 atom types. | 
| 400 |  |  |  | 
| 401 |  |  | \code | 
| 402 |  |  | ttab.SetFromType("INT"); | 
| 403 |  |  | ttab.SetToType("SYB"); | 
| 404 |  |  | OBAtom *atom; | 
| 405 |  |  | vector<OBNodeBase*>::iterator i; | 
| 406 |  |  | string src,dst; | 
| 407 |  |  | for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i)) | 
| 408 |  |  | { | 
| 409 |  |  | src = atom->GetType(); | 
| 410 |  |  | ttab.Translate(dst,src); | 
| 411 |  |  | cout << "atom number " << atom->GetIdx() << " has mol2 type " << dst << endl; | 
| 412 |  |  | } | 
| 413 |  |  | \endcode | 
| 414 |  |  |  | 
| 415 |  |  | Current atom types include (defined in the top line of the data file types.txt): | 
| 416 |  |  | - INT (Open Babel internal codes) | 
| 417 |  |  | - ATN (atomic numbers) | 
| 418 |  |  | - HYB (hybridization) | 
| 419 |  |  | - MMD | 
| 420 |  |  | - MM2 (MM2 force field) | 
| 421 |  |  | - XYZ (element symbols from XYZ file format) | 
| 422 |  |  | - ALC (Alchemy file) | 
| 423 |  |  | - HAD | 
| 424 |  |  | - MCML | 
| 425 |  |  | - C3D (Chem3D) | 
| 426 |  |  | - SYB (Sybyl mol2) | 
| 427 |  |  | - MOL | 
| 428 |  |  | - MAP | 
| 429 |  |  | - DRE | 
| 430 |  |  | - XED (XED format) | 
| 431 |  |  | - DOK (Dock) | 
| 432 |  |  | - M3D | 
| 433 |  |  | */ | 
| 434 |  |  |  | 
| 435 |  |  | OBTypeTable::OBTypeTable() | 
| 436 |  |  | { | 
| 437 |  |  | _init = false; | 
| 438 |  |  | _dir = BABEL_DATADIR; | 
| 439 |  |  | _envvar = "BABEL_DATADIR"; | 
| 440 |  |  | _filename = "types.txt"; | 
| 441 |  |  | _subdir = "data"; | 
| 442 |  |  | _dataptr = TypesData; | 
| 443 |  |  | _linecount = 0; | 
| 444 |  |  | _from = _to = -1; | 
| 445 |  |  | } | 
| 446 |  |  |  | 
| 447 |  |  | void OBTypeTable::ParseLine(const char *buffer) | 
| 448 |  |  | { | 
| 449 |  |  | if (buffer[0] == '#') | 
| 450 |  |  | return; // just a comment line | 
| 451 |  |  |  | 
| 452 |  |  | if (_linecount == 0) | 
| 453 |  |  | sscanf(buffer,"%d%d",&_nrows,&_ncols); | 
| 454 |  |  | else if (_linecount == 1) | 
| 455 |  |  | tokenize(_colnames,buffer); | 
| 456 |  |  | else | 
| 457 |  |  | { | 
| 458 |  |  | vector<string> vc; | 
| 459 |  |  | tokenize(vc,buffer); | 
| 460 |  |  | if (vc.size() == (unsigned)_ncols) | 
| 461 |  |  | _table.push_back(vc); | 
| 462 |  |  | else | 
| 463 |  |  | { | 
| 464 |  |  | stringstream errorMsg; | 
| 465 |  |  | errorMsg << " Could not parse line in type translation table types.txt -- incorect number of columns"; | 
| 466 |  |  | errorMsg << " found " << vc.size() << " expected " << _ncols << "."; | 
| 467 |  |  | obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obInfo); | 
| 468 |  |  | } | 
| 469 |  |  | } | 
| 470 |  |  | _linecount++; | 
| 471 |  |  | } | 
| 472 |  |  |  | 
| 473 |  |  | bool OBTypeTable::SetFromType(const char* from) | 
| 474 |  |  | { | 
| 475 |  |  | if (!_init) | 
| 476 |  |  | Init(); | 
| 477 |  |  |  | 
| 478 |  |  | string tmp = from; | 
| 479 |  |  |  | 
| 480 |  |  | unsigned int i; | 
| 481 |  |  | for (i = 0;i < _colnames.size();i++) | 
| 482 |  |  | if (tmp == _colnames[i]) | 
| 483 |  |  | { | 
| 484 |  |  | _from = i; | 
| 485 |  |  | return(true); | 
| 486 |  |  | } | 
| 487 |  |  |  | 
| 488 |  |  | obErrorLog.ThrowError(__FUNCTION__, "Requested type column not found", obInfo); | 
| 489 |  |  |  | 
| 490 |  |  | return(false); | 
| 491 |  |  | } | 
| 492 |  |  |  | 
| 493 |  |  | bool OBTypeTable::SetToType(const char* to) | 
| 494 |  |  | { | 
| 495 |  |  | if (!_init) | 
| 496 |  |  | Init(); | 
| 497 |  |  |  | 
| 498 |  |  | string tmp = to; | 
| 499 |  |  |  | 
| 500 |  |  | unsigned int i; | 
| 501 |  |  | for (i = 0;i < _colnames.size();i++) | 
| 502 |  |  | if (tmp == _colnames[i]) | 
| 503 |  |  | { | 
| 504 |  |  | _to = i; | 
| 505 |  |  | return(true); | 
| 506 |  |  | } | 
| 507 |  |  |  | 
| 508 |  |  | obErrorLog.ThrowError(__FUNCTION__, "Requested type column not found", obInfo); | 
| 509 |  |  |  | 
| 510 |  |  | return(false); | 
| 511 |  |  | } | 
| 512 |  |  |  | 
| 513 |  |  | bool OBTypeTable::Translate(char *to, const char *from) | 
| 514 |  |  | { | 
| 515 |  |  | if (!_init) | 
| 516 |  |  | Init(); | 
| 517 |  |  |  | 
| 518 |  |  | bool rval; | 
| 519 |  |  | string sto,sfrom; | 
| 520 |  |  | sfrom = from; | 
| 521 |  |  | rval = Translate(sto,sfrom); | 
| 522 |  |  | strcpy(to,(char*)sto.c_str()); | 
| 523 |  |  |  | 
| 524 |  |  | return(rval); | 
| 525 |  |  | } | 
| 526 |  |  |  | 
| 527 |  |  | bool OBTypeTable::Translate(string &to, const string &from) | 
| 528 |  |  | { | 
| 529 |  |  | if (!_init) | 
| 530 |  |  | Init(); | 
| 531 |  |  |  | 
| 532 |  |  | if (from == "") | 
| 533 |  |  | return(false); | 
| 534 |  |  |  | 
| 535 |  |  | if (_from >= 0 && _to >= 0 && | 
| 536 |  |  | _from < _table.size() && _to < _table.size()) | 
| 537 |  |  | { | 
| 538 |  |  | vector<vector<string> >::iterator i; | 
| 539 |  |  | for (i = _table.begin();i != _table.end();i++) | 
| 540 |  |  | if ((signed)(*i).size() > _from &&  (*i)[_from] == from) | 
| 541 |  |  | { | 
| 542 |  |  | to = (*i)[_to]; | 
| 543 |  |  | return(true); | 
| 544 |  |  | } | 
| 545 |  |  | } | 
| 546 |  |  |  | 
| 547 |  |  | // Throw an error, copy the string and return false | 
| 548 |  |  | obErrorLog.ThrowError(__FUNCTION__, "Cannot perform atom type translation: table cannot find requested types.", obWarning); | 
| 549 |  |  | to = from; | 
| 550 |  |  | return(false); | 
| 551 |  |  | } | 
| 552 |  |  |  | 
| 553 |  |  | std::string OBTypeTable::GetFromType() | 
| 554 |  |  | { | 
| 555 |  |  | if (!_init) | 
| 556 |  |  | Init(); | 
| 557 |  |  |  | 
| 558 |  |  | if (_from > 0 && _from < _table.size()) | 
| 559 |  |  | return( _colnames[_from] ); | 
| 560 |  |  | else | 
| 561 |  |  | return( _colnames[0] ); | 
| 562 |  |  | } | 
| 563 |  |  |  | 
| 564 |  |  | std::string OBTypeTable::GetToType() | 
| 565 |  |  | { | 
| 566 |  |  | if (!_init) | 
| 567 |  |  | Init(); | 
| 568 |  |  |  | 
| 569 |  |  | if (_to > 0 && _to < _table.size()) | 
| 570 |  |  | return( _colnames[_to] ); | 
| 571 |  |  | else | 
| 572 |  |  | return( _colnames[0] ); | 
| 573 |  |  | } | 
| 574 |  |  |  | 
//! Converts every character of \p s to upper case, in place.
//! \param s string to modify
void Toupper(std::string &s)
{
    for (std::string::size_type i = 0; i < s.size(); ++i)
    {
        // toupper() is only defined for values representable as unsigned
        // char (or EOF); a plain char may be negative for bytes >= 0x80.
        s[i] = static_cast<char>(toupper(static_cast<unsigned char>(s[i])));
    }
}
| 581 |  |  |  | 
//! Converts every character of \p s to lower case, in place.
//! \param s string to modify
void Tolower(std::string &s)
{
    for (std::string::size_type i = 0; i < s.size(); ++i)
    {
        // tolower() is only defined for values representable as unsigned
        // char (or EOF); a plain char may be negative for bytes >= 0x80.
        s[i] = static_cast<char>(tolower(static_cast<unsigned char>(s[i])));
    }
}
| 588 |  |  |  | 
| 589 |  |  | /////////////////////////////////////////////////////////////////////// | 
| 590 |  |  | OBResidueData::OBResidueData() | 
| 591 |  |  | { | 
| 592 |  |  | _init = false; | 
| 593 |  |  | _dir = BABEL_DATADIR; | 
| 594 |  |  | _envvar = "BABEL_DATADIR"; | 
| 595 |  |  | _filename = "resdata.txt"; | 
| 596 |  |  | _subdir = "data"; | 
| 597 |  |  | _dataptr = ResidueData; | 
| 598 |  |  | } | 
| 599 |  |  |  | 
//! Adds intra-residue bonds to \p mol from the residue templates, then
//! assigns atom types and hybridization.
//!
//! Pass 1 walks every atom and, for each later atom that belongs to the
//! same residue (same number and name), looks up a template bond order for
//! the pair of atom IDs; a bond is added when one exists and the atoms
//! pass the distance test.
//! Pass 2 walks every atom again and sets its type / hybridization, either
//! from simple oxygen and hydrogen rules or from the residue template.
//!
//! \param mol molecule to modify
//! \param bv  not used by this function
//! \return always true
bool OBResidueData::AssignBonds(OBMol &mol,OBBitVec &bv)
{
    OBAtom *a1,*a2;
    OBResidue *r1,*r2;
    vector<OBNodeBase*>::iterator i,j;
    vector3 v;

    int bo;
    // Number of the residue currently being skipped because its name has
    // no template; 0 means "not skipping".
    unsigned int skipres=0;
    // Name of the residue most recently passed to SetResName().
    string rname = "";
    //assign residue bonds
    for (a1 = mol.BeginAtom(i);a1;a1 = mol.NextAtom(i))
    {
        r1 = a1->GetResidue();
        if (skipres && r1->GetNum() == skipres)
            continue;

        // Residue name changed: select its template, or remember the
        // residue number to skip when there is no template for it.
        if (r1->GetName() != rname)
        {
            skipres = SetResName(r1->GetName()) ? 0 : r1->GetNum();
            rname = r1->GetName();
        }
        //assign bonds for each atom
        // j starts just after i, so each pair is visited once; the scan
        // stops at the first atom that belongs to a different residue.
        for (j=i,a2 = mol.NextAtom(j);a2;a2 = mol.NextAtom(j))
        {
            r2 = a2->GetResidue();
            if (r1->GetNum() != r2->GetNum())
                break;
            if (r1->GetName() != r2->GetName())
                break;

            if ((bo = LookupBO(r1->GetAtomID(a1),r2->GetAtomID(a2))))
            {
                v = a1->GetVector() - a2->GetVector();
                // NOTE(review): length_2() is presumably the squared
                // length, making 3.5 a squared-distance cutoff -- confirm.
                if (v.length_2() < 3.5) //check by distance
                    mol.AddBond(a1->GetIdx(),a2->GetIdx(),bo);
            }
        }
    }

    int hyb;
    string type;

    //types and hybridization
    // NOTE: skipres and rname carry over from the loop above; they are
    // not reset before this second pass.
    for (a1 = mol.BeginAtom(i);a1;a1 = mol.NextAtom(i))
    {
        // Oxygen with no bonds at all gets type "O3".
        if (a1->IsOxygen() && !a1->GetValence())
        {
            a1->SetType("O3");
            continue;
        }

        if (a1->IsHydrogen())
        {
            a1->SetType("H");
            continue;
        }

        r1 = a1->GetResidue();
        if (skipres && r1->GetNum() == skipres)
            continue;

        if (r1->GetName() != rname)
        {
            skipres = SetResName(r1->GetName()) ? 0 : r1->GetNum();
            rname = r1->GetName();
        }

        //***valence rule for O-
        // Oxygen with exactly one bond: typed from that bond's order
        // (2 -> "O2"; 1 -> "O-" with formal charge -1).
        if (a1->IsOxygen() && a1->GetValence() == 1)
        {
            OBBond *bond;
            bond = (OBBond*)*(a1->BeginBonds());
            if (bond->GetBO() == 2)
            {
                a1->SetType("O2");
                a1->SetHyb(2);
            }
            if (bond->GetBO() == 1)
            {
                a1->SetType("O-");
                a1->SetHyb(3);
                a1->SetFormalCharge(-1);
            }
        }
        else
            // Everything else: take type and hybridization from the
            // residue template, when it lists this atom ID.
            if (LookupType(r1->GetAtomID(a1),type,hyb))
            {
                a1->SetType(type);
                a1->SetHyb(hyb);
            }
            else // try to figure it out by bond order ???
            {}
    }

    return(true);
}
| 697 |  |  |  | 
| 698 |  |  | void OBResidueData::ParseLine(const char *buffer) | 
| 699 |  |  | { | 
| 700 |  |  | int bo; | 
| 701 |  |  | string s; | 
| 702 |  |  | vector<string> vs; | 
| 703 |  |  |  | 
| 704 |  |  | if (buffer[0] == '#') | 
| 705 |  |  | return; | 
| 706 |  |  |  | 
| 707 |  |  | tokenize(vs,buffer); | 
| 708 |  |  | if (!vs.empty()) | 
| 709 |  |  | { | 
| 710 |  |  | if (vs[0] == "BOND") | 
| 711 |  |  | { | 
| 712 |  |  | s = (vs[1] < vs[2]) ? vs[1] + " " + vs[2] : | 
| 713 |  |  | vs[2] + " " + vs[1]; | 
| 714 |  |  | bo = atoi(vs[3].c_str()); | 
| 715 |  |  | _vtmp.push_back(pair<string,int> (s,bo)); | 
| 716 |  |  | } | 
| 717 |  |  |  | 
| 718 |  |  | if (vs[0] == "ATOM" && vs.size() == 4) | 
| 719 |  |  | { | 
| 720 |  |  | _vatmtmp.push_back(vs[1]); | 
| 721 |  |  | _vatmtmp.push_back(vs[2]); | 
| 722 |  |  | _vatmtmp.push_back(vs[3]); | 
| 723 |  |  | } | 
| 724 |  |  |  | 
| 725 |  |  | if (vs[0] == "RES") | 
| 726 |  |  | _resname.push_back(vs[1]); | 
| 727 |  |  |  | 
| 728 |  |  | if (vs[0]== "END") | 
| 729 |  |  | { | 
| 730 |  |  | _resatoms.push_back(_vatmtmp); | 
| 731 |  |  | _resbonds.push_back(_vtmp); | 
| 732 |  |  | _vtmp.clear(); | 
| 733 |  |  | _vatmtmp.clear(); | 
| 734 |  |  | } | 
| 735 |  |  | } | 
| 736 |  |  | } | 
| 737 |  |  |  | 
| 738 |  |  | bool OBResidueData::SetResName(const string &s) | 
| 739 |  |  | { | 
| 740 |  |  | unsigned int i; | 
| 741 |  |  | for (i = 0;i < _resname.size();i++) | 
| 742 |  |  | if (_resname[i] == s) | 
| 743 |  |  | { | 
| 744 |  |  | _resnum = i; | 
| 745 |  |  | return(true); | 
| 746 |  |  | } | 
| 747 |  |  |  | 
| 748 |  |  | _resnum = -1; | 
| 749 |  |  | return(false); | 
| 750 |  |  | } | 
| 751 |  |  |  | 
| 752 |  |  | int OBResidueData::LookupBO(const string &s) | 
| 753 |  |  | { | 
| 754 |  |  | if (_resnum == -1) | 
| 755 |  |  | return(0); | 
| 756 |  |  |  | 
| 757 |  |  | unsigned int i; | 
| 758 |  |  | for (i = 0;i < _resbonds[_resnum].size();i++) | 
| 759 |  |  | if (_resbonds[_resnum][i].first == s) | 
| 760 |  |  | return(_resbonds[_resnum][i].second); | 
| 761 |  |  |  | 
| 762 |  |  | return(0); | 
| 763 |  |  | } | 
| 764 |  |  |  | 
| 765 |  |  | int OBResidueData::LookupBO(const string &s1, const string &s2) | 
| 766 |  |  | { | 
| 767 |  |  | if (_resnum == -1) | 
| 768 |  |  | return(0); | 
| 769 |  |  | string s; | 
| 770 |  |  |  | 
| 771 |  |  | s = (s1 < s2) ? s1 + " " + s2 : s2 + " " + s1; | 
| 772 |  |  |  | 
| 773 |  |  | unsigned int i; | 
| 774 |  |  | for (i = 0;i < _resbonds[_resnum].size();i++) | 
| 775 |  |  | if (_resbonds[_resnum][i].first == s) | 
| 776 |  |  | return(_resbonds[_resnum][i].second); | 
| 777 |  |  |  | 
| 778 |  |  | return(0); | 
| 779 |  |  | } | 
| 780 |  |  |  | 
| 781 |  |  | bool OBResidueData::LookupType(const string &atmid,string &type,int &hyb) | 
| 782 |  |  | { | 
| 783 |  |  | if (_resnum == -1) | 
| 784 |  |  | return(false); | 
| 785 |  |  |  | 
| 786 |  |  | string s; | 
| 787 |  |  | vector<string>::iterator i; | 
| 788 |  |  |  | 
| 789 |  |  | for (i = _resatoms[_resnum].begin();i != _resatoms[_resnum].end();i+=3) | 
| 790 |  |  | if (atmid == *i) | 
| 791 |  |  | { | 
| 792 |  |  | i++; | 
| 793 |  |  | type = *i; | 
| 794 |  |  | i++; | 
| 795 |  |  | hyb = atoi((*i).c_str()); | 
| 796 |  |  | return(true); | 
| 797 |  |  | } | 
| 798 |  |  |  | 
| 799 |  |  | return(false); | 
| 800 |  |  | } | 
| 801 |  |  |  | 
| 802 |  |  | void OBGlobalDataBase::Init() | 
| 803 |  |  | { | 
| 804 |  |  | if (_init) | 
| 805 |  |  | return; | 
| 806 |  |  | _init = true; | 
| 807 |  |  |  | 
| 808 |  |  | char buffer[BUFF_SIZE],subbuffer[BUFF_SIZE]; | 
| 809 |  |  | ifstream ifs1, ifs2, ifs3, ifs4, *ifsP; | 
| 810 |  |  | // First, look for an environment variable | 
| 811 |  |  | if (getenv(_envvar.c_str()) != NULL) | 
| 812 |  |  | { | 
| 813 |  |  | strcpy(buffer,getenv(_envvar.c_str())); | 
| 814 |  |  | strcat(buffer,FILE_SEP_CHAR); | 
| 815 |  |  |  | 
| 816 |  |  | if (!_subdir.empty()) | 
| 817 |  |  | { | 
| 818 |  |  | strcpy(subbuffer,buffer); | 
| 819 |  |  | strcat(subbuffer,_subdir.c_str()); | 
| 820 |  |  | strcat(subbuffer,FILE_SEP_CHAR); | 
| 821 |  |  | } | 
| 822 |  |  |  | 
| 823 |  |  | strcat(buffer,(char*)_filename.c_str()); | 
| 824 |  |  | strcat(subbuffer,(char*)_filename.c_str()); | 
| 825 |  |  |  | 
| 826 |  |  | ifs1.open(subbuffer); | 
| 827 |  |  | ifsP= &ifs1; | 
| 828 |  |  | if (!(*ifsP)) | 
| 829 |  |  | { | 
| 830 |  |  | ifs2.open(buffer); | 
| 831 |  |  | ifsP = &ifs2; | 
| 832 |  |  | } | 
| 833 |  |  | } | 
| 834 |  |  | // Then, check the configured data directory | 
| 835 |  |  | else // if (!(*ifsP)) | 
| 836 |  |  | { | 
| 837 |  |  | strcpy(buffer,_dir.c_str()); | 
| 838 |  |  | strcat(buffer,FILE_SEP_CHAR); | 
| 839 |  |  |  | 
| 840 |  |  | strcpy(subbuffer,buffer); | 
| 841 |  |  | strcat(subbuffer,BABEL_VERSION); | 
| 842 |  |  | strcat(subbuffer,FILE_SEP_CHAR); | 
| 843 |  |  | strcat(subbuffer,(char*)_filename.c_str()); | 
| 844 |  |  |  | 
| 845 |  |  | strcat(buffer,(char*)_filename.c_str()); | 
| 846 |  |  |  | 
| 847 |  |  | ifs3.open(subbuffer); | 
| 848 |  |  | ifsP= &ifs3; | 
| 849 |  |  | if (!(*ifsP)) | 
| 850 |  |  | { | 
| 851 |  |  | ifs4.open(buffer); | 
| 852 |  |  | ifsP = &ifs4; | 
| 853 |  |  | } | 
| 854 |  |  | } | 
| 855 |  |  |  | 
| 856 |  |  | if ((*ifsP)) | 
| 857 |  |  | { | 
| 858 |  |  | while(ifsP->getline(buffer,BUFF_SIZE)) | 
| 859 |  |  | ParseLine(buffer); | 
| 860 |  |  | } | 
| 861 |  |  |  | 
| 862 |  |  | else | 
| 863 |  |  | // If all else fails, use the compiled in values | 
| 864 |  |  | if (_dataptr) | 
| 865 |  |  | { | 
| 866 |  |  | const char *p1,*p2; | 
| 867 |  |  | for (p1 = p2 = _dataptr;*p2 != '\0';p2++) | 
| 868 |  |  | if (*p2 == '\n') | 
| 869 |  |  | { | 
| 870 |  |  | strncpy(buffer, p1, (p2 - p1)); | 
| 871 |  |  | buffer[(p2 - p1)] = '\0'; | 
| 872 |  |  | ParseLine(buffer); | 
| 873 |  |  | p1 = ++p2; | 
| 874 |  |  | } | 
| 875 |  |  | } | 
| 876 |  |  | else | 
| 877 |  |  | { | 
| 878 |  |  | string s = "Unable to open data file '"; | 
| 879 |  |  | s += _filename; | 
| 880 |  |  | s += "'"; | 
| 881 |  |  | obErrorLog.ThrowError(__FUNCTION__, s, obWarning); | 
| 882 |  |  | } | 
| 883 |  |  |  | 
| 884 |  |  | if (ifs1) | 
| 885 |  |  | ifs1.close(); | 
| 886 |  |  | if (ifs2) | 
| 887 |  |  | ifs2.close(); | 
| 888 |  |  | if (ifs3) | 
| 889 |  |  | ifs3.close(); | 
| 890 |  |  | if (ifs4) | 
| 891 |  |  | ifs4.close(); | 
| 892 |  |  |  | 
| 893 |  |  | if (GetSize() == 0) | 
| 894 |  |  | { | 
| 895 |  |  | string s = "Cannot initialize database '"; | 
| 896 |  |  | s += _filename; | 
| 897 |  |  | s += "' which may cause further errors."; | 
| 898 |  |  | obErrorLog.ThrowError(__FUNCTION__, "Cannot initialize database", obWarning); | 
| 899 |  |  | } | 
| 900 |  |  |  | 
| 901 |  |  | } | 
| 902 |  |  |  | 
| 903 |  |  | } // end namespace OpenBabel | 
| 904 |  |  |  | 
| 905 |  |  | //! \file data.cpp | 
| 906 |  |  | //! \brief Global data and resource file parsers. |