| 1 | /********************************************************************** | 
| 2 | data.cpp - Global data and resource file parsers. | 
| 3 |  | 
| 4 | Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc. | 
| 5 | Some portions Copyright (C) 2001-2005 by Geoffrey R. Hutchison | 
| 6 |  | 
| 7 | This file is part of the Open Babel project. | 
| 8 | For more information, see <http://openbabel.sourceforge.net/> | 
| 9 |  | 
| 10 | This program is free software; you can redistribute it and/or modify | 
| 11 | it under the terms of the GNU General Public License as published by | 
| 12 | the Free Software Foundation version 2 of the License. | 
| 13 |  | 
| 14 | This program is distributed in the hope that it will be useful, | 
| 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
| 17 | GNU General Public License for more details. | 
| 18 | ***********************************************************************/ | 
| 19 |  | 
| 20 | #ifdef WIN32 | 
| 21 | #pragma warning (disable : 4786) | 
| 22 | #endif | 
| 23 |  | 
| 24 | #include "config.h" | 
| 25 | #include "data.hpp" | 
| 26 | #include "mol.hpp" | 
| 27 |  | 
| 28 | // data headers | 
| 29 | #include "element.hpp" | 
| 30 | #include "types.hpp" | 
| 31 | #include "isotope.hpp" | 
| 32 | #include "resdata.hpp" | 
| 33 |  | 
| 34 |  | 
| 35 | #if !HAVE_STRNCASECMP | 
| 36 | extern "C" int strncasecmp(const char *s1, const char *s2, size_t n); | 
| 37 | #endif | 
| 38 |  | 
| 39 | using namespace std; | 
| 40 |  | 
| 41 | namespace OpenBabel | 
| 42 | { | 
| 43 |  | 
//! Global element table instance (symbol / atomic number / property lookups).
OBElementTable   etab;
//! Global atom type translation table instance.
OBTypeTable      ttab;
//! Global isotope mass table instance.
OBIsotopeTable   isotab;
//! Global residue template data instance.
OBResidueData    resdat;
| 48 |  | 
| 49 | /** \class OBElementTable | 
| 50 | \brief Periodic Table of the Elements | 
| 51 |  | 
| 52 | Translating element data is a common task given that many file | 
| 53 | formats give either element symbol or atomic number information, but | 
| 54 | not both. The OBElementTable class facilitates conversion between | 
| 55 | textual and numeric element information. An instance of the | 
| 56 | OBElementTable class (etab) is declared as external in data.cpp. Source | 
| 57 | files that include the header file mol.h automatically have an extern | 
| 58 | definition to etab. The following code sample demonstrates the use | 
| 59 | of the OBElementTable class: | 
| 60 | \code | 
| 61 | cout << "The symbol for element 6 is " << etab.GetSymbol(6) << endl; | 
cout << "The atomic number for Sulfur is " << etab.GetAtomicNum("S") << endl;
cout << "The van der Waals radius for Nitrogen is " << etab.GetVdwRad(7);
| 64 | \endcode | 
| 65 |  | 
| 66 | Stored information in the OBElementTable includes elemental: | 
| 67 | - symbols | 
| 68 | - covalent radii | 
| 69 | - van der Waal radii | 
| 70 | - expected maximum bonding valence | 
| 71 | - molar mass (by IUPAC recommended atomic masses) | 
| 72 | - electronegativity | 
| 73 | - ionization potential | 
| 74 | - electron affinity | 
| 75 | - RGB colors for visualization programs | 
| 76 | - names (by IUPAC recommendation) | 
| 77 | */ | 
| 78 |  | 
| 79 | OBElementTable::OBElementTable() | 
| 80 | { | 
| 81 | _init = false; | 
| 82 | STR_DEFINE(_dir, FRC_PATH); | 
| 83 | _envvar = "FORCE_PARAM_PATH"; | 
| 84 | _filename = "element.txt"; | 
| 85 | _subdir = "data"; | 
| 86 | _dataptr = ElementData; | 
| 87 | } | 
| 88 |  | 
| 89 | OBElementTable::~OBElementTable() | 
| 90 | { | 
| 91 | vector<OBElement*>::iterator i; | 
| 92 | for (i = _element.begin();i != _element.end();i++) | 
| 93 | delete *i; | 
| 94 | } | 
| 95 |  | 
| 96 | void OBElementTable::ParseLine(const char *buffer) | 
| 97 | { | 
| 98 | int num,maxbonds; | 
| 99 | char symbol[5]; | 
| 100 | char name[BUFF_SIZE]; | 
| 101 | double Rcov,Rvdw,mass, elNeg, ionize, elAffin; | 
| 102 | double red, green, blue; | 
| 103 |  | 
| 104 | if (buffer[0] != '#') // skip comment line (at the top) | 
| 105 | { | 
| 106 | sscanf(buffer,"%d %s %lf %*f %lf %d %lf %lf %lf %lf %lf %lf %lf %s", | 
| 107 | &num, | 
| 108 | symbol, | 
| 109 | &Rcov, | 
| 110 | &Rvdw, | 
| 111 | &maxbonds, | 
| 112 | &mass, | 
| 113 | &elNeg, | 
| 114 | &ionize, | 
| 115 | &elAffin, | 
| 116 | &red, | 
| 117 | &green, | 
| 118 | &blue, | 
| 119 | name); | 
| 120 |  | 
| 121 | OBElement *ele = new OBElement(num,symbol,Rcov,Rvdw,maxbonds,mass,elNeg, | 
| 122 | ionize, elAffin, red, green, blue, name); | 
| 123 | _element.push_back(ele); | 
| 124 | } | 
| 125 | } | 
| 126 |  | 
| 127 | unsigned int OBElementTable::GetNumberOfElements() | 
| 128 | { | 
| 129 | if (!_init) | 
| 130 | Init(); | 
| 131 |  | 
| 132 | return _element.size(); | 
| 133 | } | 
| 134 |  | 
| 135 | char *OBElementTable::GetSymbol(int atomicnum) | 
| 136 | { | 
| 137 | if (!_init) | 
| 138 | Init(); | 
| 139 |  | 
| 140 | if (atomicnum < 0 || atomicnum > static_cast<int>(_element.size())) | 
| 141 | return("\0"); | 
| 142 |  | 
| 143 | return(_element[atomicnum]->GetSymbol()); | 
| 144 | } | 
| 145 |  | 
| 146 | int OBElementTable::GetMaxBonds(int atomicnum) | 
| 147 | { | 
| 148 | if (!_init) | 
| 149 | Init(); | 
| 150 |  | 
| 151 | if (atomicnum < 0 || atomicnum > static_cast<int>(_element.size())) | 
| 152 | return(0); | 
| 153 |  | 
| 154 | return(_element[atomicnum]->GetMaxBonds()); | 
| 155 | } | 
| 156 |  | 
| 157 | double OBElementTable::GetElectroNeg(int atomicnum) | 
| 158 | { | 
| 159 | if (!_init) | 
| 160 | Init(); | 
| 161 |  | 
| 162 | if (atomicnum < 0 || atomicnum > static_cast<int>(_element.size())) | 
| 163 | return(0.0); | 
| 164 |  | 
| 165 | return(_element[atomicnum]->GetElectroNeg()); | 
| 166 | } | 
| 167 |  | 
| 168 | double OBElementTable::GetIonization(int atomicnum) | 
| 169 | { | 
| 170 | if (!_init) | 
| 171 | Init(); | 
| 172 |  | 
| 173 | if (atomicnum < 0 || atomicnum > static_cast<int>(_element.size())) | 
| 174 | return(0.0); | 
| 175 |  | 
| 176 | return(_element[atomicnum]->GetIonization()); | 
| 177 | } | 
| 178 |  | 
| 179 |  | 
| 180 | double OBElementTable::GetElectronAffinity(int atomicnum) | 
| 181 | { | 
| 182 | if (!_init) | 
| 183 | Init(); | 
| 184 |  | 
| 185 | if (atomicnum < 0 || atomicnum > static_cast<int>(_element.size())) | 
| 186 | return(0.0); | 
| 187 |  | 
| 188 | return(_element[atomicnum]->GetElectronAffinity()); | 
| 189 | } | 
| 190 |  | 
| 191 | vector<double> OBElementTable::GetRGB(int atomicnum) | 
| 192 | { | 
| 193 | if (!_init) | 
| 194 | Init(); | 
| 195 |  | 
| 196 | vector <double> colors; | 
| 197 | colors.reserve(3); | 
| 198 |  | 
| 199 | if (atomicnum < 0 || atomicnum > static_cast<int>(_element.size())) | 
| 200 | { | 
| 201 | colors.push_back(0.0f); | 
| 202 | colors.push_back(0.0f); | 
| 203 | colors.push_back(0.0f); | 
| 204 | return(colors); | 
| 205 | } | 
| 206 |  | 
| 207 | colors.push_back(_element[atomicnum]->GetRed()); | 
| 208 | colors.push_back(_element[atomicnum]->GetGreen()); | 
| 209 | colors.push_back(_element[atomicnum]->GetBlue()); | 
| 210 |  | 
| 211 | return (colors); | 
| 212 | } | 
| 213 |  | 
| 214 | string OBElementTable::GetName(int atomicnum) | 
| 215 | { | 
| 216 | if (!_init) | 
| 217 | Init(); | 
| 218 |  | 
| 219 | if (atomicnum < 0 || atomicnum > static_cast<int>(_element.size())) | 
| 220 | return("Unknown"); | 
| 221 |  | 
| 222 | return(_element[atomicnum]->GetName()); | 
| 223 | } | 
| 224 |  | 
| 225 | double OBElementTable::GetVdwRad(int atomicnum) | 
| 226 | { | 
| 227 | if (!_init) | 
| 228 | Init(); | 
| 229 |  | 
| 230 | if (atomicnum < 0 || atomicnum > static_cast<int>(_element.size())) | 
| 231 | return(0.0); | 
| 232 |  | 
| 233 | return(_element[atomicnum]->GetVdwRad()); | 
| 234 | } | 
| 235 |  | 
| 236 | double OBElementTable::CorrectedBondRad(int atomicnum, int hyb) | 
| 237 | { | 
| 238 | double rad; | 
| 239 | if (!_init) | 
| 240 | Init(); | 
| 241 |  | 
| 242 | if (atomicnum < 0 || atomicnum > static_cast<int>(_element.size())) | 
| 243 | return(1.0); | 
| 244 |  | 
| 245 | rad = _element[atomicnum]->GetCovalentRad(); | 
| 246 |  | 
| 247 | if (hyb == 2) | 
| 248 | rad *= 0.95; | 
| 249 | else if (hyb == 1) | 
| 250 | rad *= 0.90; | 
| 251 |  | 
| 252 | return(rad); | 
| 253 | } | 
| 254 |  | 
| 255 | double OBElementTable::CorrectedVdwRad(int atomicnum, int hyb) | 
| 256 | { | 
| 257 | double rad; | 
| 258 | if (!_init) | 
| 259 | Init(); | 
| 260 |  | 
| 261 | if (atomicnum < 0 || atomicnum > static_cast<int>(_element.size())) | 
| 262 | return(1.95); | 
| 263 |  | 
| 264 | rad = _element[atomicnum]->GetVdwRad(); | 
| 265 |  | 
| 266 | if (hyb == 2) | 
| 267 | rad *= 0.95; | 
| 268 | else if (hyb == 1) | 
| 269 | rad *= 0.90; | 
| 270 |  | 
| 271 | return(rad); | 
| 272 | } | 
| 273 |  | 
| 274 | double OBElementTable::GetCovalentRad(int atomicnum) | 
| 275 | { | 
| 276 | if (!_init) | 
| 277 | Init(); | 
| 278 |  | 
| 279 | if (atomicnum < 0 || atomicnum > static_cast<int>(_element.size())) | 
| 280 | return(0.0); | 
| 281 |  | 
| 282 | return(_element[atomicnum]->GetCovalentRad()); | 
| 283 | } | 
| 284 |  | 
| 285 | double OBElementTable::GetMass(int atomicnum) | 
| 286 | { | 
| 287 | if (!_init) | 
| 288 | Init(); | 
| 289 |  | 
| 290 | if (atomicnum < 0 || atomicnum > static_cast<int>(_element.size())) | 
| 291 | return(0.0); | 
| 292 |  | 
| 293 | return(_element[atomicnum]->GetMass()); | 
| 294 | } | 
| 295 |  | 
| 296 | int OBElementTable::GetAtomicNum(const char *sym) | 
| 297 | { | 
| 298 | int temp; | 
| 299 | return GetAtomicNum(sym, temp); | 
| 300 | } | 
| 301 |  | 
| 302 | int OBElementTable::GetAtomicNum(const char *sym, int &iso) | 
| 303 | { | 
| 304 | if (!_init) | 
| 305 | Init(); | 
| 306 |  | 
| 307 | vector<OBElement*>::iterator i; | 
| 308 | for (i = _element.begin();i != _element.end();i++) | 
| 309 | if (!strncasecmp(sym,(*i)->GetSymbol(),2)) | 
| 310 | return((*i)->GetAtomicNum()); | 
| 311 | if (strcasecmp(sym, "D") == 0) | 
| 312 | { | 
| 313 | iso = 2; | 
| 314 | return(1); | 
| 315 | } | 
| 316 | else if (strcasecmp(sym, "T") == 0) | 
| 317 | { | 
| 318 | iso = 3; | 
| 319 | return(1); | 
| 320 | } | 
| 321 | else | 
| 322 | iso = 0; | 
| 323 | return(0); | 
| 324 | } | 
| 325 |  | 
| 326 | /** \class OBIsotopeTable | 
| 327 | \brief Table of atomic isotope masses | 
| 328 |  | 
| 329 | */ | 
| 330 |  | 
| 331 | OBIsotopeTable::OBIsotopeTable() | 
| 332 | { | 
| 333 | _init = false; | 
| 334 | STR_DEFINE(_dir, FRC_PATH); | 
| 335 | _envvar = "FORCE_PARAM_PATH"; | 
| 336 | _filename = "isotope.txt"; | 
| 337 | _subdir = "data"; | 
| 338 | _dataptr = IsotopeData; | 
| 339 | } | 
| 340 |  | 
| 341 | void OBIsotopeTable::ParseLine(const char *buffer) | 
| 342 | { | 
| 343 | unsigned int atomicNum; | 
| 344 | unsigned int i; | 
| 345 | vector<string> vs; | 
| 346 |  | 
| 347 | pair <unsigned int, double> entry; | 
| 348 | vector <pair <unsigned int, double> > row; | 
| 349 |  | 
| 350 | if (buffer[0] != '#') // skip comment line (at the top) | 
| 351 | { | 
| 352 | tokenize(vs,buffer); | 
| 353 | if (vs.size() > 3) // atomic number, 0, most abundant mass (...) | 
| 354 | { | 
| 355 | atomicNum = atoi(vs[0].c_str()); | 
| 356 | for (i = 1; i < vs.size() - 1; i += 2) // make sure i+1 still exists | 
| 357 | { | 
| 358 | entry.first = atoi(vs[i].c_str()); // isotope | 
| 359 | entry.second = atof(vs[i + 1].c_str()); // exact mass | 
| 360 | row.push_back(entry); | 
| 361 | } | 
| 362 | _isotopes.push_back(row); | 
| 363 | } | 
| 364 | else | 
| 365 | obErrorLog.ThrowError(__func__, " Could not parse line in isotope table isotope.txt", obInfo); | 
| 366 | } | 
| 367 | } | 
| 368 |  | 
| 369 | double  OBIsotopeTable::GetExactMass(const unsigned int ele, | 
| 370 | const unsigned int isotope) | 
| 371 | { | 
| 372 | if (!_init) | 
| 373 | Init(); | 
| 374 |  | 
| 375 | if (ele > _isotopes.size()) | 
| 376 | return 0.0; | 
| 377 |  | 
| 378 | unsigned int iso; | 
| 379 | for (iso = 0; iso < _isotopes[ele].size(); iso++) | 
| 380 | if (isotope == _isotopes[ele][iso].first) | 
| 381 | return _isotopes[ele][iso].second; | 
| 382 |  | 
| 383 | return 0.0; | 
| 384 | } | 
| 385 |  | 
| 386 | /** \class OBTypeTable | 
| 387 | \brief Atom Type Translation Table | 
| 388 |  | 
| 389 | Molecular file formats frequently store information about atoms in an | 
| 390 | atom type field. Some formats store only the element for each atom, | 
| 391 | while others include hybridization and local environments, such as the | 
| 392 | Sybyl mol2 atom type field. The OBTypeTable class acts as a translation | 
| 393 | table to convert atom types between a number of different molecular | 
file formats. The table is loaded automatically from the text file
types.txt the first time it is used. Just as OBElementTable, an instance of
| 396 | OBTypeTable (ttab) is declared external in data.cpp and is referenced as | 
| 397 | extern OBTypeTable ttab in mol.h.  The following code demonstrates how | 
| 398 | to use the OBTypeTable class to translate the internal representation | 
| 399 | of atom types in an OBMol Internal to Sybyl Mol2 atom types. | 
| 400 |  | 
| 401 | \code | 
| 402 | ttab.SetFromType("INT"); | 
| 403 | ttab.SetToType("SYB"); | 
| 404 | OBAtom *atom; | 
| 405 | vector<OBAtom*>::iterator i; | 
| 406 | string src,dst; | 
for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i))
| 408 | { | 
| 409 | src = atom->GetType(); | 
| 410 | ttab.Translate(dst,src); | 
cout << "atom number " << atom->GetIdx() << " has mol2 type " << dst << endl;
| 412 | } | 
| 413 | \endcode | 
| 414 |  | 
| 415 | Current atom types include (defined in the top line of the data file types.txt): | 
| 416 | - INT (Open Babel internal codes) | 
| 417 | - ATN (atomic numbers) | 
| 418 | - HYB (hybridization) | 
| 419 | - MMD | 
| 420 | - MM2 (MM2 force field) | 
| 421 | - XYZ (element symbols from XYZ file format) | 
| 422 | - ALC (Alchemy file) | 
| 423 | - HAD | 
| 424 | - MCML | 
| 425 | - C3D (Chem3D) | 
| 426 | - SYB (Sybyl mol2) | 
| 427 | - MOL | 
| 428 | - MAP | 
| 429 | - DRE | 
| 430 | - XED (XED format) | 
| 431 | - DOK (Dock) | 
| 432 | - M3D | 
| 433 | */ | 
| 434 |  | 
| 435 | OBTypeTable::OBTypeTable() | 
| 436 | { | 
| 437 | _init = false; | 
| 438 | STR_DEFINE(_dir, FRC_PATH); | 
| 439 | _envvar = "FORCE_PARAM_PATH"; | 
| 440 | _filename = "types.txt"; | 
| 441 | _subdir = "data"; | 
| 442 | _dataptr = TypesData; | 
| 443 | _linecount = 0; | 
| 444 | _from = _to = -1; | 
| 445 | } | 
| 446 |  | 
| 447 | void OBTypeTable::ParseLine(const char *buffer) | 
| 448 | { | 
| 449 | if (buffer[0] == '#') | 
| 450 | return; // just a comment line | 
| 451 |  | 
| 452 | if (_linecount == 0) | 
| 453 | sscanf(buffer,"%d%d",&_nrows,&_ncols); | 
| 454 | else if (_linecount == 1) | 
| 455 | tokenize(_colnames,buffer); | 
| 456 | else | 
| 457 | { | 
| 458 | vector<string> vc; | 
| 459 | tokenize(vc,buffer); | 
| 460 | if (vc.size() == (unsigned)_ncols) | 
| 461 | _table.push_back(vc); | 
| 462 | else | 
| 463 | { | 
| 464 | stringstream errorMsg; | 
| 465 | errorMsg << " Could not parse line in type translation table types.txt -- incorect number of columns"; | 
| 466 | errorMsg << " found " << vc.size() << " expected " << _ncols << "."; | 
| 467 | obErrorLog.ThrowError(__func__, errorMsg.str(), obInfo); | 
| 468 | } | 
| 469 | } | 
| 470 | _linecount++; | 
| 471 | } | 
| 472 |  | 
| 473 | bool OBTypeTable::SetFromType(const char* from) | 
| 474 | { | 
| 475 | if (!_init) | 
| 476 | Init(); | 
| 477 |  | 
| 478 | string tmp = from; | 
| 479 |  | 
| 480 | unsigned int i; | 
| 481 | for (i = 0;i < _colnames.size();i++) | 
| 482 | if (tmp == _colnames[i]) | 
| 483 | { | 
| 484 | _from = i; | 
| 485 | return(true); | 
| 486 | } | 
| 487 |  | 
| 488 | obErrorLog.ThrowError(__func__, "Requested type column not found", obInfo); | 
| 489 |  | 
| 490 | return(false); | 
| 491 | } | 
| 492 |  | 
| 493 | bool OBTypeTable::SetToType(const char* to) | 
| 494 | { | 
| 495 | if (!_init) | 
| 496 | Init(); | 
| 497 |  | 
| 498 | string tmp = to; | 
| 499 |  | 
| 500 | unsigned int i; | 
| 501 | for (i = 0;i < _colnames.size();i++) | 
| 502 | if (tmp == _colnames[i]) | 
| 503 | { | 
| 504 | _to = i; | 
| 505 | return(true); | 
| 506 | } | 
| 507 |  | 
| 508 | obErrorLog.ThrowError(__func__, "Requested type column not found", obInfo); | 
| 509 |  | 
| 510 | return(false); | 
| 511 | } | 
| 512 |  | 
| 513 | bool OBTypeTable::Translate(char *to, const char *from) | 
| 514 | { | 
| 515 | if (!_init) | 
| 516 | Init(); | 
| 517 |  | 
| 518 | bool rval; | 
| 519 | string sto,sfrom; | 
| 520 | sfrom = from; | 
| 521 | rval = Translate(sto,sfrom); | 
| 522 | strcpy(to,(char*)sto.c_str()); | 
| 523 |  | 
| 524 | return(rval); | 
| 525 | } | 
| 526 |  | 
| 527 | bool OBTypeTable::Translate(string &to, const string &from) | 
| 528 | { | 
| 529 | if (!_init) | 
| 530 | Init(); | 
| 531 |  | 
| 532 | if (from == "") | 
| 533 | return(false); | 
| 534 |  | 
| 535 | if (_from >= 0 && _to >= 0 && | 
| 536 | _from < _table.size() && _to < _table.size()) | 
| 537 | { | 
| 538 | vector<vector<string> >::iterator i; | 
| 539 | for (i = _table.begin();i != _table.end();i++) | 
| 540 | if ((signed)(*i).size() > _from &&  (*i)[_from] == from) | 
| 541 | { | 
| 542 | to = (*i)[_to]; | 
| 543 | return(true); | 
| 544 | } | 
| 545 | } | 
| 546 |  | 
| 547 | // Throw an error, copy the string and return false | 
| 548 | obErrorLog.ThrowError(__func__, "Cannot perform atom type translation: table cannot find requested types.", obWarning); | 
| 549 | to = from; | 
| 550 | return(false); | 
| 551 | } | 
| 552 |  | 
| 553 | std::string OBTypeTable::GetFromType() | 
| 554 | { | 
| 555 | if (!_init) | 
| 556 | Init(); | 
| 557 |  | 
| 558 | if (_from > 0 && _from < _table.size()) | 
| 559 | return( _colnames[_from] ); | 
| 560 | else | 
| 561 | return( _colnames[0] ); | 
| 562 | } | 
| 563 |  | 
| 564 | std::string OBTypeTable::GetToType() | 
| 565 | { | 
| 566 | if (!_init) | 
| 567 | Init(); | 
| 568 |  | 
| 569 | if (_to > 0 && _to < _table.size()) | 
| 570 | return( _colnames[_to] ); | 
| 571 | else | 
| 572 | return( _colnames[0] ); | 
| 573 | } | 
| 574 |  | 
//! Convert every character of a string to upper case, in place.
//! \param s string to modify
void Toupper(std::string &s)
{
  for (std::string::size_type i = 0; i < s.size(); ++i)
    {
      // toupper() is only defined for values representable as unsigned
      // char (or EOF); a plain char may be negative, so convert first.
      const unsigned char ch = static_cast<unsigned char>(s[i]);
      s[i] = static_cast<char>(toupper(ch));
    }
}
| 581 |  | 
//! Convert every character of a string to lower case, in place.
//! \param s string to modify
void Tolower(std::string &s)
{
  for (std::string::size_type i = 0; i < s.size(); ++i)
    {
      // tolower() is only defined for values representable as unsigned
      // char (or EOF); a plain char may be negative, so convert first.
      const unsigned char ch = static_cast<unsigned char>(s[i]);
      s[i] = static_cast<char>(tolower(ch));
    }
}
| 588 |  | 
| 589 | /////////////////////////////////////////////////////////////////////// | 
| 590 | OBResidueData::OBResidueData() | 
| 591 | { | 
| 592 | _init = false; | 
| 593 | STR_DEFINE(_dir, FRC_PATH); | 
| 594 | _envvar = "FORCE_PARAM_PATH"; | 
| 595 | _filename = "resdata.txt"; | 
| 596 | _subdir = "data"; | 
| 597 | _dataptr = ResidueData; | 
| 598 | } | 
| 599 |  | 
| 600 | bool OBResidueData::AssignBonds(OBMol &mol,OBBitVec &bv) | 
| 601 | { | 
| 602 | OBAtom *a1,*a2; | 
| 603 | OBResidue *r1,*r2; | 
| 604 | vector<OBNodeBase*>::iterator i,j; | 
| 605 | vector3 v; | 
| 606 |  | 
| 607 | int bo; | 
| 608 | unsigned int skipres=0; | 
| 609 | string rname = ""; | 
| 610 | //assign residue bonds | 
| 611 | for (a1 = mol.BeginAtom(i);a1;a1 = mol.NextAtom(i)) | 
| 612 | { | 
| 613 | r1 = a1->GetResidue(); | 
| 614 | if (skipres && r1->GetNum() == skipres) | 
| 615 | continue; | 
| 616 |  | 
| 617 | if (r1->GetName() != rname) | 
| 618 | { | 
| 619 | skipres = SetResName(r1->GetName()) ? 0 : r1->GetNum(); | 
| 620 | rname = r1->GetName(); | 
| 621 | } | 
| 622 | //assign bonds for each atom | 
| 623 | for (j=i,a2 = mol.NextAtom(j);a2;a2 = mol.NextAtom(j)) | 
| 624 | { | 
| 625 | r2 = a2->GetResidue(); | 
| 626 | if (r1->GetNum() != r2->GetNum()) | 
| 627 | break; | 
| 628 | if (r1->GetName() != r2->GetName()) | 
| 629 | break; | 
| 630 |  | 
| 631 | if ((bo = LookupBO(r1->GetAtomID(a1),r2->GetAtomID(a2)))) | 
| 632 | { | 
| 633 | v = a1->GetVector() - a2->GetVector(); | 
| 634 | if (v.length_2() < 3.5) //check by distance | 
| 635 | mol.AddBond(a1->GetIdx(),a2->GetIdx(),bo); | 
| 636 | } | 
| 637 | } | 
| 638 | } | 
| 639 |  | 
| 640 | int hyb; | 
| 641 | string type; | 
| 642 |  | 
| 643 | //types and hybridization | 
| 644 | for (a1 = mol.BeginAtom(i);a1;a1 = mol.NextAtom(i)) | 
| 645 | { | 
| 646 | if (a1->IsOxygen() && !a1->GetValence()) | 
| 647 | { | 
| 648 | a1->SetType("O3"); | 
| 649 | continue; | 
| 650 | } | 
| 651 |  | 
| 652 | if (a1->IsHydrogen()) | 
| 653 | { | 
| 654 | a1->SetType("H"); | 
| 655 | continue; | 
| 656 | } | 
| 657 |  | 
| 658 | r1 = a1->GetResidue(); | 
| 659 | if (skipres && r1->GetNum() == skipres) | 
| 660 | continue; | 
| 661 |  | 
| 662 | if (r1->GetName() != rname) | 
| 663 | { | 
| 664 | skipres = SetResName(r1->GetName()) ? 0 : r1->GetNum(); | 
| 665 | rname = r1->GetName(); | 
| 666 | } | 
| 667 |  | 
| 668 | //***valence rule for O- | 
| 669 | if (a1->IsOxygen() && a1->GetValence() == 1) | 
| 670 | { | 
| 671 | OBBond *bond; | 
| 672 | bond = (OBBond*)*(a1->BeginBonds()); | 
| 673 | if (bond->GetBO() == 2) | 
| 674 | { | 
| 675 | a1->SetType("O2"); | 
| 676 | a1->SetHyb(2); | 
| 677 | } | 
| 678 | if (bond->GetBO() == 1) | 
| 679 | { | 
| 680 | a1->SetType("O-"); | 
| 681 | a1->SetHyb(3); | 
| 682 | a1->SetFormalCharge(-1); | 
| 683 | } | 
| 684 | } | 
| 685 | else | 
| 686 | if (LookupType(r1->GetAtomID(a1),type,hyb)) | 
| 687 | { | 
| 688 | a1->SetType(type); | 
| 689 | a1->SetHyb(hyb); | 
| 690 | } | 
| 691 | else // try to figure it out by bond order ??? | 
| 692 | {} | 
| 693 | } | 
| 694 |  | 
| 695 | return(true); | 
| 696 | } | 
| 697 |  | 
| 698 | void OBResidueData::ParseLine(const char *buffer) | 
| 699 | { | 
| 700 | int bo; | 
| 701 | string s; | 
| 702 | vector<string> vs; | 
| 703 |  | 
| 704 | if (buffer[0] == '#') | 
| 705 | return; | 
| 706 |  | 
| 707 | tokenize(vs,buffer); | 
| 708 | if (!vs.empty()) | 
| 709 | { | 
| 710 | if (vs[0] == "BOND") | 
| 711 | { | 
| 712 | s = (vs[1] < vs[2]) ? vs[1] + " " + vs[2] : | 
| 713 | vs[2] + " " + vs[1]; | 
| 714 | bo = atoi(vs[3].c_str()); | 
| 715 | _vtmp.push_back(pair<string,int> (s,bo)); | 
| 716 | } | 
| 717 |  | 
| 718 | if (vs[0] == "ATOM" && vs.size() == 4) | 
| 719 | { | 
| 720 | _vatmtmp.push_back(vs[1]); | 
| 721 | _vatmtmp.push_back(vs[2]); | 
| 722 | _vatmtmp.push_back(vs[3]); | 
| 723 | } | 
| 724 |  | 
| 725 | if (vs[0] == "RES") | 
| 726 | _resname.push_back(vs[1]); | 
| 727 |  | 
| 728 | if (vs[0]== "END") | 
| 729 | { | 
| 730 | _resatoms.push_back(_vatmtmp); | 
| 731 | _resbonds.push_back(_vtmp); | 
| 732 | _vtmp.clear(); | 
| 733 | _vatmtmp.clear(); | 
| 734 | } | 
| 735 | } | 
| 736 | } | 
| 737 |  | 
| 738 | bool OBResidueData::SetResName(const string &s) | 
| 739 | { | 
| 740 | unsigned int i; | 
| 741 | for (i = 0;i < _resname.size();i++) | 
| 742 | if (_resname[i] == s) | 
| 743 | { | 
| 744 | _resnum = i; | 
| 745 | return(true); | 
| 746 | } | 
| 747 |  | 
| 748 | _resnum = -1; | 
| 749 | return(false); | 
| 750 | } | 
| 751 |  | 
| 752 | int OBResidueData::LookupBO(const string &s) | 
| 753 | { | 
| 754 | if (_resnum == -1) | 
| 755 | return(0); | 
| 756 |  | 
| 757 | unsigned int i; | 
| 758 | for (i = 0;i < _resbonds[_resnum].size();i++) | 
| 759 | if (_resbonds[_resnum][i].first == s) | 
| 760 | return(_resbonds[_resnum][i].second); | 
| 761 |  | 
| 762 | return(0); | 
| 763 | } | 
| 764 |  | 
| 765 | int OBResidueData::LookupBO(const string &s1, const string &s2) | 
| 766 | { | 
| 767 | if (_resnum == -1) | 
| 768 | return(0); | 
| 769 | string s; | 
| 770 |  | 
| 771 | s = (s1 < s2) ? s1 + " " + s2 : s2 + " " + s1; | 
| 772 |  | 
| 773 | unsigned int i; | 
| 774 | for (i = 0;i < _resbonds[_resnum].size();i++) | 
| 775 | if (_resbonds[_resnum][i].first == s) | 
| 776 | return(_resbonds[_resnum][i].second); | 
| 777 |  | 
| 778 | return(0); | 
| 779 | } | 
| 780 |  | 
| 781 | bool OBResidueData::LookupType(const string &atmid,string &type,int &hyb) | 
| 782 | { | 
| 783 | if (_resnum == -1) | 
| 784 | return(false); | 
| 785 |  | 
| 786 | string s; | 
| 787 | vector<string>::iterator i; | 
| 788 |  | 
| 789 | for (i = _resatoms[_resnum].begin();i != _resatoms[_resnum].end();i+=3) | 
| 790 | if (atmid == *i) | 
| 791 | { | 
| 792 | i++; | 
| 793 | type = *i; | 
| 794 | i++; | 
| 795 | hyb = atoi((*i).c_str()); | 
| 796 | return(true); | 
| 797 | } | 
| 798 |  | 
| 799 | return(false); | 
| 800 | } | 
| 801 |  | 
| 802 | void OBGlobalDataBase::Init() | 
| 803 | { | 
| 804 | if (_init) | 
| 805 | return; | 
| 806 | _init = true; | 
| 807 |  | 
| 808 | char buffer[BUFF_SIZE],subbuffer[BUFF_SIZE]; | 
| 809 | ifstream ifs1, ifs2, ifs3, ifs4, *ifsP; | 
| 810 | // First, look for an environment variable | 
| 811 | if (getenv(_envvar.c_str()) != NULL) | 
| 812 | { | 
| 813 | strcpy(buffer,getenv(_envvar.c_str())); | 
| 814 | strcat(buffer,FILE_SEP_CHAR); | 
| 815 |  | 
| 816 | if (!_subdir.empty()) | 
| 817 | { | 
| 818 | strcpy(subbuffer,buffer); | 
| 819 | strcat(subbuffer,_subdir.c_str()); | 
| 820 | strcat(subbuffer,FILE_SEP_CHAR); | 
| 821 | } | 
| 822 |  | 
| 823 | strcat(buffer,(char*)_filename.c_str()); | 
| 824 | strcat(subbuffer,(char*)_filename.c_str()); | 
| 825 |  | 
| 826 | ifs1.open(subbuffer); | 
| 827 | ifsP= &ifs1; | 
| 828 | if (!(*ifsP)) | 
| 829 | { | 
| 830 | ifs2.open(buffer); | 
| 831 | ifsP = &ifs2; | 
| 832 | } | 
| 833 | } | 
| 834 | // Then, check the configured data directory | 
| 835 | else // if (!(*ifsP)) | 
| 836 | { | 
| 837 | strcpy(buffer,_dir.c_str()); | 
| 838 | strcat(buffer,FILE_SEP_CHAR); | 
| 839 |  | 
| 840 | strcpy(subbuffer,buffer); | 
| 841 | strcat(subbuffer,BABEL_VERSION); | 
| 842 | strcat(subbuffer,FILE_SEP_CHAR); | 
| 843 | strcat(subbuffer,(char*)_filename.c_str()); | 
| 844 |  | 
| 845 | strcat(buffer,(char*)_filename.c_str()); | 
| 846 |  | 
| 847 | ifs3.open(subbuffer); | 
| 848 | ifsP= &ifs3; | 
| 849 | if (!(*ifsP)) | 
| 850 | { | 
| 851 | ifs4.open(buffer); | 
| 852 | ifsP = &ifs4; | 
| 853 | } | 
| 854 | } | 
| 855 |  | 
| 856 | if ((*ifsP)) | 
| 857 | { | 
| 858 | while(ifsP->getline(buffer,BUFF_SIZE)) | 
| 859 | ParseLine(buffer); | 
| 860 | } | 
| 861 |  | 
| 862 | else | 
| 863 | // If all else fails, use the compiled in values | 
| 864 | if (_dataptr) | 
| 865 | { | 
| 866 | const char *p1,*p2; | 
| 867 | for (p1 = p2 = _dataptr;*p2 != '\0';p2++) | 
| 868 | if (*p2 == '\n') | 
| 869 | { | 
| 870 | strncpy(buffer, p1, (p2 - p1)); | 
| 871 | buffer[(p2 - p1)] = '\0'; | 
| 872 | ParseLine(buffer); | 
| 873 | p1 = ++p2; | 
| 874 | } | 
| 875 | } | 
| 876 | else | 
| 877 | { | 
| 878 | string s = "Unable to open data file '"; | 
| 879 | s += _filename; | 
| 880 | s += "'"; | 
| 881 | obErrorLog.ThrowError(__func__, s, obWarning); | 
| 882 | } | 
| 883 |  | 
| 884 | if (ifs1) | 
| 885 | ifs1.close(); | 
| 886 | if (ifs2) | 
| 887 | ifs2.close(); | 
| 888 | if (ifs3) | 
| 889 | ifs3.close(); | 
| 890 | if (ifs4) | 
| 891 | ifs4.close(); | 
| 892 |  | 
| 893 | if (GetSize() == 0) | 
| 894 | { | 
| 895 | string s = "Cannot initialize database '"; | 
| 896 | s += _filename; | 
| 897 | s += "' which may cause further errors."; | 
| 898 | obErrorLog.ThrowError(__func__, "Cannot initialize database", obWarning); | 
| 899 | } | 
| 900 |  | 
| 901 | } | 
| 902 |  | 
| 903 | } // end namespace OpenBabel | 
| 904 |  | 
| 905 | //! \file data.cpp | 
| 906 | //! \brief Global data and resource file parsers. |