| 1 |
tim |
741 |
/********************************************************************** |
| 2 |
|
|
Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc. |
| 3 |
|
|
Some portions Copyright (C) 2003-2005 Geoffrey R. Hutchison |
| 4 |
|
|
Some portions Copyright (C) 2004 by Chris Morley |
| 5 |
|
|
|
| 6 |
|
|
This program is free software; you can redistribute it and/or modify |
| 7 |
|
|
it under the terms of the GNU General Public License as published by |
| 8 |
|
|
the Free Software Foundation version 2 of the License. |
| 9 |
|
|
|
| 10 |
|
|
This program is distributed in the hope that it will be useful, |
| 11 |
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 |
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 13 |
|
|
GNU General Public License for more details. |
| 14 |
|
|
***********************************************************************/ |
| 15 |
|
|
|
| 16 |
gezelter |
1081 |
#include "config.h" |
| 17 |
|
|
#include "mol.hpp" |
| 18 |
|
|
#include "obconversion.hpp" |
| 19 |
|
|
#include "obmolecformat.hpp" |
| 20 |
tim |
741 |
|
| 21 |
|
|
#include <vector> |
| 22 |
|
|
#include <map> |
| 23 |
|
|
|
| 24 |
|
|
#include <sstream> |
| 25 |
|
|
|
| 26 |
|
|
using namespace std; |
| 27 |
|
|
namespace OpenBabel |
| 28 |
|
|
{ |
| 29 |
|
|
|
| 30 |
gezelter |
1081 |
class PDBFormat : public OBMoleculeFormat |
| 31 |
|
|
{ |
| 32 |
|
|
public: |
| 33 |
|
|
//Register this format type ID |
| 34 |
|
|
PDBFormat() |
| 35 |
|
|
{ |
| 36 |
|
|
OBConversion::RegisterFormat("pdb",this, "chemical/x-pdb"); |
| 37 |
|
|
OBConversion::RegisterFormat("ent",this, "chemical/x-pdb"); |
| 38 |
|
|
} |
| 39 |
tim |
741 |
|
| 40 |
gezelter |
1081 |
virtual const char* Description() //required |
| 41 |
|
|
{ |
| 42 |
|
|
return |
| 43 |
|
|
"Protein Data Bank format\n \ |
| 44 |
|
|
Read Options e.g. -as\n\ |
| 45 |
|
|
s Output single bonds only\n\ |
| 46 |
|
|
b Disable bonding entirely\n\n"; |
| 47 |
|
|
}; |
| 48 |
tim |
741 |
|
| 49 |
gezelter |
1081 |
virtual const char* SpecificationURL() |
| 50 |
|
|
{ return "http://www.rcsb.org/pdb/docs/format/pdbguide2.2/guide2.2_frame.html";}; |
| 51 |
tim |
741 |
|
| 52 |
gezelter |
1081 |
virtual const char* GetMIMEType() |
| 53 |
|
|
{ return "chemical/x-pdb"; }; |
| 54 |
|
|
|
| 55 |
|
|
//Flags() can return be any the following combined by | or be omitted if none apply |
| 56 |
|
|
// NOTREADABLE READONEONLY NOTWRITABLE WRITEONEONLY |
| 57 |
|
|
virtual unsigned int Flags() |
| 58 |
|
|
{ |
| 59 |
|
|
return READONEONLY; |
| 60 |
|
|
}; |
| 61 |
|
|
|
| 62 |
|
|
//*** This section identical for most OBMol conversions *** |
| 63 |
|
|
//////////////////////////////////////////////////// |
| 64 |
|
|
/// The "API" interface functions |
| 65 |
|
|
virtual bool ReadMolecule(OBBase* pOb, OBConversion* pConv); |
| 66 |
|
|
virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv); |
| 67 |
|
|
|
| 68 |
|
|
}; |
| 69 |
|
|
//*** |
| 70 |
|
|
|
| 71 |
|
|
//Make an instance of the format class |
| 72 |
|
|
PDBFormat thePDBFormat; |
| 73 |
|
|
|
| 74 |
|
|
///////////////////////////////////////////////////////////////// |
| 75 |
|
|
|
| 76 |
|
|
|
| 77 |
|
|
static bool ParseAtomRecord(char *, OBMol &,int); |
| 78 |
|
|
static bool ParseConectRecord(char *,OBMol &); |
| 79 |
|
|
|
| 80 |
|
|
//extern OBResidueData resdat; now in mol.h |
| 81 |
|
|
|
| 82 |
|
|
///////////////////////////////////////////////////////////////// |
| 83 |
|
|
bool PDBFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv) |
| 84 |
|
|
{ |
| 85 |
|
|
|
| 86 |
tim |
741 |
OBMol* pmol = dynamic_cast<OBMol*>(pOb); |
| 87 |
|
|
if(pmol==NULL) |
| 88 |
gezelter |
1081 |
return false; |
| 89 |
tim |
741 |
|
| 90 |
|
|
//Define some references so we can use the old parameter names |
| 91 |
|
|
istream &ifs = *pConv->GetInStream(); |
| 92 |
|
|
OBMol &mol = *pmol; |
| 93 |
|
|
const char* title = pConv->GetTitle(); |
| 94 |
|
|
|
| 95 |
|
|
int chainNum = 1; |
| 96 |
|
|
char buffer[BUFF_SIZE]; |
| 97 |
|
|
OBBitVec bs; |
| 98 |
|
|
|
| 99 |
|
|
mol.SetTitle(title); |
| 100 |
|
|
|
| 101 |
|
|
mol.BeginModify(); |
| 102 |
|
|
while (ifs.getline(buffer,BUFF_SIZE) && !EQn(buffer,"END",3)) |
| 103 |
gezelter |
1081 |
{ |
| 104 |
tim |
741 |
if (EQn(buffer,"TER",3)) |
| 105 |
gezelter |
1081 |
chainNum++; |
| 106 |
tim |
741 |
if (EQn(buffer,"ATOM",4) || EQn(buffer,"HETATM",6)) |
| 107 |
gezelter |
1081 |
{ |
| 108 |
tim |
741 |
ParseAtomRecord(buffer,mol,chainNum); |
| 109 |
|
|
if (EQn(buffer,"ATOM",4)) |
| 110 |
gezelter |
1081 |
bs.SetBitOn(mol.NumAtoms()); |
| 111 |
|
|
} |
| 112 |
tim |
741 |
|
| 113 |
|
|
if (EQn(buffer,"CONECT",6)) |
| 114 |
gezelter |
1081 |
ParseConectRecord(buffer,mol); |
| 115 |
|
|
} |
| 116 |
tim |
741 |
|
| 117 |
|
|
resdat.AssignBonds(mol,bs); |
| 118 |
|
|
/*assign hetatm bonds based on distance*/ |
| 119 |
|
|
|
| 120 |
|
|
if (!pConv->IsOption("b",OBConversion::INOPTIONS)) |
| 121 |
|
|
mol.ConnectTheDots(); |
| 122 |
|
|
|
| 123 |
|
|
if (!pConv->IsOption("s",OBConversion::INOPTIONS) && !pConv->IsOption("b",OBConversion::INOPTIONS)) |
| 124 |
gezelter |
1081 |
mol.PerceiveBondOrders(); |
| 125 |
tim |
741 |
|
| 126 |
|
|
// clean out remaining blank lines |
| 127 |
|
|
while(ifs.peek() != EOF && ifs.good() && |
| 128 |
gezelter |
1081 |
(ifs.peek() == '\n' || ifs.peek() == '\r')) |
| 129 |
tim |
741 |
ifs.getline(buffer,BUFF_SIZE); |
| 130 |
|
|
|
| 131 |
|
|
mol.EndModify(); |
| 132 |
|
|
|
| 133 |
|
|
mol.SetAtomTypesPerceived(); |
| 134 |
|
|
atomtyper.AssignImplicitValence(mol); |
| 135 |
|
|
|
| 136 |
|
|
if (!mol.NumAtoms()) |
| 137 |
gezelter |
1081 |
return(false); |
| 138 |
tim |
741 |
return(true); |
| 139 |
gezelter |
1081 |
} |
| 140 |
tim |
741 |
|
| 141 |
gezelter |
1081 |
//////////////////////////////////////////////////////////////// |
| 142 |
|
|
static bool ParseAtomRecord(char *buffer, OBMol &mol,int chainNum) |
| 143 |
|
|
/* ATOMFORMAT "(i5,1x,a4,a1,a3,1x,a1,i4,a1,3x,3f8.3,2f6.2,1x,i3)" */ |
| 144 |
|
|
{ |
| 145 |
tim |
741 |
string sbuf = &buffer[6]; |
| 146 |
|
|
if (sbuf.size() < 48) |
| 147 |
gezelter |
1081 |
return(false); |
| 148 |
tim |
741 |
|
| 149 |
|
|
bool hetatm = (EQn(buffer,"HETATM",6)) ? true : false; |
| 150 |
|
|
|
| 151 |
|
|
/* serial number */ |
| 152 |
|
|
string serno = sbuf.substr(0,5); |
| 153 |
|
|
//SerialNum(the_atom) = atoi(tmp_str); |
| 154 |
|
|
|
| 155 |
|
|
/* atom name */ |
| 156 |
|
|
string atmid = sbuf.substr(6,4); |
| 157 |
|
|
|
| 158 |
|
|
/* element */ |
| 159 |
|
|
string element; |
| 160 |
|
|
if (sbuf.size() > 71) |
| 161 |
gezelter |
1081 |
element = sbuf.substr(70,2); |
| 162 |
tim |
741 |
else |
| 163 |
gezelter |
1081 |
element = " "; |
| 164 |
tim |
741 |
|
| 165 |
|
|
//trim spaces on the right and left sides |
| 166 |
|
|
while (!atmid.empty() && atmid[0] == ' ') |
| 167 |
gezelter |
1081 |
atmid = atmid.substr(1,atmid.size()-1); |
| 168 |
tim |
741 |
|
| 169 |
|
|
while (!atmid.empty() && atmid[atmid.size()-1] == ' ') |
| 170 |
gezelter |
1081 |
atmid = atmid.substr(0,atmid.size()-1); |
| 171 |
tim |
741 |
|
| 172 |
|
|
/* residue name */ |
| 173 |
|
|
|
| 174 |
|
|
string resname = sbuf.substr(11,3); |
| 175 |
|
|
if (resname == " ") |
| 176 |
gezelter |
1081 |
resname = "UNK"; |
| 177 |
tim |
741 |
else |
| 178 |
gezelter |
1081 |
{ |
| 179 |
tim |
741 |
while (!resname.empty() && resname[0] == ' ') |
| 180 |
gezelter |
1081 |
resname = resname.substr(1,resname.size()-1); |
| 181 |
tim |
741 |
|
| 182 |
|
|
while (!resname.empty() && resname[resname.size()-1] == ' ') |
| 183 |
gezelter |
1081 |
resname = resname.substr(0,resname.size()-1); |
| 184 |
|
|
} |
| 185 |
tim |
741 |
|
| 186 |
|
|
/* residue sequence number */ |
| 187 |
|
|
|
| 188 |
|
|
string resnum = sbuf.substr(16,4); |
| 189 |
|
|
|
| 190 |
|
|
/* X, Y, Z */ |
| 191 |
|
|
string xstr = sbuf.substr(24,8); |
| 192 |
|
|
string ystr = sbuf.substr(32,8); |
| 193 |
|
|
string zstr = sbuf.substr(40,8); |
| 194 |
|
|
|
| 195 |
|
|
string type; |
| 196 |
|
|
|
| 197 |
|
|
if (EQn(buffer,"ATOM",4)) |
| 198 |
gezelter |
1081 |
{ |
| 199 |
tim |
741 |
type = atmid.substr(0,2); |
| 200 |
|
|
if (isdigit(type[0])) |
| 201 |
gezelter |
1081 |
type = atmid.substr(1,1); |
| 202 |
tim |
741 |
else if (sbuf[6] == ' ' && |
| 203 |
gezelter |
1081 |
strncasecmp(type.c_str(), "Zn", 2) != 0 && |
| 204 |
|
|
strncasecmp(type.c_str(), "Fe", 2) != 0) |
| 205 |
|
|
type = atmid.substr(0,1); // one-character element |
| 206 |
|
|
|
| 207 |
tim |
741 |
|
| 208 |
|
|
if (resname.substr(0,2) == "AS" || resname[0] == 'N') |
| 209 |
gezelter |
1081 |
{ |
| 210 |
tim |
741 |
if (atmid == "AD1") |
| 211 |
gezelter |
1081 |
type = "O"; |
| 212 |
tim |
741 |
if (atmid == "AD2") |
| 213 |
gezelter |
1081 |
type = "N"; |
| 214 |
|
|
} |
| 215 |
tim |
741 |
if (resname.substr(0,3) == "HIS" || resname[0] == 'H') |
| 216 |
gezelter |
1081 |
{ |
| 217 |
tim |
741 |
if (atmid == "AD1" || atmid == "AE2") |
| 218 |
gezelter |
1081 |
type = "N"; |
| 219 |
tim |
741 |
if (atmid == "AE1" || atmid == "AD2") |
| 220 |
gezelter |
1081 |
type = "C"; |
| 221 |
|
|
} |
| 222 |
tim |
741 |
if (resname.substr(0,2) == "GL" || resname[0] == 'Q') |
| 223 |
gezelter |
1081 |
{ |
| 224 |
tim |
741 |
if (atmid == "AE1") |
| 225 |
gezelter |
1081 |
type = "O"; |
| 226 |
tim |
741 |
if (atmid == "AE2") |
| 227 |
gezelter |
1081 |
type = "N"; |
| 228 |
|
|
} |
| 229 |
|
|
} |
| 230 |
tim |
741 |
else //must be hetatm record |
| 231 |
gezelter |
1081 |
{ |
| 232 |
tim |
741 |
if (isalpha(element[1]) && (isalpha(element[0]) || (element[0] == ' '))) |
| 233 |
gezelter |
1081 |
{ |
| 234 |
tim |
741 |
if (isalpha(element[0])) |
| 235 |
gezelter |
1081 |
type = element.substr(0,2); |
| 236 |
tim |
741 |
else |
| 237 |
gezelter |
1081 |
type = element.substr(1,1); |
| 238 |
tim |
741 |
if (type.size() == 2) |
| 239 |
gezelter |
1081 |
type[1] = tolower(type[1]); |
| 240 |
|
|
} |
| 241 |
tim |
741 |
else |
| 242 |
gezelter |
1081 |
{ |
| 243 |
|
|
if (isalpha(atmid[0])) |
| 244 |
|
|
type = atmid.substr(0,2); |
| 245 |
tim |
741 |
else if (atmid[0] == ' ') |
| 246 |
gezelter |
1081 |
type = atmid.substr(1,1); // one char element |
| 247 |
tim |
741 |
else |
| 248 |
gezelter |
1081 |
type = atmid.substr(1,2); |
| 249 |
tim |
741 |
|
| 250 |
|
|
if (atmid == resname) |
| 251 |
gezelter |
1081 |
{ |
| 252 |
|
|
type = atmid; |
| 253 |
tim |
741 |
if (type.size() == 2) |
| 254 |
gezelter |
1081 |
type[1] = tolower(type[1]); |
| 255 |
|
|
} |
| 256 |
tim |
741 |
else |
| 257 |
gezelter |
1081 |
if (resname == "ADR" || resname == "COA" || resname == "FAD" || |
| 258 |
|
|
resname == "GPG" || resname == "NAD" || resname == "NAL" || |
| 259 |
|
|
resname == "NDP") |
| 260 |
|
|
{ |
| 261 |
|
|
if (type.size() > 1) |
| 262 |
|
|
type = type.substr(0,1); |
| 263 |
|
|
//type.erase(1,type.size()-1); |
| 264 |
tim |
741 |
} |
| 265 |
gezelter |
1081 |
else |
| 266 |
|
|
if (isdigit(type[0])) |
| 267 |
|
|
{ |
| 268 |
|
|
type = type.substr(1,1); |
| 269 |
|
|
//type.erase(0,1); |
| 270 |
|
|
//if (type.size() > 1) type.erase(1,type.size()-1); |
| 271 |
|
|
} |
| 272 |
|
|
else |
| 273 |
|
|
if (type.size() > 1 && isdigit(type[1])) |
| 274 |
|
|
type = type.substr(0,1); |
| 275 |
tim |
741 |
//type.erase(1,1); |
| 276 |
gezelter |
1081 |
else |
| 277 |
|
|
if (type.size() > 1 && isalpha(type[1]) && isupper(type[1])) |
| 278 |
|
|
type[1] = tolower(type[1]); |
| 279 |
|
|
} |
| 280 |
|
|
|
| 281 |
|
|
} |
| 282 |
tim |
741 |
|
| 283 |
|
|
OBAtom atom; |
| 284 |
|
|
vector3 v(atof(xstr.c_str()),atof(ystr.c_str()),atof(zstr.c_str())); |
| 285 |
|
|
atom.SetVector(v); |
| 286 |
|
|
|
| 287 |
|
|
atom.SetAtomicNum(etab.GetAtomicNum(type.c_str())); |
| 288 |
|
|
atom.SetType(type); |
| 289 |
|
|
|
| 290 |
|
|
int rnum = atoi(resnum.c_str()); |
| 291 |
|
|
OBResidue *res = (mol.NumResidues() > 0) ? mol.GetResidue(mol.NumResidues()-1) : NULL; |
| 292 |
|
|
if (res == NULL || res->GetName() != resname || static_cast<int>(res->GetNum()) |
| 293 |
gezelter |
1081 |
!= rnum) |
| 294 |
|
|
{ |
| 295 |
tim |
741 |
vector<OBResidue*>::iterator ri; |
| 296 |
|
|
for (res = mol.BeginResidue(ri) ; res ; res = mol.NextResidue(ri)) |
| 297 |
gezelter |
1081 |
if (res->GetName() == resname && static_cast<int>(res->GetNum()) |
| 298 |
|
|
== rnum) |
| 299 |
|
|
break; |
| 300 |
tim |
741 |
|
| 301 |
|
|
if (res == NULL) |
| 302 |
gezelter |
1081 |
{ |
| 303 |
tim |
741 |
res = mol.NewResidue() |
| 304 |
gezelter |
1081 |
; |
| 305 |
tim |
741 |
res->SetChainNum(chainNum); |
| 306 |
|
|
res->SetName(resname); |
| 307 |
|
|
res->SetNum(rnum); |
| 308 |
gezelter |
1081 |
} |
| 309 |
|
|
} |
| 310 |
tim |
741 |
|
| 311 |
|
|
if (!mol.AddAtom(atom) |
| 312 |
gezelter |
1081 |
) |
| 313 |
|
|
return(false); |
| 314 |
tim |
741 |
else |
| 315 |
gezelter |
1081 |
{ |
| 316 |
tim |
741 |
OBAtom *atom = mol.GetAtom(mol.NumAtoms()); |
| 317 |
|
|
|
| 318 |
|
|
res->AddAtom(atom); |
| 319 |
|
|
res->SetSerialNum(atom, atoi(serno.c_str())); |
| 320 |
|
|
res->SetAtomID(atom, atmid); |
| 321 |
|
|
res->SetHetAtom(atom, hetatm); |
| 322 |
|
|
|
| 323 |
|
|
return(true); |
| 324 |
gezelter |
1081 |
} |
| 325 |
|
|
} |
| 326 |
tim |
741 |
|
| 327 |
gezelter |
1081 |
///////////////////////////////////////////////////////////////////////// |
| 328 |
|
|
//! Utility function to read a 5-digit integer starting from a specified column |
| 329 |
|
|
/*! This function reads a 5-digit integer, starting from column |
| 330 |
|
|
columnAsSpecifiedInPDB from the buffer, converts it to a long |
| 331 |
|
|
integer, and returns either false or true, if the conversion was |
| 332 |
|
|
successful or not. If the conversion was not successful, the target |
| 333 |
|
|
is set to a random value. |
| 334 |
tim |
741 |
|
| 335 |
gezelter |
1081 |
For instance, the PDB Format Description for a CONECT record specifies |
| 336 |
tim |
741 |
|
| 337 |
gezelter |
1081 |
COLUMNS DATA TYPE FIELD DEFINITION |
| 338 |
|
|
--------------------------------------------------------------------------------- |
| 339 |
|
|
1 - 6 Record name "CONECT" |
| 340 |
|
|
7 - 11 Integer serial Atom serial number |
| 341 |
|
|
... |
| 342 |
tim |
741 |
|
| 343 |
gezelter |
1081 |
To read the Atom serial number, you would call |
| 344 |
tim |
741 |
|
| 345 |
gezelter |
1081 |
long int target; |
| 346 |
|
|
if ( readIntegerFromRecord(buffer, 7, &target) == false ) { |
| 347 |
tim |
741 |
cerr << "Could not parse" << endl; |
| 348 |
gezelter |
1081 |
} |
| 349 |
tim |
741 |
|
| 350 |
gezelter |
1081 |
This function does not check the length of the buffer, or |
| 351 |
|
|
strlen(buffer). If the buffer is not long enough => SEGFAULT. |
| 352 |
|
|
*/ |
| 353 |
|
|
static bool readIntegerFromRecord(char *buffer, unsigned int columnAsSpecifiedInPDB, long int *target) |
| 354 |
|
|
{ |
| 355 |
tim |
741 |
char integerBuffer[6]; |
| 356 |
gezelter |
1081 |
integerBuffer[5] = '\0'; |
| 357 |
tim |
741 |
|
| 358 |
|
|
strncpy(integerBuffer, buffer+columnAsSpecifiedInPDB-1, 5); |
| 359 |
|
|
|
| 360 |
|
|
char *errorCheckingEndPtr; |
| 361 |
|
|
*target = strtol(integerBuffer, &errorCheckingEndPtr, 10); |
| 362 |
|
|
if (integerBuffer == errorCheckingEndPtr) |
| 363 |
gezelter |
1081 |
return(false); |
| 364 |
tim |
741 |
return(true); |
| 365 |
gezelter |
1081 |
} |
| 366 |
tim |
741 |
|
| 367 |
gezelter |
1081 |
//! Read a CONECT record |
| 368 |
|
|
/*! This function reads a CONECT record, as specified |
| 369 |
|
|
http://www.rcsb.org/pdb/docs/format/pdbguide2.2/guide2.2_frame.html, |
| 370 |
|
|
in short: |
| 371 |
tim |
741 |
|
| 372 |
gezelter |
1081 |
COLUMNS DATA TYPE FIELD DEFINITION |
| 373 |
|
|
--------------------------------------------------------------------------------- |
| 374 |
|
|
1 - 6 Record name "CONECT" |
| 375 |
|
|
7 - 11 Integer serial Atom serial number |
| 376 |
|
|
12 - 16 Integer serial Serial number of bonded atom |
| 377 |
|
|
17 - 21 Integer serial Serial number of bonded atom |
| 378 |
|
|
22 - 26 Integer serial Serial number of bonded atom |
| 379 |
|
|
27 - 31 Integer serial Serial number of bonded atom |
| 380 |
|
|
32 - 36 Integer serial Serial number of hydrogen bonded atom |
| 381 |
|
|
37 - 41 Integer serial Serial number of hydrogen bonded atom |
| 382 |
|
|
42 - 46 Integer serial Serial number of salt bridged atom |
| 383 |
|
|
47 - 51 Integer serial Serial number of hydrogen bonded atom |
| 384 |
|
|
52 - 56 Integer serial Serial number of hydrogen bonded atom |
| 385 |
|
|
57 - 61 Integer serial Serial number of salt bridged atom |
| 386 |
tim |
741 |
|
| 387 |
gezelter |
1081 |
Hydrogen bonds and salt bridges are ignored. --Stefan Kebekus. |
| 388 |
|
|
*/ |
| 389 |
tim |
741 |
|
| 390 |
gezelter |
1081 |
static bool ParseConectRecord(char *buffer,OBMol &mol) |
| 391 |
|
|
{ |
| 392 |
tim |
741 |
stringstream errorMsg; |
| 393 |
gezelter |
1081 |
string clearError; |
| 394 |
tim |
741 |
|
| 395 |
|
|
// Setup strings and string buffers |
| 396 |
gezelter |
1081 |
vector<string> vs; |
| 397 |
tim |
741 |
buffer[70] = '\0'; |
| 398 |
|
|
if (strlen(buffer) < 70) |
| 399 |
gezelter |
1081 |
{ |
| 400 |
tim |
741 |
errorMsg << "WARNING: Problems reading a PDB file\n" |
| 401 |
gezelter |
1081 |
<< " Problems reading a CONECT record.\n" |
| 402 |
|
|
<< " According to the PDB specification,\n" |
| 403 |
|
|
<< " the record should have 70 columns, but OpenBabel found " |
| 404 |
|
|
<< strlen(buffer) << " columns." << endl; |
| 405 |
|
|
obErrorLog.ThrowError(__func__, errorMsg.str() , obInfo); |
| 406 |
|
|
errorMsg.str(clearError); |
| 407 |
|
|
} |
| 408 |
tim |
741 |
|
| 409 |
|
|
// Serial number of the first atom, read from column 7-11 of the |
| 410 |
|
|
// connect record, to which the other atoms connect to. |
| 411 |
|
|
long int startAtomSerialNumber; |
| 412 |
gezelter |
1081 |
// A pointer to the first atom. |
| 413 |
|
|
OBAtom *firstAtom = NULL; |
| 414 |
tim |
741 |
// Serial numbers of the atoms which bind to firstAtom, read from |
| 415 |
|
|
// columns 12-16, 17-21, 22-27 and 27-31 of the connect record. Note |
| 416 |
|
|
// that we reserve space for 5 integers, but read only four of |
| 417 |
|
|
// them. This is to simplify the determination of the bond order; |
| 418 |
|
|
// see below. |
| 419 |
|
|
long int boundedAtomsSerialNumbers[5] = {0,0,0,0,0}; |
| 420 |
|
|
// Bools which tell us which of the serial numbers in |
| 421 |
|
|
// boundedAtomsSerialNumbers are read from the file, and which are |
| 422 |
|
|
// invalid |
| 423 |
|
|
bool boundedAtomsSerialNumbersValid[5] = {false, false, false, false, false}; |
| 424 |
|
|
|
| 425 |
gezelter |
1081 |
// Pragmatic approach -- too many non-standard PDB files out there |
| 426 |
|
|
// (including some old ones from us) |
| 427 |
|
|
// So if we have a small number of atoms, then try to break by spaces |
| 428 |
|
|
// Otherwise (i.e., NumAtoms() > 9,999 we need to go by position) |
| 429 |
|
|
// We'll switch back and forth a few times to save duplicating common code |
| 430 |
tim |
741 |
|
| 431 |
gezelter |
1081 |
if (mol.NumAtoms() <= 9999) |
| 432 |
|
|
{ |
| 433 |
|
|
// make sure we don't look at salt bridges or whatever, so cut the buffer short |
| 434 |
|
|
buffer[32] = '\0'; |
| 435 |
|
|
tokenize(vs,buffer); |
| 436 |
|
|
if( vs.empty() || vs.size() < 2) |
| 437 |
|
|
return false; |
| 438 |
|
|
vs.erase(vs.begin()); // remove "CONECT" |
| 439 |
|
|
|
| 440 |
|
|
startAtomSerialNumber = atoi(vs[0].c_str()); |
| 441 |
|
|
} |
| 442 |
|
|
else |
| 443 |
|
|
{ |
| 444 |
|
|
if (readIntegerFromRecord(buffer, 7, &startAtomSerialNumber) == false) |
| 445 |
|
|
{ |
| 446 |
|
|
errorMsg << "WARNING: Problems reading a PDB file\n" |
| 447 |
|
|
<< " Problems reading a CONECT record.\n" |
| 448 |
|
|
<< " According to the PDB specification,\n" |
| 449 |
|
|
<< " columns 7-11 should contain the serial number of an atom.\n" |
| 450 |
|
|
<< " THIS CONECT RECORD WILL BE IGNORED." << endl; |
| 451 |
|
|
obErrorLog.ThrowError(__func__, errorMsg.str() , obWarning); |
| 452 |
|
|
return(false); |
| 453 |
|
|
} |
| 454 |
|
|
} |
| 455 |
|
|
|
| 456 |
|
|
vector<OBNodeBase*>::iterator i; |
| 457 |
|
|
for (OBAtom *a1 = mol.BeginAtom(i);a1;a1 = mol.NextAtom(i)) |
| 458 |
|
|
if (static_cast<long int>(a1->GetResidue()-> |
| 459 |
|
|
GetSerialNum(a1)) == startAtomSerialNumber) |
| 460 |
|
|
{ |
| 461 |
|
|
firstAtom = a1; |
| 462 |
|
|
break; |
| 463 |
|
|
} |
| 464 |
|
|
if (firstAtom == NULL) |
| 465 |
|
|
{ |
| 466 |
|
|
errorMsg << "WARNING: Problems reading a PDB file:\n" |
| 467 |
|
|
<< " Problems reading a CONECT record.\n" |
| 468 |
|
|
<< " According to the PDB specification,\n" |
| 469 |
|
|
<< " columns 7-11 should contain the serial number of an atom.\n" |
| 470 |
|
|
<< " No atom was found with this serial number.\n" |
| 471 |
|
|
<< " THIS CONECT RECORD WILL BE IGNORED." << endl; |
| 472 |
|
|
obErrorLog.ThrowError(__func__, errorMsg.str() , obWarning); |
| 473 |
|
|
return(false); |
| 474 |
|
|
} |
| 475 |
|
|
|
| 476 |
|
|
if (mol.NumAtoms() < 9999) |
| 477 |
|
|
{ |
| 478 |
|
|
if (vs.size() > 1) boundedAtomsSerialNumbers[0] = atoi(vs[1].c_str()); |
| 479 |
|
|
if (vs.size() > 2) boundedAtomsSerialNumbers[1] = atoi(vs[2].c_str()); |
| 480 |
|
|
if (vs.size() > 3) boundedAtomsSerialNumbers[2] = atoi(vs[3].c_str()); |
| 481 |
|
|
if (vs.size() > 4) boundedAtomsSerialNumbers[3] = atoi(vs[4].c_str()); |
| 482 |
|
|
|
| 483 |
|
|
unsigned int limit = 4; |
| 484 |
|
|
if (vs.size() <= 4) |
| 485 |
|
|
limit = vs.size() - 1; |
| 486 |
|
|
|
| 487 |
|
|
for (unsigned int i = 0; i < limit; i++) |
| 488 |
|
|
boundedAtomsSerialNumbersValid[i] = true; |
| 489 |
|
|
} |
| 490 |
|
|
else |
| 491 |
|
|
{ |
| 492 |
|
|
// Now read the serial numbers. If the first serial number is not |
| 493 |
|
|
// present, this connect record probably contains only hydrogen |
| 494 |
|
|
// bonds and salt bridges, which we ignore. In that case, we just |
| 495 |
|
|
// exit gracefully. |
| 496 |
|
|
boundedAtomsSerialNumbersValid[0] = readIntegerFromRecord(buffer, 12, boundedAtomsSerialNumbers+0); |
| 497 |
|
|
if (boundedAtomsSerialNumbersValid[0] == false) |
| 498 |
|
|
return(true); |
| 499 |
|
|
boundedAtomsSerialNumbersValid[1] = readIntegerFromRecord(buffer, 17, boundedAtomsSerialNumbers+1); |
| 500 |
|
|
boundedAtomsSerialNumbersValid[2] = readIntegerFromRecord(buffer, 22, boundedAtomsSerialNumbers+2); |
| 501 |
|
|
boundedAtomsSerialNumbersValid[3] = readIntegerFromRecord(buffer, 27, boundedAtomsSerialNumbers+3); |
| 502 |
|
|
} |
| 503 |
|
|
|
| 504 |
tim |
741 |
// Now iterate over the VALID boundedAtomsSerialNumbers and connect |
| 505 |
|
|
// the atoms. |
| 506 |
|
|
for(unsigned int k=0; boundedAtomsSerialNumbersValid[k]; k++) |
| 507 |
gezelter |
1081 |
{ |
| 508 |
tim |
741 |
// Find atom that is connected to, write an error message |
| 509 |
|
|
OBAtom *connectedAtom = 0L; |
| 510 |
|
|
for (OBAtom *a1 = mol.BeginAtom(i);a1;a1 = mol.NextAtom(i)) |
| 511 |
gezelter |
1081 |
if (static_cast<long int>(a1->GetResidue()-> |
| 512 |
|
|
GetSerialNum(a1)) == boundedAtomsSerialNumbers[k]) |
| 513 |
tim |
741 |
{ |
| 514 |
gezelter |
1081 |
connectedAtom = a1; |
| 515 |
|
|
break; |
| 516 |
tim |
741 |
} |
| 517 |
|
|
if (connectedAtom == 0L) |
| 518 |
gezelter |
1081 |
{ |
| 519 |
tim |
741 |
errorMsg << "WARNING: Problems reading a PDB file:\n" |
| 520 |
gezelter |
1081 |
<< " Problems reading a CONECT record.\n" |
| 521 |
|
|
<< " According to the PDB specification,\n" |
| 522 |
|
|
<< " Atoms with serial #" << startAtomSerialNumber |
| 523 |
|
|
<< " and #" << boundedAtomsSerialNumbers[k] |
| 524 |
|
|
<< " should be connected\n" |
| 525 |
|
|
<< " However, an atom with serial #" << boundedAtomsSerialNumbers[k] << " was not found.\n" |
| 526 |
|
|
<< " THIS CONECT RECORD WILL BE IGNORED." << endl; |
| 527 |
|
|
obErrorLog.ThrowError(__func__, errorMsg.str() , obWarning); |
| 528 |
|
|
return(false); |
| 529 |
|
|
} |
| 530 |
tim |
741 |
|
| 531 |
|
|
// Figure the bond order |
| 532 |
|
|
unsigned char order = 0; |
| 533 |
|
|
while(boundedAtomsSerialNumbersValid[k+order+1] && (boundedAtomsSerialNumbers[k+order] |
| 534 |
gezelter |
1081 |
== boundedAtomsSerialNumbers[k+order+1])) |
| 535 |
|
|
order++; |
| 536 |
tim |
741 |
k += order; |
| 537 |
gezelter |
1081 |
|
| 538 |
tim |
741 |
// Generate the bond |
| 539 |
|
|
mol.AddBond(firstAtom->GetIdx(), connectedAtom->GetIdx(), order+1); |
| 540 |
gezelter |
1081 |
} |
| 541 |
tim |
741 |
return(true); |
| 542 |
gezelter |
1081 |
} |
| 543 |
tim |
741 |
|
| 544 |
gezelter |
1081 |
////////////////////////////////////////////////////////////////////////////// |
| 545 |
|
|
bool PDBFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv) |
| 546 |
|
|
{ |
| 547 |
tim |
741 |
OBMol* pmol = dynamic_cast<OBMol*>(pOb); |
| 548 |
|
|
if(pmol==NULL) |
| 549 |
gezelter |
1081 |
return false; |
| 550 |
tim |
741 |
|
| 551 |
|
|
//Define some references so we can use the old parameter names |
| 552 |
|
|
ostream &ofs = *pConv->GetOutStream(); |
| 553 |
|
|
OBMol &mol = *pmol; |
| 554 |
|
|
|
| 555 |
|
|
unsigned int i; |
| 556 |
|
|
char buffer[BUFF_SIZE]; |
| 557 |
|
|
char type_name[10], padded_name[10]; |
| 558 |
|
|
char the_res[10]; |
| 559 |
|
|
char *element_name; |
| 560 |
|
|
int res_num; |
| 561 |
|
|
bool het=true; |
| 562 |
|
|
|
| 563 |
|
|
if (strlen(mol.GetTitle()) > 0) |
| 564 |
gezelter |
1081 |
snprintf(buffer, BUFF_SIZE, "COMPND %s ",mol.GetTitle()); |
| 565 |
tim |
741 |
else |
| 566 |
gezelter |
1081 |
snprintf(buffer, BUFF_SIZE, "COMPND UNNAMED"); |
| 567 |
tim |
741 |
ofs << buffer << endl; |
| 568 |
|
|
|
| 569 |
gezelter |
1081 |
snprintf(buffer, BUFF_SIZE, "AUTHOR GENERATED BY OPEN BABEL %s",BABEL_VERSION); |
| 570 |
tim |
741 |
ofs << buffer << endl; |
| 571 |
|
|
|
| 572 |
gezelter |
1081 |
// before we write any records, we should check to see if any coord < -1000 |
| 573 |
|
|
// which will cause errors in the formatting |
| 574 |
|
|
|
| 575 |
|
|
double minX, minY, minZ; |
| 576 |
|
|
minX = minY = minZ = -999.0f; |
| 577 |
|
|
FOR_ATOMS_OF_MOL(a, mol) |
| 578 |
|
|
{ |
| 579 |
|
|
if (a->GetX() < minX) |
| 580 |
|
|
minX = a->GetX(); |
| 581 |
|
|
if (a->GetY() < minY) |
| 582 |
|
|
minY = a->GetY(); |
| 583 |
|
|
if (a->GetZ() < minZ) |
| 584 |
|
|
minZ = a->GetZ(); |
| 585 |
|
|
} |
| 586 |
|
|
vector3 transV = VZero; |
| 587 |
|
|
if (minX < -999.0) |
| 588 |
|
|
transV.SetX(-1.0*minX - 900.0); |
| 589 |
|
|
if (minY < -999.0) |
| 590 |
|
|
transV.SetY(-1.0*minY - 900.0); |
| 591 |
|
|
if (minZ < -999.0) |
| 592 |
|
|
transV.SetZ(-1.0*minZ - 900.0); |
| 593 |
|
|
|
| 594 |
|
|
// if minX, minY, or minZ was never changed, shift will be 0.0f |
| 595 |
|
|
// otherwise, move enough so that smallest coord is > -999.0f |
| 596 |
|
|
mol.Translate(transV); |
| 597 |
|
|
|
| 598 |
tim |
741 |
OBAtom *atom; |
| 599 |
|
|
OBResidue *res; |
| 600 |
|
|
for (i = 1; i <= mol.NumAtoms(); i++) |
| 601 |
gezelter |
1081 |
{ |
| 602 |
tim |
741 |
atom = mol.GetAtom(i); |
| 603 |
gezelter |
1081 |
strncpy(type_name, etab.GetSymbol(atom->GetAtomicNum()), sizeof(type_name)); |
| 604 |
|
|
type_name[sizeof(type_name) - 1] = '\0'; |
| 605 |
tim |
741 |
|
| 606 |
|
|
//two char. elements are on position 13 and 14 one char. start at 14 |
| 607 |
|
|
if (strlen(type_name) > 1) |
| 608 |
gezelter |
1081 |
type_name[1] = toupper(type_name[1]); |
| 609 |
tim |
741 |
else |
| 610 |
gezelter |
1081 |
{ |
| 611 |
tim |
741 |
char tmp[10]; |
| 612 |
gezelter |
1081 |
strncpy(tmp, type_name, 10); |
| 613 |
|
|
snprintf(type_name, sizeof(type_name), " %-3s", tmp); |
| 614 |
|
|
} |
| 615 |
tim |
741 |
|
| 616 |
|
|
if ( (res = atom->GetResidue()) ) |
| 617 |
gezelter |
1081 |
{ |
| 618 |
tim |
741 |
het = res->IsHetAtom(atom); |
| 619 |
|
|
snprintf(the_res,4,"%s",(char*)res->GetName().c_str()); |
| 620 |
|
|
snprintf(type_name,5,"%s",(char*)res->GetAtomID(atom).c_str()); |
| 621 |
|
|
|
| 622 |
|
|
//two char. elements are on position 13 and 14 one char. start at 14 |
| 623 |
|
|
if (strlen(etab.GetSymbol(atom->GetAtomicNum())) == 1) |
| 624 |
gezelter |
1081 |
{ |
| 625 |
tim |
741 |
if (strlen(type_name) < 4) |
| 626 |
gezelter |
1081 |
{ |
| 627 |
tim |
741 |
char tmp[10]; |
| 628 |
|
|
strcpy(tmp, type_name); |
| 629 |
gezelter |
1081 |
snprintf(padded_name, sizeof(padded_name), " %-3s", tmp); |
| 630 |
tim |
741 |
strncpy(type_name,padded_name,4); |
| 631 |
|
|
type_name[4] = '\0'; |
| 632 |
gezelter |
1081 |
} |
| 633 |
tim |
741 |
else |
| 634 |
gezelter |
1081 |
{ |
| 635 |
tim |
741 |
type_name[4] = type_name[3]; |
| 636 |
|
|
type_name[3] = type_name[2]; |
| 637 |
|
|
type_name[2] = type_name[1]; |
| 638 |
|
|
type_name[1] = type_name[0]; |
| 639 |
|
|
type_name[0] = type_name[4]; |
| 640 |
|
|
type_name[4] = '\0'; |
| 641 |
gezelter |
1081 |
} |
| 642 |
|
|
} |
| 643 |
tim |
741 |
res_num = res->GetNum(); |
| 644 |
gezelter |
1081 |
} |
| 645 |
tim |
741 |
else |
| 646 |
gezelter |
1081 |
{ |
| 647 |
tim |
741 |
strcpy(the_res,"UNK"); |
| 648 |
gezelter |
1081 |
snprintf(padded_name,sizeof(padded_name), "%s",type_name); |
| 649 |
tim |
741 |
strncpy(type_name,padded_name,4); |
| 650 |
|
|
type_name[4] = '\0'; |
| 651 |
|
|
res_num = 1; |
| 652 |
gezelter |
1081 |
} |
| 653 |
tim |
741 |
|
| 654 |
|
|
element_name = etab.GetSymbol(atom->GetAtomicNum()); |
| 655 |
|
|
if (strlen(element_name) == 2) |
| 656 |
gezelter |
1081 |
element_name[1] = toupper(element_name[1]); |
| 657 |
|
|
snprintf(buffer, BUFF_SIZE, "%s%5d %-4s %-3s %4d %8.3f%8.3f%8.3f 1.00 0.00 %2s \n", |
| 658 |
tim |
741 |
het?"HETATM":"ATOM ", |
| 659 |
|
|
i, |
| 660 |
|
|
type_name, |
| 661 |
|
|
the_res, |
| 662 |
|
|
res_num, |
| 663 |
|
|
atom->GetX(), |
| 664 |
|
|
atom->GetY(), |
| 665 |
|
|
atom->GetZ(), |
| 666 |
|
|
element_name); |
| 667 |
|
|
ofs << buffer; |
| 668 |
gezelter |
1081 |
} |
| 669 |
tim |
741 |
|
| 670 |
|
|
OBAtom *nbr; |
| 671 |
|
|
int count; |
| 672 |
|
|
vector<OBEdgeBase*>::iterator k; |
| 673 |
|
|
for (i = 1; i <= mol.NumAtoms(); i ++) |
| 674 |
gezelter |
1081 |
{ |
| 675 |
tim |
741 |
atom = mol.GetAtom(i); |
| 676 |
|
|
if (atom->GetValence() <= 4) |
| 677 |
gezelter |
1081 |
{ |
| 678 |
|
|
snprintf(buffer, BUFF_SIZE, "CONECT%5d", i); |
| 679 |
tim |
741 |
ofs << buffer; |
| 680 |
|
|
for (nbr = atom->BeginNbrAtom(k);nbr;nbr = atom->NextNbrAtom(k)) |
| 681 |
gezelter |
1081 |
{ |
| 682 |
|
|
snprintf(buffer, BUFF_SIZE, "%5d", nbr->GetIdx()); |
| 683 |
tim |
741 |
ofs << buffer; |
| 684 |
gezelter |
1081 |
} |
| 685 |
tim |
741 |
for (count = 0; count < (4 - (int)atom->GetValence()); count++) |
| 686 |
gezelter |
1081 |
{ |
| 687 |
|
|
snprintf(buffer, BUFF_SIZE, " "); |
| 688 |
tim |
741 |
ofs << buffer; |
| 689 |
gezelter |
1081 |
} |
| 690 |
tim |
741 |
ofs << " " << endl; |
| 691 |
gezelter |
1081 |
} |
| 692 |
|
|
} |
| 693 |
|
|
snprintf(buffer, BUFF_SIZE, "MASTER 0 0 0 0 0 0 0 0 "); |
| 694 |
tim |
741 |
ofs << buffer; |
| 695 |
gezelter |
1081 |
snprintf(buffer, BUFF_SIZE, "%4d 0 %4d 0\n",mol.NumAtoms(),mol.NumAtoms()); |
| 696 |
|
|
ofs << buffer; |
| 697 |
|
|
ofs << "END\n"; |
| 698 |
tim |
741 |
return(true); |
| 699 |
gezelter |
1081 |
} |
| 700 |
tim |
741 |
|
| 701 |
|
|
|
| 702 |
|
|
} //namespace OpenBabel |