13 |
|
GNU General Public License for more details. |
14 |
|
***********************************************************************/ |
15 |
|
|
16 |
< |
#include "pdbformat.hpp" |
16 |
> |
#include "config.h" |
17 |
> |
#include "mol.hpp" |
18 |
> |
#include "obconversion.hpp" |
19 |
> |
#include "obmolecformat.hpp" |
20 |
|
|
18 |
– |
|
19 |
– |
#if !HAVE_SNPRINTF |
20 |
– |
extern "C" int snprintf( char *, size_t, const char *, /* args */ ...); |
21 |
– |
#endif |
22 |
– |
|
21 |
|
#include <vector> |
22 |
|
#include <map> |
23 |
|
|
26 |
– |
#ifdef HAVE_SSTREAM |
24 |
|
#include <sstream> |
28 |
– |
#else |
29 |
– |
#include <strstream> |
30 |
– |
#endif |
25 |
|
|
26 |
|
using namespace std; |
27 |
|
namespace OpenBabel |
28 |
|
{ |
29 |
|
|
30 |
< |
static bool ParseAtomRecord(char *, OBMol &,int); |
31 |
< |
static bool ParseConectRecord(char *,OBMol &); |
30 |
> |
class PDBFormat : public OBMoleculeFormat |
31 |
> |
{ |
32 |
> |
public: |
33 |
> |
//Register this format type ID |
34 |
> |
PDBFormat() |
35 |
> |
{ |
36 |
> |
OBConversion::RegisterFormat("pdb",this, "chemical/x-pdb"); |
37 |
> |
OBConversion::RegisterFormat("ent",this, "chemical/x-pdb"); |
38 |
> |
} |
39 |
|
|
40 |
< |
//extern OBResidueData resdat; now in mol.h |
40 |
> |
virtual const char* Description() //required |
41 |
> |
{ |
42 |
> |
return |
43 |
> |
"Protein Data Bank format\n \ |
44 |
> |
Read Options e.g. -as\n\ |
45 |
> |
s Output single bonds only\n\ |
46 |
> |
b Disable bonding entirely\n\n"; |
47 |
> |
}; |
48 |
|
|
49 |
< |
///////////////////////////////////////////////////////////////// |
50 |
< |
bool PDBFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv) |
43 |
< |
{ |
49 |
> |
virtual const char* SpecificationURL() |
50 |
> |
{ return "http://www.rcsb.org/pdb/docs/format/pdbguide2.2/guide2.2_frame.html";}; |
51 |
|
|
52 |
+ |
virtual const char* GetMIMEType() |
53 |
+ |
{ return "chemical/x-pdb"; }; |
54 |
+ |
|
55 |
+ |
//Flags() can return be any the following combined by | or be omitted if none apply |
56 |
+ |
// NOTREADABLE READONEONLY NOTWRITABLE WRITEONEONLY |
57 |
+ |
virtual unsigned int Flags() |
58 |
+ |
{ |
59 |
+ |
return READONEONLY; |
60 |
+ |
}; |
61 |
+ |
|
62 |
+ |
//*** This section identical for most OBMol conversions *** |
63 |
+ |
//////////////////////////////////////////////////// |
64 |
+ |
/// The "API" interface functions |
65 |
+ |
virtual bool ReadMolecule(OBBase* pOb, OBConversion* pConv); |
66 |
+ |
virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv); |
67 |
+ |
|
68 |
+ |
}; |
69 |
+ |
//*** |
70 |
+ |
|
71 |
+ |
//Make an instance of the format class |
72 |
+ |
PDBFormat thePDBFormat; |
73 |
+ |
|
74 |
+ |
///////////////////////////////////////////////////////////////// |
75 |
+ |
|
76 |
+ |
|
77 |
+ |
static bool ParseAtomRecord(char *, OBMol &,int); |
78 |
+ |
static bool ParseConectRecord(char *,OBMol &); |
79 |
+ |
|
80 |
+ |
//extern OBResidueData resdat; now in mol.h |
81 |
+ |
|
82 |
+ |
///////////////////////////////////////////////////////////////// |
83 |
+ |
bool PDBFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv) |
84 |
+ |
{ |
85 |
+ |
|
86 |
|
OBMol* pmol = dynamic_cast<OBMol*>(pOb); |
87 |
|
if(pmol==NULL) |
88 |
< |
return false; |
88 |
> |
return false; |
89 |
|
|
90 |
|
//Define some references so we can use the old parameter names |
91 |
|
istream &ifs = *pConv->GetInStream(); |
100 |
|
|
101 |
|
mol.BeginModify(); |
102 |
|
while (ifs.getline(buffer,BUFF_SIZE) && !EQn(buffer,"END",3)) |
103 |
< |
{ |
103 |
> |
{ |
104 |
|
if (EQn(buffer,"TER",3)) |
105 |
< |
chainNum++; |
105 |
> |
chainNum++; |
106 |
|
if (EQn(buffer,"ATOM",4) || EQn(buffer,"HETATM",6)) |
107 |
< |
{ |
107 |
> |
{ |
108 |
|
ParseAtomRecord(buffer,mol,chainNum); |
109 |
|
if (EQn(buffer,"ATOM",4)) |
110 |
< |
bs.SetBitOn(mol.NumAtoms()); |
111 |
< |
} |
110 |
> |
bs.SetBitOn(mol.NumAtoms()); |
111 |
> |
} |
112 |
|
|
113 |
|
if (EQn(buffer,"CONECT",6)) |
114 |
< |
ParseConectRecord(buffer,mol); |
115 |
< |
} |
114 |
> |
ParseConectRecord(buffer,mol); |
115 |
> |
} |
116 |
|
|
117 |
|
resdat.AssignBonds(mol,bs); |
118 |
|
/*assign hetatm bonds based on distance*/ |
120 |
|
if (!pConv->IsOption("b",OBConversion::INOPTIONS)) |
121 |
|
mol.ConnectTheDots(); |
122 |
|
|
82 |
– |
if (mol.NumAtoms() < 250) // Minimize time required on real proteins |
123 |
|
if (!pConv->IsOption("s",OBConversion::INOPTIONS) && !pConv->IsOption("b",OBConversion::INOPTIONS)) |
124 |
< |
mol.PerceiveBondOrders(); |
124 |
> |
mol.PerceiveBondOrders(); |
125 |
|
|
126 |
|
// clean out remaining blank lines |
127 |
|
while(ifs.peek() != EOF && ifs.good() && |
128 |
< |
(ifs.peek() == '\n' || ifs.peek() == '\r')) |
128 |
> |
(ifs.peek() == '\n' || ifs.peek() == '\r')) |
129 |
|
ifs.getline(buffer,BUFF_SIZE); |
130 |
|
|
131 |
|
mol.EndModify(); |
134 |
|
atomtyper.AssignImplicitValence(mol); |
135 |
|
|
136 |
|
if (!mol.NumAtoms()) |
137 |
< |
return(false); |
137 |
> |
return(false); |
138 |
|
return(true); |
139 |
< |
} |
139 |
> |
} |
140 |
|
|
141 |
< |
//////////////////////////////////////////////////////////////// |
142 |
< |
static bool ParseAtomRecord(char *buffer, OBMol &mol,int chainNum) |
143 |
< |
/* ATOMFORMAT "(i5,1x,a4,a1,a3,1x,a1,i4,a1,3x,3f8.3,2f6.2,1x,i3)" */ |
144 |
< |
{ |
141 |
> |
//////////////////////////////////////////////////////////////// |
142 |
> |
static bool ParseAtomRecord(char *buffer, OBMol &mol,int chainNum) |
143 |
> |
/* ATOMFORMAT "(i5,1x,a4,a1,a3,1x,a1,i4,a1,3x,3f8.3,2f6.2,1x,i3)" */ |
144 |
> |
{ |
145 |
|
string sbuf = &buffer[6]; |
146 |
|
if (sbuf.size() < 48) |
147 |
< |
return(false); |
147 |
> |
return(false); |
148 |
|
|
149 |
|
bool hetatm = (EQn(buffer,"HETATM",6)) ? true : false; |
150 |
|
|
158 |
|
/* element */ |
159 |
|
string element; |
160 |
|
if (sbuf.size() > 71) |
161 |
< |
element = sbuf.substr(70,2); |
161 |
> |
element = sbuf.substr(70,2); |
162 |
|
else |
163 |
< |
element = " "; |
163 |
> |
element = " "; |
164 |
|
|
165 |
|
//trim spaces on the right and left sides |
166 |
|
while (!atmid.empty() && atmid[0] == ' ') |
167 |
< |
atmid = atmid.substr(1,atmid.size()-1); |
167 |
> |
atmid = atmid.substr(1,atmid.size()-1); |
168 |
|
|
169 |
|
while (!atmid.empty() && atmid[atmid.size()-1] == ' ') |
170 |
< |
atmid = atmid.substr(0,atmid.size()-1); |
170 |
> |
atmid = atmid.substr(0,atmid.size()-1); |
171 |
|
|
172 |
|
/* residue name */ |
173 |
|
|
174 |
|
string resname = sbuf.substr(11,3); |
175 |
|
if (resname == " ") |
176 |
< |
resname = "UNK"; |
176 |
> |
resname = "UNK"; |
177 |
|
else |
178 |
< |
{ |
178 |
> |
{ |
179 |
|
while (!resname.empty() && resname[0] == ' ') |
180 |
< |
resname = resname.substr(1,resname.size()-1); |
180 |
> |
resname = resname.substr(1,resname.size()-1); |
181 |
|
|
182 |
|
while (!resname.empty() && resname[resname.size()-1] == ' ') |
183 |
< |
resname = resname.substr(0,resname.size()-1); |
184 |
< |
} |
183 |
> |
resname = resname.substr(0,resname.size()-1); |
184 |
> |
} |
185 |
|
|
186 |
|
/* residue sequence number */ |
187 |
|
|
195 |
|
string type; |
196 |
|
|
197 |
|
if (EQn(buffer,"ATOM",4)) |
198 |
< |
{ |
198 |
> |
{ |
199 |
|
type = atmid.substr(0,2); |
200 |
|
if (isdigit(type[0])) |
201 |
< |
type = atmid.substr(1,1); |
201 |
> |
type = atmid.substr(1,1); |
202 |
|
else if (sbuf[6] == ' ' && |
203 |
< |
strncasecmp(type.c_str(), "Zn", 2) != 0 && |
204 |
< |
strncasecmp(type.c_str(), "Fe", 2) != 0) |
205 |
< |
type = atmid.substr(0,1); // one-character element |
206 |
< |
|
203 |
> |
strncasecmp(type.c_str(), "Zn", 2) != 0 && |
204 |
> |
strncasecmp(type.c_str(), "Fe", 2) != 0) |
205 |
> |
type = atmid.substr(0,1); // one-character element |
206 |
> |
|
207 |
|
|
208 |
|
if (resname.substr(0,2) == "AS" || resname[0] == 'N') |
209 |
< |
{ |
209 |
> |
{ |
210 |
|
if (atmid == "AD1") |
211 |
< |
type = "O"; |
211 |
> |
type = "O"; |
212 |
|
if (atmid == "AD2") |
213 |
< |
type = "N"; |
214 |
< |
} |
213 |
> |
type = "N"; |
214 |
> |
} |
215 |
|
if (resname.substr(0,3) == "HIS" || resname[0] == 'H') |
216 |
< |
{ |
216 |
> |
{ |
217 |
|
if (atmid == "AD1" || atmid == "AE2") |
218 |
< |
type = "N"; |
218 |
> |
type = "N"; |
219 |
|
if (atmid == "AE1" || atmid == "AD2") |
220 |
< |
type = "C"; |
221 |
< |
} |
220 |
> |
type = "C"; |
221 |
> |
} |
222 |
|
if (resname.substr(0,2) == "GL" || resname[0] == 'Q') |
223 |
< |
{ |
223 |
> |
{ |
224 |
|
if (atmid == "AE1") |
225 |
< |
type = "O"; |
225 |
> |
type = "O"; |
226 |
|
if (atmid == "AE2") |
227 |
< |
type = "N"; |
228 |
< |
} |
229 |
< |
} |
227 |
> |
type = "N"; |
228 |
> |
} |
229 |
> |
} |
230 |
|
else //must be hetatm record |
231 |
< |
{ |
231 |
> |
{ |
232 |
|
if (isalpha(element[1]) && (isalpha(element[0]) || (element[0] == ' '))) |
233 |
< |
{ |
233 |
> |
{ |
234 |
|
if (isalpha(element[0])) |
235 |
< |
type = element.substr(0,2); |
235 |
> |
type = element.substr(0,2); |
236 |
|
else |
237 |
< |
type = element.substr(1,1); |
237 |
> |
type = element.substr(1,1); |
238 |
|
if (type.size() == 2) |
239 |
< |
type[1] = tolower(type[1]); |
240 |
< |
} |
239 |
> |
type[1] = tolower(type[1]); |
240 |
> |
} |
241 |
|
else |
242 |
< |
{ |
243 |
< |
if (isalpha(atmid[0])) |
244 |
< |
type = atmid.substr(0,2); |
242 |
> |
{ |
243 |
> |
if (isalpha(atmid[0])) |
244 |
> |
type = atmid.substr(0,2); |
245 |
|
else if (atmid[0] == ' ') |
246 |
< |
type = atmid.substr(1,1); // one char element |
246 |
> |
type = atmid.substr(1,1); // one char element |
247 |
|
else |
248 |
< |
type = atmid.substr(1,2); |
248 |
> |
type = atmid.substr(1,2); |
249 |
|
|
250 |
|
if (atmid == resname) |
251 |
< |
{ |
252 |
< |
type = atmid; |
251 |
> |
{ |
252 |
> |
type = atmid; |
253 |
|
if (type.size() == 2) |
254 |
< |
type[1] = tolower(type[1]); |
255 |
< |
} |
254 |
> |
type[1] = tolower(type[1]); |
255 |
> |
} |
256 |
|
else |
257 |
< |
if (resname == "ADR" || resname == "COA" || resname == "FAD" || |
258 |
< |
resname == "GPG" || resname == "NAD" || resname == "NAL" || |
259 |
< |
resname == "NDP") |
260 |
< |
{ |
261 |
< |
if (type.size() > 1) |
262 |
< |
type = type.substr(0,1); |
263 |
< |
//type.erase(1,type.size()-1); |
257 |
> |
if (resname == "ADR" || resname == "COA" || resname == "FAD" || |
258 |
> |
resname == "GPG" || resname == "NAD" || resname == "NAL" || |
259 |
> |
resname == "NDP") |
260 |
> |
{ |
261 |
> |
if (type.size() > 1) |
262 |
> |
type = type.substr(0,1); |
263 |
> |
//type.erase(1,type.size()-1); |
264 |
|
} |
265 |
< |
else |
266 |
< |
if (isdigit(type[0])) |
267 |
< |
{ |
268 |
< |
type = type.substr(1,1); |
269 |
< |
//type.erase(0,1); |
270 |
< |
//if (type.size() > 1) type.erase(1,type.size()-1); |
271 |
< |
} |
272 |
< |
else |
273 |
< |
if (type.size() > 1 && isdigit(type[1])) |
274 |
< |
type = type.substr(0,1); |
265 |
> |
else |
266 |
> |
if (isdigit(type[0])) |
267 |
> |
{ |
268 |
> |
type = type.substr(1,1); |
269 |
> |
//type.erase(0,1); |
270 |
> |
//if (type.size() > 1) type.erase(1,type.size()-1); |
271 |
> |
} |
272 |
> |
else |
273 |
> |
if (type.size() > 1 && isdigit(type[1])) |
274 |
> |
type = type.substr(0,1); |
275 |
|
//type.erase(1,1); |
276 |
< |
else |
277 |
< |
if (type.size() > 1 && isalpha(type[1]) && isupper(type[1])) |
278 |
< |
type[1] = tolower(type[1]); |
279 |
< |
} |
280 |
< |
|
281 |
< |
} |
276 |
> |
else |
277 |
> |
if (type.size() > 1 && isalpha(type[1]) && isupper(type[1])) |
278 |
> |
type[1] = tolower(type[1]); |
279 |
> |
} |
280 |
> |
|
281 |
> |
} |
282 |
|
|
283 |
|
OBAtom atom; |
284 |
|
vector3 v(atof(xstr.c_str()),atof(ystr.c_str()),atof(zstr.c_str())); |
290 |
|
int rnum = atoi(resnum.c_str()); |
291 |
|
OBResidue *res = (mol.NumResidues() > 0) ? mol.GetResidue(mol.NumResidues()-1) : NULL; |
292 |
|
if (res == NULL || res->GetName() != resname || static_cast<int>(res->GetNum()) |
293 |
< |
!= rnum) |
294 |
< |
{ |
293 |
> |
!= rnum) |
294 |
> |
{ |
295 |
|
vector<OBResidue*>::iterator ri; |
296 |
|
for (res = mol.BeginResidue(ri) ; res ; res = mol.NextResidue(ri)) |
297 |
< |
if (res->GetName() == resname && static_cast<int>(res->GetNum()) |
298 |
< |
== rnum) |
299 |
< |
break; |
297 |
> |
if (res->GetName() == resname && static_cast<int>(res->GetNum()) |
298 |
> |
== rnum) |
299 |
> |
break; |
300 |
|
|
301 |
|
if (res == NULL) |
302 |
< |
{ |
302 |
> |
{ |
303 |
|
res = mol.NewResidue() |
304 |
< |
; |
304 |
> |
; |
305 |
|
res->SetChainNum(chainNum); |
306 |
|
res->SetName(resname); |
307 |
|
res->SetNum(rnum); |
308 |
< |
} |
309 |
< |
} |
308 |
> |
} |
309 |
> |
} |
310 |
|
|
311 |
|
if (!mol.AddAtom(atom) |
312 |
< |
) |
313 |
< |
return(false); |
312 |
> |
) |
313 |
> |
return(false); |
314 |
|
else |
315 |
< |
{ |
315 |
> |
{ |
316 |
|
OBAtom *atom = mol.GetAtom(mol.NumAtoms()); |
317 |
|
|
318 |
|
res->AddAtom(atom); |
321 |
|
res->SetHetAtom(atom, hetatm); |
322 |
|
|
323 |
|
return(true); |
324 |
< |
} |
325 |
< |
} |
324 |
> |
} |
325 |
> |
} |
326 |
|
|
327 |
< |
///////////////////////////////////////////////////////////////////////// |
328 |
< |
//! Utility function to read a 5-digit integer starting from a specified column |
329 |
< |
/*! This function reads a 5-digit integer, starting from column |
330 |
< |
columnAsSpecifiedInPDB from the buffer, converts it to a long |
331 |
< |
integer, and returns either false or true, if the conversion was |
332 |
< |
successful or not. If the conversion was not successful, the target |
333 |
< |
is set to a random value. |
327 |
> |
///////////////////////////////////////////////////////////////////////// |
328 |
> |
//! Utility function to read a 5-digit integer starting from a specified column |
329 |
> |
/*! This function reads a 5-digit integer, starting from column |
330 |
> |
columnAsSpecifiedInPDB from the buffer, converts it to a long |
331 |
> |
integer, and returns either false or true, if the conversion was |
332 |
> |
successful or not. If the conversion was not successful, the target |
333 |
> |
is set to a random value. |
334 |
|
|
335 |
< |
For instance, the PDB Format Description for a CONECT record specifies |
335 |
> |
For instance, the PDB Format Description for a CONECT record specifies |
336 |
|
|
337 |
< |
COLUMNS DATA TYPE FIELD DEFINITION |
338 |
< |
--------------------------------------------------------------------------------- |
339 |
< |
1 - 6 Record name "CONECT" |
340 |
< |
7 - 11 Integer serial Atom serial number |
341 |
< |
... |
337 |
> |
COLUMNS DATA TYPE FIELD DEFINITION |
338 |
> |
--------------------------------------------------------------------------------- |
339 |
> |
1 - 6 Record name "CONECT" |
340 |
> |
7 - 11 Integer serial Atom serial number |
341 |
> |
... |
342 |
|
|
343 |
< |
To read the Atom serial number, you would call |
343 |
> |
To read the Atom serial number, you would call |
344 |
|
|
345 |
< |
long int target; |
346 |
< |
if ( readIntegerFromRecord(buffer, 7, &target) == false ) { |
345 |
> |
long int target; |
346 |
> |
if ( readIntegerFromRecord(buffer, 7, &target) == false ) { |
347 |
|
cerr << "Could not parse" << endl; |
348 |
< |
} |
348 |
> |
} |
349 |
|
|
350 |
< |
This function does not check the length of the buffer, or |
351 |
< |
strlen(buffer). If the buffer is not long enough => SEGFAULT. |
352 |
< |
*/ |
353 |
< |
static bool readIntegerFromRecord(char *buffer, unsigned int columnAsSpecifiedInPDB, long int *target) |
354 |
< |
{ |
350 |
> |
This function does not check the length of the buffer, or |
351 |
> |
strlen(buffer). If the buffer is not long enough => SEGFAULT. |
352 |
> |
*/ |
353 |
> |
static bool readIntegerFromRecord(char *buffer, unsigned int columnAsSpecifiedInPDB, long int *target) |
354 |
> |
{ |
355 |
|
char integerBuffer[6]; |
356 |
< |
integerBuffer[5] = 0; |
356 |
> |
integerBuffer[5] = '\0'; |
357 |
|
|
358 |
|
strncpy(integerBuffer, buffer+columnAsSpecifiedInPDB-1, 5); |
359 |
|
|
360 |
|
char *errorCheckingEndPtr; |
361 |
|
*target = strtol(integerBuffer, &errorCheckingEndPtr, 10); |
362 |
|
if (integerBuffer == errorCheckingEndPtr) |
363 |
< |
return(false); |
363 |
> |
return(false); |
364 |
|
return(true); |
365 |
< |
} |
365 |
> |
} |
366 |
|
|
367 |
< |
//! Read a CONECT record |
368 |
< |
/*! This function reads a CONECT record, as specified |
369 |
< |
http://www.rcsb.org/pdb/docs/format/pdbguide2.2/guide2.2_frame.html, |
370 |
< |
in short: |
367 |
> |
//! Read a CONECT record |
368 |
> |
/*! This function reads a CONECT record, as specified |
369 |
> |
http://www.rcsb.org/pdb/docs/format/pdbguide2.2/guide2.2_frame.html, |
370 |
> |
in short: |
371 |
|
|
372 |
< |
COLUMNS DATA TYPE FIELD DEFINITION |
373 |
< |
--------------------------------------------------------------------------------- |
374 |
< |
1 - 6 Record name "CONECT" |
375 |
< |
7 - 11 Integer serial Atom serial number |
376 |
< |
12 - 16 Integer serial Serial number of bonded atom |
377 |
< |
17 - 21 Integer serial Serial number of bonded atom |
378 |
< |
22 - 26 Integer serial Serial number of bonded atom |
379 |
< |
27 - 31 Integer serial Serial number of bonded atom |
380 |
< |
32 - 36 Integer serial Serial number of hydrogen bonded atom |
381 |
< |
37 - 41 Integer serial Serial number of hydrogen bonded atom |
382 |
< |
42 - 46 Integer serial Serial number of salt bridged atom |
383 |
< |
47 - 51 Integer serial Serial number of hydrogen bonded atom |
384 |
< |
52 - 56 Integer serial Serial number of hydrogen bonded atom |
385 |
< |
57 - 61 Integer serial Serial number of salt bridged atom |
372 |
> |
COLUMNS DATA TYPE FIELD DEFINITION |
373 |
> |
--------------------------------------------------------------------------------- |
374 |
> |
1 - 6 Record name "CONECT" |
375 |
> |
7 - 11 Integer serial Atom serial number |
376 |
> |
12 - 16 Integer serial Serial number of bonded atom |
377 |
> |
17 - 21 Integer serial Serial number of bonded atom |
378 |
> |
22 - 26 Integer serial Serial number of bonded atom |
379 |
> |
27 - 31 Integer serial Serial number of bonded atom |
380 |
> |
32 - 36 Integer serial Serial number of hydrogen bonded atom |
381 |
> |
37 - 41 Integer serial Serial number of hydrogen bonded atom |
382 |
> |
42 - 46 Integer serial Serial number of salt bridged atom |
383 |
> |
47 - 51 Integer serial Serial number of hydrogen bonded atom |
384 |
> |
52 - 56 Integer serial Serial number of hydrogen bonded atom |
385 |
> |
57 - 61 Integer serial Serial number of salt bridged atom |
386 |
|
|
387 |
< |
Hydrogen bonds and salt bridges are ignored. --Stefan Kebekus. |
388 |
< |
*/ |
389 |
< |
|
390 |
< |
static bool ParseConectRecord(char *buffer,OBMol &mol) |
391 |
< |
{ |
352 |
< |
#ifdef HAVE_SSTREAM |
387 |
> |
Hydrogen bonds and salt bridges are ignored. --Stefan Kebekus. |
388 |
> |
*/ |
389 |
> |
|
390 |
> |
static bool ParseConectRecord(char *buffer,OBMol &mol) |
391 |
> |
{ |
392 |
|
stringstream errorMsg; |
393 |
< |
#else |
355 |
< |
strstream errorMsg; |
356 |
< |
#endif |
393 |
> |
string clearError; |
394 |
|
|
395 |
|
// Setup strings and string buffers |
396 |
+ |
vector<string> vs; |
397 |
|
buffer[70] = '\0'; |
398 |
|
if (strlen(buffer) < 70) |
399 |
< |
{ |
399 |
> |
{ |
400 |
|
errorMsg << "WARNING: Problems reading a PDB file\n" |
401 |
< |
<< " Problems reading a CONECT record.\n" |
402 |
< |
<< " According to the PDB specification,\n" |
403 |
< |
<< " the record should have 70 columns, but OpenBabel found " |
404 |
< |
<< strlen(buffer) << " columns."; |
405 |
< |
obErrorLog.ThrowError(__func__, errorMsg.str() , obInfo); |
406 |
< |
} |
401 |
> |
<< " Problems reading a CONECT record.\n" |
402 |
> |
<< " According to the PDB specification,\n" |
403 |
> |
<< " the record should have 70 columns, but OpenBabel found " |
404 |
> |
<< strlen(buffer) << " columns." << endl; |
405 |
> |
obErrorLog.ThrowError(__func__, errorMsg.str() , obInfo); |
406 |
> |
errorMsg.str(clearError); |
407 |
> |
} |
408 |
|
|
409 |
|
// Serial number of the first atom, read from column 7-11 of the |
410 |
|
// connect record, to which the other atoms connect to. |
411 |
|
long int startAtomSerialNumber; |
412 |
< |
if (readIntegerFromRecord(buffer, 7, &startAtomSerialNumber) == false) |
413 |
< |
{ |
375 |
< |
errorMsg << "WARNING: Problems reading a PDB file\n" |
376 |
< |
<< " Problems reading a CONECT record.\n" |
377 |
< |
<< " According to the PDB specification,\n" |
378 |
< |
<< " columns 7-11 should contain the serial number of an atom.\n" |
379 |
< |
<< " THIS CONECT RECORD WILL BE IGNORED."; |
380 |
< |
obErrorLog.ThrowError(__func__, errorMsg.str() , obWarning); |
381 |
< |
return(false); |
382 |
< |
} |
383 |
< |
|
384 |
< |
// Find a pointer to the first atom. |
385 |
< |
OBAtom *firstAtom = 0L; |
386 |
< |
vector<OBNodeBase*>::iterator i; |
387 |
< |
for (OBAtom *a1 = mol.BeginAtom(i);a1;a1 = mol.NextAtom(i)) |
388 |
< |
if (static_cast<long int>(a1->GetResidue()-> |
389 |
< |
GetSerialNum(a1)) == startAtomSerialNumber) |
390 |
< |
{ |
391 |
< |
firstAtom = a1; |
392 |
< |
break; |
393 |
< |
} |
394 |
< |
if (firstAtom == 0L) |
395 |
< |
{ |
396 |
< |
errorMsg << "WARNING: Problems reading a PDB file:\n" |
397 |
< |
<< " Problems reading a CONECT record.\n" |
398 |
< |
<< " According to the PDB specification,\n" |
399 |
< |
<< " columns 7-11 should contain the serial number of an atom.\n" |
400 |
< |
<< " No atom was found with this serial number.\n" |
401 |
< |
<< " THIS CONECT RECORD WILL BE IGNORED."; |
402 |
< |
obErrorLog.ThrowError(__func__, errorMsg.str() , obWarning); |
403 |
< |
return(false); |
404 |
< |
} |
405 |
< |
|
412 |
> |
// A pointer to the first atom. |
413 |
> |
OBAtom *firstAtom = NULL; |
414 |
|
// Serial numbers of the atoms which bind to firstAtom, read from |
415 |
|
// columns 12-16, 17-21, 22-27 and 27-31 of the connect record. Note |
416 |
|
// that we reserve space for 5 integers, but read only four of |
422 |
|
// invalid |
423 |
|
bool boundedAtomsSerialNumbersValid[5] = {false, false, false, false, false}; |
424 |
|
|
425 |
< |
// Now read the serial numbers. If the first serial number is not |
426 |
< |
// present, this connect record probably contains only hydrogen |
427 |
< |
// bonds and salt bridges, which we ignore. In that case, we just |
428 |
< |
// exit gracefully. |
429 |
< |
boundedAtomsSerialNumbersValid[0] = readIntegerFromRecord(buffer, 12, boundedAtomsSerialNumbers+0); |
422 |
< |
if (boundedAtomsSerialNumbersValid[0] == false) |
423 |
< |
return(true); |
424 |
< |
boundedAtomsSerialNumbersValid[1] = readIntegerFromRecord(buffer, 17, boundedAtomsSerialNumbers+1); |
425 |
< |
boundedAtomsSerialNumbersValid[2] = readIntegerFromRecord(buffer, 22, boundedAtomsSerialNumbers+2); |
426 |
< |
boundedAtomsSerialNumbersValid[3] = readIntegerFromRecord(buffer, 27, boundedAtomsSerialNumbers+3); |
425 |
> |
// Pragmatic approach -- too many non-standard PDB files out there |
426 |
> |
// (including some old ones from us) |
427 |
> |
// So if we have a small number of atoms, then try to break by spaces |
428 |
> |
// Otherwise (i.e., NumAtoms() > 9,999 we need to go by position) |
429 |
> |
// We'll switch back and forth a few times to save duplicating common code |
430 |
|
|
431 |
+ |
if (mol.NumAtoms() <= 9999) |
432 |
+ |
{ |
433 |
+ |
// make sure we don't look at salt bridges or whatever, so cut the buffer short |
434 |
+ |
buffer[32] = '\0'; |
435 |
+ |
tokenize(vs,buffer); |
436 |
+ |
if( vs.empty() || vs.size() < 2) |
437 |
+ |
return false; |
438 |
+ |
vs.erase(vs.begin()); // remove "CONECT" |
439 |
+ |
|
440 |
+ |
startAtomSerialNumber = atoi(vs[0].c_str()); |
441 |
+ |
} |
442 |
+ |
else |
443 |
+ |
{ |
444 |
+ |
if (readIntegerFromRecord(buffer, 7, &startAtomSerialNumber) == false) |
445 |
+ |
{ |
446 |
+ |
errorMsg << "WARNING: Problems reading a PDB file\n" |
447 |
+ |
<< " Problems reading a CONECT record.\n" |
448 |
+ |
<< " According to the PDB specification,\n" |
449 |
+ |
<< " columns 7-11 should contain the serial number of an atom.\n" |
450 |
+ |
<< " THIS CONECT RECORD WILL BE IGNORED." << endl; |
451 |
+ |
obErrorLog.ThrowError(__func__, errorMsg.str() , obWarning); |
452 |
+ |
return(false); |
453 |
+ |
} |
454 |
+ |
} |
455 |
+ |
|
456 |
+ |
vector<OBNodeBase*>::iterator i; |
457 |
+ |
for (OBAtom *a1 = mol.BeginAtom(i);a1;a1 = mol.NextAtom(i)) |
458 |
+ |
if (static_cast<long int>(a1->GetResidue()-> |
459 |
+ |
GetSerialNum(a1)) == startAtomSerialNumber) |
460 |
+ |
{ |
461 |
+ |
firstAtom = a1; |
462 |
+ |
break; |
463 |
+ |
} |
464 |
+ |
if (firstAtom == NULL) |
465 |
+ |
{ |
466 |
+ |
errorMsg << "WARNING: Problems reading a PDB file:\n" |
467 |
+ |
<< " Problems reading a CONECT record.\n" |
468 |
+ |
<< " According to the PDB specification,\n" |
469 |
+ |
<< " columns 7-11 should contain the serial number of an atom.\n" |
470 |
+ |
<< " No atom was found with this serial number.\n" |
471 |
+ |
<< " THIS CONECT RECORD WILL BE IGNORED." << endl; |
472 |
+ |
obErrorLog.ThrowError(__func__, errorMsg.str() , obWarning); |
473 |
+ |
return(false); |
474 |
+ |
} |
475 |
+ |
|
476 |
+ |
if (mol.NumAtoms() < 9999) |
477 |
+ |
{ |
478 |
+ |
if (vs.size() > 1) boundedAtomsSerialNumbers[0] = atoi(vs[1].c_str()); |
479 |
+ |
if (vs.size() > 2) boundedAtomsSerialNumbers[1] = atoi(vs[2].c_str()); |
480 |
+ |
if (vs.size() > 3) boundedAtomsSerialNumbers[2] = atoi(vs[3].c_str()); |
481 |
+ |
if (vs.size() > 4) boundedAtomsSerialNumbers[3] = atoi(vs[4].c_str()); |
482 |
+ |
|
483 |
+ |
unsigned int limit = 4; |
484 |
+ |
if (vs.size() <= 4) |
485 |
+ |
limit = vs.size() - 1; |
486 |
+ |
|
487 |
+ |
for (unsigned int i = 0; i < limit; i++) |
488 |
+ |
boundedAtomsSerialNumbersValid[i] = true; |
489 |
+ |
} |
490 |
+ |
else |
491 |
+ |
{ |
492 |
+ |
// Now read the serial numbers. If the first serial number is not |
493 |
+ |
// present, this connect record probably contains only hydrogen |
494 |
+ |
// bonds and salt bridges, which we ignore. In that case, we just |
495 |
+ |
// exit gracefully. |
496 |
+ |
boundedAtomsSerialNumbersValid[0] = readIntegerFromRecord(buffer, 12, boundedAtomsSerialNumbers+0); |
497 |
+ |
if (boundedAtomsSerialNumbersValid[0] == false) |
498 |
+ |
return(true); |
499 |
+ |
boundedAtomsSerialNumbersValid[1] = readIntegerFromRecord(buffer, 17, boundedAtomsSerialNumbers+1); |
500 |
+ |
boundedAtomsSerialNumbersValid[2] = readIntegerFromRecord(buffer, 22, boundedAtomsSerialNumbers+2); |
501 |
+ |
boundedAtomsSerialNumbersValid[3] = readIntegerFromRecord(buffer, 27, boundedAtomsSerialNumbers+3); |
502 |
+ |
} |
503 |
+ |
|
504 |
|
// Now iterate over the VALID boundedAtomsSerialNumbers and connect |
505 |
|
// the atoms. |
506 |
|
for(unsigned int k=0; boundedAtomsSerialNumbersValid[k]; k++) |
507 |
< |
{ |
507 |
> |
{ |
508 |
|
// Find atom that is connected to, write an error message |
509 |
|
OBAtom *connectedAtom = 0L; |
510 |
|
for (OBAtom *a1 = mol.BeginAtom(i);a1;a1 = mol.NextAtom(i)) |
511 |
< |
if (static_cast<long int>(a1->GetResidue()-> |
512 |
< |
GetSerialNum(a1)) == boundedAtomsSerialNumbers[k]) |
511 |
> |
if (static_cast<long int>(a1->GetResidue()-> |
512 |
> |
GetSerialNum(a1)) == boundedAtomsSerialNumbers[k]) |
513 |
|
{ |
514 |
< |
connectedAtom = a1; |
515 |
< |
break; |
514 |
> |
connectedAtom = a1; |
515 |
> |
break; |
516 |
|
} |
517 |
|
if (connectedAtom == 0L) |
518 |
< |
{ |
518 |
> |
{ |
519 |
|
errorMsg << "WARNING: Problems reading a PDB file:\n" |
520 |
< |
<< " Problems reading a CONECT record.\n" |
521 |
< |
<< " According to the PDB specification,\n" |
522 |
< |
<< " Atoms with serial #" << startAtomSerialNumber |
523 |
< |
<< " and #" << boundedAtomsSerialNumbers[k] |
524 |
< |
<< " should be connected\n" |
525 |
< |
<< " However, an atom with serial #" << boundedAtomsSerialNumbers[k] << " was not found.\n" |
526 |
< |
<< " THIS CONECT RECORD WILL BE IGNORED."; |
527 |
< |
obErrorLog.ThrowError(__func__, errorMsg.str() , obWarning); |
528 |
< |
break; |
529 |
< |
} |
520 |
> |
<< " Problems reading a CONECT record.\n" |
521 |
> |
<< " According to the PDB specification,\n" |
522 |
> |
<< " Atoms with serial #" << startAtomSerialNumber |
523 |
> |
<< " and #" << boundedAtomsSerialNumbers[k] |
524 |
> |
<< " should be connected\n" |
525 |
> |
<< " However, an atom with serial #" << boundedAtomsSerialNumbers[k] << " was not found.\n" |
526 |
> |
<< " THIS CONECT RECORD WILL BE IGNORED." << endl; |
527 |
> |
obErrorLog.ThrowError(__func__, errorMsg.str() , obWarning); |
528 |
> |
return(false); |
529 |
> |
} |
530 |
|
|
531 |
|
// Figure the bond order |
532 |
|
unsigned char order = 0; |
533 |
|
while(boundedAtomsSerialNumbersValid[k+order+1] && (boundedAtomsSerialNumbers[k+order] |
534 |
< |
== boundedAtomsSerialNumbers[k+order+1])) |
535 |
< |
order++; |
534 |
> |
== boundedAtomsSerialNumbers[k+order+1])) |
535 |
> |
order++; |
536 |
|
k += order; |
537 |
< |
|
537 |
> |
|
538 |
|
// Generate the bond |
539 |
|
mol.AddBond(firstAtom->GetIdx(), connectedAtom->GetIdx(), order+1); |
540 |
< |
} |
540 |
> |
} |
541 |
|
return(true); |
542 |
< |
} |
542 |
> |
} |
543 |
|
|
544 |
< |
////////////////////////////////////////////////////////////////////////////// |
545 |
< |
bool PDBFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv) |
546 |
< |
{ |
544 |
> |
////////////////////////////////////////////////////////////////////////////// |
545 |
> |
bool PDBFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv) |
546 |
> |
{ |
547 |
|
OBMol* pmol = dynamic_cast<OBMol*>(pOb); |
548 |
|
if(pmol==NULL) |
549 |
< |
return false; |
549 |
> |
return false; |
550 |
|
|
551 |
|
//Define some references so we can use the old parameter names |
552 |
|
ostream &ofs = *pConv->GetOutStream(); |
560 |
|
int res_num; |
561 |
|
bool het=true; |
562 |
|
|
487 |
– |
// sprintf(buffer,"HEADER PROTEIN"); |
488 |
– |
// ofs << buffer << endl; |
489 |
– |
|
563 |
|
if (strlen(mol.GetTitle()) > 0) |
564 |
< |
sprintf(buffer,"COMPND %s ",mol.GetTitle()); |
564 |
> |
snprintf(buffer, BUFF_SIZE, "COMPND %s ",mol.GetTitle()); |
565 |
|
else |
566 |
< |
sprintf(buffer,"COMPND UNNAMED"); |
566 |
> |
snprintf(buffer, BUFF_SIZE, "COMPND UNNAMED"); |
567 |
|
ofs << buffer << endl; |
568 |
|
|
569 |
< |
sprintf(buffer,"AUTHOR GENERATED BY OPEN BABEL %s",BABEL_VERSION); |
569 |
> |
snprintf(buffer, BUFF_SIZE, "AUTHOR GENERATED BY OPEN BABEL %s",BABEL_VERSION); |
570 |
|
ofs << buffer << endl; |
571 |
|
|
572 |
+ |
// before we write any records, we should check to see if any coord < -1000 |
573 |
+ |
// which will cause errors in the formatting |
574 |
+ |
|
575 |
+ |
double minX, minY, minZ; |
576 |
+ |
minX = minY = minZ = -999.0f; |
577 |
+ |
FOR_ATOMS_OF_MOL(a, mol) |
578 |
+ |
{ |
579 |
+ |
if (a->GetX() < minX) |
580 |
+ |
minX = a->GetX(); |
581 |
+ |
if (a->GetY() < minY) |
582 |
+ |
minY = a->GetY(); |
583 |
+ |
if (a->GetZ() < minZ) |
584 |
+ |
minZ = a->GetZ(); |
585 |
+ |
} |
586 |
+ |
vector3 transV = VZero; |
587 |
+ |
if (minX < -999.0) |
588 |
+ |
transV.SetX(-1.0*minX - 900.0); |
589 |
+ |
if (minY < -999.0) |
590 |
+ |
transV.SetY(-1.0*minY - 900.0); |
591 |
+ |
if (minZ < -999.0) |
592 |
+ |
transV.SetZ(-1.0*minZ - 900.0); |
593 |
+ |
|
594 |
+ |
// if minX, minY, or minZ was never changed, shift will be 0.0f |
595 |
+ |
// otherwise, move enough so that smallest coord is > -999.0f |
596 |
+ |
mol.Translate(transV); |
597 |
+ |
|
598 |
|
OBAtom *atom; |
599 |
|
OBResidue *res; |
600 |
|
for (i = 1; i <= mol.NumAtoms(); i++) |
601 |
< |
{ |
601 |
> |
{ |
602 |
|
atom = mol.GetAtom(i); |
603 |
< |
strcpy(type_name,etab.GetSymbol(atom->GetAtomicNum())); |
603 |
> |
strncpy(type_name, etab.GetSymbol(atom->GetAtomicNum()), sizeof(type_name)); |
604 |
> |
type_name[sizeof(type_name) - 1] = '\0'; |
605 |
|
|
606 |
|
//two char. elements are on position 13 and 14 one char. start at 14 |
607 |
|
if (strlen(type_name) > 1) |
608 |
< |
type_name[1] = toupper(type_name[1]); |
608 |
> |
type_name[1] = toupper(type_name[1]); |
609 |
|
else |
610 |
< |
{ |
610 |
> |
{ |
611 |
|
char tmp[10]; |
612 |
< |
strcpy(tmp, type_name); |
613 |
< |
sprintf(type_name, " %-3s", tmp); |
614 |
< |
} |
612 |
> |
strncpy(tmp, type_name, 10); |
613 |
> |
snprintf(type_name, sizeof(type_name), " %-3s", tmp); |
614 |
> |
} |
615 |
|
|
616 |
|
if ( (res = atom->GetResidue()) ) |
617 |
< |
{ |
617 |
> |
{ |
618 |
|
het = res->IsHetAtom(atom); |
619 |
|
snprintf(the_res,4,"%s",(char*)res->GetName().c_str()); |
620 |
|
snprintf(type_name,5,"%s",(char*)res->GetAtomID(atom).c_str()); |
621 |
|
|
622 |
|
//two char. elements are on position 13 and 14 one char. start at 14 |
623 |
|
if (strlen(etab.GetSymbol(atom->GetAtomicNum())) == 1) |
624 |
< |
{ |
624 |
> |
{ |
625 |
|
if (strlen(type_name) < 4) |
626 |
< |
{ |
626 |
> |
{ |
627 |
|
char tmp[10]; |
628 |
|
strcpy(tmp, type_name); |
629 |
< |
sprintf(padded_name," %-3s", tmp); |
629 |
> |
snprintf(padded_name, sizeof(padded_name), " %-3s", tmp); |
630 |
|
strncpy(type_name,padded_name,4); |
631 |
|
type_name[4] = '\0'; |
632 |
< |
} |
632 |
> |
} |
633 |
|
else |
634 |
< |
{ |
634 |
> |
{ |
635 |
|
type_name[4] = type_name[3]; |
636 |
|
type_name[3] = type_name[2]; |
637 |
|
type_name[2] = type_name[1]; |
638 |
|
type_name[1] = type_name[0]; |
639 |
|
type_name[0] = type_name[4]; |
640 |
|
type_name[4] = '\0'; |
641 |
< |
} |
642 |
< |
} |
641 |
> |
} |
642 |
> |
} |
643 |
|
res_num = res->GetNum(); |
644 |
< |
} |
644 |
> |
} |
645 |
|
else |
646 |
< |
{ |
646 |
> |
{ |
647 |
|
strcpy(the_res,"UNK"); |
648 |
< |
sprintf(padded_name,"%s",type_name); |
648 |
> |
snprintf(padded_name,sizeof(padded_name), "%s",type_name); |
649 |
|
strncpy(type_name,padded_name,4); |
650 |
|
type_name[4] = '\0'; |
651 |
|
res_num = 1; |
652 |
< |
} |
652 |
> |
} |
653 |
|
|
654 |
|
element_name = etab.GetSymbol(atom->GetAtomicNum()); |
655 |
|
if (strlen(element_name) == 2) |
656 |
< |
element_name[1] = toupper(element_name[1]); |
657 |
< |
sprintf(buffer,"%s%5d %-4s %-3s %4d %8.3f%8.3f%8.3f 1.00 0.00 %2s \n", |
656 |
> |
element_name[1] = toupper(element_name[1]); |
657 |
> |
snprintf(buffer, BUFF_SIZE, "%s%5d %-4s %-3s %4d %8.3f%8.3f%8.3f 1.00 0.00 %2s \n", |
658 |
|
het?"HETATM":"ATOM ", |
659 |
|
i, |
660 |
|
type_name, |
665 |
|
atom->GetZ(), |
666 |
|
element_name); |
667 |
|
ofs << buffer; |
668 |
< |
} |
668 |
> |
} |
669 |
|
|
670 |
|
OBAtom *nbr; |
671 |
|
int count; |
672 |
|
vector<OBEdgeBase*>::iterator k; |
673 |
|
for (i = 1; i <= mol.NumAtoms(); i ++) |
674 |
< |
{ |
674 |
> |
{ |
675 |
|
atom = mol.GetAtom(i); |
676 |
|
if (atom->GetValence() <= 4) |
677 |
< |
{ |
678 |
< |
sprintf(buffer,"CONECT%5d", i); |
677 |
> |
{ |
678 |
> |
snprintf(buffer, BUFF_SIZE, "CONECT%5d", i); |
679 |
|
ofs << buffer; |
680 |
|
for (nbr = atom->BeginNbrAtom(k);nbr;nbr = atom->NextNbrAtom(k)) |
681 |
< |
{ |
682 |
< |
sprintf(buffer,"%5d", nbr->GetIdx()); |
681 |
> |
{ |
682 |
> |
snprintf(buffer, BUFF_SIZE, "%5d", nbr->GetIdx()); |
683 |
|
ofs << buffer; |
684 |
< |
} |
684 |
> |
} |
685 |
|
for (count = 0; count < (4 - (int)atom->GetValence()); count++) |
686 |
< |
{ |
687 |
< |
sprintf(buffer, " "); |
686 |
> |
{ |
687 |
> |
snprintf(buffer, BUFF_SIZE, " "); |
688 |
|
ofs << buffer; |
689 |
< |
} |
689 |
> |
} |
690 |
|
ofs << " " << endl; |
691 |
< |
} |
692 |
< |
} |
693 |
< |
sprintf(buffer,"MASTER 0 0 0 0 0 0 0 0 "); |
691 |
> |
} |
692 |
> |
} |
693 |
> |
snprintf(buffer, BUFF_SIZE, "MASTER 0 0 0 0 0 0 0 0 "); |
694 |
|
ofs << buffer; |
695 |
< |
sprintf(buffer,"%4d 0 %4d 0",mol.NumAtoms(),mol.NumAtoms()); |
696 |
< |
ofs << buffer << endl; |
697 |
< |
sprintf(buffer,"END"); |
598 |
< |
ofs << buffer << endl; |
695 |
> |
snprintf(buffer, BUFF_SIZE, "%4d 0 %4d 0\n",mol.NumAtoms(),mol.NumAtoms()); |
696 |
> |
ofs << buffer; |
697 |
> |
ofs << "END\n"; |
698 |
|
return(true); |
699 |
< |
} |
699 |
> |
} |
700 |
|
|
701 |
|
|
702 |
|
} //namespace OpenBabel |