ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/OOPSE-4/src/openbabel/pdbformat.cpp
Revision: 2445
Committed: Wed Nov 16 21:22:51 2005 UTC (18 years, 7 months ago) by tim
File size: 19897 byte(s)
Log Message:
adding more readers/writers

File Contents

# Content
1 /**********************************************************************
2 Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc.
3 Some portions Copyright (C) 2003-2005 Geoffrey R. Hutchison
4 Some portions Copyright (C) 2004 by Chris Morley
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation version 2 of the License.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14 ***********************************************************************/
15
16 #include "pdbformat.hpp"
17
18
19 #if !HAVE_SNPRINTF
20 extern "C" int snprintf( char *, size_t, const char *, /* args */ ...);
21 #endif
22
23 #include <vector>
24 #include <map>
25
26 #ifdef HAVE_SSTREAM
27 #include <sstream>
28 #else
29 #include <strstream>
30 #endif
31
32 using namespace std;
33 namespace OpenBabel
34 {
35
36 static bool ParseAtomRecord(char *, OBMol &,int);
37 static bool ParseConectRecord(char *,OBMol &);
38
39 //extern OBResidueData resdat; now in mol.h
40
41 /////////////////////////////////////////////////////////////////
42 bool PDBFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv)
43 {
44
45 OBMol* pmol = dynamic_cast<OBMol*>(pOb);
46 if(pmol==NULL)
47 return false;
48
49 //Define some references so we can use the old parameter names
50 istream &ifs = *pConv->GetInStream();
51 OBMol &mol = *pmol;
52 const char* title = pConv->GetTitle();
53
54 int chainNum = 1;
55 char buffer[BUFF_SIZE];
56 OBBitVec bs;
57
58 mol.SetTitle(title);
59
60 mol.BeginModify();
61 while (ifs.getline(buffer,BUFF_SIZE) && !EQn(buffer,"END",3))
62 {
63 if (EQn(buffer,"TER",3))
64 chainNum++;
65 if (EQn(buffer,"ATOM",4) || EQn(buffer,"HETATM",6))
66 {
67 ParseAtomRecord(buffer,mol,chainNum);
68 if (EQn(buffer,"ATOM",4))
69 bs.SetBitOn(mol.NumAtoms());
70 }
71
72 if (EQn(buffer,"CONECT",6))
73 ParseConectRecord(buffer,mol);
74 }
75
76 resdat.AssignBonds(mol,bs);
77 /*assign hetatm bonds based on distance*/
78
79 if (!pConv->IsOption("b",OBConversion::INOPTIONS))
80 mol.ConnectTheDots();
81
82 if (mol.NumAtoms() < 250) // Minimize time required on real proteins
83 if (!pConv->IsOption("s",OBConversion::INOPTIONS) && !pConv->IsOption("b",OBConversion::INOPTIONS))
84 mol.PerceiveBondOrders();
85
86 // clean out remaining blank lines
87 while(ifs.peek() != EOF && ifs.good() &&
88 (ifs.peek() == '\n' || ifs.peek() == '\r'))
89 ifs.getline(buffer,BUFF_SIZE);
90
91 mol.EndModify();
92
93 mol.SetAtomTypesPerceived();
94 atomtyper.AssignImplicitValence(mol);
95
96 if (!mol.NumAtoms())
97 return(false);
98 return(true);
99 }
100
101 ////////////////////////////////////////////////////////////////
102 static bool ParseAtomRecord(char *buffer, OBMol &mol,int chainNum)
103 /* ATOMFORMAT "(i5,1x,a4,a1,a3,1x,a1,i4,a1,3x,3f8.3,2f6.2,1x,i3)" */
104 {
105 string sbuf = &buffer[6];
106 if (sbuf.size() < 48)
107 return(false);
108
109 bool hetatm = (EQn(buffer,"HETATM",6)) ? true : false;
110
111 /* serial number */
112 string serno = sbuf.substr(0,5);
113 //SerialNum(the_atom) = atoi(tmp_str);
114
115 /* atom name */
116 string atmid = sbuf.substr(6,4);
117
118 /* element */
119 string element;
120 if (sbuf.size() > 71)
121 element = sbuf.substr(70,2);
122 else
123 element = " ";
124
125 //trim spaces on the right and left sides
126 while (!atmid.empty() && atmid[0] == ' ')
127 atmid = atmid.substr(1,atmid.size()-1);
128
129 while (!atmid.empty() && atmid[atmid.size()-1] == ' ')
130 atmid = atmid.substr(0,atmid.size()-1);
131
132 /* residue name */
133
134 string resname = sbuf.substr(11,3);
135 if (resname == " ")
136 resname = "UNK";
137 else
138 {
139 while (!resname.empty() && resname[0] == ' ')
140 resname = resname.substr(1,resname.size()-1);
141
142 while (!resname.empty() && resname[resname.size()-1] == ' ')
143 resname = resname.substr(0,resname.size()-1);
144 }
145
146 /* residue sequence number */
147
148 string resnum = sbuf.substr(16,4);
149
150 /* X, Y, Z */
151 string xstr = sbuf.substr(24,8);
152 string ystr = sbuf.substr(32,8);
153 string zstr = sbuf.substr(40,8);
154
155 string type;
156
157 if (EQn(buffer,"ATOM",4))
158 {
159 type = atmid.substr(0,2);
160 if (isdigit(type[0]))
161 type = atmid.substr(1,1);
162 else if (sbuf[6] == ' ' &&
163 strncasecmp(type.c_str(), "Zn", 2) != 0 &&
164 strncasecmp(type.c_str(), "Fe", 2) != 0)
165 type = atmid.substr(0,1); // one-character element
166
167
168 if (resname.substr(0,2) == "AS" || resname[0] == 'N')
169 {
170 if (atmid == "AD1")
171 type = "O";
172 if (atmid == "AD2")
173 type = "N";
174 }
175 if (resname.substr(0,3) == "HIS" || resname[0] == 'H')
176 {
177 if (atmid == "AD1" || atmid == "AE2")
178 type = "N";
179 if (atmid == "AE1" || atmid == "AD2")
180 type = "C";
181 }
182 if (resname.substr(0,2) == "GL" || resname[0] == 'Q')
183 {
184 if (atmid == "AE1")
185 type = "O";
186 if (atmid == "AE2")
187 type = "N";
188 }
189 }
190 else //must be hetatm record
191 {
192 if (isalpha(element[1]) && (isalpha(element[0]) || (element[0] == ' ')))
193 {
194 if (isalpha(element[0]))
195 type = element.substr(0,2);
196 else
197 type = element.substr(1,1);
198 if (type.size() == 2)
199 type[1] = tolower(type[1]);
200 }
201 else
202 {
203 if (isalpha(atmid[0]))
204 type = atmid.substr(0,2);
205 else if (atmid[0] == ' ')
206 type = atmid.substr(1,1); // one char element
207 else
208 type = atmid.substr(1,2);
209
210 if (atmid == resname)
211 {
212 type = atmid;
213 if (type.size() == 2)
214 type[1] = tolower(type[1]);
215 }
216 else
217 if (resname == "ADR" || resname == "COA" || resname == "FAD" ||
218 resname == "GPG" || resname == "NAD" || resname == "NAL" ||
219 resname == "NDP")
220 {
221 if (type.size() > 1)
222 type = type.substr(0,1);
223 //type.erase(1,type.size()-1);
224 }
225 else
226 if (isdigit(type[0]))
227 {
228 type = type.substr(1,1);
229 //type.erase(0,1);
230 //if (type.size() > 1) type.erase(1,type.size()-1);
231 }
232 else
233 if (type.size() > 1 && isdigit(type[1]))
234 type = type.substr(0,1);
235 //type.erase(1,1);
236 else
237 if (type.size() > 1 && isalpha(type[1]) && isupper(type[1]))
238 type[1] = tolower(type[1]);
239 }
240
241 }
242
243 OBAtom atom;
244 vector3 v(atof(xstr.c_str()),atof(ystr.c_str()),atof(zstr.c_str()));
245 atom.SetVector(v);
246
247 atom.SetAtomicNum(etab.GetAtomicNum(type.c_str()));
248 atom.SetType(type);
249
250 int rnum = atoi(resnum.c_str());
251 OBResidue *res = (mol.NumResidues() > 0) ? mol.GetResidue(mol.NumResidues()-1) : NULL;
252 if (res == NULL || res->GetName() != resname || static_cast<int>(res->GetNum())
253 != rnum)
254 {
255 vector<OBResidue*>::iterator ri;
256 for (res = mol.BeginResidue(ri) ; res ; res = mol.NextResidue(ri))
257 if (res->GetName() == resname && static_cast<int>(res->GetNum())
258 == rnum)
259 break;
260
261 if (res == NULL)
262 {
263 res = mol.NewResidue()
264 ;
265 res->SetChainNum(chainNum);
266 res->SetName(resname);
267 res->SetNum(rnum);
268 }
269 }
270
271 if (!mol.AddAtom(atom)
272 )
273 return(false);
274 else
275 {
276 OBAtom *atom = mol.GetAtom(mol.NumAtoms());
277
278 res->AddAtom(atom);
279 res->SetSerialNum(atom, atoi(serno.c_str()));
280 res->SetAtomID(atom, atmid);
281 res->SetHetAtom(atom, hetatm);
282
283 return(true);
284 }
285 }
286
287 /////////////////////////////////////////////////////////////////////////
288 //! Utility function to read a 5-digit integer starting from a specified column
289 /*! This function reads a 5-digit integer, starting from column
290 columnAsSpecifiedInPDB from the buffer, converts it to a long
291 integer, and returns either false or true, if the conversion was
292 successful or not. If the conversion was not successful, the target
293 is set to a random value.
294
295 For instance, the PDB Format Description for a CONECT record specifies
296
297 COLUMNS DATA TYPE FIELD DEFINITION
298 ---------------------------------------------------------------------------------
299 1 - 6 Record name "CONECT"
300 7 - 11 Integer serial Atom serial number
301 ...
302
303 To read the Atom serial number, you would call
304
305 long int target;
306 if ( readIntegerFromRecord(buffer, 7, &target) == false ) {
307 cerr << "Could not parse" << endl;
308 }
309
310 This function does not check the length of the buffer, or
311 strlen(buffer). If the buffer is not long enough => SEGFAULT.
312 */
313 static bool readIntegerFromRecord(char *buffer, unsigned int columnAsSpecifiedInPDB, long int *target)
314 {
315 char integerBuffer[6];
316 integerBuffer[5] = 0;
317
318 strncpy(integerBuffer, buffer+columnAsSpecifiedInPDB-1, 5);
319
320 char *errorCheckingEndPtr;
321 *target = strtol(integerBuffer, &errorCheckingEndPtr, 10);
322 if (integerBuffer == errorCheckingEndPtr)
323 return(false);
324 return(true);
325 }
326
327 //! Read a CONECT record
328 /*! This function reads a CONECT record, as specified
329 http://www.rcsb.org/pdb/docs/format/pdbguide2.2/guide2.2_frame.html,
330 in short:
331
332 COLUMNS DATA TYPE FIELD DEFINITION
333 ---------------------------------------------------------------------------------
334 1 - 6 Record name "CONECT"
335 7 - 11 Integer serial Atom serial number
336 12 - 16 Integer serial Serial number of bonded atom
337 17 - 21 Integer serial Serial number of bonded atom
338 22 - 26 Integer serial Serial number of bonded atom
339 27 - 31 Integer serial Serial number of bonded atom
340 32 - 36 Integer serial Serial number of hydrogen bonded atom
341 37 - 41 Integer serial Serial number of hydrogen bonded atom
342 42 - 46 Integer serial Serial number of salt bridged atom
343 47 - 51 Integer serial Serial number of hydrogen bonded atom
344 52 - 56 Integer serial Serial number of hydrogen bonded atom
345 57 - 61 Integer serial Serial number of salt bridged atom
346
347 Hydrogen bonds and salt bridges are ignored. --Stefan Kebekus.
348 */
349
350 static bool ParseConectRecord(char *buffer,OBMol &mol)
351 {
352 #ifdef HAVE_SSTREAM
353 stringstream errorMsg;
354 #else
355 strstream errorMsg;
356 #endif
357
358 // Setup strings and string buffers
359 buffer[70] = '\0';
360 if (strlen(buffer) < 70)
361 {
362 errorMsg << "WARNING: Problems reading a PDB file\n"
363 << " Problems reading a CONECT record.\n"
364 << " According to the PDB specification,\n"
365 << " the record should have 70 columns, but OpenBabel found "
366 << strlen(buffer) << " columns.";
367 obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() , obInfo);
368 }
369
370 // Serial number of the first atom, read from column 7-11 of the
371 // connect record, to which the other atoms connect to.
372 long int startAtomSerialNumber;
373 if (readIntegerFromRecord(buffer, 7, &startAtomSerialNumber) == false)
374 {
375 errorMsg << "WARNING: Problems reading a PDB file\n"
376 << " Problems reading a CONECT record.\n"
377 << " According to the PDB specification,\n"
378 << " columns 7-11 should contain the serial number of an atom.\n"
379 << " THIS CONECT RECORD WILL BE IGNORED.";
380 obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() , obWarning);
381 return(false);
382 }
383
384 // Find a pointer to the first atom.
385 OBAtom *firstAtom = 0L;
386 vector<OBNodeBase*>::iterator i;
387 for (OBAtom *a1 = mol.BeginAtom(i);a1;a1 = mol.NextAtom(i))
388 if (static_cast<long int>(a1->GetResidue()->
389 GetSerialNum(a1)) == startAtomSerialNumber)
390 {
391 firstAtom = a1;
392 break;
393 }
394 if (firstAtom == 0L)
395 {
396 errorMsg << "WARNING: Problems reading a PDB file:\n"
397 << " Problems reading a CONECT record.\n"
398 << " According to the PDB specification,\n"
399 << " columns 7-11 should contain the serial number of an atom.\n"
400 << " No atom was found with this serial number.\n"
401 << " THIS CONECT RECORD WILL BE IGNORED.";
402 obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() , obWarning);
403 return(false);
404 }
405
406 // Serial numbers of the atoms which bind to firstAtom, read from
407 // columns 12-16, 17-21, 22-27 and 27-31 of the connect record. Note
408 // that we reserve space for 5 integers, but read only four of
409 // them. This is to simplify the determination of the bond order;
410 // see below.
411 long int boundedAtomsSerialNumbers[5] = {0,0,0,0,0};
412 // Bools which tell us which of the serial numbers in
413 // boundedAtomsSerialNumbers are read from the file, and which are
414 // invalid
415 bool boundedAtomsSerialNumbersValid[5] = {false, false, false, false, false};
416
417 // Now read the serial numbers. If the first serial number is not
418 // present, this connect record probably contains only hydrogen
419 // bonds and salt bridges, which we ignore. In that case, we just
420 // exit gracefully.
421 boundedAtomsSerialNumbersValid[0] = readIntegerFromRecord(buffer, 12, boundedAtomsSerialNumbers+0);
422 if (boundedAtomsSerialNumbersValid[0] == false)
423 return(true);
424 boundedAtomsSerialNumbersValid[1] = readIntegerFromRecord(buffer, 17, boundedAtomsSerialNumbers+1);
425 boundedAtomsSerialNumbersValid[2] = readIntegerFromRecord(buffer, 22, boundedAtomsSerialNumbers+2);
426 boundedAtomsSerialNumbersValid[3] = readIntegerFromRecord(buffer, 27, boundedAtomsSerialNumbers+3);
427
428 // Now iterate over the VALID boundedAtomsSerialNumbers and connect
429 // the atoms.
430 for(unsigned int k=0; boundedAtomsSerialNumbersValid[k]; k++)
431 {
432 // Find atom that is connected to, write an error message
433 OBAtom *connectedAtom = 0L;
434 for (OBAtom *a1 = mol.BeginAtom(i);a1;a1 = mol.NextAtom(i))
435 if (static_cast<long int>(a1->GetResidue()->
436 GetSerialNum(a1)) == boundedAtomsSerialNumbers[k])
437 {
438 connectedAtom = a1;
439 break;
440 }
441 if (connectedAtom == 0L)
442 {
443 errorMsg << "WARNING: Problems reading a PDB file:\n"
444 << " Problems reading a CONECT record.\n"
445 << " According to the PDB specification,\n"
446 << " Atoms with serial #" << startAtomSerialNumber
447 << " and #" << boundedAtomsSerialNumbers[k]
448 << " should be connected\n"
449 << " However, an atom with serial #" << boundedAtomsSerialNumbers[k] << " was not found.\n"
450 << " THIS CONECT RECORD WILL BE IGNORED.";
451 obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() , obWarning);
452 break;
453 }
454
455 // Figure the bond order
456 unsigned char order = 0;
457 while(boundedAtomsSerialNumbersValid[k+order+1] && (boundedAtomsSerialNumbers[k+order]
458 == boundedAtomsSerialNumbers[k+order+1]))
459 order++;
460 k += order;
461
462 // Generate the bond
463 mol.AddBond(firstAtom->GetIdx(), connectedAtom->GetIdx(), order+1);
464 }
465 return(true);
466 }
467
468 //////////////////////////////////////////////////////////////////////////////
469 bool PDBFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv)
470 {
471 OBMol* pmol = dynamic_cast<OBMol*>(pOb);
472 if(pmol==NULL)
473 return false;
474
475 //Define some references so we can use the old parameter names
476 ostream &ofs = *pConv->GetOutStream();
477 OBMol &mol = *pmol;
478
479 unsigned int i;
480 char buffer[BUFF_SIZE];
481 char type_name[10], padded_name[10];
482 char the_res[10];
483 char *element_name;
484 int res_num;
485 bool het=true;
486
487 // sprintf(buffer,"HEADER PROTEIN");
488 // ofs << buffer << endl;
489
490 if (strlen(mol.GetTitle()) > 0)
491 sprintf(buffer,"COMPND %s ",mol.GetTitle());
492 else
493 sprintf(buffer,"COMPND UNNAMED");
494 ofs << buffer << endl;
495
496 sprintf(buffer,"AUTHOR GENERATED BY OPEN BABEL %s",BABEL_VERSION);
497 ofs << buffer << endl;
498
499 OBAtom *atom;
500 OBResidue *res;
501 for (i = 1; i <= mol.NumAtoms(); i++)
502 {
503 atom = mol.GetAtom(i);
504 strcpy(type_name,etab.GetSymbol(atom->GetAtomicNum()));
505
506 //two char. elements are on position 13 and 14 one char. start at 14
507 if (strlen(type_name) > 1)
508 type_name[1] = toupper(type_name[1]);
509 else
510 {
511 char tmp[10];
512 strcpy(tmp, type_name);
513 sprintf(type_name, " %-3s", tmp);
514 }
515
516 if ( (res = atom->GetResidue()) )
517 {
518 het = res->IsHetAtom(atom);
519 snprintf(the_res,4,"%s",(char*)res->GetName().c_str());
520 snprintf(type_name,5,"%s",(char*)res->GetAtomID(atom).c_str());
521
522 //two char. elements are on position 13 and 14 one char. start at 14
523 if (strlen(etab.GetSymbol(atom->GetAtomicNum())) == 1)
524 {
525 if (strlen(type_name) < 4)
526 {
527 char tmp[10];
528 strcpy(tmp, type_name);
529 sprintf(padded_name," %-3s", tmp);
530 strncpy(type_name,padded_name,4);
531 type_name[4] = '\0';
532 }
533 else
534 {
535 type_name[4] = type_name[3];
536 type_name[3] = type_name[2];
537 type_name[2] = type_name[1];
538 type_name[1] = type_name[0];
539 type_name[0] = type_name[4];
540 type_name[4] = '\0';
541 }
542 }
543 res_num = res->GetNum();
544 }
545 else
546 {
547 strcpy(the_res,"UNK");
548 sprintf(padded_name,"%s",type_name);
549 strncpy(type_name,padded_name,4);
550 type_name[4] = '\0';
551 res_num = 1;
552 }
553
554 element_name = etab.GetSymbol(atom->GetAtomicNum());
555 if (strlen(element_name) == 2)
556 element_name[1] = toupper(element_name[1]);
557 sprintf(buffer,"%s%5d %-4s %-3s %4d %8.3f%8.3f%8.3f 1.00 0.00 %2s \n",
558 het?"HETATM":"ATOM ",
559 i,
560 type_name,
561 the_res,
562 res_num,
563 atom->GetX(),
564 atom->GetY(),
565 atom->GetZ(),
566 element_name);
567 ofs << buffer;
568 }
569
570 OBAtom *nbr;
571 int count;
572 vector<OBEdgeBase*>::iterator k;
573 for (i = 1; i <= mol.NumAtoms(); i ++)
574 {
575 atom = mol.GetAtom(i);
576 if (atom->GetValence() <= 4)
577 {
578 sprintf(buffer,"CONECT%5d", i);
579 ofs << buffer;
580 for (nbr = atom->BeginNbrAtom(k);nbr;nbr = atom->NextNbrAtom(k))
581 {
582 sprintf(buffer,"%5d", nbr->GetIdx());
583 ofs << buffer;
584 }
585 for (count = 0; count < (4 - (int)atom->GetValence()); count++)
586 {
587 sprintf(buffer, " ");
588 ofs << buffer;
589 }
590 ofs << " " << endl;
591 }
592 }
593 sprintf(buffer,"MASTER 0 0 0 0 0 0 0 0 ");
594 ofs << buffer;
595 sprintf(buffer,"%4d 0 %4d 0",mol.NumAtoms(),mol.NumAtoms());
596 ofs << buffer << endl;
597 sprintf(buffer,"END");
598 ofs << buffer << endl;
599 return(true);
600 }
601
602
603 } //namespace OpenBabel