ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/OOPSE-4/src/openbabel/pdbformat.cpp
Revision: 2445
Committed: Wed Nov 16 21:22:51 2005 UTC (18 years, 8 months ago) by tim
File size: 19897 byte(s)
Log Message:
adding more readers/writers

File Contents

# User Rev Content
1 tim 2440 /**********************************************************************
2     Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc.
3     Some portions Copyright (C) 2003-2005 Geoffrey R. Hutchison
4     Some portions Copyright (C) 2004 by Chris Morley
5    
6     This program is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation version 2 of the License.
9    
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13     GNU General Public License for more details.
14     ***********************************************************************/
15    
16 tim 2445 #include "pdbformat.hpp"
17 tim 2440
18 tim 2445
19 tim 2440 #if !HAVE_SNPRINTF
20     extern "C" int snprintf( char *, size_t, const char *, /* args */ ...);
21     #endif
22    
23     #include <vector>
24     #include <map>
25    
26     #ifdef HAVE_SSTREAM
27     #include <sstream>
28     #else
29     #include <strstream>
30     #endif
31    
32     using namespace std;
33     namespace OpenBabel
34     {
35    
36     static bool ParseAtomRecord(char *, OBMol &,int);
37     static bool ParseConectRecord(char *,OBMol &);
38    
39     //extern OBResidueData resdat; now in mol.h
40    
41     /////////////////////////////////////////////////////////////////
42     bool PDBFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv)
43     {
44    
45     OBMol* pmol = dynamic_cast<OBMol*>(pOb);
46     if(pmol==NULL)
47     return false;
48    
49     //Define some references so we can use the old parameter names
50     istream &ifs = *pConv->GetInStream();
51     OBMol &mol = *pmol;
52     const char* title = pConv->GetTitle();
53    
54     int chainNum = 1;
55     char buffer[BUFF_SIZE];
56     OBBitVec bs;
57    
58     mol.SetTitle(title);
59    
60     mol.BeginModify();
61     while (ifs.getline(buffer,BUFF_SIZE) && !EQn(buffer,"END",3))
62     {
63     if (EQn(buffer,"TER",3))
64     chainNum++;
65     if (EQn(buffer,"ATOM",4) || EQn(buffer,"HETATM",6))
66     {
67     ParseAtomRecord(buffer,mol,chainNum);
68     if (EQn(buffer,"ATOM",4))
69     bs.SetBitOn(mol.NumAtoms());
70     }
71    
72     if (EQn(buffer,"CONECT",6))
73     ParseConectRecord(buffer,mol);
74     }
75    
76     resdat.AssignBonds(mol,bs);
77     /*assign hetatm bonds based on distance*/
78    
79     if (!pConv->IsOption("b",OBConversion::INOPTIONS))
80     mol.ConnectTheDots();
81    
82     if (mol.NumAtoms() < 250) // Minimize time required on real proteins
83     if (!pConv->IsOption("s",OBConversion::INOPTIONS) && !pConv->IsOption("b",OBConversion::INOPTIONS))
84     mol.PerceiveBondOrders();
85    
86     // clean out remaining blank lines
87     while(ifs.peek() != EOF && ifs.good() &&
88     (ifs.peek() == '\n' || ifs.peek() == '\r'))
89     ifs.getline(buffer,BUFF_SIZE);
90    
91     mol.EndModify();
92    
93     mol.SetAtomTypesPerceived();
94     atomtyper.AssignImplicitValence(mol);
95    
96     if (!mol.NumAtoms())
97     return(false);
98     return(true);
99     }
100    
101     ////////////////////////////////////////////////////////////////
102     static bool ParseAtomRecord(char *buffer, OBMol &mol,int chainNum)
103     /* ATOMFORMAT "(i5,1x,a4,a1,a3,1x,a1,i4,a1,3x,3f8.3,2f6.2,1x,i3)" */
104     {
105     string sbuf = &buffer[6];
106     if (sbuf.size() < 48)
107     return(false);
108    
109     bool hetatm = (EQn(buffer,"HETATM",6)) ? true : false;
110    
111     /* serial number */
112     string serno = sbuf.substr(0,5);
113     //SerialNum(the_atom) = atoi(tmp_str);
114    
115     /* atom name */
116     string atmid = sbuf.substr(6,4);
117    
118     /* element */
119     string element;
120     if (sbuf.size() > 71)
121     element = sbuf.substr(70,2);
122     else
123     element = " ";
124    
125     //trim spaces on the right and left sides
126     while (!atmid.empty() && atmid[0] == ' ')
127     atmid = atmid.substr(1,atmid.size()-1);
128    
129     while (!atmid.empty() && atmid[atmid.size()-1] == ' ')
130     atmid = atmid.substr(0,atmid.size()-1);
131    
132     /* residue name */
133    
134     string resname = sbuf.substr(11,3);
135     if (resname == " ")
136     resname = "UNK";
137     else
138     {
139     while (!resname.empty() && resname[0] == ' ')
140     resname = resname.substr(1,resname.size()-1);
141    
142     while (!resname.empty() && resname[resname.size()-1] == ' ')
143     resname = resname.substr(0,resname.size()-1);
144     }
145    
146     /* residue sequence number */
147    
148     string resnum = sbuf.substr(16,4);
149    
150     /* X, Y, Z */
151     string xstr = sbuf.substr(24,8);
152     string ystr = sbuf.substr(32,8);
153     string zstr = sbuf.substr(40,8);
154    
155     string type;
156    
157     if (EQn(buffer,"ATOM",4))
158     {
159     type = atmid.substr(0,2);
160     if (isdigit(type[0]))
161     type = atmid.substr(1,1);
162     else if (sbuf[6] == ' ' &&
163     strncasecmp(type.c_str(), "Zn", 2) != 0 &&
164     strncasecmp(type.c_str(), "Fe", 2) != 0)
165     type = atmid.substr(0,1); // one-character element
166    
167    
168     if (resname.substr(0,2) == "AS" || resname[0] == 'N')
169     {
170     if (atmid == "AD1")
171     type = "O";
172     if (atmid == "AD2")
173     type = "N";
174     }
175     if (resname.substr(0,3) == "HIS" || resname[0] == 'H')
176     {
177     if (atmid == "AD1" || atmid == "AE2")
178     type = "N";
179     if (atmid == "AE1" || atmid == "AD2")
180     type = "C";
181     }
182     if (resname.substr(0,2) == "GL" || resname[0] == 'Q')
183     {
184     if (atmid == "AE1")
185     type = "O";
186     if (atmid == "AE2")
187     type = "N";
188     }
189     }
190     else //must be hetatm record
191     {
192     if (isalpha(element[1]) && (isalpha(element[0]) || (element[0] == ' ')))
193     {
194     if (isalpha(element[0]))
195     type = element.substr(0,2);
196     else
197     type = element.substr(1,1);
198     if (type.size() == 2)
199     type[1] = tolower(type[1]);
200     }
201     else
202     {
203     if (isalpha(atmid[0]))
204     type = atmid.substr(0,2);
205     else if (atmid[0] == ' ')
206     type = atmid.substr(1,1); // one char element
207     else
208     type = atmid.substr(1,2);
209    
210     if (atmid == resname)
211     {
212     type = atmid;
213     if (type.size() == 2)
214     type[1] = tolower(type[1]);
215     }
216     else
217     if (resname == "ADR" || resname == "COA" || resname == "FAD" ||
218     resname == "GPG" || resname == "NAD" || resname == "NAL" ||
219     resname == "NDP")
220     {
221     if (type.size() > 1)
222     type = type.substr(0,1);
223     //type.erase(1,type.size()-1);
224     }
225     else
226     if (isdigit(type[0]))
227     {
228     type = type.substr(1,1);
229     //type.erase(0,1);
230     //if (type.size() > 1) type.erase(1,type.size()-1);
231     }
232     else
233     if (type.size() > 1 && isdigit(type[1]))
234     type = type.substr(0,1);
235     //type.erase(1,1);
236     else
237     if (type.size() > 1 && isalpha(type[1]) && isupper(type[1]))
238     type[1] = tolower(type[1]);
239     }
240    
241     }
242    
243     OBAtom atom;
244     vector3 v(atof(xstr.c_str()),atof(ystr.c_str()),atof(zstr.c_str()));
245     atom.SetVector(v);
246    
247     atom.SetAtomicNum(etab.GetAtomicNum(type.c_str()));
248     atom.SetType(type);
249    
250     int rnum = atoi(resnum.c_str());
251     OBResidue *res = (mol.NumResidues() > 0) ? mol.GetResidue(mol.NumResidues()-1) : NULL;
252     if (res == NULL || res->GetName() != resname || static_cast<int>(res->GetNum())
253     != rnum)
254     {
255     vector<OBResidue*>::iterator ri;
256     for (res = mol.BeginResidue(ri) ; res ; res = mol.NextResidue(ri))
257     if (res->GetName() == resname && static_cast<int>(res->GetNum())
258     == rnum)
259     break;
260    
261     if (res == NULL)
262     {
263     res = mol.NewResidue()
264     ;
265     res->SetChainNum(chainNum);
266     res->SetName(resname);
267     res->SetNum(rnum);
268     }
269     }
270    
271     if (!mol.AddAtom(atom)
272     )
273     return(false);
274     else
275     {
276     OBAtom *atom = mol.GetAtom(mol.NumAtoms());
277    
278     res->AddAtom(atom);
279     res->SetSerialNum(atom, atoi(serno.c_str()));
280     res->SetAtomID(atom, atmid);
281     res->SetHetAtom(atom, hetatm);
282    
283     return(true);
284     }
285     }
286    
287     /////////////////////////////////////////////////////////////////////////
288     //! Utility function to read a 5-digit integer starting from a specified column
289     /*! This function reads a 5-digit integer, starting from column
290     columnAsSpecifiedInPDB from the buffer, converts it to a long
291     integer, and returns either false or true, if the conversion was
292     successful or not. If the conversion was not successful, the target
293     is set to a random value.
294    
295     For instance, the PDB Format Description for a CONECT record specifies
296    
297     COLUMNS DATA TYPE FIELD DEFINITION
298     ---------------------------------------------------------------------------------
299     1 - 6 Record name "CONECT"
300     7 - 11 Integer serial Atom serial number
301     ...
302    
303     To read the Atom serial number, you would call
304    
305     long int target;
306     if ( readIntegerFromRecord(buffer, 7, &target) == false ) {
307     cerr << "Could not parse" << endl;
308     }
309    
310     This function does not check the length of the buffer, or
311     strlen(buffer). If the buffer is not long enough => SEGFAULT.
312     */
313     static bool readIntegerFromRecord(char *buffer, unsigned int columnAsSpecifiedInPDB, long int *target)
314     {
315     char integerBuffer[6];
316     integerBuffer[5] = 0;
317    
318     strncpy(integerBuffer, buffer+columnAsSpecifiedInPDB-1, 5);
319    
320     char *errorCheckingEndPtr;
321     *target = strtol(integerBuffer, &errorCheckingEndPtr, 10);
322     if (integerBuffer == errorCheckingEndPtr)
323     return(false);
324     return(true);
325     }
326    
327     //! Read a CONECT record
328     /*! This function reads a CONECT record, as specified
329     http://www.rcsb.org/pdb/docs/format/pdbguide2.2/guide2.2_frame.html,
330     in short:
331    
332     COLUMNS DATA TYPE FIELD DEFINITION
333     ---------------------------------------------------------------------------------
334     1 - 6 Record name "CONECT"
335     7 - 11 Integer serial Atom serial number
336     12 - 16 Integer serial Serial number of bonded atom
337     17 - 21 Integer serial Serial number of bonded atom
338     22 - 26 Integer serial Serial number of bonded atom
339     27 - 31 Integer serial Serial number of bonded atom
340     32 - 36 Integer serial Serial number of hydrogen bonded atom
341     37 - 41 Integer serial Serial number of hydrogen bonded atom
342     42 - 46 Integer serial Serial number of salt bridged atom
343     47 - 51 Integer serial Serial number of hydrogen bonded atom
344     52 - 56 Integer serial Serial number of hydrogen bonded atom
345     57 - 61 Integer serial Serial number of salt bridged atom
346    
347     Hydrogen bonds and salt bridges are ignored. --Stefan Kebekus.
348     */
349    
350     static bool ParseConectRecord(char *buffer,OBMol &mol)
351     {
352     #ifdef HAVE_SSTREAM
353     stringstream errorMsg;
354     #else
355     strstream errorMsg;
356     #endif
357    
358     // Setup strings and string buffers
359     buffer[70] = '\0';
360     if (strlen(buffer) < 70)
361     {
362     errorMsg << "WARNING: Problems reading a PDB file\n"
363     << " Problems reading a CONECT record.\n"
364     << " According to the PDB specification,\n"
365     << " the record should have 70 columns, but OpenBabel found "
366     << strlen(buffer) << " columns.";
367     obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() , obInfo);
368     }
369    
370     // Serial number of the first atom, read from column 7-11 of the
371     // connect record, to which the other atoms connect to.
372     long int startAtomSerialNumber;
373     if (readIntegerFromRecord(buffer, 7, &startAtomSerialNumber) == false)
374     {
375     errorMsg << "WARNING: Problems reading a PDB file\n"
376     << " Problems reading a CONECT record.\n"
377     << " According to the PDB specification,\n"
378     << " columns 7-11 should contain the serial number of an atom.\n"
379     << " THIS CONECT RECORD WILL BE IGNORED.";
380     obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() , obWarning);
381     return(false);
382     }
383    
384     // Find a pointer to the first atom.
385     OBAtom *firstAtom = 0L;
386     vector<OBNodeBase*>::iterator i;
387     for (OBAtom *a1 = mol.BeginAtom(i);a1;a1 = mol.NextAtom(i))
388     if (static_cast<long int>(a1->GetResidue()->
389     GetSerialNum(a1)) == startAtomSerialNumber)
390     {
391     firstAtom = a1;
392     break;
393     }
394     if (firstAtom == 0L)
395     {
396     errorMsg << "WARNING: Problems reading a PDB file:\n"
397     << " Problems reading a CONECT record.\n"
398     << " According to the PDB specification,\n"
399     << " columns 7-11 should contain the serial number of an atom.\n"
400     << " No atom was found with this serial number.\n"
401     << " THIS CONECT RECORD WILL BE IGNORED.";
402     obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() , obWarning);
403     return(false);
404     }
405    
406     // Serial numbers of the atoms which bind to firstAtom, read from
407     // columns 12-16, 17-21, 22-27 and 27-31 of the connect record. Note
408     // that we reserve space for 5 integers, but read only four of
409     // them. This is to simplify the determination of the bond order;
410     // see below.
411     long int boundedAtomsSerialNumbers[5] = {0,0,0,0,0};
412     // Bools which tell us which of the serial numbers in
413     // boundedAtomsSerialNumbers are read from the file, and which are
414     // invalid
415     bool boundedAtomsSerialNumbersValid[5] = {false, false, false, false, false};
416    
417     // Now read the serial numbers. If the first serial number is not
418     // present, this connect record probably contains only hydrogen
419     // bonds and salt bridges, which we ignore. In that case, we just
420     // exit gracefully.
421     boundedAtomsSerialNumbersValid[0] = readIntegerFromRecord(buffer, 12, boundedAtomsSerialNumbers+0);
422     if (boundedAtomsSerialNumbersValid[0] == false)
423     return(true);
424     boundedAtomsSerialNumbersValid[1] = readIntegerFromRecord(buffer, 17, boundedAtomsSerialNumbers+1);
425     boundedAtomsSerialNumbersValid[2] = readIntegerFromRecord(buffer, 22, boundedAtomsSerialNumbers+2);
426     boundedAtomsSerialNumbersValid[3] = readIntegerFromRecord(buffer, 27, boundedAtomsSerialNumbers+3);
427    
428     // Now iterate over the VALID boundedAtomsSerialNumbers and connect
429     // the atoms.
430     for(unsigned int k=0; boundedAtomsSerialNumbersValid[k]; k++)
431     {
432     // Find atom that is connected to, write an error message
433     OBAtom *connectedAtom = 0L;
434     for (OBAtom *a1 = mol.BeginAtom(i);a1;a1 = mol.NextAtom(i))
435     if (static_cast<long int>(a1->GetResidue()->
436     GetSerialNum(a1)) == boundedAtomsSerialNumbers[k])
437     {
438     connectedAtom = a1;
439     break;
440     }
441     if (connectedAtom == 0L)
442     {
443     errorMsg << "WARNING: Problems reading a PDB file:\n"
444     << " Problems reading a CONECT record.\n"
445     << " According to the PDB specification,\n"
446     << " Atoms with serial #" << startAtomSerialNumber
447     << " and #" << boundedAtomsSerialNumbers[k]
448     << " should be connected\n"
449     << " However, an atom with serial #" << boundedAtomsSerialNumbers[k] << " was not found.\n"
450     << " THIS CONECT RECORD WILL BE IGNORED.";
451     obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() , obWarning);
452     break;
453     }
454    
455     // Figure the bond order
456     unsigned char order = 0;
457     while(boundedAtomsSerialNumbersValid[k+order+1] && (boundedAtomsSerialNumbers[k+order]
458     == boundedAtomsSerialNumbers[k+order+1]))
459     order++;
460     k += order;
461    
462     // Generate the bond
463     mol.AddBond(firstAtom->GetIdx(), connectedAtom->GetIdx(), order+1);
464     }
465     return(true);
466     }
467    
468     //////////////////////////////////////////////////////////////////////////////
469     bool PDBFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv)
470     {
471     OBMol* pmol = dynamic_cast<OBMol*>(pOb);
472     if(pmol==NULL)
473     return false;
474    
475     //Define some references so we can use the old parameter names
476     ostream &ofs = *pConv->GetOutStream();
477     OBMol &mol = *pmol;
478    
479     unsigned int i;
480     char buffer[BUFF_SIZE];
481     char type_name[10], padded_name[10];
482     char the_res[10];
483     char *element_name;
484     int res_num;
485     bool het=true;
486    
487     // sprintf(buffer,"HEADER PROTEIN");
488     // ofs << buffer << endl;
489    
490     if (strlen(mol.GetTitle()) > 0)
491     sprintf(buffer,"COMPND %s ",mol.GetTitle());
492     else
493     sprintf(buffer,"COMPND UNNAMED");
494     ofs << buffer << endl;
495    
496     sprintf(buffer,"AUTHOR GENERATED BY OPEN BABEL %s",BABEL_VERSION);
497     ofs << buffer << endl;
498    
499     OBAtom *atom;
500     OBResidue *res;
501     for (i = 1; i <= mol.NumAtoms(); i++)
502     {
503     atom = mol.GetAtom(i);
504     strcpy(type_name,etab.GetSymbol(atom->GetAtomicNum()));
505    
506     //two char. elements are on position 13 and 14 one char. start at 14
507     if (strlen(type_name) > 1)
508     type_name[1] = toupper(type_name[1]);
509     else
510     {
511     char tmp[10];
512     strcpy(tmp, type_name);
513     sprintf(type_name, " %-3s", tmp);
514     }
515    
516     if ( (res = atom->GetResidue()) )
517     {
518     het = res->IsHetAtom(atom);
519     snprintf(the_res,4,"%s",(char*)res->GetName().c_str());
520     snprintf(type_name,5,"%s",(char*)res->GetAtomID(atom).c_str());
521    
522     //two char. elements are on position 13 and 14 one char. start at 14
523     if (strlen(etab.GetSymbol(atom->GetAtomicNum())) == 1)
524     {
525     if (strlen(type_name) < 4)
526     {
527     char tmp[10];
528     strcpy(tmp, type_name);
529     sprintf(padded_name," %-3s", tmp);
530     strncpy(type_name,padded_name,4);
531     type_name[4] = '\0';
532     }
533     else
534     {
535     type_name[4] = type_name[3];
536     type_name[3] = type_name[2];
537     type_name[2] = type_name[1];
538     type_name[1] = type_name[0];
539     type_name[0] = type_name[4];
540     type_name[4] = '\0';
541     }
542     }
543     res_num = res->GetNum();
544     }
545     else
546     {
547     strcpy(the_res,"UNK");
548     sprintf(padded_name,"%s",type_name);
549     strncpy(type_name,padded_name,4);
550     type_name[4] = '\0';
551     res_num = 1;
552     }
553    
554     element_name = etab.GetSymbol(atom->GetAtomicNum());
555     if (strlen(element_name) == 2)
556     element_name[1] = toupper(element_name[1]);
557     sprintf(buffer,"%s%5d %-4s %-3s %4d %8.3f%8.3f%8.3f 1.00 0.00 %2s \n",
558     het?"HETATM":"ATOM ",
559     i,
560     type_name,
561     the_res,
562     res_num,
563     atom->GetX(),
564     atom->GetY(),
565     atom->GetZ(),
566     element_name);
567     ofs << buffer;
568     }
569    
570     OBAtom *nbr;
571     int count;
572     vector<OBEdgeBase*>::iterator k;
573     for (i = 1; i <= mol.NumAtoms(); i ++)
574     {
575     atom = mol.GetAtom(i);
576     if (atom->GetValence() <= 4)
577     {
578     sprintf(buffer,"CONECT%5d", i);
579     ofs << buffer;
580     for (nbr = atom->BeginNbrAtom(k);nbr;nbr = atom->NextNbrAtom(k))
581     {
582     sprintf(buffer,"%5d", nbr->GetIdx());
583     ofs << buffer;
584     }
585     for (count = 0; count < (4 - (int)atom->GetValence()); count++)
586     {
587     sprintf(buffer, " ");
588     ofs << buffer;
589     }
590     ofs << " " << endl;
591     }
592     }
593     sprintf(buffer,"MASTER 0 0 0 0 0 0 0 0 ");
594     ofs << buffer;
595     sprintf(buffer,"%4d 0 %4d 0",mol.NumAtoms(),mol.NumAtoms());
596     ofs << buffer << endl;
597     sprintf(buffer,"END");
598     ofs << buffer << endl;
599     return(true);
600     }
601    
602    
603     } //namespace OpenBabel