ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/OOPSE-4/src/openbabel/pdbformat.cpp
(Generate patch)

Comparing trunk/OOPSE-4/src/openbabel/pdbformat.cpp (file contents):
Revision 2518 by tim, Fri Dec 16 21:52:50 2005 UTC vs.
Revision 3057 by gezelter, Thu Oct 19 20:49:05 2006 UTC

# Line 13 | Line 13 | GNU General Public License for more details.
13   GNU General Public License for more details.
14   ***********************************************************************/
15  
16 < #include "pdbformat.hpp"
16 > #include "config.h"
17 > #include "mol.hpp"
18 > #include "obconversion.hpp"
19 > #include "obmolecformat.hpp"
20  
18
19 #if !HAVE_SNPRINTF
20 extern "C" int snprintf( char *, size_t, const char *, /* args */ ...);
21 #endif
22
21   #include <vector>
22   #include <map>
23  
26 #ifdef HAVE_SSTREAM
24   #include <sstream>
28 #else
29 #include <strstream>
30 #endif
25  
26   using namespace std;
27   namespace OpenBabel
28   {
29  
30 < static bool ParseAtomRecord(char *, OBMol &,int);
31 < static bool ParseConectRecord(char *,OBMol &);
30 >  class PDBFormat : public OBMoleculeFormat
31 >  {
32 >  public:
33 >    //Register this format type ID
34 >    PDBFormat()
35 >    {
36 >      OBConversion::RegisterFormat("pdb",this, "chemical/x-pdb");
37 >      OBConversion::RegisterFormat("ent",this, "chemical/x-pdb");
38 >    }
39  
40 < //extern OBResidueData    resdat; now in mol.h
40 >    virtual const char* Description() //required
41 >    {
42 >      return
43 >        "Protein Data Bank format\n \
44 >       Read Options e.g. -as\n\
45 >        s  Output single bonds only\n\
46 >        b  Disable bonding entirely\n\n";
47 >    };
48  
49 < /////////////////////////////////////////////////////////////////
50 < bool PDBFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv)
43 < {
49 >    virtual const char* SpecificationURL()
50 >    { return "http://www.rcsb.org/pdb/docs/format/pdbguide2.2/guide2.2_frame.html";};
51  
52 +    virtual const char* GetMIMEType()
53 +    { return "chemical/x-pdb"; };
54 +
55 +    //Flags() can return be any the following combined by | or be omitted if none apply
56 +    // NOTREADABLE  READONEONLY  NOTWRITABLE  WRITEONEONLY
57 +    virtual unsigned int Flags()
58 +    {
59 +      return READONEONLY;
60 +    };
61 +
62 +    //*** This section identical for most OBMol conversions ***
63 +    ////////////////////////////////////////////////////
64 +    /// The "API" interface functions
65 +    virtual bool ReadMolecule(OBBase* pOb, OBConversion* pConv);
66 +    virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv);
67 +
68 +  };
69 +  //***
70 +
71 +  //Make an instance of the format class
72 +  PDBFormat thePDBFormat;
73 +
74 +  /////////////////////////////////////////////////////////////////
75 +
76 +
77 +  static bool ParseAtomRecord(char *, OBMol &,int);
78 +  static bool ParseConectRecord(char *,OBMol &);
79 +
80 +  //extern OBResidueData    resdat; now in mol.h
81 +
82 +  /////////////////////////////////////////////////////////////////
83 +  bool PDBFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv)
84 +  {
85 +
86      OBMol* pmol = dynamic_cast<OBMol*>(pOb);
87      if(pmol==NULL)
88 <        return false;
88 >      return false;
89  
90      //Define some references so we can use the old parameter names
91      istream &ifs = *pConv->GetInStream();
# Line 59 | Line 100 | bool PDBFormat::ReadMolecule(OBBase* pOb, OBConversion
100  
101      mol.BeginModify();
102      while (ifs.getline(buffer,BUFF_SIZE) && !EQn(buffer,"END",3))
103 <    {
103 >      {
104          if (EQn(buffer,"TER",3))
105 <            chainNum++;
105 >          chainNum++;
106          if (EQn(buffer,"ATOM",4) || EQn(buffer,"HETATM",6))
107 <        {
107 >          {
108              ParseAtomRecord(buffer,mol,chainNum);
109              if (EQn(buffer,"ATOM",4))
110 <                bs.SetBitOn(mol.NumAtoms());
111 <        }
110 >              bs.SetBitOn(mol.NumAtoms());
111 >          }
112  
113          if (EQn(buffer,"CONECT",6))
114 <            ParseConectRecord(buffer,mol);
115 <    }
114 >          ParseConectRecord(buffer,mol);
115 >      }
116  
117      resdat.AssignBonds(mol,bs);
118      /*assign hetatm bonds based on distance*/
# Line 79 | Line 120 | bool PDBFormat::ReadMolecule(OBBase* pOb, OBConversion
120      if (!pConv->IsOption("b",OBConversion::INOPTIONS))
121        mol.ConnectTheDots();
122  
82    if (mol.NumAtoms() < 250) // Minimize time required on real proteins
123        if (!pConv->IsOption("s",OBConversion::INOPTIONS) && !pConv->IsOption("b",OBConversion::INOPTIONS))
124 <        mol.PerceiveBondOrders();
124 >        mol.PerceiveBondOrders();
125  
126      // clean out remaining blank lines
127      while(ifs.peek() != EOF && ifs.good() &&
128 <          (ifs.peek() == '\n' || ifs.peek() == '\r'))
128 >          (ifs.peek() == '\n' || ifs.peek() == '\r'))
129        ifs.getline(buffer,BUFF_SIZE);
130  
131      mol.EndModify();
# Line 94 | Line 134 | bool PDBFormat::ReadMolecule(OBBase* pOb, OBConversion
134      atomtyper.AssignImplicitValence(mol);
135  
136      if (!mol.NumAtoms())
137 <        return(false);
137 >      return(false);
138      return(true);
139 < }
139 >  }
140  
141 < ////////////////////////////////////////////////////////////////
142 < static bool ParseAtomRecord(char *buffer, OBMol &mol,int chainNum)
143 < /* ATOMFORMAT "(i5,1x,a4,a1,a3,1x,a1,i4,a1,3x,3f8.3,2f6.2,1x,i3)" */
144 < {
141 >  ////////////////////////////////////////////////////////////////
142 >  static bool ParseAtomRecord(char *buffer, OBMol &mol,int chainNum)
143 >    /* ATOMFORMAT "(i5,1x,a4,a1,a3,1x,a1,i4,a1,3x,3f8.3,2f6.2,1x,i3)" */
144 >  {
145      string sbuf = &buffer[6];
146      if (sbuf.size() < 48)
147 <        return(false);
147 >      return(false);
148  
149      bool hetatm = (EQn(buffer,"HETATM",6)) ? true : false;
150  
# Line 118 | Line 158 | static bool ParseAtomRecord(char *buffer, OBMol &mol,i
158      /* element */
159      string element;
160      if (sbuf.size() > 71)
161 <        element = sbuf.substr(70,2);
161 >      element = sbuf.substr(70,2);
162      else
163 <        element = "  ";
163 >      element = "  ";
164  
165      //trim spaces on the right and left sides
166      while (!atmid.empty() && atmid[0] == ' ')
167 <        atmid = atmid.substr(1,atmid.size()-1);
167 >      atmid = atmid.substr(1,atmid.size()-1);
168  
169      while (!atmid.empty() && atmid[atmid.size()-1] == ' ')
170 <        atmid = atmid.substr(0,atmid.size()-1);
170 >      atmid = atmid.substr(0,atmid.size()-1);
171  
172      /* residue name */
173  
174      string resname = sbuf.substr(11,3);
175      if (resname == "   ")
176 <        resname = "UNK";
176 >      resname = "UNK";
177      else
178 <    {
178 >      {
179          while (!resname.empty() && resname[0] == ' ')
180 <            resname = resname.substr(1,resname.size()-1);
180 >          resname = resname.substr(1,resname.size()-1);
181  
182          while (!resname.empty() && resname[resname.size()-1] == ' ')
183 <            resname = resname.substr(0,resname.size()-1);
184 <    }
183 >          resname = resname.substr(0,resname.size()-1);
184 >      }
185  
186      /* residue sequence number */
187  
# Line 155 | Line 195 | static bool ParseAtomRecord(char *buffer, OBMol &mol,i
195      string type;
196  
197      if (EQn(buffer,"ATOM",4))
198 <    {
198 >      {
199          type = atmid.substr(0,2);
200          if (isdigit(type[0]))
201 <          type = atmid.substr(1,1);
201 >          type = atmid.substr(1,1);
202          else if (sbuf[6] == ' ' &&
203 <                 strncasecmp(type.c_str(), "Zn", 2) != 0 &&
204 <                 strncasecmp(type.c_str(), "Fe", 2) != 0)
205 <          type = atmid.substr(0,1);     // one-character element
206 <        
203 >                 strncasecmp(type.c_str(), "Zn", 2) != 0 &&
204 >                 strncasecmp(type.c_str(), "Fe", 2) != 0)
205 >          type = atmid.substr(0,1);     // one-character element
206 >        
207  
208          if (resname.substr(0,2) == "AS" || resname[0] == 'N')
209 <        {
209 >          {
210              if (atmid == "AD1")
211 <                type = "O";
211 >              type = "O";
212              if (atmid == "AD2")
213 <                type = "N";
214 <        }
213 >              type = "N";
214 >          }
215          if (resname.substr(0,3) == "HIS" || resname[0] == 'H')
216 <        {
216 >          {
217              if (atmid == "AD1" || atmid == "AE2")
218 <                type = "N";
218 >              type = "N";
219              if (atmid == "AE1" || atmid == "AD2")
220 <                type = "C";
221 <        }
220 >              type = "C";
221 >          }
222          if (resname.substr(0,2) == "GL" || resname[0] == 'Q')
223 <        {
223 >          {
224              if (atmid == "AE1")
225 <                type = "O";
225 >              type = "O";
226              if (atmid == "AE2")
227 <                type = "N";
228 <        }
229 <    }
227 >              type = "N";
228 >          }
229 >      }
230      else //must be hetatm record
231 <    {
231 >      {
232          if (isalpha(element[1]) && (isalpha(element[0]) || (element[0] == ' ')))
233 <        {
233 >          {
234              if (isalpha(element[0]))
235 <                type = element.substr(0,2);
235 >              type = element.substr(0,2);
236              else
237 <                type = element.substr(1,1);
237 >              type = element.substr(1,1);
238              if (type.size() == 2)
239 <                type[1] = tolower(type[1]);
240 <        }
239 >              type[1] = tolower(type[1]);
240 >          }
241          else
242 <          {
243 <            if (isalpha(atmid[0]))
244 <              type = atmid.substr(0,2);
242 >          {
243 >            if (isalpha(atmid[0]))
244 >              type = atmid.substr(0,2);
245              else if (atmid[0] == ' ')
246 <              type = atmid.substr(1,1); // one char element
246 >              type = atmid.substr(1,1); // one char element
247              else
248 <              type = atmid.substr(1,2);
248 >              type = atmid.substr(1,2);
249  
250              if (atmid == resname)
251 <              {
252 <                type = atmid;
251 >              {
252 >                type = atmid;
253                  if (type.size() == 2)
254 <                  type[1] = tolower(type[1]);
255 <              }
254 >                  type[1] = tolower(type[1]);
255 >              }
256              else
257 <              if (resname == "ADR" || resname == "COA" || resname == "FAD" ||
258 <                  resname == "GPG" || resname == "NAD" || resname == "NAL" ||
259 <                  resname == "NDP")
260 <                {
261 <                  if (type.size() > 1)
262 <                    type = type.substr(0,1);
263 <                  //type.erase(1,type.size()-1);
257 >              if (resname == "ADR" || resname == "COA" || resname == "FAD" ||
258 >                  resname == "GPG" || resname == "NAD" || resname == "NAL" ||
259 >                  resname == "NDP")
260 >                {
261 >                  if (type.size() > 1)
262 >                    type = type.substr(0,1);
263 >                  //type.erase(1,type.size()-1);
264                  }
265 <              else
266 <                if (isdigit(type[0]))
267 <                  {
268 <                    type = type.substr(1,1);
269 <                    //type.erase(0,1);
270 <                    //if (type.size() > 1) type.erase(1,type.size()-1);
271 <                  }
272 <                else
273 <                  if (type.size() > 1 && isdigit(type[1]))
274 <                    type = type.substr(0,1);
265 >              else
266 >                if (isdigit(type[0]))
267 >                  {
268 >                    type = type.substr(1,1);
269 >                    //type.erase(0,1);
270 >                    //if (type.size() > 1) type.erase(1,type.size()-1);
271 >                  }
272 >                else
273 >                  if (type.size() > 1 && isdigit(type[1]))
274 >                    type = type.substr(0,1);
275              //type.erase(1,1);
276 <                  else
277 <                    if (type.size() > 1 && isalpha(type[1]) && isupper(type[1]))
278 <                      type[1] = tolower(type[1]);
279 <          }
280 <        
281 <    }
276 >                  else
277 >                    if (type.size() > 1 && isalpha(type[1]) && isupper(type[1]))
278 >                      type[1] = tolower(type[1]);
279 >          }
280 >        
281 >      }
282  
283      OBAtom atom;
284      vector3 v(atof(xstr.c_str()),atof(ystr.c_str()),atof(zstr.c_str()));
# Line 250 | Line 290 | static bool ParseAtomRecord(char *buffer, OBMol &mol,i
290      int        rnum = atoi(resnum.c_str());
291      OBResidue *res  = (mol.NumResidues() > 0) ? mol.GetResidue(mol.NumResidues()-1) : NULL;
292      if (res == NULL || res->GetName() != resname || static_cast<int>(res->GetNum())
293 <            != rnum)
294 <    {
293 >        != rnum)
294 >      {
295          vector<OBResidue*>::iterator ri;
296          for (res = mol.BeginResidue(ri) ; res ; res = mol.NextResidue(ri))
297 <            if (res->GetName() == resname && static_cast<int>(res->GetNum())
298 <                    == rnum)
299 <                break;
297 >          if (res->GetName() == resname && static_cast<int>(res->GetNum())
298 >              == rnum)
299 >            break;
300  
301          if (res == NULL)
302 <        {
302 >          {
303              res = mol.NewResidue()
304 <                  ;
304 >              ;
305              res->SetChainNum(chainNum);
306              res->SetName(resname);
307              res->SetNum(rnum);
308 <        }
309 <    }
308 >          }
309 >      }
310  
311      if (!mol.AddAtom(atom)
312 <       )
313 <        return(false);
312 >        )
313 >      return(false);
314      else
315 <    {
315 >      {
316          OBAtom *atom = mol.GetAtom(mol.NumAtoms());
317  
318          res->AddAtom(atom);
# Line 281 | Line 321 | static bool ParseAtomRecord(char *buffer, OBMol &mol,i
321          res->SetHetAtom(atom, hetatm);
322  
323          return(true);
324 <    }
325 < }
324 >      }
325 >  }
326  
327 < /////////////////////////////////////////////////////////////////////////
328 < //! Utility function to read a 5-digit integer starting from a specified column
329 < /*! This function reads a 5-digit integer, starting from column
330 <  columnAsSpecifiedInPDB from the buffer, converts it to a long
331 <  integer, and returns either false or true, if the conversion was
332 <  successful or not. If the conversion was not successful, the target
333 <  is set to a random value.
327 >  /////////////////////////////////////////////////////////////////////////
328 >  //! Utility function to read a 5-digit integer starting from a specified column
329 >  /*! This function reads a 5-digit integer, starting from column
330 >    columnAsSpecifiedInPDB from the buffer, converts it to a long
331 >    integer, and returns either false or true, if the conversion was
332 >    successful or not. If the conversion was not successful, the target
333 >    is set to a random value.
334  
335 <  For instance, the PDB Format Description for a CONECT record specifies
335 >    For instance, the PDB Format Description for a CONECT record specifies
336  
337 <  COLUMNS        DATA TYPE        FIELD           DEFINITION
338 <  ---------------------------------------------------------------------------------
339 <  1 -  6         Record name      "CONECT"
340 <  7 - 11         Integer          serial          Atom serial number
341 <  ...
337 >    COLUMNS        DATA TYPE        FIELD           DEFINITION
338 >    ---------------------------------------------------------------------------------
339 >    1 -  6         Record name      "CONECT"
340 >    7 - 11         Integer          serial          Atom serial number
341 >    ...
342  
343 <  To read the Atom serial number, you would call
343 >    To read the Atom serial number, you would call
344  
345 <  long int target;
346 <  if ( readIntegerFromRecord(buffer, 7, &target) == false ) {
345 >    long int target;
346 >    if ( readIntegerFromRecord(buffer, 7, &target) == false ) {
347      cerr << "Could not parse" << endl;
348 <  }
348 >    }
349    
350 <  This function does not check the length of the buffer, or
351 <  strlen(buffer). If the buffer is not long enough => SEGFAULT.
352 < */
353 < static bool readIntegerFromRecord(char *buffer, unsigned int columnAsSpecifiedInPDB, long int *target)
354 < {
350 >    This function does not check the length of the buffer, or
351 >    strlen(buffer). If the buffer is not long enough => SEGFAULT.
352 >  */
353 >  static bool readIntegerFromRecord(char *buffer, unsigned int columnAsSpecifiedInPDB, long int *target)
354 >  {
355      char integerBuffer[6];
356 <    integerBuffer[5] = 0;
356 >    integerBuffer[5] = '\0';
357  
358      strncpy(integerBuffer, buffer+columnAsSpecifiedInPDB-1, 5);
359  
360      char *errorCheckingEndPtr;
361      *target = strtol(integerBuffer, &errorCheckingEndPtr, 10);
362      if (integerBuffer == errorCheckingEndPtr)
363 <        return(false);
363 >      return(false);
364      return(true);
365 < }
365 >  }
366  
367 < //! Read a CONECT record
368 < /*! This function reads a CONECT record, as specified
369 <  http://www.rcsb.org/pdb/docs/format/pdbguide2.2/guide2.2_frame.html,
370 <  in short:
367 >  //! Read a CONECT record
368 >  /*! This function reads a CONECT record, as specified
369 >    http://www.rcsb.org/pdb/docs/format/pdbguide2.2/guide2.2_frame.html,
370 >    in short:
371  
372 <  COLUMNS         DATA TYPE        FIELD           DEFINITION
373 <  ---------------------------------------------------------------------------------
374 <   1 -  6         Record name      "CONECT"
375 <   7 - 11         Integer          serial          Atom serial number
376 <  12 - 16         Integer          serial          Serial number of bonded atom
377 <  17 - 21         Integer          serial          Serial number of bonded atom
378 <  22 - 26         Integer          serial          Serial number of bonded atom
379 <  27 - 31         Integer          serial          Serial number of bonded atom
380 <  32 - 36         Integer          serial          Serial number of hydrogen bonded atom
381 <  37 - 41         Integer          serial          Serial number of hydrogen bonded atom
382 <  42 - 46         Integer          serial          Serial number of salt bridged atom
383 <  47 - 51         Integer          serial          Serial number of hydrogen bonded atom
384 <  52 - 56         Integer          serial          Serial number of hydrogen bonded atom
385 <  57 - 61         Integer          serial          Serial number of salt bridged atom
372 >    COLUMNS         DATA TYPE        FIELD           DEFINITION
373 >    ---------------------------------------------------------------------------------
374 >    1 -  6         Record name      "CONECT"
375 >    7 - 11         Integer          serial          Atom serial number
376 >    12 - 16         Integer          serial          Serial number of bonded atom
377 >    17 - 21         Integer          serial          Serial number of bonded atom
378 >    22 - 26         Integer          serial          Serial number of bonded atom
379 >    27 - 31         Integer          serial          Serial number of bonded atom
380 >    32 - 36         Integer          serial          Serial number of hydrogen bonded atom
381 >    37 - 41         Integer          serial          Serial number of hydrogen bonded atom
382 >    42 - 46         Integer          serial          Serial number of salt bridged atom
383 >    47 - 51         Integer          serial          Serial number of hydrogen bonded atom
384 >    52 - 56         Integer          serial          Serial number of hydrogen bonded atom
385 >    57 - 61         Integer          serial          Serial number of salt bridged atom
386  
387 <  Hydrogen bonds and salt bridges are ignored. --Stefan Kebekus.
388 < */
389 <
390 < static bool ParseConectRecord(char *buffer,OBMol &mol)
391 < {
352 < #ifdef HAVE_SSTREAM
387 >    Hydrogen bonds and salt bridges are ignored. --Stefan Kebekus.
388 >  */
389 >
390 >  static bool ParseConectRecord(char *buffer,OBMol &mol)
391 >  {
392      stringstream errorMsg;
393 < #else
355 <    strstream errorMsg;
356 < #endif
393 >    string clearError;
394  
395      // Setup strings and string buffers
396 +    vector<string> vs;
397      buffer[70] = '\0';
398      if (strlen(buffer) < 70)
399 <    {
399 >      {
400          errorMsg << "WARNING: Problems reading a PDB file\n"
401 <                 << "  Problems reading a CONECT record.\n"
402 <                 << "  According to the PDB specification,\n"
403 <                 << "  the record should have 70 columns, but OpenBabel found "
404 <                 << strlen(buffer) << " columns.";
405 <        obErrorLog.ThrowError(__func__, errorMsg.str() , obInfo);
406 <    }
401 >                 << "  Problems reading a CONECT record.\n"
402 >                 << "  According to the PDB specification,\n"
403 >                 << "  the record should have 70 columns, but OpenBabel found "
404 >                 << strlen(buffer) << " columns." << endl;
405 >        obErrorLog.ThrowError(__func__, errorMsg.str() , obInfo);
406 >        errorMsg.str(clearError);
407 >      }
408  
409      // Serial number of the first atom, read from column 7-11 of the
410      // connect record, to which the other atoms connect to.
411      long int startAtomSerialNumber;
412 <    if (readIntegerFromRecord(buffer, 7, &startAtomSerialNumber) == false)
413 <    {
375 <      errorMsg << "WARNING: Problems reading a PDB file\n"
376 <               << "  Problems reading a CONECT record.\n"
377 <               << "  According to the PDB specification,\n"
378 <               << "  columns 7-11 should contain the serial number of an atom.\n"
379 <               << "  THIS CONECT RECORD WILL BE IGNORED.";
380 <        obErrorLog.ThrowError(__func__, errorMsg.str() , obWarning);
381 <        return(false);
382 <    }
383 <
384 <    // Find a pointer to the first atom.
385 <    OBAtom *firstAtom = 0L;
386 <    vector<OBNodeBase*>::iterator i;
387 <    for (OBAtom *a1 = mol.BeginAtom(i);a1;a1 = mol.NextAtom(i))
388 <        if (static_cast<long int>(a1->GetResidue()->
389 <                                  GetSerialNum(a1)) == startAtomSerialNumber)
390 <        {
391 <            firstAtom = a1;
392 <            break;
393 <        }
394 <    if (firstAtom == 0L)
395 <    {
396 <        errorMsg << "WARNING: Problems reading a PDB file:\n"
397 <                 << "  Problems reading a CONECT record.\n"
398 <                 << "  According to the PDB specification,\n"
399 <                 << "  columns 7-11 should contain the serial number of an atom.\n"
400 <                 << "  No atom was found with this serial number.\n"
401 <                 << "  THIS CONECT RECORD WILL BE IGNORED.";
402 <        obErrorLog.ThrowError(__func__, errorMsg.str() , obWarning);
403 <        return(false);
404 <    }
405 <
412 >    // A pointer to the first atom.
413 >    OBAtom *firstAtom = NULL;
414      // Serial numbers of the atoms which bind to firstAtom, read from
415      // columns 12-16, 17-21, 22-27 and 27-31 of the connect record. Note
416      // that we reserve space for 5 integers, but read only four of
# Line 414 | Line 422 | static bool ParseConectRecord(char *buffer,OBMol &mol)
422      // invalid
423      bool boundedAtomsSerialNumbersValid[5] = {false, false, false, false, false};
424  
425 <    // Now read the serial numbers. If the first serial number is not
426 <    // present, this connect record probably contains only hydrogen
427 <    // bonds and salt bridges, which we ignore. In that case, we just
428 <    // exit gracefully.
429 <    boundedAtomsSerialNumbersValid[0] = readIntegerFromRecord(buffer, 12, boundedAtomsSerialNumbers+0);
422 <    if (boundedAtomsSerialNumbersValid[0] == false)
423 <        return(true);
424 <    boundedAtomsSerialNumbersValid[1] = readIntegerFromRecord(buffer, 17, boundedAtomsSerialNumbers+1);
425 <    boundedAtomsSerialNumbersValid[2] = readIntegerFromRecord(buffer, 22, boundedAtomsSerialNumbers+2);
426 <    boundedAtomsSerialNumbersValid[3] = readIntegerFromRecord(buffer, 27, boundedAtomsSerialNumbers+3);
425 >    // Pragmatic approach -- too many non-standard PDB files out there
426 >    // (including some old ones from us)
427 >    // So if we have a small number of atoms, then try to break by spaces
428 >    // Otherwise (i.e., NumAtoms() > 9,999 we need to go by position)
429 >    // We'll switch back and forth a few times to save duplicating common code
430  
431 +    if (mol.NumAtoms() <= 9999)
432 +      {
433 +        // make sure we don't look at salt bridges or whatever, so cut the buffer short
434 +        buffer[32] = '\0';
435 +        tokenize(vs,buffer);
436 +        if( vs.empty() || vs.size() < 2)
437 +          return false;
438 +        vs.erase(vs.begin()); // remove "CONECT"
439 +
440 +        startAtomSerialNumber = atoi(vs[0].c_str());
441 +      }
442 +    else
443 +      {
444 +        if (readIntegerFromRecord(buffer, 7, &startAtomSerialNumber) == false)
445 +          {
446 +            errorMsg << "WARNING: Problems reading a PDB file\n"
447 +                     << "  Problems reading a CONECT record.\n"
448 +                     << "  According to the PDB specification,\n"
449 +                     << "  columns 7-11 should contain the serial number of an atom.\n"
450 +                     << "  THIS CONECT RECORD WILL BE IGNORED." << endl;
451 +            obErrorLog.ThrowError(__func__, errorMsg.str() , obWarning);
452 +            return(false);
453 +          }
454 +      }
455 +
456 +    vector<OBNodeBase*>::iterator i;
457 +    for (OBAtom *a1 = mol.BeginAtom(i);a1;a1 = mol.NextAtom(i))
458 +      if (static_cast<long int>(a1->GetResidue()->
459 +                                GetSerialNum(a1)) == startAtomSerialNumber)
460 +        {
461 +          firstAtom = a1;
462 +          break;
463 +        }
464 +    if (firstAtom == NULL)
465 +      {
466 +        errorMsg << "WARNING: Problems reading a PDB file:\n"
467 +                 << "  Problems reading a CONECT record.\n"
468 +                 << "  According to the PDB specification,\n"
469 +                 << "  columns 7-11 should contain the serial number of an atom.\n"
470 +                 << "  No atom was found with this serial number.\n"
471 +                 << "  THIS CONECT RECORD WILL BE IGNORED." << endl;
472 +        obErrorLog.ThrowError(__func__, errorMsg.str() , obWarning);
473 +        return(false);
474 +      }
475 +
476 +    if (mol.NumAtoms() < 9999)
477 +      {
478 +        if (vs.size() > 1) boundedAtomsSerialNumbers[0] = atoi(vs[1].c_str());
479 +        if (vs.size() > 2) boundedAtomsSerialNumbers[1] = atoi(vs[2].c_str());
480 +        if (vs.size() > 3) boundedAtomsSerialNumbers[2] = atoi(vs[3].c_str());
481 +        if (vs.size() > 4) boundedAtomsSerialNumbers[3] = atoi(vs[4].c_str());
482 +
483 +        unsigned int limit = 4;
484 +        if (vs.size() <= 4)
485 +          limit = vs.size() - 1;
486 +
487 +        for (unsigned int i = 0; i < limit; i++)
488 +          boundedAtomsSerialNumbersValid[i] = true;
489 +      }
490 +    else
491 +      {
492 +        // Now read the serial numbers. If the first serial number is not
493 +        // present, this connect record probably contains only hydrogen
494 +        // bonds and salt bridges, which we ignore. In that case, we just
495 +        // exit gracefully.
496 +        boundedAtomsSerialNumbersValid[0] = readIntegerFromRecord(buffer, 12, boundedAtomsSerialNumbers+0);
497 +        if (boundedAtomsSerialNumbersValid[0] == false)
498 +          return(true);
499 +        boundedAtomsSerialNumbersValid[1] = readIntegerFromRecord(buffer, 17, boundedAtomsSerialNumbers+1);
500 +        boundedAtomsSerialNumbersValid[2] = readIntegerFromRecord(buffer, 22, boundedAtomsSerialNumbers+2);
501 +        boundedAtomsSerialNumbersValid[3] = readIntegerFromRecord(buffer, 27, boundedAtomsSerialNumbers+3);
502 +      }
503 +
504      // Now iterate over the VALID boundedAtomsSerialNumbers and connect
505      // the atoms.
506      for(unsigned int k=0; boundedAtomsSerialNumbersValid[k]; k++)
507 <    {
507 >      {
508          // Find atom that is connected to, write an error message
509          OBAtom *connectedAtom = 0L;
510          for (OBAtom *a1 = mol.BeginAtom(i);a1;a1 = mol.NextAtom(i))
511 <            if (static_cast<long int>(a1->GetResidue()->
512 <                                      GetSerialNum(a1)) == boundedAtomsSerialNumbers[k])
511 >          if (static_cast<long int>(a1->GetResidue()->
512 >                                    GetSerialNum(a1)) == boundedAtomsSerialNumbers[k])
513              {
514 <                connectedAtom = a1;
515 <                break;
514 >              connectedAtom = a1;
515 >              break;
516              }
517          if (connectedAtom == 0L)
518 <        {
518 >          {
519              errorMsg << "WARNING: Problems reading a PDB file:\n"
520 <                     << "  Problems reading a CONECT record.\n"
521 <                     << "  According to the PDB specification,\n"
522 <                     << "  Atoms with serial #" << startAtomSerialNumber
523 <                     << " and #" << boundedAtomsSerialNumbers[k]
524 <                     << " should be connected\n"
525 <                     << "  However, an atom with serial #" << boundedAtomsSerialNumbers[k] << " was not found.\n"
526 <                     << "  THIS CONECT RECORD WILL BE IGNORED.";
527 <            obErrorLog.ThrowError(__func__, errorMsg.str() , obWarning);
528 <            break;
529 <        }
520 >                     << "  Problems reading a CONECT record.\n"
521 >                     << "  According to the PDB specification,\n"
522 >                     << "  Atoms with serial #" << startAtomSerialNumber
523 >                     << " and #" << boundedAtomsSerialNumbers[k]
524 >                     << " should be connected\n"
525 >                     << "  However, an atom with serial #" << boundedAtomsSerialNumbers[k] << " was not found.\n"
526 >                     << "  THIS CONECT RECORD WILL BE IGNORED." << endl;
527 >            obErrorLog.ThrowError(__func__, errorMsg.str() , obWarning);
528 >            return(false);
529 >          }
530  
531          // Figure the bond order
532          unsigned char order = 0;
533          while(boundedAtomsSerialNumbersValid[k+order+1] && (boundedAtomsSerialNumbers[k+order]
534 <                == boundedAtomsSerialNumbers[k+order+1]))
535 <            order++;
534 >                                                            == boundedAtomsSerialNumbers[k+order+1]))
535 >          order++;
536          k += order;
537 <
537 >        
538          // Generate the bond
539          mol.AddBond(firstAtom->GetIdx(), connectedAtom->GetIdx(), order+1);
540 <    }
540 >      }
541      return(true);
542 < }
542 >  }
543  
544 < //////////////////////////////////////////////////////////////////////////////
545 < bool PDBFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv)
546 < {
544 >  //////////////////////////////////////////////////////////////////////////////
545 >  bool PDBFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv)
546 >  {
547      OBMol* pmol = dynamic_cast<OBMol*>(pOb);
548      if(pmol==NULL)
549 <        return false;
549 >      return false;
550  
551      //Define some references so we can use the old parameter names
552      ostream &ofs = *pConv->GetOutStream();
# Line 484 | Line 560 | bool PDBFormat::WriteMolecule(OBBase* pOb, OBConversio
560      int res_num;
561      bool het=true;
562  
487    //  sprintf(buffer,"HEADER    PROTEIN");
488    //  ofs << buffer << endl;
489
563      if (strlen(mol.GetTitle()) > 0)
564 <        sprintf(buffer,"COMPND    %s ",mol.GetTitle());
564 >      snprintf(buffer, BUFF_SIZE, "COMPND    %s ",mol.GetTitle());
565      else
566 <        sprintf(buffer,"COMPND    UNNAMED");
566 >      snprintf(buffer, BUFF_SIZE, "COMPND    UNNAMED");
567      ofs << buffer << endl;
568  
569 <    sprintf(buffer,"AUTHOR    GENERATED BY OPEN BABEL %s",BABEL_VERSION);
569 >    snprintf(buffer, BUFF_SIZE, "AUTHOR    GENERATED BY OPEN BABEL %s",BABEL_VERSION);
570      ofs << buffer << endl;
571  
572 +    // before we write any records, we should check to see if any coord < -1000
573 +    // which will cause errors in the formatting
574 +
575 +    double minX, minY, minZ;
576 +    minX = minY = minZ = -999.0f;
577 +    FOR_ATOMS_OF_MOL(a, mol)
578 +      {
579 +        if (a->GetX() < minX)
580 +          minX = a->GetX();
581 +        if (a->GetY() < minY)
582 +          minY = a->GetY();
583 +        if (a->GetZ() < minZ)
584 +          minZ = a->GetZ();
585 +      }
586 +    vector3 transV = VZero;
587 +    if (minX < -999.0)
588 +      transV.SetX(-1.0*minX - 900.0);
589 +    if (minY < -999.0)
590 +      transV.SetY(-1.0*minY - 900.0);
591 +    if (minZ < -999.0)
592 +      transV.SetZ(-1.0*minZ - 900.0);
593 +
594 +    // if minX, minY, or minZ was never changed, shift will be 0.0f
595 +    // otherwise, move enough so that smallest coord is > -999.0f
596 +    mol.Translate(transV);
597 +
598      OBAtom *atom;
599      OBResidue *res;
600      for (i = 1; i <= mol.NumAtoms(); i++)
601 <    {
601 >      {
602          atom = mol.GetAtom(i);
603 <        strcpy(type_name,etab.GetSymbol(atom->GetAtomicNum()));
603 >        strncpy(type_name, etab.GetSymbol(atom->GetAtomicNum()), sizeof(type_name));
604 >        type_name[sizeof(type_name) - 1] = '\0';
605  
606          //two char. elements are on position 13 and 14 one char. start at 14
607          if (strlen(type_name) > 1)
608 <            type_name[1] = toupper(type_name[1]);
608 >          type_name[1] = toupper(type_name[1]);
609          else
610 <        {
610 >          {
611              char tmp[10];
612 <            strcpy(tmp, type_name);
613 <            sprintf(type_name, " %-3s", tmp);
614 <        }
612 >            strncpy(tmp, type_name, 10);
613 >            snprintf(type_name, sizeof(type_name), " %-3s", tmp);
614 >          }
615  
616          if ( (res = atom->GetResidue()) )
617 <        {
617 >          {
618              het = res->IsHetAtom(atom);
619              snprintf(the_res,4,"%s",(char*)res->GetName().c_str());
620              snprintf(type_name,5,"%s",(char*)res->GetAtomID(atom).c_str());
621  
622              //two char. elements are on position 13 and 14 one char. start at 14
623              if (strlen(etab.GetSymbol(atom->GetAtomicNum())) == 1)
624 <            {
624 >              {
625                  if (strlen(type_name) < 4)
626 <                {
626 >                  {
627                      char tmp[10];
628                      strcpy(tmp, type_name);
629 <                    sprintf(padded_name," %-3s", tmp);
629 >                    snprintf(padded_name, sizeof(padded_name), " %-3s", tmp);
630                      strncpy(type_name,padded_name,4);
631                      type_name[4] = '\0';
632 <                }
632 >                  }
633                  else
634 <                {
634 >                  {
635                      type_name[4] = type_name[3];
636                      type_name[3] = type_name[2];
637                      type_name[2] = type_name[1];
638                      type_name[1] = type_name[0];
639                      type_name[0] = type_name[4];
640                      type_name[4] = '\0';
641 <                }
642 <            }
641 >                  }
642 >              }
643              res_num = res->GetNum();
644 <        }
644 >          }
645          else
646 <        {
646 >          {
647              strcpy(the_res,"UNK");
648 <            sprintf(padded_name,"%s",type_name);
648 >            snprintf(padded_name,sizeof(padded_name), "%s",type_name);
649              strncpy(type_name,padded_name,4);
650              type_name[4] = '\0';
651              res_num = 1;
652 <        }
652 >          }
653  
654          element_name = etab.GetSymbol(atom->GetAtomicNum());
655          if (strlen(element_name) == 2)
656 <            element_name[1] = toupper(element_name[1]);
657 <        sprintf(buffer,"%s%5d %-4s %-3s  %4d    %8.3f%8.3f%8.3f  1.00  0.00          %2s  \n",
656 >          element_name[1] = toupper(element_name[1]);
657 >        snprintf(buffer, BUFF_SIZE, "%s%5d %-4s %-3s  %4d    %8.3f%8.3f%8.3f  1.00  0.00          %2s  \n",
658                  het?"HETATM":"ATOM  ",
659                  i,
660                  type_name,
# Line 565 | Line 665 | bool PDBFormat::WriteMolecule(OBBase* pOb, OBConversio
665                  atom->GetZ(),
666                  element_name);
667          ofs << buffer;
668 <    }
668 >      }
669  
670      OBAtom *nbr;
671      int count;
672      vector<OBEdgeBase*>::iterator k;
673      for (i = 1; i <= mol.NumAtoms(); i ++)
674 <    {
674 >      {
675          atom = mol.GetAtom(i);
676          if (atom->GetValence() <= 4)
677 <        {
678 <            sprintf(buffer,"CONECT%5d", i);
677 >          {
678 >            snprintf(buffer, BUFF_SIZE, "CONECT%5d", i);
679              ofs << buffer;
680              for (nbr = atom->BeginNbrAtom(k);nbr;nbr = atom->NextNbrAtom(k))
681 <            {
682 <                sprintf(buffer,"%5d", nbr->GetIdx());
681 >              {
682 >                snprintf(buffer, BUFF_SIZE, "%5d", nbr->GetIdx());
683                  ofs << buffer;
684 <            }
684 >              }
685              for (count = 0; count < (4 - (int)atom->GetValence()); count++)
686 <            {
687 <                sprintf(buffer, "     ");
686 >              {
687 >                snprintf(buffer, BUFF_SIZE, "     ");
688                  ofs << buffer;
689 <            }
689 >              }
690              ofs << "                                       " << endl;
691 <        }
692 <    }
693 <    sprintf(buffer,"MASTER        0    0    0    0    0    0    0    0 ");
691 >          }
692 >      }
693 >    snprintf(buffer, BUFF_SIZE, "MASTER        0    0    0    0    0    0    0    0 ");
694      ofs << buffer;
695 <    sprintf(buffer,"%4d    0 %4d    0",mol.NumAtoms(),mol.NumAtoms());
696 <    ofs << buffer << endl;
697 <    sprintf(buffer,"END");
598 <    ofs << buffer << endl;
695 >    snprintf(buffer, BUFF_SIZE, "%4d    0 %4d    0\n",mol.NumAtoms(),mol.NumAtoms());
696 >    ofs << buffer;
697 >    ofs << "END\n";
698      return(true);
699 < }
699 >  }
700  
701  
702   } //namespace OpenBabel

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines