OpenMD 3.0
Molecular Dynamics in the Open
Loading...
Searching...
No Matches
openmdformat.cpp
1/**********************************************************************
2Copyright (C) 2000 by OpenEye Scientific Software, Inc.
3Some portions Copyright (C) 2001-2006 by Geoffrey R. Hutchison
4Some portions Copyright (C) 2004 by Chris Morley
5Some portions Copyright (C) 2004-present by J. Daniel Gezelter
6
7This program is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation version 2 of the License.
10
11This program is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15***********************************************************************/
16
17#include <fstream>
18
19#include <openbabel/atom.h>
20#include <openbabel/babelconfig.h>
21#include <openbabel/bond.h>
22#include <openbabel/chains.h>
23#include <openbabel/data.h>
24#include <openbabel/mol.h>
25#include <openbabel/obiter.h>
26#include <openbabel/obmolecformat.h>
27#include <openbabel/obutil.h>
28
29#include "utils/StringUtils.hpp"
30
31using namespace std;
32namespace OpenBabel {
33
34 class OpenMDFormat : public OBMoleculeFormat {
35 public:
36 // Register this format type ID
37 OpenMDFormat() { OBConversion::RegisterFormat("omd", this); }
38
39 virtual const char* Description() // required
40 {
41 return "OpenMD combined meta-data / cartesian coordinates format\n\
42 No comments yet\n";
43 };
44
45 virtual const char* SpecificationURL() {
46 return "http://openmd.org";
47 }; // optional
48
49 virtual const char* GetMIMEType() { return "chemical/x-omd"; };
50
51 virtual unsigned int Flags() { return NOTREADABLE | WRITEONEONLY; }
52
53 virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv);
54
55 private:
56 bool AreSameFragments(OBMol& mol, vector<int>& frag1, vector<int>& frag2);
57 OBMol* createMolFromFragment(OBMol& mol, vector<int>& fragment);
58 void WriteMDFile(vector<OBMol*> mols, vector<int> numMols, ostream& os,
59 OBMol& mol, vector<int>& indices);
60 void CalcBoundingBox(OBMol& mol, double& min_x, double& max_x,
61 double& min_y, double& max_y, double& min_z,
62 double& max_z);
63 };
64
65 // Make an instance of the format class
66 OpenMDFormat theOpenMDFormat;
67
68 bool OpenMDFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv) {
69 OBMol* pmol = dynamic_cast<OBMol*>(pOb);
70 if (pmol == NULL) return false;
71
72 vector<vector<int>> fragmentLists;
73 pmol->ContigFragList(fragmentLists);
74 OBBitVec unused;
75 vector<bool> used(fragmentLists.size(), 0);
76 vector<vector<int>> molecules;
77 vector<int> indices;
78 for (std::size_t i = 0; i < used.size(); ++i) {
79 if (used[i]) continue;
80
81 used[i] = true;
82 vector<int> sameMolTypes;
83 sameMolTypes.push_back(i);
84 indices.insert(indices.end(), fragmentLists[i].begin(),
85 fragmentLists[i].end());
86 for (std::size_t j = i + 1; j < used.size(); ++j) {
87 if (used[j]) continue;
88
89 if (AreSameFragments(*pmol, fragmentLists[i], fragmentLists[j])) {
90 sameMolTypes.push_back(j);
91 indices.insert(indices.end(), fragmentLists[j].begin(),
92 fragmentLists[j].end());
93 used[j] = true;
94 }
95 }
96 molecules.push_back(sameMolTypes);
97 }
98
99 vector<OBMol*> mdMols;
100 vector<int> numMols;
101 for (vector<vector<int>>::iterator i = molecules.begin();
102 i != molecules.end(); ++i) {
103 mdMols.push_back(createMolFromFragment(*pmol, fragmentLists[i->front()]));
104 numMols.push_back((*i).size());
105 }
106
107 string OutputFileName = pConv->GetInFilename();
108 size_t pos = OutputFileName.rfind(".");
109 if (pos != string::npos)
110 OutputFileName = OutputFileName.substr(0, pos) + ".omd";
111 else
112 OutputFileName += ".omd";
113
114 ofstream ofs(OutputFileName.c_str());
115 if (!ofs) {
116 cerr << "Cannot write to " << OutputFileName << endl;
117 return false;
118 }
119
120 WriteMDFile(mdMols, numMols, ofs, *pmol, indices);
121
122 for (vector<OBMol*>::iterator i = mdMols.begin(); i != mdMols.end(); ++i) {
123 delete *i;
124 }
125
126 return (true);
127 }
128
129 bool OpenMDFormat::AreSameFragments(OBMol& mol, vector<int>& frag1,
130 vector<int>& frag2) {
131 if (frag1.size() != frag2.size()) return false;
132
133 // Exact graph matching is an NP complete problem.
134 // This just matches all of the atom atomic numbers and may falsely
135 // detect identical fragments which aren't really identical.
136 // @todo using sparse matrix to store the connectivities
137
138 for (unsigned int i = 0; i < frag1.size(); ++i) {
139 OBAtom* atom1 = mol.GetAtom(frag1[i]);
140 OBAtom* atom2 = mol.GetAtom(frag2[i]);
141
142 if (atom1->GetAtomicNum() != atom2->GetAtomicNum()) return false;
143 }
144 return true;
145 }
146
147 struct SameAngle {
148 bool operator()(const triple<OBAtom*, OBAtom*, OBAtom*> t1,
149 const triple<OBAtom*, OBAtom*, OBAtom*> t2) const {
150 return (t1.second == t2.second) &&
151 ((t1.first == t2.first && t1.third == t2.third) ||
152 (t1.first == t2.third && t1.third == t2.first));
153 }
154 };
155
156 OBMol* OpenMDFormat::createMolFromFragment(OBMol& mol,
157 vector<int>& fragment) {
158 OBMol* newMol = new OBMol();
159
160 newMol->ReserveAtoms(fragment.size());
161 newMol->BeginModify();
162 for (vector<int>::iterator i = fragment.begin(); i != fragment.end(); ++i) {
163 OBAtom* newAtom = newMol->NewAtom();
164 *newAtom = *mol.GetAtom(*i);
165 }
166
167 newMol->EndModify();
168 newMol->ConnectTheDots();
169 newMol->PerceiveBondOrders();
170
171 return newMol;
172 }
173
174 void OpenMDFormat::WriteMDFile(vector<OBMol*> mols, vector<int> numMols,
175 ostream& os, OBMol& mol,
176 vector<int>& indices) {
177 std::string molPrefix("MolName");
178 std::string resName;
179 unsigned int i;
180 const int BUFFLEN = 1024;
181 char buffer[BUFFLEN];
182 string str, str1, str2, str3;
183 bool molIsWater = false;
184 OBResidue* r;
185 double min_x, max_x, min_y, max_y, min_z, max_z; /* Edges of bounding box */
186
187 os << "<OpenMD version=2>" << endl;
188 os << " <MetaData>" << endl << endl;
189
190 for (i = 0; i < mols.size(); ++i) {
191 OBMol* pmol = mols[i];
192 map<OBAtom*, int> atomMap;
193
194 molIsWater = false;
195 FOR_RESIDUES_OF_MOL(residue, *pmol) {
196 if (residue->GetName().compare("HOH") == 0) { molIsWater = true; }
197 }
198
199 if (molIsWater) {
200 // water include files define all of the known water types
201 os << "#include \"water.omd\";\n";
202 pmol->SetTitle("HOH");
203 } else {
204 os << "molecule {\n";
205 snprintf(buffer, BUFFLEN, "%u", i);
206 os << " name = \"" << molPrefix << buffer << "\";\n";
207
208 int ai = 0;
209 FOR_ATOMS_OF_MOL(atom, *pmol) {
210 str = atom->GetType();
211
212 r = atom->GetResidue();
213
214 if (r == NULL)
215 resName = "NULL";
216 else
217 resName = r->GetName();
218
219 if (resName.compare("NULL") == 0 || resName.compare("LIG") == 0 ||
220 resName.compare("UNL") == 0 || resName.compare("UNK") == 0) {
221 // Either couldn't find a residue at all or couldn't find a
222 // reasonable residue name to use. We'll punt and use
223 // OpenBabel's internal atom typing:
224 ttab.SetFromType("INT");
225 ttab.SetToType("INT");
226 ttab.Translate(str1, str);
227 } else {
228 // If we know what residue we've got, the specific atom name can
229 // be used to help specify partial charges.
230
231 // resdat.SetResName(resName);
232
233 // atom type from residue:
234 str = r->GetAtomID(&*atom);
235
236 // arginine has separate indices for chemically-identical
237 // nitrogen atoms:
238 if (resName.compare("ARG") == 0) {
239 if (str.compare("NH1") == 0 || str.compare("NH2") == 0) {
240 str = "NH";
241 }
242 }
243 if (resName.compare("VAL") == 0) {
244 if (str.compare("CG1") == 0 || str.compare("CG2") == 0) {
245 str = "CG";
246 }
247 }
248 if (resName.compare("LEU") == 0) {
249 if (str.compare("CD1") == 0 || str.compare("CD2") == 0) {
250 str = "CD";
251 }
252 }
253 if (resName.compare("ASP") == 0) {
254 if (str.compare("OD1") == 0 || str.compare("OD2") == 0) {
255 str = "OD";
256 }
257 }
258 if (resName.compare("GLU") == 0) {
259 if (str.compare("OE1") == 0 || str.compare("OE2") == 0) {
260 str = "OE";
261 }
262 }
263 if (resName.compare("TYR") == 0) {
264 if (str.compare("CD1") == 0 || str.compare("CD2") == 0) {
265 str = "CD";
266 }
267 if (str.compare("CE1") == 0 || str.compare("CE2") == 0) {
268 str = "CE";
269 }
270 }
271
272 if ((&*atom)->GetAtomicNum() == 1) {
273 FOR_NBORS_OF_ATOM(nbr, *atom) {
274 str2 = r->GetAtomID(&*nbr);
275 size_t startpos = str2.find_first_not_of(" ");
276 size_t endpos = str2.find_last_not_of(" ");
277 if ((endpos - startpos) < 1) {
278 // if the bonded atom type has only one character (i.e. N)
279 // then the hydrogen will be labeled "HN" to show what
280 // kind of proton it is:
281 str3 = str2;
282 } else {
283 if (str2.compare("OH") == 0) {
284 str3 = "O";
285 } else {
286 // When the bonded atom type is more specific, we drop
287 // the first character: i.e. H bonded to OG1 is HG1 type:
288 str3 = str2.substr(startpos + 1, endpos - startpos);
289 }
290 }
291 str = "H" + str3;
292 }
293 // same problem with arginine NH atoms, but now for connected
294 // hydrogens
295 if (resName.compare("ARG") == 0) {
296 if (str.compare("HH1") == 0 || str.compare("HH2") == 0) {
297 str = "HH";
298 }
299 }
300 if (resName.compare("VAL") == 0) {
301 if (str.compare("HG1") == 0 || str.compare("HG2") == 0) {
302 str = "HG";
303 }
304 }
305 if (resName.compare("LEU") == 0) {
306 if (str.compare("HD1") == 0 || str.compare("HD2") == 0) {
307 str = "HD";
308 }
309 }
310 if (resName.compare("TYR") == 0) {
311 if (str.compare("HD1") == 0 || str.compare("HD2") == 0) {
312 str = "HD";
313 }
314 if (str.compare("HE1") == 0 || str.compare("HE2") == 0) {
315 str = "HE";
316 }
317 }
318 }
319
320 // atom type from residue table:
321 // resdat.LookupType(str, str2, hyb);
322 size_t startpos = str.find_first_not_of(" ");
323 size_t endpos = str.find_last_not_of(" ");
324 str = str.substr(startpos, endpos - startpos + 1);
325 str1 = resName + "-" + str;
326 }
327 os << " atom[" << ai << "] { ";
328 os << "type = "
329 << "\"" << str1 << "\""
330 << "; ";
331 os << "position( " << (&*atom)->GetX() << ", " << (&*atom)->GetY()
332 << ", " << (&*atom)->GetZ() << ");";
333 os << "}\n";
334 atomMap[&(*atom)] = ai++;
335 }
336 os << "\n";
337
338 // bond
339
340 int bi;
341 double bo;
342 OBAtom *a, *b;
343
344 FOR_BONDS_OF_MOL(bond, *pmol) {
345 a = bond->GetBeginAtom();
346 b = bond->GetEndAtom();
347
348 ai = atomMap[a];
349 bi = atomMap[b];
350 bo = bond->GetBondOrder();
351 if (bond->IsAromatic()) bo = 1.5;
352 // e.g., in Cp rings, may not be "aromatic" by OB but check
353 // for explicit hydrogen counts (e.g., biphenyl inter-ring
354 // is not aromatic)
355 if ((a->GetType()[2] == 'R' && b->GetType()[2] == 'R') &&
356 (a->ExplicitHydrogenCount() == 1 &&
357 b->ExplicitHydrogenCount() == 1))
358 bo = 1.5;
359 if (bond->IsAmide()) bo = 1.41;
360
361 os << " bond { ";
362
363 if (ai < bi)
364 os << "members(" << ai << ", " << bi << "); ";
365 else
366 os << "members(" << bi << ", " << ai << "); ";
367
368 if (bo != 1) os << "bondOrder = " << bo << "; ";
369
370 os << "}" << endl;
371 }
372
373 os << endl;
374
375 os << "}" << endl;
376 os << endl;
377 }
378 }
379
380 os << endl;
381
382 for (i = 0; i < mols.size(); ++i) {
383 OBMol* pmol = mols[i];
384 os << "component{" << endl;
385 if (std::string(pmol->GetTitle()).compare("HOH") == 0) {
386 os << " type = "
387 << "\"HOH\""
388 << "; // change to appropriate water model" << endl;
389 } else {
390 snprintf(buffer, BUFFLEN, "%u", i);
391 os << " type = " << molPrefix << buffer << ";" << endl;
392 }
393 os << " nMol = " << numMols[i] << ";" << endl;
394 os << "}" << endl;
395 }
396
397 os << " </MetaData>" << endl;
398 os << " <Snapshot>" << endl;
399 os << " <FrameData>" << endl;
400
401 snprintf(buffer, BUFFLEN, " Time: %.10g", 0.0);
402
403 os << buffer << endl;
404
405 CalcBoundingBox(mol, min_x, max_x, min_y, max_y, min_z, max_z);
406
407 // still to do: should compute a bounding box here
408 snprintf(
409 buffer, BUFFLEN,
410 " Hmat: {{ %.10g, %.10g, %.10g }, { %.10g, %.10g, %.10g }, { "
411 "%.10g, "
412 "%.10g, %.10g }}",
413 max_x - min_x, 0.0, 0.0, 0.0, max_y - min_y, 0.0, 0.0, 0.0,
414 max_z - min_z);
415
416 os << buffer << endl;
417 os << " </FrameData>" << endl;
418 os << " <StuntDoubles>" << endl;
419
420 OBAtom* atom;
421
422 for (vector<int>::iterator i = indices.begin(); i != indices.end(); ++i) {
423 atom = mol.GetAtom(*i);
424 snprintf(buffer, BUFFLEN,
425 "%10d %7s %18.10g %18.10g %18.10g %13e %13e %13e", *i - 1, "pv",
426 atom->GetX(), atom->GetY(), atom->GetZ(), 0.0, 0.0, 0.0);
427 os << buffer << endl;
428 }
429 os << " </StuntDoubles>" << endl;
430 os << " </Snapshot>" << endl;
431 os << "</OpenMD>" << endl;
432 }
433
434 void OpenMDFormat::CalcBoundingBox(OBMol& mol, double& min_x, double& max_x,
435 double& min_y, double& max_y,
436 double& min_z, double& max_z) {
437 /* ---- Init bounding-box variables ---- */
438 min_x = (double)0.0;
439 max_x = (double)0.0;
440 min_y = (double)0.0;
441 max_y = (double)0.0;
442 min_z = (double)0.0;
443 max_z = (double)0.0;
444
445 /* ---- Check all atoms ---- */
446 for (unsigned int i = 1; i <= mol.NumAtoms(); ++i) {
447 /* ---- Get a pointer to ith atom ---- */
448 OBAtom* atom = mol.GetAtom(i);
449
450 /* ---- Check for minimal/maximal x-position ---- */
451 if (atom->GetX() < min_x) min_x = atom->GetX();
452 if (atom->GetX() > max_x) max_x = atom->GetX();
453
454 /* ---- Check for minimal/maximal y-position ---- */
455 if (atom->GetY() < min_y) min_y = atom->GetY();
456 if (atom->GetY() > max_y) max_y = atom->GetY();
457
458 /* ---- Check for minimal/maximal z-position ---- */
459 if (atom->GetZ() < min_z) min_z = atom->GetZ();
460 if (atom->GetZ() > max_z) max_z = atom->GetZ();
461 }
462 }
463} // namespace OpenBabel