ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/OOPSE-4/src/openbabel/chains.cpp
Revision: 3057
Committed: Thu Oct 19 20:49:05 2006 UTC (17 years, 9 months ago) by gezelter
File size: 61686 byte(s)
Log Message:
updated OpenBabel to version 2.0.2

File Contents

# Content
1 /**********************************************************************
2 chains.cpp - Parse for macromolecule chains and residues.
3
4 Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc.
5 (original author, Roger Sayle, version 1.6, March 1998)
6 (modified by Joe Corkery, OpenEye Scientific Software, March 2001)
7 Some portions Copyright (C) 2001-2006 by Geoffrey R. Hutchison
8
9 This file is part of the Open Babel project.
10 For more information, see <http://openbabel.sourceforge.net/>
11
12 This program is free software; you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation version 2 of the License.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
20 ***********************************************************************/
21
22 //////////////////////////////////////////////////////////////////////////////
23 // File Includes
24 //////////////////////////////////////////////////////////////////////////////
25 #include "config.h"
26
27 #include <stdlib.h>
28 #include <string.h>
29 #include <stdio.h>
30 #include <ctype.h>
31 #include <map>
32
33 #include "mol.hpp"
34 #include "chains.hpp"
35
36 using namespace std;
37
38 //////////////////////////////////////////////////////////////////////////////
39 // Preprocessor Definitions
40 //////////////////////////////////////////////////////////////////////////////
41
42 //! The first available index for actual residues
43 //! 0, 1, 2 reserved for UNK, HOH, LIG
44 #define RESIDMIN 3
45 //! The maximum number of residue IDs for this code
46 #define RESIDMAX 32
47
48 //! An index of the residue names perceived during a run
49 //! 0, 1, and 2 reserved for UNK, HOH, LIG
50 static char ChainsResName[RESIDMAX][4] = {
51 /*0*/ "UNK", /*1*/ "HOH", /*2*/ "LIG"
52 };
53
54 #define ATOMMINAMINO 4
55 #define ATOMMINNUCLEIC 50
56 #define MAXPEPTIDE 11
57 #define MAXNUCLEIC 15
58 //! The number of amino acids recognized by this code
59 //! Currently: ILE, VAL, ALA, ASN, ASP, ARG, CYS, GLN, GLU
60 //! GLY, HIS, HYP, LEU, LYS, MET, PHE, PRO, SER, THR, TRP, TYR
61 #define AMINOMAX 21
62 //! The number of nucleic acids recognized by this code
63 //! Currently A, C, T, G, U, I
64 #define NUCLEOMAX 6
65 #define STACKSIZE 20
66
67 #define AI_N 0
68 #define AI_CA 1
69 #define AI_C 2
70 #define AI_O 3
71 #define AI_OXT 37
72
73 #define AI_P 38
74 #define AI_O1P 39
75 #define AI_O2P 40
76 #define AI_O5 41
77 #define AI_C5 42
78 #define AI_C4 43
79 #define AI_O4 44
80 #define AI_C3 45
81 #define AI_O3 46
82 #define AI_C2 47
83 #define AI_O2 48
84 #define AI_C1 49
85
86 #define BitN 0x0001
87 #define BitNTer 0x0002
88 #define BitNPro 0x0004
89 #define BitNPT 0x0008
90 #define BitCA 0x0010
91 #define BitCAGly 0x0020
92 #define BitC 0x0100
93 #define BitCTer 0x0200
94 #define BitCOXT 0x0400
95 #define BitO 0x1000
96 #define BitOXT 0x2000
97
98 #define BitNAll 0x000F
99 #define BitCAAll 0x0030
100 #define BitCAll 0x0700
101 #define BitOAll 0x3000
102
103 #define BitP 0x0001
104 #define BitPTer 0x0002
105 #define BitOP 0x0004
106 #define BitO5 0x0008
107 #define BitO5Ter 0x0010
108 #define BitC5 0x0020
109 #define BitC4 0x0040
110 #define BitO4 0x0080
111 #define BitC3 0x0100
112 #define BitO3 0x0200
113 #define BitO3Ter 0x0400
114 #define BitC2RNA 0x0800
115 #define BitC2DNA 0x1000
116 #define BitO2 0x2000
117 #define BitC1 0x4000
118
119 #define BitPAll 0x0003
120 #define Bit05All 0x0018
121 #define BitO3All 0x0600
122 #define BitC2All 0x1800
123
124 #define BitVisit 0x8000
125
126 #define BC_ASSIGN 0x01
127 #define BC_COUNT 0x02
128 #define BC_ELEM 0x03
129 #define BC_EVAL 0x04
130 #define BC_IDENT 0x05
131 #define BC_LOCAL 0x06
132
133 #define BF_SINGLE 0x01
134 #define BF_DOUBLE 0x02
135 #define BF_TRIPLE 0x04
136 #define BF_AROMATIC 0x08
137
138 namespace OpenBabel
139 {
140
141 OBChainsParser chainsparser;
142
143 //////////////////////////////////////////////////////////////////////////////
144 // Structure / Type Definitions
145 //////////////////////////////////////////////////////////////////////////////
146
147 //! Template for backbone atoms in chain perception
148 typedef struct Template
149 {
150 int flag; //!< binary flag representing this atom type
151 short elem; //!< atomic number of this element
152 short count; //!< expected valence for this atom type
153 int n1; //!< mask 1 used by ConstrainBackbone() and MatchConstraint()
154 int n2; //!< mask 2 used by ConstrainBackbone() and MatchConstraint()
155 int n3; //!< mask 3 used by ConstrainBackbone() and MatchConstraint()
156 int n4; //!< mask 4 used by ConstrainBackbone() and MatchConstraint()
157 }
158 Template;
159
160 //! Generic template for peptide residue backbone
161 static Template Peptide[MAXPEPTIDE] = {
162 /* N */ { 0x0001, 7, 2, 0x0030, 0x0100, 0, 0 },
163 /* NTer */ { 0x0002, 7, 1, 0x0030, 0, 0, 0 },
164 /* NPro */ { 0x0004, 7, 3, 0x0030, 0x0100, -6, 0 },
165 /* NPT */ { 0x0008, 7, 2, 0x0030, -6, 0, 0 },
166 /* CA */ { 0x0010, 6, 3, 0x000F, 0x0700, -6, 0 },
167 /* CAGly */ { 0x0020, 6, 2, 0x0003, 0x0700, 0, 0 },
168 /* C */ { 0x0100, 6, 3, 0x0030, 0x1000, 0x0005, 0 },
169 /* CTer */ { 0x0200, 6, 2, 0x0030, 0x1000, 0, 0 },
170 /* COXT */ { 0x0400, 6, 3, 0x0030, 0x1000, 0x2000, 0 },
171 /* O */ { 0x1000, 8, 1, 0x0700, 0, 0, 0 },
172 /* OXT */ { 0x2000, 8, 1, 0x0400, 0, 0, 0 }
173 };
174
175 //! Generic template for peptide nucleotide backbone
176 static Template Nucleotide[MAXNUCLEIC] = {
177 /* P */ { 0x0001, 15, 4, 0x0004, 0x0004, 0x0008, 0x0200 },
178 /* PTer */ { 0x0002, 15, 3, 0x0004, 0x0004, 0x0008, 0 },
179 /* OP */ { 0x0004, 8, 1, 0x0003, 0, 0, 0 },
180 /* O5 */ { 0x0008, 8, 2, 0x0020, 0x0003, 0, 0 },
181 /* O5Ter */ { 0x0010, 8, 1, 0x0020, 0, 0, 0 },
182 /* C5 */ { 0x0020, 6, 2, 0x0018, 0x0040, 0, 0 },
183 /* C4 */ { 0x0040, 6, 3, 0x0020, 0x0080, 0x0100, 0 },
184 /* O4 */ { 0x0080, 8, 2, 0x0040, 0x4000, 0, 0 },
185 /* C3 */ { 0x0100, 6, 3, 0x0040, 0x0600, 0x1800, 0 },
186 /* O3 */ { 0x0200, 8, 2, 0x0100, 0x0001, 0, 0 },
187 /* O3Ter */ { 0x0400, 8, 1, 0x0100, 0, 0, 0 },
188 /* C2RNA */ { 0x0800, 6, 3, 0x0100, 0x4000, 0x2000, 0 },
189 /* C2DNA */ { 0x1000, 6, 2, 0x0100, 0x4000, 0, 0 },
190 /* O2 */ { 0x2000, 8, 1, 0x0800, 0, 0, 0 },
191 /* C1 */ { 0x4000, 6, 3, 0x0080, 0x1800, -7, 0 }
192 };
193
194
195 //////////////////////////////////////////////////////////////////////////////
196 // Global Variables / Tables
197 //////////////////////////////////////////////////////////////////////////////
198
199 //! The number of PDB atom type names recognized by this code
200 #define ATOMMAX 68
201
202 //! PDB atom types (i.e., columns 13-16 of a PDB file)
203 //! index numbers from this array are used in the pseudo-SMILES format
204 //! for side-chains in the AminoAcids[] & Nucleotides[] global arrays below
205 static char ChainsAtomName[ATOMMAX][4] = {
206 /* 0 */ { ' ', 'N', ' ', ' ' },
207 /* 1 */ { ' ', 'C', 'A', ' ' },
208 /* 2 */ { ' ', 'C', ' ', ' ' },
209 /* 3 */ { ' ', 'O', ' ', ' ' },
210 /* 4 */ { ' ', 'C', 'B', ' ' },
211 /* 5 */ { ' ', 'S', 'G', ' ' },
212 /* 6 */ { ' ', 'O', 'G', ' ' },
213 /* 7 */ { ' ', 'C', 'G', ' ' },
214 /* 8 */ { ' ', 'O', 'G', '1' },
215 /* 9 */ { ' ', 'C', 'G', '1' },
216 /* 10 */ { ' ', 'C', 'G', '2' },
217 /* 11 */ { ' ', 'C', 'D', ' ' },
218 /* 12 */ { ' ', 'O', 'D', ' ' },
219 /* 13 */ { ' ', 'S', 'D', ' ' },
220 /* 14 */ { ' ', 'C', 'D', '1' },
221 /* 15 */ { ' ', 'O', 'D', '1' },
222 /* 16 */ { ' ', 'N', 'D', '1' },
223 /* 17 */ { ' ', 'C', 'D', '2' },
224 /* 18 */ { ' ', 'O', 'D', '2' },
225 /* 19 */ { ' ', 'N', 'D', '2' },
226 /* 20 */ { ' ', 'C', 'E', ' ' },
227 /* 21 */ { ' ', 'N', 'E', ' ' },
228 /* 22 */ { ' ', 'C', 'E', '1' },
229 /* 23 */ { ' ', 'O', 'E', '1' },
230 /* 24 */ { ' ', 'N', 'E', '1' },
231 /* 25 */ { ' ', 'C', 'E', '2' },
232 /* 26 */ { ' ', 'O', 'E', '2' },
233 /* 27 */ { ' ', 'N', 'E', '2' },
234 /* 28 */ { ' ', 'C', 'E', '3' },
235 /* 29 */ { ' ', 'C', 'Z', ' ' },
236 /* 30 */ { ' ', 'N', 'Z', ' ' },
237 /* 31 */ { ' ', 'C', 'Z', '2' },
238 /* 32 */ { ' ', 'C', 'Z', '3' },
239 /* 33 */ { ' ', 'O', 'H', ' ' },
240 /* 34 */ { ' ', 'N', 'H', '1' },
241 /* 35 */ { ' ', 'N', 'H', '2' },
242 /* 36 */ { ' ', 'C', 'H', '2' },
243 /* 37 */ { ' ', 'O', 'X', 'T' },
244 /* 38 */ { ' ', 'P', ' ', ' ' },
245 /* 39 */ { ' ', 'O', '1', 'P' },
246 /* 40 */ { ' ', 'O', '2', 'P' },
247 /* 41 */ { ' ', 'O', '5', '*' },
248 /* 42 */ { ' ', 'C', '5', '*' },
249 /* 43 */ { ' ', 'C', '4', '*' },
250 /* 44 */ { ' ', 'O', '4', '*' },
251 /* 45 */ { ' ', 'C', '3', '*' },
252 /* 46 */ { ' ', 'O', '3', '*' },
253 /* 47 */ { ' ', 'C', '2', '*' },
254 /* 48 */ { ' ', 'O', '2', '*' },
255 /* 49 */ { ' ', 'C', '1', '*' },
256 /* 50 */ { ' ', 'N', '9', ' ' },
257 /* 51 */ { ' ', 'C', '8', ' ' },
258 /* 52 */ { ' ', 'N', '7', ' ' },
259 /* 53 */ { ' ', 'C', '5', ' ' },
260 /* 54 */ { ' ', 'C', '6', ' ' },
261 /* 55 */ { ' ', 'O', '6', ' ' },
262 /* 56 */ { ' ', 'N', '6', ' ' },
263 /* 57 */ { ' ', 'N', '1', ' ' },
264 /* 58 */ { ' ', 'C', '2', ' ' },
265 /* 59 */ { ' ', 'O', '2', ' ' },
266 /* 60 */ { ' ', 'N', '2', ' ' },
267 /* 61 */ { ' ', 'N', '3', ' ' },
268 /* 62 */ { ' ', 'C', '4', ' ' },
269 /* 63 */ { ' ', 'O', '4', ' ' },
270 /* 64 */ { ' ', 'N', '4', ' ' },
271 /* 65 */ { ' ', 'C', '5', ' ' },
272 /* 66 */ { ' ', 'C', '5', 'M' },
273 /* 67 */ { ' ', 'C', '6', ' ' }
274 };
275
276 //! Definition of side chains, associating overall residue name with
277 //! the pseudo-SMILES pattern
278 typedef struct
279 {
280 char *name; //!< Residue name, standardized by PDB
281 char *data; //!< pseudo-SMILES definition of side-chain
282 }
283 ResidType;
284
285 //! Side chains for recognized amino acids using a pseudo-SMARTS syntax
286 //! for branching and bonds. Numbers indicate atom types defined by
287 //! ChainsAtomName global array above
288 static ResidType AminoAcids[AMINOMAX] = {
289 { "ILE", "1-4(-9-14)-10" },
290 { "VAL", "1-4(-9)-10" },
291 { "ALA", "1-4" },
292 { "ASN", "1-4-7(=15)-19" },
293 { "ASP", "1-4-7(=15)-18" },
294 { "ARG", "1-4-7-11-21-29(=34)-35" },
295 { "CYS", "1-4-5" },
296 { "GLN", "1-4-7-11(=23)-27" },
297 { "GLU", "1-4-7-11(=23)-26" },
298 { "GLY", "1" },
299 { "HIS", "1-4-7^16~22^27^17~7" },
300 { "HYP", "1-4-7(-12)-11-0" },
301 { "LEU", "1-4-7(-14)-17" },
302 { "LYS", "1-4-7-11-20-30" },
303 { "MET", "1-4-7-13-20" },
304 { "PHE", "1-4-7~14^22~29^25~17^7" },
305 { "PRO", "1-4-7-11-0" },
306 { "SER", "1-4-6" },
307 { "THR", "1-4(-8)-10" },
308 { "TRP", "1-4-7~14^24^25~17(^7)^28~32^36~31^25" },
309 { "TYR", "1-4-7~14^22~29(-33)^25~17^7" }
310 };
311 // Other possible amino acid templates (less common)
312 /* Pyroglutamate (PCA): 1-4-7-11(=" OE ")-0 PDB Example: 1CEL */
313 /* Amino-N-Butyric Acid (ABA): 1-4-7 PDB Example: 1BBO */
314 /* Selenic Acid (SEC): 1-4-"SEG "(-15)-18 PDB Example: 1GP1 */
315
316 //! Side chains for recognized nucleotides using a pseudo-SMARTS syntax
317 //! for branching and bonds. Numbers indicate atom types defined by
318 //! ChainsAtomName global array above
319 static ResidType Nucleotides[NUCLEOMAX] = {
320 { " A", "49-50-51-52-53-54(-56)-57-58-61-62(-53)-50" },
321 { " C", "49-57-58(-59)-61-62(-64)-65-67-57" },
322 { " G", "49-50-51-52-53-54(-55)-57-58(-60)-61-62(-53)-50" },
323 { " T", "49-57-58(-59)-61-62(-63)-65(-66)-67-57" },
324 { " U", "49-57-58(-59)-61-62(-63)-65-67-57" },
325 { " I", "49-50-51-52-53-54(-55)-57-58-61-62(-53)-50" }
326 };
327
328
329 typedef struct
330 {
331 int atomid,elem;
332 int bcount;
333 int index;
334 }
335 MonoAtomType;
336
337 typedef struct
338 {
339 int src,dst;
340 int index;
341 int flag;
342 }
343 MonoBondType;
344
345 typedef struct
346 {
347 int type;
348 union _ByteCode *next;
349 }
350 MonOpStruct;
351
352 typedef struct
353 {
354 int type;
355 int value;
356 union _ByteCode *tcond;
357 union _ByteCode *fcond;
358 }
359 BinOpStruct;
360
361 //! Output array -- residue id, atom id, bond flags, etc.
362 typedef struct
363 {
364 int type;
365 int resid;
366 int *atomid;
367 int *bflags;
368 }
369 AssignStruct;
370
371 //! Chemical graph matching virtual machine
372 typedef union _ByteCode
373 {
374 int type;
375 MonOpStruct eval; //!< Eval - push current neighbors onto the stack
376 BinOpStruct count; //!< Count - test the number of eval bonds
377 BinOpStruct elem; //!< Element - test the element of current atom
378 BinOpStruct ident; //!< Ident - test the atom for backbone identity
379 BinOpStruct local; //!< Local - test whether the atom has been visited
380 AssignStruct assign; //!< Assign - assign residue name, atom name and bond type to output
381 } ByteCode;
382
383 typedef struct
384 {
385 int atom,bond;
386 int prev;
387 }
388 StackType;
389
390 static MonoAtomType MonoAtom[MaxMonoAtom];
391 static MonoBondType MonoBond[MaxMonoBond];
392 static int MonoAtomCount;
393 static int MonoBondCount;
394
395 static StackType Stack[STACKSIZE];
396 static int StackPtr;
397
398 static int AtomIndex;
399 static int BondIndex;
400 static bool StrictFlag = false;
401
402 //////////////////////////////////////////////////////////////////////////////
403 // Static Functions
404 //////////////////////////////////////////////////////////////////////////////
405
406 static ByteCode *AllocateByteCode(int type)
407 {
408 ByteCode *result;
409
410 result = (ByteCode*)malloc(sizeof(ByteCode));
411 if( !result )
412 {
413 obErrorLog.ThrowError(__func__, "Unable to allocate byte codes for biomolecule residue perception.", obError);
414 // exit(1);
415 }
416 result->type = type;
417 result->eval.next = NULL;
418 result->count.tcond = NULL;
419 result->count.fcond = NULL;
420 result->elem.tcond = NULL;
421 result->elem.fcond = NULL;
422 result->ident.tcond = NULL;
423 result->ident.fcond = NULL;
424 result->local.tcond = NULL;
425 result->local.fcond = NULL;
426 result->assign.atomid = NULL;
427 result->assign.bflags = NULL;
428
429 return (result);
430 }
431
432 //! Free a ByteCode and all corresponding data
433 static void DeleteByteCode(ByteCode *node)
434 {
435 if (node == NULL)
436 return;
437 else
438 {
439 switch (node->type)
440 {
441 case BC_ASSIGN:
442
443 if (node->assign.atomid != NULL)
444 free(node->assign.atomid);
445 if (node->assign.bflags != NULL)
446 free(node->assign.bflags);
447
448 break;
449
450 case BC_COUNT:
451
452 DeleteByteCode(node->count.tcond);
453 DeleteByteCode(node->count.fcond);
454 break;
455 case BC_ELEM:
456
457 DeleteByteCode(node->elem.tcond);
458 DeleteByteCode(node->elem.fcond);
459 break;
460
461 case BC_EVAL:
462
463 DeleteByteCode(node->eval.next);
464 break;
465
466 case BC_IDENT:
467
468 DeleteByteCode(node->ident.tcond);
469 DeleteByteCode(node->ident.fcond);
470 break;
471
472 case BC_LOCAL:
473
474 DeleteByteCode(node->local.tcond);
475 DeleteByteCode(node->local.fcond);
476 break;
477 }
478
479 free(node);
480 }
481 }
482
483 static void FatalMemoryError(void)
484 {
485 obErrorLog.ThrowError(__func__, "Unable to allocate memory for biomolecule residue / chain perception.", obError);
486 // exit(1);
487 }
488
489 void GenerateByteCodes(ByteCode **node, int resid, int curr, int prev, int bond)
490 {
491 StackType neighbour[4];
492 StackType original;
493 int count,i,j;
494 ByteCode *ptr;
495 bool done,found;
496
497 if( curr != prev )
498 {
499 if( MonoAtom[curr].atomid < ATOMMINAMINO )
500 {
501 found = false;
502 while( *node && ((*node)->type==BC_IDENT) )
503 {
504 if( (*node)->ident.value == MonoAtom[curr].atomid )
505 {
506 node = (ByteCode**)&(*node)->ident.tcond;
507 found = true;
508 break;
509 }
510 else
511 node = (ByteCode**)&(*node)->ident.fcond;
512 }
513
514 if (!found)
515 {
516 ptr = AllocateByteCode(BC_IDENT);
517 ptr->ident.tcond = (ByteCode*)0;
518 ptr->ident.fcond = *node;
519 *node = ptr;
520 node = (ByteCode**)&ptr->ident.tcond;
521 ptr->ident.value = MonoAtom[curr].atomid;
522 }
523 MonoBond[bond].index = BondIndex++;
524 done = true;
525 }
526 else if( MonoAtom[curr].index != -1 )
527 {
528 while( *node && ((*node)->type==BC_IDENT) )
529 node = (ByteCode**)&(*node)->ident.fcond;
530
531 found = false;
532 while( *node && ((*node)->type==BC_LOCAL) )
533 {
534 if( (*node)->local.value == MonoAtom[curr].index )
535 {
536 node = (ByteCode**)&(*node)->local.tcond;
537 found = true;
538 break;
539 }
540 else
541 node = (ByteCode**)&(*node)->local.fcond;
542 }
543
544 if (!found)
545 {
546 ptr = AllocateByteCode(BC_LOCAL);
547 ptr->local.tcond = (ByteCode*)0;
548 ptr->local.fcond = *node;
549 *node = ptr;
550 node = (ByteCode**)&ptr->local.tcond;
551 ptr->local.value = MonoAtom[curr].index;
552 }
553
554 MonoBond[bond].index = BondIndex++;
555 done = true;
556 }
557 else
558 {
559 while( *node && ((*node)->type==BC_IDENT) )
560 node = (ByteCode**)&(*node)->ident.fcond;
561 while( *node && ((*node)->type==BC_LOCAL) )
562 node = (ByteCode**)&(*node)->local.fcond;
563
564 found = false;
565 while( *node && ((*node)->type==BC_ELEM) )
566 {
567 if( (*node)->elem.value == MonoAtom[curr].elem )
568 {
569 node = (ByteCode**)&(*node)->elem.tcond;
570 found = true;
571 break;
572 }
573 else
574 node = (ByteCode**)&(*node)->elem.fcond;
575 }
576
577 if( !found )
578 {
579 ptr = AllocateByteCode(BC_ELEM);
580 ptr->elem.tcond = (ByteCode*)0;
581 ptr->elem.fcond = *node;
582 *node = ptr;
583 node = (ByteCode**)&ptr->elem.tcond;
584 ptr->elem.value = MonoAtom[curr].elem;
585 }
586
587 MonoAtom[curr].index = AtomIndex++;
588 MonoBond[bond].index = BondIndex++;
589 done = false;
590 }
591 }
592 else
593 {
594 MonoAtom[curr].index = AtomIndex++;
595 done = false;
596 }
597
598 count = 0;
599 if (!done)
600 {
601 for( i=0; i<MonoBondCount; i++ )
602 {
603 if( MonoBond[i].src == curr )
604 {
605 if( MonoBond[i].dst != prev )
606 {
607 neighbour[count].atom = MonoBond[i].dst;
608 neighbour[count].bond = i;
609 count++;
610 }
611 }
612 else if( MonoBond[i].dst == curr )
613 {
614 if( MonoBond[i].src != prev )
615 {
616 neighbour[count].atom = MonoBond[i].src;
617 neighbour[count].bond = i;
618 count++;
619 }
620 }
621 }
622
623 if ( *node && ((*node)->type==BC_EVAL) )
624 {
625 found = false;
626 node = (ByteCode**)&(*node)->eval.next;
627 while( *node && ((*node)->type==BC_COUNT) )
628 {
629 if( (*node)->count.value == count )
630 {
631 node = (ByteCode**)&(*node)->count.tcond;
632 found = true;
633 break;
634 }
635 else
636 node = (ByteCode**)&(*node)->count.fcond;
637 }
638
639 if( !found )
640 {
641 ptr = AllocateByteCode(BC_COUNT);
642 ptr->count.tcond = (ByteCode*)0;
643 ptr->count.fcond = *node;
644 *node = ptr;
645 node = (ByteCode**)&ptr->count.tcond;
646 ptr->count.value = count;
647 }
648 }
649 else if( count || StrictFlag || StackPtr )
650 {
651 ptr = AllocateByteCode(BC_EVAL);
652 ptr->eval.next = *node;
653 *node = ptr;
654 node = (ByteCode**)&ptr->eval.next;
655
656 ptr = AllocateByteCode(BC_COUNT);
657 ptr->count.tcond = (ByteCode*)0;
658 ptr->count.fcond = *node;
659 *node = ptr;
660 node = (ByteCode**)&ptr->count.tcond;
661 ptr->count.value = count;
662 }
663 }
664
665 if( count == 1 )
666 {
667 GenerateByteCodes(node,resid,neighbour[0].atom, curr,neighbour[0].bond);
668 }
669 else if( count == 2 )
670 {
671 original = Stack[StackPtr++];
672 Stack[StackPtr-1] = neighbour[0];
673 Stack[StackPtr-1].prev = curr;
674 GenerateByteCodes(node,resid,neighbour[1].atom,
675 curr,neighbour[1].bond);
676 Stack[StackPtr-1] = neighbour[1];
677 Stack[StackPtr-1].prev = curr;
678 GenerateByteCodes(node,resid,neighbour[0].atom,
679 curr,neighbour[0].bond);
680 Stack[--StackPtr] = original;
681 }
682 else if( count )
683 {
684 stringstream errorMsg;
685 errorMsg << "Maximum Monomer Fanout Exceeded!" << endl;
686 errorMsg << "Residue " << ChainsResName[resid] << " atom "
687 << curr << endl;
688 errorMsg << "Previous = " << prev << " Fanout = " << count << endl;
689 obErrorLog.ThrowError(__func__, errorMsg.str(), obWarning);
690 }
691 else if( StackPtr )
692 {
693 StackPtr--;
694 GenerateByteCodes(node,resid,Stack[StackPtr].atom,
695 Stack[StackPtr].prev,Stack[StackPtr].bond);
696 StackPtr++;
697 }
698 else if( !(*node) )
699 {
700 ptr = AllocateByteCode(BC_ASSIGN);
701 ptr->assign.resid = resid;
702 ptr->assign.atomid = (int*)malloc(AtomIndex*sizeof(int));
703 if( !ptr->assign.atomid )
704 FatalMemoryError();
705 for( i=0; i<MonoAtomCount; i++ )
706 if( (j=MonoAtom[i].index) != -1 )
707 ptr->assign.atomid[j] = MonoAtom[i].atomid;
708 if( BondIndex )
709 {
710 ptr->assign.bflags = (int*)malloc(BondIndex*sizeof(int));
711 for( i=0; i<MonoBondCount; i++ )
712 if( (j=MonoBond[i].index) != -1 )
713 ptr->assign.bflags[j] = MonoBond[i].flag;
714 }
715 *node = ptr;
716 }
717 else if( (*node)->type == BC_ASSIGN )
718 {
719 if( (*node)->assign.resid != resid )
720 {
721 stringstream errorMsg;
722 errorMsg << "Duplicated Monomer Specification!\n";
723 errorMsg << "Residue " << ChainsResName[resid]
724 << " matches residue ";
725 errorMsg << ChainsResName[(*node)->assign.resid] << endl;
726 obErrorLog.ThrowError(__func__, errorMsg.str(), obWarning);
727 }
728 }
729
730 /* Restore State! */
731 if( curr != prev )
732 {
733 if( !done )
734 {
735 MonoAtom[curr].index = -1;
736 AtomIndex--;
737 }
738 MonoBond[bond].index = -1;
739 BondIndex--;
740 }
741 }
742
743 //////////////////////////////////////////////////////////////////////////////
744 // Constructors / Destructors
745 //////////////////////////////////////////////////////////////////////////////
746
747 // validated
748 OBChainsParser::OBChainsParser(void)
749 {
750 int i, res = RESIDMIN;
751
752 PDecisionTree = (ByteCode*)0;
753 for( i=0 ; i < AMINOMAX ; i++ )
754 {
755 strncpy(ChainsResName[res],AminoAcids[i].name, sizeof(ChainsResName[res]) - 1);
756 ChainsResName[res][sizeof(ChainsResName[res]) - 1] = '\0';
757 DefineMonomer(&PDecisionTree,res,AminoAcids[i].data);
758 res++;
759 }
760
761 NDecisionTree = (ByteCode*)0;
762 for( i=0 ; i< NUCLEOMAX ; i++ )
763 {
764 strncpy(ChainsResName[res],Nucleotides[i].name, sizeof(ChainsResName[res]) - 1);
765 ChainsResName[res][sizeof(ChainsResName[res]) - 1] = '\0';
766 DefineMonomer(&NDecisionTree,res,Nucleotides[i].data);
767 res++;
768 }
769
770 bitmasks = NULL;
771 hetflags = NULL;
772 atomids = NULL;
773 resids = NULL;
774 resnos = NULL;
775 sernos = NULL;
776 hcounts = NULL;
777 chains = NULL;
778 flags = NULL;
779 }
780
781 OBChainsParser::~OBChainsParser(void)
782 {
783 DeleteByteCode((ByteCode*)PDecisionTree);
784 DeleteByteCode((ByteCode*)NDecisionTree);
785 }
786
787 //////////////////////////////////////////////////////////////////////////////
788 // Setup / Cleanup Functions
789 //////////////////////////////////////////////////////////////////////////////
790
791 //! Setup parsing for this molecule --
792 void OBChainsParser::SetupMol(OBMol &mol)
793 {
794 CleanupMol();
795
796 int i;
797 int asize = mol.NumAtoms();
798 int bsize = mol.NumBonds();
799
800 bitmasks = new unsigned short[asize];
801 resids = new unsigned char[asize];
802 flags = new unsigned char[bsize];
803 hetflags = new bool[asize];
804 atomids = new short[asize];
805 resnos = new short[asize];
806 sernos = new short[asize];
807 hcounts = new char[asize];
808 chains = new char[asize];
809
810 memset(bitmasks, 0, sizeof(unsigned short) * asize);
811 memset(resids, 0, sizeof(unsigned char) * asize);
812 memset(hetflags, 0, sizeof(bool) * asize);
813 memset(resnos, 0, sizeof(short) * asize);
814 memset(sernos, 0, sizeof(short) * asize);
815 memset(hcounts, 0, sizeof(char) * asize);
816 memset(chains, ' ', sizeof(char) * asize);
817
818 memset(flags, 0, sizeof(unsigned char) * bsize);
819
820 for ( i = 0 ; i < asize ; i++ )
821 {
822 atomids[i] = -1;
823 }
824 }
825
826 //! Clean up any molecular data left in memory -- frees all memory afterwards
827 //! Used by OBChainsParser::SetupMol()
828 void OBChainsParser::CleanupMol(void)
829 {
830 if (bitmasks != NULL)
831 {
832 delete bitmasks;
833 bitmasks = NULL;
834 }
835 if (hetflags != NULL)
836 {
837 delete hetflags;
838 hetflags = NULL;
839 }
840 if (atomids != NULL)
841 {
842 delete atomids;
843 atomids = NULL;
844 }
845 if (resids != NULL)
846 {
847 delete resids;
848 resids = NULL;
849 }
850 if (resnos != NULL)
851 {
852 delete resnos;
853 resnos = NULL;
854 }
855 if (sernos != NULL)
856 {
857 delete sernos;
858 sernos = NULL;
859 }
860 if (hcounts != NULL)
861 {
862 delete hcounts;
863 hcounts = NULL;
864 }
865 if (chains != NULL)
866 {
867 delete chains;
868 chains = NULL;
869 }
870 if (flags != NULL)
871 {
872 delete flags;
873 flags = NULL;
874 }
875 }
876
877 //! Clear all residue information for a supplied molecule
878 void OBChainsParser::ClearResidueInformation(OBMol &mol)
879 {
880 OBResidue *residue;
881 vector<OBResidue*> residues;
882 vector<OBResidue*>::iterator r;
883
884 for (residue = mol.BeginResidue(r) ; residue ; residue = mol.NextResidue(r))
885 residues.push_back(residue);
886
887 for ( unsigned int i = 0 ; i < residues.size() ; i++ )
888 mol.DeleteResidue(residues[i]);
889
890 residues.clear();
891 }
892
893 void OBChainsParser::SetResidueInformation(OBMol &mol, bool nukeSingleResidue)
894 {
895 char buffer[BUFF_SIZE];
896 char *symbol;
897 string atomid, name;
898
899 OBAtom *atom;
900 OBResidue *residue;
901 map<short, OBResidue *> resmap;
902
903 int size = mol.NumAtoms();
904 for ( int i = 0 ; i < size ; i++ )
905 {
906 atom = mol.GetAtom(i+1); // WARNING: ATOM INDEX ISSUE
907
908 if (atomids[i] == -1)
909 {
910 symbol = etab.GetSymbol(atom->GetAtomicNum());
911 if( symbol[1] )
912 {
913 buffer[0] = symbol[0];
914 buffer[1] = (char) toupper(symbol[1]);
915 }
916 else
917 {
918 buffer[0] = ' ';
919 buffer[1] = symbol[0];
920 }
921 buffer[2] = ' ';
922 buffer[3] = ' ';
923 buffer[4] = '\0';
924 }
925 else if (atom->IsHydrogen())
926 {
927 if (hcounts[i])
928 sprintf(buffer, "%cH%.2s", hcounts[i]+'0', ChainsAtomName[atomids[i]]+2);
929 else
930 sprintf(buffer, "H%.2s", ChainsAtomName[atomids[i]]+2);
931 }
932 else
933 sprintf(buffer, "%.4s", ChainsAtomName[atomids[i]]);
934
935 if (buffer[3] == ' ')
936 buffer[3] = '\0';
937
938 atomid = (buffer[0] == ' ') ? buffer + 1 : buffer;
939
940 if (resmap.find(resnos[i]) != resmap.end())
941 {
942 residue = resmap[resnos[i]];
943 residue->AddAtom(atom);
944 residue->SetAtomID(atom, atomid);
945 residue->SetHetAtom(atom, hetflags[i]);
946 residue->SetSerialNum(atom, sernos[i]);
947 }
948 else
949 {
950 name = ChainsResName[resids[i]];
951 residue = mol.NewResidue();
952
953 residue->SetName(name);
954 residue->SetNum(resnos[i]);
955 residue->SetChain(chains[i]);
956 residue->SetChainNum((chains[i] > 'A') ? (int)(chains[i] - 'A') : 1);
957
958 residue->AddAtom(atom);
959 residue->SetAtomID(atom, atomid);
960 residue->SetHetAtom(atom, hetflags[i]);
961 residue->SetSerialNum(atom, sernos[i]);
962
963 resmap[resnos[i]] = residue;
964 }
965 }
966
967 if (mol.NumResidues() == 1 && nukeSingleResidue)
968 mol.DeleteResidue(mol.GetResidue(0));
969 else if (mol.NumResidues() == 1
970 && (mol.GetResidue(0))->GetName() == "UNK")
971 mol.DeleteResidue(mol.GetResidue(0));
972 }
973
974 ////////////////////////////////////////////////////////////////////////////////
975 // Perception Functions
976 ////////////////////////////////////////////////////////////////////////////////
977
978 bool OBChainsParser::PerceiveChains(OBMol &mol, bool nukeSingleResidue)
979 {
980 bool result = true;
981
982 SetupMol(mol);
983 ClearResidueInformation(mol);
984
985 result = DetermineHetAtoms(mol) && result;
986 result = DetermineConnectedChains(mol) && result;
987 result = DeterminePeptideBackbone(mol) && result;
988 result = DeterminePeptideSidechains(mol) && result;
989 result = DetermineNucleicBackbone(mol) && result;
990 result = DetermineNucleicSidechains(mol) && result;
991 result = DetermineHydrogens(mol) && result;
992
993 SetResidueInformation(mol, nukeSingleResidue);
994 CleanupMol();
995
996 obErrorLog.ThrowError(__func__,
997 "Ran OpenBabel::PerceiveChains", obAuditMsg);
998
999 return result;
1000 }
1001
1002 ////////////////////////////////////////////////////////////////////////////////
1003 // Hetero Atom Perception
1004 ////////////////////////////////////////////////////////////////////////////////
1005
1006 bool OBChainsParser::DetermineHetAtoms(OBMol &mol)
1007 {
1008 OBAtom *atom;
1009 vector<OBNodeBase *>::iterator a;
1010 for (atom = mol.BeginAtom(a) ; atom ; atom = mol.NextAtom(a))
1011 if (!atom->IsHydrogen() && atom->GetValence() == 0)
1012 {
1013 // find un-connected atoms (e.g., HOH oxygen atoms)
1014 // if it's not an oxygen, it's probably some ligand
1015 resids[atom->GetIdx()-1] = (atom->IsOxygen()) ? 1 : 2;
1016 hetflags[atom->GetIdx()-1] = true;
1017 }
1018 return true;
1019 }
1020
1021 ////////////////////////////////////////////////////////////////////////////////
1022 // Connected Chain Perception
1023 ////////////////////////////////////////////////////////////////////////////////
1024
1025 bool OBChainsParser::DetermineConnectedChains(OBMol &mol)
1026 {
1027 int resid;
1028 int resno;
1029 int count;
1030 int size;
1031 int i,idx;
1032 int numAtoms;
1033
1034 resno = 1;
1035 count = 0;
1036 numAtoms = mol.NumAtoms();
1037
1038 OBAtom *atom;
1039 vector<OBNodeBase *>::iterator a;
1040 for (atom = mol.BeginAtom(a) ; atom ; atom = mol.NextAtom(a))
1041 {
1042 idx = atom->GetIdx() - 1;
1043 if (!hetflags[idx] && chains[idx] == ' ' && !atom->IsHydrogen())
1044 {
1045 size = RecurseChain(mol, idx, 'A' + count);
1046 if (size < 10)
1047 {
1048 if (size == 1 && atom->IsOxygen())
1049 resid = 1; /* HOH */
1050 else
1051 resid = 2; /* LIG */
1052
1053 for (i = 0 ; i < numAtoms ; i++)
1054 {
1055 if (chains[i] == ('A' + count))
1056 {
1057 hetflags[i] = true;
1058 resids[i] = resid;
1059 resnos[i] = resno;
1060 chains[i] = ' ';
1061 }
1062 }
1063 resno++;
1064 }
1065 else
1066 count++;
1067 }
1068 }
1069
1070 if( count == 1 )
1071 for ( i = 0 ; i < numAtoms ; i++ )
1072 chains[i] = ' ';
1073
1074 return true;
1075 }
1076
1077 int OBChainsParser::RecurseChain(OBMol &mol, int i, int c)
1078 {
1079 OBAtom *atom, *nbr;
1080 vector<OBEdgeBase *>::iterator b;
1081 int result, index;
1082
1083 atom = mol.GetAtom(i+1);
1084 if (atom->IsHydrogen() )
1085 return 0;
1086
1087 result = 1;
1088 chains[i] = c;
1089
1090 for (nbr = atom->BeginNbrAtom(b) ; nbr ; nbr = atom->NextNbrAtom(b))
1091 {
1092 index = nbr->GetIdx() - 1;
1093 if (chains[index] == ' ')
1094 result += RecurseChain(mol, index,c);
1095 }
1096
1097 return (result);
1098 }
1099
1100 ////////////////////////////////////////////////////////////////////////////////
1101 // Peptide Backbone Perception
1102 ////////////////////////////////////////////////////////////////////////////////
1103
1104 bool OBChainsParser::DeterminePeptideBackbone(OBMol &mol)
1105 {
1106 ConstrainBackbone(mol, Peptide, MAXPEPTIDE);
1107
1108 int i, max = mol.NumAtoms();
1109
1110 /* Order Peptide Backbone */
1111
1112 for ( i = 0 ; i < max ; i++ )
1113 if (atomids[i] == -1)
1114 {
1115 if( bitmasks[i] & BitNTer )
1116 {
1117 atomids[i] = AI_N;
1118 TracePeptideChain(mol,i,1);
1119 }
1120 else if( (bitmasks[i]&BitNPT) && !(bitmasks[i]&BitN) )
1121 {
1122 atomids[i] = AI_N;
1123 TracePeptideChain(mol,i,1);
1124 }
1125 }
1126
1127 /* Carbonyl Double Bond */
1128
1129 OBBond *bond;
1130 vector<OBEdgeBase*>::iterator b;
1131 for (bond = mol.BeginBond(b) ; bond ; bond = mol.NextBond(b))
1132 {
1133 if ((atomids[bond->GetBeginAtomIdx()-1] == 2 && atomids[bond->GetEndAtomIdx()-1] == 3) ||
1134 (atomids[bond->GetBeginAtomIdx()-1] == 3 && atomids[bond->GetEndAtomIdx()-1] == 2))
1135 flags[bond->GetIdx()] |= BF_DOUBLE;
1136 }
1137
1138 return true;
1139 }
1140
1141 void OBChainsParser::ConstrainBackbone(OBMol &mol, Template *templ, int tmax)
1142 {
1143 static OBAtom *neighbour[6];
1144 Template *pep;
1145 OBAtom *na,*nb,*nc,*nd;
1146 OBAtom *atom, *nbr;
1147 bool change, result;
1148 int count;
1149 int i,idx;
1150
1151 vector<OBNodeBase *>::iterator a;
1152 vector<OBEdgeBase *>::iterator b;
1153
1154 /* First Pass */
1155
1156 for (atom = mol.BeginAtom(a) ; atom ; atom = mol.NextAtom(a))
1157 {
1158 idx = atom->GetIdx() - 1;
1159 bitmasks[idx] = 0;
1160 for ( i = 0 ; i < tmax ; i++ )
1161 if ( (static_cast<unsigned int>(templ[i].elem) == atom->GetAtomicNum())
1162 &&
1163 (static_cast<unsigned int>(templ[i].count) == atom->GetHvyValence()))
1164 bitmasks[idx] |= templ[i].flag;
1165 }
1166
1167 /* Second Pass */
1168
1169 do
1170 {
1171 change = false;
1172 for (atom = mol.BeginAtom(a) ; atom ; atom = mol.NextAtom(a))
1173 {
1174 idx = atom->GetIdx() - 1;
1175 if (bitmasks[idx]) // Determine Neighbours
1176 {
1177 count = 0;
1178 for (nbr = atom->BeginNbrAtom(b) ; nbr ; nbr = atom->NextNbrAtom(b))
1179 if (!nbr->IsHydrogen())
1180 neighbour[count++] = nbr;
1181
1182 na = neighbour[0];
1183 nb = neighbour[1];
1184 nc = neighbour[2];
1185 nd = neighbour[3];
1186
1187 for ( i = 0 ; i < tmax ; i++ )
1188 if ( templ[i].flag & bitmasks[idx] )
1189 {
1190 pep = &templ[i];
1191 result = true;
1192
1193 if (count == 4)
1194 result = Match4Constraints(pep,na,nb,nc,nd);
1195 else if (count == 3)
1196 result = Match3Constraints(pep,na,nb,nc);
1197 else if (count == 2)
1198 result = Match2Constraints(pep,na,nb);
1199 else // count == 1
1200 result = MatchConstraint(na,pep->n1);
1201
1202 if(result == false)
1203 {
1204 bitmasks[idx] &= ~pep->flag;
1205 change = true;
1206 }
1207 }
1208 }
1209 }
1210 }
1211 while( change );
1212 }
1213
1214 bool OBChainsParser::MatchConstraint(OBAtom *atom, int mask)
1215 {
1216 if (atom == NULL)
1217 return (false);
1218
1219 if( mask < 0 )
1220 return(atom->GetAtomicNum() == static_cast<unsigned int>(-mask));
1221 else
1222 return(((bitmasks[atom->GetIdx()-1]&mask) == 0) ? false : true);
1223 }
1224
1225 bool OBChainsParser::Match2Constraints(Template *tmpl, OBAtom *na, OBAtom *nb)
1226 {
1227 if (na == NULL || nb == NULL)
1228 return (false); // don't even try to evaluate it
1229
1230 if( MatchConstraint(na,tmpl->n2) )
1231 if( MatchConstraint(nb,tmpl->n1) )
1232 return( true );
1233 if( MatchConstraint(nb,tmpl->n2) )
1234 if( MatchConstraint(na,tmpl->n1) )
1235 return( true );
1236 return( false );
1237 }
1238
1239 bool OBChainsParser::Match3Constraints(Template *tmpl, OBAtom *na, OBAtom *nb, OBAtom *nc)
1240 {
1241 if (na == NULL || nb == NULL || nc == NULL)
1242 return (false); // don't even try to evaluate it
1243
1244 if( MatchConstraint(na,tmpl->n3) )
1245 if( Match2Constraints(tmpl,nb,nc) )
1246 return( true );
1247 if( MatchConstraint(nb,tmpl->n3) )
1248 if( Match2Constraints(tmpl,na,nc) )
1249 return( true );
1250 if( MatchConstraint(nc,tmpl->n3) )
1251 if( Match2Constraints(tmpl,na,nb) )
1252 return( true );
1253 return( false );
1254 }
1255
1256 bool OBChainsParser::Match4Constraints(Template *tmpl, OBAtom *na, OBAtom *nb, OBAtom *nc, OBAtom *nd)
1257 {
1258 if (na == NULL || nb == NULL || nc == NULL || nd == NULL)
1259 return (false); // don't even try to evaluate it
1260
1261 if( MatchConstraint(na,tmpl->n4) )
1262 if( Match3Constraints(tmpl,nb,nc,nd) )
1263 return( true );
1264 if( MatchConstraint(nb,tmpl->n4) )
1265 if( Match3Constraints(tmpl,na,nc,nd) )
1266 return( true );
1267 if( MatchConstraint(nc,tmpl->n4) )
1268 if( Match3Constraints(tmpl,na,nb,nd) )
1269 return( true );
1270 if( MatchConstraint(nd,tmpl->n4) )
1271 if( Match3Constraints(tmpl,na,nb,nc) )
1272 return( true );
1273 return( false );
1274 }
1275
1276 void OBChainsParser::TracePeptideChain(OBMol &mol, int i, int r)
1277 {
1278 int neighbour[4];
1279 int na,nb,nc;
1280 OBAtom *atom, *nbr;
1281 int count;
1282 int j,k,idx;
1283
1284 vector<OBEdgeBase *>::iterator b;
1285
1286 /* Determine Neighbours */
1287
1288 atom = mol.GetAtom(i+1);
1289 idx = atom->GetIdx() - 1;
1290 bitmasks[i] &= BitVisit;
1291
1292 count = 0;
1293 for (nbr = atom->BeginNbrAtom(b) ; nbr ; nbr = atom->NextNbrAtom(b))
1294 if (!nbr->IsHydrogen())
1295 neighbour[count++] = nbr->GetIdx()-1;
1296
1297 resnos[idx] = r;
1298
1299 na = neighbour[0];
1300 nb = neighbour[1];
1301 nc = neighbour[2];
1302
1303 switch( atomids[i] )
1304 {
1305 case(AI_N):
1306 for( j=0; j<count; j++ )
1307 if( bitmasks[neighbour[j]] & BitCAAll )
1308 {
1309 atomids[neighbour[j]] = AI_CA;
1310 if (!(bitmasks[neighbour[j]] & BitVisit))
1311 TracePeptideChain(mol,neighbour[j],r);
1312 }
1313 break;
1314
1315 case(AI_CA):
1316 if( count == 3 )
1317 {
1318 if ( bitmasks[na] & BitNAll )
1319 na = nc;
1320 else if ( bitmasks[nb] & BitNAll )
1321 nb = nc;
1322
1323 if ( bitmasks[na] & BitC )
1324 {
1325 j = na;
1326 k = nb;
1327 }
1328 else if ( bitmasks[nb] & BitC )
1329 {
1330 j = nb;
1331 k = na;
1332 }
1333 else if( bitmasks[na] & BitCAll )
1334 {
1335 j = na;
1336 k = nb;
1337 }
1338 else /* bitmasks[nb] & BitCAll */
1339 {
1340 j = nb;
1341 k = na;
1342 }
1343
1344 atomids[j] = AI_C;
1345 bitmasks[k] = 0;
1346
1347 if (!(bitmasks[j] & BitVisit))
1348 TracePeptideChain(mol,j,r);
1349 }
1350 else /* count == 2 */
1351 {
1352 if ( bitmasks[na] & BitCAll )
1353 {
1354 atomids[na] = AI_C;
1355 if (!(bitmasks[na] & BitVisit))
1356 TracePeptideChain(mol,na,r);
1357 }
1358 else
1359 {
1360 atomids[nb] = AI_C;
1361 if (!(bitmasks[nb] & BitVisit))
1362 TracePeptideChain(mol,nb,r);
1363 }
1364 }
1365 break;
1366
1367 case(AI_C):
1368 k = AI_O;
1369 for ( j = 0; j < count; j++ )
1370 {
1371 if ( bitmasks[neighbour[j]] & BitNAll )
1372 {
1373 atomids[neighbour[j]] = AI_N;
1374 if (!(bitmasks[neighbour[j]] & BitVisit))
1375 TracePeptideChain(mol,neighbour[j],r+1);
1376 }
1377 else if( bitmasks[neighbour[j]] & BitOAll )
1378 {
1379 atomids[neighbour[j]] = k;
1380 resnos[neighbour[j]] = r;
1381 k = AI_OXT; /* OXT */
1382 }
1383 }
1384 break;
1385 }
1386 }
1387
1388 ////////////////////////////////////////////////////////////////////////////////
1389 // Peptide Sidechains Perception
1390 ////////////////////////////////////////////////////////////////////////////////
1391
1392 bool OBChainsParser::DeterminePeptideSidechains(OBMol &mol)
1393 {
1394 int resid;
1395 int max = mol.NumAtoms();
1396
1397 for (int i = 0 ; i < max ; i++)
1398 if (atomids[i] == AI_CA)
1399 {
1400 resid = IdentifyResidue(PDecisionTree, mol, i, resnos[i]);
1401 AssignResidue(mol,resnos[i],chains[i],resid);
1402 }
1403
1404 return true;
1405 }
1406
1407 void OBChainsParser::AssignResidue(OBMol &mol, int r, int c, int i)
1408 {
1409 int max = mol.NumAtoms();
1410
1411 for (int j = 0 ; j < max ; j++)
1412 if ((resnos[j] == r) && (chains[j] == c) && !hetflags[j])
1413 resids[j] = i;
1414 }
1415
1416 int OBChainsParser::IdentifyResidue(void *tree, OBMol &mol, int seed, int resno)
1417 {
1418 ByteCode *ptr;
1419
1420 int AtomCount, BondCount;
1421 int curr,prev,bond;
1422 int bcount;
1423 int i,j;
1424
1425 ptr = (ByteCode *) tree;
1426 bcount = 0;
1427
1428 Stack[0].atom = seed;
1429 Stack[0].prev = seed;
1430 StackPtr = 0;
1431
1432 ResMonoAtom[0] = seed;
1433 AtomCount = 1;
1434 BondCount = 0;
1435
1436 OBAtom *atom, *nbr;
1437 vector<OBEdgeBase *>::iterator b;
1438
1439 while( ptr )
1440 switch(ptr->type)
1441 {
1442 case(BC_IDENT): curr = Stack[StackPtr-1].atom;
1443 if( atomids[curr] == ptr->ident.value )
1444 {
1445 bond = Stack[StackPtr-1].bond;
1446 ResMonoBond[BondCount++] = bond;
1447 ptr = ptr->ident.tcond;
1448 StackPtr--;
1449 }
1450 else
1451 ptr = ptr->ident.fcond;
1452 break;
1453
1454 case(BC_LOCAL): curr = Stack[StackPtr-1].atom;
1455 if( curr == ResMonoAtom[ptr->local.value] )
1456 {
1457 bond = Stack[StackPtr-1].bond;
1458 ResMonoBond[BondCount++] = bond;
1459 ptr = ptr->local.tcond;
1460 StackPtr--;
1461 }
1462 else
1463 ptr = ptr->local.fcond;
1464 break;
1465
1466 case(BC_ELEM): curr = Stack[StackPtr-1].atom;
1467 if( mol.GetAtom(curr+1)->GetAtomicNum() == static_cast<unsigned int>(ptr->elem.value)
1468 )
1469 {
1470 bond = Stack[StackPtr-1].bond;
1471 ResMonoAtom[AtomCount++] = curr;
1472 ResMonoBond[BondCount++] = bond;
1473 resnos[curr] = resno;
1474 ptr = ptr->elem.tcond;
1475 StackPtr--;
1476 }
1477 else
1478 ptr = ptr->elem.fcond;
1479 break;
1480
1481 case(BC_EVAL): bcount = 0;
1482 curr = Stack[StackPtr].atom;
1483 prev = Stack[StackPtr].prev;
1484
1485 atom = mol.GetAtom(curr+1); // WARNING, potential atom index issue
1486 for (nbr = atom->BeginNbrAtom(b); nbr; nbr = atom->NextNbrAtom(b))
1487 {
1488 if (nbr->IsHydrogen())
1489 continue;
1490
1491 j = nbr->GetIdx() - 1;
1492 if (!((curr == prev) && bitmasks[j]) && (j != prev))
1493 {
1494 Stack[StackPtr].prev = curr;
1495 Stack[StackPtr].atom = j;
1496 Stack[StackPtr].bond = (*b)->GetIdx();
1497 StackPtr++;
1498 bcount++;
1499 }
1500 }
1501
1502 ptr = ptr->eval.next;
1503 break;
1504
1505 case(BC_COUNT):
1506 if( bcount == ptr->count.value )
1507 {
1508 ptr = ptr->count.tcond;
1509 }
1510 else
1511 ptr = ptr->count.fcond;
1512 break;
1513
1514 case(BC_ASSIGN):
1515 for( i=0; i<AtomCount; i++ )
1516 if( !bitmasks[ResMonoAtom[i]] )
1517 {
1518 j = ptr->assign.atomid[i];
1519 atomids[ResMonoAtom[i]] = j;
1520 }
1521 for( i=0; i<BondCount; i++ )
1522 {
1523 j = ptr->assign.bflags[i];
1524 flags[ResMonoBond[i]] = j;
1525 }
1526 return( ptr->assign.resid );
1527 break;
1528
1529 default: /* Illegal Instruction! */
1530 return( 0 );
1531 }
1532 return 0;
1533 }
1534
1535 ////////////////////////////////////////////////////////////////////////////////
1536 // Nucleic Backbone Perception
1537 ////////////////////////////////////////////////////////////////////////////////
1538
1539 bool OBChainsParser::DetermineNucleicBackbone(OBMol &mol)
1540 {
1541 ConstrainBackbone(mol, Nucleotide, MAXNUCLEIC);
1542
1543 int i, max = mol.NumAtoms();
1544
1545 /* Order Nucleic Backbone */
1546
1547 for( i = 0 ; i < max ; i++ )
1548 if( atomids[i] == -1 )
1549 {
1550 if( bitmasks[i] & BitPTer )
1551 {
1552 atomids[i] = AI_P;
1553 TraceNucleicChain(mol,i,1);
1554 }
1555 else if( bitmasks[i] & BitO5Ter )
1556 {
1557 atomids[i] = AI_O5;
1558 TraceNucleicChain(mol,i,1);
1559 }
1560 }
1561
1562 return true;
1563 }
1564
1565 void OBChainsParser::TraceNucleicChain(OBMol &mol, int i, int r)
1566 {
1567 int neighbour[4];
1568 int na,nb,nc;
1569 int count;
1570 int j,k;
1571
1572 OBAtom *atom, *nbr;
1573 vector<OBEdgeBase *>::iterator b;
1574
1575 count = 0;
1576 atom = mol.GetAtom(i + 1);
1577 for (nbr = atom->BeginNbrAtom(b) ; nbr ; nbr = atom->NextNbrAtom(b))
1578 if (!nbr->IsHydrogen())
1579 neighbour[count++] = nbr->GetIdx() - 1;
1580
1581 resnos[i] = r;
1582 bitmasks[i] &= BitVisit;
1583
1584 na = neighbour[0];
1585 nb = neighbour[1];
1586 nc = neighbour[2];
1587
1588 switch( atomids[i] )
1589 {
1590 case(AI_P):
1591 k = AI_O1P; /* O1P */
1592 for( j=0; j<count; j++ )
1593 {
1594 if( bitmasks[neighbour[j]] & BitO5 )
1595 {
1596 atomids[neighbour[j]] = AI_O5;
1597 if (!(bitmasks[neighbour[j]] & BitVisit))
1598 TraceNucleicChain(mol,neighbour[j],r);
1599 }
1600 else if( bitmasks[neighbour[j]] & BitOP )
1601 {
1602 atomids[neighbour[j]] = k;
1603 resnos[neighbour[j]] = r;
1604 k = AI_O2P; /* O2P */
1605 }
1606 }
1607
1608 break;
1609
1610 case(AI_O5):
1611 for( j=0; j<count; j++ )
1612 if( bitmasks[neighbour[j]] & BitC5 )
1613 {
1614 atomids[neighbour[j]] = AI_C5;
1615 if (!(bitmasks[neighbour[j]] & BitVisit))
1616 TraceNucleicChain(mol,neighbour[j],r);
1617 }
1618
1619 break;
1620
1621 case(AI_C5):
1622 for( j=0 ; j<count; j++ )
1623 if( bitmasks[neighbour[j]] & BitC4 )
1624 {
1625 atomids[neighbour[j]] = AI_C4;
1626 if (!(bitmasks[neighbour[j]] & BitVisit))
1627 TraceNucleicChain(mol,neighbour[j],r);
1628 }
1629
1630 break;
1631
1632 case(AI_C4):
1633 for( j=0; j<count; j++ )
1634 {
1635 if( bitmasks[neighbour[j]] & BitC3 )
1636 {
1637 atomids[neighbour[j]] = AI_C3;
1638 if (!(bitmasks[neighbour[j]] & BitVisit))
1639 TraceNucleicChain(mol,neighbour[j],r);
1640 }
1641 else if( bitmasks[neighbour[j]] & BitO4 )
1642 {
1643 atomids[neighbour[j]] = AI_O4;
1644 resnos[neighbour[j]] = r;
1645 }
1646 }
1647
1648 break;
1649
1650 case(AI_C3):
1651 for( j=0; j<count; j++ )
1652 {
1653 if( bitmasks[neighbour[j]] & BitO3All )
1654 {
1655 atomids[neighbour[j]] = AI_O3;
1656 if (!(bitmasks[neighbour[j]] & BitVisit))
1657 TraceNucleicChain(mol,neighbour[j],r);
1658 }
1659 else if( bitmasks[neighbour[j]] & BitC2All )
1660 {
1661 atomids[neighbour[j]] = AI_C2;
1662 if (!(bitmasks[neighbour[j]] & BitVisit))
1663 TraceNucleicChain(mol,neighbour[j],r);
1664 }
1665 }
1666
1667 break;
1668
1669 case(AI_O3):
1670 for( j=0; j<count; j++ )
1671 if( bitmasks[neighbour[j]] & BitP )
1672 {
1673 atomids[neighbour[j]] = AI_P;
1674 if (!(bitmasks[neighbour[j]] & BitVisit))
1675 TraceNucleicChain(mol,neighbour[j],r+1);
1676 }
1677
1678 break;
1679
1680 case(AI_C2):
1681 for( j=0; j<count; j++ )
1682 {
1683 if( bitmasks[neighbour[j]] & BitC1 )
1684 {
1685 atomids[neighbour[j]] = AI_C1;
1686 resnos[neighbour[j]] = r;
1687 }
1688 else if( bitmasks[neighbour[j]] & BitO2 )
1689 {
1690 atomids[neighbour[j]] = AI_O2;
1691 resnos[neighbour[j]] = r;
1692 }
1693 }
1694
1695 break;
1696 }
1697 }
1698
1699 ////////////////////////////////////////////////////////////////////////////////
1700 // Nucleic Sidechains Perception
1701 ////////////////////////////////////////////////////////////////////////////////
1702
1703 bool OBChainsParser::DetermineNucleicSidechains(OBMol &mol)
1704 {
1705 for( unsigned int i = 0 ; i < mol.NumAtoms() ; i++ )
1706 if( atomids[i] == 49 )
1707 {
1708 int resid = IdentifyResidue(NDecisionTree,mol,i,resnos[i]);
1709 AssignResidue(mol,resnos[i],chains[i],resid);
1710 }
1711
1712 return true;
1713 }
1714
1715 ////////////////////////////////////////////////////////////////////////////////
1716 // Hydrogens Perception
1717 ////////////////////////////////////////////////////////////////////////////////
1718
1719 bool OBChainsParser::DetermineHydrogens(OBMol &mol)
1720 {
1721 OBAtom *atom, *nbr;
1722 int idx,sidx;
1723
1724 int max = mol.NumAtoms();
1725 for ( int i = 0 ; i < max ; i++ )
1726 hcounts[i] = 0;
1727
1728 /* First Pass */
1729
1730 vector<OBNodeBase*>::iterator a;
1731 vector<OBEdgeBase*>::iterator b;
1732
1733 for(atom = mol.BeginAtom(a); atom ; atom = mol.NextAtom(a))
1734 if(atom->IsHydrogen())
1735 {
1736 nbr = atom->BeginNbrAtom(b);
1737 if (nbr != NULL)
1738 {
1739 idx = atom->GetIdx() - 1;
1740 sidx = nbr->GetIdx() - 1;
1741
1742 hcounts[idx] = ++hcounts[sidx];
1743 hetflags[idx] = hetflags[sidx];
1744 atomids[idx] = atomids[sidx];
1745 resids[idx] = resids[sidx];
1746 resnos[idx] = resnos[sidx];
1747 }
1748 }
1749
1750 /* Second Pass */
1751
1752 for(atom = mol.BeginAtom(a) ; atom ; atom = mol.NextAtom(a))
1753 if (atom->IsHydrogen())
1754 {
1755 nbr = atom->BeginNbrAtom(b);
1756 if (nbr != NULL && hcounts[nbr->GetIdx()-1] == 1)
1757 hcounts[atom->GetIdx()-1] = 0;
1758 }
1759
1760 return true;
1761 }
1762
1763 ////////////////////////////////////////////////////////////////////////////////
1764 // Utility Functions
1765 ////////////////////////////////////////////////////////////////////////////////
1766
1767 // validated
1768 void OBChainsParser::DefineMonomer(void **tree, int resid, char *smiles)
1769 {
1770 int i;
1771
1772 MonoAtomCount = 0;
1773 MonoBondCount = 0;
1774
1775 ParseSmiles(smiles,-1);
1776
1777 for( i=0; i<MonoBondCount; i++ )
1778 MonoBond[i].index = -1;
1779 for( i=0; i<MonoAtomCount; i++ )
1780 MonoAtom[i].index = -1;
1781 AtomIndex = BondIndex = 0;
1782
1783 StackPtr = 0;
1784 GenerateByteCodes((ByteCode**)tree, resid, 0, 0, 0 );
1785 }
1786
1787 int OBChainsParser::IdentifyElement(char *ptr)
1788 {
1789 int ch;
1790
1791 ch = toupper(ptr[1]);
1792 switch( toupper(ptr[0]) )
1793 {
1794 case(' '): switch( ch )
1795 {
1796 case('B'): return( 5 );
1797 case('C'): return( 6 );
1798 case('D'): return( 1 );
1799 case('F'): return( 9 );
1800 case('H'): return( 1 );
1801 case('I'): return( 53 );
1802 case('K'): return( 19 );
1803 case('L'): return( 1 );
1804 case('N'): return( 7 );
1805 case('O'): return( 8 );
1806 case('P'): return( 15 );
1807 case('S'): return( 16 );
1808 case('U'): return( 92 );
1809 case('V'): return( 23 );
1810 case('W'): return( 74 );
1811 case('Y'): return( 39 );
1812 }
1813 break;
1814
1815 case('A'): switch( ch )
1816 {
1817 case('C'): return( 89 );
1818 case('G'): return( 47 );
1819 case('L'): return( 13 );
1820 case('M'): return( 95 );
1821 case('R'): return( 18 );
1822 case('S'): return( 33 );
1823 case('T'): return( 85 );
1824 case('U'): return( 79 );
1825 }
1826 break;
1827
1828 case('B'): switch( ch )
1829 {
1830 case('A'): return( 56 );
1831 case('E'): return( 4 );
1832 case('I'): return( 83 );
1833 case('K'): return( 97 );
1834 case('R'): return( 35 );
1835 case(' '): return( 5 );
1836 }
1837 break;
1838
1839 case('C'): switch( ch )
1840 {
1841 case('A'): return( 20 );
1842 case('D'): return( 48 );
1843 case('E'): return( 58 );
1844 case('F'): return( 98 );
1845 case('L'): return( 17 );
1846 case('M'): return( 96 );
1847 case('O'): return( 27 );
1848 case('R'): return( 24 );
1849 case('S'): return( 55 );
1850 case('U'): return( 29 );
1851 case(' '): return( 6 );
1852 }
1853 break;
1854
1855 case('D'): if( ch=='Y' )
1856 {
1857 return( 66 );
1858 }
1859 else if( ch==' ' )
1860 return( 1 );
1861 break;
1862
1863 case('E'): if( ch=='R' )
1864 {
1865 return( 68 );
1866 }
1867 else if( ch=='S' )
1868 {
1869 return( 99 );
1870 }
1871 else if( ch=='U' )
1872 return( 63 );
1873 break;
1874
1875 case('F'): if( ch=='E' )
1876 {
1877 return( 26 );
1878 }
1879 else if( ch=='M' )
1880 {
1881 return( 100 );
1882 }
1883 else if( ch=='R' )
1884 {
1885 return( 87 );
1886 }
1887 else if( ch=='F' )
1888 return( 9 );
1889 break;
1890
1891 case('G'): if( ch=='A' )
1892 {
1893 return( 31 );
1894 }
1895 else if( ch=='D' )
1896 {
1897 return( 64 );
1898 }
1899 else if( ch=='E' )
1900 return( 32 );
1901 break;
1902
1903 case('H'): if( ch=='E' )
1904 {
1905 return( 2 );
1906 }
1907 else if( ch=='F' )
1908 {
1909 return( 72 );
1910 }
1911 else if( ch=='G' )
1912 {
1913 return( 80 );
1914 }
1915 else if( ch=='O' )
1916 {
1917 return( 67 );
1918 }
1919 else if( ch==' ' )
1920 return( 1 );
1921 break;
1922
1923 case('I'): if( ch=='N' )
1924 {
1925 return( 49 );
1926 }
1927 else if( ch=='R' )
1928 {
1929 return( 77 );
1930 }
1931 else if( ch==' ' )
1932 return( 53 );
1933 break;
1934
1935 case('K'): if( ch=='R' )
1936 {
1937 return( 36 );
1938 }
1939 else if( ch==' ' )
1940 return( 19 );
1941 break;
1942
1943 case('L'): if( ch=='A' )
1944 {
1945 return( 57 );
1946 }
1947 else if( ch=='I' )
1948 {
1949 return( 3 );
1950 }
1951 else if( (ch=='R') || (ch=='W') )
1952 {
1953 return( 103 );
1954 }
1955 else if( ch=='U' )
1956 {
1957 return( 71 );
1958 }
1959 else if( ch==' ' )
1960 return( 1 );
1961 break;
1962
1963 case('M'): if( ch=='D' )
1964 {
1965 return( 101 );
1966 }
1967 else if( ch=='G' )
1968 {
1969 return( 12 );
1970 }
1971 else if( ch=='N' )
1972 {
1973 return( 25 );
1974 }
1975 else if( ch=='O' )
1976 return( 42 );
1977 break;
1978
1979 case('N'): switch( ch )
1980 {
1981 case('A'): return( 11 );
1982 case('B'): return( 41 );
1983 case('D'): return( 60 );
1984 case('E'): return( 10 );
1985 case('I'): return( 28 );
1986 case('O'): return( 102 );
1987 case('P'): return( 93 );
1988 case(' '): return( 7 );
1989 }
1990 break;
1991
1992 case('O'): if( ch=='S' )
1993 {
1994 return( 76 );
1995 }
1996 else if( ch==' ' )
1997 return( 8 );
1998 break;
1999
2000 case('P'): switch( ch )
2001 {
2002 case('A'): return( 91 );
2003 case('B'): return( 82 );
2004 case('D'): return( 46 );
2005 case('M'): return( 61 );
2006 case('O'): return( 84 );
2007 case('R'): return( 59 );
2008 case('T'): return( 78 );
2009 case('U'): return( 94 );
2010 case(' '): return( 15 );
2011 }
2012 break;
2013
2014 case('R'): switch( ch )
2015 {
2016 case('A'): return( 88 );
2017 case('B'): return( 37 );
2018 case('E'): return( 75 );
2019 case('H'): return( 45 );
2020 case('N'): return( 86 );
2021 case('U'): return( 44 );
2022 }
2023 break;
2024
2025 case('S'): switch( ch )
2026 {
2027 case('B'): return( 51 );
2028 case('C'): return( 21 );
2029 case('E'): return( 34 );
2030 case('I'): return( 14 );
2031 case('M'): return( 62 );
2032 case('N'): return( 50 );
2033 case('R'): return( 38 );
2034 case(' '): return( 16 );
2035 }
2036 break;
2037
2038 case('T'): switch( ch )
2039 {
2040 case('A'): return( 73 );
2041 case('B'): return( 65 );
2042 case('C'): return( 43 );
2043 case('E'): return( 52 );
2044 case('H'): return( 90 );
2045 case('I'): return( 22 );
2046 case('L'): return( 81 );
2047 case('M'): return( 69 );
2048 }
2049 break;
2050
2051 case('U'): if( ch==' ' )
2052 return( 92 );
2053 break;
2054
2055 case('V'): if( ch==' ' )
2056 return( 23 );
2057 break;
2058
2059 case('W'): if( ch==' ' )
2060 return( 74 );
2061 break;
2062
2063 case('X'): if( ch=='E' )
2064 return( 54 );
2065 break;
2066
2067 case('Y'): if( ch=='B' )
2068 {
2069 return( 70 );
2070 }
2071 else if( ch==' ' )
2072 return( 39 );
2073 break;
2074
2075 case('Z'): if( ch=='N' )
2076 {
2077 return( 30 );
2078 }
2079 else if( ch=='R' )
2080 return( 40 );
2081 break;
2082 }
2083
2084 if( (*ptr>='0') && (*ptr<='9') )
2085 if( (ch=='H') || (ch=='D') )
2086 return( 1 ); /* Hydrogen */
2087
2088 return( 0 );
2089 }
2090
2091 char *OBChainsParser::ParseSmiles(char *ptr, int prev)
2092 {
2093 char *name;
2094 int atomid;
2095 int next;
2096 int type;
2097 int ch;
2098
2099 type = 0;
2100 while( (ch = *ptr++) )
2101 {
2102 switch( ch )
2103 {
2104 case('-'): type = BF_SINGLE;
2105 break;
2106 case('='): type = BF_DOUBLE;
2107 break;
2108 case('#'): type = BF_TRIPLE;
2109 break;
2110 case('^'): type = BF_SINGLE|BF_AROMATIC;
2111 break;
2112 case('~'): type = BF_DOUBLE|BF_AROMATIC;
2113 break;
2114
2115 case(')'): return( ptr );
2116 case('.'): prev = -1;
2117 break;
2118 case('('): ptr = ParseSmiles(ptr,prev);
2119 break;
2120
2121 default:
2122 atomid = ch-'0';
2123 while( isdigit(*ptr) )
2124 atomid = (atomid*10)+(*ptr++)-'0';
2125
2126 for( next=0; next<MonoAtomCount; next++ )
2127 if( MonoAtom[next].atomid == atomid )
2128 break;
2129
2130 if( next == MonoAtomCount )
2131 {
2132 name = ChainsAtomName[atomid];
2133 MonoAtom[next].elem = IdentifyElement(name);
2134 MonoAtom[next].atomid = atomid;
2135 MonoAtom[next].bcount = 0;
2136 MonoAtomCount++;
2137 }
2138
2139 if( prev != -1 )
2140 {
2141 MonoBond[MonoBondCount].flag = type;
2142 MonoBond[MonoBondCount].src = prev;
2143 MonoBond[MonoBondCount].dst = next;
2144 MonoBondCount++;
2145
2146 MonoAtom[prev].bcount++;
2147 MonoAtom[next].bcount++;
2148 }
2149 prev = next;
2150 }
2151 }
2152 return( ptr-1 );
2153 }
2154
2155 } // end namespace OpenBabel
2156
2157 //! \file chains.cpp
2158 //! \brief Parse for macromolecule chains and residues.