ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/OOPSE-4/src/openbabel/chains.cpp
Revision: 3057
Committed: Thu Oct 19 20:49:05 2006 UTC (17 years, 9 months ago) by gezelter
File size: 61686 byte(s)
Log Message:
updated OpenBabel to version 2.0.2

File Contents

# User Rev Content
1 tim 2440 /**********************************************************************
2     chains.cpp - Parse for macromolecule chains and residues.
3    
4     Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc.
5     (original author, Roger Sayle, version 1.6, March 1998)
6     (modified by Joe Corkery, OpenEye Scientific Software, March 2001)
7 gezelter 3057 Some portions Copyright (C) 2001-2006 by Geoffrey R. Hutchison
8 tim 2440
9     This file is part of the Open Babel project.
10     For more information, see <http://openbabel.sourceforge.net/>
11    
12     This program is free software; you can redistribute it and/or modify
13     it under the terms of the GNU General Public License as published by
14     the Free Software Foundation version 2 of the License.
15    
16     This program is distributed in the hope that it will be useful,
17     but WITHOUT ANY WARRANTY; without even the implied warranty of
18     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19     GNU General Public License for more details.
20     ***********************************************************************/
21    
22     //////////////////////////////////////////////////////////////////////////////
23     // File Includes
24     //////////////////////////////////////////////////////////////////////////////
25 gezelter 3057 #include "config.h"
26 tim 2440
27     #include <stdlib.h>
28     #include <string.h>
29     #include <stdio.h>
30     #include <ctype.h>
31     #include <map>
32    
33     #include "mol.hpp"
34     #include "chains.hpp"
35    
36     using namespace std;
37    
38     //////////////////////////////////////////////////////////////////////////////
39     // Preprocessor Definitions
40     //////////////////////////////////////////////////////////////////////////////
41    
42 gezelter 3057 //! The first available index for actual residues
43     //! 0, 1, 2 reserved for UNK, HOH, LIG
44 tim 2440 #define RESIDMIN 3
45 gezelter 3057 //! The maximum number of residue IDs for this code
46 tim 2440 #define RESIDMAX 32
47 gezelter 3057
48     //! An index of the residue names perceived during a run
49     //! 0, 1, and 2 reserved for UNK, HOH, LIG
50     static char ChainsResName[RESIDMAX][4] = {
51     /*0*/ "UNK", /*1*/ "HOH", /*2*/ "LIG"
52     };
53    
54 tim 2440 #define ATOMMINAMINO 4
55     #define ATOMMINNUCLEIC 50
56     #define MAXPEPTIDE 11
57     #define MAXNUCLEIC 15
58 gezelter 3057 //! The number of amino acids recognized by this code
59     //! Currently: ILE, VAL, ALA, ASN, ASP, ARG, CYS, GLN, GLU
60     //! GLY, HIS, HYP, LEU, LYS, MET, PHE, PRO, SER, THR, TRP, TYR
61 tim 2440 #define AMINOMAX 21
62 gezelter 3057 //! The number of nucleic acids recognized by this code
63     //! Currently A, C, T, G, U, I
64 tim 2440 #define NUCLEOMAX 6
65     #define STACKSIZE 20
66    
67     #define AI_N 0
68     #define AI_CA 1
69     #define AI_C 2
70     #define AI_O 3
71     #define AI_OXT 37
72    
73     #define AI_P 38
74     #define AI_O1P 39
75     #define AI_O2P 40
76     #define AI_O5 41
77     #define AI_C5 42
78     #define AI_C4 43
79     #define AI_O4 44
80     #define AI_C3 45
81     #define AI_O3 46
82     #define AI_C2 47
83     #define AI_O2 48
84     #define AI_C1 49
85    
86     #define BitN 0x0001
87     #define BitNTer 0x0002
88     #define BitNPro 0x0004
89     #define BitNPT 0x0008
90     #define BitCA 0x0010
91     #define BitCAGly 0x0020
92     #define BitC 0x0100
93     #define BitCTer 0x0200
94     #define BitCOXT 0x0400
95     #define BitO 0x1000
96     #define BitOXT 0x2000
97    
98     #define BitNAll 0x000F
99     #define BitCAAll 0x0030
100     #define BitCAll 0x0700
101     #define BitOAll 0x3000
102    
103     #define BitP 0x0001
104     #define BitPTer 0x0002
105     #define BitOP 0x0004
106     #define BitO5 0x0008
107     #define BitO5Ter 0x0010
108     #define BitC5 0x0020
109     #define BitC4 0x0040
110     #define BitO4 0x0080
111     #define BitC3 0x0100
112     #define BitO3 0x0200
113     #define BitO3Ter 0x0400
114     #define BitC2RNA 0x0800
115     #define BitC2DNA 0x1000
116     #define BitO2 0x2000
117     #define BitC1 0x4000
118    
119     #define BitPAll 0x0003
120     #define Bit05All 0x0018
121     #define BitO3All 0x0600
122     #define BitC2All 0x1800
123    
124 gezelter 3057 #define BitVisit 0x8000
125    
126 tim 2440 #define BC_ASSIGN 0x01
127     #define BC_COUNT 0x02
128     #define BC_ELEM 0x03
129     #define BC_EVAL 0x04
130     #define BC_IDENT 0x05
131     #define BC_LOCAL 0x06
132    
133     #define BF_SINGLE 0x01
134     #define BF_DOUBLE 0x02
135     #define BF_TRIPLE 0x04
136     #define BF_AROMATIC 0x08
137    
138     namespace OpenBabel
139     {
140    
141 gezelter 3057 OBChainsParser chainsparser;
142 tim 2440
143 gezelter 3057 //////////////////////////////////////////////////////////////////////////////
144     // Structure / Type Definitions
145     //////////////////////////////////////////////////////////////////////////////
146 tim 2440
147 gezelter 3057 //! Template for backbone atoms in chain perception
148     typedef struct Template
149     {
150     int flag; //!< binary flag representing this atom type
151     short elem; //!< atomic number of this element
152     short count; //!< expected valence for this atom type
153     int n1; //!< mask 1 used by ConstrainBackbone() and MatchConstraint()
154     int n2; //!< mask 2 used by ConstrainBackbone() and MatchConstraint()
155     int n3; //!< mask 3 used by ConstrainBackbone() and MatchConstraint()
156     int n4; //!< mask 4 used by ConstrainBackbone() and MatchConstraint()
157     }
158     Template;
159 tim 2440
160 gezelter 3057 //! Generic template for peptide residue backbone
161     static Template Peptide[MAXPEPTIDE] = {
162     /* N */ { 0x0001, 7, 2, 0x0030, 0x0100, 0, 0 },
163     /* NTer */ { 0x0002, 7, 1, 0x0030, 0, 0, 0 },
164     /* NPro */ { 0x0004, 7, 3, 0x0030, 0x0100, -6, 0 },
165     /* NPT */ { 0x0008, 7, 2, 0x0030, -6, 0, 0 },
166     /* CA */ { 0x0010, 6, 3, 0x000F, 0x0700, -6, 0 },
167     /* CAGly */ { 0x0020, 6, 2, 0x0003, 0x0700, 0, 0 },
168     /* C */ { 0x0100, 6, 3, 0x0030, 0x1000, 0x0005, 0 },
169     /* CTer */ { 0x0200, 6, 2, 0x0030, 0x1000, 0, 0 },
170     /* COXT */ { 0x0400, 6, 3, 0x0030, 0x1000, 0x2000, 0 },
171     /* O */ { 0x1000, 8, 1, 0x0700, 0, 0, 0 },
172     /* OXT */ { 0x2000, 8, 1, 0x0400, 0, 0, 0 }
173     };
174    
175     //! Generic template for peptide nucleotide backbone
176     static Template Nucleotide[MAXNUCLEIC] = {
177     /* P */ { 0x0001, 15, 4, 0x0004, 0x0004, 0x0008, 0x0200 },
178     /* PTer */ { 0x0002, 15, 3, 0x0004, 0x0004, 0x0008, 0 },
179     /* OP */ { 0x0004, 8, 1, 0x0003, 0, 0, 0 },
180     /* O5 */ { 0x0008, 8, 2, 0x0020, 0x0003, 0, 0 },
181     /* O5Ter */ { 0x0010, 8, 1, 0x0020, 0, 0, 0 },
182     /* C5 */ { 0x0020, 6, 2, 0x0018, 0x0040, 0, 0 },
183     /* C4 */ { 0x0040, 6, 3, 0x0020, 0x0080, 0x0100, 0 },
184     /* O4 */ { 0x0080, 8, 2, 0x0040, 0x4000, 0, 0 },
185     /* C3 */ { 0x0100, 6, 3, 0x0040, 0x0600, 0x1800, 0 },
186     /* O3 */ { 0x0200, 8, 2, 0x0100, 0x0001, 0, 0 },
187     /* O3Ter */ { 0x0400, 8, 1, 0x0100, 0, 0, 0 },
188     /* C2RNA */ { 0x0800, 6, 3, 0x0100, 0x4000, 0x2000, 0 },
189     /* C2DNA */ { 0x1000, 6, 2, 0x0100, 0x4000, 0, 0 },
190     /* O2 */ { 0x2000, 8, 1, 0x0800, 0, 0, 0 },
191     /* C1 */ { 0x4000, 6, 3, 0x0080, 0x1800, -7, 0 }
192     };
193    
194    
195     //////////////////////////////////////////////////////////////////////////////
196     // Global Variables / Tables
197     //////////////////////////////////////////////////////////////////////////////
198    
199     //! The number of PDB atom type names recognized by this code
200     #define ATOMMAX 68
201    
202     //! PDB atom types (i.e., columns 13-16 of a PDB file)
203     //! index numbers from this array are used in the pseudo-SMILES format
204     //! for side-chains in the AminoAcids[] & Nucleotides[] global arrays below
205     static char ChainsAtomName[ATOMMAX][4] = {
206     /* 0 */ { ' ', 'N', ' ', ' ' },
207     /* 1 */ { ' ', 'C', 'A', ' ' },
208     /* 2 */ { ' ', 'C', ' ', ' ' },
209     /* 3 */ { ' ', 'O', ' ', ' ' },
210     /* 4 */ { ' ', 'C', 'B', ' ' },
211     /* 5 */ { ' ', 'S', 'G', ' ' },
212     /* 6 */ { ' ', 'O', 'G', ' ' },
213     /* 7 */ { ' ', 'C', 'G', ' ' },
214     /* 8 */ { ' ', 'O', 'G', '1' },
215     /* 9 */ { ' ', 'C', 'G', '1' },
216     /* 10 */ { ' ', 'C', 'G', '2' },
217     /* 11 */ { ' ', 'C', 'D', ' ' },
218     /* 12 */ { ' ', 'O', 'D', ' ' },
219     /* 13 */ { ' ', 'S', 'D', ' ' },
220     /* 14 */ { ' ', 'C', 'D', '1' },
221     /* 15 */ { ' ', 'O', 'D', '1' },
222     /* 16 */ { ' ', 'N', 'D', '1' },
223     /* 17 */ { ' ', 'C', 'D', '2' },
224     /* 18 */ { ' ', 'O', 'D', '2' },
225     /* 19 */ { ' ', 'N', 'D', '2' },
226     /* 20 */ { ' ', 'C', 'E', ' ' },
227     /* 21 */ { ' ', 'N', 'E', ' ' },
228     /* 22 */ { ' ', 'C', 'E', '1' },
229     /* 23 */ { ' ', 'O', 'E', '1' },
230     /* 24 */ { ' ', 'N', 'E', '1' },
231     /* 25 */ { ' ', 'C', 'E', '2' },
232     /* 26 */ { ' ', 'O', 'E', '2' },
233     /* 27 */ { ' ', 'N', 'E', '2' },
234     /* 28 */ { ' ', 'C', 'E', '3' },
235     /* 29 */ { ' ', 'C', 'Z', ' ' },
236     /* 30 */ { ' ', 'N', 'Z', ' ' },
237     /* 31 */ { ' ', 'C', 'Z', '2' },
238     /* 32 */ { ' ', 'C', 'Z', '3' },
239     /* 33 */ { ' ', 'O', 'H', ' ' },
240     /* 34 */ { ' ', 'N', 'H', '1' },
241     /* 35 */ { ' ', 'N', 'H', '2' },
242     /* 36 */ { ' ', 'C', 'H', '2' },
243     /* 37 */ { ' ', 'O', 'X', 'T' },
244     /* 38 */ { ' ', 'P', ' ', ' ' },
245     /* 39 */ { ' ', 'O', '1', 'P' },
246     /* 40 */ { ' ', 'O', '2', 'P' },
247     /* 41 */ { ' ', 'O', '5', '*' },
248     /* 42 */ { ' ', 'C', '5', '*' },
249     /* 43 */ { ' ', 'C', '4', '*' },
250     /* 44 */ { ' ', 'O', '4', '*' },
251     /* 45 */ { ' ', 'C', '3', '*' },
252     /* 46 */ { ' ', 'O', '3', '*' },
253     /* 47 */ { ' ', 'C', '2', '*' },
254     /* 48 */ { ' ', 'O', '2', '*' },
255     /* 49 */ { ' ', 'C', '1', '*' },
256     /* 50 */ { ' ', 'N', '9', ' ' },
257     /* 51 */ { ' ', 'C', '8', ' ' },
258     /* 52 */ { ' ', 'N', '7', ' ' },
259     /* 53 */ { ' ', 'C', '5', ' ' },
260     /* 54 */ { ' ', 'C', '6', ' ' },
261     /* 55 */ { ' ', 'O', '6', ' ' },
262     /* 56 */ { ' ', 'N', '6', ' ' },
263     /* 57 */ { ' ', 'N', '1', ' ' },
264     /* 58 */ { ' ', 'C', '2', ' ' },
265     /* 59 */ { ' ', 'O', '2', ' ' },
266     /* 60 */ { ' ', 'N', '2', ' ' },
267     /* 61 */ { ' ', 'N', '3', ' ' },
268     /* 62 */ { ' ', 'C', '4', ' ' },
269     /* 63 */ { ' ', 'O', '4', ' ' },
270     /* 64 */ { ' ', 'N', '4', ' ' },
271     /* 65 */ { ' ', 'C', '5', ' ' },
272     /* 66 */ { ' ', 'C', '5', 'M' },
273     /* 67 */ { ' ', 'C', '6', ' ' }
274     };
275    
276     //! Definition of side chains, associating overall residue name with
277     //! the pseudo-SMILES pattern
278     typedef struct
279     {
280     char *name; //!< Residue name, standardized by PDB
281     char *data; //!< pseudo-SMILES definition of side-chain
282     }
283     ResidType;
284    
285     //! Side chains for recognized amino acids using a pseudo-SMARTS syntax
286     //! for branching and bonds. Numbers indicate atom types defined by
287     //! ChainsAtomName global array above
288     static ResidType AminoAcids[AMINOMAX] = {
289     { "ILE", "1-4(-9-14)-10" },
290     { "VAL", "1-4(-9)-10" },
291     { "ALA", "1-4" },
292     { "ASN", "1-4-7(=15)-19" },
293     { "ASP", "1-4-7(=15)-18" },
294     { "ARG", "1-4-7-11-21-29(=34)-35" },
295     { "CYS", "1-4-5" },
296     { "GLN", "1-4-7-11(=23)-27" },
297     { "GLU", "1-4-7-11(=23)-26" },
298     { "GLY", "1" },
299     { "HIS", "1-4-7^16~22^27^17~7" },
300     { "HYP", "1-4-7(-12)-11-0" },
301     { "LEU", "1-4-7(-14)-17" },
302     { "LYS", "1-4-7-11-20-30" },
303     { "MET", "1-4-7-13-20" },
304     { "PHE", "1-4-7~14^22~29^25~17^7" },
305     { "PRO", "1-4-7-11-0" },
306     { "SER", "1-4-6" },
307     { "THR", "1-4(-8)-10" },
308     { "TRP", "1-4-7~14^24^25~17(^7)^28~32^36~31^25" },
309     { "TYR", "1-4-7~14^22~29(-33)^25~17^7" }
310     };
311     // Other possible amino acid templates (less common)
312     /* Pyroglutamate (PCA): 1-4-7-11(=" OE ")-0 PDB Example: 1CEL */
313     /* Amino-N-Butyric Acid (ABA): 1-4-7 PDB Example: 1BBO */
314     /* Selenic Acid (SEC): 1-4-"SEG "(-15)-18 PDB Example: 1GP1 */
315    
316     //! Side chains for recognized nucleotides using a pseudo-SMARTS syntax
317     //! for branching and bonds. Numbers indicate atom types defined by
318     //! ChainsAtomName global array above
319     static ResidType Nucleotides[NUCLEOMAX] = {
320     { " A", "49-50-51-52-53-54(-56)-57-58-61-62(-53)-50" },
321     { " C", "49-57-58(-59)-61-62(-64)-65-67-57" },
322     { " G", "49-50-51-52-53-54(-55)-57-58(-60)-61-62(-53)-50" },
323     { " T", "49-57-58(-59)-61-62(-63)-65(-66)-67-57" },
324     { " U", "49-57-58(-59)-61-62(-63)-65-67-57" },
325     { " I", "49-50-51-52-53-54(-55)-57-58-61-62(-53)-50" }
326     };
327    
328    
329     typedef struct
330     {
331 tim 2440 int atomid,elem;
332     int bcount;
333     int index;
334 gezelter 3057 }
335     MonoAtomType;
336 tim 2440
337 gezelter 3057 typedef struct
338     {
339 tim 2440 int src,dst;
340     int index;
341     int flag;
342 gezelter 3057 }
343     MonoBondType;
344 tim 2440
345 gezelter 3057 typedef struct
346     {
347 tim 2440 int type;
348     union _ByteCode *next;
349 gezelter 3057 }
350     MonOpStruct;
351 tim 2440
352 gezelter 3057 typedef struct
353     {
354 tim 2440 int type;
355     int value;
356     union _ByteCode *tcond;
357     union _ByteCode *fcond;
358 gezelter 3057 }
359     BinOpStruct;
360 tim 2440
361 gezelter 3057 //! Output array -- residue id, atom id, bond flags, etc.
362     typedef struct
363     {
364 tim 2440 int type;
365     int resid;
366     int *atomid;
367     int *bflags;
368 gezelter 3057 }
369     AssignStruct;
370 tim 2440
371 gezelter 3057 //! Chemical graph matching virtual machine
372     typedef union _ByteCode
373     {
374 tim 2440 int type;
375 gezelter 3057 MonOpStruct eval; //!< Eval - push current neighbors onto the stack
376     BinOpStruct count; //!< Count - test the number of eval bonds
377     BinOpStruct elem; //!< Element - test the element of current atom
378     BinOpStruct ident; //!< Ident - test the atom for backbone identity
379     BinOpStruct local; //!< Local - test whether the atom has been visited
380     AssignStruct assign; //!< Assign - assign residue name, atom name and bond type to output
381     } ByteCode;
382 tim 2440
383 gezelter 3057 typedef struct
384     {
385 tim 2440 int atom,bond;
386     int prev;
387 gezelter 3057 }
388     StackType;
389 tim 2440
390 gezelter 3057 static MonoAtomType MonoAtom[MaxMonoAtom];
391     static MonoBondType MonoBond[MaxMonoBond];
392     static int MonoAtomCount;
393     static int MonoBondCount;
394 tim 2440
395 gezelter 3057 static StackType Stack[STACKSIZE];
396     static int StackPtr;
397 tim 2440
398 gezelter 3057 static int AtomIndex;
399     static int BondIndex;
400     static bool StrictFlag = false;
401 tim 2440
402 gezelter 3057 //////////////////////////////////////////////////////////////////////////////
403     // Static Functions
404     //////////////////////////////////////////////////////////////////////////////
405 tim 2440
406 gezelter 3057 static ByteCode *AllocateByteCode(int type)
407     {
408 tim 2440 ByteCode *result;
409    
410     result = (ByteCode*)malloc(sizeof(ByteCode));
411     if( !result )
412 gezelter 3057 {
413     obErrorLog.ThrowError(__func__, "Unable to allocate byte codes for biomolecule residue perception.", obError);
414     // exit(1);
415     }
416 tim 2440 result->type = type;
417     result->eval.next = NULL;
418     result->count.tcond = NULL;
419     result->count.fcond = NULL;
420     result->elem.tcond = NULL;
421     result->elem.fcond = NULL;
422     result->ident.tcond = NULL;
423     result->ident.fcond = NULL;
424     result->local.tcond = NULL;
425     result->local.fcond = NULL;
426     result->assign.atomid = NULL;
427     result->assign.bflags = NULL;
428    
429     return (result);
430 gezelter 3057 }
431 tim 2440
432 gezelter 3057 //! Free a ByteCode and all corresponding data
433     static void DeleteByteCode(ByteCode *node)
434     {
435     if (node == NULL)
436     return;
437     else
438     {
439     switch (node->type)
440     {
441     case BC_ASSIGN:
442 tim 2440
443 gezelter 3057 if (node->assign.atomid != NULL)
444     free(node->assign.atomid);
445     if (node->assign.bflags != NULL)
446     free(node->assign.bflags);
447 tim 2440
448 gezelter 3057 break;
449 tim 2440
450 gezelter 3057 case BC_COUNT:
451 tim 2440
452 gezelter 3057 DeleteByteCode(node->count.tcond);
453     DeleteByteCode(node->count.fcond);
454     break;
455     case BC_ELEM:
456 tim 2440
457 gezelter 3057 DeleteByteCode(node->elem.tcond);
458     DeleteByteCode(node->elem.fcond);
459     break;
460 tim 2440
461 gezelter 3057 case BC_EVAL:
462 tim 2440
463 gezelter 3057 DeleteByteCode(node->eval.next);
464     break;
465 tim 2440
466 gezelter 3057 case BC_IDENT:
467 tim 2440
468 gezelter 3057 DeleteByteCode(node->ident.tcond);
469     DeleteByteCode(node->ident.fcond);
470     break;
471 tim 2440
472 gezelter 3057 case BC_LOCAL:
473 tim 2440
474 gezelter 3057 DeleteByteCode(node->local.tcond);
475     DeleteByteCode(node->local.fcond);
476     break;
477     }
478 tim 2440
479 gezelter 3057 free(node);
480     }
481     }
482 tim 2440
483 gezelter 3057 static void FatalMemoryError(void)
484     {
485     obErrorLog.ThrowError(__func__, "Unable to allocate memory for biomolecule residue / chain perception.", obError);
486 tim 2440 // exit(1);
487 gezelter 3057 }
488 tim 2440
489 gezelter 3057 void GenerateByteCodes(ByteCode **node, int resid, int curr, int prev, int bond)
490     {
491 tim 2440 StackType neighbour[4];
492     StackType original;
493     int count,i,j;
494     ByteCode *ptr;
495     bool done,found;
496    
497     if( curr != prev )
498 gezelter 3057 {
499 tim 2440 if( MonoAtom[curr].atomid < ATOMMINAMINO )
500 gezelter 3057 {
501 tim 2440 found = false;
502     while( *node && ((*node)->type==BC_IDENT) )
503 gezelter 3057 {
504 tim 2440 if( (*node)->ident.value == MonoAtom[curr].atomid )
505 gezelter 3057 {
506 tim 2440 node = (ByteCode**)&(*node)->ident.tcond;
507     found = true;
508     break;
509 gezelter 3057 }
510 tim 2440 else
511 gezelter 3057 node = (ByteCode**)&(*node)->ident.fcond;
512     }
513 tim 2440
514     if (!found)
515 gezelter 3057 {
516 tim 2440 ptr = AllocateByteCode(BC_IDENT);
517     ptr->ident.tcond = (ByteCode*)0;
518     ptr->ident.fcond = *node;
519     *node = ptr;
520     node = (ByteCode**)&ptr->ident.tcond;
521     ptr->ident.value = MonoAtom[curr].atomid;
522 gezelter 3057 }
523 tim 2440 MonoBond[bond].index = BondIndex++;
524     done = true;
525 gezelter 3057 }
526 tim 2440 else if( MonoAtom[curr].index != -1 )
527 gezelter 3057 {
528 tim 2440 while( *node && ((*node)->type==BC_IDENT) )
529 gezelter 3057 node = (ByteCode**)&(*node)->ident.fcond;
530 tim 2440
531     found = false;
532     while( *node && ((*node)->type==BC_LOCAL) )
533 gezelter 3057 {
534 tim 2440 if( (*node)->local.value == MonoAtom[curr].index )
535 gezelter 3057 {
536 tim 2440 node = (ByteCode**)&(*node)->local.tcond;
537     found = true;
538     break;
539 gezelter 3057 }
540 tim 2440 else
541 gezelter 3057 node = (ByteCode**)&(*node)->local.fcond;
542     }
543 tim 2440
544     if (!found)
545 gezelter 3057 {
546 tim 2440 ptr = AllocateByteCode(BC_LOCAL);
547     ptr->local.tcond = (ByteCode*)0;
548     ptr->local.fcond = *node;
549     *node = ptr;
550     node = (ByteCode**)&ptr->local.tcond;
551     ptr->local.value = MonoAtom[curr].index;
552 gezelter 3057 }
553 tim 2440
554     MonoBond[bond].index = BondIndex++;
555     done = true;
556 gezelter 3057 }
557 tim 2440 else
558 gezelter 3057 {
559 tim 2440 while( *node && ((*node)->type==BC_IDENT) )
560 gezelter 3057 node = (ByteCode**)&(*node)->ident.fcond;
561 tim 2440 while( *node && ((*node)->type==BC_LOCAL) )
562 gezelter 3057 node = (ByteCode**)&(*node)->local.fcond;
563 tim 2440
564     found = false;
565     while( *node && ((*node)->type==BC_ELEM) )
566 gezelter 3057 {
567 tim 2440 if( (*node)->elem.value == MonoAtom[curr].elem )
568 gezelter 3057 {
569 tim 2440 node = (ByteCode**)&(*node)->elem.tcond;
570     found = true;
571     break;
572 gezelter 3057 }
573 tim 2440 else
574 gezelter 3057 node = (ByteCode**)&(*node)->elem.fcond;
575     }
576 tim 2440
577     if( !found )
578 gezelter 3057 {
579 tim 2440 ptr = AllocateByteCode(BC_ELEM);
580     ptr->elem.tcond = (ByteCode*)0;
581     ptr->elem.fcond = *node;
582     *node = ptr;
583     node = (ByteCode**)&ptr->elem.tcond;
584     ptr->elem.value = MonoAtom[curr].elem;
585 gezelter 3057 }
586 tim 2440
587     MonoAtom[curr].index = AtomIndex++;
588     MonoBond[bond].index = BondIndex++;
589     done = false;
590 gezelter 3057 }
591     }
592 tim 2440 else
593 gezelter 3057 {
594 tim 2440 MonoAtom[curr].index = AtomIndex++;
595     done = false;
596 gezelter 3057 }
597 tim 2440
598     count = 0;
599     if (!done)
600 gezelter 3057 {
601 tim 2440 for( i=0; i<MonoBondCount; i++ )
602 gezelter 3057 {
603 tim 2440 if( MonoBond[i].src == curr )
604 gezelter 3057 {
605 tim 2440 if( MonoBond[i].dst != prev )
606 gezelter 3057 {
607 tim 2440 neighbour[count].atom = MonoBond[i].dst;
608     neighbour[count].bond = i;
609     count++;
610 gezelter 3057 }
611     }
612 tim 2440 else if( MonoBond[i].dst == curr )
613 gezelter 3057 {
614 tim 2440 if( MonoBond[i].src != prev )
615 gezelter 3057 {
616 tim 2440 neighbour[count].atom = MonoBond[i].src;
617     neighbour[count].bond = i;
618     count++;
619 gezelter 3057 }
620     }
621     }
622 tim 2440
623     if ( *node && ((*node)->type==BC_EVAL) )
624 gezelter 3057 {
625 tim 2440 found = false;
626     node = (ByteCode**)&(*node)->eval.next;
627     while( *node && ((*node)->type==BC_COUNT) )
628 gezelter 3057 {
629 tim 2440 if( (*node)->count.value == count )
630 gezelter 3057 {
631 tim 2440 node = (ByteCode**)&(*node)->count.tcond;
632     found = true;
633     break;
634 gezelter 3057 }
635 tim 2440 else
636 gezelter 3057 node = (ByteCode**)&(*node)->count.fcond;
637     }
638 tim 2440
639     if( !found )
640 gezelter 3057 {
641 tim 2440 ptr = AllocateByteCode(BC_COUNT);
642     ptr->count.tcond = (ByteCode*)0;
643     ptr->count.fcond = *node;
644     *node = ptr;
645     node = (ByteCode**)&ptr->count.tcond;
646     ptr->count.value = count;
647 gezelter 3057 }
648     }
649 tim 2440 else if( count || StrictFlag || StackPtr )
650 gezelter 3057 {
651 tim 2440 ptr = AllocateByteCode(BC_EVAL);
652     ptr->eval.next = *node;
653     *node = ptr;
654     node = (ByteCode**)&ptr->eval.next;
655    
656     ptr = AllocateByteCode(BC_COUNT);
657     ptr->count.tcond = (ByteCode*)0;
658     ptr->count.fcond = *node;
659     *node = ptr;
660     node = (ByteCode**)&ptr->count.tcond;
661     ptr->count.value = count;
662 gezelter 3057 }
663     }
664 tim 2440
665     if( count == 1 )
666 gezelter 3057 {
667 tim 2440 GenerateByteCodes(node,resid,neighbour[0].atom, curr,neighbour[0].bond);
668 gezelter 3057 }
669 tim 2440 else if( count == 2 )
670 gezelter 3057 {
671 tim 2440 original = Stack[StackPtr++];
672     Stack[StackPtr-1] = neighbour[0];
673     Stack[StackPtr-1].prev = curr;
674     GenerateByteCodes(node,resid,neighbour[1].atom,
675     curr,neighbour[1].bond);
676     Stack[StackPtr-1] = neighbour[1];
677     Stack[StackPtr-1].prev = curr;
678     GenerateByteCodes(node,resid,neighbour[0].atom,
679     curr,neighbour[0].bond);
680     Stack[--StackPtr] = original;
681 gezelter 3057 }
682 tim 2440 else if( count )
683 gezelter 3057 {
684     stringstream errorMsg;
685 tim 2440 errorMsg << "Maximum Monomer Fanout Exceeded!" << endl;
686     errorMsg << "Residue " << ChainsResName[resid] << " atom "
687 gezelter 3057 << curr << endl;
688     errorMsg << "Previous = " << prev << " Fanout = " << count << endl;
689     obErrorLog.ThrowError(__func__, errorMsg.str(), obWarning);
690     }
691 tim 2440 else if( StackPtr )
692 gezelter 3057 {
693 tim 2440 StackPtr--;
694     GenerateByteCodes(node,resid,Stack[StackPtr].atom,
695     Stack[StackPtr].prev,Stack[StackPtr].bond);
696     StackPtr++;
697 gezelter 3057 }
698 tim 2440 else if( !(*node) )
699 gezelter 3057 {
700 tim 2440 ptr = AllocateByteCode(BC_ASSIGN);
701     ptr->assign.resid = resid;
702     ptr->assign.atomid = (int*)malloc(AtomIndex*sizeof(int));
703     if( !ptr->assign.atomid )
704 gezelter 3057 FatalMemoryError();
705 tim 2440 for( i=0; i<MonoAtomCount; i++ )
706 gezelter 3057 if( (j=MonoAtom[i].index) != -1 )
707     ptr->assign.atomid[j] = MonoAtom[i].atomid;
708 tim 2440 if( BondIndex )
709 gezelter 3057 {
710 tim 2440 ptr->assign.bflags = (int*)malloc(BondIndex*sizeof(int));
711     for( i=0; i<MonoBondCount; i++ )
712 gezelter 3057 if( (j=MonoBond[i].index) != -1 )
713     ptr->assign.bflags[j] = MonoBond[i].flag;
714     }
715 tim 2440 *node = ptr;
716 gezelter 3057 }
717 tim 2440 else if( (*node)->type == BC_ASSIGN )
718 gezelter 3057 {
719 tim 2440 if( (*node)->assign.resid != resid )
720 gezelter 3057 {
721     stringstream errorMsg;
722     errorMsg << "Duplicated Monomer Specification!\n";
723     errorMsg << "Residue " << ChainsResName[resid]
724     << " matches residue ";
725     errorMsg << ChainsResName[(*node)->assign.resid] << endl;
726     obErrorLog.ThrowError(__func__, errorMsg.str(), obWarning);
727     }
728     }
729 tim 2440
730     /* Restore State! */
731     if( curr != prev )
732 gezelter 3057 {
733 tim 2440 if( !done )
734 gezelter 3057 {
735 tim 2440 MonoAtom[curr].index = -1;
736     AtomIndex--;
737 gezelter 3057 }
738 tim 2440 MonoBond[bond].index = -1;
739     BondIndex--;
740 gezelter 3057 }
741     }
742 tim 2440
743 gezelter 3057 //////////////////////////////////////////////////////////////////////////////
744     // Constructors / Destructors
745     //////////////////////////////////////////////////////////////////////////////
746 tim 2440
747 gezelter 3057 // validated
748     OBChainsParser::OBChainsParser(void)
749     {
750 tim 2440 int i, res = RESIDMIN;
751    
752     PDecisionTree = (ByteCode*)0;
753     for( i=0 ; i < AMINOMAX ; i++ )
754 gezelter 3057 {
755     strncpy(ChainsResName[res],AminoAcids[i].name, sizeof(ChainsResName[res]) - 1);
756     ChainsResName[res][sizeof(ChainsResName[res]) - 1] = '\0';
757 tim 2440 DefineMonomer(&PDecisionTree,res,AminoAcids[i].data);
758     res++;
759 gezelter 3057 }
760 tim 2440
761     NDecisionTree = (ByteCode*)0;
762     for( i=0 ; i< NUCLEOMAX ; i++ )
763 gezelter 3057 {
764     strncpy(ChainsResName[res],Nucleotides[i].name, sizeof(ChainsResName[res]) - 1);
765     ChainsResName[res][sizeof(ChainsResName[res]) - 1] = '\0';
766 tim 2440 DefineMonomer(&NDecisionTree,res,Nucleotides[i].data);
767     res++;
768 gezelter 3057 }
769 tim 2440
770     bitmasks = NULL;
771     hetflags = NULL;
772     atomids = NULL;
773     resids = NULL;
774     resnos = NULL;
775     sernos = NULL;
776     hcounts = NULL;
777     chains = NULL;
778     flags = NULL;
779 gezelter 3057 }
780 tim 2440
781 gezelter 3057 OBChainsParser::~OBChainsParser(void)
782     {
783     DeleteByteCode((ByteCode*)PDecisionTree);
784     DeleteByteCode((ByteCode*)NDecisionTree);
785     }
786 tim 2440
787 gezelter 3057 //////////////////////////////////////////////////////////////////////////////
788     // Setup / Cleanup Functions
789     //////////////////////////////////////////////////////////////////////////////
790 tim 2440
791 gezelter 3057 //! Setup parsing for this molecule --
792     void OBChainsParser::SetupMol(OBMol &mol)
793     {
794 tim 2440 CleanupMol();
795    
796     int i;
797     int asize = mol.NumAtoms();
798     int bsize = mol.NumBonds();
799    
800     bitmasks = new unsigned short[asize];
801     resids = new unsigned char[asize];
802     flags = new unsigned char[bsize];
803     hetflags = new bool[asize];
804     atomids = new short[asize];
805     resnos = new short[asize];
806     sernos = new short[asize];
807     hcounts = new char[asize];
808     chains = new char[asize];
809    
810     memset(bitmasks, 0, sizeof(unsigned short) * asize);
811     memset(resids, 0, sizeof(unsigned char) * asize);
812     memset(hetflags, 0, sizeof(bool) * asize);
813     memset(resnos, 0, sizeof(short) * asize);
814     memset(sernos, 0, sizeof(short) * asize);
815     memset(hcounts, 0, sizeof(char) * asize);
816     memset(chains, ' ', sizeof(char) * asize);
817    
818     memset(flags, 0, sizeof(unsigned char) * bsize);
819    
820     for ( i = 0 ; i < asize ; i++ )
821 gezelter 3057 {
822 tim 2440 atomids[i] = -1;
823 gezelter 3057 }
824     }
825 tim 2440
826 gezelter 3057 //! Clean up any molecular data left in memory -- frees all memory afterwards
827     //! Used by OBChainsParser::SetupMol()
828     void OBChainsParser::CleanupMol(void)
829     {
830 tim 2440 if (bitmasks != NULL)
831 gezelter 3057 {
832 tim 2440 delete bitmasks;
833     bitmasks = NULL;
834 gezelter 3057 }
835 tim 2440 if (hetflags != NULL)
836 gezelter 3057 {
837 tim 2440 delete hetflags;
838     hetflags = NULL;
839 gezelter 3057 }
840 tim 2440 if (atomids != NULL)
841 gezelter 3057 {
842 tim 2440 delete atomids;
843     atomids = NULL;
844 gezelter 3057 }
845 tim 2440 if (resids != NULL)
846 gezelter 3057 {
847 tim 2440 delete resids;
848     resids = NULL;
849 gezelter 3057 }
850 tim 2440 if (resnos != NULL)
851 gezelter 3057 {
852 tim 2440 delete resnos;
853     resnos = NULL;
854 gezelter 3057 }
855 tim 2440 if (sernos != NULL)
856 gezelter 3057 {
857 tim 2440 delete sernos;
858     sernos = NULL;
859 gezelter 3057 }
860 tim 2440 if (hcounts != NULL)
861 gezelter 3057 {
862 tim 2440 delete hcounts;
863     hcounts = NULL;
864 gezelter 3057 }
865 tim 2440 if (chains != NULL)
866 gezelter 3057 {
867 tim 2440 delete chains;
868     chains = NULL;
869 gezelter 3057 }
870 tim 2440 if (flags != NULL)
871 gezelter 3057 {
872 tim 2440 delete flags;
873     flags = NULL;
874 gezelter 3057 }
875     }
876 tim 2440
877 gezelter 3057 //! Clear all residue information for a supplied molecule
878     void OBChainsParser::ClearResidueInformation(OBMol &mol)
879     {
880 tim 2440 OBResidue *residue;
881     vector<OBResidue*> residues;
882     vector<OBResidue*>::iterator r;
883    
884     for (residue = mol.BeginResidue(r) ; residue ; residue = mol.NextResidue(r))
885 gezelter 3057 residues.push_back(residue);
886 tim 2440
887     for ( unsigned int i = 0 ; i < residues.size() ; i++ )
888 gezelter 3057 mol.DeleteResidue(residues[i]);
889 tim 2440
890     residues.clear();
891 gezelter 3057 }
892 tim 2440
893 gezelter 3057 void OBChainsParser::SetResidueInformation(OBMol &mol, bool nukeSingleResidue)
894     {
895     char buffer[BUFF_SIZE];
896 tim 2440 char *symbol;
897     string atomid, name;
898    
899     OBAtom *atom;
900     OBResidue *residue;
901     map<short, OBResidue *> resmap;
902    
903     int size = mol.NumAtoms();
904     for ( int i = 0 ; i < size ; i++ )
905 gezelter 3057 {
906 tim 2440 atom = mol.GetAtom(i+1); // WARNING: ATOM INDEX ISSUE
907    
908     if (atomids[i] == -1)
909 gezelter 3057 {
910 tim 2440 symbol = etab.GetSymbol(atom->GetAtomicNum());
911 gezelter 3057 if( symbol[1] )
912     {
913     buffer[0] = symbol[0];
914     buffer[1] = (char) toupper(symbol[1]);
915     }
916     else
917     {
918     buffer[0] = ' ';
919     buffer[1] = symbol[0];
920     }
921     buffer[2] = ' ';
922     buffer[3] = ' ';
923     buffer[4] = '\0';
924     }
925 tim 2440 else if (atom->IsHydrogen())
926 gezelter 3057 {
927 tim 2440 if (hcounts[i])
928 gezelter 3057 sprintf(buffer, "%cH%.2s", hcounts[i]+'0', ChainsAtomName[atomids[i]]+2);
929 tim 2440 else
930 gezelter 3057 sprintf(buffer, "H%.2s", ChainsAtomName[atomids[i]]+2);
931     }
932 tim 2440 else
933 gezelter 3057 sprintf(buffer, "%.4s", ChainsAtomName[atomids[i]]);
934 tim 2440
935     if (buffer[3] == ' ')
936 gezelter 3057 buffer[3] = '\0';
937 tim 2440
938     atomid = (buffer[0] == ' ') ? buffer + 1 : buffer;
939    
940     if (resmap.find(resnos[i]) != resmap.end())
941 gezelter 3057 {
942 tim 2440 residue = resmap[resnos[i]];
943     residue->AddAtom(atom);
944     residue->SetAtomID(atom, atomid);
945     residue->SetHetAtom(atom, hetflags[i]);
946     residue->SetSerialNum(atom, sernos[i]);
947 gezelter 3057 }
948 tim 2440 else
949 gezelter 3057 {
950 tim 2440 name = ChainsResName[resids[i]];
951     residue = mol.NewResidue();
952    
953     residue->SetName(name);
954     residue->SetNum(resnos[i]);
955     residue->SetChain(chains[i]);
956     residue->SetChainNum((chains[i] > 'A') ? (int)(chains[i] - 'A') : 1);
957    
958     residue->AddAtom(atom);
959     residue->SetAtomID(atom, atomid);
960     residue->SetHetAtom(atom, hetflags[i]);
961     residue->SetSerialNum(atom, sernos[i]);
962    
963     resmap[resnos[i]] = residue;
964 gezelter 3057 }
965     }
966 tim 2440
967     if (mol.NumResidues() == 1 && nukeSingleResidue)
968 gezelter 3057 mol.DeleteResidue(mol.GetResidue(0));
969     else if (mol.NumResidues() == 1
970     && (mol.GetResidue(0))->GetName() == "UNK")
971     mol.DeleteResidue(mol.GetResidue(0));
972     }
973 tim 2440
974 gezelter 3057 ////////////////////////////////////////////////////////////////////////////////
975     // Perception Functions
976     ////////////////////////////////////////////////////////////////////////////////
977 tim 2440
978 gezelter 3057 bool OBChainsParser::PerceiveChains(OBMol &mol, bool nukeSingleResidue)
979     {
980 tim 2440 bool result = true;
981    
982     SetupMol(mol);
983     ClearResidueInformation(mol);
984    
985     result = DetermineHetAtoms(mol) && result;
986     result = DetermineConnectedChains(mol) && result;
987     result = DeterminePeptideBackbone(mol) && result;
988     result = DeterminePeptideSidechains(mol) && result;
989     result = DetermineNucleicBackbone(mol) && result;
990     result = DetermineNucleicSidechains(mol) && result;
991     result = DetermineHydrogens(mol) && result;
992    
993     SetResidueInformation(mol, nukeSingleResidue);
994     CleanupMol();
995    
996 tim 2518 obErrorLog.ThrowError(__func__,
997 gezelter 3057 "Ran OpenBabel::PerceiveChains", obAuditMsg);
998 tim 2440
999     return result;
1000 gezelter 3057 }
1001 tim 2440
1002 gezelter 3057 ////////////////////////////////////////////////////////////////////////////////
1003     // Hetero Atom Perception
1004     ////////////////////////////////////////////////////////////////////////////////
1005 tim 2440
1006 gezelter 3057 bool OBChainsParser::DetermineHetAtoms(OBMol &mol)
1007     {
1008 tim 2440 OBAtom *atom;
1009     vector<OBNodeBase *>::iterator a;
1010     for (atom = mol.BeginAtom(a) ; atom ; atom = mol.NextAtom(a))
1011 gezelter 3057 if (!atom->IsHydrogen() && atom->GetValence() == 0)
1012 tim 2440 {
1013 gezelter 3057 // find un-connected atoms (e.g., HOH oxygen atoms)
1014     // if it's not an oxygen, it's probably some ligand
1015     resids[atom->GetIdx()-1] = (atom->IsOxygen()) ? 1 : 2;
1016     hetflags[atom->GetIdx()-1] = true;
1017 tim 2440 }
1018     return true;
1019 gezelter 3057 }
1020 tim 2440
1021 gezelter 3057 ////////////////////////////////////////////////////////////////////////////////
1022     // Connected Chain Perception
1023     ////////////////////////////////////////////////////////////////////////////////
1024 tim 2440
1025 gezelter 3057 bool OBChainsParser::DetermineConnectedChains(OBMol &mol)
1026     {
1027 tim 2440 int resid;
1028     int resno;
1029     int count;
1030     int size;
1031     int i,idx;
1032     int numAtoms;
1033    
1034     resno = 1;
1035     count = 0;
1036     numAtoms = mol.NumAtoms();
1037    
1038     OBAtom *atom;
1039     vector<OBNodeBase *>::iterator a;
1040     for (atom = mol.BeginAtom(a) ; atom ; atom = mol.NextAtom(a))
1041 gezelter 3057 {
1042 tim 2440 idx = atom->GetIdx() - 1;
1043     if (!hetflags[idx] && chains[idx] == ' ' && !atom->IsHydrogen())
1044 gezelter 3057 {
1045 tim 2440 size = RecurseChain(mol, idx, 'A' + count);
1046     if (size < 10)
1047 gezelter 3057 {
1048 tim 2440 if (size == 1 && atom->IsOxygen())
1049 gezelter 3057 resid = 1; /* HOH */
1050 tim 2440 else
1051 gezelter 3057 resid = 2; /* LIG */
1052 tim 2440
1053     for (i = 0 ; i < numAtoms ; i++)
1054 gezelter 3057 {
1055 tim 2440 if (chains[i] == ('A' + count))
1056 gezelter 3057 {
1057 tim 2440 hetflags[i] = true;
1058     resids[i] = resid;
1059     resnos[i] = resno;
1060     chains[i] = ' ';
1061 gezelter 3057 }
1062     }
1063 tim 2440 resno++;
1064 gezelter 3057 }
1065 tim 2440 else
1066 gezelter 3057 count++;
1067     }
1068     }
1069 tim 2440
1070     if( count == 1 )
1071 gezelter 3057 for ( i = 0 ; i < numAtoms ; i++ )
1072     chains[i] = ' ';
1073 tim 2440
1074     return true;
1075 gezelter 3057 }
1076 tim 2440
1077 gezelter 3057 int OBChainsParser::RecurseChain(OBMol &mol, int i, int c)
1078     {
1079 tim 2440 OBAtom *atom, *nbr;
1080     vector<OBEdgeBase *>::iterator b;
1081     int result, index;
1082    
1083     atom = mol.GetAtom(i+1);
1084     if (atom->IsHydrogen() )
1085     return 0;
1086    
1087     result = 1;
1088     chains[i] = c;
1089    
1090     for (nbr = atom->BeginNbrAtom(b) ; nbr ; nbr = atom->NextNbrAtom(b))
1091     {
1092 gezelter 3057 index = nbr->GetIdx() - 1;
1093 tim 2440 if (chains[index] == ' ')
1094 gezelter 3057 result += RecurseChain(mol, index,c);
1095 tim 2440 }
1096    
1097     return (result);
1098 gezelter 3057 }
1099 tim 2440
1100 gezelter 3057 ////////////////////////////////////////////////////////////////////////////////
1101     // Peptide Backbone Perception
1102     ////////////////////////////////////////////////////////////////////////////////
1103 tim 2440
1104 gezelter 3057 bool OBChainsParser::DeterminePeptideBackbone(OBMol &mol)
1105     {
1106 tim 2440 ConstrainBackbone(mol, Peptide, MAXPEPTIDE);
1107    
1108     int i, max = mol.NumAtoms();
1109    
1110     /* Order Peptide Backbone */
1111    
1112     for ( i = 0 ; i < max ; i++ )
1113 gezelter 3057 if (atomids[i] == -1)
1114 tim 2440 {
1115 gezelter 3057 if( bitmasks[i] & BitNTer )
1116 tim 2440 {
1117 gezelter 3057 atomids[i] = AI_N;
1118     TracePeptideChain(mol,i,1);
1119 tim 2440 }
1120 gezelter 3057 else if( (bitmasks[i]&BitNPT) && !(bitmasks[i]&BitN) )
1121 tim 2440 {
1122 gezelter 3057 atomids[i] = AI_N;
1123     TracePeptideChain(mol,i,1);
1124 tim 2440 }
1125     }
1126    
1127     /* Carbonyl Double Bond */
1128    
1129     OBBond *bond;
1130     vector<OBEdgeBase*>::iterator b;
1131     for (bond = mol.BeginBond(b) ; bond ; bond = mol.NextBond(b))
1132 gezelter 3057 {
1133 tim 2440 if ((atomids[bond->GetBeginAtomIdx()-1] == 2 && atomids[bond->GetEndAtomIdx()-1] == 3) ||
1134 gezelter 3057 (atomids[bond->GetBeginAtomIdx()-1] == 3 && atomids[bond->GetEndAtomIdx()-1] == 2))
1135     flags[bond->GetIdx()] |= BF_DOUBLE;
1136     }
1137 tim 2440
1138     return true;
1139 gezelter 3057 }
1140 tim 2440
1141 gezelter 3057 void OBChainsParser::ConstrainBackbone(OBMol &mol, Template *templ, int tmax)
1142     {
1143 tim 2440 static OBAtom *neighbour[6];
1144     Template *pep;
1145     OBAtom *na,*nb,*nc,*nd;
1146     OBAtom *atom, *nbr;
1147     bool change, result;
1148     int count;
1149     int i,idx;
1150    
1151     vector<OBNodeBase *>::iterator a;
1152     vector<OBEdgeBase *>::iterator b;
1153    
1154     /* First Pass */
1155    
1156     for (atom = mol.BeginAtom(a) ; atom ; atom = mol.NextAtom(a))
1157 gezelter 3057 {
1158 tim 2440 idx = atom->GetIdx() - 1;
1159     bitmasks[idx] = 0;
1160     for ( i = 0 ; i < tmax ; i++ )
1161 gezelter 3057 if ( (static_cast<unsigned int>(templ[i].elem) == atom->GetAtomicNum())
1162     &&
1163     (static_cast<unsigned int>(templ[i].count) == atom->GetHvyValence()))
1164     bitmasks[idx] |= templ[i].flag;
1165     }
1166 tim 2440
1167     /* Second Pass */
1168    
1169     do
1170 gezelter 3057 {
1171 tim 2440 change = false;
1172     for (atom = mol.BeginAtom(a) ; atom ; atom = mol.NextAtom(a))
1173 gezelter 3057 {
1174 tim 2440 idx = atom->GetIdx() - 1;
1175     if (bitmasks[idx]) // Determine Neighbours
1176 gezelter 3057 {
1177 tim 2440 count = 0;
1178     for (nbr = atom->BeginNbrAtom(b) ; nbr ; nbr = atom->NextNbrAtom(b))
1179 gezelter 3057 if (!nbr->IsHydrogen())
1180     neighbour[count++] = nbr;
1181 tim 2440
1182     na = neighbour[0];
1183     nb = neighbour[1];
1184     nc = neighbour[2];
1185     nd = neighbour[3];
1186    
1187     for ( i = 0 ; i < tmax ; i++ )
1188 gezelter 3057 if ( templ[i].flag & bitmasks[idx] )
1189 tim 2440 {
1190 gezelter 3057 pep = &templ[i];
1191     result = true;
1192 tim 2440
1193 gezelter 3057 if (count == 4)
1194     result = Match4Constraints(pep,na,nb,nc,nd);
1195     else if (count == 3)
1196     result = Match3Constraints(pep,na,nb,nc);
1197     else if (count == 2)
1198     result = Match2Constraints(pep,na,nb);
1199     else // count == 1
1200     result = MatchConstraint(na,pep->n1);
1201 tim 2440
1202 gezelter 3057 if(result == false)
1203 tim 2440 {
1204 gezelter 3057 bitmasks[idx] &= ~pep->flag;
1205     change = true;
1206 tim 2440 }
1207     }
1208 gezelter 3057 }
1209     }
1210     }
1211 tim 2440 while( change );
1212 gezelter 3057 }
1213 tim 2440
1214 gezelter 3057 bool OBChainsParser::MatchConstraint(OBAtom *atom, int mask)
1215     {
1216     if (atom == NULL)
1217     return (false);
1218 tim 2440
1219     if( mask < 0 )
1220 gezelter 3057 return(atom->GetAtomicNum() == static_cast<unsigned int>(-mask));
1221 tim 2440 else
1222 gezelter 3057 return(((bitmasks[atom->GetIdx()-1]&mask) == 0) ? false : true);
1223     }
1224 tim 2440
1225 gezelter 3057 bool OBChainsParser::Match2Constraints(Template *tmpl, OBAtom *na, OBAtom *nb)
1226     {
1227     if (na == NULL || nb == NULL)
1228     return (false); // don't even try to evaluate it
1229 tim 2440
1230     if( MatchConstraint(na,tmpl->n2) )
1231 gezelter 3057 if( MatchConstraint(nb,tmpl->n1) )
1232     return( true );
1233 tim 2440 if( MatchConstraint(nb,tmpl->n2) )
1234 gezelter 3057 if( MatchConstraint(na,tmpl->n1) )
1235     return( true );
1236 tim 2440 return( false );
1237 gezelter 3057 }
1238 tim 2440
1239 gezelter 3057 bool OBChainsParser::Match3Constraints(Template *tmpl, OBAtom *na, OBAtom *nb, OBAtom *nc)
1240     {
1241     if (na == NULL || nb == NULL || nc == NULL)
1242     return (false); // don't even try to evaluate it
1243 tim 2440
1244     if( MatchConstraint(na,tmpl->n3) )
1245 gezelter 3057 if( Match2Constraints(tmpl,nb,nc) )
1246     return( true );
1247 tim 2440 if( MatchConstraint(nb,tmpl->n3) )
1248 gezelter 3057 if( Match2Constraints(tmpl,na,nc) )
1249     return( true );
1250 tim 2440 if( MatchConstraint(nc,tmpl->n3) )
1251 gezelter 3057 if( Match2Constraints(tmpl,na,nb) )
1252     return( true );
1253 tim 2440 return( false );
1254 gezelter 3057 }
1255 tim 2440
1256 gezelter 3057 bool OBChainsParser::Match4Constraints(Template *tmpl, OBAtom *na, OBAtom *nb, OBAtom *nc, OBAtom *nd)
1257     {
1258     if (na == NULL || nb == NULL || nc == NULL || nd == NULL)
1259     return (false); // don't even try to evaluate it
1260 tim 2440
1261     if( MatchConstraint(na,tmpl->n4) )
1262 gezelter 3057 if( Match3Constraints(tmpl,nb,nc,nd) )
1263     return( true );
1264 tim 2440 if( MatchConstraint(nb,tmpl->n4) )
1265 gezelter 3057 if( Match3Constraints(tmpl,na,nc,nd) )
1266     return( true );
1267 tim 2440 if( MatchConstraint(nc,tmpl->n4) )
1268 gezelter 3057 if( Match3Constraints(tmpl,na,nb,nd) )
1269     return( true );
1270 tim 2440 if( MatchConstraint(nd,tmpl->n4) )
1271 gezelter 3057 if( Match3Constraints(tmpl,na,nb,nc) )
1272     return( true );
1273 tim 2440 return( false );
1274 gezelter 3057 }
1275 tim 2440
1276 gezelter 3057 void OBChainsParser::TracePeptideChain(OBMol &mol, int i, int r)
1277     {
1278 tim 2440 int neighbour[4];
1279     int na,nb,nc;
1280     OBAtom *atom, *nbr;
1281     int count;
1282     int j,k,idx;
1283    
1284     vector<OBEdgeBase *>::iterator b;
1285    
1286     /* Determine Neighbours */
1287    
1288     atom = mol.GetAtom(i+1);
1289     idx = atom->GetIdx() - 1;
1290 gezelter 3057 bitmasks[i] &= BitVisit;
1291 tim 2440
1292     count = 0;
1293     for (nbr = atom->BeginNbrAtom(b) ; nbr ; nbr = atom->NextNbrAtom(b))
1294 gezelter 3057 if (!nbr->IsHydrogen())
1295     neighbour[count++] = nbr->GetIdx()-1;
1296 tim 2440
1297     resnos[idx] = r;
1298    
1299     na = neighbour[0];
1300     nb = neighbour[1];
1301     nc = neighbour[2];
1302    
1303     switch( atomids[i] )
1304 gezelter 3057 {
1305     case(AI_N):
1306     for( j=0; j<count; j++ )
1307     if( bitmasks[neighbour[j]] & BitCAAll )
1308     {
1309     atomids[neighbour[j]] = AI_CA;
1310     if (!(bitmasks[neighbour[j]] & BitVisit))
1311     TracePeptideChain(mol,neighbour[j],r);
1312     }
1313 tim 2440 break;
1314    
1315 gezelter 3057 case(AI_CA):
1316     if( count == 3 )
1317     {
1318     if ( bitmasks[na] & BitNAll )
1319     na = nc;
1320     else if ( bitmasks[nb] & BitNAll )
1321     nb = nc;
1322 tim 2440
1323 gezelter 3057 if ( bitmasks[na] & BitC )
1324     {
1325     j = na;
1326     k = nb;
1327     }
1328     else if ( bitmasks[nb] & BitC )
1329     {
1330     j = nb;
1331     k = na;
1332     }
1333     else if( bitmasks[na] & BitCAll )
1334     {
1335     j = na;
1336     k = nb;
1337     }
1338     else /* bitmasks[nb] & BitCAll */
1339     {
1340     j = nb;
1341     k = na;
1342     }
1343 tim 2440
1344 gezelter 3057 atomids[j] = AI_C;
1345     bitmasks[k] = 0;
1346 tim 2440
1347 gezelter 3057 if (!(bitmasks[j] & BitVisit))
1348     TracePeptideChain(mol,j,r);
1349     }
1350     else /* count == 2 */
1351     {
1352     if ( bitmasks[na] & BitCAll )
1353     {
1354     atomids[na] = AI_C;
1355     if (!(bitmasks[na] & BitVisit))
1356     TracePeptideChain(mol,na,r);
1357     }
1358     else
1359     {
1360     atomids[nb] = AI_C;
1361     if (!(bitmasks[nb] & BitVisit))
1362     TracePeptideChain(mol,nb,r);
1363     }
1364     }
1365 tim 2440 break;
1366    
1367 gezelter 3057 case(AI_C):
1368     k = AI_O;
1369 tim 2440 for ( j = 0; j < count; j++ )
1370 gezelter 3057 {
1371 tim 2440 if ( bitmasks[neighbour[j]] & BitNAll )
1372 gezelter 3057 {
1373 tim 2440 atomids[neighbour[j]] = AI_N;
1374 gezelter 3057 if (!(bitmasks[neighbour[j]] & BitVisit))
1375     TracePeptideChain(mol,neighbour[j],r+1);
1376     }
1377 tim 2440 else if( bitmasks[neighbour[j]] & BitOAll )
1378 gezelter 3057 {
1379 tim 2440 atomids[neighbour[j]] = k;
1380     resnos[neighbour[j]] = r;
1381     k = AI_OXT; /* OXT */
1382 gezelter 3057 }
1383     }
1384 tim 2440 break;
1385 gezelter 3057 }
1386     }
1387 tim 2440
1388 gezelter 3057 ////////////////////////////////////////////////////////////////////////////////
1389     // Peptide Sidechains Perception
1390     ////////////////////////////////////////////////////////////////////////////////
1391 tim 2440
1392 gezelter 3057 bool OBChainsParser::DeterminePeptideSidechains(OBMol &mol)
1393     {
1394 tim 2440 int resid;
1395     int max = mol.NumAtoms();
1396    
1397     for (int i = 0 ; i < max ; i++)
1398 gezelter 3057 if (atomids[i] == AI_CA)
1399 tim 2440 {
1400 gezelter 3057 resid = IdentifyResidue(PDecisionTree, mol, i, resnos[i]);
1401     AssignResidue(mol,resnos[i],chains[i],resid);
1402 tim 2440 }
1403    
1404     return true;
1405 gezelter 3057 }
1406 tim 2440
1407 gezelter 3057 void OBChainsParser::AssignResidue(OBMol &mol, int r, int c, int i)
1408     {
1409 tim 2440 int max = mol.NumAtoms();
1410 gezelter 3057
1411 tim 2440 for (int j = 0 ; j < max ; j++)
1412 gezelter 3057 if ((resnos[j] == r) && (chains[j] == c) && !hetflags[j])
1413     resids[j] = i;
1414     }
1415 tim 2440
1416 gezelter 3057 int OBChainsParser::IdentifyResidue(void *tree, OBMol &mol, int seed, int resno)
1417     {
1418 tim 2440 ByteCode *ptr;
1419    
1420     int AtomCount, BondCount;
1421     int curr,prev,bond;
1422     int bcount;
1423     int i,j;
1424    
1425     ptr = (ByteCode *) tree;
1426     bcount = 0;
1427    
1428     Stack[0].atom = seed;
1429     Stack[0].prev = seed;
1430     StackPtr = 0;
1431    
1432     ResMonoAtom[0] = seed;
1433     AtomCount = 1;
1434     BondCount = 0;
1435    
1436     OBAtom *atom, *nbr;
1437     vector<OBEdgeBase *>::iterator b;
1438    
1439     while( ptr )
1440 gezelter 3057 switch(ptr->type)
1441 tim 2440 {
1442     case(BC_IDENT): curr = Stack[StackPtr-1].atom;
1443 gezelter 3057 if( atomids[curr] == ptr->ident.value )
1444 tim 2440 {
1445 gezelter 3057 bond = Stack[StackPtr-1].bond;
1446     ResMonoBond[BondCount++] = bond;
1447     ptr = ptr->ident.tcond;
1448     StackPtr--;
1449 tim 2440 }
1450 gezelter 3057 else
1451     ptr = ptr->ident.fcond;
1452     break;
1453 tim 2440
1454     case(BC_LOCAL): curr = Stack[StackPtr-1].atom;
1455 gezelter 3057 if( curr == ResMonoAtom[ptr->local.value] )
1456 tim 2440 {
1457 gezelter 3057 bond = Stack[StackPtr-1].bond;
1458     ResMonoBond[BondCount++] = bond;
1459     ptr = ptr->local.tcond;
1460     StackPtr--;
1461 tim 2440 }
1462 gezelter 3057 else
1463     ptr = ptr->local.fcond;
1464     break;
1465 tim 2440
1466     case(BC_ELEM): curr = Stack[StackPtr-1].atom;
1467 gezelter 3057 if( mol.GetAtom(curr+1)->GetAtomicNum() == static_cast<unsigned int>(ptr->elem.value)
1468 tim 2440 )
1469     {
1470 gezelter 3057 bond = Stack[StackPtr-1].bond;
1471     ResMonoAtom[AtomCount++] = curr;
1472     ResMonoBond[BondCount++] = bond;
1473     resnos[curr] = resno;
1474     ptr = ptr->elem.tcond;
1475     StackPtr--;
1476 tim 2440 }
1477 gezelter 3057 else
1478     ptr = ptr->elem.fcond;
1479     break;
1480 tim 2440
1481     case(BC_EVAL): bcount = 0;
1482 gezelter 3057 curr = Stack[StackPtr].atom;
1483     prev = Stack[StackPtr].prev;
1484 tim 2440
1485 gezelter 3057 atom = mol.GetAtom(curr+1); // WARNING, potential atom index issue
1486     for (nbr = atom->BeginNbrAtom(b); nbr; nbr = atom->NextNbrAtom(b))
1487     {
1488     if (nbr->IsHydrogen())
1489     continue;
1490 tim 2440
1491 gezelter 3057 j = nbr->GetIdx() - 1;
1492     if (!((curr == prev) && bitmasks[j]) && (j != prev))
1493     {
1494     Stack[StackPtr].prev = curr;
1495     Stack[StackPtr].atom = j;
1496     Stack[StackPtr].bond = (*b)->GetIdx();
1497     StackPtr++;
1498     bcount++;
1499     }
1500     }
1501 tim 2440
1502 gezelter 3057 ptr = ptr->eval.next;
1503     break;
1504    
1505 tim 2440 case(BC_COUNT):
1506 gezelter 3057 if( bcount == ptr->count.value )
1507     {
1508     ptr = ptr->count.tcond;
1509     }
1510     else
1511     ptr = ptr->count.fcond;
1512     break;
1513 tim 2440
1514     case(BC_ASSIGN):
1515 gezelter 3057 for( i=0; i<AtomCount; i++ )
1516     if( !bitmasks[ResMonoAtom[i]] )
1517     {
1518     j = ptr->assign.atomid[i];
1519     atomids[ResMonoAtom[i]] = j;
1520     }
1521     for( i=0; i<BondCount; i++ )
1522 tim 2440 {
1523 gezelter 3057 j = ptr->assign.bflags[i];
1524     flags[ResMonoBond[i]] = j;
1525 tim 2440 }
1526 gezelter 3057 return( ptr->assign.resid );
1527     break;
1528 tim 2440
1529     default: /* Illegal Instruction! */
1530 gezelter 3057 return( 0 );
1531 tim 2440 }
1532     return 0;
1533 gezelter 3057 }
1534 tim 2440
1535 gezelter 3057 ////////////////////////////////////////////////////////////////////////////////
1536     // Nucleic Backbone Perception
1537     ////////////////////////////////////////////////////////////////////////////////
1538 tim 2440
1539 gezelter 3057 bool OBChainsParser::DetermineNucleicBackbone(OBMol &mol)
1540     {
1541 tim 2440 ConstrainBackbone(mol, Nucleotide, MAXNUCLEIC);
1542    
1543     int i, max = mol.NumAtoms();
1544    
1545     /* Order Nucleic Backbone */
1546    
1547     for( i = 0 ; i < max ; i++ )
1548 gezelter 3057 if( atomids[i] == -1 )
1549 tim 2440 {
1550 gezelter 3057 if( bitmasks[i] & BitPTer )
1551 tim 2440 {
1552 gezelter 3057 atomids[i] = AI_P;
1553     TraceNucleicChain(mol,i,1);
1554 tim 2440 }
1555 gezelter 3057 else if( bitmasks[i] & BitO5Ter )
1556 tim 2440 {
1557 gezelter 3057 atomids[i] = AI_O5;
1558     TraceNucleicChain(mol,i,1);
1559 tim 2440 }
1560     }
1561    
1562     return true;
1563 gezelter 3057 }
1564 tim 2440
1565 gezelter 3057 void OBChainsParser::TraceNucleicChain(OBMol &mol, int i, int r)
1566     {
1567 tim 2440 int neighbour[4];
1568     int na,nb,nc;
1569     int count;
1570     int j,k;
1571    
1572     OBAtom *atom, *nbr;
1573     vector<OBEdgeBase *>::iterator b;
1574    
1575     count = 0;
1576     atom = mol.GetAtom(i + 1);
1577     for (nbr = atom->BeginNbrAtom(b) ; nbr ; nbr = atom->NextNbrAtom(b))
1578 gezelter 3057 if (!nbr->IsHydrogen())
1579     neighbour[count++] = nbr->GetIdx() - 1;
1580 tim 2440
1581     resnos[i] = r;
1582 gezelter 3057 bitmasks[i] &= BitVisit;
1583 tim 2440
1584     na = neighbour[0];
1585     nb = neighbour[1];
1586     nc = neighbour[2];
1587    
1588     switch( atomids[i] )
1589 gezelter 3057 {
1590     case(AI_P):
1591     k = AI_O1P; /* O1P */
1592 tim 2440 for( j=0; j<count; j++ )
1593 gezelter 3057 {
1594 tim 2440 if( bitmasks[neighbour[j]] & BitO5 )
1595 gezelter 3057 {
1596 tim 2440 atomids[neighbour[j]] = AI_O5;
1597 gezelter 3057 if (!(bitmasks[neighbour[j]] & BitVisit))
1598     TraceNucleicChain(mol,neighbour[j],r);
1599     }
1600 tim 2440 else if( bitmasks[neighbour[j]] & BitOP )
1601 gezelter 3057 {
1602 tim 2440 atomids[neighbour[j]] = k;
1603     resnos[neighbour[j]] = r;
1604     k = AI_O2P; /* O2P */
1605 gezelter 3057 }
1606     }
1607 tim 2440
1608     break;
1609    
1610 gezelter 3057 case(AI_O5):
1611     for( j=0; j<count; j++ )
1612     if( bitmasks[neighbour[j]] & BitC5 )
1613     {
1614     atomids[neighbour[j]] = AI_C5;
1615     if (!(bitmasks[neighbour[j]] & BitVisit))
1616     TraceNucleicChain(mol,neighbour[j],r);
1617     }
1618 tim 2440
1619     break;
1620    
1621 gezelter 3057 case(AI_C5):
1622     for( j=0 ; j<count; j++ )
1623     if( bitmasks[neighbour[j]] & BitC4 )
1624 tim 2440 {
1625 gezelter 3057 atomids[neighbour[j]] = AI_C4;
1626     if (!(bitmasks[neighbour[j]] & BitVisit))
1627     TraceNucleicChain(mol,neighbour[j],r);
1628 tim 2440 }
1629    
1630     break;
1631    
1632 gezelter 3057 case(AI_C4):
1633     for( j=0; j<count; j++ )
1634     {
1635     if( bitmasks[neighbour[j]] & BitC3 )
1636     {
1637     atomids[neighbour[j]] = AI_C3;
1638     if (!(bitmasks[neighbour[j]] & BitVisit))
1639     TraceNucleicChain(mol,neighbour[j],r);
1640     }
1641     else if( bitmasks[neighbour[j]] & BitO4 )
1642     {
1643     atomids[neighbour[j]] = AI_O4;
1644     resnos[neighbour[j]] = r;
1645     }
1646     }
1647 tim 2440
1648     break;
1649    
1650 gezelter 3057 case(AI_C3):
1651     for( j=0; j<count; j++ )
1652     {
1653     if( bitmasks[neighbour[j]] & BitO3All )
1654     {
1655     atomids[neighbour[j]] = AI_O3;
1656     if (!(bitmasks[neighbour[j]] & BitVisit))
1657     TraceNucleicChain(mol,neighbour[j],r);
1658     }
1659     else if( bitmasks[neighbour[j]] & BitC2All )
1660     {
1661     atomids[neighbour[j]] = AI_C2;
1662     if (!(bitmasks[neighbour[j]] & BitVisit))
1663     TraceNucleicChain(mol,neighbour[j],r);
1664     }
1665     }
1666 tim 2440
1667     break;
1668    
1669 gezelter 3057 case(AI_O3):
1670     for( j=0; j<count; j++ )
1671     if( bitmasks[neighbour[j]] & BitP )
1672 tim 2440 {
1673 gezelter 3057 atomids[neighbour[j]] = AI_P;
1674     if (!(bitmasks[neighbour[j]] & BitVisit))
1675     TraceNucleicChain(mol,neighbour[j],r+1);
1676 tim 2440 }
1677    
1678     break;
1679    
1680 gezelter 3057 case(AI_C2):
1681     for( j=0; j<count; j++ )
1682     {
1683     if( bitmasks[neighbour[j]] & BitC1 )
1684     {
1685     atomids[neighbour[j]] = AI_C1;
1686     resnos[neighbour[j]] = r;
1687     }
1688     else if( bitmasks[neighbour[j]] & BitO2 )
1689     {
1690     atomids[neighbour[j]] = AI_O2;
1691     resnos[neighbour[j]] = r;
1692     }
1693     }
1694 tim 2440
1695 gezelter 3057 break;
1696     }
1697     }
1698    
1699     ////////////////////////////////////////////////////////////////////////////////
1700     // Nucleic Sidechains Perception
1701     ////////////////////////////////////////////////////////////////////////////////
1702    
1703     bool OBChainsParser::DetermineNucleicSidechains(OBMol &mol)
1704     {
1705 tim 2440 for( unsigned int i = 0 ; i < mol.NumAtoms() ; i++ )
1706 gezelter 3057 if( atomids[i] == 49 )
1707 tim 2440 {
1708 gezelter 3057 int resid = IdentifyResidue(NDecisionTree,mol,i,resnos[i]);
1709     AssignResidue(mol,resnos[i],chains[i],resid);
1710 tim 2440 }
1711    
1712     return true;
1713 gezelter 3057 }
1714 tim 2440
1715 gezelter 3057 ////////////////////////////////////////////////////////////////////////////////
1716     // Hydrogens Perception
1717     ////////////////////////////////////////////////////////////////////////////////
1718 tim 2440
1719 gezelter 3057 bool OBChainsParser::DetermineHydrogens(OBMol &mol)
1720     {
1721 tim 2440 OBAtom *atom, *nbr;
1722     int idx,sidx;
1723    
1724     int max = mol.NumAtoms();
1725     for ( int i = 0 ; i < max ; i++ )
1726 gezelter 3057 hcounts[i] = 0;
1727 tim 2440
1728     /* First Pass */
1729    
1730     vector<OBNodeBase*>::iterator a;
1731     vector<OBEdgeBase*>::iterator b;
1732    
1733     for(atom = mol.BeginAtom(a); atom ; atom = mol.NextAtom(a))
1734 gezelter 3057 if(atom->IsHydrogen())
1735 tim 2440 {
1736 gezelter 3057 nbr = atom->BeginNbrAtom(b);
1737     if (nbr != NULL)
1738 tim 2440 {
1739 gezelter 3057 idx = atom->GetIdx() - 1;
1740     sidx = nbr->GetIdx() - 1;
1741 tim 2440
1742 gezelter 3057 hcounts[idx] = ++hcounts[sidx];
1743     hetflags[idx] = hetflags[sidx];
1744     atomids[idx] = atomids[sidx];
1745     resids[idx] = resids[sidx];
1746     resnos[idx] = resnos[sidx];
1747 tim 2440 }
1748     }
1749    
1750     /* Second Pass */
1751    
1752     for(atom = mol.BeginAtom(a) ; atom ; atom = mol.NextAtom(a))
1753 gezelter 3057 if (atom->IsHydrogen())
1754 tim 2440 {
1755 gezelter 3057 nbr = atom->BeginNbrAtom(b);
1756     if (nbr != NULL && hcounts[nbr->GetIdx()-1] == 1)
1757     hcounts[atom->GetIdx()-1] = 0;
1758 tim 2440 }
1759    
1760     return true;
1761 gezelter 3057 }
1762 tim 2440
1763 gezelter 3057 ////////////////////////////////////////////////////////////////////////////////
1764     // Utility Functions
1765     ////////////////////////////////////////////////////////////////////////////////
1766 tim 2440
1767 gezelter 3057 // validated
1768     void OBChainsParser::DefineMonomer(void **tree, int resid, char *smiles)
1769     {
1770 tim 2440 int i;
1771    
1772     MonoAtomCount = 0;
1773     MonoBondCount = 0;
1774    
1775     ParseSmiles(smiles,-1);
1776    
1777     for( i=0; i<MonoBondCount; i++ )
1778 gezelter 3057 MonoBond[i].index = -1;
1779 tim 2440 for( i=0; i<MonoAtomCount; i++ )
1780 gezelter 3057 MonoAtom[i].index = -1;
1781 tim 2440 AtomIndex = BondIndex = 0;
1782    
1783     StackPtr = 0;
1784     GenerateByteCodes((ByteCode**)tree, resid, 0, 0, 0 );
1785 gezelter 3057 }
1786 tim 2440
1787 gezelter 3057 int OBChainsParser::IdentifyElement(char *ptr)
1788     {
1789 tim 2440 int ch;
1790    
1791     ch = toupper(ptr[1]);
1792     switch( toupper(ptr[0]) )
1793 gezelter 3057 {
1794     case(' '): switch( ch )
1795     {
1796     case('B'): return( 5 );
1797     case('C'): return( 6 );
1798     case('D'): return( 1 );
1799     case('F'): return( 9 );
1800     case('H'): return( 1 );
1801     case('I'): return( 53 );
1802     case('K'): return( 19 );
1803     case('L'): return( 1 );
1804     case('N'): return( 7 );
1805     case('O'): return( 8 );
1806     case('P'): return( 15 );
1807     case('S'): return( 16 );
1808     case('U'): return( 92 );
1809     case('V'): return( 23 );
1810     case('W'): return( 74 );
1811     case('Y'): return( 39 );
1812     }
1813 tim 2440 break;
1814    
1815 gezelter 3057 case('A'): switch( ch )
1816     {
1817     case('C'): return( 89 );
1818     case('G'): return( 47 );
1819     case('L'): return( 13 );
1820     case('M'): return( 95 );
1821     case('R'): return( 18 );
1822     case('S'): return( 33 );
1823     case('T'): return( 85 );
1824     case('U'): return( 79 );
1825     }
1826 tim 2440 break;
1827    
1828 gezelter 3057 case('B'): switch( ch )
1829     {
1830     case('A'): return( 56 );
1831     case('E'): return( 4 );
1832     case('I'): return( 83 );
1833     case('K'): return( 97 );
1834     case('R'): return( 35 );
1835     case(' '): return( 5 );
1836     }
1837 tim 2440 break;
1838    
1839 gezelter 3057 case('C'): switch( ch )
1840     {
1841     case('A'): return( 20 );
1842     case('D'): return( 48 );
1843     case('E'): return( 58 );
1844     case('F'): return( 98 );
1845     case('L'): return( 17 );
1846     case('M'): return( 96 );
1847     case('O'): return( 27 );
1848     case('R'): return( 24 );
1849     case('S'): return( 55 );
1850     case('U'): return( 29 );
1851     case(' '): return( 6 );
1852     }
1853 tim 2440 break;
1854    
1855 gezelter 3057 case('D'): if( ch=='Y' )
1856     {
1857     return( 66 );
1858     }
1859     else if( ch==' ' )
1860     return( 1 );
1861 tim 2440 break;
1862    
1863 gezelter 3057 case('E'): if( ch=='R' )
1864     {
1865     return( 68 );
1866     }
1867     else if( ch=='S' )
1868     {
1869     return( 99 );
1870     }
1871     else if( ch=='U' )
1872     return( 63 );
1873 tim 2440 break;
1874    
1875 gezelter 3057 case('F'): if( ch=='E' )
1876     {
1877     return( 26 );
1878     }
1879     else if( ch=='M' )
1880     {
1881     return( 100 );
1882     }
1883     else if( ch=='R' )
1884     {
1885     return( 87 );
1886     }
1887     else if( ch=='F' )
1888     return( 9 );
1889 tim 2440 break;
1890    
1891 gezelter 3057 case('G'): if( ch=='A' )
1892     {
1893     return( 31 );
1894     }
1895     else if( ch=='D' )
1896     {
1897     return( 64 );
1898     }
1899     else if( ch=='E' )
1900     return( 32 );
1901 tim 2440 break;
1902    
1903 gezelter 3057 case('H'): if( ch=='E' )
1904     {
1905     return( 2 );
1906     }
1907     else if( ch=='F' )
1908     {
1909     return( 72 );
1910     }
1911     else if( ch=='G' )
1912     {
1913     return( 80 );
1914     }
1915     else if( ch=='O' )
1916     {
1917     return( 67 );
1918     }
1919     else if( ch==' ' )
1920     return( 1 );
1921 tim 2440 break;
1922    
1923 gezelter 3057 case('I'): if( ch=='N' )
1924     {
1925     return( 49 );
1926     }
1927     else if( ch=='R' )
1928     {
1929     return( 77 );
1930     }
1931     else if( ch==' ' )
1932     return( 53 );
1933 tim 2440 break;
1934    
1935 gezelter 3057 case('K'): if( ch=='R' )
1936     {
1937     return( 36 );
1938     }
1939     else if( ch==' ' )
1940     return( 19 );
1941 tim 2440 break;
1942    
1943 gezelter 3057 case('L'): if( ch=='A' )
1944     {
1945     return( 57 );
1946     }
1947     else if( ch=='I' )
1948     {
1949     return( 3 );
1950     }
1951     else if( (ch=='R') || (ch=='W') )
1952     {
1953     return( 103 );
1954     }
1955     else if( ch=='U' )
1956     {
1957     return( 71 );
1958     }
1959     else if( ch==' ' )
1960     return( 1 );
1961 tim 2440 break;
1962    
1963 gezelter 3057 case('M'): if( ch=='D' )
1964     {
1965     return( 101 );
1966     }
1967     else if( ch=='G' )
1968     {
1969     return( 12 );
1970     }
1971     else if( ch=='N' )
1972     {
1973     return( 25 );
1974     }
1975     else if( ch=='O' )
1976     return( 42 );
1977 tim 2440 break;
1978    
1979 gezelter 3057 case('N'): switch( ch )
1980     {
1981     case('A'): return( 11 );
1982     case('B'): return( 41 );
1983     case('D'): return( 60 );
1984     case('E'): return( 10 );
1985     case('I'): return( 28 );
1986     case('O'): return( 102 );
1987     case('P'): return( 93 );
1988     case(' '): return( 7 );
1989     }
1990 tim 2440 break;
1991    
1992 gezelter 3057 case('O'): if( ch=='S' )
1993     {
1994     return( 76 );
1995     }
1996     else if( ch==' ' )
1997     return( 8 );
1998 tim 2440 break;
1999    
2000 gezelter 3057 case('P'): switch( ch )
2001     {
2002     case('A'): return( 91 );
2003     case('B'): return( 82 );
2004     case('D'): return( 46 );
2005     case('M'): return( 61 );
2006     case('O'): return( 84 );
2007     case('R'): return( 59 );
2008     case('T'): return( 78 );
2009     case('U'): return( 94 );
2010     case(' '): return( 15 );
2011     }
2012 tim 2440 break;
2013    
2014 gezelter 3057 case('R'): switch( ch )
2015     {
2016     case('A'): return( 88 );
2017     case('B'): return( 37 );
2018     case('E'): return( 75 );
2019     case('H'): return( 45 );
2020     case('N'): return( 86 );
2021     case('U'): return( 44 );
2022     }
2023 tim 2440 break;
2024    
2025 gezelter 3057 case('S'): switch( ch )
2026     {
2027     case('B'): return( 51 );
2028     case('C'): return( 21 );
2029     case('E'): return( 34 );
2030     case('I'): return( 14 );
2031     case('M'): return( 62 );
2032     case('N'): return( 50 );
2033     case('R'): return( 38 );
2034     case(' '): return( 16 );
2035     }
2036 tim 2440 break;
2037    
2038 gezelter 3057 case('T'): switch( ch )
2039     {
2040     case('A'): return( 73 );
2041     case('B'): return( 65 );
2042     case('C'): return( 43 );
2043     case('E'): return( 52 );
2044     case('H'): return( 90 );
2045     case('I'): return( 22 );
2046     case('L'): return( 81 );
2047     case('M'): return( 69 );
2048     }
2049 tim 2440 break;
2050    
2051 gezelter 3057 case('U'): if( ch==' ' )
2052     return( 92 );
2053 tim 2440 break;
2054    
2055 gezelter 3057 case('V'): if( ch==' ' )
2056     return( 23 );
2057 tim 2440 break;
2058    
2059 gezelter 3057 case('W'): if( ch==' ' )
2060     return( 74 );
2061 tim 2440 break;
2062    
2063 gezelter 3057 case('X'): if( ch=='E' )
2064     return( 54 );
2065 tim 2440 break;
2066    
2067 gezelter 3057 case('Y'): if( ch=='B' )
2068     {
2069     return( 70 );
2070     }
2071     else if( ch==' ' )
2072     return( 39 );
2073 tim 2440 break;
2074    
2075 gezelter 3057 case('Z'): if( ch=='N' )
2076     {
2077     return( 30 );
2078     }
2079     else if( ch=='R' )
2080     return( 40 );
2081 tim 2440 break;
2082 gezelter 3057 }
2083 tim 2440
2084     if( (*ptr>='0') && (*ptr<='9') )
2085 gezelter 3057 if( (ch=='H') || (ch=='D') )
2086     return( 1 ); /* Hydrogen */
2087 tim 2440
2088     return( 0 );
2089 gezelter 3057 }
2090 tim 2440
2091 gezelter 3057 char *OBChainsParser::ParseSmiles(char *ptr, int prev)
2092     {
2093 tim 2440 char *name;
2094     int atomid;
2095     int next;
2096     int type;
2097     int ch;
2098    
2099     type = 0;
2100     while( (ch = *ptr++) )
2101 gezelter 3057 {
2102 tim 2440 switch( ch )
2103 gezelter 3057 {
2104     case('-'): type = BF_SINGLE;
2105 tim 2440 break;
2106 gezelter 3057 case('='): type = BF_DOUBLE;
2107 tim 2440 break;
2108 gezelter 3057 case('#'): type = BF_TRIPLE;
2109 tim 2440 break;
2110 gezelter 3057 case('^'): type = BF_SINGLE|BF_AROMATIC;
2111 tim 2440 break;
2112 gezelter 3057 case('~'): type = BF_DOUBLE|BF_AROMATIC;
2113 tim 2440 break;
2114    
2115 gezelter 3057 case(')'): return( ptr );
2116     case('.'): prev = -1;
2117 tim 2440 break;
2118 gezelter 3057 case('('): ptr = ParseSmiles(ptr,prev);
2119 tim 2440 break;
2120    
2121 gezelter 3057 default:
2122 tim 2440 atomid = ch-'0';
2123     while( isdigit(*ptr) )
2124 gezelter 3057 atomid = (atomid*10)+(*ptr++)-'0';
2125 tim 2440
2126     for( next=0; next<MonoAtomCount; next++ )
2127 gezelter 3057 if( MonoAtom[next].atomid == atomid )
2128     break;
2129 tim 2440
2130     if( next == MonoAtomCount )
2131 gezelter 3057 {
2132 tim 2440 name = ChainsAtomName[atomid];
2133     MonoAtom[next].elem = IdentifyElement(name);
2134     MonoAtom[next].atomid = atomid;
2135     MonoAtom[next].bcount = 0;
2136     MonoAtomCount++;
2137 gezelter 3057 }
2138 tim 2440
2139     if( prev != -1 )
2140 gezelter 3057 {
2141 tim 2440 MonoBond[MonoBondCount].flag = type;
2142     MonoBond[MonoBondCount].src = prev;
2143     MonoBond[MonoBondCount].dst = next;
2144     MonoBondCount++;
2145    
2146     MonoAtom[prev].bcount++;
2147     MonoAtom[next].bcount++;
2148 gezelter 3057 }
2149 tim 2440 prev = next;
2150 gezelter 3057 }
2151     }
2152 tim 2440 return( ptr-1 );
2153 gezelter 3057 }
2154 tim 2440
2155     } // end namespace OpenBabel
2156    
2157     //! \file chains.cpp
2158     //! \brief Parse for macromolecule chains and residues.