ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/OOPSE-4/src/openbabel/parsmart.cpp
Revision: 3057
Committed: Thu Oct 19 20:49:05 2006 UTC (17 years, 11 months ago) by gezelter
File size: 97203 byte(s)
Log Message:
updated OpenBabel to version 2.0.2

File Contents

# User Rev Content
1 tim 2440 /**********************************************************************
2     parsmart.cpp - SMARTS parser.
3    
4     Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc.
5 gezelter 3057 Some portions Copyright (C) 2001-2006 by Geoffrey R. Hutchison
6 tim 2440
7     This file is part of the Open Babel project.
8     For more information, see <http://openbabel.sourceforge.net/>
9    
10     This program is free software; you can redistribute it and/or modify
11     it under the terms of the GNU General Public License as published by
12     the Free Software Foundation version 2 of the License.
13    
14     This program is distributed in the hope that it will be useful,
15     but WITHOUT ANY WARRANTY; without even the implied warranty of
16     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17     GNU General Public License for more details.
18     ***********************************************************************/
19    
20     #include <ctype.h>
21 gezelter 3057 #include <iostream>
22     #include <iomanip>
23 tim 2440
24     #include "mol.hpp"
25     #include "bitvec.hpp"
26     #include "parsmart.hpp"
27    
28     #ifndef True
29     #define True 1
30     #define False 0
31     #endif
32    
33     /* Strict syntax checking! */
34     // Currently causes problems with aromatic flags in atomtyp.txt
35     // #define STRICT
36     #define VERBOSE
37    
38     #ifdef __sgi
39     #define UnusedArgument(x) ((x)=(x))
40     #else
41     #define UnusedArgument(x)
42     #endif
43    
44     using namespace std;
45    
46     namespace OpenBabel
47     {
48    
49 gezelter 3057 /*! \class OBSmartsPattern
50 tim 2440
51 gezelter 3057 Substructure search is an incredibly useful tool in the context of a
52     small molecule programming library. Having an efficient substructure
53     search engine reduces the amount of hard code needed for molecule
54     perception, as well as increases the flexibility of certain
55     operations. For instance, atom typing can be easily performed based on
56     hard coded rules of element type and bond orders (or
57     hybridization). Alternatively, atom typing can also be done by
58     matching a set of substructure rules read at run time. In the latter
59     case customization based on application (such as changing the pH)
60     becomes a facile operation. Fortunately for Open Babel and its users,
61     Roger Sayle donated a SMARTS parser which became the basis for SMARTS
62     matching in Open Babel.
63 tim 2440
64 gezelter 3057 The SMARTS matcher, or OBSmartsPattern, is a separate object which can
65     match patterns in the OBMol class. The following code demonstrates how
66     to use the OBSmartsPattern class:
67     \code
68     OBMol mol;
69     ...
70     OBSmartsPattern sp;
71     sp.Init("CC");
72     sp.Match(mol);
73     vector<vector<int> > maplist;
74     maplist = sp.GetMapList();
75     //or maplist = sp.GetUMapList();
76     //print out the results
77     vector<vector<int> >::iterator i;
78     vector<int>::iterator j;
79     for (i = maplist.begin();i != maplist.end();i++)
80     {
81     for (j = i->begin();j != i->end();j++)
82 tim 2440 cout << j << ' `;
83 gezelter 3057 cout << endl;
84     }
85     \endcode
86     The preceding code reads in a molecule, initializes a smarts pattern
87     of two single-bonded carbons, and locates all instances of the
88     pattern in the molecule. Note that calling the Match() function
89     does not return the results of the substructure match. The results
90     from a match are stored in the OBSmartsPattern, and a call to
91     GetMapList() or GetUMapList() must be made to extract the
92     results. The function GetMapList() returns all matches of a
93     particular pattern while GetUMapList() returns only the unique
94     matches. For instance, the pattern [OD1]~C~[OD1] describes a
95     carboxylate group. This pattern will match both atom number
96     permutations of the carboxylate, and if GetMapList() is called, both
97     matches will be returned. If GetUMapList() is called only unique
98     matches of the pattern will be returned. A unique match is defined as
99     one which does not cover the identical atoms that a previous match
100     has covered.
101     */
102 tim 2440
103 gezelter 3057 /*============================*/
104     /* Period Table of Elements */
105     /*============================*/
106 tim 2440
107 gezelter 3057 std::vector<std::pair<Pattern*,std::vector<bool> > > RSCACHE; //recursive smarts cache
108 tim 2440
109 gezelter 3057 typedef struct
110     {
111 tim 2440 char *symbol;
112     int organic;
113     int aromflag;
114     double weight;
115 gezelter 3057 }
116     Element;
117 tim 2440
118     #define ELEMMAX 104
119    
120 gezelter 3057 typedef struct
121     {
122 tim 2440 BondExpr *closord[100];
123     int closure[100];
124     int closindex;
125 gezelter 3057 }
126     ParseState;
127 tim 2440
128 gezelter 3057 /*
129     #define ATOMEXPRPOOL 16
130     #define BONDEXPRPOOL 16
131     #define ATOMPOOL 16
132     #define BONDPOOL 16
133     */
134 tim 2440
135     #define ATOMEXPRPOOL 1
136     #define BONDEXPRPOOL 1
137     #define ATOMPOOL 1
138     #define BONDPOOL 1
139    
140 gezelter 3057 /*=====================*/
141     /* BondExpr Bit Sets */
142     /*=====================*/
143 tim 2440
144     #define BF_NONRINGUNSPEC 0x0001
145     #define BF_NONRINGDOWN 0x0002
146     #define BF_NONRINGUP 0x0004
147     #define BF_NONRINGDOUBLE 0x0008
148     #define BF_NONRINGTRIPLE 0x0010
149     #define BF_RINGUNSPEC 0x0020
150     #define BF_RINGDOWN 0x0040
151     #define BF_RINGUP 0x0080
152     #define BF_RINGAROM 0x0100
153     #define BF_RINGDOUBLE 0x0200
154     #define BF_RINGTRIPLE 0x0400
155    
156     #define BS_ALL 0x07FF
157     #define BS_SINGLE 0x00E7
158     #define BS_DOUBLE 0x0208
159     #define BS_TRIPLE 0x0410
160     #define BS_AROM 0x0100
161     #define BS_UP 0x0084
162     #define BS_DOWN 0x0042
163     #define BS_UPUNSPEC 0x00A5
164     #define BS_DOWNUNSPEC 0x0063
165     #define BS_RING 0x07E0
166     #define BS_DEFAULT 0x01E7
167    
168 gezelter 3057 static char *MainPtr;
169     static char *LexPtr;
170 tim 2440
171     #define BUFMAX 1024
172 gezelter 3057 static char Buffer[BUFMAX];
173     static char Descr[BUFMAX];
174 tim 2440
175 gezelter 3057 static bool match(OBMol &mol,Pattern *pat,std::vector<std::vector<int> > &mlist,bool single=false);
176     static bool EvalAtomExpr(AtomExpr *expr,OBAtom *atom);
177     static bool EvalBondExpr(BondExpr *expr,OBBond *bond);
178     static int GetVectorBinding();
179     static int CreateAtom(Pattern*,AtomExpr*,int,int vb=0);
180 tim 2440
181 gezelter 3057 /*=============================*/
182     /* Standard Utility Routines */
183     /*=============================*/
184 tim 2440
185 gezelter 3057 static void FatalAllocationError( char *ptr )
186     {
187     printf("Error: Unable to allocate %s!\n",ptr);
188     // exit(1);
189     }
190 tim 2440
191 gezelter 3057 /*================================*/
192     /* Atom Expression Manipulation */
193     /*================================*/
194 tim 2440
195 gezelter 3057 static void FreePattern( Pattern* );
196     static Pattern *CopyPattern( Pattern* );
197 tim 2440
198 gezelter 3057 static AtomExpr *AllocAtomExpr( void )
199     {
200     register AtomExpr *result;
201 tim 2440
202 gezelter 3057 result = (AtomExpr*)malloc(sizeof(AtomExpr));
203     return result;
204     }
205 tim 2440
206 gezelter 3057 static AtomExpr *CopyAtomExpr( AtomExpr *expr )
207     {
208     register AtomExpr *result;
209 tim 2440
210 gezelter 3057 result = AllocAtomExpr();
211     result->type = expr->type;
212     switch( expr->type )
213     {
214     case(AE_ANDHI):
215     case(AE_ANDLO):
216     case(AE_OR): result->bin.lft = CopyAtomExpr(expr->bin.lft);
217     result->bin.rgt = CopyAtomExpr(expr->bin.rgt);
218     break;
219 tim 2440
220 gezelter 3057 case(AE_NOT): result->mon.arg = CopyAtomExpr(expr->mon.arg);
221     break;
222 tim 2440
223 gezelter 3057 case(AE_RECUR): result->recur.recur = CopyPattern(
224     (Pattern*)expr->recur.recur );
225     break;
226 tim 2440
227 gezelter 3057 case(AE_LEAF): result->leaf.prop = expr->leaf.prop;
228     result->leaf.value = expr->leaf.value;
229     break;
230     }
231     return result;
232     }
233 tim 2440
234 gezelter 3057 static void FreeAtomExpr( AtomExpr *expr )
235     {
236     if( expr )
237     {
238     switch( expr->type )
239     {
240     case(AE_ANDHI):
241     case(AE_ANDLO):
242     case(AE_OR): FreeAtomExpr(expr->bin.lft);
243     FreeAtomExpr(expr->bin.rgt);
244     break;
245 tim 2440
246 gezelter 3057 case(AE_NOT): FreeAtomExpr(expr->mon.arg);
247     break;
248 tim 2440
249 gezelter 3057 case(AE_RECUR): FreePattern( (Pattern*)expr->recur.recur );
250     break;
251     }
252     if (expr)
253     {
254     free(expr);
255     expr = (AtomExpr*)NULL;
256     }
257     }
258     }
259 tim 2440
260 gezelter 3057 static AtomExpr *BuildAtomLeaf( int prop, int val )
261     {
262     register AtomExpr *result;
263 tim 2440
264 gezelter 3057 result = AllocAtomExpr();
265     result->leaf.type = AE_LEAF;
266     result->leaf.prop = prop;
267     result->leaf.value = val;
268     return result;
269     }
270 tim 2440
271 gezelter 3057 static AtomExpr *BuildAtomNot( AtomExpr *expr )
272     {
273     register AtomExpr *result;
274 tim 2440
275 gezelter 3057 result = AllocAtomExpr();
276     result->mon.type = AE_NOT;
277     result->mon.arg = expr;
278     return result;
279     }
280 tim 2440
281 gezelter 3057 static AtomExpr *BuildAtomBin( int op, AtomExpr *lft, AtomExpr *rgt )
282     {
283     register AtomExpr *result;
284 tim 2440
285 gezelter 3057 result = AllocAtomExpr();
286     result->bin.type = op;
287     result->bin.lft = lft;
288     result->bin.rgt = rgt;
289     return result;
290     }
291 tim 2440
292 gezelter 3057 static AtomExpr *BuildAtomRecurs( Pattern *pat )
293     {
294     register AtomExpr *result;
295 tim 2440
296 gezelter 3057 result = AllocAtomExpr();
297     result->recur.type = AE_RECUR;
298     result->recur.recur = (void*)pat;
299     return result;
300     }
301 tim 2440
302 gezelter 3057 static AtomExpr *GenerateElement( int elem )
303     {
304     return BuildAtomLeaf(AL_ELEM,elem);
305     }
306 tim 2440
307 gezelter 3057 static AtomExpr *GenerateAromElem( int elem, int flag )
308     {
309     AtomExpr *expr1;
310     AtomExpr *expr2;
311 tim 2440
312 gezelter 3057 expr1 = BuildAtomLeaf(AL_AROM,flag);
313     expr2 = BuildAtomLeaf(AL_ELEM,elem);
314     return BuildAtomBin(AE_ANDHI,expr1,expr2);
315     }
316 tim 2440
317 gezelter 3057 static int IsInvalidAtom( AtomExpr *expr )
318     {
319     if( !expr )
320     return True;
321     return( (expr->type==AE_LEAF) &&
322     (expr->leaf.prop==AL_CONST)
323     && !expr->leaf.value );
324     }
325 tim 2440
326 gezelter 3057 /*================================*/
327     /* Bond Expression Manipulation */
328     /*================================*/
329 tim 2440
330 gezelter 3057 static BondExpr *AllocBondExpr( void )
331     {
332     register BondExpr *result;
333 tim 2440
334 gezelter 3057 result = (BondExpr*)malloc(sizeof(BondExpr));
335     return result;
336     }
337 tim 2440
338 gezelter 3057 static BondExpr *CopyBondExpr( BondExpr *expr )
339     {
340     register BondExpr *result;
341 tim 2440
342 gezelter 3057 result = AllocBondExpr();
343     result->type = expr->type;
344     switch( expr->type )
345     {
346     case(AE_ANDHI):
347     case(AE_ANDLO):
348     case(AE_OR): result->bin.lft = CopyBondExpr(expr->bin.lft);
349     result->bin.rgt = CopyBondExpr(expr->bin.rgt);
350     break;
351 tim 2440
352 gezelter 3057 case(AE_NOT): result->mon.arg = CopyBondExpr(expr->mon.arg);
353     break;
354 tim 2440
355 gezelter 3057 case(AE_LEAF): result->leaf.prop = expr->leaf.prop;
356     result->leaf.value = expr->leaf.value;
357     break;
358     }
359     return result;
360     }
361 tim 2440
362 gezelter 3057 static bool EquivalentBondExpr( BondExpr *expr1, BondExpr *expr2 )
363     {
364     if (expr1 == NULL && expr2 == NULL)
365     return true;
366     else if (expr1 == NULL && expr2 != NULL)
367     return false;
368     else if (expr1 != NULL && expr2 == NULL)
369     return false;
370    
371     if (expr1->type != expr2->type)
372     return false;
373    
374     bool result = false;
375     switch( expr1->type )
376     {
377     case(AE_ANDHI):
378     case(AE_ANDLO):
379     case(AE_OR):
380     result = (EquivalentBondExpr(expr1->bin.lft, expr2->bin.lft)) &&
381     (EquivalentBondExpr(expr1->bin.rgt, expr2->bin.rgt));
382     break;
383    
384     case(AE_NOT):
385     result = EquivalentBondExpr(expr1->mon.arg, expr2->mon.arg);
386     break;
387    
388     case(AE_LEAF):
389     result = (expr1->leaf.prop == expr2->leaf.prop) &&
390     (expr1->leaf.value == expr2->leaf.value);
391     break;
392     }
393     return result;
394     }
395    
396     static void FreeBondExpr( BondExpr *expr )
397     {
398     if( expr )
399     {
400     switch( expr->type )
401     {
402     case(BE_ANDHI):
403     case(BE_ANDLO):
404     case(BE_OR): FreeBondExpr(expr->bin.lft);
405     FreeBondExpr(expr->bin.rgt);
406     break;
407 tim 2440
408 gezelter 3057 case(BE_NOT): FreeBondExpr(expr->mon.arg);
409     break;
410     }
411 tim 2440
412 gezelter 3057 if (expr)
413     {
414     free(expr);
415     expr = (BondExpr*)NULL;
416     }
417     }
418     }
419 tim 2440
420 gezelter 3057 static BondExpr *BuildBondLeaf( int prop, int val )
421     {
422     register BondExpr *result;
423 tim 2440
424 gezelter 3057 result = AllocBondExpr();
425     result->leaf.type = BE_LEAF;
426     result->leaf.prop = prop;
427     result->leaf.value = val;
428     return result;
429     }
430 tim 2440
431 gezelter 3057 static BondExpr *BuildBondNot( BondExpr *expr )
432     {
433     register BondExpr *result;
434 tim 2440
435 gezelter 3057 result = AllocBondExpr();
436     result->mon.type = BE_NOT;
437     result->mon.arg = expr;
438     return result;
439     }
440 tim 2440
441 gezelter 3057 static BondExpr *BuildBondBin( int op, BondExpr *lft, BondExpr *rgt )
442     {
443     register BondExpr *result;
444 tim 2440
445 gezelter 3057 result = AllocBondExpr();
446     result->bin.type = op;
447     result->bin.lft = lft;
448     result->bin.rgt = rgt;
449     return result;
450     }
451 tim 2440
452 gezelter 3057 static BondExpr *GenerateDefaultBond( void )
453     {
454     register BondExpr *expr1;
455     register BondExpr *expr2;
456 tim 2440
457 gezelter 3057 expr1 = BuildBondLeaf(BL_TYPE,BT_SINGLE);
458     expr2 = BuildBondLeaf(BL_TYPE,BT_AROM);
459     return(BuildBondBin(BE_OR,expr1,expr2));
460     }
461 tim 2440
462 gezelter 3057 /*===============================*/
463     /* SMARTS Pattern Manipulation */
464     /*===============================*/
465 tim 2440
466 gezelter 3057 static Pattern *AllocPattern( void )
467     {
468     Pattern *ptr;
469 tim 2440
470 gezelter 3057 ptr = (Pattern*)malloc(sizeof(Pattern));
471     if( !ptr )
472     FatalAllocationError("pattern");
473 tim 2440
474 gezelter 3057 ptr->atom = (AtomSpec*)0;
475     ptr->aalloc = 0;
476     ptr->acount = 0;
477 tim 2440
478 gezelter 3057 ptr->bond = (BondSpec*)0;
479     ptr->balloc = 0;
480     ptr->bcount = 0;
481 tim 2440
482 gezelter 3057 ptr->parts = 1;
483     return ptr;
484     }
485 tim 2440
486 gezelter 3057 static int CreateAtom( Pattern *pat, AtomExpr *expr, int part,int vb)
487     {
488     int index,size;
489 tim 2440
490 gezelter 3057 if( pat->acount == pat->aalloc )
491     {
492     pat->aalloc += ATOMPOOL;
493     size = (int)(pat->aalloc*sizeof(AtomSpec));
494     if( pat->atom )
495     {
496     pat->atom = (AtomSpec*)realloc(pat->atom,size);
497     }
498     else
499     pat->atom = (AtomSpec*)malloc(size);
500     if( !pat->atom )
501     FatalAllocationError("atom pool");
502     }
503 tim 2440
504 gezelter 3057 index = pat->acount++;
505     pat->atom[index].part = part;
506     pat->atom[index].expr = expr;
507     pat->atom[index].vb = vb; //std::vector binding
508 tim 2440
509 gezelter 3057 return index;
510     }
511 tim 2440
512 gezelter 3057 static int CreateBond( Pattern *pat, BondExpr *expr, int src, int dst )
513     {
514     int index,size;
515 tim 2440
516 gezelter 3057 if( pat->bcount == pat->balloc )
517     {
518     pat->balloc += BONDPOOL;
519     size = (int)(pat->balloc*sizeof(BondSpec));
520     if( pat->bond )
521     {
522     pat->bond = (BondSpec*)realloc(pat->bond,size);
523     }
524     else
525     pat->bond = (BondSpec*)malloc(size);
526     if( !pat->bond )
527     FatalAllocationError("bond pool");
528     }
529 tim 2440
530 gezelter 3057 index = pat->bcount++;
531     pat->bond[index].expr = expr;
532     pat->bond[index].src = src;
533     pat->bond[index].dst = dst;
534     return(index);
535     }
536 tim 2440
537 gezelter 3057 static Pattern *CopyPattern( Pattern *pat )
538     {
539     Pattern *result;
540     AtomExpr *aexpr;
541     BondExpr *bexpr;
542     int i;
543 tim 2440
544 gezelter 3057 result = AllocPattern();
545     result->parts = pat->parts;
546     for( i=0; i<pat->acount; i++ )
547     {
548     aexpr = CopyAtomExpr(pat->atom[i].expr);
549     CreateAtom(result,aexpr,pat->atom[i].part);
550     }
551 tim 2440
552 gezelter 3057 for( i=0; i<pat->bcount; i++ )
553     {
554     bexpr = CopyBondExpr(pat->bond[i].expr);
555     CreateBond(result,bexpr,pat->bond[i].src,pat->bond[i].dst);
556     }
557 tim 2440
558 gezelter 3057 return result;
559     }
560 tim 2440
561 gezelter 3057 static void FreePattern( Pattern *pat )
562     {
563     int i;
564 tim 2440
565 gezelter 3057 if( pat )
566     {
567     if( pat->aalloc )
568     {
569     for( i=0; i<pat->acount; i++ )
570     FreeAtomExpr(pat->atom[i].expr);
571     free(pat->atom);
572     }
573 tim 2440
574 gezelter 3057 if( pat->balloc )
575     {
576     for( i=0; i<pat->bcount; i++ )
577     FreeBondExpr(pat->bond[i].expr);
578     free(pat->bond);
579     }
580     free(pat);
581     }
582     }
583 tim 2440
584 gezelter 3057 /*=========================*/
585     /* SMARTS Syntax Parsing */
586     /*=========================*/
587 tim 2440
588 gezelter 3057 static Pattern *ParseSMARTSPattern( void );
589     static Pattern *ParseSMARTSPart( Pattern*, int );
590 tim 2440
591 gezelter 3057 static Pattern *SMARTSError( Pattern *pat )
592     {
593     /* char *ptr;
594 tim 2440
595 gezelter 3057 fprintf(stderr,"SMARTS Error: %s\n",MainPtr);
596 tim 2440
597 gezelter 3057 fputs(" ",stdout);
598     for( ptr=MainPtr; ptr<LexPtr; ptr++ )
599 tim 2440 fputc(' ',stdout);
600 gezelter 3057 fputs("^\n",stdout);
601     */
602 tim 2440
603 gezelter 3057 #ifdef HAVE_SSTREAM
604     stringstream errorMsg;
605     #else
606     strstream errorMsg;
607     #endif
608     errorMsg << "SMARTS Error:\n" << MainPtr << endl;
609     errorMsg << setw(LexPtr-MainPtr+1) << '^' << endl;
610     obErrorLog.ThrowError(__func__, errorMsg.str(), obError);
611    
612     FreePattern(pat);
613     return (Pattern*)0;
614     }
615    
616     static AtomExpr *ParseSimpleAtomPrimitive( void )
617     {
618     switch( *LexPtr++ )
619     {
620     case '*':
621     return BuildAtomLeaf(AL_CONST,True);
622 tim 2440 #ifndef STRICT
623 gezelter 3057 case 'A':
624     return BuildAtomLeaf(AL_AROM,False);
625 tim 2440 #endif
626    
627 gezelter 3057 case 'B':
628     if( *LexPtr == 'r' )
629     {
630     LexPtr++;
631     return GenerateElement(35);
632     }
633     return GenerateElement(5);
634 tim 2440
635 gezelter 3057 case 'C':
636     if( *LexPtr == 'l' )
637     {
638     LexPtr++;
639     return GenerateElement(17);
640     }
641     return GenerateAromElem(6,False);
642 tim 2440
643 gezelter 3057 case 'F':
644     return GenerateElement( 9);
645     case 'I':
646     return GenerateElement(53);
647     case 'N':
648     return GenerateAromElem( 7,False);
649     case 'O':
650     return GenerateAromElem( 8,False);
651     case 'P':
652     return GenerateElement(15);
653     case 'S':
654     return GenerateAromElem(16,False);
655 tim 2440
656     #ifndef STRICT
657 gezelter 3057 case 'a':
658     if( *LexPtr == 's' )
659     {
660     LexPtr++;
661     return GenerateAromElem(33, true);
662     }
663     return BuildAtomLeaf(AL_AROM,True);
664 tim 2440 #endif
665    
666 gezelter 3057 case 'c':
667     return GenerateAromElem( 6,True);
668     case 'n':
669     return GenerateAromElem( 7,True);
670     case 'o':
671     return GenerateAromElem( 8,True);
672     case 'p':
673     return GenerateAromElem(15,True);
674     case 's':
675     if( *LexPtr == 'e' )
676     {
677     LexPtr++;
678     return GenerateAromElem(34, true);
679     }
680     return GenerateAromElem(16,True);
681     }
682     LexPtr--;
683     return (AtomExpr*)0;
684     }
685 tim 2440
686 gezelter 3057 static AtomExpr *ParseComplexAtomPrimitive( void )
687     {
688     register Pattern *pat;
689     register int index;
690 tim 2440
691 gezelter 3057 switch( *LexPtr++ )
692     {
693     case('#'):
694     if( !isdigit(*LexPtr) )
695     return( (AtomExpr*)0 );
696 tim 2440
697 gezelter 3057 index = 0;
698     while( isdigit(*LexPtr) )
699     index = index*10 + ((*LexPtr++)-'0');
700     if( index > ELEMMAX )
701     {
702     LexPtr--;
703     return( (AtomExpr*)0 );
704     }
705     else if( !index )
706     return( (AtomExpr*)0 );
707     return( GenerateElement(index) );
708    
709     case('$'):
710     if( *LexPtr != '(' )
711     return( (AtomExpr*)0 );
712     LexPtr++;
713 tim 2440 #ifdef STRICT
714 gezelter 3057 pat = ParseSMARTSPart(AllocPattern(),0);
715 tim 2440 #else
716 gezelter 3057 pat = ParseSMARTSPattern();
717 tim 2440 #endif
718 gezelter 3057
719     if( !pat )
720     return( (AtomExpr*)0 );
721     if( *LexPtr != ')' )
722     {
723     FreePattern(pat);
724     return( (AtomExpr*)0 );
725     }
726     LexPtr++;
727     return( BuildAtomRecurs(pat) );
728    
729     case('*'):
730     return( BuildAtomLeaf(AL_CONST,True) );
731 tim 2440
732 gezelter 3057 case('+'):
733     if( isdigit(*LexPtr) )
734     {
735     index = 0;
736     while( isdigit(*LexPtr) )
737     index = index*10 + ((*LexPtr++)-'0');
738     }
739     else
740     {
741     index = 1;
742     while( *LexPtr == '+' )
743     {
744     LexPtr++;
745     index++;
746     }
747     }
748     return( BuildAtomLeaf(AL_POSITIVE,index) );
749    
750     case('-'):
751     if( isdigit(*LexPtr) )
752     {
753     index = 0;
754     while( isdigit(*LexPtr) )
755     index = index*10 + ((*LexPtr++)-'0');
756     }
757     else
758     {
759     index = 1;
760     while( *LexPtr == '-' )
761     {
762     LexPtr++;
763     index++;
764     }
765     }
766     return BuildAtomLeaf(AL_NEGATIVE,index);
767 tim 2440
768 gezelter 3057 case '@':
769     if (*LexPtr == '?')
770     {
771     LexPtr++;
772     return(BuildAtomLeaf(AL_CHIRAL, 0)); // unspecified
773     }
774     else if (*LexPtr != '@')
775     return(BuildAtomLeaf(AL_CHIRAL,AL_ANTICLOCKWISE));
776     else
777     {
778     LexPtr++;
779     return(BuildAtomLeaf(AL_CHIRAL,AL_CLOCKWISE));
780     }
781    
782     case '^':
783     if (isdigit(*LexPtr))
784     {
785     index = 0;
786     while( isdigit(*LexPtr) )
787     index = index*10 + ((*LexPtr++)-'0');
788     return(BuildAtomLeaf(AL_HYB,index));
789     }
790     else
791     return(BuildAtomLeaf(AL_HYB,1));
792    
793     case('0'): case('1'): case('2'): case('3'): case('4'):
794     case('5'): case('6'): case('7'): case('8'): case('9'):
795     index = LexPtr[-1]-'0';
796     while( isdigit(*LexPtr) )
797     index = index*10 + ((*LexPtr++)-'0');
798     return BuildAtomLeaf(AL_MASS,index);
799 tim 2440
800 gezelter 3057 case('A'):
801     switch( *LexPtr++ )
802     {
803     case('c'): return GenerateElement(89);
804     case('g'): return GenerateElement(47);
805     case('l'): return GenerateElement(13);
806     case('m'): return GenerateElement(95);
807     case('r'): return GenerateElement(18);
808     case('s'): return GenerateElement(33);
809     case('t'): return GenerateElement(85);
810     case('u'): return GenerateElement(79);
811     }
812     LexPtr--;
813     return BuildAtomLeaf(AL_AROM,False);
814    
815     case('B'):
816     switch( *LexPtr++ )
817     {
818     case('a'): return GenerateElement(56);
819     case('e'): return GenerateElement( 4);
820     case('i'): return GenerateElement(83);
821     case('k'): return GenerateElement(97);
822     case('r'): return GenerateElement(35);
823     }
824     LexPtr--;
825     return GenerateElement(5);
826    
827     case('C'):
828     switch( *LexPtr++ )
829     {
830     case('a'): return GenerateElement(20);
831     case('d'): return GenerateElement(48);
832     case('e'): return GenerateElement(58);
833     case('f'): return GenerateElement(98);
834     case('l'): return GenerateElement(17);
835     case('m'): return GenerateElement(96);
836     case('o'): return GenerateElement(27);
837     case('r'): return GenerateElement(24);
838     case('s'): return GenerateElement(55);
839     case('u'): return GenerateElement(29);
840     }
841     LexPtr--;
842     return GenerateAromElem(6,False);
843 tim 2440
844 gezelter 3057 case('D'):
845     if( *LexPtr == 'y' )
846     {
847     LexPtr++;
848     return GenerateElement(66);
849     }
850     else if( isdigit(*LexPtr) )
851     {
852     index = 0;
853     while( isdigit(*LexPtr) )
854     index = index*10 + ((*LexPtr++)-'0');
855     return BuildAtomLeaf(AL_DEGREE,index);
856     }
857     return BuildAtomLeaf(AL_DEGREE, 1);
858     break;
859 tim 2440
860 gezelter 3057 case('E'):
861     if( *LexPtr == 'r' )
862     {
863     LexPtr++;
864     return GenerateElement(68);
865     }
866     else if( *LexPtr == 's' )
867     {
868     LexPtr++;
869     return GenerateElement(99);
870     }
871     else if( *LexPtr == 'u' )
872     {
873     LexPtr++;
874     return GenerateElement(63);
875     }
876     break;
877 tim 2440
878 gezelter 3057 case('F'):
879     if( *LexPtr == 'e' )
880     {
881     LexPtr++;
882     return GenerateElement(26);
883     }
884     else if( *LexPtr == 'm' )
885     {
886     LexPtr++;
887     return GenerateElement(100);
888     }
889     else if( *LexPtr == 'r' )
890     {
891     LexPtr++;
892     return GenerateElement(87);
893     }
894     return GenerateElement(9);
895 tim 2440
896 gezelter 3057 case('G'):
897     if( *LexPtr == 'a' )
898     {
899     LexPtr++;
900     return( GenerateElement(31) );
901     }
902     else if( *LexPtr == 'd' )
903     {
904     LexPtr++;
905     return( GenerateElement(64) );
906     }
907     else if( *LexPtr == 'e' )
908     {
909     LexPtr++;
910     return( GenerateElement(32) );
911     }
912     break;
913 tim 2440
914 gezelter 3057 case('H'):
915     if( *LexPtr == 'e' )
916     {
917     LexPtr++;
918     return( GenerateElement( 2) );
919     }
920     else if( *LexPtr == 'f' )
921     {
922     LexPtr++;
923     return( GenerateElement(72) );
924     }
925     else if( *LexPtr == 'g' )
926     {
927     LexPtr++;
928     return( GenerateElement(80) );
929     }
930     else if( *LexPtr == 'o' )
931     {
932     LexPtr++;
933     return( GenerateElement(67) );
934     }
935     else if( isdigit(*LexPtr) )
936     {
937     index = 0;
938     while( isdigit(*LexPtr) )
939     index = index*10 + ((*LexPtr++)-'0');
940     return( BuildAtomLeaf(AL_HCOUNT,index) );
941     }
942     return( BuildAtomLeaf(AL_HCOUNT,1) );
943 tim 2440
944 gezelter 3057 case('I'):
945     if( *LexPtr == 'n' )
946     {
947     LexPtr++;
948     return( GenerateElement(49) );
949     }
950     else if( *LexPtr == 'r' )
951     {
952     LexPtr++;
953     return( GenerateElement(77) );
954     }
955     return( GenerateElement(53) );
956 tim 2440
957 gezelter 3057 case('K'):
958     if( *LexPtr == 'r' )
959     {
960     LexPtr++;
961     return( GenerateElement(36) );
962     }
963     return( GenerateElement(19) );
964    
965     case('L'):
966     if( *LexPtr == 'a' )
967     {
968     LexPtr++;
969     return( GenerateElement( 57) );
970     }
971     else if( *LexPtr == 'i' )
972     {
973     LexPtr++;
974     return( GenerateElement( 3) );
975     }
976     else if( *LexPtr == 'r' )
977     {
978     LexPtr++;
979     return( GenerateElement(103) );
980     }
981     else if( *LexPtr == 'u' )
982     {
983     LexPtr++;
984     return( GenerateElement( 71) );
985     }
986     break;
987 tim 2440
988 gezelter 3057 case('M'):
989     if( *LexPtr == 'd' )
990     {
991     LexPtr++;
992     return( GenerateElement(101) );
993     }
994     else if( *LexPtr == 'g' )
995     {
996     LexPtr++;
997     return( GenerateElement( 12) );
998     }
999     else if( *LexPtr == 'n' )
1000     {
1001     LexPtr++;
1002     return( GenerateElement( 25) );
1003     }
1004     else if( *LexPtr == 'o' )
1005     {
1006     LexPtr++;
1007     return( GenerateElement( 42) );
1008     }
1009     break;
1010 tim 2440
1011 gezelter 3057 case('N'):
1012     switch( *LexPtr++ )
1013     {
1014     case('a'): return( GenerateElement( 11) );
1015     case('b'): return( GenerateElement( 41) );
1016     case('d'): return( GenerateElement( 60) );
1017     case('e'): return( GenerateElement( 10) );
1018     case('i'): return( GenerateElement( 28) );
1019     case('o'): return( GenerateElement(102) );
1020     case('p'): return( GenerateElement( 93) );
1021     }
1022     LexPtr--;
1023     return( GenerateAromElem(7,False) );
1024    
1025     case('O'):
1026     if( *LexPtr == 's' )
1027     {
1028     LexPtr++;
1029     return( GenerateElement(76) );
1030     }
1031     return( GenerateAromElem(8,False) );
1032    
1033     case('P'):
1034     switch( *LexPtr++ )
1035     {
1036     case('a'): return( GenerateElement(91) );
1037     case('b'): return( GenerateElement(82) );
1038     case('d'): return( GenerateElement(46) );
1039     case('m'): return( GenerateElement(61) );
1040     case('o'): return( GenerateElement(84) );
1041     case('r'): return( GenerateElement(59) );
1042     case('t'): return( GenerateElement(78) );
1043     case('u'): return( GenerateElement(94) );
1044     }
1045     LexPtr--;
1046     return( GenerateElement(15) );
1047    
1048     case('R'):
1049     switch( *LexPtr++ )
1050     {
1051     case('a'): return( GenerateElement(88) );
1052     case('b'): return( GenerateElement(37) );
1053     case('e'): return( GenerateElement(75) );
1054     case('h'): return( GenerateElement(45) );
1055     case('n'): return( GenerateElement(86) );
1056     case('u'): return( GenerateElement(44) );
1057     }
1058     LexPtr--;
1059     if( isdigit(*LexPtr) )
1060     {
1061     index = 0;
1062     while( isdigit(*LexPtr) )
1063     index = index*10 + ((*LexPtr++)-'0');
1064     }
1065     else
1066     index = -1;
1067     return( BuildAtomLeaf(AL_RINGS,index) );
1068    
1069     case('S'):
1070     switch( *LexPtr++ )
1071     {
1072     case('b'): return( GenerateElement(51) );
1073     case('c'): return( GenerateElement(21) );
1074     case('e'): return( GenerateElement(34) );
1075     case('i'): return( GenerateElement(14) );
1076     case('m'): return( GenerateElement(62) );
1077     case('n'): return( GenerateElement(50) );
1078     case('r'): return( GenerateElement(38) );
1079     }
1080     LexPtr--;
1081     return( GenerateAromElem(16,False) );
1082    
1083     case('T'):
1084     switch( *LexPtr++ )
1085     {
1086     case('a'): return( GenerateElement(73) );
1087     case('b'): return( GenerateElement(65) );
1088     case('c'): return( GenerateElement(43) );
1089     case('e'): return( GenerateElement(52) );
1090     case('h'): return( GenerateElement(90) );
1091     case('i'): return( GenerateElement(22) );
1092     case('l'): return( GenerateElement(81) );
1093     case('m'): return( GenerateElement(69) );
1094     }
1095     LexPtr--;
1096     break;
1097    
1098     case('U'): return( GenerateElement(92) );
1099     case('V'): return( GenerateElement(23) );
1100     case('W'): return( GenerateElement(74) );
1101    
1102     case('X'):
1103     if( *LexPtr == 'e' )
1104     {
1105     LexPtr++;
1106     return( GenerateElement(54) );
1107     }
1108     else if( isdigit(*LexPtr) )
1109     {
1110     index = 0;
1111     while( isdigit(*LexPtr) )
1112     index = index*10 + ((*LexPtr++)-'0');
1113     return( BuildAtomLeaf(AL_CONNECT,index) );
1114     }
1115     return( BuildAtomLeaf(AL_CONNECT, 1) );
1116     break;
1117    
1118     case('Y'):
1119     if( *LexPtr == 'b' )
1120     {
1121     LexPtr++;
1122     return( GenerateElement(70) );
1123     }
1124     return( GenerateElement(39) );
1125    
1126     case('Z'):
1127     if( *LexPtr == 'n' )
1128     {
1129     LexPtr++;
1130     return GenerateElement(30);
1131     }
1132     else if( *LexPtr == 'r' )
1133     {
1134     LexPtr++;
1135     return GenerateElement(40);
1136     }
1137     break;
1138 tim 2440
1139 gezelter 3057 case('a'):
1140     if( *LexPtr == 's' )
1141     {
1142     LexPtr++;
1143     return GenerateAromElem(33,True);
1144     }
1145     return BuildAtomLeaf(AL_AROM,True);
1146 tim 2440
1147 gezelter 3057 case('c'): return GenerateAromElem(6,True);
1148 tim 2440
1149 gezelter 3057 case('h'):
1150     if( isdigit(*LexPtr) )
1151     {
1152     index = 0;
1153     while( isdigit(*LexPtr) )
1154     index = index*10 + ((*LexPtr++)-'0');
1155     }
1156     else
1157     index = 1;
1158     return BuildAtomLeaf(AL_IMPLICIT,index);
1159 tim 2440
1160 gezelter 3057 case('n'): return GenerateAromElem(7,True);
1161     case('o'): return GenerateAromElem(8,True);
1162     case('p'): return GenerateAromElem(15,True);
1163    
1164     case('r'):
1165     if( isdigit(*LexPtr) )
1166     {
1167     index = 0;
1168     while( isdigit(*LexPtr) )
1169     index = index*10 + ((*LexPtr++)-'0');
1170     if( index == 0 )
1171     return BuildAtomLeaf(AL_RINGS,0);
1172     return BuildAtomLeaf(AL_SIZE,index);
1173     }
1174     return BuildAtomLeaf(AL_RINGS,-1);
1175    
1176     case('s'):
1177     if( *LexPtr == 'e' )
1178     {
1179     LexPtr++;
1180     return GenerateAromElem(34,True);
1181     }
1182     return GenerateAromElem(16,True);
1183    
1184     case('v'):
1185     if( isdigit(*LexPtr) )
1186     {
1187     index = 0;
1188     while( isdigit(*LexPtr) )
1189     index = index*10 + ((*LexPtr++)-'0');
1190     return BuildAtomLeaf(AL_VALENCE,index);
1191     }
1192     return BuildAtomLeaf(AL_VALENCE, 1);
1193     break;
1194 tim 2440 }
1195 gezelter 3057 LexPtr--;
1196     return (AtomExpr*)0;
1197     }
1198    
1199     static AtomExpr *ParseAtomExpr( int level )
1200     {
1201     register AtomExpr *expr1;
1202     register AtomExpr *expr2;
1203     register char *prev;
1204    
1205     switch( level )
1206 tim 2440 {
1207 gezelter 3057 case(0): /* Low Precedence Conjunction */
1208     if( !(expr1=ParseAtomExpr(1)) )
1209     return (AtomExpr*)0;
1210 tim 2440
1211 gezelter 3057 while( *LexPtr == ';' )
1212     {
1213     LexPtr++;
1214     if( !(expr2=ParseAtomExpr(1)) )
1215     {
1216     FreeAtomExpr(expr1);
1217     return (AtomExpr*)0;
1218     }
1219     expr1 = BuildAtomBin(AE_ANDLO,expr1,expr2);
1220     }
1221     return expr1;
1222 tim 2440
1223 gezelter 3057 case(1): /* Disjunction */
1224     if( !(expr1=ParseAtomExpr(2)) )
1225     return (AtomExpr*)0;
1226 tim 2440
1227 gezelter 3057 while( *LexPtr == ',' )
1228     {
1229     LexPtr++;
1230     if( !(expr2=ParseAtomExpr(2)) )
1231     {
1232     FreeAtomExpr(expr1);
1233     return( (AtomExpr*)0 );
1234     }
1235     expr1 = BuildAtomBin(AE_OR,expr1,expr2);
1236     }
1237     return( expr1 );
1238 tim 2440
1239 gezelter 3057 case(2): /* High Precedence Conjunction */
1240     if( !(expr1=ParseAtomExpr(3)) )
1241     return( (AtomExpr*)0 );
1242 tim 2440
1243 gezelter 3057 while( (*LexPtr!=']') && (*LexPtr!=';') &&
1244     (*LexPtr!=',') && *LexPtr )
1245     {
1246     if( *LexPtr=='&' )
1247     LexPtr++;
1248     prev = LexPtr;
1249     if( !(expr2=ParseAtomExpr(3)) )
1250     {
1251     if( prev != LexPtr )
1252     {
1253     FreeAtomExpr(expr1);
1254     return( (AtomExpr*)0 );
1255     }
1256     else
1257     return( expr1 );
1258     }
1259     expr1 = BuildAtomBin(AE_ANDHI,expr1,expr2);
1260     }
1261     return( expr1 );
1262    
1263     case(3): /* Negation or Primitive */
1264     if( *LexPtr == '!' )
1265     {
1266     LexPtr++;
1267     if( !(expr1=ParseAtomExpr(3)) )
1268     return( (AtomExpr*)0 );
1269     return( BuildAtomNot(expr1) );
1270     }
1271     return( ParseComplexAtomPrimitive() );
1272 tim 2440 }
1273 gezelter 3057 return (AtomExpr*)0;
1274     }
1275    
1276     static BondExpr *ParseBondPrimitive( void )
1277     {
1278     switch( *LexPtr++ )
1279 tim 2440 {
1280 gezelter 3057 case('-'): return BuildBondLeaf(BL_TYPE,BT_SINGLE);
1281     case('='): return BuildBondLeaf(BL_TYPE,BT_DOUBLE);
1282     case('#'): return BuildBondLeaf(BL_TYPE,BT_TRIPLE);
1283     case(':'): return BuildBondLeaf(BL_TYPE,BT_AROM);
1284     case('@'): return BuildBondLeaf(BL_TYPE,BT_RING);
1285     case('~'): return BuildBondLeaf(BL_CONST,True);
1286 tim 2440
1287 gezelter 3057 case('/'):
1288     if( *LexPtr == '?' )
1289 tim 2440 {
1290 gezelter 3057 LexPtr++;
1291     return BuildBondLeaf(BL_TYPE,BT_UPUNSPEC);
1292 tim 2440 }
1293 gezelter 3057 return BuildBondLeaf(BL_TYPE,BT_UP);
1294 tim 2440
1295 gezelter 3057 case('\\'):
1296     if( *LexPtr == '?' )
1297     {
1298     LexPtr++;
1299     return BuildBondLeaf(BL_TYPE,BT_DOWNUNSPEC);
1300     }
1301 tim 2440
1302 gezelter 3057 return BuildBondLeaf(BL_TYPE,BT_DOWN);
1303 tim 2440 }
1304 gezelter 3057 LexPtr--;
1305     return (BondExpr*)0;
1306     }
1307    
1308     static BondExpr *ParseBondExpr( int level )
1309     {
1310     register BondExpr *expr1;
1311     register BondExpr *expr2;
1312     register char *prev;
1313    
1314     switch( level )
1315 tim 2440 {
1316 gezelter 3057 case(0): /* Low Precedence Conjunction */
1317     if( !(expr1=ParseBondExpr(1)) )
1318     return (BondExpr*)0;
1319 tim 2440
1320 gezelter 3057 while( *LexPtr == ';' )
1321     {
1322     LexPtr++;
1323     if( !(expr2=ParseBondExpr(1)) )
1324     {
1325     FreeBondExpr(expr1);
1326     return (BondExpr*)0;
1327     }
1328     expr1 = BuildBondBin(BE_ANDLO,expr1,expr2);
1329     }
1330     return expr1;
1331 tim 2440
1332 gezelter 3057 case(1): /* Disjunction */
1333     if( !(expr1=ParseBondExpr(2)) )
1334     return (BondExpr*)0;
1335 tim 2440
1336 gezelter 3057 while( *LexPtr == ',' )
1337     {
1338     LexPtr++;
1339     if( !(expr2=ParseBondExpr(2)) )
1340     {
1341     FreeBondExpr(expr1);
1342     return (BondExpr*)0;
1343     }
1344     expr1 = BuildBondBin(BE_OR,expr1,expr2);
1345     }
1346     return expr1;
1347 tim 2440
1348 gezelter 3057 case(2): /* High Precedence Conjunction */
1349     if( !(expr1=ParseBondExpr(3)) )
1350     return (BondExpr*)0;
1351 tim 2440
1352 gezelter 3057 while( (*LexPtr!=']') && (*LexPtr!=';') &&
1353     (*LexPtr!=',') && *LexPtr )
1354     {
1355     if( *LexPtr == '&' )
1356     LexPtr++;
1357     prev = LexPtr;
1358     if( !(expr2=ParseBondExpr(3)) )
1359     {
1360     if( prev != LexPtr )
1361     {
1362     FreeBondExpr(expr1);
1363     return (BondExpr*)0;
1364     }
1365     else
1366     return expr1;
1367     }
1368     expr1 = BuildBondBin(BE_ANDHI,expr1,expr2);
1369     }
1370     return expr1;
1371 tim 2440
1372 gezelter 3057 case(3): /* Negation or Primitive */
1373     if( *LexPtr == '!' )
1374     {
1375     LexPtr++;
1376     if( !(expr1=ParseBondExpr(3)) )
1377     return (BondExpr*)0;
1378     return BuildBondNot(expr1);
1379     }
1380     return ParseBondPrimitive();
1381 tim 2440 }
1382 gezelter 3057 return (BondExpr*)0;
1383     }
1384 tim 2440
1385 gezelter 3057 static int GetVectorBinding()
1386     {
1387     int vb=0;
1388 tim 2440
1389 gezelter 3057 LexPtr++; //skip colon
1390     if(isdigit(*LexPtr))
1391 tim 2440 {
1392 gezelter 3057 vb = 0;
1393     while( isdigit(*LexPtr) )
1394     vb = vb*10 + ((*LexPtr++)-'0');
1395 tim 2440 }
1396    
1397 gezelter 3057 return(vb);
1398     }
1399 tim 2440
1400 gezelter 3057 static Pattern *ParseSMARTSError( Pattern *pat, BondExpr *expr )
1401     {
1402     if( expr )
1403     FreeBondExpr(expr);
1404     return SMARTSError(pat);
1405     }
1406 tim 2440
1407 gezelter 3057 static Pattern *SMARTSParser( Pattern *pat, ParseState *stat,
1408     int prev, int part )
1409     {
1410     int vb = 0;
1411     register AtomExpr *aexpr;
1412     register BondExpr *bexpr;
1413     register int index;
1414 tim 2440
1415 gezelter 3057 bexpr = (BondExpr*)0;
1416 tim 2440
1417 gezelter 3057 while( *LexPtr )
1418     {
1419     switch( *LexPtr++ )
1420     {
1421     case('.'):
1422     // if( bexpr || (prev==-1) )
1423     return ParseSMARTSError(pat,bexpr);
1424     prev = -1;
1425     break;
1426 tim 2440
1427 gezelter 3057 case('-'): case('='): case('#'):
1428     case(':'): case('~'): case('@'):
1429     case('/'): case('\\'): case('!'):
1430     LexPtr--;
1431     if( (prev==-1) || bexpr )
1432     return ParseSMARTSError(pat,bexpr);
1433     if( !(bexpr=ParseBondExpr(0)) )
1434     return ParseSMARTSError(pat,bexpr);
1435     break;
1436 tim 2440
1437 gezelter 3057 case('('):
1438 tim 2440 #ifdef STRICT
1439 gezelter 3057 if( (prev==-1) || bexpr )
1440     {
1441     LexPtr--;
1442     return ParseSMARTSError(pat,bexpr);
1443     }
1444     pat = SMARTSParser(pat,stat,prev,part);
1445     if( !pat )
1446     return (Pattern*)0;
1447 tim 2440 #else /* STRICT */
1448    
1449 gezelter 3057 if( bexpr )
1450     {
1451     LexPtr--;
1452     return ParseSMARTSError(pat,bexpr);
1453     }
1454     if( prev == -1 )
1455     {
1456     index = pat->acount;
1457     pat = SMARTSParser(pat,stat,-1,part);
1458     if( !pat )
1459     return( (Pattern*)0 );
1460     if( index == pat->acount )
1461     return ParseSMARTSError(pat,bexpr);
1462     prev = index;
1463     }
1464     else
1465     {
1466     pat = SMARTSParser(pat,stat,prev,part);
1467     if( !pat )
1468     return (Pattern*)0;
1469     }
1470 tim 2440 #endif /* STRICT */
1471    
1472 gezelter 3057 if( *LexPtr != ')' )
1473     return ParseSMARTSError(pat,bexpr);
1474     LexPtr++;
1475     break;
1476 tim 2440
1477 gezelter 3057 case(')'): LexPtr--;
1478     if( (prev==-1) || bexpr )
1479     return ParseSMARTSError(pat,bexpr);
1480     return pat;
1481 tim 2440
1482 gezelter 3057 case('%'): if( prev == -1 )
1483 tim 2440 {
1484 gezelter 3057 LexPtr--;
1485     return ParseSMARTSError(pat,bexpr);
1486 tim 2440 }
1487    
1488 gezelter 3057 if( isdigit(LexPtr[0]) && isdigit(LexPtr[1]) )
1489     {
1490     index = 10*(LexPtr[0]-'0') + (LexPtr[1]-'0');
1491     LexPtr += 2;
1492     }
1493     else
1494     return ParseSMARTSError(pat,bexpr);
1495 tim 2440
1496 gezelter 3057 if( stat->closure[index] == -1 )
1497     {
1498     stat->closord[index] = bexpr;
1499     stat->closure[index] = prev;
1500     }
1501     else if( stat->closure[index] != prev )
1502     {
1503     if( !bexpr ) {
1504     if (!stat->closord[index]) {
1505     bexpr = GenerateDefaultBond();
1506     FreeBondExpr(stat->closord[index]);
1507     } else
1508     bexpr = stat->closord[index];
1509     } else if (!EquivalentBondExpr(bexpr, stat->closord[index]))
1510     return ParseSMARTSError(pat,bexpr);
1511    
1512     CreateBond(pat,bexpr,prev,stat->closure[index]);
1513     stat->closure[index] = -1;
1514     bexpr = (BondExpr*)0;
1515     }
1516     else
1517     return ParseSMARTSError(pat,bexpr);
1518     break;
1519 tim 2440
1520 gezelter 3057 case('0'): case('1'): case('2'):
1521     case('3'): case('4'): case('5'):
1522     case('6'): case('7'): case('8'):
1523     case('9'): LexPtr--;
1524     if( prev == -1 )
1525     return ParseSMARTSError(pat,bexpr);
1526     index = (*LexPtr++)-'0';
1527 tim 2440
1528 gezelter 3057 if( stat->closure[index] == -1 )
1529     {
1530     stat->closord[index] = bexpr;
1531     stat->closure[index] = prev;
1532     bexpr = (BondExpr*)0;
1533     }
1534     else if( stat->closure[index] != prev )
1535     {
1536     if( !bexpr ) {
1537     if (!stat->closord[index]) {
1538     bexpr = GenerateDefaultBond();
1539     FreeBondExpr(stat->closord[index]);
1540     } else
1541     bexpr = stat->closord[index];
1542     } else if (!EquivalentBondExpr(bexpr, stat->closord[index]))
1543     return ParseSMARTSError(pat,bexpr);
1544    
1545     CreateBond(pat,bexpr,prev,stat->closure[index]);
1546     stat->closure[index] = -1;
1547     bexpr = (BondExpr*)0;
1548     }
1549     else
1550     return ParseSMARTSError(pat,bexpr);
1551     break;
1552 tim 2440
1553 gezelter 3057 case('['): aexpr = ParseAtomExpr(0);
1554     vb = (*LexPtr == ':') ? GetVectorBinding():0;
1555     if( !aexpr || (*LexPtr!=']') )
1556     return ParseSMARTSError(pat,bexpr);
1557     index = CreateAtom(pat,aexpr,part,vb);
1558     if( prev != -1 )
1559     {
1560     if( !bexpr )
1561     bexpr = GenerateDefaultBond();
1562     CreateBond(pat,bexpr,prev,index);
1563     bexpr = (BondExpr*)0;
1564     }
1565     prev = index;
1566     LexPtr++;
1567     break;
1568    
1569     default:
1570     LexPtr--;
1571     aexpr = ParseSimpleAtomPrimitive();
1572     if( !aexpr )
1573     return ParseSMARTSError(pat,bexpr);
1574     index = CreateAtom(pat,aexpr,part);
1575     if( prev != -1 )
1576     {
1577     if( !bexpr )
1578     bexpr = GenerateDefaultBond();
1579     CreateBond(pat,bexpr,prev,index);
1580     bexpr = (BondExpr*)0;
1581     }
1582     prev = index;
1583     }
1584     }
1585 tim 2440
1586 gezelter 3057 if( (prev==-1) || bexpr )
1587     return ParseSMARTSError(pat,bexpr);
1588 tim 2440
1589 gezelter 3057 return pat;
1590     }
1591 tim 2440
1592 gezelter 3057 static void MarkGrowBonds(Pattern *pat)
1593     {
1594     int i;
1595     OBBitVec bv;
1596 tim 2440
1597 gezelter 3057 for (i = 0;i < pat->bcount;i++)
1598     {
1599     pat->bond[i].grow = (bv[pat->bond[i].src] && bv[pat->bond[i].dst])?
1600     false:true;
1601 tim 2440
1602 gezelter 3057 bv.SetBitOn(pat->bond[i].src);
1603     bv.SetBitOn(pat->bond[i].dst);
1604     }
1605     }
1606 tim 2440
1607 gezelter 3057 static int GetChiralFlag(AtomExpr *expr)
1608     {
1609     int size=0;
1610 tim 2440 #define OB_EVAL_STACKSIZE 40
1611    
1612 gezelter 3057 AtomExpr *stack[OB_EVAL_STACKSIZE];
1613     memset(stack,'\0',sizeof(AtomExpr*)*OB_EVAL_STACKSIZE);
1614 tim 2440 #undef OB_EVAL_STACKSIZE
1615    
1616 gezelter 3057 bool lftest=true;
1617 tim 2440
1618 gezelter 3057 for (size=0,stack[size] = expr;size >= 0;expr=stack[size])
1619     {
1620     switch (expr->type)
1621     {
1622     case AE_LEAF:
1623     if (expr->leaf.prop == AL_CHIRAL)
1624     return(expr->leaf.value);
1625     size--;
1626     break;
1627 tim 2440
1628 gezelter 3057 case AE_ANDHI:
1629     case AE_ANDLO:
1630 tim 2440
1631 gezelter 3057 if (stack[size+1] == expr->bin.rgt)
1632     size--;
1633     else if (stack[size+1] == expr->bin.lft)
1634     {
1635     if (lftest)
1636     {
1637     size++;
1638     stack[size] = expr->bin.rgt;
1639     }
1640     else
1641     size--;
1642     }
1643     else
1644     {
1645     size++;
1646     stack[size] = expr->bin.lft;
1647     }
1648     break;
1649 tim 2440
1650 gezelter 3057 case AE_OR:
1651 tim 2440
1652 gezelter 3057 if (stack[size+1] == expr->bin.rgt)
1653     size--;
1654     else if (stack[size+1] == expr->bin.lft)
1655     {
1656     if (!lftest)
1657     {
1658     size++;
1659     stack[size] = expr->bin.rgt;
1660     }
1661     else
1662     size--;
1663     }
1664     else
1665     {
1666     size++;
1667     stack[size] = expr->bin.lft;
1668     }
1669     break;
1670 tim 2440
1671 gezelter 3057 case AE_NOT:
1672     if (stack[size+1] != expr->mon.arg)
1673     {
1674     size++;
1675     stack[size] = expr->mon.arg;
1676     }
1677     else
1678     {
1679     lftest = !lftest;
1680     size--;
1681     }
1682     break;
1683 tim 2440
1684 gezelter 3057 case AE_RECUR:
1685     size--;
1686     break;
1687     }
1688     }
1689 tim 2440
1690 gezelter 3057 return((int)false);
1691     }
1692 tim 2440
1693 gezelter 3057 static Pattern *ParseSMARTSPart( Pattern *result, int part )
1694     {
1695     auto ParseState stat;
1696     int i,flag;
1697 tim 2440
1698 gezelter 3057 for( i=0; i<100; i++ )
1699     stat.closure[i] = -1;
1700 tim 2440
1701 gezelter 3057 result = SMARTSParser(result,&stat,-1,part);
1702 tim 2440
1703 gezelter 3057 flag = False;
1704     for( i=0; i<100; i++ )
1705     if( stat.closure[i] != -1 )
1706     {
1707     FreeBondExpr(stat.closord[i]);
1708     flag = True;
1709     }
1710    
1711     if( result )
1712 tim 2440 {
1713 gezelter 3057 if( flag )
1714     return(SMARTSError(result));
1715     else
1716     {
1717     MarkGrowBonds(result);
1718     result->ischiral = false;
1719     for (i = 0;i < result->acount;i++)
1720     {
1721     result->atom[i].chiral_flag = GetChiralFlag(result->atom[i].expr);
1722     if (result->atom[i].chiral_flag)
1723     result->ischiral = true;
1724     }
1725     return(result);
1726     }
1727 tim 2440 }
1728 gezelter 3057 else
1729     return (Pattern*)0;
1730     }
1731 tim 2440
1732    
1733 gezelter 3057 static Pattern *ParseSMARTSPattern( void )
1734     {
1735     Pattern *result;
1736     result = AllocPattern();
1737 tim 2440
1738 gezelter 3057 while( *LexPtr == '(' )
1739     {
1740     LexPtr++;
1741     result = ParseSMARTSPart(result,result->parts);
1742     if( !result )
1743     return (Pattern*)0;
1744     result->parts++;
1745 tim 2440
1746 gezelter 3057 if( *LexPtr != ')' )
1747     return SMARTSError(result);
1748     LexPtr++;
1749 tim 2440
1750 gezelter 3057 if( !*LexPtr || (*LexPtr==')') )
1751     return result;
1752 tim 2440
1753 gezelter 3057 if( *LexPtr != '.' )
1754     return SMARTSError(result);
1755     LexPtr++;
1756     }
1757 tim 2440
1758 gezelter 3057 return ParseSMARTSPart(result,0);
1759     }
1760 tim 2440
1761 gezelter 3057 static Pattern *ParseSMARTSString( char *ptr )
1762     {
1763     register Pattern *result;
1764 tim 2440
1765 gezelter 3057 if( !ptr || !*ptr )
1766     return (Pattern*)0;
1767 tim 2440
1768 gezelter 3057 LexPtr = MainPtr = ptr;
1769     result = ParseSMARTSPattern();
1770     if( result && *LexPtr )
1771     return SMARTSError(result);
1772     return result;
1773     }
1774 tim 2440
1775 gezelter 3057 Pattern *ParseSMARTSRecord( char *ptr )
1776     {
1777     register char *src,*dst;
1778 tim 2440
1779 gezelter 3057 src = ptr;
1780     while( *src && !isspace(*src) )
1781     src++;
1782 tim 2440
1783 gezelter 3057 if( isspace(*src) )
1784     {
1785     *src++ = '\0';
1786     while( isspace(*src) )
1787     src++;
1788     }
1789 tim 2440
1790 gezelter 3057 dst = Descr;
1791     while( *src && (dst<Descr+78) )
1792     {
1793     if( isspace(*src) )
1794     {
1795     *dst++ = ' ';
1796     while( isspace(*src) )
1797     src++;
1798     }
1799     else
1800     *dst++ = *src++;
1801     }
1802     *dst = '\0';
1803 tim 2440
1804 gezelter 3057 return ParseSMARTSString(Buffer);
1805     }
1806 tim 2440
1807 gezelter 3057 /*==============================*/
1808     /* SMARTS Component Traversal */
1809     /*==============================*/
1810 tim 2440
1811 gezelter 3057 static void TraverseSMARTS( Pattern *pat, int i )
1812     {
1813     register int j,k;
1814 tim 2440
1815 gezelter 3057 pat->atom[i].visit = True;
1816     for( j=0; j<pat->bcount; j++ )
1817     if( pat->bond[j].visit == -1 )
1818     {
1819     if( pat->bond[j].src == i )
1820     {
1821     pat->bond[j].visit = i;
1822     k = pat->bond[j].dst;
1823     if( !pat->atom[k].visit )
1824     TraverseSMARTS(pat,k);
1825     }
1826     else if( pat->bond[j].dst == i )
1827     {
1828     pat->bond[j].visit = i;
1829     k = pat->bond[j].src;
1830     if( !pat->atom[k].visit )
1831     TraverseSMARTS(pat,k);
1832     }
1833     }
1834     }
1835 tim 2440
1836 gezelter 3057 /*============================*/
1837     /* Canonical SMARTS Pattern */
1838     /*============================*/
1839 tim 2440
1840 gezelter 3057 static AtomExpr *NotAtomExpr( AtomExpr* );
1841     static AtomExpr *AndAtomExpr( AtomExpr*, AtomExpr* );
1842     static AtomExpr *OrAtomExpr( AtomExpr*, AtomExpr* );
1843     //static AtomExpr *TransformAtomExpr( AtomExpr* );
1844     //static Pattern *CanonicaliseSMARTS( Pattern* );
1845 tim 2440
1846 gezelter 3057 static int IsBooleanAtomLeaf( AtomExpr *expr )
1847     {
1848     return (expr->leaf.prop==AL_AROM) ||
1849     (expr->leaf.prop==AL_CONST);
1850     }
1851 tim 2440
1852 gezelter 3057 static int IsNegatingAtomLeaf( AtomExpr *expr )
1853     {
1854     return (expr->leaf.prop==AL_RINGS);
1855     }
1856 tim 2440
1857 gezelter 3057 static int EqualAtomExpr( AtomExpr *lft, AtomExpr *rgt )
1858     {
1859     if( lft->type != rgt->type )
1860     return False;
1861 tim 2440
1862 gezelter 3057 if( lft->type == AE_LEAF )
1863     {
1864     return( (lft->leaf.prop==rgt->leaf.prop) &&
1865     (lft->leaf.value==rgt->leaf.value) );
1866     }
1867     else if( lft->type == AE_NOT )
1868     {
1869     return EqualAtomExpr(lft->mon.arg,rgt->mon.arg);
1870     }
1871     else if( lft->type == AE_RECUR )
1872     return False;
1873 tim 2440
1874 gezelter 3057 return EqualAtomExpr(lft->bin.lft,rgt->bin.lft) &&
1875     EqualAtomExpr(lft->bin.rgt,rgt->bin.rgt);
1876     }
1877 tim 2440
1878 gezelter 3057 static int OrderAtomExpr( AtomExpr *lft, AtomExpr *rgt )
1879     {
1880     register AtomExpr *larg;
1881     register AtomExpr *rarg;
1882     register int stat;
1883 tim 2440
1884 gezelter 3057 if( lft->type == AE_NOT )
1885     { /* larg->type == AE_LEAF */
1886     larg = lft->mon.arg;
1887     }
1888     else
1889     larg = lft;
1890 tim 2440
1891 gezelter 3057 if( rgt->type == AE_NOT )
1892     { /* rarg->type == AE_LEAF */
1893     rarg = rgt->mon.arg;
1894     }
1895     else
1896     rarg = rgt;
1897 tim 2440
1898 gezelter 3057 if( larg->type > rarg->type )
1899     {
1900     return 1;
1901     }
1902     else if( larg->type < rarg->type )
1903     return -1;
1904 tim 2440
1905 gezelter 3057 if( larg->type == AE_LEAF )
1906     {
1907     if( larg->leaf.prop > rarg->leaf.prop )
1908     return 1;
1909     if( larg->leaf.prop < rarg->leaf.prop )
1910     return -1;
1911     return( larg->leaf.value - rarg->leaf.value );
1912     }
1913 tim 2440
1914 gezelter 3057 stat = OrderAtomExpr(lft->bin.lft,rgt->bin.lft);
1915     if( stat != 0 )
1916     return stat;
1917     return OrderAtomExpr(lft->bin.rgt,rgt->bin.rgt);
1918     }
1919 tim 2440
1920 gezelter 3057 static int AtomLeafConflict( AtomExpr *lft, AtomExpr *rgt )
1921     {
1922     register AtomExpr *tmp;
1923 tim 2440
1924 gezelter 3057 if( (lft->type==AE_LEAF) && (rgt->type==AE_LEAF) )
1925     {
1926     if( lft->leaf.prop == rgt->leaf.prop )
1927     {
1928     if( IsNegatingAtomLeaf(lft) )
1929     {
1930     if( lft->leaf.value == 0 )
1931     {
1932     return rgt->leaf.value != 0;
1933     }
1934     else if( lft->leaf.value == -1 )
1935     return rgt->leaf.value == 0;
1936 tim 2440
1937 gezelter 3057 if( rgt->leaf.value == 0 )
1938     {
1939     return lft->leaf.value != 0;
1940     }
1941     else if( rgt->leaf.value == -1 )
1942     return lft->leaf.value == 0;
1943     }
1944     return lft->leaf.value != rgt->leaf.value;
1945     }
1946 tim 2440
1947 gezelter 3057 if( lft->leaf.prop > rgt->leaf.prop )
1948     {
1949     tmp = lft;
1950     lft = rgt;
1951     rgt = tmp;
1952     }
1953 tim 2440
1954 gezelter 3057 /* Aromaticity -> Ring */
1955     if( (lft->leaf.prop==AL_AROM) && (rgt->leaf.prop==AL_RINGS) )
1956     return( lft->leaf.value && !rgt->leaf.value );
1957 tim 2440
1958 gezelter 3057 /* Positive charge ~ Negative charge */
1959     if( (lft->leaf.prop==AL_NEGATIVE) && (rgt->leaf.prop==AL_POSITIVE) )
1960     return( (lft->leaf.value!=0) || (rgt->leaf.value!=0) );
1961 tim 2440
1962 gezelter 3057 /* Total hcount >= Implicit hcount */
1963     if( (lft->leaf.prop==AL_HCOUNT) && (rgt->leaf.prop==AL_IMPLICIT) )
1964     return( lft->leaf.value < rgt->leaf.value );
1965     }
1966 tim 2440
1967 gezelter 3057 if( (lft->type==AE_LEAF) && (rgt->type==AE_NOT) )
1968     {
1969     rgt = rgt->mon.arg;
1970     if( (lft->leaf.prop==AL_NEGATIVE) && (rgt->leaf.prop==AL_POSITIVE) )
1971     return( (lft->leaf.value==0) && (rgt->leaf.value==0) );
1972     if( (lft->leaf.prop==AL_POSITIVE) && (rgt->leaf.prop==AL_NEGATIVE) )
1973     return( (lft->leaf.value==0) && (rgt->leaf.value==0) );
1974     return False;
1975     }
1976 tim 2440
1977 gezelter 3057 if( (lft->type==AE_NOT) && (rgt->type==AE_LEAF) )
1978     {
1979     lft = lft->mon.arg;
1980     if( (lft->leaf.prop==AL_NEGATIVE) && (rgt->leaf.prop==AL_POSITIVE) )
1981     return( (lft->leaf.value==0) && (rgt->leaf.value==0) );
1982     if( (lft->leaf.prop==AL_POSITIVE) && (rgt->leaf.prop==AL_NEGATIVE) )
1983     return( (lft->leaf.value==0) && (rgt->leaf.value==0) );
1984     return False;
1985     }
1986 tim 2440
1987 gezelter 3057 return False;
1988     }
1989 tim 2440
1990 gezelter 3057 static int AtomExprConflict( AtomExpr *lft, AtomExpr *rgt )
1991     {
1992     while( rgt->type == AE_ANDHI )
1993     {
1994     if( AtomLeafConflict(lft,rgt->bin.lft) )
1995     return True;
1996     rgt = rgt->bin.rgt;
1997     }
1998     return AtomLeafConflict(lft,rgt);
1999     }
2000 tim 2440
2001 gezelter 3057 /* return LEAF(lft) => LEAF(rgt); */
2002     static int AtomLeafImplies( AtomExpr *lft, AtomExpr *rgt )
2003     {
2004     if( (lft->type==AE_LEAF) && (rgt->type==AE_LEAF) )
2005     { /* Implied Ring Membership */
2006     if( (rgt->leaf.prop==AL_RINGS) && (rgt->leaf.value==-1) )
2007     {
2008     if( lft->leaf.prop == AL_AROM )
2009     return lft->leaf.value;
2010 tim 2440
2011 gezelter 3057 if( lft->leaf.prop == AL_RINGS )
2012     return lft->leaf.value > 0;
2013 tim 2440
2014 gezelter 3057 if( lft->leaf.prop == AL_SIZE )
2015     return lft->leaf.value > 0;
2016     }
2017 tim 2440
2018 gezelter 3057 /* Positive charge ~ Negative charge */
2019     if( (lft->leaf.prop==AL_POSITIVE) && (rgt->leaf.prop==AL_NEGATIVE) )
2020     return (lft->leaf.value==0) && (rgt->leaf.value==0);
2021     return False;
2022     }
2023 tim 2440
2024 gezelter 3057 if( (lft->type==AE_LEAF) && (rgt->type==AE_NOT) )
2025     {
2026     rgt = rgt->mon.arg;
2027     if( lft->leaf.prop == rgt->leaf.prop )
2028     return lft->leaf.value != rgt->leaf.value;
2029 tim 2440
2030 gezelter 3057 if( (lft->leaf.prop==AL_POSITIVE) && (rgt->leaf.prop==AL_NEGATIVE) )
2031     return True;
2032     if( (lft->leaf.prop==AL_NEGATIVE) && (rgt->leaf.prop==AL_POSITIVE) )
2033     return True;
2034     return False;
2035     }
2036 tim 2440
2037 gezelter 3057 return False;
2038     }
2039 tim 2440
2040 gezelter 3057 /* return EXPR(rgt) => LEAF(lft); */
2041     static int AtomExprImplied( AtomExpr *lft, AtomExpr *rgt )
2042     {
2043     while( rgt->type == AE_ANDHI )
2044     {
2045     if( AtomLeafImplies(rgt->bin.lft,lft) )
2046     return True;
2047     rgt = rgt->bin.rgt;
2048     }
2049     return AtomLeafImplies(rgt,lft);
2050     }
2051 tim 2440
2052 gezelter 3057 /* remove implied nodes from EXPR(rgt) */
2053     static AtomExpr *AtomExprImplies( AtomExpr *lft, AtomExpr *rgt )
2054     {
2055     register AtomExpr *tmp;
2056 tim 2440
2057 gezelter 3057 if( rgt->type != AE_ANDHI )
2058     {
2059     if( AtomLeafImplies(lft,rgt) )
2060     {
2061     FreeAtomExpr(rgt);
2062     return (AtomExpr*)0;
2063     }
2064     return rgt;
2065     }
2066 tim 2440
2067 gezelter 3057 tmp = AtomExprImplies(lft,rgt->bin.rgt);
2068 tim 2440
2069 gezelter 3057 if( tmp )
2070     {
2071     if( AtomLeafImplies(lft,rgt->bin.lft) )
2072     {
2073     rgt->bin.rgt = (AtomExpr*)0;
2074     FreeAtomExpr(rgt);
2075     return tmp;
2076     }
2077     rgt->bin.rgt = tmp;
2078     return rgt;
2079     }
2080     else
2081     {
2082     rgt->bin.rgt = (AtomExpr*)0;
2083     if( AtomLeafImplies(lft,rgt->bin.lft) )
2084     {
2085     FreeAtomExpr(rgt);
2086     return (AtomExpr*)0;
2087     }
2088     tmp = rgt->bin.lft;
2089     rgt->bin.lft = (AtomExpr*)0;
2090     FreeAtomExpr(rgt);
2091     return tmp;
2092     }
2093     }
2094 tim 2440
2095 gezelter 3057 static AtomExpr *AndAtomExprLeaf( AtomExpr *lft, AtomExpr *rgt )
2096     {
2097     if( AtomExprConflict(lft,rgt) )
2098     {
2099     FreeAtomExpr(lft);
2100     FreeAtomExpr(rgt);
2101     return BuildAtomLeaf(AL_CONST,False);
2102     }
2103 tim 2440
2104 gezelter 3057 if( AtomExprImplied(lft,rgt) )
2105     {
2106     FreeAtomExpr(lft);
2107     return rgt;
2108     }
2109 tim 2440
2110 gezelter 3057 rgt = AtomExprImplies(lft,rgt);
2111     if( !rgt )
2112     return lft;
2113 tim 2440
2114 gezelter 3057 return BuildAtomBin(AE_ANDHI,lft,rgt);
2115     }
2116 tim 2440
2117 gezelter 3057 static AtomExpr *ConstrainRecursion( AtomExpr *recur, AtomExpr *expr )
2118     {
2119     register AtomExpr *head;
2120     register Pattern *pat;
2121 tim 2440
2122 gezelter 3057 pat = (Pattern*)recur->recur.recur;
2123     head = AndAtomExpr(pat->atom[0].expr,expr);
2124     pat->atom[0].expr = head;
2125 tim 2440
2126 gezelter 3057 if( IsInvalidAtom(head) )
2127     {
2128     FreePattern(pat);
2129     return BuildAtomLeaf(AL_CONST,False);
2130     }
2131     return recur;
2132     }
2133 tim 2440
2134 gezelter 3057 static AtomExpr *AndAtomExpr( AtomExpr *lft, AtomExpr *rgt )
2135     {
2136     register AtomExpr *expr;
2137     register int order;
2138 tim 2440
2139 gezelter 3057 /* Identities */
2140     if( EqualAtomExpr(lft,rgt) )
2141     {
2142     FreeAtomExpr(rgt);
2143     return lft;
2144     }
2145 tim 2440
2146 gezelter 3057 if( (lft->type==AE_LEAF) && (lft->leaf.prop==AL_CONST) )
2147     {
2148     if( lft->leaf.value )
2149     {
2150     FreeAtomExpr(lft);
2151     return rgt;
2152     }
2153     else
2154     {
2155     FreeAtomExpr(rgt);
2156     return lft;
2157     }
2158     }
2159 tim 2440
2160 gezelter 3057 if( (rgt->type==AE_LEAF) && (rgt->leaf.prop==AL_CONST) )
2161     {
2162     if( rgt->leaf.value )
2163     {
2164     FreeAtomExpr(rgt);
2165     return lft;
2166     }
2167     else
2168     {
2169     FreeAtomExpr(lft);
2170     return rgt;
2171     }
2172     }
2173 tim 2440
2174 gezelter 3057 /* Distributivity */
2175     if( lft->type == AE_OR )
2176     {
2177     expr = CopyAtomExpr(rgt);
2178     expr = OrAtomExpr(AndAtomExpr(expr,lft->bin.lft),
2179     AndAtomExpr(rgt, lft->bin.rgt));
2180     lft->bin.lft = (AtomExpr*)0;
2181     lft->bin.rgt = (AtomExpr*)0;
2182     FreeAtomExpr(lft);
2183     return( expr );
2184     }
2185 tim 2440
2186 gezelter 3057 if( rgt->type == AE_OR )
2187     {
2188     expr = CopyAtomExpr(lft);
2189     expr = OrAtomExpr(AndAtomExpr(expr,rgt->bin.lft),
2190     AndAtomExpr(lft, rgt->bin.rgt));
2191     rgt->bin.lft = (AtomExpr*)0;
2192     rgt->bin.rgt = (AtomExpr*)0;
2193     FreeAtomExpr(rgt);
2194     return( expr );
2195     }
2196 tim 2440
2197 gezelter 3057 /* Recursion */
2198     if( (rgt->type==AE_RECUR) && (lft->type!=AE_RECUR) )
2199     return ConstrainRecursion(rgt,lft);
2200 tim 2440
2201 gezelter 3057 if( (rgt->type!=AE_RECUR) && (lft->type==AE_RECUR) )
2202     return ConstrainRecursion(lft,rgt);
2203 tim 2440
2204 gezelter 3057 order = OrderAtomExpr(lft,rgt);
2205     if( order > 0 )
2206     {
2207     expr = lft;
2208     lft = rgt;
2209     rgt = expr;
2210     }
2211 tim 2440
2212 gezelter 3057 if( lft->type == AE_ANDHI )
2213     {
2214     expr = AndAtomExpr(lft->bin.rgt,rgt);
2215     expr = AndAtomExpr(lft->bin.lft,expr);
2216     lft->bin.lft = (AtomExpr*)0;
2217     lft->bin.rgt = (AtomExpr*)0;
2218     FreeAtomExpr(lft);
2219     return expr;
2220     }
2221 tim 2440
2222 gezelter 3057 if( rgt->type == AE_ANDHI )
2223     {
2224     if( OrderAtomExpr(lft,rgt->bin.lft) > 0 )
2225     {
2226     expr = AndAtomExpr(lft,rgt->bin.rgt);
2227     expr = AndAtomExpr(rgt->bin.lft,expr);
2228     rgt->bin.lft = (AtomExpr*)0;
2229     rgt->bin.rgt = (AtomExpr*)0;
2230     FreeAtomExpr(rgt);
2231     return expr;
2232     }
2233 tim 2440
2234 gezelter 3057 if( EqualAtomExpr(lft,rgt->bin.lft) )
2235     {
2236     FreeAtomExpr(lft);
2237     return rgt;
2238     }
2239     }
2240 tim 2440
2241 gezelter 3057 return AndAtomExprLeaf(lft,rgt);
2242     }
2243 tim 2440
2244 gezelter 3057 static AtomExpr *OrAtomExprLeaf( AtomExpr *lft, AtomExpr *rgt )
2245     {
2246     return BuildAtomBin(AE_OR,lft,rgt);
2247     }
2248 tim 2440
2249 gezelter 3057 static AtomExpr *OrAtomExpr( AtomExpr *lft, AtomExpr *rgt )
2250     {
2251     register AtomExpr *expr;
2252     register int order;
2253 tim 2440
2254 gezelter 3057 /* Identities */
2255     if( EqualAtomExpr(lft,rgt) )
2256     {
2257     FreeAtomExpr(rgt);
2258     return lft;
2259     }
2260 tim 2440
2261 gezelter 3057 if( (lft->type==AE_LEAF) && (lft->leaf.prop==AL_CONST) )
2262     {
2263     if( lft->leaf.value )
2264     {
2265     FreeAtomExpr(rgt);
2266     return lft;
2267     }
2268     else
2269     {
2270     FreeAtomExpr(lft);
2271     return rgt;
2272     }
2273     }
2274 tim 2440
2275 gezelter 3057 if( (rgt->type==AE_LEAF) && (rgt->leaf.prop==AL_CONST) )
2276     {
2277     if( rgt->leaf.value )
2278     {
2279     FreeAtomExpr(lft);
2280     return rgt;
2281     }
2282     else
2283     {
2284     FreeAtomExpr(rgt);
2285     return lft;
2286     }
2287     }
2288 tim 2440
2289 gezelter 3057 order = OrderAtomExpr(lft,rgt);
2290     if( order > 0 )
2291     {
2292     expr = lft;
2293     lft = rgt;
2294     rgt = expr;
2295     }
2296 tim 2440
2297 gezelter 3057 if( lft->type == AE_OR )
2298     {
2299     expr = OrAtomExpr(lft->bin.rgt,rgt);
2300     expr = OrAtomExpr(lft->bin.lft,expr);
2301     lft->bin.lft = (AtomExpr*)0;
2302     lft->bin.rgt = (AtomExpr*)0;
2303     FreeAtomExpr(lft);
2304     return expr;
2305     }
2306 tim 2440
2307 gezelter 3057 if( rgt->type == AE_OR )
2308     {
2309     if( OrderAtomExpr(lft,rgt->bin.lft) > 0 )
2310     {
2311     expr = OrAtomExpr(lft,rgt->bin.rgt);
2312     expr = OrAtomExpr(rgt->bin.lft,expr);
2313     rgt->bin.lft = (AtomExpr*)0;
2314     rgt->bin.rgt = (AtomExpr*)0;
2315     FreeAtomExpr(rgt);
2316     return expr;
2317     }
2318 tim 2440
2319 gezelter 3057 if( EqualAtomExpr(lft,rgt->bin.lft) )
2320     {
2321     FreeAtomExpr(lft);
2322     return rgt;
2323     }
2324     }
2325 tim 2440
2326 gezelter 3057 return OrAtomExprLeaf(lft,rgt);
2327     }
2328 tim 2440
2329 gezelter 3057 static AtomExpr *NotAtomExpr( AtomExpr *expr )
2330     {
2331     register AtomExpr *result;
2332     register AtomExpr *lft;
2333     register AtomExpr *rgt;
2334 tim 2440
2335 gezelter 3057 if( expr->type == AE_LEAF )
2336     {
2337     if( IsBooleanAtomLeaf(expr) )
2338     {
2339     expr->leaf.value = !expr->leaf.value;
2340     return expr;
2341     }
2342     else if( IsNegatingAtomLeaf(expr) )
2343     {
2344     if( expr->leaf.value == -1 )
2345     {
2346     expr->leaf.value = 0;
2347     return expr;
2348     }
2349     else if( expr->leaf.value == 0 )
2350     {
2351     expr->leaf.value = -1;
2352     return expr;
2353     }
2354     }
2355     }
2356     else if( expr->type == AE_NOT )
2357     {
2358     result = expr->mon.arg;
2359     expr->mon.arg = (AtomExpr*)0;
2360     FreeAtomExpr(expr);
2361     return result;
2362     }
2363     else if( (expr->type==AE_ANDHI) ||
2364     (expr->type==AE_ANDLO) )
2365     {
2366     lft = NotAtomExpr(expr->bin.lft);
2367     rgt = NotAtomExpr(expr->bin.rgt);
2368     expr->bin.lft = (AtomExpr*)0;
2369     expr->bin.rgt = (AtomExpr*)0;
2370     FreeAtomExpr(expr);
2371     return OrAtomExpr(lft,rgt);
2372     }
2373     else if( expr->type == AE_OR )
2374     {
2375     lft = NotAtomExpr(expr->bin.lft);
2376     rgt = NotAtomExpr(expr->bin.rgt);
2377     expr->bin.lft = (AtomExpr*)0;
2378     expr->bin.rgt = (AtomExpr*)0;
2379     FreeAtomExpr(expr);
2380     return AndAtomExpr(lft,rgt);
2381     }
2382     return BuildAtomNot(expr);
2383     }
2384 tim 2440
2385 gezelter 3057 /*==============================*/
2386     /* Canonical Bond Expressions */
2387     /*==============================*/
2388 tim 2440
2389 gezelter 3057 static int GetBondLeafIndex( BondExpr *expr )
2390     {
2391     if( expr->leaf.prop == BL_CONST )
2392     {
2393     if( expr->leaf.value )
2394     {
2395     return( BS_ALL );
2396     }
2397     else
2398     return( 0 );
2399     }
2400     else /* expr->leaf.prop == BL_TYPE */
2401     switch( expr->leaf.value )
2402 tim 2440 {
2403 gezelter 3057 case(BT_SINGLE): return( BS_SINGLE );
2404     case(BT_DOUBLE): return( BS_DOUBLE );
2405     case(BT_TRIPLE): return( BS_TRIPLE );
2406     case(BT_AROM): return( BS_AROM );
2407     case(BT_UP): return( BS_UP );
2408     case(BT_DOWN): return( BS_DOWN );
2409     case(BT_UPUNSPEC): return( BS_UPUNSPEC );
2410     case(BT_DOWNUNSPEC): return( BS_DOWNUNSPEC );
2411     case(BT_RING): return( BS_RING );
2412 tim 2440 }
2413 gezelter 3057 return 0;
2414     }
2415 tim 2440
2416 gezelter 3057 static int GetBondExprIndex( BondExpr *expr )
2417     {
2418     register int lft,rgt;
2419     register int arg;
2420 tim 2440
2421 gezelter 3057 switch( expr->type )
2422     {
2423     case(BE_LEAF): return GetBondLeafIndex(expr);
2424 tim 2440
2425 gezelter 3057 case(BE_NOT): arg = GetBondExprIndex(expr->mon.arg);
2426     return( arg ^ BS_ALL );
2427 tim 2440
2428 gezelter 3057 case(BE_ANDHI):
2429     case(BE_ANDLO): lft = GetBondExprIndex(expr->bin.lft);
2430     rgt = GetBondExprIndex(expr->bin.rgt);
2431     return( lft & rgt );
2432 tim 2440
2433 gezelter 3057 case(BE_OR): lft = GetBondExprIndex(expr->bin.lft);
2434     rgt = GetBondExprIndex(expr->bin.rgt);
2435     return( lft | rgt );
2436     }
2437     /* Avoid Compiler Warning */
2438     return 0;
2439     }
2440 tim 2440
2441 gezelter 3057 static BondExpr *NotBondExpr( BondExpr *expr )
2442     {
2443     register BondExpr *result;
2444 tim 2440
2445 gezelter 3057 if( expr->type == BE_LEAF )
2446     {
2447     if( expr->leaf.prop == BL_CONST )
2448     {
2449     expr->leaf.value = !expr->leaf.value;
2450     return expr;
2451     }
2452     }
2453     else if( expr->type == BE_NOT )
2454     {
2455     result = expr->mon.arg;
2456     expr->mon.arg = (BondExpr*)0;
2457     FreeBondExpr(expr);
2458     return result;
2459     }
2460     return BuildBondNot(expr);
2461     }
2462 tim 2440
2463 gezelter 3057 static BondExpr *TransformBondExpr( BondExpr *expr )
2464     {
2465     register BondExpr *lft,*rgt;
2466     register BondExpr *arg;
2467 tim 2440
2468 gezelter 3057 if( expr->type == BE_LEAF )
2469     {
2470     return expr;
2471     }
2472     else if( expr->type == BE_NOT )
2473     {
2474     arg = expr->mon.arg;
2475     arg = TransformBondExpr(arg);
2476     expr->mon.arg = (BondExpr*)0;
2477     FreeBondExpr(expr);
2478     return NotBondExpr(arg);
2479     }
2480     else if( expr->type == BE_ANDHI )
2481     {
2482     lft = expr->bin.lft;
2483     rgt = expr->bin.rgt;
2484     lft = TransformBondExpr(lft);
2485     rgt = TransformBondExpr(rgt);
2486     expr->bin.lft = lft;
2487     expr->bin.rgt = rgt;
2488     return expr;
2489     }
2490     else if( expr->type == BE_ANDLO )
2491     {
2492     lft = expr->bin.lft;
2493     rgt = expr->bin.rgt;
2494     lft = TransformBondExpr(lft);
2495     rgt = TransformBondExpr(rgt);
2496     expr->bin.lft = lft;
2497     expr->bin.rgt = rgt;
2498     return expr;
2499     }
2500     else if( expr->type == BE_OR )
2501     {
2502     lft = expr->bin.lft;
2503     rgt = expr->bin.rgt;
2504     lft = TransformBondExpr(lft);
2505     rgt = TransformBondExpr(rgt);
2506     expr->bin.lft = lft;
2507     expr->bin.rgt = rgt;
2508     return expr;
2509     }
2510     return expr;
2511     }
2512 tim 2440
2513     #ifdef FOO
2514 gezelter 3057 static BondExpr *CanonicaliseBond( BondExpr *expr )
2515     {
2516 tim 2440 #ifndef ORIG
2517 gezelter 3057 register int index;
2518 tim 2440
2519 gezelter 3057 index = GetBondExprIndex(expr);
2520     FreeBondExpr(expr);
2521 tim 2440
2522 gezelter 3057 LexPtr = CanBondExpr[index];
2523     if( *LexPtr )
2524     {
2525     expr = ParseBondExpr(0);
2526     }
2527     else
2528     expr = GenerateDefaultBond();
2529 tim 2440 #endif
2530    
2531 gezelter 3057 return TransformBondExpr(expr);
2532     }
2533 tim 2440 #endif
2534    
2535    
2536 gezelter 3057 //**********************************
2537     //********Pattern Matching**********
2538     //**********************************
2539 tim 2440
2540 gezelter 3057 bool OBSmartsPattern::Init(const char *buffer)
2541     {
2542     strncpy(Buffer,buffer, sizeof(Buffer) - 1);
2543     Buffer[sizeof(Buffer) - 1] = '\0';
2544 tim 2440
2545 gezelter 3057 _pat = ParseSMARTSRecord(Buffer);
2546     _str = buffer;
2547 tim 2440
2548 gezelter 3057 return(_pat != (Pattern*)NULL);
2549     }
2550 tim 2440
2551 gezelter 3057 bool OBSmartsPattern::Init(const std::string &s)
2552     {
2553     strncpy(Buffer, s.c_str(), sizeof(Buffer) - 1);
2554     Buffer[sizeof(Buffer) - 1] = '\0';
2555 tim 2440
2556 gezelter 3057 _pat = ParseSMARTSRecord(Buffer);
2557     _str = s;
2558 tim 2440
2559 gezelter 3057 return(_pat != (Pattern*)NULL);
2560     }
2561 tim 2440
2562 gezelter 3057 OBSmartsPattern::~OBSmartsPattern()
2563     {
2564     if (_pat)
2565     FreePattern(_pat);
2566     }
2567 tim 2440
2568 gezelter 3057 bool OBSmartsPattern::Match(OBMol &mol,bool single)
2569     {
2570     RSCACHE.clear();
2571     return(match(mol,_pat,_mlist,single));
2572     }
2573 tim 2440
2574 gezelter 3057 bool OBSmartsPattern::RestrictedMatch(OBMol &mol,std::vector<std::pair<int,int> > &pr,bool single)
2575     {
2576     bool ok;
2577     std::vector<std::vector<int> > mlist;
2578     std::vector<std::vector<int> >::iterator i;
2579     std::vector<std::pair<int,int> >::iterator j;
2580 tim 2440
2581 gezelter 3057 RSCACHE.clear();
2582     match(mol,_pat,mlist);
2583     _mlist.clear();
2584     if (mlist.empty())
2585     return(false);
2586 tim 2440
2587 gezelter 3057 for (i = mlist.begin();i != mlist.end();i++)
2588     {
2589     ok = true;
2590     for (j = pr.begin();j != pr.end() && ok;j++)
2591     if ((*i)[j->first] != j->second)
2592     ok = false;
2593 tim 2440
2594 gezelter 3057 if (ok)
2595     _mlist.push_back(*i);
2596     if (single && !_mlist.empty())
2597     return(true);
2598     }
2599 tim 2440
2600 gezelter 3057 return((_mlist.empty()) ? false:true);
2601     }
2602 tim 2440
2603 gezelter 3057 bool OBSmartsPattern::RestrictedMatch(OBMol &mol,OBBitVec &vres,bool single)
2604     {
2605     bool ok;
2606     std::vector<int>::iterator j;
2607     std::vector<std::vector<int> > mlist;
2608     std::vector<std::vector<int> >::iterator i;
2609 tim 2440
2610 gezelter 3057 RSCACHE.clear();
2611     match(mol,_pat,mlist);
2612 tim 2440
2613 gezelter 3057 _mlist.clear();
2614     if (mlist.empty())
2615     return(false);
2616 tim 2440
2617 gezelter 3057 for (i = mlist.begin();i != mlist.end();i++)
2618     {
2619     ok = true;
2620     for (j = i->begin();j != i->end();j++)
2621     if (!vres[*j])
2622     {
2623     ok = false;
2624     break;
2625     }
2626     if (!ok)
2627     continue;
2628 tim 2440
2629 gezelter 3057 _mlist.push_back(*i);
2630     if (single && !_mlist.empty())
2631     return(true);
2632     }
2633 tim 2440
2634 gezelter 3057 return((_mlist.empty()) ? false:true);
2635     }
2636 tim 2440
2637 gezelter 3057 void SetupAtomMatchTable(std::vector<std::vector<bool> > &ttab,Pattern *pat,OBMol &mol)
2638     {
2639     int i;
2640 tim 2440
2641 gezelter 3057 ttab.resize(pat->acount);
2642     for (i = 0;i < pat->acount;i++)
2643     ttab[i].resize(mol.NumAtoms()+1);
2644 tim 2440
2645 gezelter 3057 OBAtom *atom;
2646     std::vector<OBNodeBase*>::iterator j;
2647     for (i = 0;i < pat->acount;i++)
2648     for (atom = mol.BeginAtom(j);atom;atom = mol.NextAtom(j))
2649     if (EvalAtomExpr(pat->atom[0].expr,atom))
2650     ttab[i][atom->GetIdx()] = true;
2651     }
2652 tim 2440
2653 gezelter 3057 static void FastSingleMatch(OBMol &mol,Pattern *pat,std::vector<std::vector<int> > &mlist)
2654     {
2655     OBAtom *atom,*a1,*nbr;
2656     std::vector<OBNodeBase*>::iterator i;
2657 tim 2440
2658 gezelter 3057 OBBitVec bv(mol.NumAtoms()+1);
2659     std::vector<int> map;
2660     map.resize(pat->acount);
2661     std::vector<std::vector<OBEdgeBase*>::iterator> vi;
2662     std::vector<bool> vif;
2663 tim 2440
2664 gezelter 3057 if (pat->bcount)
2665     {
2666     vif.resize(pat->bcount);
2667     vi.resize(pat->bcount);
2668     }
2669 tim 2440
2670 gezelter 3057 int bcount;
2671     for (atom = mol.BeginAtom(i);atom;atom=mol.NextAtom(i))
2672     if (EvalAtomExpr(pat->atom[0].expr,atom))
2673     {
2674     map[0] = atom->GetIdx();
2675     if (pat->bcount)
2676     vif[0] = false;
2677     bv.Clear();
2678     bv.SetBitOn(atom->GetIdx());
2679 tim 2440
2680 gezelter 3057 for (bcount=0;bcount >=0;)
2681     {
2682     //***entire pattern matched***
2683     if (bcount == pat->bcount) //save full match here
2684     {
2685     mlist.push_back(map);
2686     bcount--;
2687     return; //found a single match
2688     }
2689 tim 2440
2690 gezelter 3057 //***match the next bond***
2691     if (!pat->bond[bcount].grow) //just check bond here
2692     {
2693     if ( !vif[bcount] )
2694     {
2695     OBBond *bond = mol.GetBond(map[pat->bond[bcount].src],
2696     map[pat->bond[bcount].dst]);
2697     if (bond && EvalBondExpr(pat->bond[bcount].expr,bond))
2698     {
2699     vif[bcount++] = true;
2700     if (bcount < pat->bcount)
2701     vif[bcount] = false;
2702     }
2703     else
2704     bcount--;
2705     }
2706     else //bond must have already been visited - backtrack
2707     bcount--;
2708     }
2709     else //need to map atom and check bond
2710     {
2711     a1 = mol.GetAtom(map[pat->bond[bcount].src]);
2712 tim 2440
2713 gezelter 3057 if (!vif[bcount]) //figure out which nbr atom we are mapping
2714     {
2715     nbr = a1->BeginNbrAtom(vi[bcount]);
2716     }
2717     else
2718     {
2719     bv.SetBitOff(map[pat->bond[bcount].dst]);
2720     nbr = a1->NextNbrAtom(vi[bcount]);
2721     }
2722 tim 2440
2723 gezelter 3057 for (;nbr;nbr=a1->NextNbrAtom(vi[bcount]))
2724     if (!bv[nbr->GetIdx()])
2725     if (EvalAtomExpr(pat->atom[pat->bond[bcount].dst].expr,nbr)
2726     && EvalBondExpr(pat->bond[bcount].expr,(OBBond *)*(vi[bcount])))
2727     {
2728     bv.SetBitOn(nbr->GetIdx());
2729     map[pat->bond[bcount].dst] = nbr->GetIdx();
2730     vif[bcount] = true;
2731     bcount++;
2732     if (bcount < pat->bcount)
2733     vif[bcount] = false;
2734     break;
2735     }
2736 tim 2440
2737 gezelter 3057 if (!nbr)//no match - time to backtrack
2738     bcount--;
2739     }
2740     }
2741     }
2742     }
2743 tim 2440
2744    
2745 gezelter 3057 static bool match(OBMol &mol,Pattern *pat,std::vector<std::vector<int> > &mlist,bool single)
2746     {
2747     mlist.clear();
2748     if (!pat || pat->acount == 0)
2749     return(false);//shouldn't ever happen
2750 tim 2440
2751 gezelter 3057 if (single && !pat->ischiral)
2752     FastSingleMatch(mol,pat,mlist);
2753     else
2754     {
2755     OBSSMatch ssm(mol,pat);
2756     ssm.Match(mlist);
2757     }
2758 tim 2440
2759 gezelter 3057 if (pat->ischiral && mol.Has3D())
2760     {
2761     int j,k,r1,r2,r3,r4;
2762     std::vector<std::vector<int> >::iterator m;
2763     OBAtom *ra1,*ra2,*ra3,*ra4;
2764     std::vector<std::vector<int> > tmpmlist;
2765 tim 2440
2766 gezelter 3057 for (j = 0;j < pat->acount;j++)
2767     if (pat->atom[j].chiral_flag)
2768     {
2769     r1 = r2 = r3 = r4 = -1;
2770     r2 = j;
2771     for (k = 0;k < pat->bcount;k++)
2772     if (pat->bond[k].dst == r2)
2773     if (r1 == -1)
2774     r1 = pat->bond[k].src;
2775     else if (r3 == -1)
2776     r3 = pat->bond[k].src;
2777     else if (r4 == -1)
2778     r4 = pat->bond[k].src;
2779 tim 2440
2780 gezelter 3057 for (k = 0;k < pat->bcount;k++)
2781     if (pat->bond[k].src == r2)
2782     if (r1 == -1)
2783     r1 = pat->bond[k].dst;
2784     else if (r3 == -1)
2785     r3 = pat->bond[k].dst;
2786     else if (r4 == -1)
2787     r4 = pat->bond[k].dst;
2788 tim 2440
2789 gezelter 3057 if (r1 == -1 || r2 == -1 || r3 == -1 || r4 == -1)
2790     continue;
2791 tim 2440
2792 gezelter 3057 tmpmlist.clear();
2793     for (m = mlist.begin();m != mlist.end();m++)
2794     {
2795     ra1 = mol.GetAtom((*m)[r1]);
2796     ra2 = mol.GetAtom((*m)[r2]);
2797     ra3 = mol.GetAtom((*m)[r3]);
2798     ra4 = mol.GetAtom((*m)[r4]);
2799     double sign = CalcTorsionAngle(ra1->GetVector(),
2800     ra2->GetVector(),
2801     ra3->GetVector(),
2802     ra4->GetVector());
2803     if (sign > 0.0 && pat->atom[j].chiral_flag == AL_ANTICLOCKWISE)
2804     continue;
2805     if (sign < 0.0 && pat->atom[j].chiral_flag == AL_CLOCKWISE)
2806     continue;
2807 tim 2440
2808 gezelter 3057 //ok - go ahead and save it
2809     tmpmlist.push_back(*m);
2810     }
2811     mlist = tmpmlist;
2812     }
2813     }
2814 tim 2440
2815 gezelter 3057 return(!mlist.empty());
2816     }
2817 tim 2440
2818     #define RECURSIVE
2819    
2820     #ifdef RECURSIVE
2821 gezelter 3057 static bool EvalAtomExpr(AtomExpr *expr,OBAtom *atom)
2822     {
2823     for (;;)
2824     switch (expr->type)
2825     {
2826     case AE_LEAF:
2827     switch( expr->leaf.prop )
2828     {
2829     case AL_ELEM:
2830     return(expr->leaf.value == (int)atom->GetAtomicNum());
2831     case AL_AROM:
2832     if( !expr->leaf.value )
2833     return !atom->IsAromatic();
2834     return atom->IsAromatic();
2835     case AL_HCOUNT:
2836     return (expr->leaf.value==(signed int)atom->ExplicitHydrogenCount() + (signed int)atom->ImplicitHydrogenCount());
2837     case AL_DEGREE:
2838     return(expr->leaf.value == (int)atom->GetValence());
2839     case AL_VALENCE:
2840     return(expr->leaf.value == (int)atom->KBOSum());
2841     case AL_CONNECT:
2842     return(expr->leaf.value == (int)atom->GetImplicitValence());
2843     case AL_NEGATIVE:
2844     return(expr->leaf.value == -(atom->GetFormalCharge()));
2845     case AL_POSITIVE:
2846     return(expr->leaf.value == atom->GetFormalCharge());
2847     case AL_HYB:
2848     return(expr->leaf.value == (int)atom->GetHyb());
2849 tim 2440
2850 gezelter 3057 case AL_RINGS:
2851     if( expr->leaf.value == -1 )
2852     return atom->IsInRing();
2853     else if( expr->leaf.value == 0 )
2854     return !atom->IsInRing();
2855     else
2856     return expr->leaf.value == (int)atom->MemberOfRingCount();
2857 tim 2440
2858 gezelter 3057 case AL_SIZE:
2859     if( expr->leaf.value == -1 )
2860     return atom->IsInRing();
2861     if (!expr->leaf.value)
2862     return !atom->IsInRing();
2863     else
2864     return atom->IsInRingSize(expr->leaf.value);
2865 tim 2440
2866 gezelter 3057 case AL_IMPLICIT:
2867     return expr->leaf.value == (signed int)atom->ImplicitHydrogenCount();
2868 tim 2440
2869 gezelter 3057 case AL_CONST:
2870     if( !expr->leaf.value )
2871     return false;
2872     return(true);
2873    
2874     case AL_CHIRAL:
2875     if( expr->leaf.value == AL_CLOCKWISE)
2876     return atom->IsClockwise();
2877     else if ( expr->leaf.value == AL_ANTICLOCKWISE)
2878     return atom->IsAntiClockwise();
2879     else if ( expr->leaf.value == 0) // unspecified
2880     return (atom->IsChiral() && !atom->HasChiralitySpecified());
2881    
2882     default:
2883     return false;
2884     }
2885 tim 2440
2886 gezelter 3057 case AE_NOT:
2887     return(!EvalAtomExpr(expr->mon.arg,atom));
2888     case AE_ANDHI: /* Same as AE_ANDLO */
2889     case AE_ANDLO:
2890     if( !EvalAtomExpr(expr->bin.lft,atom))
2891     return(false);
2892     expr = expr->bin.rgt;
2893     break;
2894     case AE_OR:
2895     if(EvalAtomExpr(expr->bin.lft,atom))
2896     return(true);
2897     expr = expr->bin.rgt;
2898     break;
2899 tim 2440
2900 gezelter 3057 case AE_RECUR:
2901     {
2902     //see if pattern has been matched
2903     std::vector<std::pair<Pattern*,std::vector<bool> > >::iterator i;
2904     for (i = RSCACHE.begin();i != RSCACHE.end();i++)
2905     if (i->first == (Pattern*)expr->recur.recur)
2906     return(i->second[atom->GetIdx()]);
2907 tim 2440
2908 gezelter 3057 //perceive and match pattern
2909     std::vector<std::vector<int> >::iterator j;
2910     std::vector<bool> vb(((OBMol*) atom->GetParent())->NumAtoms()+1);
2911     std::vector<std::vector<int> > mlist;
2912     if (match( *((OBMol *) atom->GetParent()),
2913     (Pattern*)expr->recur.recur,mlist))
2914     for (j = mlist.begin();j != mlist.end();j++)
2915     vb[(*j)[0]] = true;
2916 tim 2440
2917 gezelter 3057 RSCACHE.push_back(std::pair<Pattern*,std::vector<bool> > ((Pattern*)expr->recur.recur,vb));
2918 tim 2440
2919 gezelter 3057 return(vb[atom->GetIdx()]);
2920     }
2921 tim 2440
2922 gezelter 3057 default:
2923     return(false);
2924     }
2925     }
2926 tim 2440
2927     #else
2928    
2929 gezelter 3057 static bool EvalAtomExpr(AtomExpr *expr,OBAtom *atom)
2930     {
2931     int size=0;
2932 tim 2440 #define OB_EVAL_STACKSIZE 40
2933    
2934 gezelter 3057 AtomExpr *stack[OB_EVAL_STACKSIZE];
2935     memset(stack,'\0',sizeof(AtomExpr*)*OB_EVAL_STACKSIZE);
2936 tim 2440 #undef OB_EVAL_STACKSIZE
2937    
2938 gezelter 3057 bool lftest=true;
2939 tim 2440
2940 gezelter 3057 for (size=0,stack[size] = expr;size >= 0;expr=stack[size])
2941     {
2942     switch (expr->type)
2943     {
2944     case AE_LEAF:
2945     switch( expr->leaf.prop )
2946     {
2947     //expr->leaf.value
2948     case AL_ELEM:
2949     lftest = (expr->leaf.value == atom->GetAtomicNum());
2950     break;
2951     case AL_AROM:
2952     lftest = (expr->leaf.value == (int)atom->IsAromatic());
2953     break;
2954     case AL_HCOUNT:
2955     if (atom->ExplicitHydrogenCount() > atom->ImplicitHydrogenCount())
2956     lftest=(expr->leaf.value==atom->ExplicitHydrogenCount());
2957     else
2958     lftest=(expr->leaf.value==atom->ImplicitHydrogenCount());
2959     break;
2960     case AL_DEGREE:
2961     lftest = (expr->leaf.value == atom->GetHvyValence());
2962     break;
2963     case AL_VALENCE:
2964     lftest = (expr->leaf.value == atom->BOSum());
2965     break;
2966     case AL_CONNECT: //X
2967     lftest = (expr->leaf.value == atom->GetImplicitValence());
2968     break;
2969     case AL_NEGATIVE:
2970     lftest=(expr->leaf.value == -1*(atom->GetFormalCharge()));
2971     break;
2972     case AL_POSITIVE:
2973     lftest=(expr->leaf.value == atom->GetFormalCharge());
2974     break;
2975     case AL_HYB:
2976     lftest=(expr->leaf.value == atom->GetHyb());
2977     break;
2978     case AL_RINGS:
2979     if (expr->leaf.value == -1)
2980     lftest = (atom->IsInRing());
2981     else
2982     if (expr->leaf.value == 0)
2983     lftest = !(atom->IsInRing());
2984     else
2985     lftest=(atom->MemberOfRingCount()==expr->leaf.value);
2986     break;
2987     case AL_SIZE:
2988     if (!expr->leaf.value)
2989     lftest = !atom->IsInRing();
2990     else
2991     lftest = atom->IsInRingSize(expr->leaf.value);
2992     break;
2993 tim 2440
2994 gezelter 3057 case AL_IMPLICIT:
2995     lftest=(expr->leaf.value==atom->ImplicitHydrogenCount());
2996     break;
2997     case AL_CONST:
2998     lftest= true; // not limited to non-hydrogens
2999     break;
3000     case AL_MASS:
3001     break;
3002     default:
3003     break;
3004     }
3005     size--;
3006     break;
3007 tim 2440
3008 gezelter 3057 case AE_ANDHI:
3009 tim 2440
3010 gezelter 3057 if (stack[size+1] == expr->bin.rgt)
3011     size--;
3012     else if (stack[size+1] == expr->bin.lft)
3013     {
3014     if (lftest)
3015     {
3016     size++;
3017     stack[size] = expr->bin.rgt;
3018     }
3019     else
3020     size--;
3021     }
3022     else
3023     {
3024     size++;
3025     stack[size] = expr->bin.lft;
3026     }
3027     break;
3028 tim 2440
3029 gezelter 3057 case AE_OR:
3030 tim 2440
3031 gezelter 3057 if (stack[size+1] == expr->bin.rgt)
3032     size--;
3033     else if (stack[size+1] == expr->bin.lft)
3034     {
3035     if (!lftest)
3036     {
3037     size++;
3038     stack[size] = expr->bin.rgt;
3039     }
3040     else
3041     size--;
3042     }
3043     else
3044     {
3045     size++;
3046     stack[size] = expr->bin.lft;
3047     }
3048     break;
3049 tim 2440
3050 gezelter 3057 case AE_ANDLO:
3051 tim 2440
3052 gezelter 3057 if (stack[size+1] == expr->bin.rgt)
3053     size--;
3054     else if (stack[size+1] == expr->bin.lft)
3055     {
3056     if (lftest)
3057     {
3058     size++;
3059     stack[size] = expr->bin.rgt;
3060     }
3061     else
3062     size--;
3063     }
3064     else
3065     {
3066     size++;
3067     stack[size] = expr->bin.lft;
3068     }
3069     break;
3070 tim 2440
3071 gezelter 3057 case AE_NOT:
3072     if (stack[size+1] != expr->mon.arg)
3073     {
3074     size++;
3075     stack[size] = expr->mon.arg;
3076     }
3077     else
3078     {
3079     lftest = !lftest;
3080     size--;
3081     }
3082     break;
3083 tim 2440
3084 gezelter 3057 case AE_RECUR:
3085     //see if pattern has been matched
3086     bool matched=false;
3087 tim 2440
3088 gezelter 3057 std::vector<std::pair<Pattern*,std::vector<bool> > >::iterator i;
3089     for (i = RSCACHE.begin();i != RSCACHE.end();i++)
3090     if (i->first == (Pattern*)expr->recur.recur)
3091 tim 2440 {
3092 gezelter 3057 lftest = i->second[atom->GetIdx()];
3093     matched = true;
3094     break;
3095 tim 2440 }
3096    
3097 gezelter 3057 if (!matched)
3098     {
3099     std::vector<bool> vb(atom->GetParent()->NumAtoms()+1);
3100     std::vector<std::vector<int> > mlist;
3101     lftest = false;
3102     if (match((*atom->GetParent()),(Pattern*)expr->recur.recur,mlist))
3103     {
3104     std::vector<std::vector<int> >::iterator i;
3105     for (i = mlist.begin();i != mlist.end();i++)
3106     {
3107     if ((*i)[0] == atom->GetIdx())
3108     lftest = true;
3109     vb[(*i)[0]] = true;
3110     }
3111     }
3112     RSCACHE.push_back(std::pair<Pattern*,std::vector<bool> > ((Pattern*)expr->recur.recur,vb));
3113     }
3114    
3115     size--;
3116     break;
3117     }
3118     }
3119 tim 2440
3120 gezelter 3057 return(lftest);
3121     }
3122 tim 2440 #endif
3123    
3124     #ifdef RECURSIVE
3125    
3126 gezelter 3057 static bool EvalBondExpr(BondExpr *expr,OBBond *bond)
3127     {
3128     for (;;)
3129     switch( expr->type )
3130     {
3131     case BE_LEAF:
3132 tim 2440
3133 gezelter 3057 if( expr->leaf.prop == BL_CONST )
3134     return((expr->leaf.value != 0) ? true : false);
3135     else
3136     switch( expr->leaf.value )
3137     {
3138     case BT_SINGLE:
3139     return(bond->GetBO() == 1 && !bond->IsAromatic());
3140     case BT_AROM:
3141     return(bond->IsAromatic());
3142     case BT_DOUBLE:
3143     return(bond->GetBO()==2 && !bond->IsAromatic());
3144     case BT_TRIPLE:
3145     return(bond->GetBO()==3);
3146     case BT_RING:
3147     return(bond->IsInRing());
3148     case BT_UP:
3149     return(bond->IsUp());
3150     case BT_DOWN:
3151     return(bond->IsDown());
3152     case BT_UPUNSPEC: // up or unspecified (i.e., not down)
3153     return(!bond->IsDown());
3154     case BT_DOWNUNSPEC: // down or unspecified (i.e., not up)
3155     return(!bond->IsUp());
3156     default:
3157     return(false);
3158     }
3159 tim 2440
3160    
3161 gezelter 3057 case BE_NOT:
3162     return(!EvalBondExpr(expr->mon.arg,bond));
3163     case BE_ANDHI:
3164     case BE_ANDLO:
3165     if (!EvalBondExpr(expr->bin.lft,bond))
3166     return(false);
3167     expr = expr->bin.rgt;
3168     break;
3169 tim 2440
3170 gezelter 3057 case BE_OR:
3171     if (EvalBondExpr(expr->bin.lft,bond))
3172     return(true);
3173     expr = expr->bin.rgt;
3174     break;
3175     default:
3176     return false;
3177     }
3178     }
3179 tim 2440
3180     #else
3181    
3182 gezelter 3057 static bool EvalBondExpr(BondExpr *expr,OBBond *bond)
3183     {
3184     int size=0;
3185 tim 2440 #define OB_EVAL_STACKSIZE 40
3186    
3187 gezelter 3057 BondExpr *stack[OB_EVAL_STACKSIZE];
3188     memset(stack,'\0',sizeof(AtomExpr*)*OB_EVAL_STACKSIZE);
3189 tim 2440 #undef OB_EVAL_STACKSIZE
3190    
3191 gezelter 3057 bool lftest=true;
3192     for (size=0,stack[size] = expr;size >= 0;expr=stack[size])
3193     switch( expr->type )
3194     {
3195     case(BE_LEAF):
3196 tim 2440
3197 gezelter 3057 if( expr->leaf.prop == BL_CONST )
3198     lftest = (expr->leaf.value)?true:false;
3199     else /* expr->leaf.prop == BL_TYPE */
3200     switch( expr->leaf.value )
3201     {
3202     case(BT_SINGLE):
3203     lftest = (bond->GetBO() == 1 && !bond->IsAromatic());
3204     break;
3205     case(BT_DOUBLE):
3206     lftest = (bond->GetBO()==2 && !bond->IsAromatic());
3207     break;
3208     case(BT_TRIPLE):
3209     lftest = (bond->GetBO()==3);
3210     break;
3211     case(BT_AROM): lftest=bond->IsAromatic();
3212     break;
3213     case(BT_RING): lftest=bond->IsInRing();
3214     break;
3215     case(BT_UP): lftest= (bond->IsUp() && bond->GetBO()==1 && !bond->IsAromatic());
3216     break;
3217     case(BT_DOWN): lftest= (bond->IsDown() && bond->GetBO()==1 && !bond->IsAromatic());
3218     break;
3219     case(BT_UPUNSPEC): lftest= !bond->IsDown();
3220     break;
3221     case(BT_DOWNUNSPEC): lftest= !bond->IsUp();
3222     break;
3223     }
3224     size--;
3225     break;
3226 tim 2440
3227 gezelter 3057 case(BE_NOT):
3228     if (stack[size+1] != expr->mon.arg)
3229     {
3230     size++;
3231     stack[size] = expr->mon.arg;
3232     }
3233     else
3234     {
3235     lftest = !lftest;
3236     size--;
3237     }
3238     break;
3239 tim 2440
3240 gezelter 3057 case(BE_ANDHI):
3241     if (stack[size+1] == expr->bin.rgt)
3242     size--;
3243     else if (stack[size+1] == expr->bin.lft)
3244     {
3245     if (lftest)
3246     {
3247     size++;
3248     stack[size] = expr->bin.rgt;
3249     }
3250     else
3251     size--;
3252     }
3253     else
3254     {
3255     size++;
3256     stack[size] = expr->bin.lft;
3257     }
3258     break;
3259 tim 2440
3260 gezelter 3057 case(BE_ANDLO):
3261     if (stack[size+1] == expr->bin.rgt)
3262     size--;
3263     else if (stack[size+1] == expr->bin.lft)
3264     {
3265     if (lftest)
3266     {
3267     size++;
3268     stack[size] = expr->bin.rgt;
3269     }
3270     else
3271     size--;
3272     }
3273     else
3274     {
3275     size++;
3276     stack[size] = expr->bin.lft;
3277     }
3278     break;
3279 tim 2440
3280 gezelter 3057 case(BE_OR):
3281     if (stack[size+1] == expr->bin.rgt)
3282     size--;
3283     else if (stack[size+1] == expr->bin.lft)
3284     {
3285     if (!lftest)
3286     {
3287     size++;
3288     stack[size] = expr->bin.rgt;
3289     }
3290     else
3291     size--;
3292     }
3293     else
3294     {
3295     size++;
3296     stack[size] = expr->bin.lft;
3297     }
3298     break;
3299     }
3300     return(lftest);
3301     }
3302 tim 2440 #endif
3303    
3304 gezelter 3057 std::vector<std::vector<int> > &OBSmartsPattern::GetUMapList()
3305     {
3306     if (_mlist.empty() || _mlist.size() == 1)
3307     return(_mlist);
3308 tim 2440
3309 gezelter 3057 bool ok;
3310     OBBitVec bv;
3311     std::vector<OBBitVec> vbv;
3312     std::vector<std::vector<int> > mlist;
3313     std::vector<std::vector<int> >::iterator i;
3314     std::vector<OBBitVec>::iterator j;
3315 tim 2440
3316 gezelter 3057 for (i = _mlist.begin();i != _mlist.end();i++)
3317     {
3318     ok = true;
3319     bv.Clear();
3320     bv.FromVecInt(*i);
3321     for (j = vbv.begin();j != vbv.end() && ok;j++)
3322     if ((*j) == bv)
3323     ok = false;
3324 tim 2440
3325 gezelter 3057 if (ok)
3326     {
3327     mlist.push_back(*i);
3328     vbv.push_back(bv);
3329     }
3330     }
3331 tim 2440
3332 gezelter 3057 _mlist = mlist;
3333     return(_mlist);
3334     }
3335 tim 2440
3336 gezelter 3057 void OBSmartsPattern::WriteMapList(ostream &ofs)
3337     {
3338     std::vector<std::vector<int> >::iterator i;
3339     std::vector<int>::iterator j;
3340 tim 2440
3341 gezelter 3057 for ( i = _mlist.begin() ; i != _mlist.end() ; i++ )
3342     {
3343     for (j = (*i).begin();j != (*i).end();j++)
3344     ofs << *j << ' ' << ends;
3345     ofs << endl;
3346     }
3347     }
3348 tim 2440
3349 gezelter 3057 //*******************************************************************
3350     // The OBSSMatch class performs exhaustive matching using recursion
3351     // Explicit stack handling is used to find just a single match in
3352     // match()
3353     //*******************************************************************
3354 tim 2440
3355 gezelter 3057 OBSSMatch::OBSSMatch(OBMol &mol,Pattern *pat)
3356     {
3357     _mol = &mol;
3358     _pat = pat;
3359     _map.resize(pat->acount);
3360 tim 2440
3361 gezelter 3057 if (!mol.Empty())
3362     {
3363     _uatoms = new bool [mol.NumAtoms()+1];
3364     memset((char*)_uatoms,'\0',sizeof(bool)*(mol.NumAtoms()+1));
3365     }
3366     else
3367     _uatoms = (bool*)NULL;
3368     }
3369 tim 2440
3370 gezelter 3057 OBSSMatch::~OBSSMatch()
3371     {
3372     if (_uatoms)
3373     delete [] _uatoms;
3374     }
3375 tim 2440
3376 gezelter 3057 void OBSSMatch::Match(std::vector<std::vector<int> > &mlist,int bidx)
3377     {
3378     if (bidx == -1)
3379     {
3380     OBAtom *atom;
3381     std::vector<OBNodeBase*>::iterator i;
3382     for (atom = _mol->BeginAtom(i);atom;atom = _mol->NextAtom(i))
3383     if (EvalAtomExpr(_pat->atom[0].expr,atom))
3384     {
3385     _map[0] = atom->GetIdx();
3386     _uatoms[atom->GetIdx()] = true;
3387     Match(mlist,0);
3388     _map[0] = 0;
3389     _uatoms[atom->GetIdx()] = false;
3390     }
3391     return;
3392     }
3393 tim 2440
3394 gezelter 3057 if (bidx == _pat->bcount) //save full match here
3395     {
3396     mlist.push_back(_map);
3397     return;
3398     }
3399 tim 2440
3400 gezelter 3057 if (_pat->bond[bidx].grow) //match the next bond
3401     {
3402     int src,dst;
3403     src = _pat->bond[bidx].src;
3404     dst = _pat->bond[bidx].dst;
3405 tim 2440
3406 gezelter 3057 if (_map[src] <= 0 || _map[src] > _mol->NumAtoms())
3407     return;
3408 tim 2440
3409 gezelter 3057 AtomExpr *aexpr = _pat->atom[dst].expr;
3410     BondExpr *bexpr = _pat->bond[bidx].expr;
3411     OBAtom *atom,*nbr;
3412     std::vector<OBEdgeBase*>::iterator i;
3413 tim 2440
3414 gezelter 3057 atom = _mol->GetAtom(_map[src]);
3415     for (nbr = atom->BeginNbrAtom(i);nbr;nbr = atom->NextNbrAtom(i))
3416     if (!_uatoms[nbr->GetIdx()] && EvalAtomExpr(aexpr,nbr) &&
3417     EvalBondExpr(bexpr,((OBBond*) *i)))
3418     {
3419     _map[dst] = nbr->GetIdx();
3420     _uatoms[nbr->GetIdx()] = true;
3421     Match(mlist,bidx+1);
3422     _uatoms[nbr->GetIdx()] = false;
3423     _map[dst] = 0;
3424     }
3425     }
3426     else //just check bond here
3427 tim 2440 {
3428 gezelter 3057 OBBond *bond = _mol->GetBond(_map[_pat->bond[bidx].src],
3429     _map[_pat->bond[bidx].dst]);
3430     if (bond && EvalBondExpr(_pat->bond[bidx].expr,bond))
3431     Match(mlist,bidx+1);
3432 tim 2440 }
3433 gezelter 3057 }
3434 tim 2440
3435 gezelter 3057 static int GetExprOrder(BondExpr *expr)
3436     {
3437     int size=0;
3438     BondExpr *stack[15];
3439     memset(stack,'\0',sizeof(AtomExpr*)*15);
3440     bool lftest=true;
3441 tim 2440
3442 gezelter 3057 for (size=0,stack[size] = expr;size >= 0;expr=stack[size])
3443     switch( expr->type )
3444 tim 2440 {
3445 gezelter 3057 case(BE_LEAF):
3446    
3447     if( expr->leaf.prop == BL_CONST )
3448     lftest = true;
3449     else /* expr->leaf.prop == BL_TYPE */
3450     switch( expr->leaf.value )
3451     {
3452     case(BT_SINGLE): return(1);
3453     case(BT_DOUBLE): return(2);
3454     case(BT_TRIPLE): return(3);
3455     case(BT_AROM): return(5);
3456     default:
3457     lftest = true;
3458     }
3459     size--;
3460     break;
3461    
3462     case(BE_NOT): return(0);
3463     case(BE_ANDHI):
3464     case(BE_ANDLO):
3465     case(BE_OR):
3466     if (stack[size+1] == expr->bin.rgt)
3467     size--;
3468     else if (stack[size+1] == expr->bin.lft)
3469 tim 2440 {
3470 gezelter 3057 if (lftest)
3471 tim 2440 {
3472 gezelter 3057 size++;
3473     stack[size] = expr->bin.rgt;
3474 tim 2440 }
3475 gezelter 3057 else
3476     size--;
3477 tim 2440 }
3478 gezelter 3057 else
3479 tim 2440 {
3480 gezelter 3057 size++;
3481     stack[size] = expr->bin.lft;
3482 tim 2440 }
3483 gezelter 3057 break;
3484 tim 2440 }
3485    
3486 gezelter 3057 return(0);
3487     }
3488 tim 2440
3489 gezelter 3057 int OBSmartsPattern::GetCharge(int idx)
3490     {
3491     AtomExpr *expr = _pat->atom[idx].expr;
3492 tim 2440
3493 gezelter 3057 int size=0;
3494     AtomExpr *stack[15];
3495     memset(stack,'\0',sizeof(AtomExpr*)*15);
3496     bool lftest=true;
3497 tim 2440
3498 gezelter 3057 for (size=0,stack[size] = expr;size >= 0;expr=stack[size])
3499     {
3500     switch (expr->type)
3501     {
3502     case AE_LEAF:
3503     switch( expr->leaf.prop )
3504     {
3505     case AL_NEGATIVE:
3506     return(-1*(int)expr->leaf.value);
3507     case AL_POSITIVE:
3508     return((int)expr->leaf.value);
3509     default:
3510     lftest=true;
3511     }
3512     size--;
3513     break;
3514 tim 2440
3515 gezelter 3057 case AE_OR:
3516     case AE_ANDHI:
3517     case AE_ANDLO:
3518 tim 2440
3519 gezelter 3057 if (stack[size+1] == expr->bin.rgt)
3520     size--;
3521     else if (stack[size+1] == expr->bin.lft)
3522     {
3523     if (lftest)
3524     {
3525     size++;
3526     stack[size] = expr->bin.rgt;
3527     }
3528     else
3529     size--;
3530     }
3531     else
3532     {
3533     size++;
3534     stack[size] = expr->bin.lft;
3535     }
3536     break;
3537 tim 2440
3538 gezelter 3057 case AE_NOT:
3539     return(0);
3540     case AE_RECUR:
3541     return(0);
3542     }
3543     }
3544 tim 2440
3545 gezelter 3057 return(0);
3546     }
3547 tim 2440
3548 gezelter 3057 int OBSmartsPattern::GetAtomicNum(int idx)
3549     {
3550     AtomExpr *expr = _pat->atom[idx].expr;
3551    
3552     int size=0;
3553     AtomExpr *stack[15];
3554     memset(stack,'\0',sizeof(AtomExpr*)*15);
3555     bool lftest=true;
3556    
3557     for (size=0,stack[size] = expr;size >= 0;expr=stack[size])
3558     {
3559     switch (expr->type)
3560     {
3561     case AE_LEAF:
3562     if ( expr->leaf.prop == AL_ELEM)
3563     return(expr->leaf.value);
3564     lftest = true;
3565     size--;
3566     break;
3567    
3568     case AE_OR:
3569     case AE_ANDHI:
3570     case AE_ANDLO:
3571    
3572     if (stack[size+1] == expr->bin.rgt)
3573     size--;
3574     else if (stack[size+1] == expr->bin.lft)
3575     {
3576     if (lftest)
3577     {
3578     size++;
3579     stack[size] = expr->bin.rgt;
3580     }
3581     else
3582     size--;
3583     }
3584     else
3585     {
3586     size++;
3587     stack[size] = expr->bin.lft;
3588     }
3589     break;
3590    
3591     case AE_NOT:
3592     return(0);
3593     case AE_RECUR:
3594     return(0);
3595     }
3596     }
3597    
3598     return(0);
3599     }
3600 tim 2440
3601 gezelter 3057 void OBSmartsPattern::GetBond(int &src,int &dst,int &ord,int idx)
3602     {
3603     src = _pat->bond[idx].src;
3604     dst = _pat->bond[idx].dst;
3605     ord = GetExprOrder(_pat->bond[idx].expr);
3606     }
3607    
3608     void SmartsLexReplace(std::string &s,std::vector<std::pair<std::string,std::string> > &vlex)
3609     {
3610     size_t j,pos;
3611     std::string token,repstr;
3612     std::vector<std::pair<std::string,std::string> >::iterator i;
3613 tim 2440
3614 gezelter 3057 for (pos = 0,pos = s.find("$",pos);pos < s.size();pos = s.find("$",pos))
3615     //for (pos = 0,pos = s.find("$",pos);pos != std::string::npos;pos = s.find("$",pos))
3616     {
3617     pos++;
3618     for (j = pos;j < s.size();j++)
3619     if (!isalpha(s[j]) && !isdigit(s[j]) && s[j] != '_')
3620     break;
3621     if (pos == j)
3622     continue;
3623 tim 2440
3624 gezelter 3057 token = s.substr(pos,j-pos);
3625     for (i = vlex.begin();i != vlex.end();i++)
3626     if (token == i->first)
3627     {
3628     repstr = "(" + i->second + ")";
3629     s.replace(pos,j-pos,repstr);
3630     j = 0;
3631     }
3632     pos = j;
3633     }
3634     }
3635 tim 2440
3636     } // end namespace OpenBabel
3637    
3638     //! \file parsmart.cpp
3639     //! \brief Implementation of Daylight SMARTS parser.