ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/OOPSE-4/src/openbabel/parsmart.cpp
Revision: 2440
Committed: Wed Nov 16 19:42:11 2005 UTC (18 years, 7 months ago) by tim
File size: 79347 byte(s)
Log Message:
adding openbabel

File Contents

# Content
1 /**********************************************************************
2 parsmart.cpp - SMARTS parser.
3
4 Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc.
5 Some portions Copyright (C) 2001-2005 by Geoffrey R. Hutchison
6
7 This file is part of the Open Babel project.
8 For more information, see <http://openbabel.sourceforge.net/>
9
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation version 2 of the License.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18 ***********************************************************************/
19
20 #include <ctype.h>
21
22 #include "mol.hpp"
23 #include "bitvec.hpp"
24 #include "parsmart.hpp"
25
26 #ifndef True
27 #define True 1
28 #define False 0
29 #endif
30
31 /* Strict syntax checking! */
32 // Currently causes problems with aromatic flags in atomtyp.txt
33 // #define STRICT
34 #define VERBOSE
35
36 #ifdef __sgi
37 #define UnusedArgument(x) ((x)=(x))
38 #else
39 #define UnusedArgument(x)
40 #endif
41
42 using namespace std;
43
44 namespace OpenBabel
45 {
46
47 /*! \class OBSmartsPattern
48
49 Substructure search is an incredibly useful tool in the context of a
50 small molecule programming library. Having an efficient substructure
51 search engine reduces the amount of hard code needed for molecule
52 perception, as well as increases the flexibility of certain
53 operations. For instance, atom typing can be easily performed based on
54 hard coded rules of element type and bond orders (or
55 hybridization). Alternatively, atom typing can also be done by
56 matching a set of substructure rules read at run time. In the latter
57 case customization based on application (such as changing the pH)
58 becomes a facile operation. Fortunately for Open Babel and its users,
59 Roger Sayle donated a SMARTS parser which became the basis for SMARTS
60 matching in Open Babel.
61
62 The SMARTS matcher, or OBSmartsPattern, is a separate object which can
63 match patterns in the OBMol class. The following code demonstrates how
64 to use the OBSmartsPattern class:
65 \code
66 OBMol mol;
67 ...
68 OBSmartsPattern sp;
69 sp.Init("CC");
70 sp.Match(mol);
71 vector<vector<int> > maplist;
72 maplist = sp.GetMapList();
73 //or maplist = sp.GetUMapList();
74 //print out the results
75 vector<vector<int> >::iterator i;
76 vector<int>::iterator j;
77 for (i = maplist.begin();i != maplist.end();i++)
78 {
79 for (j = i->begin();j != i->end();j++)
80 cout << j << ' `;
81 cout << endl;
82 }
83 \endcode
84 The preceding code reads in a molecule, initializes a smarts pattern
85 of two single-bonded carbons, and locates all instances of the
86 pattern in the molecule. Note that calling the Match() function
87 does not return the results of the substructure match. The results
88 from a match are stored in the OBSmartsPattern, and a call to
89 GetMapList() or GetUMapList() must be made to extract the
90 results. The function GetMapList() returns all matches of a
91 particular pattern while GetUMapList() returns only the unique
92 matches. For instance, the pattern [OD1]~C~[OD1] describes a
93 carboxylate group. This pattern will match both atom number
94 permutations of the carboxylate, and if GetMapList() is called, both
95 matches will be returned. If GetUMapList() is called only unique
96 matches of the pattern will be returned. A unique match is defined as
97 one which does not cover the identical atoms that a previous match
98 has covered.
99 */
100
101 /*============================*/
102 /* Period Table of Elements */
103 /*============================*/
104
105 std::vector<std::pair<Pattern*,std::vector<bool> > > RSCACHE; //recursive smarts cache
106
107 typedef struct
108 {
109 char *symbol;
110 int organic;
111 int aromflag;
112 double weight;
113 }
114 Element;
115
116 #define ELEMMAX 104
117
118 typedef struct
119 {
120 BondExpr *closord[100];
121 int closure[100];
122 int closindex;
123 }
124 ParseState;
125
126 /*
127 #define ATOMEXPRPOOL 16
128 #define BONDEXPRPOOL 16
129 #define ATOMPOOL 16
130 #define BONDPOOL 16
131 */
132
133 #define ATOMEXPRPOOL 1
134 #define BONDEXPRPOOL 1
135 #define ATOMPOOL 1
136 #define BONDPOOL 1
137
138 /*=====================*/
139 /* BondExpr Bit Sets */
140 /*=====================*/
141
142 #define BF_NONRINGUNSPEC 0x0001
143 #define BF_NONRINGDOWN 0x0002
144 #define BF_NONRINGUP 0x0004
145 #define BF_NONRINGDOUBLE 0x0008
146 #define BF_NONRINGTRIPLE 0x0010
147 #define BF_RINGUNSPEC 0x0020
148 #define BF_RINGDOWN 0x0040
149 #define BF_RINGUP 0x0080
150 #define BF_RINGAROM 0x0100
151 #define BF_RINGDOUBLE 0x0200
152 #define BF_RINGTRIPLE 0x0400
153
154 #define BS_ALL 0x07FF
155 #define BS_SINGLE 0x00E7
156 #define BS_DOUBLE 0x0208
157 #define BS_TRIPLE 0x0410
158 #define BS_AROM 0x0100
159 #define BS_UP 0x0084
160 #define BS_DOWN 0x0042
161 #define BS_UPUNSPEC 0x00A5
162 #define BS_DOWNUNSPEC 0x0063
163 #define BS_RING 0x07E0
164 #define BS_DEFAULT 0x01E7
165
166 static char *MainPtr;
167 static char *LexPtr;
168
169 #define BUFMAX 1024
170 static char Buffer[BUFMAX];
171 static char Descr[BUFMAX];
172
173 static bool match(OBMol &mol,Pattern *pat,std::vector<std::vector<int> > &mlist,bool single=false);
174 static bool EvalAtomExpr(AtomExpr *expr,OBAtom *atom);
175 static bool EvalBondExpr(BondExpr *expr,OBBond *bond);
176 static int GetVectorBinding();
177 static int CreateAtom(Pattern*,AtomExpr*,int,int vb=0);
178
179 /*=============================*/
180 /* Standard Utility Routines */
181 /*=============================*/
182
183 static void FatalAllocationError( char *ptr )
184 {
185 printf("Error: Unable to allocate %s!\n",ptr);
186 // exit(1);
187 }
188
189 /*================================*/
190 /* Atom Expression Manipulation */
191 /*================================*/
192
193 static void FreePattern( Pattern* );
194 static Pattern *CopyPattern( Pattern* );
195
196 static AtomExpr *AllocAtomExpr( void )
197 {
198 register AtomExpr *result;
199
200 result = (AtomExpr*)malloc(sizeof(AtomExpr));
201 return result;
202 }
203
204 static AtomExpr *CopyAtomExpr( AtomExpr *expr )
205 {
206 register AtomExpr *result;
207
208 result = AllocAtomExpr();
209 result->type = expr->type;
210 switch( expr->type )
211 {
212 case(AE_ANDHI):
213 case(AE_ANDLO):
214 case(AE_OR): result->bin.lft = CopyAtomExpr(expr->bin.lft);
215 result->bin.rgt = CopyAtomExpr(expr->bin.rgt);
216 break;
217
218 case(AE_NOT): result->mon.arg = CopyAtomExpr(expr->mon.arg);
219 break;
220
221 case(AE_RECUR): result->recur.recur = CopyPattern(
222 (Pattern*)expr->recur.recur );
223 break;
224
225 case(AE_LEAF): result->leaf.prop = expr->leaf.prop;
226 result->leaf.value = expr->leaf.value;
227 break;
228 }
229 return result;
230 }
231
232 static void FreeAtomExpr( AtomExpr *expr )
233 {
234 if( expr )
235 {
236 switch( expr->type )
237 {
238 case(AE_ANDHI):
239 case(AE_ANDLO):
240 case(AE_OR): FreeAtomExpr(expr->bin.lft);
241 FreeAtomExpr(expr->bin.rgt);
242 break;
243
244 case(AE_NOT): FreeAtomExpr(expr->mon.arg);
245 break;
246
247 case(AE_RECUR): FreePattern( (Pattern*)expr->recur.recur );
248 break;
249 }
250 if (expr)
251 {
252 free(expr);
253 expr = (AtomExpr*)NULL;
254 }
255 }
256 }
257
258 static AtomExpr *BuildAtomLeaf( int prop, int val )
259 {
260 register AtomExpr *result;
261
262 result = AllocAtomExpr();
263 result->leaf.type = AE_LEAF;
264 result->leaf.prop = prop;
265 result->leaf.value = val;
266 return result;
267 }
268
269 static AtomExpr *BuildAtomNot( AtomExpr *expr )
270 {
271 register AtomExpr *result;
272
273 result = AllocAtomExpr();
274 result->mon.type = AE_NOT;
275 result->mon.arg = expr;
276 return result;
277 }
278
279 static AtomExpr *BuildAtomBin( int op, AtomExpr *lft, AtomExpr *rgt )
280 {
281 register AtomExpr *result;
282
283 result = AllocAtomExpr();
284 result->bin.type = op;
285 result->bin.lft = lft;
286 result->bin.rgt = rgt;
287 return result;
288 }
289
290 static AtomExpr *BuildAtomRecurs( Pattern *pat )
291 {
292 register AtomExpr *result;
293
294 result = AllocAtomExpr();
295 result->recur.type = AE_RECUR;
296 result->recur.recur = (void*)pat;
297 return result;
298 }
299
300 static AtomExpr *GenerateElement( int elem )
301 {
302 return BuildAtomLeaf(AL_ELEM,elem);
303 }
304
305 static AtomExpr *GenerateAromElem( int elem, int flag )
306 {
307 AtomExpr *expr1;
308 AtomExpr *expr2;
309
310 expr1 = BuildAtomLeaf(AL_AROM,flag);
311 expr2 = BuildAtomLeaf(AL_ELEM,elem);
312 return BuildAtomBin(AE_ANDHI,expr1,expr2);
313 }
314
315 static int IsInvalidAtom( AtomExpr *expr )
316 {
317 if( !expr )
318 return True;
319 return( (expr->type==AE_LEAF) &&
320 (expr->leaf.prop==AL_CONST)
321 && !expr->leaf.value );
322 }
323
324 /*================================*/
325 /* Bond Expression Manipulation */
326 /*================================*/
327
328 static BondExpr *AllocBondExpr( void )
329 {
330 register BondExpr *result;
331
332 result = (BondExpr*)malloc(sizeof(BondExpr));
333 return result;
334 }
335
336 static BondExpr *CopyBondExpr( BondExpr *expr )
337 {
338 register BondExpr *result;
339
340 result = AllocBondExpr();
341 result->type = expr->type;
342 switch( expr->type )
343 {
344 case(AE_ANDHI):
345 case(AE_ANDLO):
346 case(AE_OR): result->bin.lft = CopyBondExpr(expr->bin.lft);
347 result->bin.rgt = CopyBondExpr(expr->bin.rgt);
348 break;
349
350 case(AE_NOT): result->mon.arg = CopyBondExpr(expr->mon.arg);
351 break;
352
353 case(AE_LEAF): result->leaf.prop = expr->leaf.prop;
354 result->leaf.value = expr->leaf.value;
355 break;
356 }
357 return result;
358 }
359
360 static void FreeBondExpr( BondExpr *expr )
361 {
362 if( expr )
363 {
364 switch( expr->type )
365 {
366 case(BE_ANDHI):
367 case(BE_ANDLO):
368 case(BE_OR): FreeBondExpr(expr->bin.lft);
369 FreeBondExpr(expr->bin.rgt);
370 break;
371
372 case(BE_NOT): FreeBondExpr(expr->mon.arg);
373 break;
374 }
375
376 if (expr)
377 {
378 free(expr);
379 expr = (BondExpr*)NULL;
380 }
381 }
382 }
383
384 static BondExpr *BuildBondLeaf( int prop, int val )
385 {
386 register BondExpr *result;
387
388 result = AllocBondExpr();
389 result->leaf.type = BE_LEAF;
390 result->leaf.prop = prop;
391 result->leaf.value = val;
392 return result;
393 }
394
395 static BondExpr *BuildBondNot( BondExpr *expr )
396 {
397 register BondExpr *result;
398
399 result = AllocBondExpr();
400 result->mon.type = BE_NOT;
401 result->mon.arg = expr;
402 return result;
403 }
404
405 static BondExpr *BuildBondBin( int op, BondExpr *lft, BondExpr *rgt )
406 {
407 register BondExpr *result;
408
409 result = AllocBondExpr();
410 result->bin.type = op;
411 result->bin.lft = lft;
412 result->bin.rgt = rgt;
413 return result;
414 }
415
416 static BondExpr *GenerateDefaultBond( void )
417 {
418 register BondExpr *expr1;
419 register BondExpr *expr2;
420
421 expr1 = BuildBondLeaf(BL_TYPE,BT_SINGLE);
422 expr2 = BuildBondLeaf(BL_TYPE,BT_AROM);
423 return(BuildBondBin(BE_OR,expr1,expr2));
424 }
425
426 /*===============================*/
427 /* SMARTS Pattern Manipulation */
428 /*===============================*/
429
430 static Pattern *AllocPattern( void )
431 {
432 Pattern *ptr;
433
434 ptr = (Pattern*)malloc(sizeof(Pattern));
435 if( !ptr )
436 FatalAllocationError("pattern");
437
438 ptr->atom = (AtomSpec*)0;
439 ptr->aalloc = 0;
440 ptr->acount = 0;
441
442 ptr->bond = (BondSpec*)0;
443 ptr->balloc = 0;
444 ptr->bcount = 0;
445
446 ptr->parts = 1;
447 return ptr;
448 }
449
450 static int CreateAtom( Pattern *pat, AtomExpr *expr, int part,int vb)
451 {
452 int index,size;
453
454 if( pat->acount == pat->aalloc )
455 {
456 pat->aalloc += ATOMPOOL;
457 size = (int)(pat->aalloc*sizeof(AtomSpec));
458 if( pat->atom )
459 {
460 pat->atom = (AtomSpec*)realloc(pat->atom,size);
461 }
462 else
463 pat->atom = (AtomSpec*)malloc(size);
464 if( !pat->atom )
465 FatalAllocationError("atom pool");
466 }
467
468 index = pat->acount++;
469 pat->atom[index].part = part;
470 pat->atom[index].expr = expr;
471 pat->atom[index].vb = vb; //std::vector binding
472
473 return index;
474 }
475
476 static int CreateBond( Pattern *pat, BondExpr *expr, int src, int dst )
477 {
478 int index,size;
479
480 if( pat->bcount == pat->balloc )
481 {
482 pat->balloc += BONDPOOL;
483 size = (int)(pat->balloc*sizeof(BondSpec));
484 if( pat->bond )
485 {
486 pat->bond = (BondSpec*)realloc(pat->bond,size);
487 }
488 else
489 pat->bond = (BondSpec*)malloc(size);
490 if( !pat->bond )
491 FatalAllocationError("bond pool");
492 }
493
494 index = pat->bcount++;
495 pat->bond[index].expr = expr;
496 pat->bond[index].src = src;
497 pat->bond[index].dst = dst;
498 return(index);
499 }
500
501 static Pattern *CopyPattern( Pattern *pat )
502 {
503 Pattern *result;
504 AtomExpr *aexpr;
505 BondExpr *bexpr;
506 int i;
507
508 result = AllocPattern();
509 result->parts = pat->parts;
510 for( i=0; i<pat->acount; i++ )
511 {
512 aexpr = CopyAtomExpr(pat->atom[i].expr);
513 CreateAtom(result,aexpr,pat->atom[i].part);
514 }
515
516 for( i=0; i<pat->bcount; i++ )
517 {
518 bexpr = CopyBondExpr(pat->bond[i].expr);
519 CreateBond(result,bexpr,pat->bond[i].src,pat->bond[i].dst);
520 }
521
522 return result;
523 }
524
525 static void FreePattern( Pattern *pat )
526 {
527 int i;
528
529 if( pat )
530 {
531 if( pat->aalloc )
532 {
533 for( i=0; i<pat->acount; i++ )
534 FreeAtomExpr(pat->atom[i].expr);
535 free(pat->atom);
536 }
537
538 if( pat->balloc )
539 {
540 for( i=0; i<pat->bcount; i++ )
541 FreeBondExpr(pat->bond[i].expr);
542 free(pat->bond);
543 }
544 free(pat);
545 }
546 }
547
548 /*=========================*/
549 /* SMARTS Syntax Parsing */
550 /*=========================*/
551
552 static Pattern *ParseSMARTSPattern( void );
553 static Pattern *ParseSMARTSPart( Pattern*, int );
554
555 static Pattern *SMARTSError( Pattern *pat )
556 {
557 char *ptr;
558
559 fprintf(stderr,"SMARTS Error: %s\n",MainPtr);
560
561 fputs(" ",stdout);
562 for( ptr=MainPtr; ptr<LexPtr; ptr++ )
563 fputc(' ',stdout);
564 fputs("^\n",stdout);
565
566 FreePattern(pat);
567 return (Pattern*)0;
568 }
569
570 static AtomExpr *ParseSimpleAtomPrimitive( void )
571 {
572 switch( *LexPtr++ )
573 {
574 case '*':
575 return BuildAtomLeaf(AL_CONST,True);
576 #ifndef STRICT
577
578 case 'A':
579 return BuildAtomLeaf(AL_AROM,False);
580 #endif
581
582 case 'B':
583 if( *LexPtr == 'r' )
584 {
585 LexPtr++;
586 return GenerateElement(35);
587 }
588 return GenerateElement(5);
589
590 case 'C':
591 if( *LexPtr == 'l' )
592 {
593 LexPtr++;
594 return GenerateElement(17);
595 }
596 return GenerateAromElem(6,False);
597
598 case 'F':
599 return GenerateElement( 9);
600 case 'I':
601 return GenerateElement(53);
602 case 'N':
603 return GenerateAromElem( 7,False);
604 case 'O':
605 return GenerateAromElem( 8,False);
606 case 'P':
607 return GenerateElement(15);
608 case 'S':
609 return GenerateAromElem(16,False);
610
611 #ifndef STRICT
612
613 case 'a':
614 return BuildAtomLeaf(AL_AROM,True);
615 #endif
616
617 case 'c':
618 return GenerateAromElem( 6,True);
619 case 'n':
620 return GenerateAromElem( 7,True);
621 case 'o':
622 return GenerateAromElem( 8,True);
623 case 'p':
624 return GenerateAromElem(15,True);
625 case 's':
626 return GenerateAromElem(16,True);
627 }
628 LexPtr--;
629 return (AtomExpr*)0;
630 }
631
632 static AtomExpr *ParseComplexAtomPrimitive( void )
633 {
634 register Pattern *pat;
635 register int index;
636
637 switch( *LexPtr++ )
638 {
639 case('#'): if( !isdigit(*LexPtr) )
640 return( (AtomExpr*)0 );
641
642 index = 0;
643 while( isdigit(*LexPtr) )
644 index = index*10 + ((*LexPtr++)-'0');
645 if( index > ELEMMAX )
646 {
647 LexPtr--;
648 return( (AtomExpr*)0 );
649 }
650 else if( !index )
651 return( (AtomExpr*)0 );
652 return( GenerateElement(index) );
653
654 case('$'): if( *LexPtr != '(' )
655 return( (AtomExpr*)0 );
656 LexPtr++;
657 #ifdef STRICT
658
659 pat = ParseSMARTSPart(AllocPattern(),0);
660 #else
661
662 pat = ParseSMARTSPattern();
663 #endif
664
665 if( !pat )
666 return( (AtomExpr*)0 );
667 if( *LexPtr != ')' )
668 {
669 FreePattern(pat);
670 return( (AtomExpr*)0 );
671 }
672 LexPtr++;
673 return( BuildAtomRecurs(pat) );
674
675 case('*'): return( BuildAtomLeaf(AL_CONST,True) );
676
677 case('+'): if( isdigit(*LexPtr) )
678 {
679 index = 0;
680 while( isdigit(*LexPtr) )
681 index = index*10 + ((*LexPtr++)-'0');
682 }
683 else
684 {
685 index = 1;
686 while( *LexPtr == '+' )
687 {
688 LexPtr++;
689 index++;
690 }
691 }
692 return( BuildAtomLeaf(AL_POSITIVE,index) );
693
694 case('-'): if( isdigit(*LexPtr) )
695 {
696 index = 0;
697 while( isdigit(*LexPtr) )
698 index = index*10 + ((*LexPtr++)-'0');
699 }
700 else
701 {
702 index = 1;
703 while( *LexPtr == '-' )
704 {
705 LexPtr++;
706 index++;
707 }
708 }
709 return BuildAtomLeaf(AL_NEGATIVE,index);
710
711 case '@':
712 if (*LexPtr != '@')
713 return(BuildAtomLeaf(AL_CHIRAL,AL_ANTICLOCKWISE));
714 LexPtr++;
715 return(BuildAtomLeaf(AL_CHIRAL,AL_CLOCKWISE));
716
717 case '^':
718 if (isdigit(*LexPtr))
719 {
720 index = 0;
721 while( isdigit(*LexPtr) )
722 index = index*10 + ((*LexPtr++)-'0');
723 return(BuildAtomLeaf(AL_HYB,index));
724 }
725 else
726 return(BuildAtomLeaf(AL_HYB,1));
727
728 case('0'): case('1'): case('2'): case('3'): case('4'):
729 case('5'): case('6'): case('7'): case('8'): case('9'):
730 index = LexPtr[-1]-'0';
731 while( isdigit(*LexPtr) )
732 index = index*10 + ((*LexPtr++)-'0');
733 return BuildAtomLeaf(AL_MASS,index);
734
735 case('A'): switch( *LexPtr++ )
736 {
737 case('c'): return GenerateElement(89);
738 case('g'): return GenerateElement(47);
739 case('l'): return GenerateElement(13);
740 case('m'): return GenerateElement(95);
741 case('r'): return GenerateElement(18);
742 case('s'): return GenerateElement(33);
743 case('t'): return GenerateElement(85);
744 case('u'): return GenerateElement(79);
745 }
746 LexPtr--;
747 return BuildAtomLeaf(AL_AROM,False);
748
749 case('B'): switch( *LexPtr++ )
750 {
751 case('a'): return GenerateElement(56);
752 case('e'): return GenerateElement( 4);
753 case('i'): return GenerateElement(83);
754 case('k'): return GenerateElement(97);
755 case('r'): return GenerateElement(35);
756 }
757 LexPtr--;
758 return GenerateElement(5);
759
760 case('C'): switch( *LexPtr++ )
761 {
762 case('a'): return GenerateElement(20);
763 case('d'): return GenerateElement(48);
764 case('e'): return GenerateElement(58);
765 case('f'): return GenerateElement(98);
766 case('l'): return GenerateElement(17);
767 case('m'): return GenerateElement(96);
768 case('o'): return GenerateElement(27);
769 case('r'): return GenerateElement(24);
770 case('s'): return GenerateElement(55);
771 case('u'): return GenerateElement(29);
772 }
773 LexPtr--;
774 return GenerateAromElem(6,False);
775
776 case('D'): if( *LexPtr == 'y' )
777 {
778 LexPtr++;
779 return GenerateElement(66);
780 }
781 else if( isdigit(*LexPtr) )
782 {
783 index = 0;
784 while( isdigit(*LexPtr) )
785 index = index*10 + ((*LexPtr++)-'0');
786 return BuildAtomLeaf(AL_DEGREE,index);
787 }
788 break;
789
790 case('E'): if( *LexPtr == 'r' )
791 {
792 LexPtr++;
793 return GenerateElement(68);
794 }
795 else if( *LexPtr == 's' )
796 {
797 LexPtr++;
798 return GenerateElement(99);
799 }
800 else if( *LexPtr == 'u' )
801 {
802 LexPtr++;
803 return GenerateElement(63);
804 }
805 break;
806
807 case('F'): if( *LexPtr == 'e' )
808 {
809 LexPtr++;
810 return GenerateElement(26);
811 }
812 else if( *LexPtr == 'm' )
813 {
814 LexPtr++;
815 return GenerateElement(100);
816 }
817 else if( *LexPtr == 'r' )
818 {
819 LexPtr++;
820 return GenerateElement(87);
821 }
822 return GenerateElement(9);
823
824 case('G'): if( *LexPtr == 'a' )
825 {
826 LexPtr++;
827 return( GenerateElement(31) );
828 }
829 else if( *LexPtr == 'd' )
830 {
831 LexPtr++;
832 return( GenerateElement(64) );
833 }
834 else if( *LexPtr == 'e' )
835 {
836 LexPtr++;
837 return( GenerateElement(32) );
838 }
839 break;
840
841 case('H'): if( *LexPtr == 'e' )
842 {
843 LexPtr++;
844 return( GenerateElement( 2) );
845 }
846 else if( *LexPtr == 'f' )
847 {
848 LexPtr++;
849 return( GenerateElement(72) );
850 }
851 else if( *LexPtr == 'g' )
852 {
853 LexPtr++;
854 return( GenerateElement(80) );
855 }
856 else if( *LexPtr == 'o' )
857 {
858 LexPtr++;
859 return( GenerateElement(67) );
860 }
861 else if( isdigit(*LexPtr) )
862 {
863 index = 0;
864 while( isdigit(*LexPtr) )
865 index = index*10 + ((*LexPtr++)-'0');
866 return( BuildAtomLeaf(AL_HCOUNT,index) );
867 }
868 return( BuildAtomLeaf(AL_HCOUNT,1) );
869 /* BuildAtomLeaf(AL_HCOUNT,1) ??? */
870 /* or else GenerateElement(1) ??? */
871
872 case('I'): if( *LexPtr == 'n' )
873 {
874 LexPtr++;
875 return( GenerateElement(49) );
876 }
877 else if( *LexPtr == 'r' )
878 {
879 LexPtr++;
880 return( GenerateElement(77) );
881 }
882 return( GenerateElement(53) );
883
884 case('K'): if( *LexPtr == 'r' )
885 {
886 LexPtr++;
887 return( GenerateElement(36) );
888 }
889 return( GenerateElement(19) );
890
891 case('L'): if( *LexPtr == 'a' )
892 {
893 LexPtr++;
894 return( GenerateElement( 57) );
895 }
896 else if( *LexPtr == 'i' )
897 {
898 LexPtr++;
899 return( GenerateElement( 3) );
900 }
901 else if( *LexPtr == 'r' )
902 {
903 LexPtr++;
904 return( GenerateElement(103) );
905 }
906 else if( *LexPtr == 'u' )
907 {
908 LexPtr++;
909 return( GenerateElement( 71) );
910 }
911 break;
912
913 case('M'): if( *LexPtr == 'd' )
914 {
915 LexPtr++;
916 return( GenerateElement(101) );
917 }
918 else if( *LexPtr == 'g' )
919 {
920 LexPtr++;
921 return( GenerateElement( 12) );
922 }
923 else if( *LexPtr == 'n' )
924 {
925 LexPtr++;
926 return( GenerateElement( 25) );
927 }
928 else if( *LexPtr == 'o' )
929 {
930 LexPtr++;
931 return( GenerateElement( 42) );
932 }
933 break;
934
935 case('N'): switch( *LexPtr++ )
936 {
937 case('a'): return( GenerateElement( 11) );
938 case('b'): return( GenerateElement( 41) );
939 case('d'): return( GenerateElement( 60) );
940 case('e'): return( GenerateElement( 10) );
941 case('i'): return( GenerateElement( 28) );
942 case('o'): return( GenerateElement(102) );
943 case('p'): return( GenerateElement( 93) );
944 }
945 LexPtr--;
946 return( GenerateAromElem(7,False) );
947
948 case('O'): if( *LexPtr == 's' )
949 {
950 LexPtr++;
951 return( GenerateElement(76) );
952 }
953 return( GenerateAromElem(8,False) );
954
955 case('P'): switch( *LexPtr++ )
956 {
957 case('a'): return( GenerateElement(91) );
958 case('b'): return( GenerateElement(82) );
959 case('d'): return( GenerateElement(46) );
960 case('m'): return( GenerateElement(61) );
961 case('o'): return( GenerateElement(84) );
962 case('r'): return( GenerateElement(59) );
963 case('t'): return( GenerateElement(78) );
964 case('u'): return( GenerateElement(94) );
965 }
966 LexPtr--;
967 return( GenerateElement(15) );
968
969 case('R'): switch( *LexPtr++ )
970 {
971 case('a'): return( GenerateElement(88) );
972 case('b'): return( GenerateElement(37) );
973 case('e'): return( GenerateElement(75) );
974 case('h'): return( GenerateElement(45) );
975 case('n'): return( GenerateElement(86) );
976 case('u'): return( GenerateElement(44) );
977 }
978 LexPtr--;
979 if( isdigit(*LexPtr) )
980 {
981 index = 0;
982 while( isdigit(*LexPtr) )
983 index = index*10 + ((*LexPtr++)-'0');
984 }
985 else
986 index = -1;
987 return( BuildAtomLeaf(AL_RINGS,index) );
988
989 case('S'): switch( *LexPtr++ )
990 {
991 case('b'): return( GenerateElement(51) );
992 case('c'): return( GenerateElement(21) );
993 case('e'): return( GenerateElement(34) );
994 case('i'): return( GenerateElement(14) );
995 case('m'): return( GenerateElement(62) );
996 case('n'): return( GenerateElement(50) );
997 case('r'): return( GenerateElement(38) );
998 }
999 LexPtr--;
1000 return( GenerateAromElem(16,False) );
1001
1002 case('T'): switch( *LexPtr++ )
1003 {
1004 case('a'): return( GenerateElement(73) );
1005 case('b'): return( GenerateElement(65) );
1006 case('c'): return( GenerateElement(43) );
1007 case('e'): return( GenerateElement(52) );
1008 case('h'): return( GenerateElement(90) );
1009 case('i'): return( GenerateElement(22) );
1010 case('l'): return( GenerateElement(81) );
1011 case('m'): return( GenerateElement(69) );
1012 }
1013 LexPtr--;
1014 break;
1015
1016 case('U'): return( GenerateElement(92) );
1017 case('V'): return( GenerateElement(23) );
1018 case('W'): return( GenerateElement(74) );
1019
1020 case('X'): if( *LexPtr == 'e' )
1021 {
1022 LexPtr++;
1023 return( GenerateElement(54) );
1024 }
1025 else if( isdigit(*LexPtr) )
1026 {
1027 index = 0;
1028 while( isdigit(*LexPtr) )
1029 index = index*10 + ((*LexPtr++)-'0');
1030 return( BuildAtomLeaf(AL_CONNECT,index) );
1031 }
1032 break;
1033
1034 case('Y'): if( *LexPtr == 'b' )
1035 {
1036 LexPtr++;
1037 return( GenerateElement(70) );
1038 }
1039 return( GenerateElement(39) );
1040
1041 case('Z'): if( *LexPtr == 'n' )
1042 {
1043 LexPtr++;
1044 return GenerateElement(30);
1045 }
1046 else if( *LexPtr == 'r' )
1047 {
1048 LexPtr++;
1049 return GenerateElement(40);
1050 }
1051 break;
1052
1053 case('a'): if( *LexPtr == 's' )
1054 {
1055 LexPtr++;
1056 return GenerateAromElem(33,True);
1057 }
1058 return BuildAtomLeaf(AL_AROM,True);
1059
1060 case('c'): return GenerateAromElem(6,True);
1061
1062 case('h'): if( isdigit(*LexPtr) )
1063 {
1064 index = 0;
1065 while( isdigit(*LexPtr) )
1066 index = index*10 + ((*LexPtr++)-'0');
1067 }
1068 else
1069 index = 1;
1070 return BuildAtomLeaf(AL_IMPLICIT,index);
1071
1072 case('n'): return GenerateAromElem(7,True);
1073 case('o'): return GenerateAromElem(8,True);
1074 case('p'): return GenerateAromElem(15,True);
1075
1076 case('r'): if( isdigit(*LexPtr) )
1077 {
1078 index = 0;
1079 while( isdigit(*LexPtr) )
1080 index = index*10 + ((*LexPtr++)-'0');
1081 if( index == 0 )
1082 return BuildAtomLeaf(AL_RINGS,0);
1083 return BuildAtomLeaf(AL_SIZE,index);
1084 }
1085 return BuildAtomLeaf(AL_RINGS,-1);
1086
1087 case('s'): if( *LexPtr == 'i' )
1088 {
1089 LexPtr++;
1090 return GenerateAromElem(14,True);
1091 }
1092 return GenerateAromElem(16,True);
1093
1094 case('v'): if( isdigit(*LexPtr) )
1095 {
1096 index = 0;
1097 while( isdigit(*LexPtr) )
1098 index = index*10 + ((*LexPtr++)-'0');
1099 return BuildAtomLeaf(AL_VALENCE,index);
1100 }
1101 break;
1102 }
1103 LexPtr--;
1104 return (AtomExpr*)0;
1105 }
1106
1107 static AtomExpr *ParseAtomExpr( int level )
1108 {
1109 register AtomExpr *expr1;
1110 register AtomExpr *expr2;
1111 register char *prev;
1112
1113 switch( level )
1114 {
1115 case(0): /* Low Precedence Conjunction */
1116 if( !(expr1=ParseAtomExpr(1)) )
1117 return (AtomExpr*)0;
1118
1119 while( *LexPtr == ';' )
1120 {
1121 LexPtr++;
1122 if( !(expr2=ParseAtomExpr(1)) )
1123 {
1124 FreeAtomExpr(expr1);
1125 return (AtomExpr*)0;
1126 }
1127 expr1 = BuildAtomBin(AE_ANDLO,expr1,expr2);
1128 }
1129 return expr1;
1130
1131 case(1): /* Disjunction */
1132 if( !(expr1=ParseAtomExpr(2)) )
1133 return (AtomExpr*)0;
1134
1135 while( *LexPtr == ',' )
1136 {
1137 LexPtr++;
1138 if( !(expr2=ParseAtomExpr(2)) )
1139 {
1140 FreeAtomExpr(expr1);
1141 return( (AtomExpr*)0 );
1142 }
1143 expr1 = BuildAtomBin(AE_OR,expr1,expr2);
1144 }
1145 return( expr1 );
1146
1147 case(2): /* High Precedence Conjunction */
1148 if( !(expr1=ParseAtomExpr(3)) )
1149 return( (AtomExpr*)0 );
1150
1151 while( (*LexPtr!=']') && (*LexPtr!=';') &&
1152 (*LexPtr!=',') && *LexPtr )
1153 {
1154 if( *LexPtr=='&' )
1155 LexPtr++;
1156 prev = LexPtr;
1157 if( !(expr2=ParseAtomExpr(3)) )
1158 {
1159 if( prev != LexPtr )
1160 {
1161 FreeAtomExpr(expr1);
1162 return( (AtomExpr*)0 );
1163 }
1164 else
1165 return( expr1 );
1166 }
1167 expr1 = BuildAtomBin(AE_ANDHI,expr1,expr2);
1168 }
1169 return( expr1 );
1170
1171 case(3): /* Negation or Primitive */
1172 if( *LexPtr == '!' )
1173 {
1174 LexPtr++;
1175 if( !(expr1=ParseAtomExpr(3)) )
1176 return( (AtomExpr*)0 );
1177 return( BuildAtomNot(expr1) );
1178 }
1179 return( ParseComplexAtomPrimitive() );
1180 }
1181 return (AtomExpr*)0;
1182 }
1183
1184 static BondExpr *ParseBondPrimitive( void )
1185 {
1186 switch( *LexPtr++ )
1187 {
1188 case('-'): return BuildBondLeaf(BL_TYPE,BT_SINGLE);
1189 case('='): return BuildBondLeaf(BL_TYPE,BT_DOUBLE);
1190 case('#'): return BuildBondLeaf(BL_TYPE,BT_TRIPLE);
1191 case(':'): return BuildBondLeaf(BL_TYPE,BT_AROM);
1192 case('@'): return BuildBondLeaf(BL_TYPE,BT_RING);
1193 case('~'): return BuildBondLeaf(BL_CONST,True);
1194
1195 case('/'): if( *LexPtr == '?' )
1196 {
1197 LexPtr++;
1198 return BuildBondLeaf(BL_TYPE,BT_UPUNSPEC);
1199 }
1200 return BuildBondLeaf(BL_TYPE,BT_UP);
1201
1202 case('\\'): if( *LexPtr == '?' )
1203 {
1204 LexPtr++;
1205 return BuildBondLeaf(BL_TYPE,BT_DOWNUNSPEC);
1206 }
1207
1208 return BuildBondLeaf(BL_TYPE,BT_DOWN);
1209 }
1210 LexPtr--;
1211 return (BondExpr*)0;
1212 }
1213
1214 static BondExpr *ParseBondExpr( int level )
1215 {
1216 register BondExpr *expr1;
1217 register BondExpr *expr2;
1218 register char *prev;
1219
1220 switch( level )
1221 {
1222 case(0): /* Low Precedence Conjunction */
1223 if( !(expr1=ParseBondExpr(1)) )
1224 return (BondExpr*)0;
1225
1226 while( *LexPtr == ';' )
1227 {
1228 LexPtr++;
1229 if( !(expr2=ParseBondExpr(1)) )
1230 {
1231 FreeBondExpr(expr1);
1232 return (BondExpr*)0;
1233 }
1234 expr1 = BuildBondBin(BE_ANDLO,expr1,expr2);
1235 }
1236 return expr1;
1237
1238 case(1): /* Disjunction */
1239 if( !(expr1=ParseBondExpr(2)) )
1240 return (BondExpr*)0;
1241
1242 while( *LexPtr == ',' )
1243 {
1244 LexPtr++;
1245 if( !(expr2=ParseBondExpr(2)) )
1246 {
1247 FreeBondExpr(expr1);
1248 return (BondExpr*)0;
1249 }
1250 expr1 = BuildBondBin(BE_OR,expr1,expr2);
1251 }
1252 return expr1;
1253
1254 case(2): /* High Precedence Conjunction */
1255 if( !(expr1=ParseBondExpr(3)) )
1256 return (BondExpr*)0;
1257
1258 while( (*LexPtr!=']') && (*LexPtr!=';') &&
1259 (*LexPtr!=',') && *LexPtr )
1260 {
1261 if( *LexPtr == '&' )
1262 LexPtr++;
1263 prev = LexPtr;
1264 if( !(expr2=ParseBondExpr(3)) )
1265 {
1266 if( prev != LexPtr )
1267 {
1268 FreeBondExpr(expr1);
1269 return (BondExpr*)0;
1270 }
1271 else
1272 return expr1;
1273 }
1274 expr1 = BuildBondBin(BE_ANDHI,expr1,expr2);
1275 }
1276 return expr1;
1277
1278 case(3): /* Negation or Primitive */
1279 if( *LexPtr == '!' )
1280 {
1281 LexPtr++;
1282 if( !(expr1=ParseBondExpr(3)) )
1283 return (BondExpr*)0;
1284 return BuildBondNot(expr1);
1285 }
1286 return ParseBondPrimitive();
1287 }
1288 return (BondExpr*)0;
1289 }
1290
1291 static int GetVectorBinding()
1292 {
1293 int vb=0;
1294
1295 LexPtr++; //skip colon
1296 if(isdigit(*LexPtr))
1297 {
1298 vb = 0;
1299 while( isdigit(*LexPtr) )
1300 vb = vb*10 + ((*LexPtr++)-'0');
1301 }
1302
1303 return(vb);
1304 }
1305
1306 static Pattern *ParseSMARTSError( Pattern *pat, BondExpr *expr )
1307 {
1308 if( expr )
1309 FreeBondExpr(expr);
1310 return SMARTSError(pat);
1311 }
1312
1313 static Pattern *SMARTSParser( Pattern *pat, ParseState *stat,
1314 int prev, int part )
1315 {
1316 int vb = 0;
1317 register AtomExpr *aexpr;
1318 register BondExpr *bexpr;
1319 register int index;
1320
1321 bexpr = (BondExpr*)0;
1322
1323 while( *LexPtr )
1324 {
1325 switch( *LexPtr++ )
1326 {
1327 case('.'): if( bexpr || (prev==-1) )
1328 return ParseSMARTSError(pat,bexpr);
1329 prev = -1;
1330 break;
1331
1332 case('-'): case('='): case('#'):
1333 case(':'): case('~'): case('@'):
1334 case('/'): case('\\'): case('!'):
1335 LexPtr--;
1336 if( (prev==-1) || bexpr )
1337 return ParseSMARTSError(pat,bexpr);
1338 if( !(bexpr=ParseBondExpr(0)) )
1339 return ParseSMARTSError(pat,bexpr);
1340 break;
1341
1342 case('('):
1343 #ifdef STRICT
1344 if( (prev==-1) || bexpr )
1345 {
1346 LexPtr--;
1347 return ParseSMARTSError(pat,bexpr);
1348 }
1349 pat = SMARTSParser(pat,stat,prev,part);
1350 if( !pat )
1351 return (Pattern*)0;
1352 #else /* STRICT */
1353
1354 if( bexpr )
1355 {
1356 LexPtr--;
1357 return ParseSMARTSError(pat,bexpr);
1358 }
1359 if( prev == -1 )
1360 {
1361 index = pat->acount;
1362 pat = SMARTSParser(pat,stat,-1,part);
1363 if( !pat )
1364 return( (Pattern*)0 );
1365 if( index == pat->acount )
1366 return ParseSMARTSError(pat,bexpr);
1367 prev = index;
1368 }
1369 else
1370 {
1371 pat = SMARTSParser(pat,stat,prev,part);
1372 if( !pat )
1373 return (Pattern*)0;
1374 }
1375 #endif /* STRICT */
1376
1377 if( *LexPtr != ')' )
1378 return ParseSMARTSError(pat,bexpr);
1379 LexPtr++;
1380 break;
1381
1382 case(')'): LexPtr--;
1383 if( (prev==-1) || bexpr )
1384 return ParseSMARTSError(pat,bexpr);
1385 return pat;
1386
1387 case('%'): if( prev == -1 )
1388 {
1389 LexPtr--;
1390 return ParseSMARTSError(pat,bexpr);
1391 }
1392
1393 if( isdigit(LexPtr[0]) && isdigit(LexPtr[1]) )
1394 {
1395 index = 10*(LexPtr[0]-'0') + (LexPtr[1]-'0');
1396 LexPtr += 2;
1397 }
1398 else
1399 return ParseSMARTSError(pat,bexpr);
1400
1401 if( stat->closure[index] == -1 )
1402 {
1403 stat->closord[index] = bexpr;
1404 stat->closure[index] = prev;
1405 }
1406 else if( stat->closure[index] != prev )
1407 {
1408 FreeBondExpr(stat->closord[index]);
1409 if( !bexpr )
1410 bexpr = GenerateDefaultBond();
1411 CreateBond(pat,bexpr,prev,stat->closure[index]);
1412 stat->closure[index] = -1;
1413 bexpr = (BondExpr*)0;
1414 }
1415 else
1416 return ParseSMARTSError(pat,bexpr);
1417 break;
1418
1419 case('0'): case('1'): case('2'):
1420 case('3'): case('4'): case('5'):
1421 case('6'): case('7'): case('8'):
1422 case('9'): LexPtr--;
1423 if( prev == -1 )
1424 return ParseSMARTSError(pat,bexpr);
1425 index = (*LexPtr++)-'0';
1426
1427 if( stat->closure[index] == -1 )
1428 {
1429 stat->closord[index] = bexpr;
1430 stat->closure[index] = prev;
1431 bexpr = (BondExpr*)0;
1432 }
1433 else if( stat->closure[index] != prev )
1434 {
1435 FreeBondExpr(stat->closord[index]);
1436 if( !bexpr )
1437 bexpr = GenerateDefaultBond();
1438 CreateBond(pat,bexpr,prev,stat->closure[index]);
1439 stat->closure[index] = -1;
1440 bexpr = (BondExpr*)0;
1441 }
1442 else
1443 return ParseSMARTSError(pat,bexpr);
1444 break;
1445
1446 case('['): aexpr = ParseAtomExpr(0);
1447 vb = (*LexPtr == ':') ? GetVectorBinding():0;
1448 if( !aexpr || (*LexPtr!=']') )
1449 return ParseSMARTSError(pat,bexpr);
1450 index = CreateAtom(pat,aexpr,part,vb);
1451 if( prev != -1 )
1452 {
1453 if( !bexpr )
1454 bexpr = GenerateDefaultBond();
1455 CreateBond(pat,bexpr,prev,index);
1456 bexpr = (BondExpr*)0;
1457 }
1458 prev = index;
1459 LexPtr++;
1460 break;
1461
1462 default:
1463 LexPtr--;
1464 aexpr = ParseSimpleAtomPrimitive();
1465 if( !aexpr )
1466 return ParseSMARTSError(pat,bexpr);
1467 index = CreateAtom(pat,aexpr,part);
1468 if( prev != -1 )
1469 {
1470 if( !bexpr )
1471 bexpr = GenerateDefaultBond();
1472 CreateBond(pat,bexpr,prev,index);
1473 bexpr = (BondExpr*)0;
1474 }
1475 prev = index;
1476 }
1477 }
1478
1479 if( (prev==-1) || bexpr )
1480 return ParseSMARTSError(pat,bexpr);
1481
1482 return pat;
1483 }
1484
1485 static void MarkGrowBonds(Pattern *pat)
1486 {
1487 int i;
1488 OBBitVec bv;
1489
1490 for (i = 0;i < pat->bcount;i++)
1491 {
1492 pat->bond[i].grow = (bv[pat->bond[i].src] && bv[pat->bond[i].dst])?
1493 false:true;
1494
1495 bv.SetBitOn(pat->bond[i].src);
1496 bv.SetBitOn(pat->bond[i].dst);
1497 }
1498 }
1499
1500 static int GetChiralFlag(AtomExpr *expr)
1501 {
1502 int size=0;
1503 #define OB_EVAL_STACKSIZE 40
1504
1505 AtomExpr *stack[OB_EVAL_STACKSIZE];
1506 memset(stack,'\0',sizeof(AtomExpr*)*OB_EVAL_STACKSIZE);
1507 #undef OB_EVAL_STACKSIZE
1508
1509 bool lftest=true;
1510
1511 for (size=0,stack[size] = expr;size >= 0;expr=stack[size])
1512 {
1513 switch (expr->type)
1514 {
1515 case AE_LEAF:
1516 if (expr->leaf.prop == AL_CHIRAL)
1517 return(expr->leaf.value);
1518 size--;
1519 break;
1520
1521 case AE_ANDHI:
1522 case AE_ANDLO:
1523
1524 if (stack[size+1] == expr->bin.rgt)
1525 size--;
1526 else if (stack[size+1] == expr->bin.lft)
1527 {
1528 if (lftest)
1529 {
1530 size++;
1531 stack[size] = expr->bin.rgt;
1532 }
1533 else
1534 size--;
1535 }
1536 else
1537 {
1538 size++;
1539 stack[size] = expr->bin.lft;
1540 }
1541 break;
1542
1543 case AE_OR:
1544
1545 if (stack[size+1] == expr->bin.rgt)
1546 size--;
1547 else if (stack[size+1] == expr->bin.lft)
1548 {
1549 if (!lftest)
1550 {
1551 size++;
1552 stack[size] = expr->bin.rgt;
1553 }
1554 else
1555 size--;
1556 }
1557 else
1558 {
1559 size++;
1560 stack[size] = expr->bin.lft;
1561 }
1562 break;
1563
1564 case AE_NOT:
1565 if (stack[size+1] != expr->mon.arg)
1566 {
1567 size++;
1568 stack[size] = expr->mon.arg;
1569 }
1570 else
1571 {
1572 lftest = !lftest;
1573 size--;
1574 }
1575 break;
1576
1577 case AE_RECUR:
1578 size--;
1579 break;
1580 }
1581 }
1582
1583 return((int)false);
1584 }
1585
1586 static Pattern *ParseSMARTSPart( Pattern *result, int part )
1587 {
1588 auto ParseState stat;
1589 int i,flag;
1590
1591 for( i=0; i<100; i++ )
1592 stat.closure[i] = -1;
1593
1594 result = SMARTSParser(result,&stat,-1,part);
1595
1596 flag = False;
1597 for( i=0; i<100; i++ )
1598 if( stat.closure[i] != -1 )
1599 {
1600 FreeBondExpr(stat.closord[i]);
1601 flag = True;
1602 }
1603
1604 if( result )
1605 {
1606 if( flag )
1607 return(SMARTSError(result));
1608 else
1609 {
1610 MarkGrowBonds(result);
1611 result->ischiral = false;
1612 for (i = 0;i < result->acount;i++)
1613 {
1614 result->atom[i].chiral_flag = GetChiralFlag(result->atom[i].expr);
1615 if (result->atom[i].chiral_flag)
1616 result->ischiral = true;
1617 }
1618 return(result);
1619 }
1620 }
1621 else
1622 return (Pattern*)0;
1623 }
1624
1625
1626 static Pattern *ParseSMARTSPattern( void )
1627 {
1628 Pattern *result;
1629 result = AllocPattern();
1630
1631 while( *LexPtr == '(' )
1632 {
1633 LexPtr++;
1634 result = ParseSMARTSPart(result,result->parts);
1635 if( !result )
1636 return (Pattern*)0;
1637 result->parts++;
1638
1639 if( *LexPtr != ')' )
1640 return SMARTSError(result);
1641 LexPtr++;
1642
1643 if( !*LexPtr || (*LexPtr==')') )
1644 return result;
1645
1646 if( *LexPtr != '.' )
1647 return SMARTSError(result);
1648 LexPtr++;
1649 }
1650
1651 return ParseSMARTSPart(result,0);
1652 }
1653
1654 static Pattern *ParseSMARTSString( char *ptr )
1655 {
1656 register Pattern *result;
1657
1658 if( !ptr || !*ptr )
1659 return (Pattern*)0;
1660
1661 LexPtr = MainPtr = ptr;
1662 result = ParseSMARTSPattern();
1663 if( result && *LexPtr )
1664 return SMARTSError(result);
1665 return result;
1666 }
1667
1668 Pattern *ParseSMARTSRecord( char *ptr )
1669 {
1670 register char *src,*dst;
1671
1672 src = ptr;
1673 while( *src && !isspace(*src) )
1674 src++;
1675
1676 if( isspace(*src) )
1677 {
1678 *src++ = '\0';
1679 while( isspace(*src) )
1680 src++;
1681 }
1682
1683 dst = Descr;
1684 while( *src && (dst<Descr+78) )
1685 {
1686 if( isspace(*src) )
1687 {
1688 *dst++ = ' ';
1689 while( isspace(*src) )
1690 src++;
1691 }
1692 else
1693 *dst++ = *src++;
1694 }
1695 *dst = '\0';
1696
1697 return ParseSMARTSString(Buffer);
1698 }
1699
1700 /*==============================*/
1701 /* SMARTS Component Traversal */
1702 /*==============================*/
1703
1704 static void TraverseSMARTS( Pattern *pat, int i )
1705 {
1706 register int j,k;
1707
1708 pat->atom[i].visit = True;
1709 for( j=0; j<pat->bcount; j++ )
1710 if( pat->bond[j].visit == -1 )
1711 {
1712 if( pat->bond[j].src == i )
1713 {
1714 pat->bond[j].visit = i;
1715 k = pat->bond[j].dst;
1716 if( !pat->atom[k].visit )
1717 TraverseSMARTS(pat,k);
1718 }
1719 else if( pat->bond[j].dst == i )
1720 {
1721 pat->bond[j].visit = i;
1722 k = pat->bond[j].src;
1723 if( !pat->atom[k].visit )
1724 TraverseSMARTS(pat,k);
1725 }
1726 }
1727 }
1728
1729 /*============================*/
1730 /* Canonical SMARTS Pattern */
1731 /*============================*/
1732
1733 static AtomExpr *NotAtomExpr( AtomExpr* );
1734 static AtomExpr *AndAtomExpr( AtomExpr*, AtomExpr* );
1735 static AtomExpr *OrAtomExpr( AtomExpr*, AtomExpr* );
1736 //static AtomExpr *TransformAtomExpr( AtomExpr* );
1737 //static Pattern *CanonicaliseSMARTS( Pattern* );
1738
1739 static int IsBooleanAtomLeaf( AtomExpr *expr )
1740 {
1741 return (expr->leaf.prop==AL_AROM) ||
1742 (expr->leaf.prop==AL_CONST);
1743 }
1744
1745 static int IsNegatingAtomLeaf( AtomExpr *expr )
1746 {
1747 return (expr->leaf.prop==AL_RINGS);
1748 }
1749
1750 static int EqualAtomExpr( AtomExpr *lft, AtomExpr *rgt )
1751 {
1752 if( lft->type != rgt->type )
1753 return False;
1754
1755 if( lft->type == AE_LEAF )
1756 {
1757 return( (lft->leaf.prop==rgt->leaf.prop) &&
1758 (lft->leaf.value==rgt->leaf.value) );
1759 }
1760 else if( lft->type == AE_NOT )
1761 {
1762 return EqualAtomExpr(lft->mon.arg,rgt->mon.arg);
1763 }
1764 else if( lft->type == AE_RECUR )
1765 return False;
1766
1767 return EqualAtomExpr(lft->bin.lft,rgt->bin.lft) &&
1768 EqualAtomExpr(lft->bin.rgt,rgt->bin.rgt);
1769 }
1770
1771 static int OrderAtomExpr( AtomExpr *lft, AtomExpr *rgt )
1772 {
1773 register AtomExpr *larg;
1774 register AtomExpr *rarg;
1775 register int stat;
1776
1777 if( lft->type == AE_NOT )
1778 { /* larg->type == AE_LEAF */
1779 larg = lft->mon.arg;
1780 }
1781 else
1782 larg = lft;
1783
1784 if( rgt->type == AE_NOT )
1785 { /* rarg->type == AE_LEAF */
1786 rarg = rgt->mon.arg;
1787 }
1788 else
1789 rarg = rgt;
1790
1791 if( larg->type > rarg->type )
1792 {
1793 return 1;
1794 }
1795 else if( larg->type < rarg->type )
1796 return -1;
1797
1798 if( larg->type == AE_LEAF )
1799 {
1800 if( larg->leaf.prop > rarg->leaf.prop )
1801 return 1;
1802 if( larg->leaf.prop < rarg->leaf.prop )
1803 return -1;
1804 return( larg->leaf.value - rarg->leaf.value );
1805 }
1806
1807 stat = OrderAtomExpr(lft->bin.lft,rgt->bin.lft);
1808 if( stat != 0 )
1809 return stat;
1810 return OrderAtomExpr(lft->bin.rgt,rgt->bin.rgt);
1811 }
1812
1813 static int AtomLeafConflict( AtomExpr *lft, AtomExpr *rgt )
1814 {
1815 register AtomExpr *tmp;
1816
1817 if( (lft->type==AE_LEAF) && (rgt->type==AE_LEAF) )
1818 {
1819 if( lft->leaf.prop == rgt->leaf.prop )
1820 {
1821 if( IsNegatingAtomLeaf(lft) )
1822 {
1823 if( lft->leaf.value == 0 )
1824 {
1825 return rgt->leaf.value != 0;
1826 }
1827 else if( lft->leaf.value == -1 )
1828 return rgt->leaf.value == 0;
1829
1830 if( rgt->leaf.value == 0 )
1831 {
1832 return lft->leaf.value != 0;
1833 }
1834 else if( rgt->leaf.value == -1 )
1835 return lft->leaf.value == 0;
1836 }
1837 return lft->leaf.value != rgt->leaf.value;
1838 }
1839
1840 if( lft->leaf.prop > rgt->leaf.prop )
1841 {
1842 tmp = lft;
1843 lft = rgt;
1844 rgt = tmp;
1845 }
1846
1847 /* Aromaticity -> Ring */
1848 if( (lft->leaf.prop==AL_AROM) && (rgt->leaf.prop==AL_RINGS) )
1849 return( lft->leaf.value && !rgt->leaf.value );
1850
1851 /* Positive charge ~ Negative charge */
1852 if( (lft->leaf.prop==AL_NEGATIVE) && (rgt->leaf.prop==AL_POSITIVE) )
1853 return( (lft->leaf.value!=0) || (rgt->leaf.value!=0) );
1854
1855 /* Total hcount >= Implicit hcount */
1856 if( (lft->leaf.prop==AL_HCOUNT) && (rgt->leaf.prop==AL_IMPLICIT) )
1857 return( lft->leaf.value < rgt->leaf.value );
1858 }
1859
1860 if( (lft->type==AE_LEAF) && (rgt->type==AE_NOT) )
1861 {
1862 rgt = rgt->mon.arg;
1863 if( (lft->leaf.prop==AL_NEGATIVE) && (rgt->leaf.prop==AL_POSITIVE) )
1864 return( (lft->leaf.value==0) && (rgt->leaf.value==0) );
1865 if( (lft->leaf.prop==AL_POSITIVE) && (rgt->leaf.prop==AL_NEGATIVE) )
1866 return( (lft->leaf.value==0) && (rgt->leaf.value==0) );
1867 return False;
1868 }
1869
1870 if( (lft->type==AE_NOT) && (rgt->type==AE_LEAF) )
1871 {
1872 lft = lft->mon.arg;
1873 if( (lft->leaf.prop==AL_NEGATIVE) && (rgt->leaf.prop==AL_POSITIVE) )
1874 return( (lft->leaf.value==0) && (rgt->leaf.value==0) );
1875 if( (lft->leaf.prop==AL_POSITIVE) && (rgt->leaf.prop==AL_NEGATIVE) )
1876 return( (lft->leaf.value==0) && (rgt->leaf.value==0) );
1877 return False;
1878 }
1879
1880 return False;
1881 }
1882
1883 static int AtomExprConflict( AtomExpr *lft, AtomExpr *rgt )
1884 {
1885 while( rgt->type == AE_ANDHI )
1886 {
1887 if( AtomLeafConflict(lft,rgt->bin.lft) )
1888 return True;
1889 rgt = rgt->bin.rgt;
1890 }
1891 return AtomLeafConflict(lft,rgt);
1892 }
1893
1894 /* return LEAF(lft) => LEAF(rgt); */
1895 static int AtomLeafImplies( AtomExpr *lft, AtomExpr *rgt )
1896 {
1897 if( (lft->type==AE_LEAF) && (rgt->type==AE_LEAF) )
1898 { /* Implied Ring Membership */
1899 if( (rgt->leaf.prop==AL_RINGS) && (rgt->leaf.value==-1) )
1900 {
1901 if( lft->leaf.prop == AL_AROM )
1902 return lft->leaf.value;
1903
1904 if( lft->leaf.prop == AL_RINGS )
1905 return lft->leaf.value > 0;
1906
1907 if( lft->leaf.prop == AL_SIZE )
1908 return lft->leaf.value > 0;
1909 }
1910
1911 /* Positive charge ~ Negative charge */
1912 if( (lft->leaf.prop==AL_POSITIVE) && (rgt->leaf.prop==AL_NEGATIVE) )
1913 return (lft->leaf.value==0) && (rgt->leaf.value==0);
1914 return False;
1915 }
1916
1917 if( (lft->type==AE_LEAF) && (rgt->type==AE_NOT) )
1918 {
1919 rgt = rgt->mon.arg;
1920 if( lft->leaf.prop == rgt->leaf.prop )
1921 return lft->leaf.value != rgt->leaf.value;
1922
1923 if( (lft->leaf.prop==AL_POSITIVE) && (rgt->leaf.prop==AL_NEGATIVE) )
1924 return True;
1925 if( (lft->leaf.prop==AL_NEGATIVE) && (rgt->leaf.prop==AL_POSITIVE) )
1926 return True;
1927 return False;
1928 }
1929
1930 return False;
1931 }
1932
1933 /* return EXPR(rgt) => LEAF(lft); */
1934 static int AtomExprImplied( AtomExpr *lft, AtomExpr *rgt )
1935 {
1936 while( rgt->type == AE_ANDHI )
1937 {
1938 if( AtomLeafImplies(rgt->bin.lft,lft) )
1939 return True;
1940 rgt = rgt->bin.rgt;
1941 }
1942 return AtomLeafImplies(rgt,lft);
1943 }
1944
1945 /* remove implied nodes from EXPR(rgt) */
1946 static AtomExpr *AtomExprImplies( AtomExpr *lft, AtomExpr *rgt )
1947 {
1948 register AtomExpr *tmp;
1949
1950 if( rgt->type != AE_ANDHI )
1951 {
1952 if( AtomLeafImplies(lft,rgt) )
1953 {
1954 FreeAtomExpr(rgt);
1955 return (AtomExpr*)0;
1956 }
1957 return rgt;
1958 }
1959
1960 tmp = AtomExprImplies(lft,rgt->bin.rgt);
1961
1962 if( tmp )
1963 {
1964 if( AtomLeafImplies(lft,rgt->bin.lft) )
1965 {
1966 rgt->bin.rgt = (AtomExpr*)0;
1967 FreeAtomExpr(rgt);
1968 return tmp;
1969 }
1970 rgt->bin.rgt = tmp;
1971 return rgt;
1972 }
1973 else
1974 {
1975 rgt->bin.rgt = (AtomExpr*)0;
1976 if( AtomLeafImplies(lft,rgt->bin.lft) )
1977 {
1978 FreeAtomExpr(rgt);
1979 return (AtomExpr*)0;
1980 }
1981 tmp = rgt->bin.lft;
1982 rgt->bin.lft = (AtomExpr*)0;
1983 FreeAtomExpr(rgt);
1984 return tmp;
1985 }
1986 }
1987
1988 static AtomExpr *AndAtomExprLeaf( AtomExpr *lft, AtomExpr *rgt )
1989 {
1990 if( AtomExprConflict(lft,rgt) )
1991 {
1992 FreeAtomExpr(lft);
1993 FreeAtomExpr(rgt);
1994 return BuildAtomLeaf(AL_CONST,False);
1995 }
1996
1997 if( AtomExprImplied(lft,rgt) )
1998 {
1999 FreeAtomExpr(lft);
2000 return rgt;
2001 }
2002
2003 rgt = AtomExprImplies(lft,rgt);
2004 if( !rgt )
2005 return lft;
2006
2007 return BuildAtomBin(AE_ANDHI,lft,rgt);
2008 }
2009
2010 static AtomExpr *ConstrainRecursion( AtomExpr *recur, AtomExpr *expr )
2011 {
2012 register AtomExpr *head;
2013 register Pattern *pat;
2014
2015 pat = (Pattern*)recur->recur.recur;
2016 head = AndAtomExpr(pat->atom[0].expr,expr);
2017 pat->atom[0].expr = head;
2018
2019 if( IsInvalidAtom(head) )
2020 {
2021 FreePattern(pat);
2022 return BuildAtomLeaf(AL_CONST,False);
2023 }
2024 return recur;
2025 }
2026
2027 static AtomExpr *AndAtomExpr( AtomExpr *lft, AtomExpr *rgt )
2028 {
2029 register AtomExpr *expr;
2030 register int order;
2031
2032 /* Identities */
2033 if( EqualAtomExpr(lft,rgt) )
2034 {
2035 FreeAtomExpr(rgt);
2036 return lft;
2037 }
2038
2039 if( (lft->type==AE_LEAF) && (lft->leaf.prop==AL_CONST) )
2040 {
2041 if( lft->leaf.value )
2042 {
2043 FreeAtomExpr(lft);
2044 return rgt;
2045 }
2046 else
2047 {
2048 FreeAtomExpr(rgt);
2049 return lft;
2050 }
2051 }
2052
2053 if( (rgt->type==AE_LEAF) && (rgt->leaf.prop==AL_CONST) )
2054 {
2055 if( rgt->leaf.value )
2056 {
2057 FreeAtomExpr(rgt);
2058 return lft;
2059 }
2060 else
2061 {
2062 FreeAtomExpr(lft);
2063 return rgt;
2064 }
2065 }
2066
2067 /* Distributivity */
2068 if( lft->type == AE_OR )
2069 {
2070 expr = CopyAtomExpr(rgt);
2071 expr = OrAtomExpr(AndAtomExpr(expr,lft->bin.lft),
2072 AndAtomExpr(rgt, lft->bin.rgt));
2073 lft->bin.lft = (AtomExpr*)0;
2074 lft->bin.rgt = (AtomExpr*)0;
2075 FreeAtomExpr(lft);
2076 return( expr );
2077 }
2078
2079 if( rgt->type == AE_OR )
2080 {
2081 expr = CopyAtomExpr(lft);
2082 expr = OrAtomExpr(AndAtomExpr(expr,rgt->bin.lft),
2083 AndAtomExpr(lft, rgt->bin.rgt));
2084 rgt->bin.lft = (AtomExpr*)0;
2085 rgt->bin.rgt = (AtomExpr*)0;
2086 FreeAtomExpr(rgt);
2087 return( expr );
2088 }
2089
2090 /* Recursion */
2091 if( (rgt->type==AE_RECUR) && (lft->type!=AE_RECUR) )
2092 return ConstrainRecursion(rgt,lft);
2093
2094 if( (rgt->type!=AE_RECUR) && (lft->type==AE_RECUR) )
2095 return ConstrainRecursion(lft,rgt);
2096
2097 order = OrderAtomExpr(lft,rgt);
2098 if( order > 0 )
2099 {
2100 expr = lft;
2101 lft = rgt;
2102 rgt = expr;
2103 }
2104
2105 if( lft->type == AE_ANDHI )
2106 {
2107 expr = AndAtomExpr(lft->bin.rgt,rgt);
2108 expr = AndAtomExpr(lft->bin.lft,expr);
2109 lft->bin.lft = (AtomExpr*)0;
2110 lft->bin.rgt = (AtomExpr*)0;
2111 FreeAtomExpr(lft);
2112 return expr;
2113 }
2114
2115 if( rgt->type == AE_ANDHI )
2116 {
2117 if( OrderAtomExpr(lft,rgt->bin.lft) > 0 )
2118 {
2119 expr = AndAtomExpr(lft,rgt->bin.rgt);
2120 expr = AndAtomExpr(rgt->bin.lft,expr);
2121 rgt->bin.lft = (AtomExpr*)0;
2122 rgt->bin.rgt = (AtomExpr*)0;
2123 FreeAtomExpr(rgt);
2124 return expr;
2125 }
2126
2127 if( EqualAtomExpr(lft,rgt->bin.lft) )
2128 {
2129 FreeAtomExpr(lft);
2130 return rgt;
2131 }
2132 }
2133
2134 return AndAtomExprLeaf(lft,rgt);
2135 }
2136
2137 static AtomExpr *OrAtomExprLeaf( AtomExpr *lft, AtomExpr *rgt )
2138 {
2139 return BuildAtomBin(AE_OR,lft,rgt);
2140 }
2141
2142 static AtomExpr *OrAtomExpr( AtomExpr *lft, AtomExpr *rgt )
2143 {
2144 register AtomExpr *expr;
2145 register int order;
2146
2147 /* Identities */
2148 if( EqualAtomExpr(lft,rgt) )
2149 {
2150 FreeAtomExpr(rgt);
2151 return lft;
2152 }
2153
2154 if( (lft->type==AE_LEAF) && (lft->leaf.prop==AL_CONST) )
2155 {
2156 if( lft->leaf.value )
2157 {
2158 FreeAtomExpr(rgt);
2159 return lft;
2160 }
2161 else
2162 {
2163 FreeAtomExpr(lft);
2164 return rgt;
2165 }
2166 }
2167
2168 if( (rgt->type==AE_LEAF) && (rgt->leaf.prop==AL_CONST) )
2169 {
2170 if( rgt->leaf.value )
2171 {
2172 FreeAtomExpr(lft);
2173 return rgt;
2174 }
2175 else
2176 {
2177 FreeAtomExpr(rgt);
2178 return lft;
2179 }
2180 }
2181
2182 order = OrderAtomExpr(lft,rgt);
2183 if( order > 0 )
2184 {
2185 expr = lft;
2186 lft = rgt;
2187 rgt = expr;
2188 }
2189
2190 if( lft->type == AE_OR )
2191 {
2192 expr = OrAtomExpr(lft->bin.rgt,rgt);
2193 expr = OrAtomExpr(lft->bin.lft,expr);
2194 lft->bin.lft = (AtomExpr*)0;
2195 lft->bin.rgt = (AtomExpr*)0;
2196 FreeAtomExpr(lft);
2197 return expr;
2198 }
2199
2200 if( rgt->type == AE_OR )
2201 {
2202 if( OrderAtomExpr(lft,rgt->bin.lft) > 0 )
2203 {
2204 expr = OrAtomExpr(lft,rgt->bin.rgt);
2205 expr = OrAtomExpr(rgt->bin.lft,expr);
2206 rgt->bin.lft = (AtomExpr*)0;
2207 rgt->bin.rgt = (AtomExpr*)0;
2208 FreeAtomExpr(rgt);
2209 return expr;
2210 }
2211
2212 if( EqualAtomExpr(lft,rgt->bin.lft) )
2213 {
2214 FreeAtomExpr(lft);
2215 return rgt;
2216 }
2217 }
2218
2219 return OrAtomExprLeaf(lft,rgt);
2220 }
2221
2222 static AtomExpr *NotAtomExpr( AtomExpr *expr )
2223 {
2224 register AtomExpr *result;
2225 register AtomExpr *lft;
2226 register AtomExpr *rgt;
2227
2228 if( expr->type == AE_LEAF )
2229 {
2230 if( IsBooleanAtomLeaf(expr) )
2231 {
2232 expr->leaf.value = !expr->leaf.value;
2233 return expr;
2234 }
2235 else if( IsNegatingAtomLeaf(expr) )
2236 {
2237 if( expr->leaf.value == -1 )
2238 {
2239 expr->leaf.value = 0;
2240 return expr;
2241 }
2242 else if( expr->leaf.value == 0 )
2243 {
2244 expr->leaf.value = -1;
2245 return expr;
2246 }
2247 }
2248 }
2249 else if( expr->type == AE_NOT )
2250 {
2251 result = expr->mon.arg;
2252 expr->mon.arg = (AtomExpr*)0;
2253 FreeAtomExpr(expr);
2254 return result;
2255 }
2256 else if( (expr->type==AE_ANDHI) ||
2257 (expr->type==AE_ANDLO) )
2258 {
2259 lft = NotAtomExpr(expr->bin.lft);
2260 rgt = NotAtomExpr(expr->bin.rgt);
2261 expr->bin.lft = (AtomExpr*)0;
2262 expr->bin.rgt = (AtomExpr*)0;
2263 FreeAtomExpr(expr);
2264 return OrAtomExpr(lft,rgt);
2265 }
2266 else if( expr->type == AE_OR )
2267 {
2268 lft = NotAtomExpr(expr->bin.lft);
2269 rgt = NotAtomExpr(expr->bin.rgt);
2270 expr->bin.lft = (AtomExpr*)0;
2271 expr->bin.rgt = (AtomExpr*)0;
2272 FreeAtomExpr(expr);
2273 return AndAtomExpr(lft,rgt);
2274 }
2275 return BuildAtomNot(expr);
2276 }
2277
2278 /*==============================*/
2279 /* Canonical Bond Expressions */
2280 /*==============================*/
2281
2282 static int GetBondLeafIndex( BondExpr *expr )
2283 {
2284 if( expr->leaf.prop == BL_CONST )
2285 {
2286 if( expr->leaf.value )
2287 {
2288 return( BS_ALL );
2289 }
2290 else
2291 return( 0 );
2292 }
2293 else /* expr->leaf.prop == BL_TYPE */
2294 switch( expr->leaf.value )
2295 {
2296 case(BT_SINGLE): return( BS_SINGLE );
2297 case(BT_DOUBLE): return( BS_DOUBLE );
2298 case(BT_TRIPLE): return( BS_TRIPLE );
2299 case(BT_AROM): return( BS_AROM );
2300 case(BT_UP): return( BS_UP );
2301 case(BT_DOWN): return( BS_DOWN );
2302 case(BT_UPUNSPEC): return( BS_UPUNSPEC );
2303 case(BT_DOWNUNSPEC): return( BS_DOWNUNSPEC );
2304 case(BT_RING): return( BS_RING );
2305 }
2306 return 0;
2307 }
2308
2309 static int GetBondExprIndex( BondExpr *expr )
2310 {
2311 register int lft,rgt;
2312 register int arg;
2313
2314 switch( expr->type )
2315 {
2316 case(BE_LEAF): return GetBondLeafIndex(expr);
2317
2318 case(BE_NOT): arg = GetBondExprIndex(expr->mon.arg);
2319 return( arg ^ BS_ALL );
2320
2321 case(BE_ANDHI):
2322 case(BE_ANDLO): lft = GetBondExprIndex(expr->bin.lft);
2323 rgt = GetBondExprIndex(expr->bin.rgt);
2324 return( lft & rgt );
2325
2326 case(BE_OR): lft = GetBondExprIndex(expr->bin.lft);
2327 rgt = GetBondExprIndex(expr->bin.rgt);
2328 return( lft | rgt );
2329 }
2330 /* Avoid Compiler Warning */
2331 return 0;
2332 }
2333
2334 static BondExpr *NotBondExpr( BondExpr *expr )
2335 {
2336 register BondExpr *result;
2337
2338 if( expr->type == BE_LEAF )
2339 {
2340 if( expr->leaf.prop == BL_CONST )
2341 {
2342 expr->leaf.value = !expr->leaf.value;
2343 return expr;
2344 }
2345 }
2346 else if( expr->type == BE_NOT )
2347 {
2348 result = expr->mon.arg;
2349 expr->mon.arg = (BondExpr*)0;
2350 FreeBondExpr(expr);
2351 return result;
2352 }
2353 return BuildBondNot(expr);
2354 }
2355
2356 static BondExpr *TransformBondExpr( BondExpr *expr )
2357 {
2358 register BondExpr *lft,*rgt;
2359 register BondExpr *arg;
2360
2361 if( expr->type == BE_LEAF )
2362 {
2363 return expr;
2364 }
2365 else if( expr->type == BE_NOT )
2366 {
2367 arg = expr->mon.arg;
2368 arg = TransformBondExpr(arg);
2369 expr->mon.arg = (BondExpr*)0;
2370 FreeBondExpr(expr);
2371 return NotBondExpr(arg);
2372 }
2373 else if( expr->type == BE_ANDHI )
2374 {
2375 lft = expr->bin.lft;
2376 rgt = expr->bin.rgt;
2377 lft = TransformBondExpr(lft);
2378 rgt = TransformBondExpr(rgt);
2379 expr->bin.lft = lft;
2380 expr->bin.rgt = rgt;
2381 return expr;
2382 }
2383 else if( expr->type == BE_ANDLO )
2384 {
2385 lft = expr->bin.lft;
2386 rgt = expr->bin.rgt;
2387 lft = TransformBondExpr(lft);
2388 rgt = TransformBondExpr(rgt);
2389 expr->bin.lft = lft;
2390 expr->bin.rgt = rgt;
2391 return expr;
2392 }
2393 else if( expr->type == BE_OR )
2394 {
2395 lft = expr->bin.lft;
2396 rgt = expr->bin.rgt;
2397 lft = TransformBondExpr(lft);
2398 rgt = TransformBondExpr(rgt);
2399 expr->bin.lft = lft;
2400 expr->bin.rgt = rgt;
2401 return expr;
2402 }
2403 return expr;
2404 }
2405
2406 #ifdef FOO
2407 static BondExpr *CanonicaliseBond( BondExpr *expr )
2408 {
2409 #ifndef ORIG
2410 register int index;
2411
2412 index = GetBondExprIndex(expr);
2413 FreeBondExpr(expr);
2414
2415 LexPtr = CanBondExpr[index];
2416 if( *LexPtr )
2417 {
2418 expr = ParseBondExpr(0);
2419 }
2420 else
2421 expr = GenerateDefaultBond();
2422 #endif
2423
2424 return TransformBondExpr(expr);
2425 }
2426 #endif
2427
2428
2429 //**********************************
2430 //********Pattern Matching**********
2431 //**********************************
2432
2433 bool OBSmartsPattern::Init(const char *buffer)
2434 {
2435 strcpy(Buffer,buffer);
2436
2437 _pat = ParseSMARTSRecord(Buffer);
2438 _str = buffer;
2439
2440 return(_pat != (Pattern*)NULL);
2441 }
2442
2443 bool OBSmartsPattern::Init(const std::string &s)
2444 {
2445 strcpy(Buffer, s.c_str());
2446
2447 _pat = ParseSMARTSRecord(Buffer);
2448 _str = s;
2449
2450 return(_pat != (Pattern*)NULL);
2451 }
2452
2453 OBSmartsPattern::~OBSmartsPattern()
2454 {
2455 if (_pat)
2456 FreePattern(_pat);
2457 }
2458
2459 bool OBSmartsPattern::Match(OBMol &mol,bool single)
2460 {
2461 RSCACHE.clear();
2462 return(match(mol,_pat,_mlist,single));
2463 }
2464
2465 bool OBSmartsPattern::RestrictedMatch(OBMol &mol,std::vector<std::pair<int,int> > &pr,bool single)
2466 {
2467 bool ok;
2468 std::vector<std::vector<int> > mlist;
2469 std::vector<std::vector<int> >::iterator i;
2470 std::vector<std::pair<int,int> >::iterator j;
2471
2472 RSCACHE.clear();
2473 match(mol,_pat,mlist);
2474 _mlist.clear();
2475 if (mlist.empty())
2476 return(false);
2477
2478 for (i = mlist.begin();i != mlist.end();i++)
2479 {
2480 ok = true;
2481 for (j = pr.begin();j != pr.end() && ok;j++)
2482 if ((*i)[j->first] != j->second)
2483 ok = false;
2484
2485 if (ok)
2486 _mlist.push_back(*i);
2487 if (single && !_mlist.empty())
2488 return(true);
2489 }
2490
2491 return((_mlist.empty()) ? false:true);
2492 }
2493
2494 bool OBSmartsPattern::RestrictedMatch(OBMol &mol,OBBitVec &vres,bool single)
2495 {
2496 bool ok;
2497 std::vector<int>::iterator j;
2498 std::vector<std::vector<int> > mlist;
2499 std::vector<std::vector<int> >::iterator i;
2500
2501 RSCACHE.clear();
2502 match(mol,_pat,mlist);
2503
2504 _mlist.clear();
2505 if (mlist.empty())
2506 return(false);
2507
2508 for (i = mlist.begin();i != mlist.end();i++)
2509 {
2510 ok = true;
2511 for (j = i->begin();j != i->end();j++)
2512 if (!vres[*j])
2513 {
2514 ok = false;
2515 break;
2516 }
2517 if (!ok)
2518 continue;
2519
2520 _mlist.push_back(*i);
2521 if (single && !_mlist.empty())
2522 return(true);
2523 }
2524
2525 return((_mlist.empty()) ? false:true);
2526 }
2527
2528 void SetupAtomMatchTable(std::vector<std::vector<bool> > &ttab,Pattern *pat,OBMol &mol)
2529 {
2530 int i;
2531
2532 ttab.resize(pat->acount);
2533 for (i = 0;i < pat->acount;i++)
2534 ttab[i].resize(mol.NumAtoms()+1);
2535
2536 OBAtom *atom;
2537 std::vector<OBNodeBase*>::iterator j;
2538 for (i = 0;i < pat->acount;i++)
2539 for (atom = mol.BeginAtom(j);atom;atom = mol.NextAtom(j))
2540 if (EvalAtomExpr(pat->atom[0].expr,atom))
2541 ttab[i][atom->GetIdx()] = true;
2542 }
2543
2544 static void FastSingleMatch(OBMol &mol,Pattern *pat,std::vector<std::vector<int> > &mlist)
2545 {
2546 OBAtom *atom,*a1,*nbr;
2547 std::vector<OBNodeBase*>::iterator i;
2548
2549 OBBitVec bv(mol.NumAtoms()+1);
2550 std::vector<int> map;
2551 map.resize(pat->acount);
2552 std::vector<std::vector<OBEdgeBase*>::iterator> vi;
2553 std::vector<bool> vif;
2554
2555 if (pat->bcount)
2556 {
2557 vif.resize(pat->bcount);
2558 vi.resize(pat->bcount);
2559 }
2560
2561 int bcount;
2562 for (atom = mol.BeginAtom(i);atom;atom=mol.NextAtom(i))
2563 if (EvalAtomExpr(pat->atom[0].expr,atom))
2564 {
2565 map[0] = atom->GetIdx();
2566 if (pat->bcount)
2567 vif[0] = false;
2568 bv.Clear();
2569 bv.SetBitOn(atom->GetIdx());
2570
2571 for (bcount=0;bcount >=0;)
2572 {
2573 //***entire pattern matched***
2574 if (bcount == pat->bcount) //save full match here
2575 {
2576 mlist.push_back(map);
2577 bcount--;
2578 return; //found a single match
2579 }
2580
2581 //***match the next bond***
2582 if (!pat->bond[bcount].grow) //just check bond here
2583 {
2584 if ( !vif[bcount] )
2585 {
2586 OBBond *bond = mol.GetBond(map[pat->bond[bcount].src],
2587 map[pat->bond[bcount].dst]);
2588 if (bond && EvalBondExpr(pat->bond[bcount].expr,bond))
2589 {
2590 vif[bcount++] = true;
2591 if (bcount < pat->bcount)
2592 vif[bcount] = false;
2593 }
2594 else
2595 bcount--;
2596 }
2597 else //bond must have already been visited - backtrack
2598 bcount--;
2599 }
2600 else //need to map atom and check bond
2601 {
2602 a1 = mol.GetAtom(map[pat->bond[bcount].src]);
2603
2604 if (!vif[bcount]) //figure out which nbr atom we are mapping
2605 {
2606 nbr = a1->BeginNbrAtom(vi[bcount]);
2607 }
2608 else
2609 {
2610 bv.SetBitOff(map[pat->bond[bcount].dst]);
2611 nbr = a1->NextNbrAtom(vi[bcount]);
2612 }
2613
2614 for (;nbr;nbr=a1->NextNbrAtom(vi[bcount]))
2615 if (!bv[nbr->GetIdx()])
2616 if (EvalAtomExpr(pat->atom[pat->bond[bcount].dst].expr,nbr)
2617 && EvalBondExpr(pat->bond[bcount].expr,(OBBond *)*(vi[bcount])))
2618 {
2619 bv.SetBitOn(nbr->GetIdx());
2620 map[pat->bond[bcount].dst] = nbr->GetIdx();
2621 vif[bcount] = true;
2622 bcount++;
2623 if (bcount < pat->bcount)
2624 vif[bcount] = false;
2625 break;
2626 }
2627
2628 if (!nbr)//no match - time to backtrack
2629 bcount--;
2630 }
2631 }
2632 }
2633 }
2634
2635
2636 static bool match(OBMol &mol,Pattern *pat,std::vector<std::vector<int> > &mlist,bool single)
2637 {
2638 mlist.clear();
2639 if (!pat || pat->acount == 0)
2640 return(false);//shouldn't ever happen
2641
2642 if (single && !pat->ischiral)
2643 FastSingleMatch(mol,pat,mlist);
2644 else
2645 {
2646 OBSSMatch ssm(mol,pat);
2647 ssm.Match(mlist);
2648 }
2649
2650 if (pat->ischiral && mol.Has3D())
2651 {
2652 int j,k,r1,r2,r3,r4;
2653 std::vector<std::vector<int> >::iterator m;
2654 OBAtom *ra1,*ra2,*ra3,*ra4;
2655 std::vector<std::vector<int> > tmpmlist;
2656
2657 for (j = 0;j < pat->acount;j++)
2658 if (pat->atom[j].chiral_flag)
2659 {
2660 r1 = r2 = r3 = r4 = -1;
2661 r2 = j;
2662 for (k = 0;k < pat->bcount;k++)
2663 if (pat->bond[k].dst == r2)
2664 if (r1 == -1)
2665 r1 = pat->bond[k].src;
2666 else if (r3 == -1)
2667 r3 = pat->bond[k].src;
2668 else if (r4 == -1)
2669 r4 = pat->bond[k].src;
2670
2671 for (k = 0;k < pat->bcount;k++)
2672 if (pat->bond[k].src == r2)
2673 if (r1 == -1)
2674 r1 = pat->bond[k].dst;
2675 else if (r3 == -1)
2676 r3 = pat->bond[k].dst;
2677 else if (r4 == -1)
2678 r4 = pat->bond[k].dst;
2679
2680 if (r1 == -1 || r2 == -1 || r3 == -1 || r4 == -1)
2681 continue;
2682
2683 tmpmlist.clear();
2684 for (m = mlist.begin();m != mlist.end();m++)
2685 {
2686 ra1 = mol.GetAtom((*m)[r1]);
2687 ra2 = mol.GetAtom((*m)[r2]);
2688 ra3 = mol.GetAtom((*m)[r3]);
2689 ra4 = mol.GetAtom((*m)[r4]);
2690 double sign = CalcTorsionAngle(ra1->GetVector(),
2691 ra2->GetVector(),
2692 ra3->GetVector(),
2693 ra4->GetVector());
2694 if (sign > 0.0 && pat->atom[j].chiral_flag == AL_ANTICLOCKWISE)
2695 continue;
2696 if (sign < 0.0 && pat->atom[j].chiral_flag == AL_CLOCKWISE)
2697 continue;
2698
2699 //ok - go ahead and save it
2700 tmpmlist.push_back(*m);
2701 }
2702 mlist = tmpmlist;
2703 }
2704 }
2705
2706 return(!mlist.empty());
2707 }
2708
2709 #define RECURSIVE
2710
2711 #ifdef RECURSIVE
2712 static bool EvalAtomExpr(AtomExpr *expr,OBAtom *atom)
2713 {
2714 for (;;)
2715 switch (expr->type)
2716 {
2717 case AE_LEAF:
2718 switch( expr->leaf.prop )
2719 {
2720 case AL_ELEM:
2721 return(expr->leaf.value == (int)atom->GetAtomicNum());
2722 case AL_AROM:
2723 if( !expr->leaf.value )
2724 return !atom->IsAromatic();
2725 return atom->IsAromatic();
2726 case AL_HCOUNT: // should always return the number of explicit Hs.
2727 return (expr->leaf.value==(signed int)atom->ExplicitHydrogenCount());
2728 case AL_DEGREE:
2729 return(expr->leaf.value == (int)atom->GetValence());
2730 case AL_VALENCE:
2731 return(expr->leaf.value == (int)atom->KBOSum());
2732 case AL_CONNECT:
2733 return(expr->leaf.value == (int)atom->GetImplicitValence());
2734 case AL_NEGATIVE:
2735 return(expr->leaf.value == -(atom->GetFormalCharge()));
2736 case AL_POSITIVE:
2737 return(expr->leaf.value == atom->GetFormalCharge());
2738 case AL_HYB:
2739 return(expr->leaf.value == (int)atom->GetHyb());
2740
2741 case AL_RINGS:
2742 if( expr->leaf.value == -1 )
2743 return atom->IsInRing();
2744 else if( expr->leaf.value == 0 )
2745 return !atom->IsInRing();
2746 else
2747 return expr->leaf.value == (int)atom->MemberOfRingCount();
2748
2749 case AL_SIZE:
2750 if( expr->leaf.value == -1 )
2751 return atom->IsInRing();
2752 if (!expr->leaf.value)
2753 return !atom->IsInRing();
2754 else
2755 return atom->IsInRingSize(expr->leaf.value);
2756
2757 case AL_IMPLICIT:
2758 return expr->leaf.value == (int)atom->ImplicitHydrogenCount();
2759
2760 case AL_CONST:
2761 if( !expr->leaf.value )
2762 return false;
2763 return(true);
2764 default:
2765 return false;
2766 }
2767
2768 case AE_NOT:
2769 return(!EvalAtomExpr(expr->mon.arg,atom));
2770 case AE_ANDHI: /* Same as AE_ANDLO */
2771 case AE_ANDLO:
2772 if( !EvalAtomExpr(expr->bin.lft,atom))
2773 return(false);
2774 expr = expr->bin.rgt;
2775 break;
2776 case AE_OR:
2777 if(EvalAtomExpr(expr->bin.lft,atom))
2778 return(true);
2779 expr = expr->bin.rgt;
2780 break;
2781
2782 case AE_RECUR:
2783 {
2784 //see if pattern has been matched
2785 std::vector<std::pair<Pattern*,std::vector<bool> > >::iterator i;
2786 for (i = RSCACHE.begin();i != RSCACHE.end();i++)
2787 if (i->first == (Pattern*)expr->recur.recur)
2788 return(i->second[atom->GetIdx()]);
2789
2790 //perceive and match pattern
2791 std::vector<std::vector<int> >::iterator j;
2792 std::vector<bool> vb(((OBMol*) atom->GetParent())->NumAtoms()+1);
2793 std::vector<std::vector<int> > mlist;
2794 if (match( *((OBMol *) atom->GetParent()),
2795 (Pattern*)expr->recur.recur,mlist))
2796 for (j = mlist.begin();j != mlist.end();j++)
2797 vb[(*j)[0]] = true;
2798
2799 RSCACHE.push_back(std::pair<Pattern*,std::vector<bool> > ((Pattern*)expr->recur.recur,vb));
2800
2801 return(vb[atom->GetIdx()]);
2802 }
2803
2804 default:
2805 return(false);
2806 }
2807 }
2808
2809 #else
2810
2811 static bool EvalAtomExpr(AtomExpr *expr,OBAtom *atom)
2812 {
2813 int size=0;
2814 #define OB_EVAL_STACKSIZE 40
2815
2816 AtomExpr *stack[OB_EVAL_STACKSIZE];
2817 memset(stack,'\0',sizeof(AtomExpr*)*OB_EVAL_STACKSIZE);
2818 #undef OB_EVAL_STACKSIZE
2819
2820 bool lftest=true;
2821
2822 for (size=0,stack[size] = expr;size >= 0;expr=stack[size])
2823 {
2824 switch (expr->type)
2825 {
2826 case AE_LEAF:
2827 switch( expr->leaf.prop )
2828 {
2829 //expr->leaf.value
2830 case AL_ELEM:
2831 lftest = (expr->leaf.value == atom->GetAtomicNum());
2832 break;
2833 case AL_AROM:
2834 lftest = (expr->leaf.value == (int)atom->IsAromatic());
2835 break;
2836 case AL_HCOUNT:
2837 if (atom->ExplicitHydrogenCount() > atom->ImplicitHydrogenCount())
2838 lftest=(expr->leaf.value==atom->ExplicitHydrogenCount());
2839 else
2840 lftest=(expr->leaf.value==atom->ImplicitHydrogenCount());
2841 break;
2842 case AL_DEGREE:
2843 lftest = (expr->leaf.value == atom->GetHvyValence());
2844 break;
2845 case AL_VALENCE:
2846 lftest = (expr->leaf.value == atom->BOSum());
2847 break;
2848 case AL_CONNECT: //X
2849 lftest = (expr->leaf.value == atom->GetImplicitValence());
2850 break;
2851 case AL_NEGATIVE:
2852 lftest=(expr->leaf.value == -1*(atom->GetFormalCharge()));
2853 break;
2854 case AL_POSITIVE:
2855 lftest=(expr->leaf.value == atom->GetFormalCharge());
2856 break;
2857 case AL_HYB:
2858 lftest=(expr->leaf.value == atom->GetHyb());
2859 break;
2860 case AL_RINGS:
2861 if (expr->leaf.value == -1)
2862 lftest = (atom->IsInRing());
2863 else
2864 if (expr->leaf.value == 0)
2865 lftest = !(atom->IsInRing());
2866 else
2867 lftest=(atom->MemberOfRingCount()==expr->leaf.value);
2868 break;
2869 case AL_SIZE:
2870 if (!expr->leaf.value)
2871 lftest = !atom->IsInRing();
2872 else
2873 lftest = atom->IsInRingSize(expr->leaf.value);
2874 break;
2875
2876 case AL_IMPLICIT:
2877 lftest=(expr->leaf.value==atom->ImplicitHydrogenCount());
2878 break;
2879 case AL_CONST:
2880 lftest= true; // not limited to non-hydrogens
2881 break;
2882 case AL_MASS:
2883 break;
2884 default:
2885 break;
2886 }
2887 size--;
2888 break;
2889
2890 case AE_ANDHI:
2891
2892 if (stack[size+1] == expr->bin.rgt)
2893 size--;
2894 else if (stack[size+1] == expr->bin.lft)
2895 {
2896 if (lftest)
2897 {
2898 size++;
2899 stack[size] = expr->bin.rgt;
2900 }
2901 else
2902 size--;
2903 }
2904 else
2905 {
2906 size++;
2907 stack[size] = expr->bin.lft;
2908 }
2909 break;
2910
2911 case AE_OR:
2912
2913 if (stack[size+1] == expr->bin.rgt)
2914 size--;
2915 else if (stack[size+1] == expr->bin.lft)
2916 {
2917 if (!lftest)
2918 {
2919 size++;
2920 stack[size] = expr->bin.rgt;
2921 }
2922 else
2923 size--;
2924 }
2925 else
2926 {
2927 size++;
2928 stack[size] = expr->bin.lft;
2929 }
2930 break;
2931
2932 case AE_ANDLO:
2933
2934 if (stack[size+1] == expr->bin.rgt)
2935 size--;
2936 else if (stack[size+1] == expr->bin.lft)
2937 {
2938 if (lftest)
2939 {
2940 size++;
2941 stack[size] = expr->bin.rgt;
2942 }
2943 else
2944 size--;
2945 }
2946 else
2947 {
2948 size++;
2949 stack[size] = expr->bin.lft;
2950 }
2951 break;
2952
2953 case AE_NOT:
2954 if (stack[size+1] != expr->mon.arg)
2955 {
2956 size++;
2957 stack[size] = expr->mon.arg;
2958 }
2959 else
2960 {
2961 lftest = !lftest;
2962 size--;
2963 }
2964 break;
2965
2966 case AE_RECUR:
2967 //see if pattern has been matched
2968 bool matched=false;
2969
2970 std::vector<std::pair<Pattern*,std::vector<bool> > >::iterator i;
2971 for (i = RSCACHE.begin();i != RSCACHE.end();i++)
2972 if (i->first == (Pattern*)expr->recur.recur)
2973 {
2974 lftest = i->second[atom->GetIdx()];
2975 matched = true;
2976 break;
2977 }
2978
2979 if (!matched)
2980 {
2981 std::vector<bool> vb(atom->GetParent()->NumAtoms()+1);
2982 std::vector<std::vector<int> > mlist;
2983 lftest = false;
2984 if (match((*atom->GetParent()),(Pattern*)expr->recur.recur,mlist))
2985 {
2986 std::vector<std::vector<int> >::iterator i;
2987 for (i = mlist.begin();i != mlist.end();i++)
2988 {
2989 if ((*i)[0] == atom->GetIdx())
2990 lftest = true;
2991 vb[(*i)[0]] = true;
2992 }
2993 }
2994 RSCACHE.push_back(std::pair<Pattern*,std::vector<bool> > ((Pattern*)expr->recur.recur,vb));
2995 }
2996
2997 size--;
2998 break;
2999 }
3000 }
3001
3002 return(lftest);
3003 }
3004 #endif
3005
3006 #ifdef RECURSIVE
3007
3008 static bool EvalBondExpr(BondExpr *expr,OBBond *bond)
3009 {
3010 for (;;)
3011 switch( expr->type )
3012 {
3013 case BE_LEAF:
3014
3015 if( expr->leaf.prop == BL_CONST )
3016 return((expr->leaf.value != 0) ? true : false);
3017 else
3018 switch( expr->leaf.value )
3019 {
3020 case BT_SINGLE:
3021 return(bond->GetBO() == 1 && !bond->IsAromatic());
3022 case BT_AROM:
3023 return(bond->IsAromatic());
3024 case BT_DOUBLE:
3025 return(bond->GetBO()==2 && !bond->IsAromatic());
3026 case BT_TRIPLE:
3027 return(bond->GetBO()==3);
3028 case BT_RING:
3029 return(bond->IsInRing());
3030 case BT_UP:
3031 return(bond->IsUp());
3032 case BT_DOWN:
3033 return(bond->IsDown());
3034 case BT_UPUNSPEC: // up or unspecified (i.e., not down)
3035 return(!bond->IsDown());
3036 case BT_DOWNUNSPEC: // down or unspecified (i.e., not up)
3037 return(!bond->IsUp());
3038 default:
3039 return(false);
3040 }
3041
3042
3043 case BE_NOT:
3044 return(!EvalBondExpr(expr->mon.arg,bond));
3045 case BE_ANDHI:
3046 case BE_ANDLO:
3047 if (!EvalBondExpr(expr->bin.lft,bond))
3048 return(false);
3049 expr = expr->bin.rgt;
3050 break;
3051
3052 case BE_OR:
3053 if (EvalBondExpr(expr->bin.lft,bond))
3054 return(true);
3055 expr = expr->bin.rgt;
3056 break;
3057 default:
3058 return false;
3059 }
3060 }
3061
3062 #else
3063
3064 static bool EvalBondExpr(BondExpr *expr,OBBond *bond)
3065 {
3066 int size=0;
3067 #define OB_EVAL_STACKSIZE 40
3068
3069 BondExpr *stack[OB_EVAL_STACKSIZE];
3070 memset(stack,'\0',sizeof(AtomExpr*)*OB_EVAL_STACKSIZE);
3071 #undef OB_EVAL_STACKSIZE
3072
3073 bool lftest=true;
3074 for (size=0,stack[size] = expr;size >= 0;expr=stack[size])
3075 switch( expr->type )
3076 {
3077 case(BE_LEAF):
3078
3079 if( expr->leaf.prop == BL_CONST )
3080 lftest = (expr->leaf.value)?true:false;
3081 else /* expr->leaf.prop == BL_TYPE */
3082 switch( expr->leaf.value )
3083 {
3084 case(BT_SINGLE):
3085 lftest = (bond->GetBO() == 1 && !bond->IsAromatic());
3086 break;
3087 case(BT_DOUBLE):
3088 lftest = (bond->GetBO()==2 && !bond->IsAromatic());
3089 break;
3090 case(BT_TRIPLE):
3091 lftest = (bond->GetBO()==3);
3092 break;
3093 case(BT_AROM): lftest=bond->IsAromatic();
3094 break;
3095 case(BT_RING): lftest=bond->IsInRing();
3096 break;
3097 case(BT_UP): lftest= (bond->IsUp() && bond->GetBO()==1 && !bond->IsAromatic());
3098 break;
3099 case(BT_DOWN): lftest= (bond->IsDown() && bond->GetBO()==1 && !bond->IsAromatic());
3100 break;
3101 case(BT_UPUNSPEC): lftest= !bond->IsDown();
3102 break;
3103 case(BT_DOWNUNSPEC): lftest= !bond->IsUp();
3104 break;
3105 }
3106 size--;
3107 break;
3108
3109 case(BE_NOT):
3110 if (stack[size+1] != expr->mon.arg)
3111 {
3112 size++;
3113 stack[size] = expr->mon.arg;
3114 }
3115 else
3116 {
3117 lftest = !lftest;
3118 size--;
3119 }
3120 break;
3121
3122 case(BE_ANDHI):
3123 if (stack[size+1] == expr->bin.rgt)
3124 size--;
3125 else if (stack[size+1] == expr->bin.lft)
3126 {
3127 if (lftest)
3128 {
3129 size++;
3130 stack[size] = expr->bin.rgt;
3131 }
3132 else
3133 size--;
3134 }
3135 else
3136 {
3137 size++;
3138 stack[size] = expr->bin.lft;
3139 }
3140 break;
3141
3142 case(BE_ANDLO):
3143 if (stack[size+1] == expr->bin.rgt)
3144 size--;
3145 else if (stack[size+1] == expr->bin.lft)
3146 {
3147 if (lftest)
3148 {
3149 size++;
3150 stack[size] = expr->bin.rgt;
3151 }
3152 else
3153 size--;
3154 }
3155 else
3156 {
3157 size++;
3158 stack[size] = expr->bin.lft;
3159 }
3160 break;
3161
3162 case(BE_OR):
3163 if (stack[size+1] == expr->bin.rgt)
3164 size--;
3165 else if (stack[size+1] == expr->bin.lft)
3166 {
3167 if (!lftest)
3168 {
3169 size++;
3170 stack[size] = expr->bin.rgt;
3171 }
3172 else
3173 size--;
3174 }
3175 else
3176 {
3177 size++;
3178 stack[size] = expr->bin.lft;
3179 }
3180 break;
3181 }
3182 return(lftest);
3183 }
3184 #endif
3185
3186 std::vector<std::vector<int> > &OBSmartsPattern::GetUMapList()
3187 {
3188 if (_mlist.empty() || _mlist.size() == 1)
3189 return(_mlist);
3190
3191 bool ok;
3192 OBBitVec bv;
3193 std::vector<OBBitVec> vbv;
3194 std::vector<std::vector<int> > mlist;
3195 std::vector<std::vector<int> >::iterator i;
3196 std::vector<OBBitVec>::iterator j;
3197
3198 for (i = _mlist.begin();i != _mlist.end();i++)
3199 {
3200 ok = true;
3201 bv.Clear();
3202 bv.FromVecInt(*i);
3203 for (j = vbv.begin();j != vbv.end() && ok;j++)
3204 if ((*j) == bv)
3205 ok = false;
3206
3207 if (ok)
3208 {
3209 mlist.push_back(*i);
3210 vbv.push_back(bv);
3211 }
3212 }
3213
3214 _mlist = mlist;
3215 return(_mlist);
3216 }
3217
3218 void OBSmartsPattern::WriteMapList(ostream &ofs)
3219 {
3220 std::vector<std::vector<int> >::iterator i;
3221 std::vector<int>::iterator j;
3222
3223 for ( i = _mlist.begin() ; i != _mlist.end() ; i++ )
3224 {
3225 for (j = (*i).begin();j != (*i).end();j++)
3226 ofs << *j << ' ' << ends;
3227 ofs << endl;
3228 }
3229 }
3230
3231 //*******************************************************************
3232 // The OBSSMatch class performs exhaustive matching using recursion
3233 // Explicit stack handling is used to find just a single match in
3234 // match()
3235 //*******************************************************************
3236
3237 OBSSMatch::OBSSMatch(OBMol &mol,Pattern *pat)
3238 {
3239 _mol = &mol;
3240 _pat = pat;
3241 _map.resize(pat->acount);
3242
3243 if (!mol.Empty())
3244 {
3245 _uatoms = new bool [mol.NumAtoms()+1];
3246 memset((char*)_uatoms,'\0',sizeof(bool)*(mol.NumAtoms()+1));
3247 }
3248 else
3249 _uatoms = (bool*)NULL;
3250 }
3251
3252 OBSSMatch::~OBSSMatch()
3253 {
3254 if (_uatoms)
3255 delete [] _uatoms;
3256 }
3257
3258 void OBSSMatch::Match(std::vector<std::vector<int> > &mlist,int bidx)
3259 {
3260 if (bidx == -1)
3261 {
3262 OBAtom *atom;
3263 std::vector<OBNodeBase*>::iterator i;
3264 for (atom = _mol->BeginAtom(i);atom;atom = _mol->NextAtom(i))
3265 if (EvalAtomExpr(_pat->atom[0].expr,atom))
3266 {
3267 _map[0] = atom->GetIdx();
3268 _uatoms[atom->GetIdx()] = true;
3269 Match(mlist,0);
3270 _map[0] = 0;
3271 _uatoms[atom->GetIdx()] = false;
3272 }
3273 return;
3274 }
3275
3276 if (bidx == _pat->bcount) //save full match here
3277 {
3278 mlist.push_back(_map);
3279 return;
3280 }
3281
3282 if (_pat->bond[bidx].grow) //match the next bond
3283 {
3284 int src,dst;
3285 src = _pat->bond[bidx].src;
3286 dst = _pat->bond[bidx].dst;
3287
3288 if (_map[src] <= 0 || _map[src] > _mol->NumAtoms())
3289 return;
3290
3291 AtomExpr *aexpr = _pat->atom[dst].expr;
3292 BondExpr *bexpr = _pat->bond[bidx].expr;
3293 OBAtom *atom,*nbr;
3294 std::vector<OBEdgeBase*>::iterator i;
3295
3296 atom = _mol->GetAtom(_map[src]);
3297 for (nbr = atom->BeginNbrAtom(i);nbr;nbr = atom->NextNbrAtom(i))
3298 if (!_uatoms[nbr->GetIdx()] && EvalAtomExpr(aexpr,nbr) &&
3299 EvalBondExpr(bexpr,((OBBond*) *i)))
3300 {
3301 _map[dst] = nbr->GetIdx();
3302 _uatoms[nbr->GetIdx()] = true;
3303 Match(mlist,bidx+1);
3304 _uatoms[nbr->GetIdx()] = false;
3305 _map[dst] = 0;
3306 }
3307 }
3308 else //just check bond here
3309 {
3310 OBBond *bond = _mol->GetBond(_map[_pat->bond[bidx].src],
3311 _map[_pat->bond[bidx].dst]);
3312 if (bond && EvalBondExpr(_pat->bond[bidx].expr,bond))
3313 Match(mlist,bidx+1);
3314 }
3315 }
3316
3317 static int GetExprOrder(BondExpr *expr)
3318 {
3319 int size=0;
3320 BondExpr *stack[15];
3321 memset(stack,'\0',sizeof(AtomExpr*)*15);
3322 bool lftest=true;
3323
3324 for (size=0,stack[size] = expr;size >= 0;expr=stack[size])
3325 switch( expr->type )
3326 {
3327 case(BE_LEAF):
3328
3329 if( expr->leaf.prop == BL_CONST )
3330 lftest = true;
3331 else /* expr->leaf.prop == BL_TYPE */
3332 switch( expr->leaf.value )
3333 {
3334 case(BT_SINGLE): return(1);
3335 case(BT_DOUBLE): return(2);
3336 case(BT_TRIPLE): return(3);
3337 case(BT_AROM): return(5);
3338 default:
3339 lftest = true;
3340 }
3341 size--;
3342 break;
3343
3344 case(BE_NOT): return(0);
3345 case(BE_ANDHI):
3346 case(BE_ANDLO):
3347 case(BE_OR):
3348 if (stack[size+1] == expr->bin.rgt)
3349 size--;
3350 else if (stack[size+1] == expr->bin.lft)
3351 {
3352 if (lftest)
3353 {
3354 size++;
3355 stack[size] = expr->bin.rgt;
3356 }
3357 else
3358 size--;
3359 }
3360 else
3361 {
3362 size++;
3363 stack[size] = expr->bin.lft;
3364 }
3365 break;
3366 }
3367
3368 return(0);
3369 }
3370
3371 int OBSmartsPattern::GetCharge(int idx)
3372 {
3373 AtomExpr *expr = _pat->atom[idx].expr;
3374
3375 int size=0;
3376 AtomExpr *stack[15];
3377 memset(stack,'\0',sizeof(AtomExpr*)*15);
3378 bool lftest=true;
3379
3380 for (size=0,stack[size] = expr;size >= 0;expr=stack[size])
3381 {
3382 switch (expr->type)
3383 {
3384 case AE_LEAF:
3385 switch( expr->leaf.prop )
3386 {
3387 case AL_NEGATIVE:
3388 return(-1*(int)expr->leaf.value);
3389 case AL_POSITIVE:
3390 return((int)expr->leaf.value);
3391 default:
3392 lftest=true;
3393 }
3394 size--;
3395 break;
3396
3397 case AE_OR:
3398 case AE_ANDHI:
3399 case AE_ANDLO:
3400
3401 if (stack[size+1] == expr->bin.rgt)
3402 size--;
3403 else if (stack[size+1] == expr->bin.lft)
3404 {
3405 if (lftest)
3406 {
3407 size++;
3408 stack[size] = expr->bin.rgt;
3409 }
3410 else
3411 size--;
3412 }
3413 else
3414 {
3415 size++;
3416 stack[size] = expr->bin.lft;
3417 }
3418 break;
3419
3420 case AE_NOT:
3421 return(0);
3422 case AE_RECUR:
3423 return(0);
3424 }
3425 }
3426
3427 return(0);
3428 }
3429
3430 int OBSmartsPattern::GetAtomicNum(int idx)
3431 {
3432 AtomExpr *expr = _pat->atom[idx].expr;
3433
3434 int size=0;
3435 AtomExpr *stack[15];
3436 memset(stack,'\0',sizeof(AtomExpr*)*15);
3437 bool lftest=true;
3438
3439 for (size=0,stack[size] = expr;size >= 0;expr=stack[size])
3440 {
3441 switch (expr->type)
3442 {
3443 case AE_LEAF:
3444 if ( expr->leaf.prop == AL_ELEM)
3445 return(expr->leaf.value);
3446 lftest = true;
3447 size--;
3448 break;
3449
3450 case AE_OR:
3451 case AE_ANDHI:
3452 case AE_ANDLO:
3453
3454 if (stack[size+1] == expr->bin.rgt)
3455 size--;
3456 else if (stack[size+1] == expr->bin.lft)
3457 {
3458 if (lftest)
3459 {
3460 size++;
3461 stack[size] = expr->bin.rgt;
3462 }
3463 else
3464 size--;
3465 }
3466 else
3467 {
3468 size++;
3469 stack[size] = expr->bin.lft;
3470 }
3471 break;
3472
3473 case AE_NOT:
3474 return(0);
3475 case AE_RECUR:
3476 return(0);
3477 }
3478 }
3479
3480 return(0);
3481 }
3482
3483 void OBSmartsPattern::GetBond(int &src,int &dst,int &ord,int idx)
3484 {
3485 src = _pat->bond[idx].src;
3486 dst = _pat->bond[idx].dst;
3487 ord = GetExprOrder(_pat->bond[idx].expr);
3488 }
3489
3490 void SmartsLexReplace(std::string &s,std::vector<std::pair<std::string,std::string> > &vlex)
3491 {
3492 size_t j,pos;
3493 std::string token,repstr;
3494 std::vector<std::pair<std::string,std::string> >::iterator i;
3495
3496 for (pos = 0,pos = s.find("$",pos);pos < s.size();pos = s.find("$",pos))
3497 //for (pos = 0,pos = s.find("$",pos);pos != std::string::npos;pos = s.find("$",pos))
3498 {
3499 pos++;
3500 for (j = pos;j < s.size();j++)
3501 if (!isalpha(s[j]) && !isdigit(s[j]) && s[j] != '_')
3502 break;
3503 if (pos == j)
3504 continue;
3505
3506 token = s.substr(pos,j-pos);
3507 for (i = vlex.begin();i != vlex.end();i++)
3508 if (token == i->first)
3509 {
3510 repstr = "(" + i->second + ")";
3511 s.replace(pos,j-pos,repstr);
3512 j = 0;
3513 }
3514 pos = j;
3515 }
3516 }
3517
3518 } // end namespace OpenBabel
3519
3520 //! \file parsmart.cpp
3521 //! \brief Implementation of Daylight SMARTS parser.