ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/OOPSE-4/src/openbabel/fingerprint.cpp
Revision: 3057
Committed: Thu Oct 19 20:49:05 2006 UTC (17 years, 11 months ago) by gezelter
File size: 21414 byte(s)
Log Message:
updated OpenBabel to version 2.0.2

File Contents

# User Rev Content
1 tim 2440 /**********************************************************************
2     fingerprint.cpp - Implementation of fingerpring base class and fastsearching
3    
4     Copyright (C) 2005 by Chris Morley
5    
6     This file is part of the Open Babel project.
7     For more information, see <http://openbabel.sourceforge.net/>
8    
9     This program is free software; you can redistribute it and/or modify
10     it under the terms of the GNU General Public License as published by
11     the Free Software Foundation version 2 of the License.
12    
13     This program is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16     GNU General Public License for more details.
17     ***********************************************************************/
18    
19 gezelter 2450 #include "config.h"
20 tim 2440 #include <vector>
21     #include <algorithm>
22    
23     #if HAVE_IOSTREAM
24     #include <iostream>
25     #elif HAVE_IOSTREAM_H
26     #include <iostream.h>
27     #endif
28    
29     #if HAVE_FSTREAM
30     #include <fstream>
31     #elif HAVE_FSTREAM_H
32     #include <fstream.h>
33     #endif
34     #include <vector>
35    
36     #include "fingerprint.hpp"
37     #include "oberror.hpp"
38    
39     using namespace std;
40     namespace OpenBabel {
41    
42     OBFingerprint* OBFingerprint::_pDefault; //static variable
43     const unsigned int OBFingerprint::bitsperint = 8 * sizeof(unsigned int);
44 gezelter 3057 int OBFingerprint::rubbish = 666;
45 tim 2440
46     void OBFingerprint::SetBit(vector<unsigned int>& vec, unsigned int n)
47     {
48 gezelter 3057 vec[n/Getbitsperint()] |= (1 << (n % Getbitsperint()));
49 tim 2440 }
50    
51     ////////////////////////////////////////
52     void OBFingerprint::Fold(vector<unsigned int>& vec, unsigned int nbits)
53     {
54 gezelter 3057 while(vec.size()*Getbitsperint()/2 >= nbits)
55 tim 2440 vec.erase(transform(vec.begin(),vec.begin()+vec.size()/2,
56     vec.begin()+vec.size()/2, vec.begin(), bit_or()), vec.end());
57     }
58    
59     ////////////////////////////////////////
60     bool OBFingerprint::GetNextFPrt(std::string& id, OBFingerprint*& pFPrt)
61     {
62     Fptpos iter;
63     if(id.empty())
64     iter=FPtsMap().begin();
65     else
66     {
67     iter=FPtsMap().find(id);
68     if(iter!=FPtsMap().end())
69     ++iter;
70     }
71     if(iter==FPtsMap().end())
72     return false;
73     id = iter->first;
74     pFPrt = iter->second;
75     return true;
76     }
77    
78     OBFingerprint* OBFingerprint::FindFingerprint(string& ID)
79     {
80     if(ID.empty())
81     return _pDefault;
82     Fptpos iter = FPtsMap().find(ID);
83     if(iter==FPtsMap().end())
84     return NULL;
85     else
86     return iter->second;
87     }
88    
89     double OBFingerprint::Tanimoto(const vector<unsigned int>& vec1, const vector<unsigned int>& vec2)
90     {
91     //Independent of sizeof(unsigned int)
92     if(vec1.size()!=vec2.size())
93     return -1; //different number of bits
94     int andbits=0, orbits=0;
95     for (unsigned i=0;i<vec1.size();++i)
96     {
97     int andfp = vec1[i] & vec2[i];
98     int orfp = vec1[i] | vec2[i];
99     //Count bits
100     for(;andfp;andfp=andfp<<1)
101     if(andfp<0) ++andbits;
102     for(;orfp;orfp=orfp<<1)
103     if(orfp<0) ++orbits;
104     }
105     return((double)andbits/(double)orbits);
106     }
107    
108     //*****************************************************************8
109     /*!
110    
111     */
112     bool FastSearch::Find(OBBase* pOb, vector<unsigned int>& SeekPositions,
113     unsigned int MaxCandidates)
114     {
115     ///Finds chemical objects in datafilename (which must previously have been indexed)
116     ///that have all the same bits set in their fingerprints as in the fingerprint of
117     ///a pattern object. (Usually a substructure search in molecules.)
118     ///The type of fingerprint and its degree of folding does not have to be specified
119     ///here because the values in the index file are used.
120     ///The positions of the candidate matching molecules in the original datafile are returned.
121    
122     vector<unsigned int> vecwords;
123 gezelter 3057 _pFP->GetFingerprint(pOb,vecwords, _index.header.words * OBFingerprint::Getbitsperint());
124 tim 2440
125     vector<unsigned int>candidates; //indices of matches from fingerprint screen
126     candidates.reserve(MaxCandidates);
127    
128     unsigned int dataSize = _index.header.nEntries;
129     // GetFingerprint(mol, vecwords, _index.header.words, _index.header.fptype);
130    
131     unsigned int words = _index.header.words;
132     unsigned int* nextp = &_index.fptdata[0];
133     unsigned int* ppat0 = &vecwords[0];
134     register unsigned int* p;
135     register unsigned int* ppat;
136     register unsigned int a;
137     unsigned int i; // need address of this, can't be register
138     for(i=0;i<dataSize; i++) //speed critical section
139     {
140     p=nextp;
141     nextp += words;
142     ppat=ppat0;
143     a=0;
144     while(p<nextp)
145     {
146     if ( (a=((*ppat) & (*p++)) ^ (*ppat++)) ) break;
147     }
148     if(!a)
149     {
150     candidates.push_back(i);
151     if(candidates.size()>=MaxCandidates)
152     break;
153     }
154     }
155    
156     if(i<_index.header.nEntries) //premature end to search
157     {
158     #ifdef HAVE_SSTREAM
159     stringstream errorMsg;
160     #else
161     strstream errorMsg;
162     #endif
163     errorMsg << "Stopped looking after " << i << " molecules." << endl;
164 gezelter 3057 obErrorLog.ThrowError(__func__, errorMsg.str(), obWarning);
165 tim 2440 }
166    
167     vector<unsigned int>::iterator itr;
168     for(itr=candidates.begin();itr!=candidates.end();itr++)
169     {
170     SeekPositions.push_back(_index.seekdata[*itr]);
171     }
172     return true;
173     }
174    
175     /////////////////////////////////////////////////////////
176     bool FastSearch::FindSimilar(OBBase* pOb, multimap<double, unsigned int>& SeekposMap,
177     double MinTani)
178     {
179     vector<unsigned int> targetfp;
180 gezelter 3057 _pFP->GetFingerprint(pOb,targetfp, _index.header.words * OBFingerprint::Getbitsperint());
181 tim 2440
182     unsigned int words = _index.header.words;
183     unsigned int dataSize = _index.header.nEntries;
184     unsigned int* nextp = &_index.fptdata[0];
185     register unsigned int* p;
186     register unsigned int i;
187     for(i=0;i<dataSize; i++) //speed critical section
188     {
189     p=nextp;
190     nextp += words;
191     double tani = OBFingerprint::Tanimoto(targetfp,p);
192     if(tani>MinTani)
193     SeekposMap.insert(pair<const double, unsigned int>(tani,_index.seekdata[i]));
194     }
195     return true;
196     }
197    
198     /////////////////////////////////////////////////////////
199     bool FastSearch::FindSimilar(OBBase* pOb, multimap<double, unsigned int>& SeekposMap,
200     int nCandidates)
201     {
202     ///If nCandidates is zero or omitted the original size of the multimap is used
203     if(nCandidates)
204     {
205     //initialise the multimap with nCandidate zero entries
206     SeekposMap.clear();
207     int i;
208     for(i=0;i<nCandidates;++i)
209     SeekposMap.insert(pair<const double, unsigned int>(0,0));
210     }
211     else if(SeekposMap.size()==0)
212     return false;
213    
214     vector<unsigned int> targetfp;
215 gezelter 3057 _pFP->GetFingerprint(pOb,targetfp, _index.header.words * OBFingerprint::Getbitsperint());
216 tim 2440
217     unsigned int words = _index.header.words;
218     unsigned int dataSize = _index.header.nEntries;
219     unsigned int* nextp = &_index.fptdata[0];
220     register unsigned int* p;
221     register unsigned int i;
222     for(i=0;i<dataSize; i++) //speed critical section
223     {
224     p=nextp;
225     nextp += words;
226     double tani = OBFingerprint::Tanimoto(targetfp,p);
227     if(tani>SeekposMap.begin()->first)
228     {
229     SeekposMap.insert(pair<const double, unsigned int>(tani,_index.seekdata[i]));
230     SeekposMap.erase(SeekposMap.begin());
231     }
232     }
233     return true;
234 gezelter 3057 }
235    
236     /////////////////////////////////////////////////////////
237 tim 2440 string FastSearch::ReadIndex(istream* pIndexstream)
238     {
239     //Reads fs index from istream into member variables
240 gezelter 3057 _index.Read(pIndexstream);
241 tim 2440
242 gezelter 3057 _pFP = _index.CheckFP();
243     if(!_pFP)
244     *(_index.header.datafilename) = '\0';
245    
246     return _index.header.datafilename; //will be empty on error
247     }
248    
249     //////////////////////////////////////////////////////////
250     bool FptIndex::Read(istream* pIndexstream)
251     {
252     pIndexstream->read((char*)&(header), sizeof(FptIndexHeader));
253     pIndexstream->seekg(header.headerlength);//allows header length to be changed
254     if(pIndexstream->fail() || header.headerlength != sizeof(FptIndexHeader))
255 tim 2440 {
256 gezelter 3057 *(header.datafilename) = '\0';
257     return false;
258     }
259 tim 2440
260 gezelter 3057 unsigned int nwords = header.nEntries * header.words;
261     fptdata.resize(nwords);
262     seekdata.resize(header.nEntries);
263 tim 2440
264 gezelter 3057 pIndexstream->read((char*)&(fptdata[0]), sizeof(unsigned int) * nwords);
265     pIndexstream->read((char*)&(seekdata[0]), sizeof(unsigned int) * header.nEntries);
266    
267     if(pIndexstream->fail())
268     {
269     *(header.datafilename) = '\0';
270     return false;
271     }
272     return true;
273     }
274 tim 2440
275 gezelter 3057 //////////////////////////////////////////////////////////
276     OBFingerprint* FptIndex::CheckFP()
277     {
278     //check that fingerprint type is available
279     string tempFP(header.fpid);
280     OBFingerprint* pFP = OBFingerprint::FindFingerprint(tempFP);
281     if(!pFP)
282     {
283     #ifdef HAVE_SSTREAM
284     stringstream errorMsg;
285     #else
286     strstream errorMsg;
287     #endif
288     errorMsg << "Index has Fingerprints of type '" << header.fpid
289     << " which is not currently loaded." << endl;
290     obErrorLog.ThrowError(__func__, errorMsg.str(), obError);
291 tim 2440 }
292 gezelter 3057 return pFP; //NULL if not available
293 tim 2440 }
294    
295     //*******************************************************
296     FastSearchIndexer::FastSearchIndexer(string& datafilename, ostream* os,
297     std::string& fpid, int FptBits)
298     {
299     ///Starts indexing process
300     _indexstream = os;
301     _nbits=FptBits;
302     _pindex= new FptIndex;
303     _pindex->header.headerlength = sizeof(FptIndexHeader);
304     strncpy(_pindex->header.fpid,fpid.c_str(),15);
305     strncpy(_pindex->header.datafilename, datafilename.c_str(), 255);
306 gezelter 3057
307     //check that fingerprint type is available
308     _pFP = _pindex->CheckFP();
309 tim 2440 }
310    
311     /////////////////////////////////////////////////////////////
312 gezelter 3057 FastSearchIndexer::FastSearchIndexer(FptIndex* pindex, std::ostream* os)
313     {
314     //Uses existing index
315     _indexstream = os;
316     _pindex = pindex;
317     _nbits = _pindex->header.words * OBFingerprint::Getbitsperint();
318    
319     //check that fingerprint type is available
320     _pFP = _pindex->CheckFP();
321     }
322    
323     /////////////////////////////////////////////////////////////
324 tim 2440 FastSearchIndexer::~FastSearchIndexer()
325     {
326     ///Saves index file
327     _pindex->header.nEntries = _pindex->seekdata.size();
328     _indexstream->write((const char*)&_pindex->header, sizeof(FptIndexHeader));
329     _indexstream->write((const char*)&_pindex->fptdata[0], _pindex->fptdata.size()*sizeof(unsigned int));
330     _indexstream->write((const char*)&_pindex->seekdata[0], _pindex->seekdata.size()*sizeof(unsigned int));
331     if(!_indexstream)
332 tim 2518 obErrorLog.ThrowError(__func__,
333 tim 2440 "Difficulty writing index", obWarning);
334     delete _pindex;
335     }
336    
337     ///////////////////////////////////////////////////////////////
338     bool FastSearchIndexer::Add(OBBase* pOb, streampos seekpos)
339     {
340     ///Adds a fingerprint
341    
342     vector<unsigned int> vecwords;
343     if(!_pFP)
344     return false;
345     if(_pFP->GetFingerprint(pOb, vecwords, _nbits))
346     {
347     _pindex->header.words = vecwords.size(); //Use size as returned from fingerprint
348     for(unsigned int i=0;i<_pindex->header.words;i++)
349     _pindex->fptdata.push_back(vecwords[i]);
350     _pindex->seekdata.push_back(seekpos);
351     return true;
352     }
353 tim 2518 obErrorLog.ThrowError(__func__, "Failed to make a fingerprint", obWarning);
354 tim 2440 return false;
355     }
356    
357     /*!
358     \class OBFingerprint
359     These fingerprints are condensed representation of molecules (or other objects)
360     as a list of boolean values (actually bits in a vector<unsigned>) with length
361     of a power of 2. The main motivation is for fast searching of data sources
362     containing large numbers of molecules (up to several million). OpenBabel
363     provides some routines which can search text files containing lists of molecules
364     in any format. See the documentation on the class FastSearch.
365    
366     There are descriptions of molecular fingerprints at <br>
367     http://www.daylight.com/dayhtml/doc/theory/theory.finger.html) and <br>
368     http://www.mesaac.com/Fingerprint.htm <br>
369     Many methods of preparing fingerprints have been described, but the type supported
370     currently in OpenBabel has each bit representing a substructure (or other
371     molecular property). If a substructure is present in the molecule, then a
372     particular bit is set to 1. But because the hashing method may also map other
373     substructures to the same bit, a match does not guarantee that a particular
374     substructure is present; there may be false positives. However, with proper design,
375     a large fraction of irrelevant molecules in a data set can be eliminated in a
376     fast search with boolean methods on the fingerprints.
377     It then becomes feasible to make a definitive substructure search by
378     conventional methods on this reduced list even if it is slow.
379    
380     OpenBabel provides a framework for applying new types of fingerprints without
381     changing any existing code. They are derived from OBFingerprint and the
382     source file is just compiled with the rest of OpenBabel. Alternatively,
383     they can be separately compiled as a DLL or shared library and discovered
384     when OpenBabel runs.
385    
386     Fingerprints derived from this abstract base class OBFingerprint can be for any
387     object derived from OBBase (not just for OBMol).
388     Each derived class provides an ID as a string and OBFingerprint keeps a map of
389     these to provides a pointer to the class when requested in FindFingerprint.
390    
391     <h4>-- To define a fingerprint type --</h4>
392     The classes derived form OBFingerprint are required to provide
393     a GetFingerprint() routine and a Description() routine
394     \code
395     class MyFpType : OBFingerprint
396     {
397     MyFpType(const char* id) : OBFingerprint(id){};
398     virtual bool GetFingerprint(OBBase* pOb, vector<unsigned int>& fp, int nbits)
399     {
400     //Convert pOb to the required type, usually OBMol
401     OBMol* pmol = dynamic_cast<OBMol*>(pOb);
402     fp.resize(required_number_of_words);
403     ...
404     use SetBit(fp,n); to set the nth bit
405    
406     if(nbits)
407     Fold(fp, nbits);
408     }
409     virtual const char* Description(){ return "Some descriptive text";}
410     ...
411     };
412     \endcode
413     //Declare a global instance with the ID you will use in -f options to specify its use.
414     \code
415     MyFpType theMyFpType("myfpID");
416     \endcode
417    
418     <h4>-- To obtain a fingerprint --</h4>
419     \code
420     OBMol mol;
421     ...
422     vector<unsigned int> fp;
423     OBFingerprint::GetDefault()->GetFingerprint(&mol, fp); //gets default size of fingerprint
424     \endcode
425     or
426     \code
427     vector<unsigned int> fp;
428     OBFingerPrint* pFP = OBFingerprint::FindFingerprint("myfpID");
429     ...and maybe...
430     pFP->GetFingerprint(&mol,fp, 128); //fold down to 128bits if was originally larger
431     \endcode
432    
433     <h4>-- To print a list of available fingerprint types --</h4>
434     \code
435     std::string id;
436     OBFingerPrint* pFPrt=NULL;
437     while(OBFingerprint::GetNextFPrt(id, pFPrt))
438     {
439     cout << id << " -- " << pFPrt->Description() << endl;
440     }
441     \endcode
442    
443    
444     Fingerprints are handled as vector<unsigned int> so that the number of bits
445     in this vector and their order will be platform and compiler
446     dependent, because of size of int and endian differences. Use fingerprints
447     (and fastsearch indexes containing them) only for comparing with other
448     fingerprints prepared on the same machine.
449    
450     The FingerprintFormat class is an output format which displays fingerprints
451     as hexadecimal. When multiple molecules are supplied it will calculate the
452     Tanimoto coefficient from the first molecule to each of the others. It also
453     shows whether the first molecule is a possible substructure to all the others,
454     i.e. whether all the bits set in the fingerprint for the first molecule are
455     set in the fingerprint of the others. To display hexadecimal information when
456     multiple molecules are provided it is necessay to use the -xh option.
457    
458     To see a list of available format types, type babel -F on the command line.
459     The -xF option of the FingerprintFormat class also provides this output, but due
460     to a quirk in the way the program works, it is necessary to have a valid input
461     molecule for this option to work.
462     */
463    
464     /*! \class FastSearch
465     The FastSearch class searches an index to a datafile containing a list of molecules
466     (or other objects) prepared by FastSearchIndexer.
467    
468     OpenBabel can also search files for molecules containing a substructure specified
469     by a SMARTS string, using OBSmartsPattern or from the command line:
470     \code
471     babel datafile.xxx -outfile.yyy -sSMARTS
472     \endcode
473     But when the data file contains more than about 10,000 molecules this becomes
474     rather too slow to be used interactively. To do it more quickly, an index
475     of the molecules containing their fingerprints (see OBFingerprint) is prepared using
476     FastSearchIndexer. The indexing process may take a long time but only has to
477     be done once. The index can be searched very quickly with FastSearch. Because
478     of the nature of fingerprints a match to a bit does not guarantee
479     the presence of a particular substructure or other molecular property, so that
480     a definitive answer may require a subsequent search of the (much reduced) list
481     of candidates by another method (like OBSmartsPattern).
482    
483     Note that the index files are not portable. They need to be prepared on the
484     computer that will acess them.
485    
486     <h4>Using FastSearch and FastSearchIndexer in a program</h4>
487    
488     The index has two tables:
489     - an array of fingerprints of the molecules
490     - an array of the seek positions in the datasource file of all the molecules
491    
492     <h4>To prepare an fastsearch index file:</h4>
493     - Open an ostream to the index file.
494     - Make a FastSearchIndexer object on the heap or the stack, passing in as parameters:
495     - the datafile name, the indexfile stream,
496     - the id of the fingerprint type to be used,
497     - the number of bits the fingerprint is to be folded down to, If it is to be left
498     unfolded, set fpid to 0 or do not specify it.
499     .
500     - For each molecule, call Add() with its pointer and its position in the datafile.<br>
501     Currently the std::streampos value is implicitly cast to unsigned int so that
502     for 32bit machines the datafile has to be no longer than about 2Gbyte.
503     - The index file is written when the FastSearchIndexer object is deleted or goes out
504     of scope.
505    
506     <h4>To search in a fastsearch index file</h4>
507    
508     - Open a std::istream to the indexfile (in binary mode on some systems)
509     - Make a FastSearch object, read the index and open the datafile whose
510     name it provides
511     \code
512     ifstream ifs(indexname,ios::binary);
513     FastSearch fs;
514     string datafilename = fs.ReadIndex(&ifs);
515     if(datafilename.empty()/endcode
516     return false;
517     ifstream datastream(datafilename);
518     if(!datastream)
519     return false;
520     \endcode
521    
522     <strong>To do a search for molecules which have all the substructure bits the
523     OBMol object, patternMol</strong>
524     \code
525     vector<unsigned int>& SeekPositions;
526     if(!fs.Find(patternMol, SeekPositions, MaxCandidates))
527    
528     for(itr=SeekPositions.begin();itr!=SeekPositions.end();itr++)
529     {
530     datastream.seekg(*itr);
531     ... read the candidate molecule
532     and subject to more rigorous test if necessary
533     }
534     \endcode
535    
536     <strong>To do a similarity search based on the Tanimoto coefficient</strong>
537     This is defined as: <br>
538     <em>Number of bits set in (patternFP & targetFP)/Number of bits in (patternFP | targetFP)</em><br>
539     where the boolean operations between the fingerprints are bitwise<br>
540     The Tanimoto coefficient has no absolute meaning and depends on
541     the design of the fingerprint.
542     \code
543     multimap<double, unsigned int> SeekposMap;
544     // to find n best molecules
545     fs.FindSimilar(&patternMol, SeekposMap, n);
546     ...or
547     // to find molecules with Tanimoto coefficient > MinTani
548     fs.FindSimilar(&patternMol, SeekposMap, MinTani);
549    
550     multimap<double, unsigned int>::reverse_iterator itr;
551     for(itr=SeekposMap.rbegin();itr!=SeekposMap.rend();++itr)
552     {
553     datastream.seekg(itr->second);
554     ... read the candidate molecule
555     double tani = itr->first;
556     }
557     \endcode
558    
559     The FastSearchFormat class facilitates the use of these routine from the
560     command line or other front end program. For instance:
561    
562     <strong>Prepare an index:</strong>
563     \code
564     babel datafile.xxx index.fs
565     \endcode
566     or
567     \code
568     babel datafile.xxx -ofs namedindex
569     \endcode
570     With options you can specify:
571     - which type of fingerprint to be used, e.g. -xfFP2,
572     - whether it is folded to a specified number of bits, e.g. -xn128
573     (which should be a power of2)
574     - whether to pre-select the molecules which are indexed:
575     - by structure e.g only ethers and esters, -sCOC
576     - by excluding molecules with bezene rings, -vc1ccccc1
577     - by position in the datafile e.g. only mols 10 to 90, -f10 -l90
578     .
579     <strong>Substructure search</strong> in a previously prepared index file
580     \code
581     babel index.fs outfile.yyy -sSMILES
582     \endcode
583     The datafile can also be used as the input file, provided the input format is specified as fs
584     \code
585     babel datafile.xxx outfile.yyy -sSMILES -ifs
586     \endcode
587     A "slow" search not using fingerprints would be done on the same data by omitting -ifs.
588     A "slow" search can use SMARTS strings, but the fast search is restricted
589     to the subset which is valid SMILES.
590    
591     With the -S option, the target can be specified as a molecule in a file of any format
592     \code
593     babel datafile.xxx outfile.yyy -Spattern_mol.zzz -ifs
594     \endcode
595     These searches have two stages: a fingerprint search which produces
596     a number of candidate molecules and a definitive search which selects
597     from these using SMARTS pattern matching. The maximum number of candidates
598     is 4000 by default but you can change this with an option
599     e.g. -al 8000 (Note that you need the space between l and the number.)
600     If the fingerprint search reaches the maximum number it will not
601     look further and will tell you at what stage it stopped.
602    
603     <strong>Similarity search</strong> in a previously prepared index file<br>
604     This rather done (rather than a substructure search) if the -at option is used,
605     \code
606     babel datafile.xxx outfile.yyy -sSMILES -ifs -at0.7
607     \endcode
608     for instance
609     - -at0.7 will recover all molecules with Tanimoto greater than 0.7
610     - -at15 (no decimal point) will recover the 15 molecules with largest coefficients.
611     - -aa will add the Tanimoto coefficient to the titles of the output molecules.
612    
613     All stages, the indexing, the interpretation of the SMILES string in the -s option,
614     the file in the -S option and the final SMARTS filter convert to OBMol and apply
615     ConvertDativeBonds(). This ensures thatforms such as[N+]([O-])=O and N(=O)=O
616     are recognized as chemically identical.
617     */
618    
619     }//Openbabel
620    
621     //! \file fingerprint.cpp
622     //! \brief Definitions for OBFingerprint base class and fastsearch classes