ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/OOPSE-3.0/src/openbabel/fingerprint.hpp
Revision: 2440
Committed: Wed Nov 16 19:42:11 2005 UTC (18 years, 9 months ago) by tim
File size: 6086 byte(s)
Log Message:
adding openbabel

File Contents

# User Rev Content
1 tim 2440 /**********************************************************************
2     fingerprint.hpp - Base class for fingerprints and fast searching
3    
4     Copyright (C) 2005 by Chris Morley
5    
6     This file is part of the Open Babel project.
7     For more information, see <http://openbabel.sourceforge.net/>
8    
9     This program is free software; you can redistribute it and/or modify
10     it under the terms of the GNU General Public License as published by
11     the Free Software Foundation version 2 of the License.
12    
13     This program is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16     GNU General Public License for more details.
17     ***********************************************************************/
18    
19     #ifndef OB_FINGERPRINT_H
20     #define OB_FINGERPRINT_H
21    
#include <iosfwd>
#include <list>
#include <map>
#include <set>
#include <string>
#include <vector>
27    
28     namespace OpenBabel
29     {
30     class OBBase; //Forward declaration; used only as pointer.
31    
32     /// \brief The base class for fingerprints
33     class OBAPI OBFingerprint
34     {
35     //see end of cpp file for detailed documentation
36     public:
37     /// Sets the nth bit
38     void SetBit(std::vector<unsigned int>& vec, unsigned int n);
39    
40     /// Repeatedly ORs the top half with the bottom half until no smaller than nbits
41     void Fold(std::vector<unsigned int>& vec, unsigned int nbits);
42    
43     /// Returns fingerprint in vector, which may be resized, folded to nbits (if nbits!=0)
44     virtual bool GetFingerprint(OBBase* pOb, std::vector<unsigned int>& fp, int nbits=0)=0;
45    
46     /// Required short description of the fingerprint type.
47     virtual std::string Description()=0;
48    
49     /// Optional flags
50     enum FptFlag{FPT_UNIQUEBITS=1};
51     virtual unsigned int Flags() { return 0;};
52    
53     /// Obtain info on available fingerprints
54     static bool GetNextFPrt(std::string& id, OBFingerprint*& pFPrt);
55    
56     /// Returns a pointer to a fingerprint (the default if ID is empty), or NULL if not available
57     static OBFingerprint* FindFingerprint(std::string& ID);
58    
59     /// Returns the Tanimoto coefficient between two vectors (vector<unsigned int>& SeekPositions)
60     static double Tanimoto(const std::vector<unsigned int>& vec1, const std::vector<unsigned int>& vec2);
61    
62     /// Inline version of Tanimoto() taking a pointer for the second vector
63     static double OBFingerprint::Tanimoto(const std::vector<unsigned int>& vec1, const unsigned int* p2)
64     {
65     ///If used for two vectors, vec1 and vec2, call as Tanimoto(vec1, &vec2[0]);
66     int andbits=0, orbits=0;
67     unsigned int i;
68     for (i=0;i<vec1.size();++i)
69     {
70     int andfp = vec1[i] & p2[i];
71     int orfp = vec1[i] | p2[i];
72     //Count bits
73     for(;andfp;andfp=andfp<<1)
74     if(andfp<0) ++andbits;
75     for(;orfp;orfp=orfp<<1)
76     if(orfp<0) ++orbits;
77     }
78     return((double)andbits/(double)orbits);
79     };
80    
81     static const unsigned int bitsperint;// = 8 * sizeof(unsigned int);
82    
83     private:
84     ///Function object to set bits
85     struct bit_or
86     {
87     unsigned int operator()(const unsigned int a, const unsigned int b)
88     {
89     return a | b;
90     }
91     };
92    
93     typedef std::map<std::string, OBFingerprint*> FPMapType;
94     typedef FPMapType::iterator Fptpos;
95    
96     protected:
97     ///This static function returns a reference to the FPtsMap
98     ///which, because it is a static local variable is constructed only once.
99     ///This fiddle is to avoid the "static initialization order fiasco"
100     ///See Marshall Cline's C++ FAQ Lite document, www.parashift.com/c++-faq-lite/".
101     static FPMapType& FPtsMap()
102     {
103     static FPMapType* fptm = new FPMapType;
104     return *fptm;
105     };
106    
107     OBFingerprint(std::string ID, bool IsDefault=false)
108     {
109     FPtsMap()[ID] = this; //registers the derived fingerprint class
110     if(IsDefault || FPtsMap().empty())
111     _pDefault=this;
112     };
113    
114     private:
115     static OBFingerprint* _pDefault;
116     };
117    
118    
119    
120    
121     //*************************************************************
122     //Fast search routines
123     ///Header for fastsearch index file
124     struct OBAPI FptIndexHeader
125     {
126     unsigned int headerlength;///<offset to data: sizeof(FptIndexHeader)
127     unsigned int nEntries; ///<number of fingerprints
128     unsigned int words; ///<number 32bit words per fingerprint
129     char fpid[16]; ///<ID of the fingerprint type
130     char datafilename[256]; ///<the data that this is an index to
131     };
132     /// Structure of fastsearch index files
133     struct OBAPI FptIndex
134     {
135     FptIndexHeader header;
136     std::vector<unsigned int> fptdata;
137     std::vector<unsigned int> seekdata;
138     };
139    
140     /// \brief Class to search fingerprint index files
141     class OBAPI FastSearch
142     {
143     //see end of cpp file for detailed documentation
144     public:
145     std::string ReadIndex(std::istream* pIndexstream);
146     virtual ~FastSearch(){};
147    
148     /// \brief Does substructure search and returns vector of the file positions of matches
149     bool Find(OBBase* pOb, std::vector<unsigned int>& SeekPositions, unsigned int MaxCandidates);
150    
151     /// \brief Returns multimap containing objects whose Tanimoto coefficients with the target
152     /// is greater than the value specified.
153     bool FindSimilar(OBBase* pOb, std::multimap<double, unsigned int>& SeekposMap,
154     double MinTani);
155    
156     /// \brief Returns multimap containing the nCandidates objects with largest Tanimoto
157     /// coefficients with the target.
158     bool FindSimilar(OBBase* pOb, std::multimap<double, unsigned int>& SeekposMap,
159     int nCandidates=0);
160    
161     /// \brief Returns a pointer to the fingerprint type used to constuct the index
162     OBFingerprint* GetFingerprint() const{ return _pFP;};
163    
164     private:
165     FptIndex _index;
166     OBFingerprint* _pFP;
167     };
168    
169     //**********************************************
170     /// \brief Class to prepare fingerprint index files See FastSearch class for details
171     class OBAPI FastSearchIndexer
172     {
173     //see end of cpp file for detailed documentation
174     public:
175     FastSearchIndexer(std::string& datafilename, std::ostream* os, std::string& fpid,
176     int FptBits=0);
177     ~FastSearchIndexer();
178    
179     ///\brief Called for each object
180     bool Add(OBBase* pOb, std::streampos seekpos);
181    
182     private:
183     std::ostream* _indexstream;
184     FptIndex* _pindex;
185     OBFingerprint* _pFP;
186     int _nbits;
187     };
188    
189     } //namespace OpenBabel
190     #endif
191    
192     //! \file fingerprint.hpp
193     //! \brief Declaration of OBFingerprint base class and fastsearch classes