| 1 | /********************************************************************** | 
| 2 | fingerprint.h - Base class for fingerprints and fast searching | 
| 3 |  | 
| 4 | Copyright (C) 2005 by Chris Morley | 
| 5 |  | 
| 6 | This file is part of the Open Babel project. | 
| 7 | For more information, see <http://openbabel.sourceforge.net/> | 
| 8 |  | 
| 9 | This program is free software; you can redistribute it and/or modify | 
| 10 | it under the terms of the GNU General Public License as published by | 
| 11 | the Free Software Foundation version 2 of the License. | 
| 12 |  | 
| 13 | This program is distributed in the hope that it will be useful, | 
| 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
| 16 | GNU General Public License for more details. | 
| 17 | ***********************************************************************/ | 
| 18 |  | 
| 19 | #ifndef OB_FINGERPRINT_H | 
| 20 | #define OB_FINGERPRINT_H | 
| 21 |  | 
#include <cstddef>
#include <iosfwd>
#include <list>
#include <map>
#include <set>
#include <string>
#include <vector>
| 27 |  | 
| 28 | namespace OpenBabel | 
| 29 | { | 
| 30 | class OBBase; //Forward declaration; used only as pointer. | 
| 31 |  | 
| 32 | /// \brief The base class for fingerprints | 
| 33 | class OBAPI OBFingerprint | 
| 34 | { | 
| 35 | //see end of cpp file for detailed documentation | 
| 36 | public: | 
| 37 | /// Sets the nth bit | 
| 38 | void SetBit(std::vector<unsigned int>& vec, unsigned int n); | 
| 39 |  | 
| 40 | /// Repeatedly ORs the top half with the bottom half until no smaller than nbits | 
| 41 | void Fold(std::vector<unsigned int>& vec, unsigned int nbits); | 
| 42 |  | 
| 43 | /// Returns fingerprint in vector, which may be resized, folded to nbits (if nbits!=0) | 
| 44 | virtual bool GetFingerprint(OBBase* pOb, std::vector<unsigned int>& fp, int nbits=0)=0; | 
| 45 |  | 
| 46 | /// Required short description of the fingerprint type. | 
| 47 | virtual std::string Description()=0; | 
| 48 |  | 
| 49 | /// Optional flags | 
| 50 | enum FptFlag{FPT_UNIQUEBITS=1}; | 
| 51 | virtual unsigned int Flags() { return 0;}; | 
| 52 |  | 
| 53 | /// Obtain info on available fingerprints | 
| 54 | static bool GetNextFPrt(std::string& id, OBFingerprint*& pFPrt); | 
| 55 |  | 
| 56 | /// Returns a pointer to a fingerprint (the default if ID is empty), or NULL if not available | 
| 57 | static OBFingerprint* FindFingerprint(std::string& ID); | 
| 58 |  | 
| 59 | /// Returns the Tanimoto coefficient between two vectors (vector<unsigned int>& SeekPositions) | 
| 60 | static double Tanimoto(const std::vector<unsigned int>& vec1, const std::vector<unsigned int>& vec2); | 
| 61 |  | 
| 62 | /// Inline version of Tanimoto() taking a pointer for the second vector | 
| 63 | static double Tanimoto(const std::vector<unsigned int>& vec1, const unsigned int* p2) | 
| 64 | { | 
| 65 | ///If used for two vectors, vec1 and vec2, call as Tanimoto(vec1, &vec2[0]); | 
| 66 | int andbits=0, orbits=0; | 
| 67 | unsigned int i; | 
| 68 | for (i=0;i<vec1.size();++i) | 
| 69 | { | 
| 70 | int andfp = vec1[i] & p2[i]; | 
| 71 | int orfp = vec1[i] | p2[i]; | 
| 72 | //Count bits | 
| 73 | for(;andfp;andfp=andfp<<1) | 
| 74 | if(andfp<0) ++andbits; | 
| 75 | for(;orfp;orfp=orfp<<1) | 
| 76 | if(orfp<0) ++orbits; | 
| 77 | } | 
| 78 | return((double)andbits/(double)orbits); | 
| 79 | }; | 
| 80 |  | 
| 81 | static const unsigned int bitsperint;// = 8 * sizeof(unsigned int); | 
| 82 |  | 
| 83 | private: | 
| 84 | ///Function object to set bits | 
| 85 | struct bit_or | 
| 86 | { | 
| 87 | unsigned int operator()(const unsigned int a, const unsigned int b) | 
| 88 | { | 
| 89 | return a | b; | 
| 90 | } | 
| 91 | }; | 
| 92 |  | 
| 93 | typedef std::map<std::string, OBFingerprint*> FPMapType; | 
| 94 | typedef FPMapType::iterator Fptpos; | 
| 95 |  | 
| 96 | protected: | 
| 97 | ///This static function returns a reference to the FPtsMap | 
| 98 | ///which, because it is a static local variable is constructed only once. | 
| 99 | ///This fiddle is to avoid the "static initialization order fiasco" | 
| 100 | ///See Marshall Cline's C++ FAQ Lite document, www.parashift.com/c++-faq-lite/". | 
| 101 | static FPMapType& FPtsMap() | 
| 102 | { | 
| 103 | static FPMapType* fptm = new FPMapType; | 
| 104 | return *fptm; | 
| 105 | }; | 
| 106 |  | 
| 107 | OBFingerprint(std::string ID, bool IsDefault=false) | 
| 108 | { | 
| 109 | FPtsMap()[ID] = this; //registers the derived fingerprint class | 
| 110 | if(IsDefault || FPtsMap().empty()) | 
| 111 | _pDefault=this; | 
| 112 | }; | 
| 113 |  | 
| 114 | private: | 
| 115 | static OBFingerprint* _pDefault; | 
| 116 | }; | 
| 117 |  | 
| 118 |  | 
| 119 |  | 
| 120 |  | 
| 121 | //************************************************************* | 
| 122 | //Fast search routines | 
| 123 | ///Header for fastsearch index file | 
| 124 | struct OBAPI FptIndexHeader | 
| 125 | { | 
| 126 | unsigned int headerlength;///<offset to data: sizeof(FptIndexHeader) | 
| 127 | unsigned int nEntries;    ///<number of fingerprints | 
| 128 | unsigned int words;                             ///<number 32bit words per fingerprint | 
| 129 | char fpid[16];            ///<ID of the fingerprint type | 
| 130 | char datafilename[256];   ///<the data that this is an index to | 
| 131 | }; | 
| 132 | /// Structure of fastsearch index files | 
| 133 | struct OBAPI FptIndex | 
| 134 | { | 
| 135 | FptIndexHeader header; | 
| 136 | std::vector<unsigned int> fptdata; | 
| 137 | std::vector<unsigned int> seekdata; | 
| 138 | }; | 
| 139 |  | 
| 140 | /// \brief Class to search fingerprint index files | 
| 141 | class OBAPI FastSearch | 
| 142 | { | 
| 143 | //see end of cpp file for detailed documentation | 
| 144 | public: | 
| 145 | std::string ReadIndex(std::istream* pIndexstream); | 
| 146 | virtual ~FastSearch(){}; | 
| 147 |  | 
| 148 | /// \brief Does substructure search and returns vector of the file positions of matches | 
| 149 | bool    Find(OBBase* pOb, std::vector<unsigned int>& SeekPositions, unsigned int MaxCandidates); | 
| 150 |  | 
| 151 | /// \brief Returns multimap containing objects whose Tanimoto coefficients with the target | 
| 152 | ///     is greater than the value specified. | 
| 153 | bool    FindSimilar(OBBase* pOb, std::multimap<double, unsigned int>& SeekposMap, | 
| 154 | double MinTani); | 
| 155 |  | 
| 156 | /// \brief Returns multimap containing the nCandidates objects with largest Tanimoto | 
| 157 | ///  coefficients with the target. | 
| 158 | bool    FindSimilar(OBBase* pOb, std::multimap<double, unsigned int>& SeekposMap, | 
| 159 | int nCandidates=0); | 
| 160 |  | 
| 161 | /// \brief Returns a pointer to the fingerprint type used to constuct the index | 
| 162 | OBFingerprint* GetFingerprint() const{ return _pFP;}; | 
| 163 |  | 
| 164 | private: | 
| 165 | FptIndex   _index; | 
| 166 | OBFingerprint* _pFP; | 
| 167 | }; | 
| 168 |  | 
| 169 | //********************************************** | 
| 170 | /// \brief Class to prepare fingerprint index files See FastSearch class for details | 
| 171 | class OBAPI FastSearchIndexer | 
| 172 | { | 
| 173 | //see end of cpp file for detailed documentation | 
| 174 | public: | 
| 175 | FastSearchIndexer(std::string& datafilename, std::ostream* os, std::string& fpid, | 
| 176 | int FptBits=0); | 
| 177 | ~FastSearchIndexer(); | 
| 178 |  | 
| 179 | ///\brief Called for each object | 
| 180 | bool Add(OBBase* pOb, std::streampos seekpos); | 
| 181 |  | 
| 182 | private: | 
| 183 | std::ostream* _indexstream; | 
| 184 | FptIndex*               _pindex; | 
| 185 | OBFingerprint* _pFP; | 
| 186 | int _nbits; | 
| 187 | }; | 
| 188 |  | 
| 189 | } //namespace OpenBabel | 
| 190 | #endif | 
| 191 |  | 
| 192 | //! \file fingerprint.h | 
| 193 | //! \brief Declaration of OBFingerprint base class and fastsearch classes |