| 1 | tim | 741 | /********************************************************************** | 
| 2 |  |  | fingerprint.h - Base class for fingerprints and fast searching | 
| 3 |  |  |  | 
| 4 |  |  | Copyright (C) 2005 by Chris Morley | 
| 5 |  |  |  | 
| 6 |  |  | This file is part of the Open Babel project. | 
| 7 |  |  | For more information, see <http://openbabel.sourceforge.net/> | 
| 8 |  |  |  | 
| 9 |  |  | This program is free software; you can redistribute it and/or modify | 
| 10 |  |  | it under the terms of the GNU General Public License as published by | 
| 11 |  |  | the Free Software Foundation version 2 of the License. | 
| 12 |  |  |  | 
| 13 |  |  | This program is distributed in the hope that it will be useful, | 
| 14 |  |  | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 15 |  |  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
| 16 |  |  | GNU General Public License for more details. | 
| 17 |  |  | ***********************************************************************/ | 
| 18 |  |  |  | 
| 19 |  |  | #ifndef OB_FINGERPRINT_H | 
| 20 |  |  | #define OB_FINGERPRINT_H | 
| 21 |  |  |  | 
| 22 |  |  | #include <list> | 
| 23 |  |  | #include <map> | 
| 24 |  |  | #include <set> | 
| 25 |  |  | #include <vector> | 
| 26 |  |  | #include <string> | 
| 27 |  |  |  | 
| 28 |  |  | namespace OpenBabel | 
| 29 |  |  | { | 
| 30 |  |  | class OBBase; //Forward declaration; used only as pointer. | 
| 31 |  |  |  | 
/// \brief The base class for fingerprints
///
/// Each fingerprint type derives from this class. Constructing a derived
/// object registers it, keyed by its ID, in a map shared by all fingerprint
/// types (see the protected constructor and FPtsMap()).
class OBAPI OBFingerprint
{
//see end of cpp file for detailed documentation
public:
  /// Virtual because this is a polymorphic base class: destroying a derived
  /// fingerprint through an OBFingerprint* must be well defined.
  virtual ~OBFingerprint() {}

  /// Sets the nth bit
  void SetBit(std::vector<unsigned int>& vec, unsigned int n);

  /// Repeatedly ORs the top half with the bottom half until no smaller than nbits
  void Fold(std::vector<unsigned int>& vec, unsigned int nbits);

  /// Returns fingerprint in vector, which may be resized, folded to nbits (if nbits!=0)
  virtual bool GetFingerprint(OBBase* pOb, std::vector<unsigned int>& fp, int nbits=0)=0;

  /// Required short description of the fingerprint type.
  virtual std::string Description()=0;

  /// Optional flags
  enum FptFlag{FPT_UNIQUEBITS=1};

  /// Returns the optional flags of this fingerprint type; 0 unless overridden.
  virtual unsigned int Flags() { return 0; }

  /// Obtain info on available fingerprints
  static bool GetNextFPrt(std::string& id, OBFingerprint*& pFPrt);

  /// Returns a pointer to a fingerprint (the default if ID is empty), or NULL if not available
  static OBFingerprint* FindFingerprint(std::string& ID);

  /// Returns the Tanimoto coefficient between two vectors
  static double Tanimoto(const std::vector<unsigned int>& vec1, const std::vector<unsigned int>& vec2);

  /// \brief Inline version of Tanimoto() taking a pointer for the second vector
  ///
  /// If used for two vectors, vec1 and vec2, call as Tanimoto(vec1, &vec2[0]);
  ///
  /// \param vec1 first fingerprint; its size fixes how many words are compared
  /// \param p2   second fingerprint; vec1.size() words are read from it, so the
  ///             caller must make sure that many are available
  /// \return (number of bits set in both) / (number of bits set in either),
  ///         or 0.0 when no bit is set in either input (including empty vec1)
  static double Tanimoto(const std::vector<unsigned int>& vec1, const unsigned int* p2)
  {
    unsigned int andbits = 0;
    unsigned int orbits = 0;
    for (std::vector<unsigned int>::size_type i = 0; i < vec1.size(); ++i)
    {
      andbits += CountBits(vec1[i] & p2[i]);
      orbits  += CountBits(vec1[i] | p2[i]);
    }
    // Without this guard the quotient would be 0/0, i.e. NaN.
    if (orbits == 0)
      return 0.0;
    return static_cast<double>(andbits) / static_cast<double>(orbits);
  }

  static const unsigned int bitsperint;// = 8 * sizeof(unsigned int);

private:
  /// Returns the number of set bits in word.
  /// Works on an unsigned value so that no signed shift or sign test is needed.
  static unsigned int CountBits(unsigned int word)
  {
    unsigned int count = 0;
    for (; word != 0; word &= word - 1) // each pass clears the lowest set bit
      ++count;
    return count;
  }

  ///Function object to set bits
  struct bit_or
  {
    unsigned int operator()(const unsigned int a, const unsigned int b) const
    {
      return a | b;
    }
  };

  typedef std::map<std::string, OBFingerprint*> FPMapType;
  typedef FPMapType::iterator Fptpos;

protected:
  ///This static function returns a reference to the FPtsMap
  ///which, because it is a static local variable is constructed only once.
  ///This fiddle is to avoid the "static initialization order fiasco"
  ///See Marshall Cline's C++ FAQ Lite document, www.parashift.com/c++-faq-lite/".
  ///The map is allocated with new and never deleted.
  static FPMapType& FPtsMap()
  {
    static FPMapType* fptm = new FPMapType;
    return *fptm;
  }

  /// \brief Registers the derived fingerprint class under ID
  ///
  /// \param ID        key under which this object is stored in FPtsMap()
  /// \param IsDefault if true, this object becomes the default fingerprint
  ///
  /// The first fingerprint to be registered also becomes the default, so that
  /// a default exists even when no type asks for it explicitly.
  OBFingerprint(std::string ID, bool IsDefault=false)
  {
    FPMapType& registry = FPtsMap();
    // Must be sampled before the insertion below: once this object has been
    // added, the map can never be empty.
    const bool firstRegistered = registry.empty();
    registry[ID] = this; //registers the derived fingerprint class
    if (IsDefault || firstRegistered)
      _pDefault = this;
  }

private:
  static OBFingerprint* _pDefault;
};
| 117 |  |  |  | 
| 118 |  |  |  | 
| 119 |  |  |  | 
| 120 |  |  |  | 
| 121 |  |  | //************************************************************* | 
| 122 |  |  | //Fast search routines | 
| 123 |  |  | ///Header for fastsearch index file | 
| 124 |  |  | struct OBAPI FptIndexHeader | 
| 125 |  |  | { | 
| 126 |  |  | unsigned int headerlength;///<offset to data: sizeof(FptIndexHeader) | 
| 127 |  |  | unsigned int nEntries;    ///<number of fingerprints | 
| 128 |  |  | unsigned int words;                             ///<number 32bit words per fingerprint | 
| 129 |  |  | char fpid[16];            ///<ID of the fingerprint type | 
| 130 |  |  | char datafilename[256];   ///<the data that this is an index to | 
| 131 |  |  | }; | 
| 132 |  |  | /// Structure of fastsearch index files | 
| 133 |  |  | struct OBAPI FptIndex | 
| 134 |  |  | { | 
| 135 |  |  | FptIndexHeader header; | 
| 136 |  |  | std::vector<unsigned int> fptdata; | 
| 137 |  |  | std::vector<unsigned int> seekdata; | 
| 138 |  |  | }; | 
| 139 |  |  |  | 
| 140 |  |  | /// \brief Class to search fingerprint index files | 
| 141 |  |  | class OBAPI FastSearch | 
| 142 |  |  | { | 
| 143 |  |  | //see end of cpp file for detailed documentation | 
| 144 |  |  | public: | 
| 145 |  |  | std::string ReadIndex(std::istream* pIndexstream); | 
| 146 |  |  | virtual ~FastSearch(){}; | 
| 147 |  |  |  | 
| 148 |  |  | /// \brief Does substructure search and returns vector of the file positions of matches | 
| 149 |  |  | bool    Find(OBBase* pOb, std::vector<unsigned int>& SeekPositions, unsigned int MaxCandidates); | 
| 150 |  |  |  | 
| 151 |  |  | /// \brief Returns multimap containing objects whose Tanimoto coefficients with the target | 
| 152 |  |  | ///     is greater than the value specified. | 
| 153 |  |  | bool    FindSimilar(OBBase* pOb, std::multimap<double, unsigned int>& SeekposMap, | 
| 154 |  |  | double MinTani); | 
| 155 |  |  |  | 
| 156 |  |  | /// \brief Returns multimap containing the nCandidates objects with largest Tanimoto | 
| 157 |  |  | ///  coefficients with the target. | 
| 158 |  |  | bool    FindSimilar(OBBase* pOb, std::multimap<double, unsigned int>& SeekposMap, | 
| 159 |  |  | int nCandidates=0); | 
| 160 |  |  |  | 
| 161 |  |  | /// \brief Returns a pointer to the fingerprint type used to constuct the index | 
| 162 |  |  | OBFingerprint* GetFingerprint() const{ return _pFP;}; | 
| 163 |  |  |  | 
| 164 |  |  | private: | 
| 165 |  |  | FptIndex   _index; | 
| 166 |  |  | OBFingerprint* _pFP; | 
| 167 |  |  | }; | 
| 168 |  |  |  | 
| 169 |  |  | //********************************************** | 
| 170 |  |  | /// \brief Class to prepare fingerprint index files See FastSearch class for details | 
| 171 |  |  | class OBAPI FastSearchIndexer | 
| 172 |  |  | { | 
| 173 |  |  | //see end of cpp file for detailed documentation | 
| 174 |  |  | public: | 
| 175 |  |  | FastSearchIndexer(std::string& datafilename, std::ostream* os, std::string& fpid, | 
| 176 |  |  | int FptBits=0); | 
| 177 |  |  | ~FastSearchIndexer(); | 
| 178 |  |  |  | 
| 179 |  |  | ///\brief Called for each object | 
| 180 |  |  | bool Add(OBBase* pOb, std::streampos seekpos); | 
| 181 |  |  |  | 
| 182 |  |  | private: | 
| 183 |  |  | std::ostream* _indexstream; | 
| 184 |  |  | FptIndex*               _pindex; | 
| 185 |  |  | OBFingerprint* _pFP; | 
| 186 |  |  | int _nbits; | 
| 187 |  |  | }; | 
| 188 |  |  |  | 
| 189 |  |  | } //namespace OpenBabel | 
| 190 |  |  | #endif | 
| 191 |  |  |  | 
| 192 |  |  | //! \file fingerprint.h | 
| 193 |  |  | //! \brief Declaration of OBFingerprint base class and fastsearch classes |