| 1 | 
tim | 
770 | 
#ifndef INC_CharScanner_hpp__ | 
| 2 | 
  | 
  | 
#define INC_CharScanner_hpp__ | 
| 3 | 
  | 
  | 
 | 
| 4 | 
  | 
  | 
/* ANTLR Translator Generator | 
| 5 | 
  | 
  | 
 * Project led by Terence Parr at http://www.jGuru.com | 
| 6 | 
  | 
  | 
 * Software rights: http://www.antlr.org/license.html | 
| 7 | 
  | 
  | 
 * | 
| 8 | 
gezelter | 
1442 | 
 * $Id$ | 
| 9 | 
tim | 
770 | 
 */ | 
| 10 | 
  | 
  | 
 | 
| 11 | 
  | 
  | 
#include <antlr/config.hpp> | 
| 12 | 
gezelter | 
1782 | 
#include <cstdio> | 
| 13 | 
tim | 
770 | 
#include <map> | 
| 14 | 
gezelter | 
1782 | 
#include <cstring> | 
| 15 | 
tim | 
770 | 
 | 
| 16 | 
  | 
  | 
#ifdef HAS_NOT_CCTYPE_H | 
| 17 | 
  | 
  | 
#include <ctype.h> | 
| 18 | 
  | 
  | 
#else | 
| 19 | 
  | 
  | 
#include <cctype> | 
| 20 | 
  | 
  | 
#endif | 
| 21 | 
  | 
  | 
 | 
| 22 | 
  | 
  | 
#if ( _MSC_VER == 1200 ) | 
| 23 | 
  | 
  | 
// VC6 seems to need this | 
| 24 | 
  | 
  | 
// note that this is not a standard C++ include file. | 
| 25 | 
  | 
  | 
# include <stdio.h> | 
| 26 | 
  | 
  | 
#endif | 
| 27 | 
  | 
  | 
 | 
| 28 | 
  | 
  | 
#include <antlr/TokenStream.hpp> | 
| 29 | 
  | 
  | 
#include <antlr/RecognitionException.hpp> | 
| 30 | 
  | 
  | 
#include <antlr/SemanticException.hpp> | 
| 31 | 
  | 
  | 
#include <antlr/MismatchedCharException.hpp> | 
| 32 | 
  | 
  | 
#include <antlr/InputBuffer.hpp> | 
| 33 | 
  | 
  | 
#include <antlr/BitSet.hpp> | 
| 34 | 
  | 
  | 
#include <antlr/LexerSharedInputState.hpp> | 
| 35 | 
  | 
  | 
 | 
| 36 | 
  | 
  | 
#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE | 
| 37 | 
  | 
  | 
namespace antlr { | 
| 38 | 
  | 
  | 
#endif | 
| 39 | 
  | 
  | 
 | 
| 40 | 
  | 
  | 
class ANTLR_API CharScanner; | 
| 41 | 
  | 
  | 
 | 
| 42 | 
  | 
  | 
ANTLR_C_USING(tolower) | 
| 43 | 
  | 
  | 
 | 
| 44 | 
gezelter | 
1558 | 
#ifdef ANTLR_REALLY_NO_STRCASECMP | 
| 45 | 
  | 
  | 
// Apparently, neither strcasecmp nor stricmp is standard, and Codewarrior | 
| 46 | 
  | 
  | 
// on the mac has neither... | 
| 47 | 
tim | 
770 | 
/** Portable fallback for the non-standard strcasecmp(): compares two
 * NUL-terminated strings while ignoring case.
 *
 * Every character is converted to unsigned char before it is handed to
 * tolower(). Passing a negative value (any byte >= 0x80 on platforms where
 * plain char is signed) to tolower() is undefined behaviour, and comparing
 * the unsigned values yields the same ordering regardless of the
 * signedness of plain char.
 *
 * @param s1 first string, must not be null
 * @param s2 second string, must not be null
 * @return -1 if s1 sorts before s2, 1 if it sorts after s2, 0 if both are
 *         equal ignoring case
 */
inline int strcasecmp(const char *s1, const char *s2)
{
	for (;;)
	{
		const int c1 = tolower(static_cast<unsigned char>(*s1++));
		const int c2 = tolower(static_cast<unsigned char>(*s2++));

		if (c1 < c2) return -1;
		if (c1 > c2) return 1;
		// c1 == c2 here; a terminator means both strings ended together
		if (c1 == 0) return 0;
	}
}
| 58 | 
gezelter | 
1558 | 
#else | 
| 59 | 
  | 
  | 
#ifdef NO_STRCASECMP | 
| 60 | 
  | 
  | 
ANTLR_C_USING(stricmp) | 
| 61 | 
  | 
  | 
#else | 
| 62 | 
  | 
  | 
ANTLR_C_USING(strcasecmp) | 
| 63 | 
tim | 
770 | 
#endif | 
| 64 | 
gezelter | 
1558 | 
#endif | 
| 65 | 
tim | 
770 | 
 | 
| 66 | 
  | 
  | 
/** Functor for the literals map | 
| 67 | 
  | 
  | 
 */ | 
| 68 | 
  | 
  | 
class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> { | 
| 69 | 
  | 
  | 
private: | 
| 70 | 
  | 
  | 
        const CharScanner* scanner; | 
| 71 | 
  | 
  | 
public: | 
| 72 | 
  | 
  | 
#ifdef NO_TEMPLATE_PARTS | 
| 73 | 
  | 
  | 
        CharScannerLiteralsLess() {} // not really used, definition to appease MSVC | 
| 74 | 
  | 
  | 
#endif | 
| 75 | 
  | 
  | 
        CharScannerLiteralsLess(const CharScanner* theScanner) | 
| 76 | 
  | 
  | 
        : scanner(theScanner) | 
| 77 | 
  | 
  | 
        { | 
| 78 | 
  | 
  | 
        } | 
| 79 | 
  | 
  | 
        bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const; | 
| 80 | 
  | 
  | 
// defaults are good enough.. | 
| 81 | 
  | 
  | 
        //      CharScannerLiteralsLess(const CharScannerLiteralsLess&); | 
| 82 | 
  | 
  | 
        //      CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&); | 
| 83 | 
  | 
  | 
}; | 
| 84 | 
  | 
  | 
 | 
| 85 | 
  | 
  | 
/** Superclass of generated lexers | 
| 86 | 
  | 
  | 
 */ | 
| 87 | 
  | 
  | 
class ANTLR_API CharScanner : public TokenStream { | 
| 88 | 
  | 
  | 
protected: | 
| 89 | 
  | 
  | 
        typedef RefToken (*factory_type)(); | 
| 90 | 
  | 
  | 
public: | 
| 91 | 
  | 
  | 
        CharScanner(InputBuffer& cb, bool case_sensitive ); | 
| 92 | 
  | 
  | 
        CharScanner(InputBuffer* cb, bool case_sensitive ); | 
| 93 | 
  | 
  | 
        CharScanner(const LexerSharedInputState& state, bool case_sensitive ); | 
| 94 | 
  | 
  | 
 | 
| 95 | 
  | 
  | 
        virtual ~CharScanner() | 
| 96 | 
  | 
  | 
        { | 
| 97 | 
  | 
  | 
        } | 
| 98 | 
  | 
  | 
 | 
| 99 | 
  | 
  | 
        virtual int LA(unsigned int i); | 
| 100 | 
  | 
  | 
 | 
| 101 | 
  | 
  | 
        virtual void append(char c) | 
| 102 | 
  | 
  | 
        { | 
| 103 | 
  | 
  | 
                if (saveConsumedInput) | 
| 104 | 
  | 
  | 
                { | 
| 105 | 
  | 
  | 
                        size_t l = text.length(); | 
| 106 | 
  | 
  | 
 | 
| 107 | 
  | 
  | 
                        if ((l%256) == 0) | 
| 108 | 
  | 
  | 
                                text.reserve(l+256); | 
| 109 | 
  | 
  | 
 | 
| 110 | 
  | 
  | 
                        text.replace(l,0,&c,1); | 
| 111 | 
  | 
  | 
                } | 
| 112 | 
  | 
  | 
        } | 
| 113 | 
  | 
  | 
 | 
| 114 | 
  | 
  | 
        virtual void append(const ANTLR_USE_NAMESPACE(std)string& s) | 
| 115 | 
  | 
  | 
        { | 
| 116 | 
  | 
  | 
                if( saveConsumedInput ) | 
| 117 | 
  | 
  | 
                        text += s; | 
| 118 | 
  | 
  | 
        } | 
| 119 | 
  | 
  | 
 | 
| 120 | 
  | 
  | 
        virtual void commit() | 
| 121 | 
  | 
  | 
        { | 
| 122 | 
  | 
  | 
                inputState->getInput().commit(); | 
| 123 | 
  | 
  | 
        } | 
| 124 | 
  | 
  | 
 | 
| 125 | 
gezelter | 
1558 | 
        /** called by the generated lexer to do error recovery, override to | 
| 126 | 
  | 
  | 
         * customize the behaviour. | 
| 127 | 
  | 
  | 
         */ | 
| 128 | 
  | 
  | 
        virtual void recover(const RecognitionException& ex, const BitSet& tokenSet) | 
| 129 | 
  | 
  | 
        { | 
| 130 | 
  | 
  | 
                consume(); | 
| 131 | 
  | 
  | 
                consumeUntil(tokenSet); | 
| 132 | 
  | 
  | 
        } | 
| 133 | 
  | 
  | 
 | 
| 134 | 
tim | 
770 | 
        virtual void consume() | 
| 135 | 
  | 
  | 
        { | 
| 136 | 
  | 
  | 
                if (inputState->guessing == 0) | 
| 137 | 
  | 
  | 
                { | 
| 138 | 
  | 
  | 
                        int c = LA(1); | 
| 139 | 
  | 
  | 
                        if (caseSensitive) | 
| 140 | 
  | 
  | 
                        { | 
| 141 | 
  | 
  | 
                                append(c); | 
| 142 | 
  | 
  | 
                        } | 
| 143 | 
  | 
  | 
                        else | 
| 144 | 
  | 
  | 
                        { | 
| 145 | 
  | 
  | 
                                // use input.LA(), not LA(), to get original case | 
| 146 | 
  | 
  | 
                                // CharScanner.LA() would toLower it. | 
| 147 | 
  | 
  | 
                                append(inputState->getInput().LA(1)); | 
| 148 | 
  | 
  | 
                        } | 
| 149 | 
  | 
  | 
 | 
| 150 | 
  | 
  | 
                        // RK: in a sense I don't like this automatic handling. | 
| 151 | 
  | 
  | 
                        if (c == '\t') | 
| 152 | 
  | 
  | 
                                tab(); | 
| 153 | 
  | 
  | 
                        else | 
| 154 | 
  | 
  | 
                                inputState->column++; | 
| 155 | 
  | 
  | 
                } | 
| 156 | 
  | 
  | 
                inputState->getInput().consume(); | 
| 157 | 
  | 
  | 
        } | 
| 158 | 
  | 
  | 
 | 
| 159 | 
  | 
  | 
        /** Consume chars until one matches the given char */ | 
| 160 | 
  | 
  | 
        virtual void consumeUntil(int c) | 
| 161 | 
  | 
  | 
        { | 
| 162 | 
  | 
  | 
                for(;;) | 
| 163 | 
  | 
  | 
                { | 
| 164 | 
  | 
  | 
                        int la_1 = LA(1); | 
| 165 | 
  | 
  | 
                        if( la_1 == EOF_CHAR || la_1 == c ) | 
| 166 | 
  | 
  | 
                                break; | 
| 167 | 
  | 
  | 
                        consume(); | 
| 168 | 
  | 
  | 
                } | 
| 169 | 
  | 
  | 
        } | 
| 170 | 
  | 
  | 
 | 
| 171 | 
  | 
  | 
        /** Consume chars until one matches the given set */ | 
| 172 | 
  | 
  | 
        virtual void consumeUntil(const BitSet& set) | 
| 173 | 
  | 
  | 
        { | 
| 174 | 
  | 
  | 
                for(;;) | 
| 175 | 
  | 
  | 
                { | 
| 176 | 
  | 
  | 
                        int la_1 = LA(1); | 
| 177 | 
  | 
  | 
                        if( la_1 == EOF_CHAR || set.member(la_1) ) | 
| 178 | 
  | 
  | 
                                break; | 
| 179 | 
  | 
  | 
                        consume(); | 
| 180 | 
  | 
  | 
                } | 
| 181 | 
  | 
  | 
        } | 
| 182 | 
  | 
  | 
 | 
| 183 | 
  | 
  | 
        /// Mark the current position and return a id for it | 
| 184 | 
  | 
  | 
        virtual unsigned int mark() | 
| 185 | 
  | 
  | 
        { | 
| 186 | 
  | 
  | 
                return inputState->getInput().mark(); | 
| 187 | 
  | 
  | 
        } | 
| 188 | 
  | 
  | 
        /// Rewind the scanner to a previously marked position | 
| 189 | 
  | 
  | 
        virtual void rewind(unsigned int pos) | 
| 190 | 
  | 
  | 
        { | 
| 191 | 
  | 
  | 
                inputState->getInput().rewind(pos); | 
| 192 | 
  | 
  | 
        } | 
| 193 | 
  | 
  | 
 | 
| 194 | 
  | 
  | 
        /// See if input contains character 'c' throw MismatchedCharException if not | 
| 195 | 
  | 
  | 
        virtual void match(int c) | 
| 196 | 
  | 
  | 
        { | 
| 197 | 
  | 
  | 
                int la_1 = LA(1); | 
| 198 | 
  | 
  | 
                if ( la_1 != c ) | 
| 199 | 
  | 
  | 
                        throw MismatchedCharException(la_1, c, false, this); | 
| 200 | 
  | 
  | 
                consume(); | 
| 201 | 
  | 
  | 
        } | 
| 202 | 
  | 
  | 
 | 
| 203 | 
  | 
  | 
        /** See if input contains element from bitset b | 
| 204 | 
  | 
  | 
         * throw MismatchedCharException if not | 
| 205 | 
  | 
  | 
         */ | 
| 206 | 
  | 
  | 
        virtual void match(const BitSet& b) | 
| 207 | 
  | 
  | 
        { | 
| 208 | 
  | 
  | 
                int la_1 = LA(1); | 
| 209 | 
  | 
  | 
 | 
| 210 | 
  | 
  | 
                if ( !b.member(la_1) ) | 
| 211 | 
  | 
  | 
                        throw MismatchedCharException( la_1, b, false, this ); | 
| 212 | 
  | 
  | 
                consume(); | 
| 213 | 
  | 
  | 
        } | 
| 214 | 
  | 
  | 
 | 
| 215 | 
  | 
  | 
        /** See if input contains string 's' throw MismatchedCharException if not | 
| 216 | 
  | 
  | 
         * @note the string cannot match EOF | 
| 217 | 
  | 
  | 
         */ | 
| 218 | 
  | 
  | 
        virtual void match( const char* s ) | 
| 219 | 
  | 
  | 
        { | 
| 220 | 
  | 
  | 
                while( *s != '\0' ) | 
| 221 | 
  | 
  | 
                { | 
| 222 | 
  | 
  | 
                        // the & 0xFF is here to prevent sign extension lateron | 
| 223 | 
  | 
  | 
                        int la_1 = LA(1), c = (*s++ & 0xFF); | 
| 224 | 
  | 
  | 
 | 
| 225 | 
  | 
  | 
                        if ( la_1 != c ) | 
| 226 | 
  | 
  | 
                                throw MismatchedCharException(la_1, c, false, this); | 
| 227 | 
  | 
  | 
 | 
| 228 | 
  | 
  | 
                        consume(); | 
| 229 | 
  | 
  | 
                } | 
| 230 | 
  | 
  | 
        } | 
| 231 | 
  | 
  | 
        /** See if input contains string 's' throw MismatchedCharException if not | 
| 232 | 
  | 
  | 
         * @note the string cannot match EOF | 
| 233 | 
  | 
  | 
         */ | 
| 234 | 
  | 
  | 
        virtual void match(const ANTLR_USE_NAMESPACE(std)string& s) | 
| 235 | 
  | 
  | 
        { | 
| 236 | 
  | 
  | 
                size_t len = s.length(); | 
| 237 | 
  | 
  | 
 | 
| 238 | 
  | 
  | 
                for (size_t i = 0; i < len; i++) | 
| 239 | 
  | 
  | 
                { | 
| 240 | 
  | 
  | 
                        // the & 0xFF is here to prevent sign extension lateron | 
| 241 | 
  | 
  | 
                        int la_1 = LA(1), c = (s[i] & 0xFF); | 
| 242 | 
  | 
  | 
 | 
| 243 | 
  | 
  | 
                        if ( la_1 != c ) | 
| 244 | 
  | 
  | 
                                throw MismatchedCharException(la_1, c, false, this); | 
| 245 | 
  | 
  | 
 | 
| 246 | 
  | 
  | 
                        consume(); | 
| 247 | 
  | 
  | 
                } | 
| 248 | 
  | 
  | 
        } | 
| 249 | 
  | 
  | 
        /** See if input does not contain character 'c' | 
| 250 | 
  | 
  | 
         * throw MismatchedCharException if not | 
| 251 | 
  | 
  | 
         */ | 
| 252 | 
  | 
  | 
        virtual void matchNot(int c) | 
| 253 | 
  | 
  | 
        { | 
| 254 | 
  | 
  | 
                int la_1 = LA(1); | 
| 255 | 
  | 
  | 
 | 
| 256 | 
  | 
  | 
                if ( la_1 == c ) | 
| 257 | 
  | 
  | 
                        throw MismatchedCharException(la_1, c, true, this); | 
| 258 | 
  | 
  | 
 | 
| 259 | 
  | 
  | 
                consume(); | 
| 260 | 
  | 
  | 
        } | 
| 261 | 
  | 
  | 
        /** See if input contains character in range c1-c2 | 
| 262 | 
  | 
  | 
         * throw MismatchedCharException if not | 
| 263 | 
  | 
  | 
         */ | 
| 264 | 
  | 
  | 
        virtual void matchRange(int c1, int c2) | 
| 265 | 
  | 
  | 
        { | 
| 266 | 
  | 
  | 
                int la_1 = LA(1); | 
| 267 | 
  | 
  | 
 | 
| 268 | 
  | 
  | 
                if ( la_1 < c1 || la_1 > c2 ) | 
| 269 | 
  | 
  | 
                        throw MismatchedCharException(la_1, c1, c2, false, this); | 
| 270 | 
  | 
  | 
 | 
| 271 | 
  | 
  | 
                consume(); | 
| 272 | 
  | 
  | 
        } | 
| 273 | 
  | 
  | 
 | 
| 274 | 
  | 
  | 
        virtual bool getCaseSensitive() const | 
| 275 | 
  | 
  | 
        { | 
| 276 | 
  | 
  | 
                return caseSensitive; | 
| 277 | 
  | 
  | 
        } | 
| 278 | 
  | 
  | 
 | 
| 279 | 
  | 
  | 
        virtual void setCaseSensitive(bool t) | 
| 280 | 
  | 
  | 
        { | 
| 281 | 
  | 
  | 
                caseSensitive = t; | 
| 282 | 
  | 
  | 
        } | 
| 283 | 
  | 
  | 
 | 
| 284 | 
  | 
  | 
        virtual bool getCaseSensitiveLiterals() const=0; | 
| 285 | 
  | 
  | 
 | 
| 286 | 
  | 
  | 
        /// Get the line the scanner currently is in (starts at 1) | 
| 287 | 
  | 
  | 
        virtual int getLine() const | 
| 288 | 
  | 
  | 
        { | 
| 289 | 
  | 
  | 
                return inputState->line; | 
| 290 | 
  | 
  | 
        } | 
| 291 | 
  | 
  | 
 | 
| 292 | 
  | 
  | 
        /// set the line number | 
| 293 | 
  | 
  | 
        virtual void setLine(int l) | 
| 294 | 
  | 
  | 
        { | 
| 295 | 
  | 
  | 
                inputState->line = l; | 
| 296 | 
  | 
  | 
        } | 
| 297 | 
  | 
  | 
 | 
| 298 | 
  | 
  | 
        /// Get the column the scanner currently is in (starts at 1) | 
| 299 | 
  | 
  | 
        virtual int getColumn() const | 
| 300 | 
  | 
  | 
        { | 
| 301 | 
  | 
  | 
                return inputState->column; | 
| 302 | 
  | 
  | 
        } | 
| 303 | 
  | 
  | 
        /// set the column number | 
| 304 | 
  | 
  | 
        virtual void setColumn(int c) | 
| 305 | 
  | 
  | 
        { | 
| 306 | 
  | 
  | 
                inputState->column = c; | 
| 307 | 
  | 
  | 
        } | 
| 308 | 
  | 
  | 
 | 
| 309 | 
  | 
  | 
        /// get the filename for the file currently used | 
| 310 | 
  | 
  | 
        virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const | 
| 311 | 
  | 
  | 
        { | 
| 312 | 
  | 
  | 
                return inputState->filename; | 
| 313 | 
  | 
  | 
        } | 
| 314 | 
  | 
  | 
        /// Set the filename the scanner is using (used in error messages) | 
| 315 | 
  | 
  | 
        virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f) | 
| 316 | 
  | 
  | 
        { | 
| 317 | 
  | 
  | 
                inputState->filename = f; | 
| 318 | 
  | 
  | 
        } | 
| 319 | 
  | 
  | 
 | 
| 320 | 
  | 
  | 
        virtual bool getCommitToPath() const | 
| 321 | 
  | 
  | 
        { | 
| 322 | 
  | 
  | 
                return commitToPath; | 
| 323 | 
  | 
  | 
        } | 
| 324 | 
  | 
  | 
 | 
| 325 | 
  | 
  | 
        virtual void setCommitToPath(bool commit) | 
| 326 | 
  | 
  | 
        { | 
| 327 | 
  | 
  | 
                commitToPath = commit; | 
| 328 | 
  | 
  | 
        } | 
| 329 | 
  | 
  | 
 | 
| 330 | 
  | 
  | 
        /** return a copy of the current text buffer */ | 
| 331 | 
  | 
  | 
        virtual const ANTLR_USE_NAMESPACE(std)string& getText() const | 
| 332 | 
  | 
  | 
        { | 
| 333 | 
  | 
  | 
                return text; | 
| 334 | 
  | 
  | 
        } | 
| 335 | 
  | 
  | 
 | 
| 336 | 
  | 
  | 
        virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s) | 
| 337 | 
  | 
  | 
        { | 
| 338 | 
  | 
  | 
                text = s; | 
| 339 | 
  | 
  | 
        } | 
| 340 | 
  | 
  | 
 | 
| 341 | 
  | 
  | 
        virtual void resetText() | 
| 342 | 
  | 
  | 
        { | 
| 343 | 
  | 
  | 
                text = ""; | 
| 344 | 
  | 
  | 
                inputState->tokenStartColumn = inputState->column; | 
| 345 | 
  | 
  | 
                inputState->tokenStartLine = inputState->line; | 
| 346 | 
  | 
  | 
        } | 
| 347 | 
  | 
  | 
 | 
| 348 | 
  | 
  | 
        virtual RefToken getTokenObject() const | 
| 349 | 
  | 
  | 
        { | 
| 350 | 
  | 
  | 
                return _returnToken; | 
| 351 | 
  | 
  | 
        } | 
| 352 | 
  | 
  | 
 | 
| 353 | 
  | 
  | 
        /** Used to keep track of line breaks, needs to be called from | 
| 354 | 
  | 
  | 
         * within generated lexers when a \n \r is encountered. | 
| 355 | 
  | 
  | 
         */ | 
| 356 | 
  | 
  | 
        virtual void newline() | 
| 357 | 
  | 
  | 
        { | 
| 358 | 
  | 
  | 
                ++inputState->line; | 
| 359 | 
  | 
  | 
                inputState->column = 1; | 
| 360 | 
  | 
  | 
        } | 
| 361 | 
  | 
  | 
 | 
| 362 | 
  | 
  | 
        /** Advance the current column number by an appropriate amount according | 
| 363 | 
  | 
  | 
         * to the tabsize. This method needs to be explicitly called from the | 
| 364 | 
  | 
  | 
         * lexer rules encountering tabs. | 
| 365 | 
  | 
  | 
         */ | 
| 366 | 
  | 
  | 
        virtual void tab() | 
| 367 | 
  | 
  | 
        { | 
| 368 | 
  | 
  | 
                int c = getColumn(); | 
| 369 | 
  | 
  | 
                int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1;      // calculate tab stop | 
| 370 | 
  | 
  | 
                setColumn( nc ); | 
| 371 | 
  | 
  | 
        } | 
| 372 | 
  | 
  | 
        /// set the tabsize. Returns the old tabsize | 
| 373 | 
  | 
  | 
        int setTabsize( int size ) | 
| 374 | 
  | 
  | 
        { | 
| 375 | 
  | 
  | 
                int oldsize = tabsize; | 
| 376 | 
  | 
  | 
                tabsize = size; | 
| 377 | 
  | 
  | 
                return oldsize; | 
| 378 | 
  | 
  | 
        } | 
| 379 | 
  | 
  | 
        /// Return the tabsize used by the scanner | 
| 380 | 
  | 
  | 
        int getTabSize() const | 
| 381 | 
  | 
  | 
        { | 
| 382 | 
  | 
  | 
                return tabsize; | 
| 383 | 
  | 
  | 
        } | 
| 384 | 
  | 
  | 
 | 
| 385 | 
  | 
  | 
        /** Report exception errors caught in nextToken() */ | 
| 386 | 
  | 
  | 
        virtual void reportError(const RecognitionException& e); | 
| 387 | 
  | 
  | 
 | 
| 388 | 
  | 
  | 
        /** Parser error-reporting function can be overridden in subclass */ | 
| 389 | 
  | 
  | 
        virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s); | 
| 390 | 
  | 
  | 
 | 
| 391 | 
  | 
  | 
        /** Parser warning-reporting function can be overridden in subclass */ | 
| 392 | 
  | 
  | 
        virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s); | 
| 393 | 
  | 
  | 
 | 
| 394 | 
  | 
  | 
        virtual InputBuffer& getInputBuffer() | 
| 395 | 
  | 
  | 
        { | 
| 396 | 
  | 
  | 
                return inputState->getInput(); | 
| 397 | 
  | 
  | 
        } | 
| 398 | 
  | 
  | 
 | 
| 399 | 
  | 
  | 
        virtual LexerSharedInputState getInputState() | 
| 400 | 
  | 
  | 
        { | 
| 401 | 
  | 
  | 
                return inputState; | 
| 402 | 
  | 
  | 
        } | 
| 403 | 
  | 
  | 
 | 
| 404 | 
  | 
  | 
        /** set the input state for the lexer. | 
| 405 | 
  | 
  | 
         * @note state is a reference counted object, hence no reference */ | 
| 406 | 
  | 
  | 
        virtual void setInputState(LexerSharedInputState state) | 
| 407 | 
  | 
  | 
        { | 
| 408 | 
  | 
  | 
                inputState = state; | 
| 409 | 
  | 
  | 
        } | 
| 410 | 
  | 
  | 
 | 
| 411 | 
  | 
  | 
        /// Set the factory for created tokens | 
| 412 | 
  | 
  | 
        virtual void setTokenObjectFactory(factory_type factory) | 
| 413 | 
  | 
  | 
        { | 
| 414 | 
  | 
  | 
                tokenFactory = factory; | 
| 415 | 
  | 
  | 
        } | 
| 416 | 
  | 
  | 
 | 
| 417 | 
  | 
  | 
        /** Test the token text against the literals table | 
| 418 | 
  | 
  | 
         * Override this method to perform a different literals test | 
| 419 | 
  | 
  | 
         */ | 
| 420 | 
  | 
  | 
        virtual int testLiteralsTable(int ttype) const | 
| 421 | 
  | 
  | 
        { | 
| 422 | 
  | 
  | 
                ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text); | 
| 423 | 
  | 
  | 
                if (i != literals.end()) | 
| 424 | 
  | 
  | 
                        ttype = (*i).second; | 
| 425 | 
  | 
  | 
                return ttype; | 
| 426 | 
  | 
  | 
        } | 
| 427 | 
  | 
  | 
 | 
| 428 | 
  | 
  | 
        /** Test the text passed in against the literals table | 
| 429 | 
  | 
  | 
         * Override this method to perform a different literals test | 
| 430 | 
  | 
  | 
         * This is used primarily when you want to test a portion of | 
| 431 | 
  | 
  | 
         * a token | 
| 432 | 
  | 
  | 
         */ | 
| 433 | 
  | 
  | 
        virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const | 
| 434 | 
  | 
  | 
        { | 
| 435 | 
  | 
  | 
                ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt); | 
| 436 | 
  | 
  | 
                if (i != literals.end()) | 
| 437 | 
  | 
  | 
                        ttype = (*i).second; | 
| 438 | 
  | 
  | 
                return ttype; | 
| 439 | 
  | 
  | 
        } | 
| 440 | 
  | 
  | 
 | 
| 441 | 
  | 
  | 
        /// Override this method to get more specific case handling | 
| 442 | 
  | 
  | 
        virtual int toLower(int c) const | 
| 443 | 
  | 
  | 
        { | 
| 444 | 
  | 
  | 
                // test on EOF_CHAR for buggy (?) STLPort tolower (or HPUX tolower?) | 
| 445 | 
  | 
  | 
                // also VC++ 6.0 does this. (see fix 422 (is reverted by this fix) | 
| 446 | 
  | 
  | 
                // this one is more structural. Maybe make this configurable. | 
| 447 | 
  | 
  | 
                return (c == EOF_CHAR ? EOF_CHAR : tolower(c)); | 
| 448 | 
  | 
  | 
        } | 
| 449 | 
  | 
  | 
 | 
| 450 | 
  | 
  | 
        /** This method is called by YourLexer::nextToken() when the lexer has | 
| 451 | 
  | 
  | 
         *  hit EOF condition.  EOF is NOT a character. | 
| 452 | 
  | 
  | 
         *  This method is not called if EOF is reached during | 
| 453 | 
  | 
  | 
         *  syntactic predicate evaluation or during evaluation | 
| 454 | 
  | 
  | 
         *  of normal lexical rules, which presumably would be | 
| 455 | 
  | 
  | 
         *  an IOException.  This traps the "normal" EOF condition. | 
| 456 | 
  | 
  | 
         * | 
| 457 | 
  | 
  | 
         *  uponEOF() is called after the complete evaluation of | 
| 458 | 
  | 
  | 
         *  the previous token and only if your parser asks | 
| 459 | 
  | 
  | 
         *  for another token beyond that last non-EOF token. | 
| 460 | 
  | 
  | 
         * | 
| 461 | 
  | 
  | 
         *  You might want to throw token or char stream exceptions | 
| 462 | 
  | 
  | 
         *  like: "Heh, premature eof" or a retry stream exception | 
| 463 | 
  | 
  | 
         *  ("I found the end of this file, go back to referencing file"). | 
| 464 | 
  | 
  | 
         */ | 
| 465 | 
  | 
  | 
        virtual void uponEOF() | 
| 466 | 
  | 
  | 
        { | 
| 467 | 
  | 
  | 
        } | 
| 468 | 
  | 
  | 
 | 
| 469 | 
  | 
  | 
        /// Methods used to change tracing behavior | 
| 470 | 
  | 
  | 
        virtual void traceIndent(); | 
| 471 | 
  | 
  | 
        virtual void traceIn(const char* rname); | 
| 472 | 
  | 
  | 
        virtual void traceOut(const char* rname); | 
| 473 | 
  | 
  | 
 | 
| 474 | 
  | 
  | 
#ifndef NO_STATIC_CONSTS | 
| 475 | 
  | 
  | 
        static const int EOF_CHAR = EOF; | 
| 476 | 
  | 
  | 
#else | 
| 477 | 
  | 
  | 
        enum { | 
| 478 | 
  | 
  | 
                EOF_CHAR = EOF | 
| 479 | 
  | 
  | 
        }; | 
| 480 | 
  | 
  | 
#endif | 
| 481 | 
  | 
  | 
protected: | 
| 482 | 
  | 
  | 
        ANTLR_USE_NAMESPACE(std)string text; ///< Text of current token | 
| 483 | 
  | 
  | 
        /// flag indicating wether consume saves characters | 
| 484 | 
  | 
  | 
        bool saveConsumedInput; | 
| 485 | 
  | 
  | 
        factory_type tokenFactory;                              ///< Factory for tokens | 
| 486 | 
  | 
  | 
        bool caseSensitive;                                             ///< Is this lexer case sensitive | 
| 487 | 
  | 
  | 
        ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass | 
| 488 | 
  | 
  | 
 | 
| 489 | 
  | 
  | 
        RefToken _returnToken;          ///< used to return tokens w/o using return val | 
| 490 | 
  | 
  | 
 | 
| 491 | 
  | 
  | 
        /// Input state, gives access to input stream, shared among different lexers | 
| 492 | 
  | 
  | 
        LexerSharedInputState inputState; | 
| 493 | 
  | 
  | 
 | 
| 494 | 
  | 
  | 
        /** Used during filter mode to indicate that path is desired. | 
| 495 | 
  | 
  | 
         * A subsequent scan error will report an error as usual | 
| 496 | 
  | 
  | 
         * if acceptPath=true; | 
| 497 | 
  | 
  | 
         */ | 
| 498 | 
  | 
  | 
        bool commitToPath; | 
| 499 | 
  | 
  | 
 | 
| 500 | 
  | 
  | 
        int tabsize;    ///< tab size the scanner uses. | 
| 501 | 
  | 
  | 
 | 
| 502 | 
  | 
  | 
        /// Create a new RefToken of type t | 
| 503 | 
  | 
  | 
        virtual RefToken makeToken(int t) | 
| 504 | 
  | 
  | 
        { | 
| 505 | 
  | 
  | 
                RefToken tok = tokenFactory(); | 
| 506 | 
  | 
  | 
                tok->setType(t); | 
| 507 | 
  | 
  | 
                tok->setColumn(inputState->tokenStartColumn); | 
| 508 | 
  | 
  | 
                tok->setLine(inputState->tokenStartLine); | 
| 509 | 
  | 
  | 
                return tok; | 
| 510 | 
  | 
  | 
        } | 
| 511 | 
  | 
  | 
 | 
| 512 | 
  | 
  | 
        /** Tracer class, used when -traceLexer is passed to antlr | 
| 513 | 
  | 
  | 
         */ | 
| 514 | 
  | 
  | 
        class Tracer { | 
| 515 | 
  | 
  | 
        private: | 
| 516 | 
  | 
  | 
                CharScanner* parser; | 
| 517 | 
  | 
  | 
                const char* text; | 
| 518 | 
  | 
  | 
 | 
| 519 | 
  | 
  | 
                Tracer(const Tracer& other);                                    // undefined | 
| 520 | 
  | 
  | 
                Tracer& operator=(const Tracer& other);         // undefined | 
| 521 | 
  | 
  | 
        public: | 
| 522 | 
  | 
  | 
                Tracer( CharScanner* p,const char* t ) | 
| 523 | 
  | 
  | 
                : parser(p), text(t) | 
| 524 | 
  | 
  | 
                { | 
| 525 | 
  | 
  | 
                        parser->traceIn(text); | 
| 526 | 
  | 
  | 
                } | 
| 527 | 
  | 
  | 
                ~Tracer() | 
| 528 | 
  | 
  | 
                { | 
| 529 | 
  | 
  | 
                        parser->traceOut(text); | 
| 530 | 
  | 
  | 
                } | 
| 531 | 
  | 
  | 
        }; | 
| 532 | 
  | 
  | 
 | 
| 533 | 
  | 
  | 
        int traceDepth; | 
| 534 | 
  | 
  | 
private: | 
| 535 | 
  | 
  | 
        CharScanner( const CharScanner& other );                                        // undefined | 
| 536 | 
  | 
  | 
        CharScanner& operator=( const CharScanner& other );     // undefined | 
| 537 | 
  | 
  | 
 | 
| 538 | 
  | 
  | 
#ifndef NO_STATIC_CONSTS | 
| 539 | 
  | 
  | 
        static const int NO_CHAR = 0; | 
| 540 | 
  | 
  | 
#else | 
| 541 | 
  | 
  | 
        enum { | 
| 542 | 
  | 
  | 
                NO_CHAR = 0 | 
| 543 | 
  | 
  | 
        }; | 
| 544 | 
  | 
  | 
#endif | 
| 545 | 
  | 
  | 
}; | 
| 546 | 
  | 
  | 
 | 
| 547 | 
  | 
  | 
inline int CharScanner::LA(unsigned int i) | 
| 548 | 
  | 
  | 
{ | 
| 549 | 
  | 
  | 
        int c = inputState->getInput().LA(i); | 
| 550 | 
  | 
  | 
 | 
| 551 | 
  | 
  | 
        if ( caseSensitive ) | 
| 552 | 
  | 
  | 
                return c; | 
| 553 | 
  | 
  | 
        else | 
| 554 | 
  | 
  | 
                return toLower(c);      // VC 6 tolower bug caught in toLower. | 
| 555 | 
  | 
  | 
} | 
| 556 | 
  | 
  | 
 | 
| 557 | 
  | 
  | 
inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const | 
| 558 | 
  | 
  | 
{ | 
| 559 | 
  | 
  | 
        if (scanner->getCaseSensitiveLiterals()) | 
| 560 | 
  | 
  | 
                return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y); | 
| 561 | 
  | 
  | 
        else | 
| 562 | 
  | 
  | 
        { | 
| 563 | 
  | 
  | 
#ifdef NO_STRCASECMP | 
| 564 | 
  | 
  | 
                return (stricmp(x.c_str(),y.c_str())<0); | 
| 565 | 
  | 
  | 
#else | 
| 566 | 
  | 
  | 
                return (strcasecmp(x.c_str(),y.c_str())<0); | 
| 567 | 
  | 
  | 
#endif | 
| 568 | 
  | 
  | 
        } | 
| 569 | 
  | 
  | 
} | 
| 570 | 
  | 
  | 
 | 
| 571 | 
  | 
  | 
#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE | 
| 572 | 
  | 
  | 
} | 
| 573 | 
  | 
  | 
#endif | 
| 574 | 
  | 
  | 
 | 
| 575 | 
  | 
  | 
#endif //INC_CharScanner_hpp__ |