src/antlr/CharScanner.hpp

#ifndef INC_CharScanner_hpp__
#define INC_CharScanner_hpp__

/* ANTLR Translator Generator
 * Project led by Terence Parr at http://www.jGuru.com
 * Software rights: http://www.antlr.org/license.html
 *
 * $Id: CharScanner.hpp,v 1.1 2005-12-02 15:38:02 tim Exp $
 */

#include <antlr/config.hpp>

#include <map>

#ifdef HAS_NOT_CCTYPE_H
#include <ctype.h>
#else
#include <cctype>
#endif

#if ( _MSC_VER == 1200 )
// VC6 seems to need this
// note that this is not a standard C++ include file.
# include <stdio.h>
#endif

#include <antlr/TokenStream.hpp>
#include <antlr/RecognitionException.hpp>
#include <antlr/SemanticException.hpp>
#include <antlr/MismatchedCharException.hpp>
#include <antlr/InputBuffer.hpp>
#include <antlr/BitSet.hpp>
#include <antlr/LexerSharedInputState.hpp>

#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
namespace antlr {
#endif

class ANTLR_API CharScanner;

ANTLR_C_USING(tolower)

#ifdef ANTLR_REALLY_NO_STRCASECMP
// Apparently, neither strcasecmp nor stricmp is standard, and Codewarrior
// on the mac has neither...
/** Fallback case-insensitive comparison of two NUL-terminated C strings.
 *
 * Each byte is lower-cased with tolower() and the results are compared
 * until they differ or both strings end.
 *
 * @param s1 first NUL-terminated string
 * @param s2 second NUL-terminated string
 * @return -1 if s1 orders before s2, 1 if it orders after, 0 if the two
 *         strings are equal when case is ignored
 */
inline int strcasecmp(const char *s1, const char *s2)
{
        for (;;)
        {
                // tolower() is only defined for values representable as
                // unsigned char (or EOF), so convert before calling it and
                // compare the results as ints: bytes >= 0x80 then order after
                // ASCII instead of becoming negative chars.
                const int c1 = tolower(static_cast<unsigned char>(*s1++));
                const int c2 = tolower(static_cast<unsigned char>(*s2++));
                if (c1 < c2) return -1;
                if (c1 > c2) return 1;
                if (c1 == 0) return 0;  // both strings ended together
        }
}
#else
#ifdef NO_STRCASECMP
ANTLR_C_USING(stricmp)
#else
ANTLR_C_USING(strcasecmp)
#endif
#endif

/** Functor for the literals map
 */
class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
private:
        const CharScanner* scanner;
public:
#ifdef NO_TEMPLATE_PARTS
        CharScannerLiteralsLess() {} // not really used, definition to appease MSVC
#endif
        CharScannerLiteralsLess(const CharScanner* theScanner)
        : scanner(theScanner)
        {
        }
        bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
// defaults are good enough..
        //      CharScannerLiteralsLess(const CharScannerLiteralsLess&);
        //      CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
};

/** Superclass of generated lexers
 */
/** Superclass of generated lexers
 *
 * Keeps the text of the token being scanned, the (shared) input state with
 * its line/column bookkeeping and the literals table, and provides the
 * consume/match primitives used by lexer rules.
 */
class ANTLR_API CharScanner : public TokenStream {
protected:
        /// Signature of the function makeToken() calls to create a token
        typedef RefToken (*factory_type)();
public:
        CharScanner(InputBuffer& cb, bool case_sensitive );
        CharScanner(InputBuffer* cb, bool case_sensitive );
        CharScanner(const LexerSharedInputState& state, bool case_sensitive );

        virtual ~CharScanner()
        {
        }

        /** Look ahead i characters in the input.
         * The result is passed through toLower() when the scanner is not
         * case sensitive (see the inline definition after the class).
         */
        virtual int LA(unsigned int i);

        /** Append a character to the text of the current token.
         * Does nothing unless saveConsumedInput is set.
         */
        virtual void append(char c)
        {
                if (saveConsumedInput)
                {
                        size_t l = text.length();

                        // reserve room in steps of 256 characters
                        if ((l%256) == 0)
                                text.reserve(l+256);

                        text.append(1, c);
                }
        }

        /** Append a string to the text of the current token.
         * Does nothing unless saveConsumedInput is set.
         */
        virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
        {
                if( saveConsumedInput )
                        text += s;
        }

        /// Forward commit() to the input buffer
        virtual void commit()
        {
                inputState->getInput().commit();
        }

        /** Consume one character of input.
         * When the scanner is not guessing the character is also appended to
         * the token text (in its original case) and the column is advanced:
         * to the next tab stop for a tab, by one otherwise.
         */
        virtual void consume()
        {
                if (inputState->guessing == 0)
                {
                        int c = LA(1);
                        if (caseSensitive)
                        {
                                append(static_cast<char>(c));
                        }
                        else
                        {
                                // use input.LA(), not LA(), to get original case
                                // CharScanner.LA() would toLower it.
                                append(static_cast<char>(inputState->getInput().LA(1)));
                        }

                        // RK: in a sense I don't like this automatic handling.
                        if (c == '\t')
                                tab();
                        else
                                inputState->column++;
                }
                inputState->getInput().consume();
        }

        /** Consume chars until one matches the given char
         * (or EOF_CHAR is seen); the matching char itself is not consumed.
         */
        virtual void consumeUntil(int c)
        {
                for(;;)
                {
                        int la_1 = LA(1);
                        if( la_1 == EOF_CHAR || la_1 == c )
                                break;
                        consume();
                }
        }

        /** Consume chars until one matches the given set
         * (or EOF_CHAR is seen); the matching char itself is not consumed.
         */
        virtual void consumeUntil(const BitSet& set)
        {
                for(;;)
                {
                        int la_1 = LA(1);
                        if( la_1 == EOF_CHAR || set.member(la_1) )
                                break;
                        consume();
                }
        }

        /// Mark the current position and return a id for it
        virtual unsigned int mark()
        {
                return inputState->getInput().mark();
        }
        /// Rewind the scanner to a previously marked position
        virtual void rewind(unsigned int pos)
        {
                inputState->getInput().rewind(pos);
        }

        /// See if input contains character 'c' throw MismatchedCharException if not
        virtual void match(int c)
        {
                int la_1 = LA(1);
                if ( la_1 != c )
                        throw MismatchedCharException(la_1, c, false, this);
                consume();
        }

        /** See if input contains element from bitset b
         * throw MismatchedCharException if not
         */
        virtual void match(const BitSet& b)
        {
                int la_1 = LA(1);

                if ( !b.member(la_1) )
                        throw MismatchedCharException( la_1, b, false, this );
                consume();
        }

        /** See if input contains string 's' throw MismatchedCharException if not
         * Characters matched before a mismatch have already been consumed.
         * @note the string cannot match EOF
         */
        virtual void match( const char* s )
        {
                while( *s != '\0' )
                {
                        // the & 0xFF is here to prevent sign extension lateron
                        int la_1 = LA(1), c = (*s++ & 0xFF);

                        if ( la_1 != c )
                                throw MismatchedCharException(la_1, c, false, this);

                        consume();
                }
        }
        /** See if input contains string 's' throw MismatchedCharException if not
         * Characters matched before a mismatch have already been consumed.
         * @note the string cannot match EOF
         */
        virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
        {
                size_t len = s.length();

                for (size_t i = 0; i < len; i++)
                {
                        // the & 0xFF is here to prevent sign extension lateron
                        int la_1 = LA(1), c = (s[i] & 0xFF);

                        if ( la_1 != c )
                                throw MismatchedCharException(la_1, c, false, this);

                        consume();
                }
        }
        /** See if input does not contain character 'c'
         * throw MismatchedCharException if it does
         */
        virtual void matchNot(int c)
        {
                int la_1 = LA(1);

                if ( la_1 == c )
                        throw MismatchedCharException(la_1, c, true, this);

                consume();
        }
        /** See if input contains character in range c1-c2 (both inclusive)
         * throw MismatchedCharException if not
         */
        virtual void matchRange(int c1, int c2)
        {
                int la_1 = LA(1);

                if ( la_1 < c1 || la_1 > c2 )
                        throw MismatchedCharException(la_1, c1, c2, false, this);

                consume();
        }

        /// Is the scanner case sensitive (see LA() and consume())
        virtual bool getCaseSensitive() const
        {
                return caseSensitive;
        }

        /// Switch case sensitivity of the scanner on or off
        virtual void setCaseSensitive(bool t)
        {
                caseSensitive = t;
        }

        /// Are literals compared case sensitively; answered by the subclass
        virtual bool getCaseSensitiveLiterals() const=0;

        /// Get the line the scanner currently is in (starts at 1)
        virtual int getLine() const
        {
                return inputState->line;
        }

        /// set the line number
        virtual void setLine(int l)
        {
                inputState->line = l;
        }

        /// Get the column the scanner currently is in (starts at 1)
        virtual int getColumn() const
        {
                return inputState->column;
        }
        /// set the column number
        virtual void setColumn(int c)
        {
                inputState->column = c;
        }

        /// get the filename for the file currently used
        virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
        {
                return inputState->filename;
        }
        /// Set the filename the scanner is using (used in error messages)
        virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
        {
                inputState->filename = f;
        }

        /// Get the commitToPath flag
        virtual bool getCommitToPath() const
        {
                return commitToPath;
        }

        /// Set the commitToPath flag
        virtual void setCommitToPath(bool commit)
        {
                commitToPath = commit;
        }

        /** Return the current text buffer.
         * @note this is a reference to the scanner's own buffer, not a copy;
         * copy it if the value has to survive further scanning.
         */
        virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
        {
                return text;
        }

        /// Replace the text of the current token
        virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
        {
                text = s;
        }

        /** Clear the token text and record the current line/column as the
         * start position of the next token.
         */
        virtual void resetText()
        {
                text = "";
                inputState->tokenStartColumn = inputState->column;
                inputState->tokenStartLine = inputState->line;
        }

        /// Return the token stored in _returnToken
        virtual RefToken getTokenObject() const
        {
                return _returnToken;
        }

        /** Used to keep track of line breaks, needs to be called from
         * within generated lexers when a \n \r is encountered.
         */
        virtual void newline()
        {
                ++inputState->line;
                inputState->column = 1;
        }

        /** Advance the current column number by an appropriate amount according
         * to the tabsize. This method needs to be explicitly called from the
         * lexer rules encountering tabs.
         *
         * With a tabsize that is not positive there are no tab stops to
         * compute, so the column is advanced by one instead.
         */
        virtual void tab()
        {
                int c = getColumn();
                if (tabsize <= 0)
                {
                        // avoid dividing by zero after setTabsize(0)
                        setColumn( c + 1 );
                        return;
                }
                int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1;      // calculate tab stop
                setColumn( nc );
        }
        /// set the tabsize. Returns the old tabsize
        int setTabsize( int size )
        {
                int oldsize = tabsize;
                tabsize = size;
                return oldsize;
        }
        /// Return the tabsize used by the scanner
        int getTabSize() const
        {
                return tabsize;
        }

        /** Report exception errors caught in nextToken() */
        virtual void reportError(const RecognitionException& e);

        /** Parser error-reporting function can be overridden in subclass */
        virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);

        /** Parser warning-reporting function can be overridden in subclass */
        virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);

        /// Return the input buffer of the current input state
        virtual InputBuffer& getInputBuffer()
        {
                return inputState->getInput();
        }

        /// Return the input state used by this lexer
        virtual LexerSharedInputState getInputState()
        {
                return inputState;
        }

        /** set the input state for the lexer.
         * @note state is a reference counted object, hence no reference */
        virtual void setInputState(LexerSharedInputState state)
        {
                inputState = state;
        }

        /// Set the factory for created tokens
        virtual void setTokenObjectFactory(factory_type factory)
        {
                tokenFactory = factory;
        }

        /** Test the token text against the literals table
         * Override this method to perform a different literals test
         * @param ttype token type to return when the text is not a literal
         * @return the type stored in the literals table for the current
         *         token text, or ttype if there is no such entry
         */
        virtual int testLiteralsTable(int ttype) const
        {
                ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
                if (i != literals.end())
                        ttype = (*i).second;
                return ttype;
        }

        /** Test the text passed in against the literals table
         * Override this method to perform a different literals test
         * This is used primarily when you want to test a portion of
         * a token
         * @param txt text to look up
         * @param ttype token type to return when txt is not a literal
         * @return the type stored in the literals table for txt, or ttype
         *         if there is no such entry
         */
        virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
        {
                ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
                if (i != literals.end())
                        ttype = (*i).second;
                return ttype;
        }

        /// Override this method to get more specific case handling
        virtual int toLower(int c) const
        {
                // test on EOF_CHAR for buggy (?) STLPort tolower (or HPUX tolower?)
                // also VC++ 6.0 does this. (see fix 422 (is reverted by this fix)
                // this one is more structural. Maybe make this configurable.
                return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
        }

        /** This method is called by YourLexer::nextToken() when the lexer has
         *  hit EOF condition.  EOF is NOT a character.
         *  This method is not called if EOF is reached during
         *  syntactic predicate evaluation or during evaluation
         *  of normal lexical rules, which presumably would be
         *  an IOException.  This traps the "normal" EOF condition.
         *
         *  uponEOF() is called after the complete evaluation of
         *  the previous token and only if your parser asks
         *  for another token beyond that last non-EOF token.
         *
         *  You might want to throw token or char stream exceptions
         *  like: "Heh, premature eof" or a retry stream exception
         *  ("I found the end of this file, go back to referencing file").
         */
        virtual void uponEOF()
        {
        }

        /// Methods used to change tracing behavior
        virtual void traceIndent();
        virtual void traceIn(const char* rname);
        virtual void traceOut(const char* rname);

        // Value used to signal end of input; an enum is used for compilers
        // without support for in-class static constants.
#ifndef NO_STATIC_CONSTS
        static const int EOF_CHAR = EOF;
#else
        enum {
                EOF_CHAR = EOF
        };
#endif
protected:
        ANTLR_USE_NAMESPACE(std)string text; ///< Text of current token
        /// flag indicating whether consume saves characters
        bool saveConsumedInput;
        factory_type tokenFactory;                              ///< Factory for tokens
        bool caseSensitive;                                             ///< Is this lexer case sensitive
        ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass

        RefToken _returnToken;          ///< used to return tokens w/o using return val

        /// Input state, gives access to input stream, shared among different lexers
        LexerSharedInputState inputState;

        /** Used during filter mode to indicate that path is desired.
         * A subsequent scan error will report an error as usual
         * if acceptPath=true;
         * (NOTE(review): no acceptPath exists in this class, presumably this
         * flag is meant.)
         */
        bool commitToPath;

        int tabsize;    ///< tab size the scanner uses.

        /** Create a new RefToken of type t
         * The token comes from tokenFactory and gets the start line and
         * column recorded in the input state.
         */
        virtual RefToken makeToken(int t)
        {
                RefToken tok = tokenFactory();
                tok->setType(t);
                tok->setColumn(inputState->tokenStartColumn);
                tok->setLine(inputState->tokenStartLine);
                return tok;
        }

        /** Tracer class, used when -traceLexer is passed to antlr
         * Calls traceIn() on construction and traceOut() on destruction.
         */
        class Tracer {
        private:
                CharScanner* parser;    ///< scanner whose trace methods are called
                const char* text;               ///< text handed to traceIn()/traceOut()

                Tracer(const Tracer& other);                                    // undefined
                Tracer& operator=(const Tracer& other);         // undefined
        public:
                Tracer( CharScanner* p,const char* t )
                : parser(p), text(t)
                {
                        parser->traceIn(text);
                }
                ~Tracer()
                {
                        parser->traceOut(text);
                }
        };

        int traceDepth;
private:
        // copying a scanner is not supported
        CharScanner( const CharScanner& other );                                        // undefined
        CharScanner& operator=( const CharScanner& other );     // undefined

#ifndef NO_STATIC_CONSTS
        static const int NO_CHAR = 0;
#else
        enum {
                NO_CHAR = 0
        };
#endif
};

/** Look ahead i characters in the input buffer of the current input state.
 * A case sensitive scanner returns the character unchanged, any other
 * scanner returns it converted by toLower().
 */
inline int CharScanner::LA(unsigned int i)
{
        const int ahead = inputState->getInput().LA(i);

        // VC 6 tolower bug caught in toLower.
        return caseSensitive ? ahead : toLower(ahead);
}

/** Order two literals for the literals map.
 * Uses std::less on the strings when the scanner reports case sensitive
 * literals, otherwise a case-insensitive C string comparison.
 * @return true if x orders before y
 */
inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
{
        if (!scanner->getCaseSensitiveLiterals())
        {
                // which case-insensitive compare exists depends on the platform
#ifdef NO_STRCASECMP
                const int order = stricmp(x.c_str(),y.c_str());
#else
                const int order = strcasecmp(x.c_str(),y.c_str());
#endif
                return (order < 0);
        }

        return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
}

#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
}
#endif

#endif //INC_CharScanner_hpp__
Revision:	770
Committed:	Fri Dec 2 15:38:03 2005 UTC (19 years, 10 months ago) by tim
File size:	13486 byte(s)
Log Message:	End of the Link --> List Return of the Object-Oriented replace yacc/lex parser with antlr parser
#	User	Rev	Content
1	tim	770	#ifndef INC_CharScanner_hpp__
2			#define INC_CharScanner_hpp__
3
4			/* ANTLR Translator Generator
5			* Project led by Terence Parr at http://www.jGuru.com
6			* Software rights: http://www.antlr.org/license.html
7			*
8			* $Id: CharScanner.hpp,v 1.1 2005-12-02 15:38:02 tim Exp $
9			*/
10
11			#include <antlr/config.hpp>
12
13			#include <map>
14
15			#ifdef HAS_NOT_CCTYPE_H
16			#include <ctype.h>
17			#else
18			#include <cctype>
19			#endif
20
21			#if ( _MSC_VER == 1200 )
22			// VC6 seems to need this
23			// note that this is not a standard C++ include file.
24			# include <stdio.h>
25			#endif
26
27			#include <antlr/TokenStream.hpp>
28			#include <antlr/RecognitionException.hpp>
29			#include <antlr/SemanticException.hpp>
30			#include <antlr/MismatchedCharException.hpp>
31			#include <antlr/InputBuffer.hpp>
32			#include <antlr/BitSet.hpp>
33			#include <antlr/LexerSharedInputState.hpp>
34
35			#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
36			namespace antlr {
37			#endif
38
39			class ANTLR_API CharScanner;
40
41			ANTLR_C_USING(tolower)
42
43			#ifdef ANTLR_REALLY_NO_STRCASECMP
44			// Apparently, neither strcasecmp nor stricmp is standard, and Codewarrior
45			// on the mac has neither...
46			inline int strcasecmp(const char s1, const char s2)
47			{
48			while (true)
49			{
50			char c1 = tolower(*s1++),
51			c2 = tolower(*s2++);
52			if (c1 < c2) return -1;
53			if (c1 > c2) return 1;
54			if (c1 == 0) return 0;
55			}
56			}
57			#else
58			#ifdef NO_STRCASECMP
59			ANTLR_C_USING(stricmp)
60			#else
61			ANTLR_C_USING(strcasecmp)
62			#endif
63			#endif
64
65			/** Functor for the literals map
66			*/
67			class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
68			private:
69			const CharScanner* scanner;
70			public:
71			#ifdef NO_TEMPLATE_PARTS
72			CharScannerLiteralsLess() {} // not really used, definition to appease MSVC
73			#endif
74			CharScannerLiteralsLess(const CharScanner* theScanner)
75			: scanner(theScanner)
76			{
77			}
78			bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
79			// defaults are good enough..
80			// CharScannerLiteralsLess(const CharScannerLiteralsLess&);
81			// CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
82			};
83
84			/** Superclass of generated lexers
85			*/
86			class ANTLR_API CharScanner : public TokenStream {
87			protected:
88			typedef RefToken (*factory_type)();
89			public:
90			CharScanner(InputBuffer& cb, bool case_sensitive );
91			CharScanner(InputBuffer* cb, bool case_sensitive );
92			CharScanner(const LexerSharedInputState& state, bool case_sensitive );
93
94			virtual ~CharScanner()
95			{
96			}
97
98			virtual int LA(unsigned int i);
99
100			virtual void append(char c)
101			{
102			if (saveConsumedInput)
103			{
104			size_t l = text.length();
105
106			if ((l%256) == 0)
107			text.reserve(l+256);
108
109			text.replace(l,0,&c,1);
110			}
111			}
112
113			virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
114			{
115			if( saveConsumedInput )
116			text += s;
117			}
118
119			virtual void commit()
120			{
121			inputState->getInput().commit();
122			}
123
124			virtual void consume()
125			{
126			if (inputState->guessing == 0)
127			{
128			int c = LA(1);
129			if (caseSensitive)
130			{
131			append(c);
132			}
133			else
134			{
135			// use input.LA(), not LA(), to get original case
136			// CharScanner.LA() would toLower it.
137			append(inputState->getInput().LA(1));
138			}
139
140			// RK: in a sense I don't like this automatic handling.
141			if (c == '\t')
142			tab();
143			else
144			inputState->column++;
145			}
146			inputState->getInput().consume();
147			}
148
149			/** Consume chars until one matches the given char */
150			virtual void consumeUntil(int c)
151			{
152			for(;;)
153			{
154			int la_1 = LA(1);
155			if( la_1 == EOF_CHAR \|\| la_1 == c )
156			break;
157			consume();
158			}
159			}
160
161			/** Consume chars until one matches the given set */
162			virtual void consumeUntil(const BitSet& set)
163			{
164			for(;;)
165			{
166			int la_1 = LA(1);
167			if( la_1 == EOF_CHAR \|\| set.member(la_1) )
168			break;
169			consume();
170			}
171			}
172
173			/// Mark the current position and return a id for it
174			virtual unsigned int mark()
175			{
176			return inputState->getInput().mark();
177			}
178			/// Rewind the scanner to a previously marked position
179			virtual void rewind(unsigned int pos)
180			{
181			inputState->getInput().rewind(pos);
182			}
183
184			/// See if input contains character 'c' throw MismatchedCharException if not
185			virtual void match(int c)
186			{
187			int la_1 = LA(1);
188			if ( la_1 != c )
189			throw MismatchedCharException(la_1, c, false, this);
190			consume();
191			}
192
193			/** See if input contains element from bitset b
194			* throw MismatchedCharException if not
195			*/
196			virtual void match(const BitSet& b)
197			{
198			int la_1 = LA(1);
199
200			if ( !b.member(la_1) )
201			throw MismatchedCharException( la_1, b, false, this );
202			consume();
203			}
204
205			/** See if input contains string 's' throw MismatchedCharException if not
206			* @note the string cannot match EOF
207			*/
208			virtual void match( const char* s )
209			{
210			while( *s != '\0' )
211			{
212			// the & 0xFF is here to prevent sign extension lateron
213			int la_1 = LA(1), c = (*s++ & 0xFF);
214
215			if ( la_1 != c )
216			throw MismatchedCharException(la_1, c, false, this);
217
218			consume();
219			}
220			}
221			/** See if input contains string 's' throw MismatchedCharException if not
222			* @note the string cannot match EOF
223			*/
224			virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
225			{
226			size_t len = s.length();
227
228			for (size_t i = 0; i < len; i++)
229			{
230			// the & 0xFF is here to prevent sign extension lateron
231			int la_1 = LA(1), c = (s[i] & 0xFF);
232
233			if ( la_1 != c )
234			throw MismatchedCharException(la_1, c, false, this);
235
236			consume();
237			}
238			}
239			/** See if input does not contain character 'c'
240			* throw MismatchedCharException if not
241			*/
242			virtual void matchNot(int c)
243			{
244			int la_1 = LA(1);
245
246			if ( la_1 == c )
247			throw MismatchedCharException(la_1, c, true, this);
248
249			consume();
250			}
251			/** See if input contains character in range c1-c2
252			* throw MismatchedCharException if not
253			*/
254			virtual void matchRange(int c1, int c2)
255			{
256			int la_1 = LA(1);
257
258			if ( la_1 < c1 \|\| la_1 > c2 )
259			throw MismatchedCharException(la_1, c1, c2, false, this);
260
261			consume();
262			}
263
264			virtual bool getCaseSensitive() const
265			{
266			return caseSensitive;
267			}
268
269			virtual void setCaseSensitive(bool t)
270			{
271			caseSensitive = t;
272			}
273
274			virtual bool getCaseSensitiveLiterals() const=0;
275
276			/// Get the line the scanner currently is in (starts at 1)
277			virtual int getLine() const
278			{
279			return inputState->line;
280			}
281
282			/// set the line number
283			virtual void setLine(int l)
284			{
285			inputState->line = l;
286			}
287
288			/// Get the column the scanner currently is in (starts at 1)
289			virtual int getColumn() const
290			{
291			return inputState->column;
292			}
293			/// set the column number
294			virtual void setColumn(int c)
295			{
296			inputState->column = c;
297			}
298
299			/// get the filename for the file currently used
300			virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
301			{
302			return inputState->filename;
303			}
304			/// Set the filename the scanner is using (used in error messages)
305			virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
306			{
307			inputState->filename = f;
308			}
309
310			virtual bool getCommitToPath() const
311			{
312			return commitToPath;
313			}
314
315			virtual void setCommitToPath(bool commit)
316			{
317			commitToPath = commit;
318			}
319
320			/** return a copy of the current text buffer */
321			virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
322			{
323			return text;
324			}
325
326			virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
327			{
328			text = s;
329			}
330
331			virtual void resetText()
332			{
333			text = "";
334			inputState->tokenStartColumn = inputState->column;
335			inputState->tokenStartLine = inputState->line;
336			}
337
338			virtual RefToken getTokenObject() const
339			{
340			return _returnToken;
341			}
342
343			/** Used to keep track of line breaks, needs to be called from
344			* within generated lexers when a \n \r is encountered.
345			*/
346			virtual void newline()
347			{
348			++inputState->line;
349			inputState->column = 1;
350			}
351
352			/** Advance the current column number by an appropriate amount according
353			* to the tabsize. This method needs to be explicitly called from the
354			* lexer rules encountering tabs.
355			*/
356			virtual void tab()
357			{
358			int c = getColumn();
359			int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1; // calculate tab stop
360			setColumn( nc );
361			}
362			/// set the tabsize. Returns the old tabsize
363			int setTabsize( int size )
364			{
365			int oldsize = tabsize;
366			tabsize = size;
367			return oldsize;
368			}
369			/// Return the tabsize used by the scanner
370			int getTabSize() const
371			{
372			return tabsize;
373			}
374
375			/** Report exception errors caught in nextToken() */
376			virtual void reportError(const RecognitionException& e);
377
378			/** Parser error-reporting function can be overridden in subclass */
379			virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
380
381			/** Parser warning-reporting function can be overridden in subclass */
382			virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);
383
384			virtual InputBuffer& getInputBuffer()
385			{
386			return inputState->getInput();
387			}
388
389			virtual LexerSharedInputState getInputState()
390			{
391			return inputState;
392			}
393
394			/** set the input state for the lexer.
395			* @note state is a reference counted object, hence no reference */
396			virtual void setInputState(LexerSharedInputState state)
397			{
398			inputState = state;
399			}
400
401			/// Set the factory for created tokens
402			virtual void setTokenObjectFactory(factory_type factory)
403			{
404			tokenFactory = factory;
405			}
406
407			/** Test the token text against the literals table
408			* Override this method to perform a different literals test
409			*/
410			virtual int testLiteralsTable(int ttype) const
411			{
412			ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
413			if (i != literals.end())
414			ttype = (*i).second;
415			return ttype;
416			}
417
418			/** Test the text passed in against the literals table
419			* Override this method to perform a different literals test
420			* This is used primarily when you want to test a portion of
421			* a token
422			*/
423			virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
424			{
425			ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
426			if (i != literals.end())
427			ttype = (*i).second;
428			return ttype;
429			}
430
431			/// Override this method to get more specific case handling
432			virtual int toLower(int c) const
433			{
434			// test on EOF_CHAR for buggy (?) STLPort tolower (or HPUX tolower?)
435			// also VC++ 6.0 does this. (see fix 422 (is reverted by this fix)
436			// this one is more structural. Maybe make this configurable.
437			return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
438			}
439
440			/** This method is called by YourLexer::nextToken() when the lexer has
441			* hit EOF condition. EOF is NOT a character.
442			* This method is not called if EOF is reached during
443			* syntactic predicate evaluation or during evaluation
444			* of normal lexical rules, which presumably would be
445			* an IOException. This traps the "normal" EOF condition.
446			*
447			* uponEOF() is called after the complete evaluation of
448			* the previous token and only if your parser asks
449			* for another token beyond that last non-EOF token.
450			*
451			* You might want to throw token or char stream exceptions
452			* like: "Heh, premature eof" or a retry stream exception
453			* ("I found the end of this file, go back to referencing file").
454			*/
455			virtual void uponEOF()
456			{
457			}
458
459			/// Methods used to change tracing behavior
460			virtual void traceIndent();
461			virtual void traceIn(const char* rname);
462			virtual void traceOut(const char* rname);
463
464			#ifndef NO_STATIC_CONSTS
465			static const int EOF_CHAR = EOF;
466			#else
467			enum {
468			EOF_CHAR = EOF
469			};
470			#endif
471			protected:
472			ANTLR_USE_NAMESPACE(std)string text; ///< Text of current token
473			/// flag indicating wether consume saves characters
474			bool saveConsumedInput;
475			factory_type tokenFactory; ///< Factory for tokens
476			bool caseSensitive; ///< Is this lexer case sensitive
477			ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass
478
479			RefToken _returnToken; ///< used to return tokens w/o using return val
480
481			/// Input state, gives access to input stream, shared among different lexers
482			LexerSharedInputState inputState;
483
484			/** Used during filter mode to indicate that path is desired.
485			* A subsequent scan error will report an error as usual
486			* if acceptPath=true;
487			*/
488			bool commitToPath;
489
490			int tabsize; ///< tab size the scanner uses.
491
492			/// Create a new RefToken of type t
493			virtual RefToken makeToken(int t)
494			{
495			RefToken tok = tokenFactory();
496			tok->setType(t);
497			tok->setColumn(inputState->tokenStartColumn);
498			tok->setLine(inputState->tokenStartLine);
499			return tok;
500			}
501
502			/** Tracer class, used when -traceLexer is passed to antlr
503			*/
504			class Tracer {
505			private:
506			CharScanner* parser;
507			const char* text;
508
509			Tracer(const Tracer& other); // undefined
510			Tracer& operator=(const Tracer& other); // undefined
511			public:
512			Tracer( CharScanner* p,const char* t )
513			: parser(p), text(t)
514			{
515			parser->traceIn(text);
516			}
517			~Tracer()
518			{
519			parser->traceOut(text);
520			}
521			};
522
523			int traceDepth;
524			private:
525			CharScanner( const CharScanner& other ); // undefined
526			CharScanner& operator=( const CharScanner& other ); // undefined
527
528			#ifndef NO_STATIC_CONSTS
529			static const int NO_CHAR = 0;
530			#else
531			enum {
532			NO_CHAR = 0
533			};
534			#endif
535			};
536
537			inline int CharScanner::LA(unsigned int i)
538			{
539			int c = inputState->getInput().LA(i);
540
541			if ( caseSensitive )
542			return c;
543			else
544			return toLower(c); // VC 6 tolower bug caught in toLower.
545			}
546
547			inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
548			{
549			if (scanner->getCaseSensitiveLiterals())
550			return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
551			else
552			{
553			#ifdef NO_STRCASECMP
554			return (stricmp(x.c_str(),y.c_str())<0);
555			#else
556			return (strcasecmp(x.c_str(),y.c_str())<0);
557			#endif
558			}
559			}
560
561			#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
562			}
563			#endif
564
565			#endif //INC_CharScanner_hpp__