src/antlr/CharScanner.hpp

#ifndef INC_CharScanner_hpp__
#define INC_CharScanner_hpp__

/* ANTLR Translator Generator
 * Project led by Terence Parr at http://www.jGuru.com
 * Software rights: http://www.antlr.org/license.html
 *
 * $Id$
 */

#include <antlr/config.hpp>
#include <cstdio>
#include <map>
#include <cstring>

#ifdef HAS_NOT_CCTYPE_H
#include <ctype.h>
#else
#include <cctype>
#endif

#if ( _MSC_VER == 1200 )
// VC6 seems to need this
// note that this is not a standard C++ include file.
# include <stdio.h>
#endif

#include <antlr/TokenStream.hpp>
#include <antlr/RecognitionException.hpp>
#include <antlr/SemanticException.hpp>
#include <antlr/MismatchedCharException.hpp>
#include <antlr/InputBuffer.hpp>
#include <antlr/BitSet.hpp>
#include <antlr/LexerSharedInputState.hpp>

#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
namespace antlr {
#endif

class ANTLR_API CharScanner;

ANTLR_C_USING(tolower)

#ifdef ANTLR_REALLY_NO_STRCASECMP
// Apparently, neither strcasecmp nor stricmp is standard, and Codewarrior
// on the mac has neither...
/** Fallback case-insensitive comparison of two NUL-terminated strings.
 *
 * Every character is converted to unsigned char before it is handed to
 * tolower(), and the lowered values are compared as ints. That keeps bytes
 * >= 0x80 from being passed to tolower() as negative numbers and makes them
 * sort after the ASCII range regardless of whether plain char is signed.
 *
 * @param s1 first NUL-terminated string
 * @param s2 second NUL-terminated string
 * @return -1 if s1 orders before s2, 1 if it orders after, 0 if both are
 *         equal when case is ignored
 */
inline int strcasecmp(const char *s1, const char *s2)
{
        while (true)
        {
                const int c1 = tolower(static_cast<unsigned char>(*s1++));
                const int c2 = tolower(static_cast<unsigned char>(*s2++));
                if (c1 < c2) return -1;
                if (c1 > c2) return 1;
                // c1 == c2 here; a shared terminator means the strings are equal
                if (c1 == 0) return 0;
        }
}
#else
#ifdef NO_STRCASECMP
ANTLR_C_USING(stricmp)
#else
ANTLR_C_USING(strcasecmp)
#endif
#endif

/** Functor for the literals map
 */
class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
private:
        const CharScanner* scanner;
public:
#ifdef NO_TEMPLATE_PARTS
        CharScannerLiteralsLess() {} // not really used, definition to appease MSVC
#endif
        CharScannerLiteralsLess(const CharScanner* theScanner)
        : scanner(theScanner)
        {
        }
        bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
// defaults are good enough..
        //      CharScannerLiteralsLess(const CharScannerLiteralsLess&);
        //      CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
};

/** Superclass of generated lexers.
 *
 * Holds the text of the token currently being matched, the case sensitivity
 * flag, the literals table and the (shared) input state, and provides the
 * lookahead / match / consume primitives on top of the InputBuffer that is
 * reached through inputState.
 */
class ANTLR_API CharScanner : public TokenStream {
protected:
        /// Signature of the function makeToken() calls to obtain a new token
        typedef RefToken (*factory_type)();
public:
        CharScanner(InputBuffer& cb, bool case_sensitive );
        CharScanner(InputBuffer* cb, bool case_sensitive );
        CharScanner(const LexerSharedInputState& state, bool case_sensitive );

        virtual ~CharScanner()
        {
        }

        /** Look ahead i characters in the input; the value is passed through
         * toLower() when the scanner is not case sensitive.
         */
        virtual int LA(unsigned int i);

        /// Append one character to the token text (only if saveConsumedInput is set)
        virtual void append(char c)
        {
                if (saveConsumedInput)
                {
                        size_t l = text.length();

                        // grow the buffer in 256 character steps
                        if ((l%256) == 0)
                                text.reserve(l+256);

                        text += c;
                }
        }

        /// Append a string to the token text (only if saveConsumedInput is set)
        virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
        {
                if( saveConsumedInput )
                        text += s;
        }

        /// Forward a commit() to the underlying input buffer
        virtual void commit()
        {
                inputState->getInput().commit();
        }

        /** called by the generated lexer to do error recovery, override to
         * customize the behaviour.
         * The default consumes one character and then skips input until a
         * member of tokenSet (or EOF) is the next character.
         */
        virtual void recover(const RecognitionException& ex, const BitSet& tokenSet)
        {
                consume();
                consumeUntil(tokenSet);
        }

        /** Consume the next input character.
         * Unless the lexer is guessing, the character is also appended to the
         * token text and the column counter is updated (tab() for '\t',
         * otherwise +1).
         */
        virtual void consume()
        {
                if (inputState->guessing == 0)
                {
                        int c = LA(1);
                        if (caseSensitive)
                        {
                                append(static_cast<char>(c));
                        }
                        else
                        {
                                // use input.LA(), not LA(), to get original case
                                // CharScanner.LA() would toLower it.
                                append(static_cast<char>(inputState->getInput().LA(1)));
                        }

                        // RK: in a sense I don't like this automatic handling.
                        if (c == '\t')
                                tab();
                        else
                                inputState->column++;
                }
                inputState->getInput().consume();
        }

        /** Consume chars until one matches the given char (or EOF is seen) */
        virtual void consumeUntil(int c)
        {
                for(;;)
                {
                        int la_1 = LA(1);
                        if( la_1 == EOF_CHAR || la_1 == c )
                                break;
                        consume();
                }
        }

        /** Consume chars until one matches the given set (or EOF is seen) */
        virtual void consumeUntil(const BitSet& set)
        {
                for(;;)
                {
                        int la_1 = LA(1);
                        if( la_1 == EOF_CHAR || set.member(la_1) )
                                break;
                        consume();
                }
        }

        /// Mark the current position and return a id for it
        virtual unsigned int mark()
        {
                return inputState->getInput().mark();
        }
        /// Rewind the scanner to a previously marked position
        virtual void rewind(unsigned int pos)
        {
                inputState->getInput().rewind(pos);
        }

        /// See if input contains character 'c' throw MismatchedCharException if not
        virtual void match(int c)
        {
                int la_1 = LA(1);
                if ( la_1 != c )
                        throw MismatchedCharException(la_1, c, false, this);
                consume();
        }

        /** See if input contains element from bitset b
         * throw MismatchedCharException if not
         */
        virtual void match(const BitSet& b)
        {
                int la_1 = LA(1);

                if ( !b.member(la_1) )
                        throw MismatchedCharException( la_1, b, false, this );
                consume();
        }

        /** See if input contains string 's' throw MismatchedCharException if not
         * @note the string cannot match EOF
         */
        virtual void match( const char* s )
        {
                while( *s != '\0' )
                {
                        // the & 0xFF is here to prevent sign extension later on
                        int la_1 = LA(1), c = (*s++ & 0xFF);

                        if ( la_1 != c )
                                throw MismatchedCharException(la_1, c, false, this);

                        consume();
                }
        }
        /** See if input contains string 's' throw MismatchedCharException if not
         * @note the string cannot match EOF
         */
        virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
        {
                size_t len = s.length();

                for (size_t i = 0; i < len; i++)
                {
                        // the & 0xFF is here to prevent sign extension later on
                        int la_1 = LA(1), c = (s[i] & 0xFF);

                        if ( la_1 != c )
                                throw MismatchedCharException(la_1, c, false, this);

                        consume();
                }
        }
        /** See if input does not contain character 'c'
         * throw MismatchedCharException if it does
         */
        virtual void matchNot(int c)
        {
                int la_1 = LA(1);

                if ( la_1 == c )
                        throw MismatchedCharException(la_1, c, true, this);

                consume();
        }
        /** See if input contains character in range c1-c2 (both inclusive)
         * throw MismatchedCharException if not
         */
        virtual void matchRange(int c1, int c2)
        {
                int la_1 = LA(1);

                if ( la_1 < c1 || la_1 > c2 )
                        throw MismatchedCharException(la_1, c1, c2, false, this);

                consume();
        }

        /// Is the scanner matching input case sensitively?
        virtual bool getCaseSensitive() const
        {
                return caseSensitive;
        }

        /// Switch case sensitive matching of the input on or off
        virtual void setCaseSensitive(bool t)
        {
                caseSensitive = t;
        }

        /// Are literals compared case sensitively? Supplied by the subclass.
        virtual bool getCaseSensitiveLiterals() const=0;

        /// Get the line the scanner currently is in (starts at 1)
        virtual int getLine() const
        {
                return inputState->line;
        }

        /// set the line number
        virtual void setLine(int l)
        {
                inputState->line = l;
        }

        /// Get the column the scanner currently is in (starts at 1)
        virtual int getColumn() const
        {
                return inputState->column;
        }
        /// set the column number
        virtual void setColumn(int c)
        {
                inputState->column = c;
        }

        /// get the filename for the file currently used
        virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
        {
                return inputState->filename;
        }
        /// Set the filename the scanner is using (used in error messages)
        virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
        {
                inputState->filename = f;
        }

        /// Get the commitToPath flag
        virtual bool getCommitToPath() const
        {
                return commitToPath;
        }

        /// Set the commitToPath flag
        virtual void setCommitToPath(bool commit)
        {
                commitToPath = commit;
        }

        /** return a reference to the current text buffer; it refers to the
         * scanner's own member, so copy it if the text has to survive later
         * append()/setText()/resetText() calls.
         */
        virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
        {
                return text;
        }

        /// Replace the current token text
        virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
        {
                text = s;
        }

        /** Clear the token text and record the current line/column as the
         * start position of the next token.
         */
        virtual void resetText()
        {
                text = "";
                inputState->tokenStartColumn = inputState->column;
                inputState->tokenStartLine = inputState->line;
        }

        /// Get the token stored in _returnToken
        virtual RefToken getTokenObject() const
        {
                return _returnToken;
        }

        /** Used to keep track of line breaks, needs to be called from
         * within generated lexers when a \n \r is encountered.
         */
        virtual void newline()
        {
                ++inputState->line;
                inputState->column = 1;
        }

        /** Advance the current column number by an appropriate amount according
         * to the tabsize. This method needs to be explicitly called from the
         * lexer rules encountering tabs.
         * A tabsize of zero or less has no tab stops (and would divide by
         * zero below), so in that case the column is just advanced by one.
         */
        virtual void tab()
        {
                int c = getColumn();
                if (tabsize <= 0)
                {
                        setColumn( c + 1 );
                        return;
                }
                int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1;      // calculate tab stop
                setColumn( nc );
        }
        /// set the tabsize. Returns the old tabsize
        int setTabsize( int size )
        {
                int oldsize = tabsize;
                tabsize = size;
                return oldsize;
        }
        /// Return the tabsize used by the scanner
        int getTabSize() const
        {
                return tabsize;
        }

        /** Report exception errors caught in nextToken() */
        virtual void reportError(const RecognitionException& e);

        /** Parser error-reporting function can be overridden in subclass */
        virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);

        /** Parser warning-reporting function can be overridden in subclass */
        virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);

        /// Get the input buffer of the current input state
        virtual InputBuffer& getInputBuffer()
        {
                return inputState->getInput();
        }

        /// Get the input state used by this lexer
        virtual LexerSharedInputState getInputState()
        {
                return inputState;
        }

        /** set the input state for the lexer.
         * @note state is a reference counted object, hence no reference */
        virtual void setInputState(LexerSharedInputState state)
        {
                inputState = state;
        }

        /// Set the factory for created tokens
        virtual void setTokenObjectFactory(factory_type factory)
        {
                tokenFactory = factory;
        }

        /** Test the token text against the literals table
         * Override this method to perform a different literals test
         * @param ttype token type to return when the text is not a literal
         * @return the type stored for the literal, or ttype if there is none
         */
        virtual int testLiteralsTable(int ttype) const
        {
                ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
                if (i != literals.end())
                        ttype = (*i).second;
                return ttype;
        }

        /** Test the text passed in against the literals table
         * Override this method to perform a different literals test
         * This is used primarily when you want to test a portion of
         * a token
         * @param txt text to look up
         * @param ttype token type to return when txt is not a literal
         * @return the type stored for the literal, or ttype if there is none
         */
        virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
        {
                ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
                if (i != literals.end())
                        ttype = (*i).second;
                return ttype;
        }

        /// Override this method to get more specific case handling
        virtual int toLower(int c) const
        {
                // test on EOF_CHAR for buggy (?) STLPort tolower (or HPUX tolower?)
                // also VC++ 6.0 does this. (see fix 422 (is reverted by this fix)
                // this one is more structural. Maybe make this configurable.
                return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
        }

        /** This method is called by YourLexer::nextToken() when the lexer has
         *  hit EOF condition.  EOF is NOT a character.
         *  This method is not called if EOF is reached during
         *  syntactic predicate evaluation or during evaluation
         *  of normal lexical rules, which presumably would be
         *  an IOException.  This traps the "normal" EOF condition.
         *
         *  uponEOF() is called after the complete evaluation of
         *  the previous token and only if your parser asks
         *  for another token beyond that last non-EOF token.
         *
         *  You might want to throw token or char stream exceptions
         *  like: "Heh, premature eof" or a retry stream exception
         *  ("I found the end of this file, go back to referencing file").
         */
        virtual void uponEOF()
        {
        }

        /// Methods used to change tracing behavior
        virtual void traceIndent();
        virtual void traceIn(const char* rname);
        virtual void traceOut(const char* rname);

        // Value LA() yields at end of input (the C library's EOF)
#ifndef NO_STATIC_CONSTS
        static const int EOF_CHAR = EOF;
#else
        enum {
                EOF_CHAR = EOF
        };
#endif
protected:
        ANTLR_USE_NAMESPACE(std)string text; ///< Text of current token
        /// flag indicating whether consume saves characters
        bool saveConsumedInput;
        factory_type tokenFactory;                              ///< Factory for tokens
        bool caseSensitive;                                             ///< Is this lexer case sensitive
        ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass

        RefToken _returnToken;          ///< used to return tokens w/o using return val

        /// Input state, gives access to input stream, shared among different lexers
        LexerSharedInputState inputState;

        /** Used during filter mode to indicate that path is desired.
         * A subsequent scan error will report an error as usual
         * if commitToPath=true;
         */
        bool commitToPath;

        int tabsize;    ///< tab size the scanner uses.

        /** Create a new RefToken of type t.
         * The token comes from tokenFactory and gets the line/column recorded
         * as the start of the current token.
         */
        virtual RefToken makeToken(int t)
        {
                RefToken tok = tokenFactory();
                tok->setType(t);
                tok->setColumn(inputState->tokenStartColumn);
                tok->setLine(inputState->tokenStartLine);
                return tok;
        }

        /** Tracer class, used when -traceLexer is passed to antlr.
         * Calls traceIn() on the scanner when constructed and traceOut()
         * when destroyed.
         */
        class Tracer {
        private:
                CharScanner* parser;    // the scanner being traced
                const char* text;       // rule name handed to traceIn/traceOut

                Tracer(const Tracer& other);                                    // undefined
                Tracer& operator=(const Tracer& other);         // undefined
        public:
                Tracer( CharScanner* p,const char* t )
                : parser(p), text(t)
                {
                        parser->traceIn(text);
                }
                ~Tracer()
                {
                        parser->traceOut(text);
                }
        };

        int traceDepth;
private:
        // copying a scanner is not supported
        CharScanner( const CharScanner& other );                                        // undefined
        CharScanner& operator=( const CharScanner& other );     // undefined

#ifndef NO_STATIC_CONSTS
        static const int NO_CHAR = 0;
#else
        enum {
                NO_CHAR = 0
        };
#endif
};

/** Fetch the i-th lookahead value from the input buffer.
 * A case sensitive scanner hands the value back untouched; otherwise it is
 * first run through toLower() (which also deals with the VC 6 tolower bug).
 */
inline int CharScanner::LA(unsigned int i)
{
        const int lookahead = inputState->getInput().LA(i);
        return caseSensitive ? lookahead : toLower(lookahead);
}

/** Decide whether x orders before y.
 * Uses the case-insensitive C comparison when the scanner reports case
 * insensitive literals, and std::less on the strings otherwise.
 */
inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
{
        if (!scanner->getCaseSensitiveLiterals())
        {
#ifdef NO_STRCASECMP
                const int order = stricmp(x.c_str(),y.c_str());
#else
                const int order = strcasecmp(x.c_str(),y.c_str());
#endif
                return order < 0;
        }

        ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string> caseSensitiveLess;
        return caseSensitiveLess(x,y);
}

#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
}
#endif

#endif //INC_CharScanner_hpp__
Revision:	1782
Committed:	Wed Aug 22 02:28:28 2012 UTC (13 years, 2 months ago) by gezelter
File size:	13697 byte(s)
Log Message:	MERGE OpenMD development branch 1465:1781 into trunk
#	User	Rev	Content
1	tim	770	#ifndef INC_CharScanner_hpp__
2			#define INC_CharScanner_hpp__
3
4			/* ANTLR Translator Generator
5			* Project led by Terence Parr at http://www.jGuru.com
6			* Software rights: http://www.antlr.org/license.html
7			*
8	gezelter	1442	* $Id$
9	tim	770	*/
10
11			#include <antlr/config.hpp>
12	gezelter	1782	#include <cstdio>
13	tim	770	#include <map>
14	gezelter	1782	#include <cstring>
15	tim	770
16			#ifdef HAS_NOT_CCTYPE_H
17			#include <ctype.h>
18			#else
19			#include <cctype>
20			#endif
21
22			#if ( _MSC_VER == 1200 )
23			// VC6 seems to need this
24			// note that this is not a standard C++ include file.
25			# include <stdio.h>
26			#endif
27
28			#include <antlr/TokenStream.hpp>
29			#include <antlr/RecognitionException.hpp>
30			#include <antlr/SemanticException.hpp>
31			#include <antlr/MismatchedCharException.hpp>
32			#include <antlr/InputBuffer.hpp>
33			#include <antlr/BitSet.hpp>
34			#include <antlr/LexerSharedInputState.hpp>
35
36			#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
37			namespace antlr {
38			#endif
39
40			class ANTLR_API CharScanner;
41
42			ANTLR_C_USING(tolower)
43
44	gezelter	1558	#ifdef ANTLR_REALLY_NO_STRCASECMP
45			// Apparently, neither strcasecmp nor stricmp is standard, and Codewarrior
46			// on the mac has neither...
47	tim	770	inline int strcasecmp(const char *s1, const char *s2)
48			{
49			while (true)
50			{
51			char c1 = tolower(*s1++),
52			c2 = tolower(*s2++);
53			if (c1 < c2) return -1;
54			if (c1 > c2) return 1;
55			if (c1 == 0) return 0;
56			}
57			}
58	gezelter	1558	#else
59			#ifdef NO_STRCASECMP
60			ANTLR_C_USING(stricmp)
61			#else
62			ANTLR_C_USING(strcasecmp)
63	tim	770	#endif
64	gezelter	1558	#endif
65	tim	770
66			/** Functor for the literals map
67			*/
68			class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
69			private:
70			const CharScanner* scanner;
71			public:
72			#ifdef NO_TEMPLATE_PARTS
73			CharScannerLiteralsLess() {} // not really used, definition to appease MSVC
74			#endif
75			CharScannerLiteralsLess(const CharScanner* theScanner)
76			: scanner(theScanner)
77			{
78			}
79			bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
80			// defaults are good enough..
81			// CharScannerLiteralsLess(const CharScannerLiteralsLess&);
82			// CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
83			};
84
85			/** Superclass of generated lexers
86			*/
87			class ANTLR_API CharScanner : public TokenStream {
88			protected:
89			typedef RefToken (*factory_type)();
90			public:
91			CharScanner(InputBuffer& cb, bool case_sensitive );
92			CharScanner(InputBuffer* cb, bool case_sensitive );
93			CharScanner(const LexerSharedInputState& state, bool case_sensitive );
94
95			virtual ~CharScanner()
96			{
97			}
98
99			virtual int LA(unsigned int i);
100
101			virtual void append(char c)
102			{
103			if (saveConsumedInput)
104			{
105			size_t l = text.length();
106
107			if ((l%256) == 0)
108			text.reserve(l+256);
109
110			text.replace(l,0,&c,1);
111			}
112			}
113
114			virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
115			{
116			if( saveConsumedInput )
117			text += s;
118			}
119
120			virtual void commit()
121			{
122			inputState->getInput().commit();
123			}
124
125	gezelter	1558	/** called by the generated lexer to do error recovery, override to
126			* customize the behaviour.
127			*/
128			virtual void recover(const RecognitionException& ex, const BitSet& tokenSet)
129			{
130			consume();
131			consumeUntil(tokenSet);
132			}
133
134	tim	770	virtual void consume()
135			{
136			if (inputState->guessing == 0)
137			{
138			int c = LA(1);
139			if (caseSensitive)
140			{
141			append(c);
142			}
143			else
144			{
145			// use input.LA(), not LA(), to get original case
146			// CharScanner.LA() would toLower it.
147			append(inputState->getInput().LA(1));
148			}
149
150			// RK: in a sense I don't like this automatic handling.
151			if (c == '\t')
152			tab();
153			else
154			inputState->column++;
155			}
156			inputState->getInput().consume();
157			}
158
159			/** Consume chars until one matches the given char */
160			virtual void consumeUntil(int c)
161			{
162			for(;;)
163			{
164			int la_1 = LA(1);
165			if( la_1 == EOF_CHAR || la_1 == c )
166			break;
167			consume();
168			}
169			}
170
171			/** Consume chars until one matches the given set */
172			virtual void consumeUntil(const BitSet& set)
173			{
174			for(;;)
175			{
176			int la_1 = LA(1);
177			if( la_1 == EOF_CHAR || set.member(la_1) )
178			break;
179			consume();
180			}
181			}
182
183			/// Mark the current position and return a id for it
184			virtual unsigned int mark()
185			{
186			return inputState->getInput().mark();
187			}
188			/// Rewind the scanner to a previously marked position
189			virtual void rewind(unsigned int pos)
190			{
191			inputState->getInput().rewind(pos);
192			}
193
194			/// See if input contains character 'c' throw MismatchedCharException if not
195			virtual void match(int c)
196			{
197			int la_1 = LA(1);
198			if ( la_1 != c )
199			throw MismatchedCharException(la_1, c, false, this);
200			consume();
201			}
202
203			/** See if input contains element from bitset b
204			* throw MismatchedCharException if not
205			*/
206			virtual void match(const BitSet& b)
207			{
208			int la_1 = LA(1);
209
210			if ( !b.member(la_1) )
211			throw MismatchedCharException( la_1, b, false, this );
212			consume();
213			}
214
215			/** See if input contains string 's' throw MismatchedCharException if not
216			* @note the string cannot match EOF
217			*/
218			virtual void match( const char* s )
219			{
220			while( *s != '\0' )
221			{
222			// the & 0xFF is here to prevent sign extension lateron
223			int la_1 = LA(1), c = (*s++ & 0xFF);
224
225			if ( la_1 != c )
226			throw MismatchedCharException(la_1, c, false, this);
227
228			consume();
229			}
230			}
231			/** See if input contains string 's' throw MismatchedCharException if not
232			* @note the string cannot match EOF
233			*/
234			virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
235			{
236			size_t len = s.length();
237
238			for (size_t i = 0; i < len; i++)
239			{
240			// the & 0xFF is here to prevent sign extension lateron
241			int la_1 = LA(1), c = (s[i] & 0xFF);
242
243			if ( la_1 != c )
244			throw MismatchedCharException(la_1, c, false, this);
245
246			consume();
247			}
248			}
249			/** See if input does not contain character 'c'
250			* throw MismatchedCharException if not
251			*/
252			virtual void matchNot(int c)
253			{
254			int la_1 = LA(1);
255
256			if ( la_1 == c )
257			throw MismatchedCharException(la_1, c, true, this);
258
259			consume();
260			}
261			/** See if input contains character in range c1-c2
262			* throw MismatchedCharException if not
263			*/
264			virtual void matchRange(int c1, int c2)
265			{
266			int la_1 = LA(1);
267
268			if ( la_1 < c1 || la_1 > c2 )
269			throw MismatchedCharException(la_1, c1, c2, false, this);
270
271			consume();
272			}
273
274			virtual bool getCaseSensitive() const
275			{
276			return caseSensitive;
277			}
278
279			virtual void setCaseSensitive(bool t)
280			{
281			caseSensitive = t;
282			}
283
284			virtual bool getCaseSensitiveLiterals() const=0;
285
286			/// Get the line the scanner currently is in (starts at 1)
287			virtual int getLine() const
288			{
289			return inputState->line;
290			}
291
292			/// set the line number
293			virtual void setLine(int l)
294			{
295			inputState->line = l;
296			}
297
298			/// Get the column the scanner currently is in (starts at 1)
299			virtual int getColumn() const
300			{
301			return inputState->column;
302			}
303			/// set the column number
304			virtual void setColumn(int c)
305			{
306			inputState->column = c;
307			}
308
309			/// get the filename for the file currently used
310			virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
311			{
312			return inputState->filename;
313			}
314			/// Set the filename the scanner is using (used in error messages)
315			virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
316			{
317			inputState->filename = f;
318			}
319
320			virtual bool getCommitToPath() const
321			{
322			return commitToPath;
323			}
324
325			virtual void setCommitToPath(bool commit)
326			{
327			commitToPath = commit;
328			}
329
330			/** return a copy of the current text buffer */
331			virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
332			{
333			return text;
334			}
335
336			virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
337			{
338			text = s;
339			}
340
341			virtual void resetText()
342			{
343			text = "";
344			inputState->tokenStartColumn = inputState->column;
345			inputState->tokenStartLine = inputState->line;
346			}
347
348			virtual RefToken getTokenObject() const
349			{
350			return _returnToken;
351			}
352
353			/** Used to keep track of line breaks, needs to be called from
354			* within generated lexers when a \n \r is encountered.
355			*/
356			virtual void newline()
357			{
358			++inputState->line;
359			inputState->column = 1;
360			}
361
362			/** Advance the current column number by an appropriate amount according
363			* to the tabsize. This method needs to be explicitly called from the
364			* lexer rules encountering tabs.
365			*/
366			virtual void tab()
367			{
368			int c = getColumn();
369			int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1; // calculate tab stop
370			setColumn( nc );
371			}
372			/// set the tabsize. Returns the old tabsize
373			int setTabsize( int size )
374			{
375			int oldsize = tabsize;
376			tabsize = size;
377			return oldsize;
378			}
379			/// Return the tabsize used by the scanner
380			int getTabSize() const
381			{
382			return tabsize;
383			}
384
385			/** Report exception errors caught in nextToken() */
386			virtual void reportError(const RecognitionException& e);
387
388			/** Parser error-reporting function can be overridden in subclass */
389			virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
390
391			/** Parser warning-reporting function can be overridden in subclass */
392			virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);
393
394			virtual InputBuffer& getInputBuffer()
395			{
396			return inputState->getInput();
397			}
398
399			virtual LexerSharedInputState getInputState()
400			{
401			return inputState;
402			}
403
404			/** set the input state for the lexer.
405			* @note state is a reference counted object, hence no reference */
406			virtual void setInputState(LexerSharedInputState state)
407			{
408			inputState = state;
409			}
410
411			/// Set the factory for created tokens
412			virtual void setTokenObjectFactory(factory_type factory)
413			{
414			tokenFactory = factory;
415			}
416
417			/** Test the token text against the literals table
418			* Override this method to perform a different literals test
419			*/
420			virtual int testLiteralsTable(int ttype) const
421			{
422			ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
423			if (i != literals.end())
424			ttype = (*i).second;
425			return ttype;
426			}
427
428			/** Test the text passed in against the literals table
429			* Override this method to perform a different literals test
430			* This is used primarily when you want to test a portion of
431			* a token
432			*/
433			virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
434			{
435			ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
436			if (i != literals.end())
437			ttype = (*i).second;
438			return ttype;
439			}
440
441			/// Override this method to get more specific case handling
442			virtual int toLower(int c) const
443			{
444			// test on EOF_CHAR for buggy (?) STLPort tolower (or HPUX tolower?)
445			// also VC++ 6.0 does this. (see fix 422 (is reverted by this fix)
446			// this one is more structural. Maybe make this configurable.
447			return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
448			}
449
450			/** This method is called by YourLexer::nextToken() when the lexer has
451			* hit EOF condition. EOF is NOT a character.
452			* This method is not called if EOF is reached during
453			* syntactic predicate evaluation or during evaluation
454			* of normal lexical rules, which presumably would be
455			* an IOException. This traps the "normal" EOF condition.
456			*
457			* uponEOF() is called after the complete evaluation of
458			* the previous token and only if your parser asks
459			* for another token beyond that last non-EOF token.
460			*
461			* You might want to throw token or char stream exceptions
462			* like: "Heh, premature eof" or a retry stream exception
463			* ("I found the end of this file, go back to referencing file").
464			*/
465			virtual void uponEOF()
466			{
467			}
468
469			/// Methods used to change tracing behavior
470			virtual void traceIndent();
471			virtual void traceIn(const char* rname);
472			virtual void traceOut(const char* rname);
473
474			#ifndef NO_STATIC_CONSTS
475			static const int EOF_CHAR = EOF;
476			#else
477			enum {
478			EOF_CHAR = EOF
479			};
480			#endif
481			protected:
482			ANTLR_USE_NAMESPACE(std)string text; ///< Text of current token
483			/// flag indicating wether consume saves characters
484			bool saveConsumedInput;
485			factory_type tokenFactory; ///< Factory for tokens
486			bool caseSensitive; ///< Is this lexer case sensitive
487			ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass
488
489			RefToken _returnToken; ///< used to return tokens w/o using return val
490
491			/// Input state, gives access to input stream, shared among different lexers
492			LexerSharedInputState inputState;
493
494			/** Used during filter mode to indicate that path is desired.
495			* A subsequent scan error will report an error as usual
496			* if acceptPath=true;
497			*/
498			bool commitToPath;
499
500			int tabsize; ///< tab size the scanner uses.
501
502			/// Create a new RefToken of type t
503			virtual RefToken makeToken(int t)
504			{
505			RefToken tok = tokenFactory();
506			tok->setType(t);
507			tok->setColumn(inputState->tokenStartColumn);
508			tok->setLine(inputState->tokenStartLine);
509			return tok;
510			}
511
512			/** Tracer class, used when -traceLexer is passed to antlr
513			*/
514			class Tracer {
515			private:
516			CharScanner* parser;
517			const char* text;
518
519			Tracer(const Tracer& other); // undefined
520			Tracer& operator=(const Tracer& other); // undefined
521			public:
522			Tracer( CharScanner* p,const char* t )
523			: parser(p), text(t)
524			{
525			parser->traceIn(text);
526			}
527			~Tracer()
528			{
529			parser->traceOut(text);
530			}
531			};
532
533			int traceDepth;
534			private:
535			CharScanner( const CharScanner& other ); // undefined
536			CharScanner& operator=( const CharScanner& other ); // undefined
537
538			#ifndef NO_STATIC_CONSTS
539			static const int NO_CHAR = 0;
540			#else
541			enum {
542			NO_CHAR = 0
543			};
544			#endif
545			};
546
547			inline int CharScanner::LA(unsigned int i)
548			{
549			int c = inputState->getInput().LA(i);
550
551			if ( caseSensitive )
552			return c;
553			else
554			return toLower(c); // VC 6 tolower bug caught in toLower.
555			}
556
557			inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
558			{
559			if (scanner->getCaseSensitiveLiterals())
560			return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
561			else
562			{
563			#ifdef NO_STRCASECMP
564			return (stricmp(x.c_str(),y.c_str())<0);
565			#else
566			return (strcasecmp(x.c_str(),y.c_str())<0);
567			#endif
568			}
569			}
570
571			#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
572			}
573			#endif
574
575			#endif //INC_CharScanner_hpp__